1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 #include <sys/ethernet.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 101 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 102 * We also use conn_lock when updating things that affect the IP classifier 103 * lookup. 104 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 105 * 106 * The fanout lock uf_lock: 107 * When a UDP endpoint is bound to a local port, it is inserted into 108 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 109 * The size of the array is controlled by the udp_bind_fanout_size variable. 110 * This variable can be changed in /etc/system if the default value is 111 * not large enough. Each bind hash bucket is protected by a per bucket 112 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 113 * structure and a few other fields in the udp_t. A UDP endpoint is removed 114 * from the bind hash list only when it is being unbound or being closed. 115 * The per bucket lock also protects a UDP endpoint's state changes. 116 * 117 * The udp_rwlock: 118 * This protects most of the other fields in the udp_t. The exact list of 119 * fields which are protected by each of the above locks is documented in 120 * the udp_t structure definition. 121 * 122 * Plumbing notes: 123 * UDP is always a device driver. For compatibility with mibopen() code 124 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 125 * dummy module. 126 * 127 * The above implies that we don't support any intermediate module to 128 * reside in between /dev/ip and udp -- in fact, we never supported such 129 * scenario in the past as the inter-layer communication semantics have 130 * always been private. 131 */ 132 133 /* For /etc/system control */ 134 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 135 136 /* Option processing attrs */ 137 typedef struct udpattrs_s { 138 union { 139 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 140 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 141 } udpattr_ippu; 142 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 143 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 144 mblk_t *udpattr_mb; 145 boolean_t udpattr_credset; 146 } udpattrs_t; 147 148 static void udp_addr_req(queue_t *q, mblk_t *mp); 149 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 150 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 151 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 152 static int udp_build_hdrs(udp_t *udp); 153 static void udp_capability_req(queue_t *q, mblk_t *mp); 154 static int udp_tpi_close(queue_t *q, int flags); 155 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 156 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 157 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 158 int sys_error); 159 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 160 t_scalar_t tlierr, int unixerr); 161 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 162 cred_t *cr); 163 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 164 char *value, caddr_t cp, cred_t *cr); 165 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 166 char *value, caddr_t cp, cred_t *cr); 167 static void udp_icmp_error(conn_t *, mblk_t *); 168 static void udp_icmp_error_ipv6(conn_t *, mblk_t *); 169 static void udp_info_req(queue_t *q, mblk_t *mp); 170 static void udp_input(void *, mblk_t *, void *); 171 static void udp_lrput(queue_t *, mblk_t *); 172 static void udp_lwput(queue_t *, mblk_t *); 173 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 174 cred_t *credp, boolean_t isv6); 175 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 176 cred_t *credp); 177 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 178 cred_t *credp); 179 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 180 int *errorp, udpattrs_t *udpattrs); 181 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 182 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 183 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 184 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 185 cred_t *cr); 186 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 187 ipha_t *ipha); 188 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 189 t_scalar_t destlen, t_scalar_t err); 190 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 191 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 192 boolean_t random); 193 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 194 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 195 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 196 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 197 static void udp_wput_other(queue_t *q, mblk_t *mp); 198 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 199 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 200 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 201 202 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 203 static void udp_stack_fini(netstackid_t stackid, void *arg); 204 205 static void *udp_kstat_init(netstackid_t stackid); 206 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 207 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 208 static void udp_kstat2_fini(netstackid_t, kstat_t *); 209 static int udp_kstat_update(kstat_t *kp, int rw); 210 211 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 212 213 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 214 cred_t *, pid_t); 215 static void udp_ulp_recv(conn_t *, mblk_t *); 216 217 /* Common routine for TPI and socket module */ 218 static conn_t *udp_do_open(cred_t *, boolean_t, int); 219 static void udp_do_close(conn_t *); 220 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 221 boolean_t); 222 static int udp_do_unbind(conn_t *); 223 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 224 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 225 226 int udp_getsockname(sock_lower_handle_t, 227 struct sockaddr *, socklen_t *, cred_t *); 228 int udp_getpeername(sock_lower_handle_t, 229 struct sockaddr *, socklen_t *, cred_t *); 230 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 231 cred_t *cr); 232 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 233 234 #define UDP_RECV_HIWATER (56 * 1024) 235 #define UDP_RECV_LOWATER 128 236 #define UDP_XMIT_HIWATER (56 * 1024) 237 #define UDP_XMIT_LOWATER 1024 238 239 /* 240 * The following is defined in tcp.c 241 */ 242 extern int (*cl_inet_connect2)(netstackid_t stack_id, 243 uint8_t protocol, boolean_t is_outgoing, 244 sa_family_t addr_family, 245 uint8_t *laddrp, in_port_t lport, 246 uint8_t *faddrp, in_port_t fport, void *args); 247 248 /* 249 * Checks if the given destination addr/port is allowed out. 250 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 251 * Called for each connect() and for sendto()/sendmsg() to a different 252 * destination. 253 * For connect(), called in udp_connect(). 254 * For sendto()/sendmsg(), called in udp_output_v{4,6}(). 255 * 256 * This macro assumes that the cl_inet_connect2 hook is not NULL. 257 * Please check this before calling this macro. 258 * 259 * void 260 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 261 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 262 */ 263 #define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ 264 (err) = 0; \ 265 /* \ 266 * Running in cluster mode - check and register active \ 267 * "connection" information \ 268 */ \ 269 if ((udp)->udp_ipversion == IPV4_VERSION) \ 270 (err) = (*cl_inet_connect2)( \ 271 (cp)->conn_netstack->netstack_stackid, \ 272 IPPROTO_UDP, is_outgoing, AF_INET, \ 273 (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ 274 (udp)->udp_port, \ 275 (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ 276 (in_port_t)(fport), NULL); \ 277 else \ 278 (err) = (*cl_inet_connect2)( \ 279 (cp)->conn_netstack->netstack_stackid, \ 280 IPPROTO_UDP, is_outgoing, AF_INET6, \ 281 (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ 282 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 283 } 284 285 static struct module_info udp_mod_info = { 286 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 287 }; 288 289 /* 290 * Entry points for UDP as a device. 291 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 292 */ 293 static struct qinit udp_rinitv4 = { 294 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 295 }; 296 297 static struct qinit udp_rinitv6 = { 298 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 299 }; 300 301 static struct qinit udp_winit = { 302 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info 303 }; 304 305 /* UDP entry point during fallback */ 306 struct qinit udp_fallback_sock_winit = { 307 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 308 }; 309 310 /* 311 * UDP needs to handle I_LINK and I_PLINK since ifconfig 312 * likes to use it as a place to hang the various streams. 313 */ 314 static struct qinit udp_lrinit = { 315 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 316 }; 317 318 static struct qinit udp_lwinit = { 319 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 320 }; 321 322 /* For AF_INET aka /dev/udp */ 323 struct streamtab udpinfov4 = { 324 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 325 }; 326 327 /* For AF_INET6 aka /dev/udp6 */ 328 struct streamtab udpinfov6 = { 329 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 330 }; 331 332 static sin_t sin_null; /* Zero address for quick clears */ 333 static sin6_t sin6_null; /* Zero address for quick clears */ 334 335 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 336 337 /* Default structure copied into T_INFO_ACK messages */ 338 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 339 T_INFO_ACK, 340 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 341 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 342 T_INVALID, /* CDATA_size. udp does not support connect data. */ 343 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 344 sizeof (sin_t), /* ADDR_size. */ 345 0, /* OPT_size - not initialized here */ 346 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 347 T_CLTS, /* SERV_type. udp supports connection-less. */ 348 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 349 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 350 }; 351 352 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 353 354 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 355 T_INFO_ACK, 356 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 357 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 358 T_INVALID, /* CDATA_size. udp does not support connect data. */ 359 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 360 sizeof (sin6_t), /* ADDR_size. */ 361 0, /* OPT_size - not initialized here */ 362 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 363 T_CLTS, /* SERV_type. udp supports connection-less. */ 364 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 365 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 366 }; 367 368 /* largest UDP port number */ 369 #define UDP_MAX_PORT 65535 370 371 /* 372 * Table of ND variables supported by udp. These are loaded into us_nd 373 * in udp_open. 374 * All of these are alterable, within the min/max values given, at run time. 375 */ 376 /* BEGIN CSTYLED */ 377 udpparam_t udp_param_arr[] = { 378 /*min max value name */ 379 { 0L, 256, 32, "udp_wroff_extra" }, 380 { 1L, 255, 255, "udp_ipv4_ttl" }, 381 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 382 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 383 { 0, 1, 1, "udp_do_checksum" }, 384 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 385 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 386 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 387 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 388 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 389 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 390 }; 391 /* END CSTYLED */ 392 393 /* Setable in /etc/system */ 394 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 395 uint32_t udp_random_anon_port = 1; 396 397 /* 398 * Hook functions to enable cluster networking. 399 * On non-clustered systems these vectors must always be NULL 400 */ 401 402 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 403 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 404 void *args) = NULL; 405 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 406 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 407 void *args) = NULL; 408 409 typedef union T_primitives *t_primp_t; 410 411 /* 412 * Return the next anonymous port in the privileged port range for 413 * bind checking. 414 * 415 * Trusted Extension (TX) notes: TX allows administrator to mark or 416 * reserve ports as Multilevel ports (MLP). MLP has special function 417 * on TX systems. Once a port is made MLP, it's not available as 418 * ordinary port. This creates "holes" in the port name space. It 419 * may be necessary to skip the "holes" find a suitable anon port. 420 */ 421 static in_port_t 422 udp_get_next_priv_port(udp_t *udp) 423 { 424 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 425 in_port_t nextport; 426 boolean_t restart = B_FALSE; 427 udp_stack_t *us = udp->udp_us; 428 429 retry: 430 if (next_priv_port < us->us_min_anonpriv_port || 431 next_priv_port >= IPPORT_RESERVED) { 432 next_priv_port = IPPORT_RESERVED - 1; 433 if (restart) 434 return (0); 435 restart = B_TRUE; 436 } 437 438 if (is_system_labeled() && 439 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 440 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 441 next_priv_port = nextport; 442 goto retry; 443 } 444 445 return (next_priv_port--); 446 } 447 448 /* 449 * Hash list removal routine for udp_t structures. 450 */ 451 static void 452 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 453 { 454 udp_t *udpnext; 455 kmutex_t *lockp; 456 udp_stack_t *us = udp->udp_us; 457 458 if (udp->udp_ptpbhn == NULL) 459 return; 460 461 /* 462 * Extract the lock pointer in case there are concurrent 463 * hash_remove's for this instance. 464 */ 465 ASSERT(udp->udp_port != 0); 466 if (!caller_holds_lock) { 467 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 468 us->us_bind_fanout_size)].uf_lock; 469 ASSERT(lockp != NULL); 470 mutex_enter(lockp); 471 } 472 if (udp->udp_ptpbhn != NULL) { 473 udpnext = udp->udp_bind_hash; 474 if (udpnext != NULL) { 475 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 476 udp->udp_bind_hash = NULL; 477 } 478 *udp->udp_ptpbhn = udpnext; 479 udp->udp_ptpbhn = NULL; 480 } 481 if (!caller_holds_lock) { 482 mutex_exit(lockp); 483 } 484 } 485 486 static void 487 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 488 { 489 udp_t **udpp; 490 udp_t *udpnext; 491 492 ASSERT(MUTEX_HELD(&uf->uf_lock)); 493 ASSERT(udp->udp_ptpbhn == NULL); 494 udpp = &uf->uf_udp; 495 udpnext = udpp[0]; 496 if (udpnext != NULL) { 497 /* 498 * If the new udp bound to the INADDR_ANY address 499 * and the first one in the list is not bound to 500 * INADDR_ANY we skip all entries until we find the 501 * first one bound to INADDR_ANY. 502 * This makes sure that applications binding to a 503 * specific address get preference over those binding to 504 * INADDR_ANY. 505 */ 506 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 507 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 508 while ((udpnext = udpp[0]) != NULL && 509 !V6_OR_V4_INADDR_ANY( 510 udpnext->udp_bound_v6src)) { 511 udpp = &(udpnext->udp_bind_hash); 512 } 513 if (udpnext != NULL) 514 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 515 } else { 516 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 517 } 518 } 519 udp->udp_bind_hash = udpnext; 520 udp->udp_ptpbhn = udpp; 521 udpp[0] = udp; 522 } 523 524 /* 525 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 526 * passed to udp_wput. 527 * It associates a port number and local address with the stream. 528 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 529 * protocol type (IPPROTO_UDP) placed in the message following the address. 530 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 531 * (Called as writer.) 532 * 533 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 534 * without setting SO_REUSEADDR. This is needed so that they 535 * can be viewed as two independent transport protocols. 536 * However, anonymouns ports are allocated from the same range to avoid 537 * duplicating the us->us_next_port_to_try. 538 */ 539 static void 540 udp_tpi_bind(queue_t *q, mblk_t *mp) 541 { 542 sin_t *sin; 543 sin6_t *sin6; 544 mblk_t *mp1; 545 struct T_bind_req *tbr; 546 conn_t *connp; 547 udp_t *udp; 548 int error; 549 struct sockaddr *sa; 550 cred_t *cr; 551 552 /* 553 * All Solaris components should pass a db_credp 554 * for this TPI message, hence we ASSERT. 555 * But in case there is some other M_PROTO that looks 556 * like a TPI message sent by some other kernel 557 * component, we check and return an error. 558 */ 559 cr = msg_getcred(mp, NULL); 560 ASSERT(cr != NULL); 561 if (cr == NULL) { 562 udp_err_ack(q, mp, TSYSERR, EINVAL); 563 return; 564 } 565 566 connp = Q_TO_CONN(q); 567 udp = connp->conn_udp; 568 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 569 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 570 "udp_bind: bad req, len %u", 571 (uint_t)(mp->b_wptr - mp->b_rptr)); 572 udp_err_ack(q, mp, TPROTO, 0); 573 return; 574 } 575 if (udp->udp_state != TS_UNBND) { 576 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 577 "udp_bind: bad state, %u", udp->udp_state); 578 udp_err_ack(q, mp, TOUTSTATE, 0); 579 return; 580 } 581 /* 582 * Reallocate the message to make sure we have enough room for an 583 * address and the protocol type. 584 */ 585 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 586 if (!mp1) { 587 udp_err_ack(q, mp, TSYSERR, ENOMEM); 588 return; 589 } 590 591 mp = mp1; 592 593 /* Reset the message type in preparation for shipping it back. */ 594 DB_TYPE(mp) = M_PCPROTO; 595 596 tbr = (struct T_bind_req *)mp->b_rptr; 597 switch (tbr->ADDR_length) { 598 case 0: /* Request for a generic port */ 599 tbr->ADDR_offset = sizeof (struct T_bind_req); 600 if (udp->udp_family == AF_INET) { 601 tbr->ADDR_length = sizeof (sin_t); 602 sin = (sin_t *)&tbr[1]; 603 *sin = sin_null; 604 sin->sin_family = AF_INET; 605 mp->b_wptr = (uchar_t *)&sin[1]; 606 sa = (struct sockaddr *)sin; 607 } else { 608 ASSERT(udp->udp_family == AF_INET6); 609 tbr->ADDR_length = sizeof (sin6_t); 610 sin6 = (sin6_t *)&tbr[1]; 611 *sin6 = sin6_null; 612 sin6->sin6_family = AF_INET6; 613 mp->b_wptr = (uchar_t *)&sin6[1]; 614 sa = (struct sockaddr *)sin6; 615 } 616 break; 617 618 case sizeof (sin_t): /* Complete IPv4 address */ 619 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 620 sizeof (sin_t)); 621 if (sa == NULL || !OK_32PTR((char *)sa)) { 622 udp_err_ack(q, mp, TSYSERR, EINVAL); 623 return; 624 } 625 if (udp->udp_family != AF_INET || 626 sa->sa_family != AF_INET) { 627 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 628 return; 629 } 630 break; 631 632 case sizeof (sin6_t): /* complete IPv6 address */ 633 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 634 sizeof (sin6_t)); 635 if (sa == NULL || !OK_32PTR((char *)sa)) { 636 udp_err_ack(q, mp, TSYSERR, EINVAL); 637 return; 638 } 639 if (udp->udp_family != AF_INET6 || 640 sa->sa_family != AF_INET6) { 641 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 642 return; 643 } 644 break; 645 646 default: /* Invalid request */ 647 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 648 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 649 udp_err_ack(q, mp, TBADADDR, 0); 650 return; 651 } 652 653 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 654 tbr->PRIM_type != O_T_BIND_REQ); 655 656 if (error != 0) { 657 if (error > 0) { 658 udp_err_ack(q, mp, TSYSERR, error); 659 } else { 660 udp_err_ack(q, mp, -error, 0); 661 } 662 } else { 663 tbr->PRIM_type = T_BIND_ACK; 664 qreply(q, mp); 665 } 666 } 667 668 /* 669 * This routine handles each T_CONN_REQ message passed to udp. It 670 * associates a default destination address with the stream. 671 * 672 * This routine sends down a T_BIND_REQ to IP with the following mblks: 673 * T_BIND_REQ - specifying local and remote address/port 674 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 675 * T_OK_ACK - for the T_CONN_REQ 676 * T_CONN_CON - to keep the TPI user happy 677 * 678 * The connect completes in udp_do_connect. 679 * When a T_BIND_ACK is received information is extracted from the IRE 680 * and the two appended messages are sent to the TPI user. 681 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 682 * convert it to an error ack for the appropriate primitive. 683 */ 684 static void 685 udp_tpi_connect(queue_t *q, mblk_t *mp) 686 { 687 udp_t *udp; 688 conn_t *connp = Q_TO_CONN(q); 689 int error; 690 socklen_t len; 691 struct sockaddr *sa; 692 struct T_conn_req *tcr; 693 cred_t *cr; 694 695 /* 696 * All Solaris components should pass a db_credp 697 * for this TPI message, hence we ASSERT. 698 * But in case there is some other M_PROTO that looks 699 * like a TPI message sent by some other kernel 700 * component, we check and return an error. 701 */ 702 cr = msg_getcred(mp, NULL); 703 ASSERT(cr != NULL); 704 if (cr == NULL) { 705 udp_err_ack(q, mp, TSYSERR, EINVAL); 706 return; 707 } 708 709 udp = connp->conn_udp; 710 tcr = (struct T_conn_req *)mp->b_rptr; 711 712 /* A bit of sanity checking */ 713 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 714 udp_err_ack(q, mp, TPROTO, 0); 715 return; 716 } 717 718 if (tcr->OPT_length != 0) { 719 udp_err_ack(q, mp, TBADOPT, 0); 720 return; 721 } 722 723 /* 724 * Determine packet type based on type of address passed in 725 * the request should contain an IPv4 or IPv6 address. 726 * Make sure that address family matches the type of 727 * family of the the address passed down 728 */ 729 len = tcr->DEST_length; 730 switch (tcr->DEST_length) { 731 default: 732 udp_err_ack(q, mp, TBADADDR, 0); 733 return; 734 735 case sizeof (sin_t): 736 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 737 sizeof (sin_t)); 738 break; 739 740 case sizeof (sin6_t): 741 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 742 sizeof (sin6_t)); 743 break; 744 } 745 746 error = proto_verify_ip_addr(udp->udp_family, sa, len); 747 if (error != 0) { 748 udp_err_ack(q, mp, TSYSERR, error); 749 return; 750 } 751 752 error = udp_do_connect(connp, sa, len, cr); 753 if (error != 0) { 754 if (error < 0) 755 udp_err_ack(q, mp, -error, 0); 756 else 757 udp_err_ack(q, mp, TSYSERR, error); 758 } else { 759 mblk_t *mp1; 760 /* 761 * We have to send a connection confirmation to 762 * keep TLI happy. 763 */ 764 if (udp->udp_family == AF_INET) { 765 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 766 sizeof (sin_t), NULL, 0); 767 } else { 768 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 769 sizeof (sin6_t), NULL, 0); 770 } 771 if (mp1 == NULL) { 772 udp_err_ack(q, mp, TSYSERR, ENOMEM); 773 return; 774 } 775 776 /* 777 * Send ok_ack for T_CONN_REQ 778 */ 779 mp = mi_tpi_ok_ack_alloc(mp); 780 if (mp == NULL) { 781 /* Unable to reuse the T_CONN_REQ for the ack. */ 782 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 783 return; 784 } 785 786 putnext(connp->conn_rq, mp); 787 putnext(connp->conn_rq, mp1); 788 } 789 } 790 791 static int 792 udp_tpi_close(queue_t *q, int flags) 793 { 794 conn_t *connp; 795 796 if (flags & SO_FALLBACK) { 797 /* 798 * stream is being closed while in fallback 799 * simply free the resources that were allocated 800 */ 801 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 802 qprocsoff(q); 803 goto done; 804 } 805 806 connp = Q_TO_CONN(q); 807 udp_do_close(connp); 808 done: 809 q->q_ptr = WR(q)->q_ptr = NULL; 810 return (0); 811 } 812 813 /* 814 * Called in the close path to quiesce the conn 815 */ 816 void 817 udp_quiesce_conn(conn_t *connp) 818 { 819 udp_t *udp = connp->conn_udp; 820 821 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 822 /* 823 * Running in cluster mode - register unbind information 824 */ 825 if (udp->udp_ipversion == IPV4_VERSION) { 826 (*cl_inet_unbind)( 827 connp->conn_netstack->netstack_stackid, 828 IPPROTO_UDP, AF_INET, 829 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 830 (in_port_t)udp->udp_port, NULL); 831 } else { 832 (*cl_inet_unbind)( 833 connp->conn_netstack->netstack_stackid, 834 IPPROTO_UDP, AF_INET6, 835 (uint8_t *)(&(udp->udp_v6src)), 836 (in_port_t)udp->udp_port, NULL); 837 } 838 } 839 840 udp_bind_hash_remove(udp, B_FALSE); 841 842 } 843 844 void 845 udp_close_free(conn_t *connp) 846 { 847 udp_t *udp = connp->conn_udp; 848 849 /* If there are any options associated with the stream, free them. */ 850 if (udp->udp_ip_snd_options != NULL) { 851 mi_free((char *)udp->udp_ip_snd_options); 852 udp->udp_ip_snd_options = NULL; 853 udp->udp_ip_snd_options_len = 0; 854 } 855 856 if (udp->udp_ip_rcv_options != NULL) { 857 mi_free((char *)udp->udp_ip_rcv_options); 858 udp->udp_ip_rcv_options = NULL; 859 udp->udp_ip_rcv_options_len = 0; 860 } 861 862 /* Free memory associated with sticky options */ 863 if (udp->udp_sticky_hdrs_len != 0) { 864 kmem_free(udp->udp_sticky_hdrs, 865 udp->udp_sticky_hdrs_len); 866 udp->udp_sticky_hdrs = NULL; 867 udp->udp_sticky_hdrs_len = 0; 868 } 869 if (udp->udp_last_cred != NULL) { 870 crfree(udp->udp_last_cred); 871 udp->udp_last_cred = NULL; 872 } 873 if (udp->udp_effective_cred != NULL) { 874 crfree(udp->udp_effective_cred); 875 udp->udp_effective_cred = NULL; 876 } 877 878 ip6_pkt_free(&udp->udp_sticky_ipp); 879 880 /* 881 * Clear any fields which the kmem_cache constructor clears. 882 * Only udp_connp needs to be preserved. 883 * TBD: We should make this more efficient to avoid clearing 884 * everything. 885 */ 886 ASSERT(udp->udp_connp == connp); 887 bzero(udp, sizeof (udp_t)); 888 udp->udp_connp = connp; 889 } 890 891 static int 892 udp_do_disconnect(conn_t *connp) 893 { 894 udp_t *udp; 895 mblk_t *ire_mp; 896 udp_fanout_t *udpf; 897 udp_stack_t *us; 898 int error; 899 900 udp = connp->conn_udp; 901 us = udp->udp_us; 902 rw_enter(&udp->udp_rwlock, RW_WRITER); 903 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 904 rw_exit(&udp->udp_rwlock); 905 return (-TOUTSTATE); 906 } 907 udp->udp_pending_op = T_DISCON_REQ; 908 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 909 us->us_bind_fanout_size)]; 910 mutex_enter(&udpf->uf_lock); 911 udp->udp_v6src = udp->udp_bound_v6src; 912 udp->udp_state = TS_IDLE; 913 mutex_exit(&udpf->uf_lock); 914 915 if (udp->udp_family == AF_INET6) { 916 /* Rebuild the header template */ 917 error = udp_build_hdrs(udp); 918 if (error != 0) { 919 udp->udp_pending_op = -1; 920 rw_exit(&udp->udp_rwlock); 921 return (error); 922 } 923 } 924 925 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 926 if (ire_mp == NULL) { 927 mutex_enter(&udpf->uf_lock); 928 udp->udp_pending_op = -1; 929 mutex_exit(&udpf->uf_lock); 930 rw_exit(&udp->udp_rwlock); 931 return (ENOMEM); 932 } 933 934 rw_exit(&udp->udp_rwlock); 935 936 if (udp->udp_family == AF_INET6) { 937 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 938 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 939 } else { 940 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 941 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 942 } 943 944 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 945 } 946 947 948 static void 949 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 950 { 951 conn_t *connp = Q_TO_CONN(q); 952 int error; 953 954 /* 955 * Allocate the largest primitive we need to send back 956 * T_error_ack is > than T_ok_ack 957 */ 958 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 959 if (mp == NULL) { 960 /* Unable to reuse the T_DISCON_REQ for the ack. */ 961 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 962 return; 963 } 964 965 error = udp_do_disconnect(connp); 966 967 if (error != 0) { 968 if (error < 0) { 969 udp_err_ack(q, mp, -error, 0); 970 } else { 971 udp_err_ack(q, mp, TSYSERR, error); 972 } 973 } else { 974 mp = mi_tpi_ok_ack_alloc(mp); 975 ASSERT(mp != NULL); 976 qreply(q, mp); 977 } 978 } 979 980 int 981 udp_disconnect(conn_t *connp) 982 { 983 int error; 984 udp_t *udp = connp->conn_udp; 985 986 udp->udp_dgram_errind = B_FALSE; 987 988 error = udp_do_disconnect(connp); 989 990 if (error < 0) 991 error = proto_tlitosyserr(-error); 992 993 return (error); 994 } 995 996 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 997 static void 998 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 999 { 1000 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1001 qreply(q, mp); 1002 } 1003 1004 /* Shorthand to generate and send TPI error acks to our client */ 1005 static void 1006 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1007 int sys_error) 1008 { 1009 struct T_error_ack *teackp; 1010 1011 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1012 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1013 teackp = (struct T_error_ack *)mp->b_rptr; 1014 teackp->ERROR_prim = primitive; 1015 teackp->TLI_error = t_error; 1016 teackp->UNIX_error = sys_error; 1017 qreply(q, mp); 1018 } 1019 } 1020 1021 /*ARGSUSED*/ 1022 static int 1023 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1024 { 1025 int i; 1026 udp_t *udp = Q_TO_UDP(q); 1027 udp_stack_t *us = udp->udp_us; 1028 1029 for (i = 0; i < us->us_num_epriv_ports; i++) { 1030 if (us->us_epriv_ports[i] != 0) 1031 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1032 } 1033 return (0); 1034 } 1035 1036 /* ARGSUSED */ 1037 static int 1038 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1039 cred_t *cr) 1040 { 1041 long new_value; 1042 int i; 1043 udp_t *udp = Q_TO_UDP(q); 1044 udp_stack_t *us = udp->udp_us; 1045 1046 /* 1047 * Fail the request if the new value does not lie within the 1048 * port number limits. 1049 */ 1050 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1051 new_value <= 0 || new_value >= 65536) { 1052 return (EINVAL); 1053 } 1054 1055 /* Check if the value is already in the list */ 1056 for (i = 0; i < us->us_num_epriv_ports; i++) { 1057 if (new_value == us->us_epriv_ports[i]) { 1058 return (EEXIST); 1059 } 1060 } 1061 /* Find an empty slot */ 1062 for (i = 0; i < us->us_num_epriv_ports; i++) { 1063 if (us->us_epriv_ports[i] == 0) 1064 break; 1065 } 1066 if (i == us->us_num_epriv_ports) { 1067 return (EOVERFLOW); 1068 } 1069 1070 /* Set the new value */ 1071 us->us_epriv_ports[i] = (in_port_t)new_value; 1072 return (0); 1073 } 1074 1075 /* ARGSUSED */ 1076 static int 1077 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1078 cred_t *cr) 1079 { 1080 long new_value; 1081 int i; 1082 udp_t *udp = Q_TO_UDP(q); 1083 udp_stack_t *us = udp->udp_us; 1084 1085 /* 1086 * Fail the request if the new value does not lie within the 1087 * port number limits. 1088 */ 1089 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1090 new_value <= 0 || new_value >= 65536) { 1091 return (EINVAL); 1092 } 1093 1094 /* Check that the value is already in the list */ 1095 for (i = 0; i < us->us_num_epriv_ports; i++) { 1096 if (us->us_epriv_ports[i] == new_value) 1097 break; 1098 } 1099 if (i == us->us_num_epriv_ports) { 1100 return (ESRCH); 1101 } 1102 1103 /* Clear the value */ 1104 us->us_epriv_ports[i] = 0; 1105 return (0); 1106 } 1107 1108 /* At minimum we need 4 bytes of UDP header */ 1109 #define ICMP_MIN_UDP_HDR 4 1110 1111 /* 1112 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1113 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1114 * Assumes that IP has pulled up everything up to and including the ICMP header. 1115 */ 1116 static void 1117 udp_icmp_error(conn_t *connp, mblk_t *mp) 1118 { 1119 icmph_t *icmph; 1120 ipha_t *ipha; 1121 int iph_hdr_length; 1122 udpha_t *udpha; 1123 sin_t sin; 1124 sin6_t sin6; 1125 mblk_t *mp1; 1126 int error = 0; 1127 udp_t *udp = connp->conn_udp; 1128 1129 mp1 = NULL; 1130 ipha = (ipha_t *)mp->b_rptr; 1131 1132 ASSERT(OK_32PTR(mp->b_rptr)); 1133 1134 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1135 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1136 udp_icmp_error_ipv6(connp, mp); 1137 return; 1138 } 1139 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1140 1141 /* Skip past the outer IP and ICMP headers */ 1142 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1143 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1144 ipha = (ipha_t *)&icmph[1]; 1145 1146 /* Skip past the inner IP and find the ULP header */ 1147 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1148 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1149 1150 switch (icmph->icmph_type) { 1151 case ICMP_DEST_UNREACHABLE: 1152 switch (icmph->icmph_code) { 1153 case ICMP_FRAGMENTATION_NEEDED: 1154 /* 1155 * IP has already adjusted the path MTU. 1156 */ 1157 break; 1158 case ICMP_PORT_UNREACHABLE: 1159 case ICMP_PROTOCOL_UNREACHABLE: 1160 error = ECONNREFUSED; 1161 break; 1162 default: 1163 /* Transient errors */ 1164 break; 1165 } 1166 break; 1167 default: 1168 /* Transient errors */ 1169 break; 1170 } 1171 if (error == 0) { 1172 freemsg(mp); 1173 return; 1174 } 1175 1176 /* 1177 * Deliver T_UDERROR_IND when the application has asked for it. 1178 * The socket layer enables this automatically when connected. 1179 */ 1180 if (!udp->udp_dgram_errind) { 1181 freemsg(mp); 1182 return; 1183 } 1184 1185 1186 switch (udp->udp_family) { 1187 case AF_INET: 1188 sin = sin_null; 1189 sin.sin_family = AF_INET; 1190 sin.sin_addr.s_addr = ipha->ipha_dst; 1191 sin.sin_port = udpha->uha_dst_port; 1192 if (IPCL_IS_NONSTR(connp)) { 1193 rw_enter(&udp->udp_rwlock, RW_WRITER); 1194 if (udp->udp_state == TS_DATA_XFER) { 1195 if (sin.sin_port == udp->udp_dstport && 1196 sin.sin_addr.s_addr == 1197 V4_PART_OF_V6(udp->udp_v6dst)) { 1198 rw_exit(&udp->udp_rwlock); 1199 (*connp->conn_upcalls->su_set_error) 1200 (connp->conn_upper_handle, error); 1201 goto done; 1202 } 1203 } else { 1204 udp->udp_delayed_error = error; 1205 *((sin_t *)&udp->udp_delayed_addr) = sin; 1206 } 1207 rw_exit(&udp->udp_rwlock); 1208 } else { 1209 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1210 NULL, 0, error); 1211 } 1212 break; 1213 case AF_INET6: 1214 sin6 = sin6_null; 1215 sin6.sin6_family = AF_INET6; 1216 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1217 sin6.sin6_port = udpha->uha_dst_port; 1218 if (IPCL_IS_NONSTR(connp)) { 1219 rw_enter(&udp->udp_rwlock, RW_WRITER); 1220 if (udp->udp_state == TS_DATA_XFER) { 1221 if (sin6.sin6_port == udp->udp_dstport && 1222 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1223 &udp->udp_v6dst)) { 1224 rw_exit(&udp->udp_rwlock); 1225 (*connp->conn_upcalls->su_set_error) 1226 (connp->conn_upper_handle, error); 1227 goto done; 1228 } 1229 } else { 1230 udp->udp_delayed_error = error; 1231 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1232 } 1233 rw_exit(&udp->udp_rwlock); 1234 } else { 1235 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1236 NULL, 0, error); 1237 } 1238 break; 1239 } 1240 if (mp1 != NULL) 1241 putnext(connp->conn_rq, mp1); 1242 done: 1243 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1244 freemsg(mp); 1245 } 1246 1247 /* 1248 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1249 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1250 * Assumes that IP has pulled up all the extension headers as well as the 1251 * ICMPv6 header. 1252 */ 1253 static void 1254 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1255 { 1256 icmp6_t *icmp6; 1257 ip6_t *ip6h, *outer_ip6h; 1258 uint16_t iph_hdr_length; 1259 uint8_t *nexthdrp; 1260 udpha_t *udpha; 1261 sin6_t sin6; 1262 mblk_t *mp1; 1263 int error = 0; 1264 udp_t *udp = connp->conn_udp; 1265 udp_stack_t *us = udp->udp_us; 1266 1267 outer_ip6h = (ip6_t *)mp->b_rptr; 1268 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1269 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1270 else 1271 iph_hdr_length = IPV6_HDR_LEN; 1272 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1273 ip6h = (ip6_t *)&icmp6[1]; 1274 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1275 freemsg(mp); 1276 return; 1277 } 1278 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1279 1280 switch (icmp6->icmp6_type) { 1281 case ICMP6_DST_UNREACH: 1282 switch (icmp6->icmp6_code) { 1283 case ICMP6_DST_UNREACH_NOPORT: 1284 error = ECONNREFUSED; 1285 break; 1286 case ICMP6_DST_UNREACH_ADMIN: 1287 case ICMP6_DST_UNREACH_NOROUTE: 1288 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1289 case ICMP6_DST_UNREACH_ADDR: 1290 /* Transient errors */ 1291 break; 1292 default: 1293 break; 1294 } 1295 break; 1296 case ICMP6_PACKET_TOO_BIG: { 1297 struct T_unitdata_ind *tudi; 1298 struct T_opthdr *toh; 1299 size_t udi_size; 1300 mblk_t *newmp; 1301 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1302 sizeof (struct ip6_mtuinfo); 1303 sin6_t *sin6; 1304 struct ip6_mtuinfo *mtuinfo; 1305 1306 /* 1307 * If the application has requested to receive path mtu 1308 * information, send up an empty message containing an 1309 * IPV6_PATHMTU ancillary data item. 1310 */ 1311 if (!udp->udp_ipv6_recvpathmtu) 1312 break; 1313 1314 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1315 opt_length; 1316 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1317 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1318 break; 1319 } 1320 1321 /* 1322 * newmp->b_cont is left to NULL on purpose. This is an 1323 * empty message containing only ancillary data. 1324 */ 1325 newmp->b_datap->db_type = M_PROTO; 1326 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1327 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1328 tudi->PRIM_type = T_UNITDATA_IND; 1329 tudi->SRC_length = sizeof (sin6_t); 1330 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1331 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1332 tudi->OPT_length = opt_length; 1333 1334 sin6 = (sin6_t *)&tudi[1]; 1335 bzero(sin6, sizeof (sin6_t)); 1336 sin6->sin6_family = AF_INET6; 1337 sin6->sin6_addr = udp->udp_v6dst; 1338 1339 toh = (struct T_opthdr *)&sin6[1]; 1340 toh->level = IPPROTO_IPV6; 1341 toh->name = IPV6_PATHMTU; 1342 toh->len = opt_length; 1343 toh->status = 0; 1344 1345 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1346 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1347 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1348 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1349 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1350 /* 1351 * We've consumed everything we need from the original 1352 * message. Free it, then send our empty message. 1353 */ 1354 freemsg(mp); 1355 udp_ulp_recv(connp, newmp); 1356 1357 return; 1358 } 1359 case ICMP6_TIME_EXCEEDED: 1360 /* Transient errors */ 1361 break; 1362 case ICMP6_PARAM_PROB: 1363 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1364 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1365 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1366 (uchar_t *)nexthdrp) { 1367 error = ECONNREFUSED; 1368 break; 1369 } 1370 break; 1371 } 1372 if (error == 0) { 1373 freemsg(mp); 1374 return; 1375 } 1376 1377 /* 1378 * Deliver T_UDERROR_IND when the application has asked for it. 1379 * The socket layer enables this automatically when connected. 1380 */ 1381 if (!udp->udp_dgram_errind) { 1382 freemsg(mp); 1383 return; 1384 } 1385 1386 sin6 = sin6_null; 1387 sin6.sin6_family = AF_INET6; 1388 sin6.sin6_addr = ip6h->ip6_dst; 1389 sin6.sin6_port = udpha->uha_dst_port; 1390 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1391 1392 if (IPCL_IS_NONSTR(connp)) { 1393 rw_enter(&udp->udp_rwlock, RW_WRITER); 1394 if (udp->udp_state == TS_DATA_XFER) { 1395 if (sin6.sin6_port == udp->udp_dstport && 1396 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1397 &udp->udp_v6dst)) { 1398 rw_exit(&udp->udp_rwlock); 1399 (*connp->conn_upcalls->su_set_error) 1400 (connp->conn_upper_handle, error); 1401 goto done; 1402 } 1403 } else { 1404 udp->udp_delayed_error = error; 1405 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1406 } 1407 rw_exit(&udp->udp_rwlock); 1408 } else { 1409 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1410 NULL, 0, error); 1411 if (mp1 != NULL) 1412 putnext(connp->conn_rq, mp1); 1413 } 1414 done: 1415 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1416 freemsg(mp); 1417 } 1418 1419 /* 1420 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1421 * The local address is filled in if endpoint is bound. The remote address 1422 * is filled in if remote address has been precified ("connected endpoint") 1423 * (The concept of connected CLTS sockets is alien to published TPI 1424 * but we support it anyway). 1425 */ 1426 static void 1427 udp_addr_req(queue_t *q, mblk_t *mp) 1428 { 1429 sin_t *sin; 1430 sin6_t *sin6; 1431 mblk_t *ackmp; 1432 struct T_addr_ack *taa; 1433 udp_t *udp = Q_TO_UDP(q); 1434 1435 /* Make it large enough for worst case */ 1436 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1437 2 * sizeof (sin6_t), 1); 1438 if (ackmp == NULL) { 1439 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1440 return; 1441 } 1442 taa = (struct T_addr_ack *)ackmp->b_rptr; 1443 1444 bzero(taa, sizeof (struct T_addr_ack)); 1445 ackmp->b_wptr = (uchar_t *)&taa[1]; 1446 1447 taa->PRIM_type = T_ADDR_ACK; 1448 ackmp->b_datap->db_type = M_PCPROTO; 1449 rw_enter(&udp->udp_rwlock, RW_READER); 1450 /* 1451 * Note: Following code assumes 32 bit alignment of basic 1452 * data structures like sin_t and struct T_addr_ack. 1453 */ 1454 if (udp->udp_state != TS_UNBND) { 1455 /* 1456 * Fill in local address first 1457 */ 1458 taa->LOCADDR_offset = sizeof (*taa); 1459 if (udp->udp_family == AF_INET) { 1460 taa->LOCADDR_length = sizeof (sin_t); 1461 sin = (sin_t *)&taa[1]; 1462 /* Fill zeroes and then initialize non-zero fields */ 1463 *sin = sin_null; 1464 sin->sin_family = AF_INET; 1465 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1466 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1467 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1468 sin->sin_addr.s_addr); 1469 } else { 1470 /* 1471 * INADDR_ANY 1472 * udp_v6src is not set, we might be bound to 1473 * broadcast/multicast. Use udp_bound_v6src as 1474 * local address instead (that could 1475 * also still be INADDR_ANY) 1476 */ 1477 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1478 sin->sin_addr.s_addr); 1479 } 1480 sin->sin_port = udp->udp_port; 1481 ackmp->b_wptr = (uchar_t *)&sin[1]; 1482 if (udp->udp_state == TS_DATA_XFER) { 1483 /* 1484 * connected, fill remote address too 1485 */ 1486 taa->REMADDR_length = sizeof (sin_t); 1487 /* assumed 32-bit alignment */ 1488 taa->REMADDR_offset = taa->LOCADDR_offset + 1489 taa->LOCADDR_length; 1490 1491 sin = (sin_t *)(ackmp->b_rptr + 1492 taa->REMADDR_offset); 1493 /* initialize */ 1494 *sin = sin_null; 1495 sin->sin_family = AF_INET; 1496 sin->sin_addr.s_addr = 1497 V4_PART_OF_V6(udp->udp_v6dst); 1498 sin->sin_port = udp->udp_dstport; 1499 ackmp->b_wptr = (uchar_t *)&sin[1]; 1500 } 1501 } else { 1502 taa->LOCADDR_length = sizeof (sin6_t); 1503 sin6 = (sin6_t *)&taa[1]; 1504 /* Fill zeroes and then initialize non-zero fields */ 1505 *sin6 = sin6_null; 1506 sin6->sin6_family = AF_INET6; 1507 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1508 sin6->sin6_addr = udp->udp_v6src; 1509 } else { 1510 /* 1511 * UNSPECIFIED 1512 * udp_v6src is not set, we might be bound to 1513 * broadcast/multicast. Use udp_bound_v6src as 1514 * local address instead (that could 1515 * also still be UNSPECIFIED) 1516 */ 1517 sin6->sin6_addr = 1518 udp->udp_bound_v6src; 1519 } 1520 sin6->sin6_port = udp->udp_port; 1521 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1522 if (udp->udp_state == TS_DATA_XFER) { 1523 /* 1524 * connected, fill remote address too 1525 */ 1526 taa->REMADDR_length = sizeof (sin6_t); 1527 /* assumed 32-bit alignment */ 1528 taa->REMADDR_offset = taa->LOCADDR_offset + 1529 taa->LOCADDR_length; 1530 1531 sin6 = (sin6_t *)(ackmp->b_rptr + 1532 taa->REMADDR_offset); 1533 /* initialize */ 1534 *sin6 = sin6_null; 1535 sin6->sin6_family = AF_INET6; 1536 sin6->sin6_addr = udp->udp_v6dst; 1537 sin6->sin6_port = udp->udp_dstport; 1538 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1539 } 1540 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1541 } 1542 } 1543 rw_exit(&udp->udp_rwlock); 1544 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1545 qreply(q, ackmp); 1546 } 1547 1548 static void 1549 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1550 { 1551 if (udp->udp_family == AF_INET) { 1552 *tap = udp_g_t_info_ack_ipv4; 1553 } else { 1554 *tap = udp_g_t_info_ack_ipv6; 1555 } 1556 tap->CURRENT_state = udp->udp_state; 1557 tap->OPT_size = udp_max_optsize; 1558 } 1559 1560 static void 1561 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1562 t_uscalar_t cap_bits1) 1563 { 1564 tcap->CAP_bits1 = 0; 1565 1566 if (cap_bits1 & TC1_INFO) { 1567 udp_copy_info(&tcap->INFO_ack, udp); 1568 tcap->CAP_bits1 |= TC1_INFO; 1569 } 1570 } 1571 1572 /* 1573 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1574 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1575 * udp_g_t_info_ack. The current state of the stream is copied from 1576 * udp_state. 1577 */ 1578 static void 1579 udp_capability_req(queue_t *q, mblk_t *mp) 1580 { 1581 t_uscalar_t cap_bits1; 1582 struct T_capability_ack *tcap; 1583 udp_t *udp = Q_TO_UDP(q); 1584 1585 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1586 1587 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1588 mp->b_datap->db_type, T_CAPABILITY_ACK); 1589 if (!mp) 1590 return; 1591 1592 tcap = (struct T_capability_ack *)mp->b_rptr; 1593 udp_do_capability_ack(udp, tcap, cap_bits1); 1594 1595 qreply(q, mp); 1596 } 1597 1598 /* 1599 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1600 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1601 * The current state of the stream is copied from udp_state. 1602 */ 1603 static void 1604 udp_info_req(queue_t *q, mblk_t *mp) 1605 { 1606 udp_t *udp = Q_TO_UDP(q); 1607 1608 /* Create a T_INFO_ACK message. */ 1609 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1610 T_INFO_ACK); 1611 if (!mp) 1612 return; 1613 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1614 qreply(q, mp); 1615 } 1616 1617 /* For /dev/udp aka AF_INET open */ 1618 static int 1619 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1620 { 1621 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1622 } 1623 1624 /* For /dev/udp6 aka AF_INET6 open */ 1625 static int 1626 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1627 { 1628 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1629 } 1630 1631 /* 1632 * This is the open routine for udp. It allocates a udp_t structure for 1633 * the stream and, on the first open of the module, creates an ND table. 1634 */ 1635 /*ARGSUSED2*/ 1636 static int 1637 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1638 boolean_t isv6) 1639 { 1640 int error; 1641 udp_t *udp; 1642 conn_t *connp; 1643 dev_t conn_dev; 1644 udp_stack_t *us; 1645 vmem_t *minor_arena; 1646 1647 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1648 1649 /* If the stream is already open, return immediately. */ 1650 if (q->q_ptr != NULL) 1651 return (0); 1652 1653 if (sflag == MODOPEN) 1654 return (EINVAL); 1655 1656 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1657 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1658 minor_arena = ip_minor_arena_la; 1659 } else { 1660 /* 1661 * Either minor numbers in the large arena were exhausted 1662 * or a non socket application is doing the open. 1663 * Try to allocate from the small arena. 1664 */ 1665 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1666 return (EBUSY); 1667 1668 minor_arena = ip_minor_arena_sa; 1669 } 1670 1671 if (flag & SO_FALLBACK) { 1672 /* 1673 * Non streams socket needs a stream to fallback to 1674 */ 1675 RD(q)->q_ptr = (void *)conn_dev; 1676 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1677 WR(q)->q_ptr = (void *)minor_arena; 1678 qprocson(q); 1679 return (0); 1680 } 1681 1682 connp = udp_do_open(credp, isv6, KM_SLEEP); 1683 if (connp == NULL) { 1684 inet_minor_free(minor_arena, conn_dev); 1685 return (ENOMEM); 1686 } 1687 udp = connp->conn_udp; 1688 us = udp->udp_us; 1689 1690 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1691 connp->conn_dev = conn_dev; 1692 connp->conn_minor_arena = minor_arena; 1693 1694 /* 1695 * Initialize the udp_t structure for this stream. 1696 */ 1697 q->q_ptr = connp; 1698 WR(q)->q_ptr = connp; 1699 connp->conn_rq = q; 1700 connp->conn_wq = WR(q); 1701 1702 rw_enter(&udp->udp_rwlock, RW_WRITER); 1703 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1704 ASSERT(connp->conn_udp == udp); 1705 ASSERT(udp->udp_connp == connp); 1706 1707 if (flag & SO_SOCKSTR) { 1708 connp->conn_flags |= IPCL_SOCKET; 1709 udp->udp_issocket = B_TRUE; 1710 } 1711 1712 q->q_hiwat = us->us_recv_hiwat; 1713 WR(q)->q_hiwat = us->us_xmit_hiwat; 1714 WR(q)->q_lowat = us->us_xmit_lowat; 1715 1716 qprocson(q); 1717 1718 if (udp->udp_family == AF_INET6) { 1719 /* Build initial header template for transmit */ 1720 if ((error = udp_build_hdrs(udp)) != 0) { 1721 rw_exit(&udp->udp_rwlock); 1722 qprocsoff(q); 1723 inet_minor_free(minor_arena, conn_dev); 1724 ipcl_conn_destroy(connp); 1725 return (error); 1726 } 1727 } 1728 rw_exit(&udp->udp_rwlock); 1729 1730 /* Set the Stream head write offset and high watermark. */ 1731 (void) proto_set_tx_wroff(q, connp, 1732 udp->udp_max_hdr_len + us->us_wroff_extra); 1733 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */ 1734 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1735 1736 mutex_enter(&connp->conn_lock); 1737 connp->conn_state_flags &= ~CONN_INCIPIENT; 1738 mutex_exit(&connp->conn_lock); 1739 return (0); 1740 } 1741 1742 /* 1743 * Which UDP options OK to set through T_UNITDATA_REQ... 1744 */ 1745 /* ARGSUSED */ 1746 static boolean_t 1747 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1748 { 1749 return (B_TRUE); 1750 } 1751 1752 /* 1753 * This routine gets default values of certain options whose default 1754 * values are maintained by protcol specific code 1755 */ 1756 /* ARGSUSED */ 1757 int 1758 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1759 { 1760 udp_t *udp = Q_TO_UDP(q); 1761 udp_stack_t *us = udp->udp_us; 1762 int *i1 = (int *)ptr; 1763 1764 switch (level) { 1765 case IPPROTO_IP: 1766 switch (name) { 1767 case IP_MULTICAST_TTL: 1768 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1769 return (sizeof (uchar_t)); 1770 case IP_MULTICAST_LOOP: 1771 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1772 return (sizeof (uchar_t)); 1773 } 1774 break; 1775 case IPPROTO_IPV6: 1776 switch (name) { 1777 case IPV6_MULTICAST_HOPS: 1778 *i1 = IP_DEFAULT_MULTICAST_TTL; 1779 return (sizeof (int)); 1780 case IPV6_MULTICAST_LOOP: 1781 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1782 return (sizeof (int)); 1783 case IPV6_UNICAST_HOPS: 1784 *i1 = us->us_ipv6_hoplimit; 1785 return (sizeof (int)); 1786 } 1787 break; 1788 } 1789 return (-1); 1790 } 1791 1792 /* 1793 * This routine retrieves the current status of socket options. 1794 * It returns the size of the option retrieved. 1795 */ 1796 static int 1797 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1798 { 1799 udp_t *udp = connp->conn_udp; 1800 udp_stack_t *us = udp->udp_us; 1801 int *i1 = (int *)ptr; 1802 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 1803 int len; 1804 1805 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 1806 switch (level) { 1807 case SOL_SOCKET: 1808 switch (name) { 1809 case SO_DEBUG: 1810 *i1 = udp->udp_debug; 1811 break; /* goto sizeof (int) option return */ 1812 case SO_REUSEADDR: 1813 *i1 = udp->udp_reuseaddr; 1814 break; /* goto sizeof (int) option return */ 1815 case SO_TYPE: 1816 *i1 = SOCK_DGRAM; 1817 break; /* goto sizeof (int) option return */ 1818 1819 /* 1820 * The following three items are available here, 1821 * but are only meaningful to IP. 1822 */ 1823 case SO_DONTROUTE: 1824 *i1 = udp->udp_dontroute; 1825 break; /* goto sizeof (int) option return */ 1826 case SO_USELOOPBACK: 1827 *i1 = udp->udp_useloopback; 1828 break; /* goto sizeof (int) option return */ 1829 case SO_BROADCAST: 1830 *i1 = udp->udp_broadcast; 1831 break; /* goto sizeof (int) option return */ 1832 1833 case SO_SNDBUF: 1834 *i1 = udp->udp_xmit_hiwat; 1835 break; /* goto sizeof (int) option return */ 1836 case SO_RCVBUF: 1837 *i1 = udp->udp_rcv_disply_hiwat; 1838 break; /* goto sizeof (int) option return */ 1839 case SO_DGRAM_ERRIND: 1840 *i1 = udp->udp_dgram_errind; 1841 break; /* goto sizeof (int) option return */ 1842 case SO_RECVUCRED: 1843 *i1 = udp->udp_recvucred; 1844 break; /* goto sizeof (int) option return */ 1845 case SO_TIMESTAMP: 1846 *i1 = udp->udp_timestamp; 1847 break; /* goto sizeof (int) option return */ 1848 case SO_ANON_MLP: 1849 *i1 = connp->conn_anon_mlp; 1850 break; /* goto sizeof (int) option return */ 1851 case SO_MAC_EXEMPT: 1852 *i1 = connp->conn_mac_exempt; 1853 break; /* goto sizeof (int) option return */ 1854 case SO_ALLZONES: 1855 *i1 = connp->conn_allzones; 1856 break; /* goto sizeof (int) option return */ 1857 case SO_EXCLBIND: 1858 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 1859 break; 1860 case SO_PROTOTYPE: 1861 *i1 = IPPROTO_UDP; 1862 break; 1863 case SO_DOMAIN: 1864 *i1 = udp->udp_family; 1865 break; 1866 default: 1867 return (-1); 1868 } 1869 break; 1870 case IPPROTO_IP: 1871 if (udp->udp_family != AF_INET) 1872 return (-1); 1873 switch (name) { 1874 case IP_OPTIONS: 1875 case T_IP_OPTIONS: 1876 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 1877 if (len > 0) { 1878 bcopy(udp->udp_ip_rcv_options + 1879 udp->udp_label_len, ptr, len); 1880 } 1881 return (len); 1882 case IP_TOS: 1883 case T_IP_TOS: 1884 *i1 = (int)udp->udp_type_of_service; 1885 break; /* goto sizeof (int) option return */ 1886 case IP_TTL: 1887 *i1 = (int)udp->udp_ttl; 1888 break; /* goto sizeof (int) option return */ 1889 case IP_DHCPINIT_IF: 1890 return (-EINVAL); 1891 case IP_NEXTHOP: 1892 case IP_RECVPKTINFO: 1893 /* 1894 * This also handles IP_PKTINFO. 1895 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1896 * Differentiation is based on the size of the argument 1897 * passed in. 1898 * This option is handled in IP which will return an 1899 * error for IP_PKTINFO as it's not supported as a 1900 * sticky option. 1901 */ 1902 return (-EINVAL); 1903 case IP_MULTICAST_IF: 1904 /* 0 address if not set */ 1905 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 1906 return (sizeof (ipaddr_t)); 1907 case IP_MULTICAST_TTL: 1908 *(uchar_t *)ptr = udp->udp_multicast_ttl; 1909 return (sizeof (uchar_t)); 1910 case IP_MULTICAST_LOOP: 1911 *ptr = connp->conn_multicast_loop; 1912 return (sizeof (uint8_t)); 1913 case IP_RECVOPTS: 1914 *i1 = udp->udp_recvopts; 1915 break; /* goto sizeof (int) option return */ 1916 case IP_RECVDSTADDR: 1917 *i1 = udp->udp_recvdstaddr; 1918 break; /* goto sizeof (int) option return */ 1919 case IP_RECVIF: 1920 *i1 = udp->udp_recvif; 1921 break; /* goto sizeof (int) option return */ 1922 case IP_RECVSLLA: 1923 *i1 = udp->udp_recvslla; 1924 break; /* goto sizeof (int) option return */ 1925 case IP_RECVTTL: 1926 *i1 = udp->udp_recvttl; 1927 break; /* goto sizeof (int) option return */ 1928 case IP_ADD_MEMBERSHIP: 1929 case IP_DROP_MEMBERSHIP: 1930 case IP_BLOCK_SOURCE: 1931 case IP_UNBLOCK_SOURCE: 1932 case IP_ADD_SOURCE_MEMBERSHIP: 1933 case IP_DROP_SOURCE_MEMBERSHIP: 1934 case MCAST_JOIN_GROUP: 1935 case MCAST_LEAVE_GROUP: 1936 case MCAST_BLOCK_SOURCE: 1937 case MCAST_UNBLOCK_SOURCE: 1938 case MCAST_JOIN_SOURCE_GROUP: 1939 case MCAST_LEAVE_SOURCE_GROUP: 1940 /* cannot "get" the value for these */ 1941 return (-1); 1942 case IP_BOUND_IF: 1943 /* Zero if not set */ 1944 *i1 = udp->udp_bound_if; 1945 break; /* goto sizeof (int) option return */ 1946 case IP_UNSPEC_SRC: 1947 *i1 = udp->udp_unspec_source; 1948 break; /* goto sizeof (int) option return */ 1949 case IP_BROADCAST_TTL: 1950 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1951 return (sizeof (uchar_t)); 1952 default: 1953 return (-1); 1954 } 1955 break; 1956 case IPPROTO_IPV6: 1957 if (udp->udp_family != AF_INET6) 1958 return (-1); 1959 switch (name) { 1960 case IPV6_UNICAST_HOPS: 1961 *i1 = (unsigned int)udp->udp_ttl; 1962 break; /* goto sizeof (int) option return */ 1963 case IPV6_MULTICAST_IF: 1964 /* 0 index if not set */ 1965 *i1 = udp->udp_multicast_if_index; 1966 break; /* goto sizeof (int) option return */ 1967 case IPV6_MULTICAST_HOPS: 1968 *i1 = udp->udp_multicast_ttl; 1969 break; /* goto sizeof (int) option return */ 1970 case IPV6_MULTICAST_LOOP: 1971 *i1 = connp->conn_multicast_loop; 1972 break; /* goto sizeof (int) option return */ 1973 case IPV6_JOIN_GROUP: 1974 case IPV6_LEAVE_GROUP: 1975 case MCAST_JOIN_GROUP: 1976 case MCAST_LEAVE_GROUP: 1977 case MCAST_BLOCK_SOURCE: 1978 case MCAST_UNBLOCK_SOURCE: 1979 case MCAST_JOIN_SOURCE_GROUP: 1980 case MCAST_LEAVE_SOURCE_GROUP: 1981 /* cannot "get" the value for these */ 1982 return (-1); 1983 case IPV6_BOUND_IF: 1984 /* Zero if not set */ 1985 *i1 = udp->udp_bound_if; 1986 break; /* goto sizeof (int) option return */ 1987 case IPV6_UNSPEC_SRC: 1988 *i1 = udp->udp_unspec_source; 1989 break; /* goto sizeof (int) option return */ 1990 case IPV6_RECVPKTINFO: 1991 *i1 = udp->udp_ip_recvpktinfo; 1992 break; /* goto sizeof (int) option return */ 1993 case IPV6_RECVTCLASS: 1994 *i1 = udp->udp_ipv6_recvtclass; 1995 break; /* goto sizeof (int) option return */ 1996 case IPV6_RECVPATHMTU: 1997 *i1 = udp->udp_ipv6_recvpathmtu; 1998 break; /* goto sizeof (int) option return */ 1999 case IPV6_RECVHOPLIMIT: 2000 *i1 = udp->udp_ipv6_recvhoplimit; 2001 break; /* goto sizeof (int) option return */ 2002 case IPV6_RECVHOPOPTS: 2003 *i1 = udp->udp_ipv6_recvhopopts; 2004 break; /* goto sizeof (int) option return */ 2005 case IPV6_RECVDSTOPTS: 2006 *i1 = udp->udp_ipv6_recvdstopts; 2007 break; /* goto sizeof (int) option return */ 2008 case _OLD_IPV6_RECVDSTOPTS: 2009 *i1 = udp->udp_old_ipv6_recvdstopts; 2010 break; /* goto sizeof (int) option return */ 2011 case IPV6_RECVRTHDRDSTOPTS: 2012 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2013 break; /* goto sizeof (int) option return */ 2014 case IPV6_RECVRTHDR: 2015 *i1 = udp->udp_ipv6_recvrthdr; 2016 break; /* goto sizeof (int) option return */ 2017 case IPV6_PKTINFO: { 2018 /* XXX assumes that caller has room for max size! */ 2019 struct in6_pktinfo *pkti; 2020 2021 pkti = (struct in6_pktinfo *)ptr; 2022 if (ipp->ipp_fields & IPPF_IFINDEX) 2023 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2024 else 2025 pkti->ipi6_ifindex = 0; 2026 if (ipp->ipp_fields & IPPF_ADDR) 2027 pkti->ipi6_addr = ipp->ipp_addr; 2028 else 2029 pkti->ipi6_addr = ipv6_all_zeros; 2030 return (sizeof (struct in6_pktinfo)); 2031 } 2032 case IPV6_TCLASS: 2033 if (ipp->ipp_fields & IPPF_TCLASS) 2034 *i1 = ipp->ipp_tclass; 2035 else 2036 *i1 = IPV6_FLOW_TCLASS( 2037 IPV6_DEFAULT_VERS_AND_FLOW); 2038 break; /* goto sizeof (int) option return */ 2039 case IPV6_NEXTHOP: { 2040 sin6_t *sin6 = (sin6_t *)ptr; 2041 2042 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2043 return (0); 2044 *sin6 = sin6_null; 2045 sin6->sin6_family = AF_INET6; 2046 sin6->sin6_addr = ipp->ipp_nexthop; 2047 return (sizeof (sin6_t)); 2048 } 2049 case IPV6_HOPOPTS: 2050 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2051 return (0); 2052 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2053 return (0); 2054 /* 2055 * The cipso/label option is added by kernel. 2056 * User is not usually aware of this option. 2057 * We copy out the hbh opt after the label option. 2058 */ 2059 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2060 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2061 if (udp->udp_label_len_v6 > 0) { 2062 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2063 ptr[1] = (ipp->ipp_hopoptslen - 2064 udp->udp_label_len_v6 + 7) / 8 - 1; 2065 } 2066 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2067 case IPV6_RTHDRDSTOPTS: 2068 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2069 return (0); 2070 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2071 return (ipp->ipp_rtdstoptslen); 2072 case IPV6_RTHDR: 2073 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2074 return (0); 2075 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2076 return (ipp->ipp_rthdrlen); 2077 case IPV6_DSTOPTS: 2078 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2079 return (0); 2080 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2081 return (ipp->ipp_dstoptslen); 2082 case IPV6_PATHMTU: 2083 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2084 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2085 us->us_netstack)); 2086 default: 2087 return (-1); 2088 } 2089 break; 2090 case IPPROTO_UDP: 2091 switch (name) { 2092 case UDP_ANONPRIVBIND: 2093 *i1 = udp->udp_anon_priv_bind; 2094 break; 2095 case UDP_EXCLBIND: 2096 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2097 break; 2098 case UDP_RCVHDR: 2099 *i1 = udp->udp_rcvhdr ? 1 : 0; 2100 break; 2101 case UDP_NAT_T_ENDPOINT: 2102 *i1 = udp->udp_nat_t_endpoint; 2103 break; 2104 default: 2105 return (-1); 2106 } 2107 break; 2108 default: 2109 return (-1); 2110 } 2111 return (sizeof (int)); 2112 } 2113 2114 int 2115 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2116 { 2117 udp_t *udp; 2118 int err; 2119 2120 udp = Q_TO_UDP(q); 2121 2122 rw_enter(&udp->udp_rwlock, RW_READER); 2123 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2124 rw_exit(&udp->udp_rwlock); 2125 return (err); 2126 } 2127 2128 /* 2129 * This routine sets socket options. 2130 */ 2131 /* ARGSUSED */ 2132 static int 2133 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2134 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2135 void *thisdg_attrs, boolean_t checkonly) 2136 { 2137 udpattrs_t *attrs = thisdg_attrs; 2138 int *i1 = (int *)invalp; 2139 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2140 udp_t *udp = connp->conn_udp; 2141 udp_stack_t *us = udp->udp_us; 2142 int error; 2143 uint_t newlen; 2144 size_t sth_wroff; 2145 2146 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2147 /* 2148 * For fixed length options, no sanity check 2149 * of passed in length is done. It is assumed *_optcom_req() 2150 * routines do the right thing. 2151 */ 2152 switch (level) { 2153 case SOL_SOCKET: 2154 switch (name) { 2155 case SO_REUSEADDR: 2156 if (!checkonly) { 2157 udp->udp_reuseaddr = onoff; 2158 PASS_OPT_TO_IP(connp); 2159 } 2160 break; 2161 case SO_DEBUG: 2162 if (!checkonly) 2163 udp->udp_debug = onoff; 2164 break; 2165 /* 2166 * The following three items are available here, 2167 * but are only meaningful to IP. 2168 */ 2169 case SO_DONTROUTE: 2170 if (!checkonly) { 2171 udp->udp_dontroute = onoff; 2172 PASS_OPT_TO_IP(connp); 2173 } 2174 break; 2175 case SO_USELOOPBACK: 2176 if (!checkonly) { 2177 udp->udp_useloopback = onoff; 2178 PASS_OPT_TO_IP(connp); 2179 } 2180 break; 2181 case SO_BROADCAST: 2182 if (!checkonly) { 2183 udp->udp_broadcast = onoff; 2184 PASS_OPT_TO_IP(connp); 2185 } 2186 break; 2187 2188 case SO_SNDBUF: 2189 if (*i1 > us->us_max_buf) { 2190 *outlenp = 0; 2191 return (ENOBUFS); 2192 } 2193 if (!checkonly) { 2194 udp->udp_xmit_hiwat = *i1; 2195 connp->conn_wq->q_hiwat = *i1; 2196 } 2197 break; 2198 case SO_RCVBUF: 2199 if (*i1 > us->us_max_buf) { 2200 *outlenp = 0; 2201 return (ENOBUFS); 2202 } 2203 if (!checkonly) { 2204 int size; 2205 2206 udp->udp_rcv_disply_hiwat = *i1; 2207 size = udp_set_rcv_hiwat(udp, *i1); 2208 rw_exit(&udp->udp_rwlock); 2209 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2210 size); 2211 rw_enter(&udp->udp_rwlock, RW_WRITER); 2212 } 2213 break; 2214 case SO_DGRAM_ERRIND: 2215 if (!checkonly) 2216 udp->udp_dgram_errind = onoff; 2217 break; 2218 case SO_RECVUCRED: 2219 if (!checkonly) 2220 udp->udp_recvucred = onoff; 2221 break; 2222 case SO_ALLZONES: 2223 /* 2224 * "soft" error (negative) 2225 * option not handled at this level 2226 * Do not modify *outlenp. 2227 */ 2228 return (-EINVAL); 2229 case SO_TIMESTAMP: 2230 if (!checkonly) 2231 udp->udp_timestamp = onoff; 2232 break; 2233 case SO_ANON_MLP: 2234 if (!checkonly) { 2235 connp->conn_anon_mlp = onoff; 2236 PASS_OPT_TO_IP(connp); 2237 } 2238 break; 2239 case SO_MAC_EXEMPT: 2240 if (secpolicy_net_mac_aware(cr) != 0 || 2241 udp->udp_state != TS_UNBND) 2242 return (EACCES); 2243 if (!checkonly) { 2244 connp->conn_mac_exempt = onoff; 2245 PASS_OPT_TO_IP(connp); 2246 } 2247 break; 2248 case SCM_UCRED: { 2249 struct ucred_s *ucr; 2250 cred_t *cr, *newcr; 2251 ts_label_t *tsl; 2252 2253 /* 2254 * Only sockets that have proper privileges and are 2255 * bound to MLPs will have any other value here, so 2256 * this implicitly tests for privilege to set label. 2257 */ 2258 if (connp->conn_mlp_type == mlptSingle) 2259 break; 2260 ucr = (struct ucred_s *)invalp; 2261 if (inlen != ucredsize || 2262 ucr->uc_labeloff < sizeof (*ucr) || 2263 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2264 return (EINVAL); 2265 if (!checkonly) { 2266 mblk_t *mb; 2267 pid_t cpid; 2268 2269 if (attrs == NULL || 2270 (mb = attrs->udpattr_mb) == NULL) 2271 return (EINVAL); 2272 if ((cr = msg_getcred(mb, &cpid)) == NULL) 2273 cr = udp->udp_connp->conn_cred; 2274 ASSERT(cr != NULL); 2275 if ((tsl = crgetlabel(cr)) == NULL) 2276 return (EINVAL); 2277 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2278 tsl->tsl_doi, KM_NOSLEEP); 2279 if (newcr == NULL) 2280 return (ENOSR); 2281 mblk_setcred(mb, newcr, cpid); 2282 attrs->udpattr_credset = B_TRUE; 2283 crfree(newcr); 2284 } 2285 break; 2286 } 2287 case SO_EXCLBIND: 2288 if (!checkonly) 2289 udp->udp_exclbind = onoff; 2290 break; 2291 case SO_RCVTIMEO: 2292 case SO_SNDTIMEO: 2293 /* 2294 * Pass these two options in order for third part 2295 * protocol usage. Here just return directly. 2296 */ 2297 return (0); 2298 default: 2299 *outlenp = 0; 2300 return (EINVAL); 2301 } 2302 break; 2303 case IPPROTO_IP: 2304 if (udp->udp_family != AF_INET) { 2305 *outlenp = 0; 2306 return (ENOPROTOOPT); 2307 } 2308 switch (name) { 2309 case IP_OPTIONS: 2310 case T_IP_OPTIONS: 2311 /* Save options for use by IP. */ 2312 newlen = inlen + udp->udp_label_len; 2313 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2314 *outlenp = 0; 2315 return (EINVAL); 2316 } 2317 if (checkonly) 2318 break; 2319 2320 /* 2321 * Update the stored options taking into account 2322 * any CIPSO option which we should not overwrite. 2323 */ 2324 if (!tsol_option_set(&udp->udp_ip_snd_options, 2325 &udp->udp_ip_snd_options_len, 2326 udp->udp_label_len, invalp, inlen)) { 2327 *outlenp = 0; 2328 return (ENOMEM); 2329 } 2330 2331 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2332 UDPH_SIZE + udp->udp_ip_snd_options_len; 2333 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2334 rw_exit(&udp->udp_rwlock); 2335 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2336 sth_wroff); 2337 rw_enter(&udp->udp_rwlock, RW_WRITER); 2338 break; 2339 2340 case IP_TTL: 2341 if (!checkonly) { 2342 udp->udp_ttl = (uchar_t)*i1; 2343 } 2344 break; 2345 case IP_TOS: 2346 case T_IP_TOS: 2347 if (!checkonly) { 2348 udp->udp_type_of_service = (uchar_t)*i1; 2349 } 2350 break; 2351 case IP_MULTICAST_IF: { 2352 /* 2353 * TODO should check OPTMGMT reply and undo this if 2354 * there is an error. 2355 */ 2356 struct in_addr *inap = (struct in_addr *)invalp; 2357 if (!checkonly) { 2358 udp->udp_multicast_if_addr = 2359 inap->s_addr; 2360 PASS_OPT_TO_IP(connp); 2361 } 2362 break; 2363 } 2364 case IP_MULTICAST_TTL: 2365 if (!checkonly) 2366 udp->udp_multicast_ttl = *invalp; 2367 break; 2368 case IP_MULTICAST_LOOP: 2369 if (!checkonly) { 2370 connp->conn_multicast_loop = *invalp; 2371 PASS_OPT_TO_IP(connp); 2372 } 2373 break; 2374 case IP_RECVOPTS: 2375 if (!checkonly) 2376 udp->udp_recvopts = onoff; 2377 break; 2378 case IP_RECVDSTADDR: 2379 if (!checkonly) 2380 udp->udp_recvdstaddr = onoff; 2381 break; 2382 case IP_RECVIF: 2383 if (!checkonly) { 2384 udp->udp_recvif = onoff; 2385 PASS_OPT_TO_IP(connp); 2386 } 2387 break; 2388 case IP_RECVSLLA: 2389 if (!checkonly) { 2390 udp->udp_recvslla = onoff; 2391 PASS_OPT_TO_IP(connp); 2392 } 2393 break; 2394 case IP_RECVTTL: 2395 if (!checkonly) 2396 udp->udp_recvttl = onoff; 2397 break; 2398 case IP_PKTINFO: { 2399 /* 2400 * This also handles IP_RECVPKTINFO. 2401 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2402 * Differentiation is based on the size of the 2403 * argument passed in. 2404 */ 2405 struct in_pktinfo *pktinfop; 2406 ip4_pkt_t *attr_pktinfop; 2407 2408 if (checkonly) 2409 break; 2410 2411 if (inlen == sizeof (int)) { 2412 /* 2413 * This is IP_RECVPKTINFO option. 2414 * Keep a local copy of whether this option is 2415 * set or not and pass it down to IP for 2416 * processing. 2417 */ 2418 2419 udp->udp_ip_recvpktinfo = onoff; 2420 return (-EINVAL); 2421 } 2422 2423 if (attrs == NULL || 2424 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2425 /* 2426 * sticky option or no buffer to return 2427 * the results. 2428 */ 2429 return (EINVAL); 2430 } 2431 2432 if (inlen != sizeof (struct in_pktinfo)) 2433 return (EINVAL); 2434 2435 pktinfop = (struct in_pktinfo *)invalp; 2436 2437 /* 2438 * At least one of the values should be specified 2439 */ 2440 if (pktinfop->ipi_ifindex == 0 && 2441 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2442 return (EINVAL); 2443 } 2444 2445 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2446 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2447 2448 break; 2449 } 2450 case IP_ADD_MEMBERSHIP: 2451 case IP_DROP_MEMBERSHIP: 2452 case IP_BLOCK_SOURCE: 2453 case IP_UNBLOCK_SOURCE: 2454 case IP_ADD_SOURCE_MEMBERSHIP: 2455 case IP_DROP_SOURCE_MEMBERSHIP: 2456 case MCAST_JOIN_GROUP: 2457 case MCAST_LEAVE_GROUP: 2458 case MCAST_BLOCK_SOURCE: 2459 case MCAST_UNBLOCK_SOURCE: 2460 case MCAST_JOIN_SOURCE_GROUP: 2461 case MCAST_LEAVE_SOURCE_GROUP: 2462 case IP_SEC_OPT: 2463 case IP_NEXTHOP: 2464 case IP_DHCPINIT_IF: 2465 /* 2466 * "soft" error (negative) 2467 * option not handled at this level 2468 * Do not modify *outlenp. 2469 */ 2470 return (-EINVAL); 2471 case IP_BOUND_IF: 2472 if (!checkonly) { 2473 udp->udp_bound_if = *i1; 2474 PASS_OPT_TO_IP(connp); 2475 } 2476 break; 2477 case IP_UNSPEC_SRC: 2478 if (!checkonly) { 2479 udp->udp_unspec_source = onoff; 2480 PASS_OPT_TO_IP(connp); 2481 } 2482 break; 2483 case IP_BROADCAST_TTL: 2484 if (!checkonly) 2485 connp->conn_broadcast_ttl = *invalp; 2486 break; 2487 default: 2488 *outlenp = 0; 2489 return (EINVAL); 2490 } 2491 break; 2492 case IPPROTO_IPV6: { 2493 ip6_pkt_t *ipp; 2494 boolean_t sticky; 2495 2496 if (udp->udp_family != AF_INET6) { 2497 *outlenp = 0; 2498 return (ENOPROTOOPT); 2499 } 2500 /* 2501 * Deal with both sticky options and ancillary data 2502 */ 2503 sticky = B_FALSE; 2504 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2505 NULL) { 2506 /* sticky options, or none */ 2507 ipp = &udp->udp_sticky_ipp; 2508 sticky = B_TRUE; 2509 } 2510 2511 switch (name) { 2512 case IPV6_MULTICAST_IF: 2513 if (!checkonly) { 2514 udp->udp_multicast_if_index = *i1; 2515 PASS_OPT_TO_IP(connp); 2516 } 2517 break; 2518 case IPV6_UNICAST_HOPS: 2519 /* -1 means use default */ 2520 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2521 *outlenp = 0; 2522 return (EINVAL); 2523 } 2524 if (!checkonly) { 2525 if (*i1 == -1) { 2526 udp->udp_ttl = ipp->ipp_unicast_hops = 2527 us->us_ipv6_hoplimit; 2528 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2529 /* Pass modified value to IP. */ 2530 *i1 = udp->udp_ttl; 2531 } else { 2532 udp->udp_ttl = ipp->ipp_unicast_hops = 2533 (uint8_t)*i1; 2534 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2535 } 2536 /* Rebuild the header template */ 2537 error = udp_build_hdrs(udp); 2538 if (error != 0) { 2539 *outlenp = 0; 2540 return (error); 2541 } 2542 } 2543 break; 2544 case IPV6_MULTICAST_HOPS: 2545 /* -1 means use default */ 2546 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2547 *outlenp = 0; 2548 return (EINVAL); 2549 } 2550 if (!checkonly) { 2551 if (*i1 == -1) { 2552 udp->udp_multicast_ttl = 2553 ipp->ipp_multicast_hops = 2554 IP_DEFAULT_MULTICAST_TTL; 2555 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2556 /* Pass modified value to IP. */ 2557 *i1 = udp->udp_multicast_ttl; 2558 } else { 2559 udp->udp_multicast_ttl = 2560 ipp->ipp_multicast_hops = 2561 (uint8_t)*i1; 2562 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2563 } 2564 } 2565 break; 2566 case IPV6_MULTICAST_LOOP: 2567 if (*i1 != 0 && *i1 != 1) { 2568 *outlenp = 0; 2569 return (EINVAL); 2570 } 2571 if (!checkonly) { 2572 connp->conn_multicast_loop = *i1; 2573 PASS_OPT_TO_IP(connp); 2574 } 2575 break; 2576 case IPV6_JOIN_GROUP: 2577 case IPV6_LEAVE_GROUP: 2578 case MCAST_JOIN_GROUP: 2579 case MCAST_LEAVE_GROUP: 2580 case MCAST_BLOCK_SOURCE: 2581 case MCAST_UNBLOCK_SOURCE: 2582 case MCAST_JOIN_SOURCE_GROUP: 2583 case MCAST_LEAVE_SOURCE_GROUP: 2584 /* 2585 * "soft" error (negative) 2586 * option not handled at this level 2587 * Note: Do not modify *outlenp 2588 */ 2589 return (-EINVAL); 2590 case IPV6_BOUND_IF: 2591 if (!checkonly) { 2592 udp->udp_bound_if = *i1; 2593 PASS_OPT_TO_IP(connp); 2594 } 2595 break; 2596 case IPV6_UNSPEC_SRC: 2597 if (!checkonly) { 2598 udp->udp_unspec_source = onoff; 2599 PASS_OPT_TO_IP(connp); 2600 } 2601 break; 2602 /* 2603 * Set boolean switches for ancillary data delivery 2604 */ 2605 case IPV6_RECVPKTINFO: 2606 if (!checkonly) { 2607 udp->udp_ip_recvpktinfo = onoff; 2608 PASS_OPT_TO_IP(connp); 2609 } 2610 break; 2611 case IPV6_RECVTCLASS: 2612 if (!checkonly) { 2613 udp->udp_ipv6_recvtclass = onoff; 2614 PASS_OPT_TO_IP(connp); 2615 } 2616 break; 2617 case IPV6_RECVPATHMTU: 2618 if (!checkonly) { 2619 udp->udp_ipv6_recvpathmtu = onoff; 2620 PASS_OPT_TO_IP(connp); 2621 } 2622 break; 2623 case IPV6_RECVHOPLIMIT: 2624 if (!checkonly) { 2625 udp->udp_ipv6_recvhoplimit = onoff; 2626 PASS_OPT_TO_IP(connp); 2627 } 2628 break; 2629 case IPV6_RECVHOPOPTS: 2630 if (!checkonly) { 2631 udp->udp_ipv6_recvhopopts = onoff; 2632 PASS_OPT_TO_IP(connp); 2633 } 2634 break; 2635 case IPV6_RECVDSTOPTS: 2636 if (!checkonly) { 2637 udp->udp_ipv6_recvdstopts = onoff; 2638 PASS_OPT_TO_IP(connp); 2639 } 2640 break; 2641 case _OLD_IPV6_RECVDSTOPTS: 2642 if (!checkonly) 2643 udp->udp_old_ipv6_recvdstopts = onoff; 2644 break; 2645 case IPV6_RECVRTHDRDSTOPTS: 2646 if (!checkonly) { 2647 udp->udp_ipv6_recvrthdrdstopts = onoff; 2648 PASS_OPT_TO_IP(connp); 2649 } 2650 break; 2651 case IPV6_RECVRTHDR: 2652 if (!checkonly) { 2653 udp->udp_ipv6_recvrthdr = onoff; 2654 PASS_OPT_TO_IP(connp); 2655 } 2656 break; 2657 /* 2658 * Set sticky options or ancillary data. 2659 * If sticky options, (re)build any extension headers 2660 * that might be needed as a result. 2661 */ 2662 case IPV6_PKTINFO: 2663 /* 2664 * The source address and ifindex are verified 2665 * in ip_opt_set(). For ancillary data the 2666 * source address is checked in ip_wput_v6. 2667 */ 2668 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2669 return (EINVAL); 2670 if (checkonly) 2671 break; 2672 2673 if (inlen == 0) { 2674 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2675 ipp->ipp_sticky_ignored |= 2676 (IPPF_IFINDEX|IPPF_ADDR); 2677 } else { 2678 struct in6_pktinfo *pkti; 2679 2680 pkti = (struct in6_pktinfo *)invalp; 2681 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2682 ipp->ipp_addr = pkti->ipi6_addr; 2683 if (ipp->ipp_ifindex != 0) 2684 ipp->ipp_fields |= IPPF_IFINDEX; 2685 else 2686 ipp->ipp_fields &= ~IPPF_IFINDEX; 2687 if (!IN6_IS_ADDR_UNSPECIFIED( 2688 &ipp->ipp_addr)) 2689 ipp->ipp_fields |= IPPF_ADDR; 2690 else 2691 ipp->ipp_fields &= ~IPPF_ADDR; 2692 } 2693 if (sticky) { 2694 error = udp_build_hdrs(udp); 2695 if (error != 0) 2696 return (error); 2697 PASS_OPT_TO_IP(connp); 2698 } 2699 break; 2700 case IPV6_HOPLIMIT: 2701 if (sticky) 2702 return (EINVAL); 2703 if (inlen != 0 && inlen != sizeof (int)) 2704 return (EINVAL); 2705 if (checkonly) 2706 break; 2707 2708 if (inlen == 0) { 2709 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2710 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2711 } else { 2712 if (*i1 > 255 || *i1 < -1) 2713 return (EINVAL); 2714 if (*i1 == -1) 2715 ipp->ipp_hoplimit = 2716 us->us_ipv6_hoplimit; 2717 else 2718 ipp->ipp_hoplimit = *i1; 2719 ipp->ipp_fields |= IPPF_HOPLIMIT; 2720 } 2721 break; 2722 case IPV6_TCLASS: 2723 if (inlen != 0 && inlen != sizeof (int)) 2724 return (EINVAL); 2725 if (checkonly) 2726 break; 2727 2728 if (inlen == 0) { 2729 ipp->ipp_fields &= ~IPPF_TCLASS; 2730 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2731 } else { 2732 if (*i1 > 255 || *i1 < -1) 2733 return (EINVAL); 2734 if (*i1 == -1) 2735 ipp->ipp_tclass = 0; 2736 else 2737 ipp->ipp_tclass = *i1; 2738 ipp->ipp_fields |= IPPF_TCLASS; 2739 } 2740 if (sticky) { 2741 error = udp_build_hdrs(udp); 2742 if (error != 0) 2743 return (error); 2744 } 2745 break; 2746 case IPV6_NEXTHOP: 2747 /* 2748 * IP will verify that the nexthop is reachable 2749 * and fail for sticky options. 2750 */ 2751 if (inlen != 0 && inlen != sizeof (sin6_t)) 2752 return (EINVAL); 2753 if (checkonly) 2754 break; 2755 2756 if (inlen == 0) { 2757 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2758 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2759 } else { 2760 sin6_t *sin6 = (sin6_t *)invalp; 2761 2762 if (sin6->sin6_family != AF_INET6) { 2763 return (EAFNOSUPPORT); 2764 } 2765 if (IN6_IS_ADDR_V4MAPPED( 2766 &sin6->sin6_addr)) 2767 return (EADDRNOTAVAIL); 2768 ipp->ipp_nexthop = sin6->sin6_addr; 2769 if (!IN6_IS_ADDR_UNSPECIFIED( 2770 &ipp->ipp_nexthop)) 2771 ipp->ipp_fields |= IPPF_NEXTHOP; 2772 else 2773 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2774 } 2775 if (sticky) { 2776 error = udp_build_hdrs(udp); 2777 if (error != 0) 2778 return (error); 2779 PASS_OPT_TO_IP(connp); 2780 } 2781 break; 2782 case IPV6_HOPOPTS: { 2783 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2784 /* 2785 * Sanity checks - minimum size, size a multiple of 2786 * eight bytes, and matching size passed in. 2787 */ 2788 if (inlen != 0 && 2789 inlen != (8 * (hopts->ip6h_len + 1))) 2790 return (EINVAL); 2791 2792 if (checkonly) 2793 break; 2794 2795 error = optcom_pkt_set(invalp, inlen, sticky, 2796 (uchar_t **)&ipp->ipp_hopopts, 2797 &ipp->ipp_hopoptslen, 2798 sticky ? udp->udp_label_len_v6 : 0); 2799 if (error != 0) 2800 return (error); 2801 if (ipp->ipp_hopoptslen == 0) { 2802 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2803 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2804 } else { 2805 ipp->ipp_fields |= IPPF_HOPOPTS; 2806 } 2807 if (sticky) { 2808 error = udp_build_hdrs(udp); 2809 if (error != 0) 2810 return (error); 2811 } 2812 break; 2813 } 2814 case IPV6_RTHDRDSTOPTS: { 2815 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2816 2817 /* 2818 * Sanity checks - minimum size, size a multiple of 2819 * eight bytes, and matching size passed in. 2820 */ 2821 if (inlen != 0 && 2822 inlen != (8 * (dopts->ip6d_len + 1))) 2823 return (EINVAL); 2824 2825 if (checkonly) 2826 break; 2827 2828 if (inlen == 0) { 2829 if (sticky && 2830 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2831 kmem_free(ipp->ipp_rtdstopts, 2832 ipp->ipp_rtdstoptslen); 2833 ipp->ipp_rtdstopts = NULL; 2834 ipp->ipp_rtdstoptslen = 0; 2835 } 2836 2837 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2838 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2839 } else { 2840 error = optcom_pkt_set(invalp, inlen, sticky, 2841 (uchar_t **)&ipp->ipp_rtdstopts, 2842 &ipp->ipp_rtdstoptslen, 0); 2843 if (error != 0) 2844 return (error); 2845 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2846 } 2847 if (sticky) { 2848 error = udp_build_hdrs(udp); 2849 if (error != 0) 2850 return (error); 2851 } 2852 break; 2853 } 2854 case IPV6_DSTOPTS: { 2855 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2856 2857 /* 2858 * Sanity checks - minimum size, size a multiple of 2859 * eight bytes, and matching size passed in. 2860 */ 2861 if (inlen != 0 && 2862 inlen != (8 * (dopts->ip6d_len + 1))) 2863 return (EINVAL); 2864 2865 if (checkonly) 2866 break; 2867 2868 if (inlen == 0) { 2869 if (sticky && 2870 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2871 kmem_free(ipp->ipp_dstopts, 2872 ipp->ipp_dstoptslen); 2873 ipp->ipp_dstopts = NULL; 2874 ipp->ipp_dstoptslen = 0; 2875 } 2876 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2877 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2878 } else { 2879 error = optcom_pkt_set(invalp, inlen, sticky, 2880 (uchar_t **)&ipp->ipp_dstopts, 2881 &ipp->ipp_dstoptslen, 0); 2882 if (error != 0) 2883 return (error); 2884 ipp->ipp_fields |= IPPF_DSTOPTS; 2885 } 2886 if (sticky) { 2887 error = udp_build_hdrs(udp); 2888 if (error != 0) 2889 return (error); 2890 } 2891 break; 2892 } 2893 case IPV6_RTHDR: { 2894 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2895 2896 /* 2897 * Sanity checks - minimum size, size a multiple of 2898 * eight bytes, and matching size passed in. 2899 */ 2900 if (inlen != 0 && 2901 inlen != (8 * (rt->ip6r_len + 1))) 2902 return (EINVAL); 2903 2904 if (checkonly) 2905 break; 2906 2907 if (inlen == 0) { 2908 if (sticky && 2909 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2910 kmem_free(ipp->ipp_rthdr, 2911 ipp->ipp_rthdrlen); 2912 ipp->ipp_rthdr = NULL; 2913 ipp->ipp_rthdrlen = 0; 2914 } 2915 ipp->ipp_fields &= ~IPPF_RTHDR; 2916 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2917 } else { 2918 error = optcom_pkt_set(invalp, inlen, sticky, 2919 (uchar_t **)&ipp->ipp_rthdr, 2920 &ipp->ipp_rthdrlen, 0); 2921 if (error != 0) 2922 return (error); 2923 ipp->ipp_fields |= IPPF_RTHDR; 2924 } 2925 if (sticky) { 2926 error = udp_build_hdrs(udp); 2927 if (error != 0) 2928 return (error); 2929 } 2930 break; 2931 } 2932 2933 case IPV6_DONTFRAG: 2934 if (checkonly) 2935 break; 2936 2937 if (onoff) { 2938 ipp->ipp_fields |= IPPF_DONTFRAG; 2939 } else { 2940 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2941 } 2942 break; 2943 2944 case IPV6_USE_MIN_MTU: 2945 if (inlen != sizeof (int)) 2946 return (EINVAL); 2947 2948 if (*i1 < -1 || *i1 > 1) 2949 return (EINVAL); 2950 2951 if (checkonly) 2952 break; 2953 2954 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2955 ipp->ipp_use_min_mtu = *i1; 2956 break; 2957 2958 case IPV6_SEC_OPT: 2959 case IPV6_SRC_PREFERENCES: 2960 case IPV6_V6ONLY: 2961 /* Handled at the IP level */ 2962 return (-EINVAL); 2963 default: 2964 *outlenp = 0; 2965 return (EINVAL); 2966 } 2967 break; 2968 } /* end IPPROTO_IPV6 */ 2969 case IPPROTO_UDP: 2970 switch (name) { 2971 case UDP_ANONPRIVBIND: 2972 if ((error = secpolicy_net_privaddr(cr, 0, 2973 IPPROTO_UDP)) != 0) { 2974 *outlenp = 0; 2975 return (error); 2976 } 2977 if (!checkonly) { 2978 udp->udp_anon_priv_bind = onoff; 2979 } 2980 break; 2981 case UDP_EXCLBIND: 2982 if (!checkonly) 2983 udp->udp_exclbind = onoff; 2984 break; 2985 case UDP_RCVHDR: 2986 if (!checkonly) 2987 udp->udp_rcvhdr = onoff; 2988 break; 2989 case UDP_NAT_T_ENDPOINT: 2990 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 2991 *outlenp = 0; 2992 return (error); 2993 } 2994 2995 /* 2996 * Use udp_family instead so we can avoid ambiguitites 2997 * with AF_INET6 sockets that may switch from IPv4 2998 * to IPv6. 2999 */ 3000 if (udp->udp_family != AF_INET) { 3001 *outlenp = 0; 3002 return (EAFNOSUPPORT); 3003 } 3004 3005 if (!checkonly) { 3006 int size; 3007 3008 udp->udp_nat_t_endpoint = onoff; 3009 3010 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3011 UDPH_SIZE + udp->udp_ip_snd_options_len; 3012 3013 /* Also, adjust wroff */ 3014 if (onoff) { 3015 udp->udp_max_hdr_len += 3016 sizeof (uint32_t); 3017 } 3018 size = udp->udp_max_hdr_len + 3019 us->us_wroff_extra; 3020 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3021 size); 3022 } 3023 break; 3024 default: 3025 *outlenp = 0; 3026 return (EINVAL); 3027 } 3028 break; 3029 default: 3030 *outlenp = 0; 3031 return (EINVAL); 3032 } 3033 /* 3034 * Common case of OK return with outval same as inval. 3035 */ 3036 if (invalp != outvalp) { 3037 /* don't trust bcopy for identical src/dst */ 3038 (void) bcopy(invalp, outvalp, inlen); 3039 } 3040 *outlenp = inlen; 3041 return (0); 3042 } 3043 3044 int 3045 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3046 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3047 void *thisdg_attrs, cred_t *cr) 3048 { 3049 int error; 3050 boolean_t checkonly; 3051 3052 error = 0; 3053 switch (optset_context) { 3054 case SETFN_OPTCOM_CHECKONLY: 3055 checkonly = B_TRUE; 3056 /* 3057 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3058 * inlen != 0 implies value supplied and 3059 * we have to "pretend" to set it. 3060 * inlen == 0 implies that there is no 3061 * value part in T_CHECK request and just validation 3062 * done elsewhere should be enough, we just return here. 3063 */ 3064 if (inlen == 0) { 3065 *outlenp = 0; 3066 goto done; 3067 } 3068 break; 3069 case SETFN_OPTCOM_NEGOTIATE: 3070 checkonly = B_FALSE; 3071 break; 3072 case SETFN_UD_NEGOTIATE: 3073 case SETFN_CONN_NEGOTIATE: 3074 checkonly = B_FALSE; 3075 /* 3076 * Negotiating local and "association-related" options 3077 * through T_UNITDATA_REQ. 3078 * 3079 * Following routine can filter out ones we do not 3080 * want to be "set" this way. 3081 */ 3082 if (!udp_opt_allow_udr_set(level, name)) { 3083 *outlenp = 0; 3084 error = EINVAL; 3085 goto done; 3086 } 3087 break; 3088 default: 3089 /* 3090 * We should never get here 3091 */ 3092 *outlenp = 0; 3093 error = EINVAL; 3094 goto done; 3095 } 3096 3097 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3098 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3099 3100 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3101 outvalp, cr, thisdg_attrs, checkonly); 3102 done: 3103 return (error); 3104 } 3105 3106 /* ARGSUSED */ 3107 int 3108 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3109 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3110 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3111 { 3112 conn_t *connp = Q_TO_CONN(q); 3113 int error; 3114 udp_t *udp = connp->conn_udp; 3115 3116 rw_enter(&udp->udp_rwlock, RW_WRITER); 3117 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3118 outlenp, outvalp, thisdg_attrs, cr); 3119 rw_exit(&udp->udp_rwlock); 3120 return (error); 3121 } 3122 3123 /* 3124 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3125 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3126 * headers, and the udp header. 3127 * Returns failure if can't allocate memory. 3128 */ 3129 static int 3130 udp_build_hdrs(udp_t *udp) 3131 { 3132 udp_stack_t *us = udp->udp_us; 3133 uchar_t *hdrs; 3134 uint_t hdrs_len; 3135 ip6_t *ip6h; 3136 ip6i_t *ip6i; 3137 udpha_t *udpha; 3138 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3139 size_t sth_wroff; 3140 conn_t *connp = udp->udp_connp; 3141 3142 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3143 ASSERT(connp != NULL); 3144 3145 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3146 ASSERT(hdrs_len != 0); 3147 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3148 /* Need to reallocate */ 3149 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3150 if (hdrs == NULL) 3151 return (ENOMEM); 3152 3153 if (udp->udp_sticky_hdrs_len != 0) { 3154 kmem_free(udp->udp_sticky_hdrs, 3155 udp->udp_sticky_hdrs_len); 3156 } 3157 udp->udp_sticky_hdrs = hdrs; 3158 udp->udp_sticky_hdrs_len = hdrs_len; 3159 } 3160 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3161 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3162 3163 /* Set header fields not in ipp */ 3164 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3165 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3166 ip6h = (ip6_t *)&ip6i[1]; 3167 } else { 3168 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3169 } 3170 3171 if (!(ipp->ipp_fields & IPPF_ADDR)) 3172 ip6h->ip6_src = udp->udp_v6src; 3173 3174 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3175 udpha->uha_src_port = udp->udp_port; 3176 3177 /* Try to get everything in a single mblk */ 3178 if (hdrs_len > udp->udp_max_hdr_len) { 3179 udp->udp_max_hdr_len = hdrs_len; 3180 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3181 rw_exit(&udp->udp_rwlock); 3182 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3183 udp->udp_connp, sth_wroff); 3184 rw_enter(&udp->udp_rwlock, RW_WRITER); 3185 } 3186 return (0); 3187 } 3188 3189 /* 3190 * This routine retrieves the value of an ND variable in a udpparam_t 3191 * structure. It is called through nd_getset when a user reads the 3192 * variable. 3193 */ 3194 /* ARGSUSED */ 3195 static int 3196 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3197 { 3198 udpparam_t *udppa = (udpparam_t *)cp; 3199 3200 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3201 return (0); 3202 } 3203 3204 /* 3205 * Walk through the param array specified registering each element with the 3206 * named dispatch (ND) handler. 3207 */ 3208 static boolean_t 3209 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3210 { 3211 for (; cnt-- > 0; udppa++) { 3212 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3213 if (!nd_load(ndp, udppa->udp_param_name, 3214 udp_param_get, udp_param_set, 3215 (caddr_t)udppa)) { 3216 nd_free(ndp); 3217 return (B_FALSE); 3218 } 3219 } 3220 } 3221 if (!nd_load(ndp, "udp_extra_priv_ports", 3222 udp_extra_priv_ports_get, NULL, NULL)) { 3223 nd_free(ndp); 3224 return (B_FALSE); 3225 } 3226 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3227 NULL, udp_extra_priv_ports_add, NULL)) { 3228 nd_free(ndp); 3229 return (B_FALSE); 3230 } 3231 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3232 NULL, udp_extra_priv_ports_del, NULL)) { 3233 nd_free(ndp); 3234 return (B_FALSE); 3235 } 3236 return (B_TRUE); 3237 } 3238 3239 /* This routine sets an ND variable in a udpparam_t structure. */ 3240 /* ARGSUSED */ 3241 static int 3242 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3243 { 3244 long new_value; 3245 udpparam_t *udppa = (udpparam_t *)cp; 3246 3247 /* 3248 * Fail the request if the new value does not lie within the 3249 * required bounds. 3250 */ 3251 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3252 new_value < udppa->udp_param_min || 3253 new_value > udppa->udp_param_max) { 3254 return (EINVAL); 3255 } 3256 3257 /* Set the new value */ 3258 udppa->udp_param_value = new_value; 3259 return (0); 3260 } 3261 3262 /* 3263 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3264 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3265 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3266 * then it's assumed to be allocated to be large enough. 3267 * 3268 * Returns zero if trimming of the security option causes all options to go 3269 * away. 3270 */ 3271 static size_t 3272 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3273 { 3274 struct T_opthdr *toh; 3275 size_t hol = ipp->ipp_hopoptslen; 3276 ip6_hbh_t *dstopt = NULL; 3277 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3278 size_t tlen, olen, plen; 3279 boolean_t deleting; 3280 const struct ip6_opt *sopt, *lastpad; 3281 struct ip6_opt *dopt; 3282 3283 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3284 toh->level = IPPROTO_IPV6; 3285 toh->name = IPV6_HOPOPTS; 3286 toh->status = 0; 3287 dstopt = (ip6_hbh_t *)(toh + 1); 3288 } 3289 3290 /* 3291 * If labeling is enabled, then skip the label option 3292 * but get other options if there are any. 3293 */ 3294 if (is_system_labeled()) { 3295 dopt = NULL; 3296 if (dstopt != NULL) { 3297 /* will fill in ip6h_len later */ 3298 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3299 dopt = (struct ip6_opt *)(dstopt + 1); 3300 } 3301 sopt = (const struct ip6_opt *)(srcopt + 1); 3302 hol -= sizeof (*srcopt); 3303 tlen = sizeof (*dstopt); 3304 lastpad = NULL; 3305 deleting = B_FALSE; 3306 /* 3307 * This loop finds the first (lastpad pointer) of any number of 3308 * pads that preceeds the security option, then treats the 3309 * security option as though it were a pad, and then finds the 3310 * next non-pad option (or end of list). 3311 * 3312 * It then treats the entire block as one big pad. To preserve 3313 * alignment of any options that follow, or just the end of the 3314 * list, it computes a minimal new padding size that keeps the 3315 * same alignment for the next option. 3316 * 3317 * If it encounters just a sequence of pads with no security 3318 * option, those are copied as-is rather than collapsed. 3319 * 3320 * Note that to handle the end of list case, the code makes one 3321 * loop with 'hol' set to zero. 3322 */ 3323 for (;;) { 3324 if (hol > 0) { 3325 if (sopt->ip6o_type == IP6OPT_PAD1) { 3326 if (lastpad == NULL) 3327 lastpad = sopt; 3328 sopt = (const struct ip6_opt *) 3329 &sopt->ip6o_len; 3330 hol--; 3331 continue; 3332 } 3333 olen = sopt->ip6o_len + sizeof (*sopt); 3334 if (olen > hol) 3335 olen = hol; 3336 if (sopt->ip6o_type == IP6OPT_PADN || 3337 sopt->ip6o_type == ip6opt_ls) { 3338 if (sopt->ip6o_type == ip6opt_ls) 3339 deleting = B_TRUE; 3340 if (lastpad == NULL) 3341 lastpad = sopt; 3342 sopt = (const struct ip6_opt *) 3343 ((const char *)sopt + olen); 3344 hol -= olen; 3345 continue; 3346 } 3347 } else { 3348 /* if nothing was copied at all, then delete */ 3349 if (tlen == sizeof (*dstopt)) 3350 return (0); 3351 /* last pass; pick up any trailing padding */ 3352 olen = 0; 3353 } 3354 if (deleting) { 3355 /* 3356 * compute aligning effect of deleted material 3357 * to reproduce with pad. 3358 */ 3359 plen = ((const char *)sopt - 3360 (const char *)lastpad) & 7; 3361 tlen += plen; 3362 if (dopt != NULL) { 3363 if (plen == 1) { 3364 dopt->ip6o_type = IP6OPT_PAD1; 3365 } else if (plen > 1) { 3366 plen -= sizeof (*dopt); 3367 dopt->ip6o_type = IP6OPT_PADN; 3368 dopt->ip6o_len = plen; 3369 if (plen > 0) 3370 bzero(dopt + 1, plen); 3371 } 3372 dopt = (struct ip6_opt *) 3373 ((char *)dopt + plen); 3374 } 3375 deleting = B_FALSE; 3376 lastpad = NULL; 3377 } 3378 /* if there's uncopied padding, then copy that now */ 3379 if (lastpad != NULL) { 3380 olen += (const char *)sopt - 3381 (const char *)lastpad; 3382 sopt = lastpad; 3383 lastpad = NULL; 3384 } 3385 if (dopt != NULL && olen > 0) { 3386 bcopy(sopt, dopt, olen); 3387 dopt = (struct ip6_opt *)((char *)dopt + olen); 3388 } 3389 if (hol == 0) 3390 break; 3391 tlen += olen; 3392 sopt = (const struct ip6_opt *) 3393 ((const char *)sopt + olen); 3394 hol -= olen; 3395 } 3396 /* go back and patch up the length value, rounded upward */ 3397 if (dstopt != NULL) 3398 dstopt->ip6h_len = (tlen - 1) >> 3; 3399 } else { 3400 tlen = hol; 3401 if (dstopt != NULL) 3402 bcopy(srcopt, dstopt, hol); 3403 } 3404 3405 tlen += sizeof (*toh); 3406 if (toh != NULL) 3407 toh->len = tlen; 3408 3409 return (tlen); 3410 } 3411 3412 /* 3413 * Update udp_rcv_opt_len from the packet. 3414 * Called when options received, and when no options received but 3415 * udp_ip_recv_opt_len has previously recorded options. 3416 */ 3417 static void 3418 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3419 { 3420 /* Save the options if any */ 3421 if (opt_len > 0) { 3422 if (opt_len > udp->udp_ip_rcv_options_len) { 3423 /* Need to allocate larger buffer */ 3424 if (udp->udp_ip_rcv_options_len != 0) 3425 mi_free((char *)udp->udp_ip_rcv_options); 3426 udp->udp_ip_rcv_options_len = 0; 3427 udp->udp_ip_rcv_options = 3428 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3429 if (udp->udp_ip_rcv_options != NULL) 3430 udp->udp_ip_rcv_options_len = opt_len; 3431 } 3432 if (udp->udp_ip_rcv_options_len != 0) { 3433 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3434 /* Adjust length if we are resusing the space */ 3435 udp->udp_ip_rcv_options_len = opt_len; 3436 } 3437 } else if (udp->udp_ip_rcv_options_len != 0) { 3438 /* Clear out previously recorded options */ 3439 mi_free((char *)udp->udp_ip_rcv_options); 3440 udp->udp_ip_rcv_options = NULL; 3441 udp->udp_ip_rcv_options_len = 0; 3442 } 3443 } 3444 3445 static mblk_t * 3446 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3447 { 3448 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3449 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3450 /* 3451 * fallback has started but messages have not been moved yet 3452 */ 3453 if (udp->udp_fallback_queue_head == NULL) { 3454 ASSERT(udp->udp_fallback_queue_tail == NULL); 3455 udp->udp_fallback_queue_head = mp; 3456 udp->udp_fallback_queue_tail = mp; 3457 } else { 3458 ASSERT(udp->udp_fallback_queue_tail != NULL); 3459 udp->udp_fallback_queue_tail->b_next = mp; 3460 udp->udp_fallback_queue_tail = mp; 3461 } 3462 return (NULL); 3463 } else { 3464 /* 3465 * Fallback completed, let the caller putnext() the mblk. 3466 */ 3467 return (mp); 3468 } 3469 } 3470 3471 /* 3472 * Deliver data to ULP. In case we have a socket, and it's falling back to 3473 * TPI, then we'll queue the mp for later processing. 3474 */ 3475 static void 3476 udp_ulp_recv(conn_t *connp, mblk_t *mp) 3477 { 3478 if (IPCL_IS_NONSTR(connp)) { 3479 udp_t *udp = connp->conn_udp; 3480 int error; 3481 3482 if ((*connp->conn_upcalls->su_recv) 3483 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3484 NULL) < 0) { 3485 mutex_enter(&udp->udp_recv_lock); 3486 if (error == ENOSPC) { 3487 /* 3488 * let's confirm while holding the lock 3489 */ 3490 if ((*connp->conn_upcalls->su_recv) 3491 (connp->conn_upper_handle, NULL, 0, 0, 3492 &error, NULL) < 0) { 3493 ASSERT(error == ENOSPC); 3494 if (error == ENOSPC) { 3495 connp->conn_flow_cntrld = 3496 B_TRUE; 3497 } 3498 } 3499 mutex_exit(&udp->udp_recv_lock); 3500 } else { 3501 ASSERT(error == EOPNOTSUPP); 3502 mp = udp_queue_fallback(udp, mp); 3503 mutex_exit(&udp->udp_recv_lock); 3504 if (mp != NULL) 3505 putnext(connp->conn_rq, mp); 3506 } 3507 } 3508 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 3509 } else { 3510 putnext(connp->conn_rq, mp); 3511 } 3512 } 3513 3514 /* ARGSUSED2 */ 3515 static void 3516 udp_input(void *arg1, mblk_t *mp, void *arg2) 3517 { 3518 conn_t *connp = (conn_t *)arg1; 3519 struct T_unitdata_ind *tudi; 3520 uchar_t *rptr; /* Pointer to IP header */ 3521 int hdr_length; /* Length of IP+UDP headers */ 3522 int opt_len; 3523 int udi_size; /* Size of T_unitdata_ind */ 3524 int mp_len; 3525 udp_t *udp; 3526 udpha_t *udpha; 3527 int ipversion; 3528 ip6_pkt_t ipp; 3529 ip6_t *ip6h; 3530 ip6i_t *ip6i; 3531 mblk_t *mp1; 3532 mblk_t *options_mp = NULL; 3533 ip_pktinfo_t *pinfo = NULL; 3534 cred_t *cr = NULL; 3535 pid_t cpid; 3536 uint32_t udp_ip_rcv_options_len; 3537 udp_bits_t udp_bits; 3538 cred_t *rcr = connp->conn_cred; 3539 udp_stack_t *us; 3540 3541 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3542 3543 udp = connp->conn_udp; 3544 us = udp->udp_us; 3545 rptr = mp->b_rptr; 3546 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3547 ASSERT(OK_32PTR(rptr)); 3548 3549 /* 3550 * IP should have prepended the options data in an M_CTL 3551 * Check M_CTL "type" to make sure are not here bcos of 3552 * a valid ICMP message 3553 */ 3554 if (DB_TYPE(mp) == M_CTL) { 3555 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3556 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3557 IN_PKTINFO) { 3558 /* 3559 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3560 * has been prepended to the packet by IP. We need to 3561 * extract the mblk and adjust the rptr 3562 */ 3563 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3564 options_mp = mp; 3565 mp = mp->b_cont; 3566 rptr = mp->b_rptr; 3567 UDP_STAT(us, udp_in_pktinfo); 3568 } else { 3569 /* 3570 * ICMP messages. 3571 */ 3572 udp_icmp_error(connp, mp); 3573 return; 3574 } 3575 } 3576 3577 mp_len = msgdsize(mp); 3578 /* 3579 * This is the inbound data path. 3580 * First, we check to make sure the IP version number is correct, 3581 * and then pull the IP and UDP headers into the first mblk. 3582 */ 3583 3584 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3585 ipp.ipp_fields = 0; 3586 3587 ipversion = IPH_HDR_VERSION(rptr); 3588 3589 rw_enter(&udp->udp_rwlock, RW_READER); 3590 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3591 udp_bits = udp->udp_bits; 3592 rw_exit(&udp->udp_rwlock); 3593 3594 switch (ipversion) { 3595 case IPV4_VERSION: 3596 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3597 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3598 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3599 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3600 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3601 udp->udp_family == AF_INET) { 3602 /* 3603 * Record/update udp_ip_rcv_options with the lock 3604 * held. Not needed for AF_INET6 sockets 3605 * since they don't support a getsockopt of IP_OPTIONS. 3606 */ 3607 rw_enter(&udp->udp_rwlock, RW_WRITER); 3608 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3609 opt_len); 3610 rw_exit(&udp->udp_rwlock); 3611 } 3612 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 3613 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3614 udp->udp_ip_recvpktinfo) { 3615 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3616 ipp.ipp_fields |= IPPF_IFINDEX; 3617 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3618 } 3619 } 3620 break; 3621 case IPV6_VERSION: 3622 /* 3623 * IPv6 packets can only be received by applications 3624 * that are prepared to receive IPv6 addresses. 3625 * The IP fanout must ensure this. 3626 */ 3627 ASSERT(udp->udp_family == AF_INET6); 3628 3629 ip6h = (ip6_t *)rptr; 3630 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3631 3632 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3633 uint8_t nexthdrp; 3634 /* Look for ifindex information */ 3635 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3636 ip6i = (ip6i_t *)ip6h; 3637 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3638 goto tossit; 3639 3640 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3641 ASSERT(ip6i->ip6i_ifindex != 0); 3642 ipp.ipp_fields |= IPPF_IFINDEX; 3643 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3644 } 3645 rptr = (uchar_t *)&ip6i[1]; 3646 mp->b_rptr = rptr; 3647 if (rptr == mp->b_wptr) { 3648 mp1 = mp->b_cont; 3649 freeb(mp); 3650 mp = mp1; 3651 rptr = mp->b_rptr; 3652 } 3653 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3654 goto tossit; 3655 ip6h = (ip6_t *)rptr; 3656 mp_len = msgdsize(mp); 3657 } 3658 /* 3659 * Find any potentially interesting extension headers 3660 * as well as the length of the IPv6 + extension 3661 * headers. 3662 */ 3663 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3664 UDPH_SIZE; 3665 ASSERT(nexthdrp == IPPROTO_UDP); 3666 } else { 3667 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3668 ip6i = NULL; 3669 } 3670 break; 3671 default: 3672 ASSERT(0); 3673 } 3674 3675 /* 3676 * IP inspected the UDP header thus all of it must be in the mblk. 3677 * UDP length check is performed for IPv6 packets and IPv4 packets 3678 * to check if the size of the packet as specified 3679 * by the header is the same as the physical size of the packet. 3680 * FIXME? Didn't IP already check this? 3681 */ 3682 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3683 if ((MBLKL(mp) < hdr_length) || 3684 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3685 goto tossit; 3686 } 3687 3688 3689 /* Walk past the headers unless UDP_RCVHDR was set. */ 3690 if (!udp_bits.udpb_rcvhdr) { 3691 mp->b_rptr = rptr + hdr_length; 3692 mp_len -= hdr_length; 3693 } 3694 3695 /* 3696 * This is the inbound data path. Packets are passed upstream as 3697 * T_UNITDATA_IND messages with full IP headers still attached. 3698 */ 3699 if (udp->udp_family == AF_INET) { 3700 sin_t *sin; 3701 3702 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3703 3704 /* 3705 * Normally only send up the source address. 3706 * If IP_RECVDSTADDR is set we include the destination IP 3707 * address as an option. With IP_RECVOPTS we include all 3708 * the IP options. 3709 */ 3710 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3711 if (udp_bits.udpb_recvdstaddr) { 3712 udi_size += sizeof (struct T_opthdr) + 3713 sizeof (struct in_addr); 3714 UDP_STAT(us, udp_in_recvdstaddr); 3715 } 3716 3717 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3718 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3719 udi_size += sizeof (struct T_opthdr) + 3720 sizeof (struct in_pktinfo); 3721 UDP_STAT(us, udp_ip_rcvpktinfo); 3722 } 3723 3724 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3725 udi_size += sizeof (struct T_opthdr) + opt_len; 3726 UDP_STAT(us, udp_in_recvopts); 3727 } 3728 3729 /* 3730 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3731 * space accordingly 3732 */ 3733 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3734 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3735 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3736 UDP_STAT(us, udp_in_recvif); 3737 } 3738 3739 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3740 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3741 udi_size += sizeof (struct T_opthdr) + 3742 sizeof (struct sockaddr_dl); 3743 UDP_STAT(us, udp_in_recvslla); 3744 } 3745 3746 if ((udp_bits.udpb_recvucred) && 3747 (cr = msg_getcred(mp, &cpid)) != NULL) { 3748 udi_size += sizeof (struct T_opthdr) + ucredsize; 3749 UDP_STAT(us, udp_in_recvucred); 3750 } 3751 3752 /* 3753 * If SO_TIMESTAMP is set allocate the appropriate sized 3754 * buffer. Since gethrestime() expects a pointer aligned 3755 * argument, we allocate space necessary for extra 3756 * alignment (even though it might not be used). 3757 */ 3758 if (udp_bits.udpb_timestamp) { 3759 udi_size += sizeof (struct T_opthdr) + 3760 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3761 UDP_STAT(us, udp_in_timestamp); 3762 } 3763 3764 /* 3765 * If IP_RECVTTL is set allocate the appropriate sized buffer 3766 */ 3767 if (udp_bits.udpb_recvttl) { 3768 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3769 UDP_STAT(us, udp_in_recvttl); 3770 } 3771 3772 /* Allocate a message block for the T_UNITDATA_IND structure. */ 3773 mp1 = allocb(udi_size, BPRI_MED); 3774 if (mp1 == NULL) { 3775 freemsg(mp); 3776 if (options_mp != NULL) 3777 freeb(options_mp); 3778 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3779 return; 3780 } 3781 mp1->b_cont = mp; 3782 mp = mp1; 3783 mp->b_datap->db_type = M_PROTO; 3784 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3785 mp->b_wptr = (uchar_t *)tudi + udi_size; 3786 tudi->PRIM_type = T_UNITDATA_IND; 3787 tudi->SRC_length = sizeof (sin_t); 3788 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3789 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3790 sizeof (sin_t); 3791 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3792 tudi->OPT_length = udi_size; 3793 sin = (sin_t *)&tudi[1]; 3794 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3795 sin->sin_port = udpha->uha_src_port; 3796 sin->sin_family = udp->udp_family; 3797 *(uint32_t *)&sin->sin_zero[0] = 0; 3798 *(uint32_t *)&sin->sin_zero[4] = 0; 3799 3800 /* 3801 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 3802 * IP_RECVTTL has been set. 3803 */ 3804 if (udi_size != 0) { 3805 /* 3806 * Copy in destination address before options to avoid 3807 * any padding issues. 3808 */ 3809 char *dstopt; 3810 3811 dstopt = (char *)&sin[1]; 3812 if (udp_bits.udpb_recvdstaddr) { 3813 struct T_opthdr *toh; 3814 ipaddr_t *dstptr; 3815 3816 toh = (struct T_opthdr *)dstopt; 3817 toh->level = IPPROTO_IP; 3818 toh->name = IP_RECVDSTADDR; 3819 toh->len = sizeof (struct T_opthdr) + 3820 sizeof (ipaddr_t); 3821 toh->status = 0; 3822 dstopt += sizeof (struct T_opthdr); 3823 dstptr = (ipaddr_t *)dstopt; 3824 *dstptr = ((ipha_t *)rptr)->ipha_dst; 3825 dstopt += sizeof (ipaddr_t); 3826 udi_size -= toh->len; 3827 } 3828 3829 if (udp_bits.udpb_recvopts && opt_len > 0) { 3830 struct T_opthdr *toh; 3831 3832 toh = (struct T_opthdr *)dstopt; 3833 toh->level = IPPROTO_IP; 3834 toh->name = IP_RECVOPTS; 3835 toh->len = sizeof (struct T_opthdr) + opt_len; 3836 toh->status = 0; 3837 dstopt += sizeof (struct T_opthdr); 3838 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 3839 opt_len); 3840 dstopt += opt_len; 3841 udi_size -= toh->len; 3842 } 3843 3844 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 3845 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3846 struct T_opthdr *toh; 3847 struct in_pktinfo *pktinfop; 3848 3849 toh = (struct T_opthdr *)dstopt; 3850 toh->level = IPPROTO_IP; 3851 toh->name = IP_PKTINFO; 3852 toh->len = sizeof (struct T_opthdr) + 3853 sizeof (*pktinfop); 3854 toh->status = 0; 3855 dstopt += sizeof (struct T_opthdr); 3856 pktinfop = (struct in_pktinfo *)dstopt; 3857 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3858 pktinfop->ipi_spec_dst = 3859 pinfo->ip_pkt_match_addr; 3860 pktinfop->ipi_addr.s_addr = 3861 ((ipha_t *)rptr)->ipha_dst; 3862 3863 dstopt += sizeof (struct in_pktinfo); 3864 udi_size -= toh->len; 3865 } 3866 3867 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3868 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3869 3870 struct T_opthdr *toh; 3871 struct sockaddr_dl *dstptr; 3872 3873 toh = (struct T_opthdr *)dstopt; 3874 toh->level = IPPROTO_IP; 3875 toh->name = IP_RECVSLLA; 3876 toh->len = sizeof (struct T_opthdr) + 3877 sizeof (struct sockaddr_dl); 3878 toh->status = 0; 3879 dstopt += sizeof (struct T_opthdr); 3880 dstptr = (struct sockaddr_dl *)dstopt; 3881 bcopy(&pinfo->ip_pkt_slla, dstptr, 3882 sizeof (struct sockaddr_dl)); 3883 dstopt += sizeof (struct sockaddr_dl); 3884 udi_size -= toh->len; 3885 } 3886 3887 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3888 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3889 3890 struct T_opthdr *toh; 3891 uint_t *dstptr; 3892 3893 toh = (struct T_opthdr *)dstopt; 3894 toh->level = IPPROTO_IP; 3895 toh->name = IP_RECVIF; 3896 toh->len = sizeof (struct T_opthdr) + 3897 sizeof (uint_t); 3898 toh->status = 0; 3899 dstopt += sizeof (struct T_opthdr); 3900 dstptr = (uint_t *)dstopt; 3901 *dstptr = pinfo->ip_pkt_ifindex; 3902 dstopt += sizeof (uint_t); 3903 udi_size -= toh->len; 3904 } 3905 3906 if (cr != NULL) { 3907 struct T_opthdr *toh; 3908 3909 toh = (struct T_opthdr *)dstopt; 3910 toh->level = SOL_SOCKET; 3911 toh->name = SCM_UCRED; 3912 toh->len = sizeof (struct T_opthdr) + ucredsize; 3913 toh->status = 0; 3914 dstopt += sizeof (struct T_opthdr); 3915 (void) cred2ucred(cr, cpid, dstopt, rcr); 3916 dstopt += ucredsize; 3917 udi_size -= toh->len; 3918 } 3919 3920 if (udp_bits.udpb_timestamp) { 3921 struct T_opthdr *toh; 3922 3923 toh = (struct T_opthdr *)dstopt; 3924 toh->level = SOL_SOCKET; 3925 toh->name = SCM_TIMESTAMP; 3926 toh->len = sizeof (struct T_opthdr) + 3927 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3928 toh->status = 0; 3929 dstopt += sizeof (struct T_opthdr); 3930 /* Align for gethrestime() */ 3931 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3932 sizeof (intptr_t)); 3933 gethrestime((timestruc_t *)dstopt); 3934 dstopt = (char *)toh + toh->len; 3935 udi_size -= toh->len; 3936 } 3937 3938 /* 3939 * CAUTION: 3940 * Due to aligment issues 3941 * Processing of IP_RECVTTL option 3942 * should always be the last. Adding 3943 * any option processing after this will 3944 * cause alignment panic. 3945 */ 3946 if (udp_bits.udpb_recvttl) { 3947 struct T_opthdr *toh; 3948 uint8_t *dstptr; 3949 3950 toh = (struct T_opthdr *)dstopt; 3951 toh->level = IPPROTO_IP; 3952 toh->name = IP_RECVTTL; 3953 toh->len = sizeof (struct T_opthdr) + 3954 sizeof (uint8_t); 3955 toh->status = 0; 3956 dstopt += sizeof (struct T_opthdr); 3957 dstptr = (uint8_t *)dstopt; 3958 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 3959 dstopt += sizeof (uint8_t); 3960 udi_size -= toh->len; 3961 } 3962 3963 /* Consumed all of allocated space */ 3964 ASSERT(udi_size == 0); 3965 } 3966 } else { 3967 sin6_t *sin6; 3968 3969 /* 3970 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 3971 * 3972 * Normally we only send up the address. If receiving of any 3973 * optional receive side information is enabled, we also send 3974 * that up as options. 3975 */ 3976 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3977 3978 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3979 IPPF_RTHDR|IPPF_IFINDEX)) { 3980 if ((udp_bits.udpb_ipv6_recvhopopts) && 3981 (ipp.ipp_fields & IPPF_HOPOPTS)) { 3982 size_t hlen; 3983 3984 UDP_STAT(us, udp_in_recvhopopts); 3985 hlen = copy_hop_opts(&ipp, NULL); 3986 if (hlen == 0) 3987 ipp.ipp_fields &= ~IPPF_HOPOPTS; 3988 udi_size += hlen; 3989 } 3990 if (((udp_bits.udpb_ipv6_recvdstopts) || 3991 udp_bits.udpb_old_ipv6_recvdstopts) && 3992 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3993 udi_size += sizeof (struct T_opthdr) + 3994 ipp.ipp_dstoptslen; 3995 UDP_STAT(us, udp_in_recvdstopts); 3996 } 3997 if ((((udp_bits.udpb_ipv6_recvdstopts) && 3998 udp_bits.udpb_ipv6_recvrthdr && 3999 (ipp.ipp_fields & IPPF_RTHDR)) || 4000 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4001 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4002 udi_size += sizeof (struct T_opthdr) + 4003 ipp.ipp_rtdstoptslen; 4004 UDP_STAT(us, udp_in_recvrtdstopts); 4005 } 4006 if ((udp_bits.udpb_ipv6_recvrthdr) && 4007 (ipp.ipp_fields & IPPF_RTHDR)) { 4008 udi_size += sizeof (struct T_opthdr) + 4009 ipp.ipp_rthdrlen; 4010 UDP_STAT(us, udp_in_recvrthdr); 4011 } 4012 if ((udp_bits.udpb_ip_recvpktinfo) && 4013 (ipp.ipp_fields & IPPF_IFINDEX)) { 4014 udi_size += sizeof (struct T_opthdr) + 4015 sizeof (struct in6_pktinfo); 4016 UDP_STAT(us, udp_in_recvpktinfo); 4017 } 4018 4019 } 4020 if ((udp_bits.udpb_recvucred) && 4021 (cr = msg_getcred(mp, &cpid)) != NULL) { 4022 udi_size += sizeof (struct T_opthdr) + ucredsize; 4023 UDP_STAT(us, udp_in_recvucred); 4024 } 4025 4026 /* 4027 * If SO_TIMESTAMP is set allocate the appropriate sized 4028 * buffer. Since gethrestime() expects a pointer aligned 4029 * argument, we allocate space necessary for extra 4030 * alignment (even though it might not be used). 4031 */ 4032 if (udp_bits.udpb_timestamp) { 4033 udi_size += sizeof (struct T_opthdr) + 4034 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4035 UDP_STAT(us, udp_in_timestamp); 4036 } 4037 4038 if (udp_bits.udpb_ipv6_recvhoplimit) { 4039 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4040 UDP_STAT(us, udp_in_recvhoplimit); 4041 } 4042 4043 if (udp_bits.udpb_ipv6_recvtclass) { 4044 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4045 UDP_STAT(us, udp_in_recvtclass); 4046 } 4047 4048 mp1 = allocb(udi_size, BPRI_MED); 4049 if (mp1 == NULL) { 4050 freemsg(mp); 4051 if (options_mp != NULL) 4052 freeb(options_mp); 4053 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4054 return; 4055 } 4056 mp1->b_cont = mp; 4057 mp = mp1; 4058 mp->b_datap->db_type = M_PROTO; 4059 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4060 mp->b_wptr = (uchar_t *)tudi + udi_size; 4061 tudi->PRIM_type = T_UNITDATA_IND; 4062 tudi->SRC_length = sizeof (sin6_t); 4063 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4064 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4065 sizeof (sin6_t); 4066 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4067 tudi->OPT_length = udi_size; 4068 sin6 = (sin6_t *)&tudi[1]; 4069 if (ipversion == IPV4_VERSION) { 4070 in6_addr_t v6dst; 4071 4072 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4073 &sin6->sin6_addr); 4074 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4075 &v6dst); 4076 sin6->sin6_flowinfo = 0; 4077 sin6->sin6_scope_id = 0; 4078 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4079 connp->conn_zoneid, us->us_netstack); 4080 } else { 4081 sin6->sin6_addr = ip6h->ip6_src; 4082 /* No sin6_flowinfo per API */ 4083 sin6->sin6_flowinfo = 0; 4084 /* For link-scope source pass up scope id */ 4085 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4086 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4087 sin6->sin6_scope_id = ipp.ipp_ifindex; 4088 else 4089 sin6->sin6_scope_id = 0; 4090 sin6->__sin6_src_id = ip_srcid_find_addr( 4091 &ip6h->ip6_dst, connp->conn_zoneid, 4092 us->us_netstack); 4093 } 4094 sin6->sin6_port = udpha->uha_src_port; 4095 sin6->sin6_family = udp->udp_family; 4096 4097 if (udi_size != 0) { 4098 uchar_t *dstopt; 4099 4100 dstopt = (uchar_t *)&sin6[1]; 4101 if ((udp_bits.udpb_ip_recvpktinfo) && 4102 (ipp.ipp_fields & IPPF_IFINDEX)) { 4103 struct T_opthdr *toh; 4104 struct in6_pktinfo *pkti; 4105 4106 toh = (struct T_opthdr *)dstopt; 4107 toh->level = IPPROTO_IPV6; 4108 toh->name = IPV6_PKTINFO; 4109 toh->len = sizeof (struct T_opthdr) + 4110 sizeof (*pkti); 4111 toh->status = 0; 4112 dstopt += sizeof (struct T_opthdr); 4113 pkti = (struct in6_pktinfo *)dstopt; 4114 if (ipversion == IPV6_VERSION) 4115 pkti->ipi6_addr = ip6h->ip6_dst; 4116 else 4117 IN6_IPADDR_TO_V4MAPPED( 4118 ((ipha_t *)rptr)->ipha_dst, 4119 &pkti->ipi6_addr); 4120 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4121 dstopt += sizeof (*pkti); 4122 udi_size -= toh->len; 4123 } 4124 if (udp_bits.udpb_ipv6_recvhoplimit) { 4125 struct T_opthdr *toh; 4126 4127 toh = (struct T_opthdr *)dstopt; 4128 toh->level = IPPROTO_IPV6; 4129 toh->name = IPV6_HOPLIMIT; 4130 toh->len = sizeof (struct T_opthdr) + 4131 sizeof (uint_t); 4132 toh->status = 0; 4133 dstopt += sizeof (struct T_opthdr); 4134 if (ipversion == IPV6_VERSION) 4135 *(uint_t *)dstopt = ip6h->ip6_hops; 4136 else 4137 *(uint_t *)dstopt = 4138 ((ipha_t *)rptr)->ipha_ttl; 4139 dstopt += sizeof (uint_t); 4140 udi_size -= toh->len; 4141 } 4142 if (udp_bits.udpb_ipv6_recvtclass) { 4143 struct T_opthdr *toh; 4144 4145 toh = (struct T_opthdr *)dstopt; 4146 toh->level = IPPROTO_IPV6; 4147 toh->name = IPV6_TCLASS; 4148 toh->len = sizeof (struct T_opthdr) + 4149 sizeof (uint_t); 4150 toh->status = 0; 4151 dstopt += sizeof (struct T_opthdr); 4152 if (ipversion == IPV6_VERSION) { 4153 *(uint_t *)dstopt = 4154 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4155 } else { 4156 ipha_t *ipha = (ipha_t *)rptr; 4157 *(uint_t *)dstopt = 4158 ipha->ipha_type_of_service; 4159 } 4160 dstopt += sizeof (uint_t); 4161 udi_size -= toh->len; 4162 } 4163 if ((udp_bits.udpb_ipv6_recvhopopts) && 4164 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4165 size_t hlen; 4166 4167 hlen = copy_hop_opts(&ipp, dstopt); 4168 dstopt += hlen; 4169 udi_size -= hlen; 4170 } 4171 if ((udp_bits.udpb_ipv6_recvdstopts) && 4172 (udp_bits.udpb_ipv6_recvrthdr) && 4173 (ipp.ipp_fields & IPPF_RTHDR) && 4174 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4175 struct T_opthdr *toh; 4176 4177 toh = (struct T_opthdr *)dstopt; 4178 toh->level = IPPROTO_IPV6; 4179 toh->name = IPV6_DSTOPTS; 4180 toh->len = sizeof (struct T_opthdr) + 4181 ipp.ipp_rtdstoptslen; 4182 toh->status = 0; 4183 dstopt += sizeof (struct T_opthdr); 4184 bcopy(ipp.ipp_rtdstopts, dstopt, 4185 ipp.ipp_rtdstoptslen); 4186 dstopt += ipp.ipp_rtdstoptslen; 4187 udi_size -= toh->len; 4188 } 4189 if ((udp_bits.udpb_ipv6_recvrthdr) && 4190 (ipp.ipp_fields & IPPF_RTHDR)) { 4191 struct T_opthdr *toh; 4192 4193 toh = (struct T_opthdr *)dstopt; 4194 toh->level = IPPROTO_IPV6; 4195 toh->name = IPV6_RTHDR; 4196 toh->len = sizeof (struct T_opthdr) + 4197 ipp.ipp_rthdrlen; 4198 toh->status = 0; 4199 dstopt += sizeof (struct T_opthdr); 4200 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4201 dstopt += ipp.ipp_rthdrlen; 4202 udi_size -= toh->len; 4203 } 4204 if ((udp_bits.udpb_ipv6_recvdstopts) && 4205 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4206 struct T_opthdr *toh; 4207 4208 toh = (struct T_opthdr *)dstopt; 4209 toh->level = IPPROTO_IPV6; 4210 toh->name = IPV6_DSTOPTS; 4211 toh->len = sizeof (struct T_opthdr) + 4212 ipp.ipp_dstoptslen; 4213 toh->status = 0; 4214 dstopt += sizeof (struct T_opthdr); 4215 bcopy(ipp.ipp_dstopts, dstopt, 4216 ipp.ipp_dstoptslen); 4217 dstopt += ipp.ipp_dstoptslen; 4218 udi_size -= toh->len; 4219 } 4220 if (cr != NULL) { 4221 struct T_opthdr *toh; 4222 4223 toh = (struct T_opthdr *)dstopt; 4224 toh->level = SOL_SOCKET; 4225 toh->name = SCM_UCRED; 4226 toh->len = sizeof (struct T_opthdr) + ucredsize; 4227 toh->status = 0; 4228 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4229 dstopt += toh->len; 4230 udi_size -= toh->len; 4231 } 4232 if (udp_bits.udpb_timestamp) { 4233 struct T_opthdr *toh; 4234 4235 toh = (struct T_opthdr *)dstopt; 4236 toh->level = SOL_SOCKET; 4237 toh->name = SCM_TIMESTAMP; 4238 toh->len = sizeof (struct T_opthdr) + 4239 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4240 toh->status = 0; 4241 dstopt += sizeof (struct T_opthdr); 4242 /* Align for gethrestime() */ 4243 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4244 sizeof (intptr_t)); 4245 gethrestime((timestruc_t *)dstopt); 4246 dstopt = (uchar_t *)toh + toh->len; 4247 udi_size -= toh->len; 4248 } 4249 4250 /* Consumed all of allocated space */ 4251 ASSERT(udi_size == 0); 4252 } 4253 #undef sin6 4254 /* No IP_RECVDSTADDR for IPv6. */ 4255 } 4256 4257 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4258 if (options_mp != NULL) 4259 freeb(options_mp); 4260 4261 udp_ulp_recv(connp, mp); 4262 4263 return; 4264 4265 tossit: 4266 freemsg(mp); 4267 if (options_mp != NULL) 4268 freeb(options_mp); 4269 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4270 } 4271 4272 /* 4273 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4274 * information that can be changing beneath us. 4275 */ 4276 mblk_t * 4277 udp_snmp_get(queue_t *q, mblk_t *mpctl) 4278 { 4279 mblk_t *mpdata; 4280 mblk_t *mp_conn_ctl; 4281 mblk_t *mp_attr_ctl; 4282 mblk_t *mp6_conn_ctl; 4283 mblk_t *mp6_attr_ctl; 4284 mblk_t *mp_conn_tail; 4285 mblk_t *mp_attr_tail; 4286 mblk_t *mp6_conn_tail; 4287 mblk_t *mp6_attr_tail; 4288 struct opthdr *optp; 4289 mib2_udpEntry_t ude; 4290 mib2_udp6Entry_t ude6; 4291 mib2_transportMLPEntry_t mlp; 4292 int state; 4293 zoneid_t zoneid; 4294 int i; 4295 connf_t *connfp; 4296 conn_t *connp = Q_TO_CONN(q); 4297 int v4_conn_idx; 4298 int v6_conn_idx; 4299 boolean_t needattr; 4300 udp_t *udp; 4301 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4302 udp_stack_t *us = connp->conn_netstack->netstack_udp; 4303 mblk_t *mp2ctl; 4304 4305 /* 4306 * make a copy of the original message 4307 */ 4308 mp2ctl = copymsg(mpctl); 4309 4310 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 4311 if (mpctl == NULL || 4312 (mpdata = mpctl->b_cont) == NULL || 4313 (mp_conn_ctl = copymsg(mpctl)) == NULL || 4314 (mp_attr_ctl = copymsg(mpctl)) == NULL || 4315 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 4316 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 4317 freemsg(mp_conn_ctl); 4318 freemsg(mp_attr_ctl); 4319 freemsg(mp6_conn_ctl); 4320 freemsg(mpctl); 4321 freemsg(mp2ctl); 4322 return (0); 4323 } 4324 4325 zoneid = connp->conn_zoneid; 4326 4327 /* fixed length structure for IPv4 and IPv6 counters */ 4328 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 4329 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 4330 /* synchronize 64- and 32-bit counters */ 4331 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 4332 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 4333 4334 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4335 optp->level = MIB2_UDP; 4336 optp->name = 0; 4337 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 4338 sizeof (us->us_udp_mib)); 4339 optp->len = msgdsize(mpdata); 4340 qreply(q, mpctl); 4341 4342 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 4343 v4_conn_idx = v6_conn_idx = 0; 4344 4345 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4346 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4347 connp = NULL; 4348 4349 while ((connp = ipcl_get_next_conn(connfp, connp, 4350 IPCL_UDPCONN))) { 4351 udp = connp->conn_udp; 4352 if (zoneid != connp->conn_zoneid) 4353 continue; 4354 4355 /* 4356 * Note that the port numbers are sent in 4357 * host byte order 4358 */ 4359 4360 if (udp->udp_state == TS_UNBND) 4361 state = MIB2_UDP_unbound; 4362 else if (udp->udp_state == TS_IDLE) 4363 state = MIB2_UDP_idle; 4364 else if (udp->udp_state == TS_DATA_XFER) 4365 state = MIB2_UDP_connected; 4366 else 4367 state = MIB2_UDP_unknown; 4368 4369 needattr = B_FALSE; 4370 bzero(&mlp, sizeof (mlp)); 4371 if (connp->conn_mlp_type != mlptSingle) { 4372 if (connp->conn_mlp_type == mlptShared || 4373 connp->conn_mlp_type == mlptBoth) 4374 mlp.tme_flags |= MIB2_TMEF_SHARED; 4375 if (connp->conn_mlp_type == mlptPrivate || 4376 connp->conn_mlp_type == mlptBoth) 4377 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 4378 needattr = B_TRUE; 4379 } 4380 if (connp->conn_anon_mlp) { 4381 mlp.tme_flags |= MIB2_TMEF_ANONMLP; 4382 needattr = B_TRUE; 4383 } 4384 if (connp->conn_mac_exempt) { 4385 mlp.tme_flags |= MIB2_TMEF_MACEXEMPT; 4386 needattr = B_TRUE; 4387 } 4388 4389 /* 4390 * Create an IPv4 table entry for IPv4 entries and also 4391 * any IPv6 entries which are bound to in6addr_any 4392 * (i.e. anything a IPv4 peer could connect/send to). 4393 */ 4394 if (udp->udp_ipversion == IPV4_VERSION || 4395 (udp->udp_state <= TS_IDLE && 4396 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 4397 ude.udpEntryInfo.ue_state = state; 4398 /* 4399 * If in6addr_any this will set it to 4400 * INADDR_ANY 4401 */ 4402 ude.udpLocalAddress = 4403 V4_PART_OF_V6(udp->udp_v6src); 4404 ude.udpLocalPort = ntohs(udp->udp_port); 4405 if (udp->udp_state == TS_DATA_XFER) { 4406 /* 4407 * Can potentially get here for 4408 * v6 socket if another process 4409 * (say, ping) has just done a 4410 * sendto(), changing the state 4411 * from the TS_IDLE above to 4412 * TS_DATA_XFER by the time we hit 4413 * this part of the code. 4414 */ 4415 ude.udpEntryInfo.ue_RemoteAddress = 4416 V4_PART_OF_V6(udp->udp_v6dst); 4417 ude.udpEntryInfo.ue_RemotePort = 4418 ntohs(udp->udp_dstport); 4419 } else { 4420 ude.udpEntryInfo.ue_RemoteAddress = 0; 4421 ude.udpEntryInfo.ue_RemotePort = 0; 4422 } 4423 4424 /* 4425 * We make the assumption that all udp_t 4426 * structs will be created within an address 4427 * region no larger than 32-bits. 4428 */ 4429 ude.udpInstance = (uint32_t)(uintptr_t)udp; 4430 ude.udpCreationProcess = 4431 (udp->udp_open_pid < 0) ? 4432 MIB2_UNKNOWN_PROCESS : 4433 udp->udp_open_pid; 4434 ude.udpCreationTime = udp->udp_open_time; 4435 4436 (void) snmp_append_data2(mp_conn_ctl->b_cont, 4437 &mp_conn_tail, (char *)&ude, sizeof (ude)); 4438 mlp.tme_connidx = v4_conn_idx++; 4439 if (needattr) 4440 (void) snmp_append_data2( 4441 mp_attr_ctl->b_cont, &mp_attr_tail, 4442 (char *)&mlp, sizeof (mlp)); 4443 } 4444 if (udp->udp_ipversion == IPV6_VERSION) { 4445 ude6.udp6EntryInfo.ue_state = state; 4446 ude6.udp6LocalAddress = udp->udp_v6src; 4447 ude6.udp6LocalPort = ntohs(udp->udp_port); 4448 ude6.udp6IfIndex = udp->udp_bound_if; 4449 if (udp->udp_state == TS_DATA_XFER) { 4450 ude6.udp6EntryInfo.ue_RemoteAddress = 4451 udp->udp_v6dst; 4452 ude6.udp6EntryInfo.ue_RemotePort = 4453 ntohs(udp->udp_dstport); 4454 } else { 4455 ude6.udp6EntryInfo.ue_RemoteAddress = 4456 sin6_null.sin6_addr; 4457 ude6.udp6EntryInfo.ue_RemotePort = 0; 4458 } 4459 /* 4460 * We make the assumption that all udp_t 4461 * structs will be created within an address 4462 * region no larger than 32-bits. 4463 */ 4464 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 4465 ude6.udp6CreationProcess = 4466 (udp->udp_open_pid < 0) ? 4467 MIB2_UNKNOWN_PROCESS : 4468 udp->udp_open_pid; 4469 ude6.udp6CreationTime = udp->udp_open_time; 4470 4471 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 4472 &mp6_conn_tail, (char *)&ude6, 4473 sizeof (ude6)); 4474 mlp.tme_connidx = v6_conn_idx++; 4475 if (needattr) 4476 (void) snmp_append_data2( 4477 mp6_attr_ctl->b_cont, 4478 &mp6_attr_tail, (char *)&mlp, 4479 sizeof (mlp)); 4480 } 4481 } 4482 } 4483 4484 /* IPv4 UDP endpoints */ 4485 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 4486 sizeof (struct T_optmgmt_ack)]; 4487 optp->level = MIB2_UDP; 4488 optp->name = MIB2_UDP_ENTRY; 4489 optp->len = msgdsize(mp_conn_ctl->b_cont); 4490 qreply(q, mp_conn_ctl); 4491 4492 /* table of MLP attributes... */ 4493 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 4494 sizeof (struct T_optmgmt_ack)]; 4495 optp->level = MIB2_UDP; 4496 optp->name = EXPER_XPORT_MLP; 4497 optp->len = msgdsize(mp_attr_ctl->b_cont); 4498 if (optp->len == 0) 4499 freemsg(mp_attr_ctl); 4500 else 4501 qreply(q, mp_attr_ctl); 4502 4503 /* IPv6 UDP endpoints */ 4504 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 4505 sizeof (struct T_optmgmt_ack)]; 4506 optp->level = MIB2_UDP6; 4507 optp->name = MIB2_UDP6_ENTRY; 4508 optp->len = msgdsize(mp6_conn_ctl->b_cont); 4509 qreply(q, mp6_conn_ctl); 4510 4511 /* table of MLP attributes... */ 4512 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 4513 sizeof (struct T_optmgmt_ack)]; 4514 optp->level = MIB2_UDP6; 4515 optp->name = EXPER_XPORT_MLP; 4516 optp->len = msgdsize(mp6_attr_ctl->b_cont); 4517 if (optp->len == 0) 4518 freemsg(mp6_attr_ctl); 4519 else 4520 qreply(q, mp6_attr_ctl); 4521 4522 return (mp2ctl); 4523 } 4524 4525 /* 4526 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 4527 * NOTE: Per MIB-II, UDP has no writable data. 4528 * TODO: If this ever actually tries to set anything, it needs to be 4529 * to do the appropriate locking. 4530 */ 4531 /* ARGSUSED */ 4532 int 4533 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4534 uchar_t *ptr, int len) 4535 { 4536 switch (level) { 4537 case MIB2_UDP: 4538 return (0); 4539 default: 4540 return (1); 4541 } 4542 } 4543 4544 /* 4545 * This routine creates a T_UDERROR_IND message and passes it upstream. 4546 * The address and options are copied from the T_UNITDATA_REQ message 4547 * passed in mp. This message is freed. 4548 */ 4549 static void 4550 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4551 t_scalar_t err) 4552 { 4553 struct T_unitdata_req *tudr; 4554 mblk_t *mp1; 4555 uchar_t *optaddr; 4556 t_scalar_t optlen; 4557 4558 if (DB_TYPE(mp) == M_DATA) { 4559 ASSERT(destaddr != NULL && destlen != 0); 4560 optaddr = NULL; 4561 optlen = 0; 4562 } else { 4563 if ((mp->b_wptr < mp->b_rptr) || 4564 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4565 goto done; 4566 } 4567 tudr = (struct T_unitdata_req *)mp->b_rptr; 4568 destaddr = mp->b_rptr + tudr->DEST_offset; 4569 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4570 destaddr + tudr->DEST_length < mp->b_rptr || 4571 destaddr + tudr->DEST_length > mp->b_wptr) { 4572 goto done; 4573 } 4574 optaddr = mp->b_rptr + tudr->OPT_offset; 4575 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4576 optaddr + tudr->OPT_length < mp->b_rptr || 4577 optaddr + tudr->OPT_length > mp->b_wptr) { 4578 goto done; 4579 } 4580 destlen = tudr->DEST_length; 4581 optlen = tudr->OPT_length; 4582 } 4583 4584 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4585 (char *)optaddr, optlen, err); 4586 if (mp1 != NULL) 4587 qreply(q, mp1); 4588 4589 done: 4590 freemsg(mp); 4591 } 4592 4593 /* 4594 * This routine removes a port number association from a stream. It 4595 * is called by udp_wput to handle T_UNBIND_REQ messages. 4596 */ 4597 static void 4598 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4599 { 4600 conn_t *connp = Q_TO_CONN(q); 4601 int error; 4602 4603 error = udp_do_unbind(connp); 4604 if (error) { 4605 if (error < 0) 4606 udp_err_ack(q, mp, -error, 0); 4607 else 4608 udp_err_ack(q, mp, TSYSERR, error); 4609 return; 4610 } 4611 4612 mp = mi_tpi_ok_ack_alloc(mp); 4613 ASSERT(mp != NULL); 4614 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4615 qreply(q, mp); 4616 } 4617 4618 /* 4619 * Don't let port fall into the privileged range. 4620 * Since the extra privileged ports can be arbitrary we also 4621 * ensure that we exclude those from consideration. 4622 * us->us_epriv_ports is not sorted thus we loop over it until 4623 * there are no changes. 4624 */ 4625 static in_port_t 4626 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4627 { 4628 int i; 4629 in_port_t nextport; 4630 boolean_t restart = B_FALSE; 4631 udp_stack_t *us = udp->udp_us; 4632 4633 if (random && udp_random_anon_port != 0) { 4634 (void) random_get_pseudo_bytes((uint8_t *)&port, 4635 sizeof (in_port_t)); 4636 /* 4637 * Unless changed by a sys admin, the smallest anon port 4638 * is 32768 and the largest anon port is 65535. It is 4639 * very likely (50%) for the random port to be smaller 4640 * than the smallest anon port. When that happens, 4641 * add port % (anon port range) to the smallest anon 4642 * port to get the random port. It should fall into the 4643 * valid anon port range. 4644 */ 4645 if (port < us->us_smallest_anon_port) { 4646 port = us->us_smallest_anon_port + 4647 port % (us->us_largest_anon_port - 4648 us->us_smallest_anon_port); 4649 } 4650 } 4651 4652 retry: 4653 if (port < us->us_smallest_anon_port) 4654 port = us->us_smallest_anon_port; 4655 4656 if (port > us->us_largest_anon_port) { 4657 port = us->us_smallest_anon_port; 4658 if (restart) 4659 return (0); 4660 restart = B_TRUE; 4661 } 4662 4663 if (port < us->us_smallest_nonpriv_port) 4664 port = us->us_smallest_nonpriv_port; 4665 4666 for (i = 0; i < us->us_num_epriv_ports; i++) { 4667 if (port == us->us_epriv_ports[i]) { 4668 port++; 4669 /* 4670 * Make sure that the port is in the 4671 * valid range. 4672 */ 4673 goto retry; 4674 } 4675 } 4676 4677 if (is_system_labeled() && 4678 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4679 port, IPPROTO_UDP, B_TRUE)) != 0) { 4680 port = nextport; 4681 goto retry; 4682 } 4683 4684 return (port); 4685 } 4686 4687 static int 4688 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 4689 { 4690 int err; 4691 cred_t *cred; 4692 cred_t *orig_cred = NULL; 4693 cred_t *effective_cred = NULL; 4694 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4695 udp_t *udp = Q_TO_UDP(wq); 4696 udp_stack_t *us = udp->udp_us; 4697 4698 /* 4699 * All Solaris components should pass a db_credp 4700 * for this message, hence we ASSERT. 4701 * On production kernels we return an error to be robust against 4702 * random streams modules sitting on top of us. 4703 */ 4704 cred = orig_cred = msg_getcred(mp, NULL); 4705 ASSERT(cred != NULL); 4706 if (cred == NULL) 4707 return (EINVAL); 4708 4709 /* 4710 * Verify the destination is allowed to receive packets at 4711 * the security label of the message data. tsol_check_dest() 4712 * may create a new effective cred for this message with a 4713 * modified label or label flags. Note that we use the cred/label 4714 * from the message to handle MLP 4715 */ 4716 if ((err = tsol_check_dest(cred, &dst, IPV4_VERSION, 4717 udp->udp_connp->conn_mac_exempt, &effective_cred)) != 0) 4718 goto done; 4719 if (effective_cred != NULL) 4720 cred = effective_cred; 4721 4722 /* 4723 * Calculate the security label to be placed in the text 4724 * of the message (if any). 4725 */ 4726 if ((err = tsol_compute_label(cred, dst, opt_storage, 4727 us->us_netstack->netstack_ip)) != 0) 4728 goto done; 4729 4730 /* 4731 * Insert the security label in the cached ip options, 4732 * removing any old label that may exist. 4733 */ 4734 if ((err = tsol_update_options(&udp->udp_ip_snd_options, 4735 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 4736 opt_storage)) != 0) 4737 goto done; 4738 4739 /* 4740 * Save the destination address and creds we used to 4741 * generate the security label text. 4742 */ 4743 if (cred != udp->udp_effective_cred) { 4744 if (udp->udp_effective_cred != NULL) 4745 crfree(udp->udp_effective_cred); 4746 crhold(cred); 4747 udp->udp_effective_cred = cred; 4748 } 4749 if (orig_cred != udp->udp_last_cred) { 4750 if (udp->udp_last_cred != NULL) 4751 crfree(udp->udp_last_cred); 4752 crhold(orig_cred); 4753 udp->udp_last_cred = orig_cred; 4754 } 4755 done: 4756 if (effective_cred != NULL) 4757 crfree(effective_cred); 4758 4759 if (err != 0) { 4760 DTRACE_PROBE4( 4761 tx__ip__log__info__updatelabel__udp, 4762 char *, "queue(1) failed to update options(2) on mp(3)", 4763 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4764 } 4765 return (err); 4766 } 4767 4768 static mblk_t * 4769 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 4770 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 4771 cred_t *cr, pid_t pid) 4772 { 4773 udp_t *udp = connp->conn_udp; 4774 mblk_t *mp1 = mp; 4775 mblk_t *mp2; 4776 ipha_t *ipha; 4777 int ip_hdr_length; 4778 uint32_t ip_len; 4779 udpha_t *udpha; 4780 boolean_t lock_held = B_FALSE; 4781 in_port_t uha_src_port; 4782 udpattrs_t attrs; 4783 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 4784 uint32_t ip_snd_opt_len = 0; 4785 ip4_pkt_t pktinfo; 4786 ip4_pkt_t *pktinfop = &pktinfo; 4787 ip_opt_info_t optinfo; 4788 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4789 udp_stack_t *us = udp->udp_us; 4790 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 4791 queue_t *q = connp->conn_wq; 4792 ire_t *ire; 4793 in6_addr_t v6dst; 4794 boolean_t update_lastdst = B_FALSE; 4795 4796 *error = 0; 4797 pktinfop->ip4_ill_index = 0; 4798 pktinfop->ip4_addr = INADDR_ANY; 4799 optinfo.ip_opt_flags = 0; 4800 optinfo.ip_opt_ill_index = 0; 4801 4802 if (v4dst == INADDR_ANY) 4803 v4dst = htonl(INADDR_LOOPBACK); 4804 4805 /* 4806 * If options passed in, feed it for verification and handling 4807 */ 4808 attrs.udpattr_credset = B_FALSE; 4809 if (IPCL_IS_NONSTR(connp)) { 4810 if (msg->msg_controllen != 0) { 4811 attrs.udpattr_ipp4 = pktinfop; 4812 attrs.udpattr_mb = mp; 4813 4814 rw_enter(&udp->udp_rwlock, RW_WRITER); 4815 *error = process_auxiliary_options(connp, 4816 msg->msg_control, msg->msg_controllen, 4817 &attrs, &udp_opt_obj, udp_opt_set, cr); 4818 rw_exit(&udp->udp_rwlock); 4819 if (*error) 4820 goto done; 4821 } 4822 } else { 4823 if (DB_TYPE(mp) != M_DATA) { 4824 mp1 = mp->b_cont; 4825 if (((struct T_unitdata_req *) 4826 mp->b_rptr)->OPT_length != 0) { 4827 attrs.udpattr_ipp4 = pktinfop; 4828 attrs.udpattr_mb = mp; 4829 if (udp_unitdata_opt_process(q, mp, error, 4830 &attrs) < 0) 4831 goto done; 4832 /* 4833 * Note: success in processing options. 4834 * mp option buffer represented by 4835 * OPT_length/offset now potentially modified 4836 * and contain option setting results 4837 */ 4838 ASSERT(*error == 0); 4839 } 4840 } 4841 } 4842 4843 /* mp1 points to the M_DATA mblk carrying the packet */ 4844 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 4845 4846 /* 4847 * Determine whether we need to mark the mblk with the user's 4848 * credentials. 4849 * If labeled then sockfs would have already done this. 4850 */ 4851 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 4852 4853 ire = connp->conn_ire_cache; 4854 if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) || 4855 (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { 4856 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 4857 mblk_setcred(mp, cr, pid); 4858 } 4859 4860 rw_enter(&udp->udp_rwlock, RW_READER); 4861 lock_held = B_TRUE; 4862 4863 /* 4864 * Cluster and TSOL note: 4865 * udp.udp_v6lastdst is shared by Cluster and TSOL 4866 * udp.udp_lastdstport is used by Cluster 4867 * 4868 * Both Cluster and TSOL need to update the dest addr and/or port. 4869 * Updating is done after both Cluster and TSOL checks, protected 4870 * by conn_lock. 4871 */ 4872 mutex_enter(&connp->conn_lock); 4873 4874 if (cl_inet_connect2 != NULL && 4875 (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 4876 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 4877 udp->udp_lastdstport != port)) { 4878 mutex_exit(&connp->conn_lock); 4879 *error = 0; 4880 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 4881 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); 4882 if (*error != 0) { 4883 *error = EHOSTUNREACH; 4884 goto done; 4885 } 4886 update_lastdst = B_TRUE; 4887 mutex_enter(&connp->conn_lock); 4888 } 4889 4890 /* 4891 * Check if our saved options are valid; update if not. 4892 * TSOL Note: Since we are not in WRITER mode, UDP packets 4893 * to different destination may require different labels, 4894 * or worse, UDP packets to same IP address may require 4895 * different labels due to use of shared all-zones address. 4896 * We use conn_lock to ensure that lastdst, ip_snd_options, 4897 * and ip_snd_options_len are consistent for the current 4898 * destination and are updated atomically. 4899 */ 4900 if (is_system_labeled()) { 4901 cred_t *credp; 4902 pid_t cpid; 4903 4904 /* Using UDP MLP requires SCM_UCRED from user */ 4905 if (connp->conn_mlp_type != mlptSingle && 4906 !attrs.udpattr_credset) { 4907 mutex_exit(&connp->conn_lock); 4908 DTRACE_PROBE4( 4909 tx__ip__log__info__output__udp, 4910 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 4911 mblk_t *, mp, udpattrs_t *, &attrs, queue_t *, q); 4912 *error = EINVAL; 4913 goto done; 4914 } 4915 /* 4916 * Update label option for this UDP socket if 4917 * - the destination has changed, 4918 * - the UDP socket is MLP, or 4919 * - the cred attached to the mblk changed. 4920 */ 4921 credp = msg_getcred(mp, &cpid); 4922 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 4923 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 4924 connp->conn_mlp_type != mlptSingle || 4925 credp != udp->udp_last_cred) { 4926 if ((*error = udp_update_label(q, mp, v4dst)) != 0) { 4927 mutex_exit(&connp->conn_lock); 4928 goto done; 4929 } 4930 update_lastdst = B_TRUE; 4931 } 4932 4933 /* 4934 * Attach the effective cred to the mblk to ensure future 4935 * routing decisions will be based on it's label. 4936 */ 4937 mblk_setcred(mp, udp->udp_effective_cred, cpid); 4938 } 4939 if (update_lastdst) { 4940 IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); 4941 udp->udp_lastdstport = port; 4942 } 4943 if (udp->udp_ip_snd_options_len > 0) { 4944 ip_snd_opt_len = udp->udp_ip_snd_options_len; 4945 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 4946 } 4947 mutex_exit(&connp->conn_lock); 4948 4949 /* Add an IP header */ 4950 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 4951 (insert_spi ? sizeof (uint32_t) : 0); 4952 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4953 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 4954 !OK_32PTR(ipha)) { 4955 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 4956 if (mp2 == NULL) { 4957 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 4958 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 4959 *error = ENOMEM; 4960 goto done; 4961 } 4962 mp2->b_wptr = DB_LIM(mp2); 4963 mp2->b_cont = mp1; 4964 mp1 = mp2; 4965 if (DB_TYPE(mp) != M_DATA) 4966 mp->b_cont = mp1; 4967 else 4968 mp = mp1; 4969 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 4970 } 4971 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0)); 4972 #ifdef _BIG_ENDIAN 4973 /* Set version, header length, and tos */ 4974 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4975 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4976 udp->udp_type_of_service); 4977 /* Set ttl and protocol */ 4978 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 4979 #else 4980 /* Set version, header length, and tos */ 4981 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4982 ((udp->udp_type_of_service << 8) | 4983 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4984 /* Set ttl and protocol */ 4985 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 4986 #endif 4987 if (pktinfop->ip4_addr != INADDR_ANY) { 4988 ipha->ipha_src = pktinfop->ip4_addr; 4989 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4990 } else { 4991 /* 4992 * Copy our address into the packet. If this is zero, 4993 * first look at __sin6_src_id for a hint. If we leave the 4994 * source as INADDR_ANY then ip will fill in the real source 4995 * address. 4996 */ 4997 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 4998 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 4999 in6_addr_t v6src; 5000 5001 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5002 us->us_netstack); 5003 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5004 } 5005 } 5006 uha_src_port = udp->udp_port; 5007 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5008 rw_exit(&udp->udp_rwlock); 5009 lock_held = B_FALSE; 5010 } 5011 5012 if (pktinfop->ip4_ill_index != 0) { 5013 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5014 } 5015 5016 ipha->ipha_fragment_offset_and_flags = 0; 5017 ipha->ipha_ident = 0; 5018 5019 mp1->b_rptr = (uchar_t *)ipha; 5020 5021 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5022 (uintptr_t)UINT_MAX); 5023 5024 /* Determine length of packet */ 5025 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5026 if ((mp2 = mp1->b_cont) != NULL) { 5027 do { 5028 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5029 ip_len += (uint32_t)MBLKL(mp2); 5030 } while ((mp2 = mp2->b_cont) != NULL); 5031 } 5032 /* 5033 * If the size of the packet is greater than the maximum allowed by 5034 * ip, return an error. Passing this down could cause panics because 5035 * the size will have wrapped and be inconsistent with the msg size. 5036 */ 5037 if (ip_len > IP_MAXPACKET) { 5038 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5039 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5040 *error = EMSGSIZE; 5041 goto done; 5042 } 5043 ipha->ipha_length = htons((uint16_t)ip_len); 5044 ip_len -= ip_hdr_length; 5045 ip_len = htons((uint16_t)ip_len); 5046 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5047 5048 /* Insert all-0s SPI now. */ 5049 if (insert_spi) 5050 *((uint32_t *)(udpha + 1)) = 0; 5051 5052 /* 5053 * Copy in the destination address 5054 */ 5055 ipha->ipha_dst = v4dst; 5056 5057 /* 5058 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5059 */ 5060 if (CLASSD(v4dst)) 5061 ipha->ipha_ttl = udp->udp_multicast_ttl; 5062 5063 udpha->uha_dst_port = port; 5064 udpha->uha_src_port = uha_src_port; 5065 5066 if (ip_snd_opt_len > 0) { 5067 uint32_t cksum; 5068 5069 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5070 lock_held = B_FALSE; 5071 rw_exit(&udp->udp_rwlock); 5072 /* 5073 * Massage source route putting first source route in ipha_dst. 5074 * Ignore the destination in T_unitdata_req. 5075 * Create a checksum adjustment for a source route, if any. 5076 */ 5077 cksum = ip_massage_options(ipha, us->us_netstack); 5078 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5079 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5080 (ipha->ipha_dst & 0xFFFF); 5081 if ((int)cksum < 0) 5082 cksum--; 5083 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5084 /* 5085 * IP does the checksum if uha_checksum is non-zero, 5086 * We make it easy for IP to include our pseudo header 5087 * by putting our length in uha_checksum. 5088 */ 5089 cksum += ip_len; 5090 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5091 /* There might be a carry. */ 5092 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5093 #ifdef _LITTLE_ENDIAN 5094 if (us->us_do_checksum) 5095 ip_len = (cksum << 16) | ip_len; 5096 #else 5097 if (us->us_do_checksum) 5098 ip_len = (ip_len << 16) | cksum; 5099 else 5100 ip_len <<= 16; 5101 #endif 5102 } else { 5103 /* 5104 * IP does the checksum if uha_checksum is non-zero, 5105 * We make it easy for IP to include our pseudo header 5106 * by putting our length in uha_checksum. 5107 */ 5108 if (us->us_do_checksum) 5109 ip_len |= (ip_len << 16); 5110 #ifndef _LITTLE_ENDIAN 5111 else 5112 ip_len <<= 16; 5113 #endif 5114 } 5115 ASSERT(!lock_held); 5116 /* Set UDP length and checksum */ 5117 *((uint32_t *)&udpha->uha_length) = ip_len; 5118 5119 if (DB_TYPE(mp) != M_DATA) { 5120 cred_t *cr; 5121 pid_t cpid; 5122 5123 /* Move any cred from the T_UNITDATA_REQ to the packet */ 5124 cr = msg_extractcred(mp, &cpid); 5125 if (cr != NULL) { 5126 if (mp1->b_datap->db_credp != NULL) 5127 crfree(mp1->b_datap->db_credp); 5128 mp1->b_datap->db_credp = cr; 5129 mp1->b_datap->db_cpid = cpid; 5130 } 5131 ASSERT(mp != mp1); 5132 freeb(mp); 5133 } 5134 5135 /* mp has been consumed and we'll return success */ 5136 ASSERT(*error == 0); 5137 mp = NULL; 5138 5139 /* We're done. Pass the packet to ip. */ 5140 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5141 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5142 "udp_wput_end: q %p (%S)", q, "end"); 5143 5144 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5145 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5146 connp->conn_dontroute || 5147 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5148 optinfo.ip_opt_ill_index != 0 || 5149 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5150 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5151 ipst->ips_ip_g_mrouter != NULL) { 5152 UDP_STAT(us, udp_ip_send); 5153 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5154 &optinfo); 5155 } else { 5156 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5157 } 5158 5159 done: 5160 if (lock_held) 5161 rw_exit(&udp->udp_rwlock); 5162 if (*error != 0) { 5163 ASSERT(mp != NULL); 5164 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5165 } 5166 return (mp); 5167 } 5168 5169 static void 5170 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5171 { 5172 conn_t *connp = udp->udp_connp; 5173 ipaddr_t src, dst; 5174 ire_t *ire; 5175 ipif_t *ipif = NULL; 5176 mblk_t *ire_fp_mp; 5177 boolean_t retry_caching; 5178 udp_stack_t *us = udp->udp_us; 5179 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5180 5181 dst = ipha->ipha_dst; 5182 src = ipha->ipha_src; 5183 ASSERT(ipha->ipha_ident == 0); 5184 5185 if (CLASSD(dst)) { 5186 int err; 5187 5188 ipif = conn_get_held_ipif(connp, 5189 &connp->conn_multicast_ipif, &err); 5190 5191 if (ipif == NULL || ipif->ipif_isv6 || 5192 (ipif->ipif_ill->ill_phyint->phyint_flags & 5193 PHYI_LOOPBACK)) { 5194 if (ipif != NULL) 5195 ipif_refrele(ipif); 5196 UDP_STAT(us, udp_ip_send); 5197 ip_output(connp, mp, q, IP_WPUT); 5198 return; 5199 } 5200 } 5201 5202 retry_caching = B_FALSE; 5203 mutex_enter(&connp->conn_lock); 5204 ire = connp->conn_ire_cache; 5205 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5206 5207 if (ire == NULL || ire->ire_addr != dst || 5208 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5209 retry_caching = B_TRUE; 5210 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5211 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5212 5213 ASSERT(ipif != NULL); 5214 if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) 5215 retry_caching = B_TRUE; 5216 } 5217 5218 if (!retry_caching) { 5219 ASSERT(ire != NULL); 5220 IRE_REFHOLD(ire); 5221 mutex_exit(&connp->conn_lock); 5222 } else { 5223 boolean_t cached = B_FALSE; 5224 5225 connp->conn_ire_cache = NULL; 5226 mutex_exit(&connp->conn_lock); 5227 5228 /* Release the old ire */ 5229 if (ire != NULL) { 5230 IRE_REFRELE_NOTR(ire); 5231 ire = NULL; 5232 } 5233 5234 if (CLASSD(dst)) { 5235 ASSERT(ipif != NULL); 5236 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5237 connp->conn_zoneid, msg_getlabel(mp), 5238 MATCH_IRE_ILL, ipst); 5239 } else { 5240 ASSERT(ipif == NULL); 5241 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5242 msg_getlabel(mp), ipst); 5243 } 5244 5245 if (ire == NULL) { 5246 if (ipif != NULL) 5247 ipif_refrele(ipif); 5248 UDP_STAT(us, udp_ire_null); 5249 ip_output(connp, mp, q, IP_WPUT); 5250 return; 5251 } 5252 IRE_REFHOLD_NOTR(ire); 5253 5254 mutex_enter(&connp->conn_lock); 5255 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5256 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5257 irb_t *irb = ire->ire_bucket; 5258 5259 /* 5260 * IRE's created for non-connection oriented transports 5261 * are normally initialized with IRE_MARK_TEMPORARY set 5262 * in the ire_marks. These IRE's are preferentially 5263 * reaped when the hash chain length in the cache 5264 * bucket exceeds the maximum value specified in 5265 * ip[6]_ire_max_bucket_cnt. This can severely affect 5266 * UDP performance if IRE cache entries that we need 5267 * to reuse are continually removed. To remedy this, 5268 * when we cache the IRE in the conn_t, we remove the 5269 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5270 * set. 5271 */ 5272 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5273 rw_enter(&irb->irb_lock, RW_WRITER); 5274 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5275 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5276 irb->irb_tmp_ire_cnt--; 5277 } 5278 rw_exit(&irb->irb_lock); 5279 } 5280 connp->conn_ire_cache = ire; 5281 cached = B_TRUE; 5282 } 5283 mutex_exit(&connp->conn_lock); 5284 5285 /* 5286 * We can continue to use the ire but since it was not 5287 * cached, we should drop the extra reference. 5288 */ 5289 if (!cached) 5290 IRE_REFRELE_NOTR(ire); 5291 } 5292 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5293 ASSERT(!CLASSD(dst) || ipif != NULL); 5294 5295 /* 5296 * Check if we can take the fast-path. 5297 * Note that "incomplete" ire's (where the link-layer for next hop 5298 * is not resolved, or where the fast-path header in nce_fp_mp is not 5299 * available yet) are sent down the legacy (slow) path 5300 */ 5301 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 5302 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 5303 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 5304 ((ire->ire_nce == NULL) || 5305 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 5306 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 5307 if (ipif != NULL) 5308 ipif_refrele(ipif); 5309 UDP_STAT(us, udp_ip_ire_send); 5310 IRE_REFRELE(ire); 5311 ip_output(connp, mp, q, IP_WPUT); 5312 return; 5313 } 5314 5315 if (src == INADDR_ANY && !connp->conn_unspec_src) { 5316 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 5317 ipha->ipha_src = ipif->ipif_src_addr; 5318 else 5319 ipha->ipha_src = ire->ire_src_addr; 5320 } 5321 5322 if (ipif != NULL) 5323 ipif_refrele(ipif); 5324 5325 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 5326 } 5327 5328 static void 5329 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 5330 { 5331 ipaddr_t src, dst; 5332 ill_t *ill; 5333 mblk_t *ire_fp_mp; 5334 uint_t ire_fp_mp_len; 5335 uint16_t *up; 5336 uint32_t cksum, hcksum_txflags; 5337 queue_t *dev_q; 5338 udp_t *udp = connp->conn_udp; 5339 ipha_t *ipha = (ipha_t *)mp->b_rptr; 5340 udp_stack_t *us = udp->udp_us; 5341 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5342 boolean_t ll_multicast = B_FALSE; 5343 boolean_t direct_send; 5344 5345 dev_q = ire->ire_stq->q_next; 5346 ASSERT(dev_q != NULL); 5347 5348 ill = ire_to_ill(ire); 5349 ASSERT(ill != NULL); 5350 5351 /* 5352 * For the direct send case, if resetting of conn_direct_blocked 5353 * was missed, it is still ok because the putq() would enable 5354 * the queue and write service will drain it out. 5355 */ 5356 direct_send = ILL_DIRECT_CAPABLE(ill); 5357 5358 /* is queue flow controlled? */ 5359 if ((!direct_send) && (q->q_first != NULL || connp->conn_draining || 5360 DEV_Q_FLOW_BLOCKED(dev_q))) { 5361 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 5362 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5363 if (ipst->ips_ip_output_queue) { 5364 DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp); 5365 (void) putq(connp->conn_wq, mp); 5366 } else { 5367 freemsg(mp); 5368 } 5369 ire_refrele(ire); 5370 return; 5371 } 5372 5373 ire_fp_mp = ire->ire_nce->nce_fp_mp; 5374 ire_fp_mp_len = MBLKL(ire_fp_mp); 5375 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 5376 5377 dst = ipha->ipha_dst; 5378 src = ipha->ipha_src; 5379 5380 5381 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 5382 5383 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 5384 #ifndef _BIG_ENDIAN 5385 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 5386 #endif 5387 5388 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 5389 ASSERT(ill->ill_hcksum_capab != NULL); 5390 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 5391 } else { 5392 hcksum_txflags = 0; 5393 } 5394 5395 /* pseudo-header checksum (do it in parts for IP header checksum) */ 5396 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 5397 5398 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 5399 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 5400 if (*up != 0) { 5401 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 5402 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 5403 ntohs(ipha->ipha_length), cksum); 5404 5405 /* Software checksum? */ 5406 if (DB_CKSUMFLAGS(mp) == 0) { 5407 UDP_STAT(us, udp_out_sw_cksum); 5408 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 5409 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 5410 } 5411 } 5412 5413 if (!CLASSD(dst)) { 5414 ipha->ipha_fragment_offset_and_flags |= 5415 (uint32_t)htons(ire->ire_frag_flag); 5416 } 5417 5418 /* Calculate IP header checksum if hardware isn't capable */ 5419 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 5420 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 5421 ((uint16_t *)ipha)[4]); 5422 } 5423 5424 if (CLASSD(dst)) { 5425 if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) { 5426 ip_multicast_loopback(q, ill, mp, 5427 connp->conn_multicast_loop ? 0 : 5428 IP_FF_NO_MCAST_LOOP, zoneid); 5429 } 5430 5431 /* If multicast TTL is 0 then we are done */ 5432 if (ipha->ipha_ttl == 0) { 5433 freemsg(mp); 5434 ire_refrele(ire); 5435 return; 5436 } 5437 ll_multicast = B_TRUE; 5438 } 5439 5440 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 5441 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 5442 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 5443 5444 UPDATE_OB_PKT_COUNT(ire); 5445 ire->ire_last_used_time = lbolt; 5446 5447 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 5448 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 5449 ntohs(ipha->ipha_length)); 5450 5451 DTRACE_PROBE4(ip4__physical__out__start, 5452 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 5453 FW_HOOKS(ipst->ips_ip4_physical_out_event, 5454 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 5455 ll_multicast, ipst); 5456 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 5457 if (ipst->ips_ip4_observe.he_interested && mp != NULL) { 5458 zoneid_t szone; 5459 5460 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 5461 ipst, ALL_ZONES); 5462 5463 /* 5464 * The IP observability hook expects b_rptr to be 5465 * where the IP header starts, so advance past the 5466 * link layer header. 5467 */ 5468 mp->b_rptr += ire_fp_mp_len; 5469 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 5470 ALL_ZONES, ill, ipst); 5471 mp->b_rptr -= ire_fp_mp_len; 5472 } 5473 5474 if (mp == NULL) 5475 goto bail; 5476 5477 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 5478 void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, 5479 ipha_t *, ipha, ip6_t *, NULL, int, 0); 5480 5481 if (direct_send) { 5482 uintptr_t cookie; 5483 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 5484 5485 cookie = idd->idd_tx_df(idd->idd_tx_dh, mp, 5486 (uintptr_t)connp, 0); 5487 if (cookie != NULL) { 5488 idl_tx_list_t *idl_txl; 5489 5490 /* 5491 * Flow controlled. 5492 */ 5493 DTRACE_PROBE2(non__null__cookie, uintptr_t, 5494 cookie, conn_t *, connp); 5495 idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)]; 5496 mutex_enter(&idl_txl->txl_lock); 5497 /* 5498 * Check again after holding txl_lock to see if Tx 5499 * ring is still blocked and only then insert the 5500 * connp into the drain list. 5501 */ 5502 if (connp->conn_direct_blocked || 5503 (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, 5504 cookie) == 0)) { 5505 mutex_exit(&idl_txl->txl_lock); 5506 goto bail; 5507 } 5508 if (idl_txl->txl_cookie != NULL && 5509 idl_txl->txl_cookie != cookie) { 5510 DTRACE_PROBE2(udp__xmit__collision, 5511 uintptr_t, cookie, 5512 uintptr_t, idl_txl->txl_cookie); 5513 UDP_STAT(us, udp_cookie_coll); 5514 } else { 5515 connp->conn_direct_blocked = B_TRUE; 5516 idl_txl->txl_cookie = cookie; 5517 conn_drain_insert(connp, idl_txl); 5518 DTRACE_PROBE1(udp__xmit__insert, 5519 conn_t *, connp); 5520 } 5521 mutex_exit(&idl_txl->txl_lock); 5522 } 5523 } else { 5524 DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp); 5525 putnext(ire->ire_stq, mp); 5526 } 5527 bail: 5528 IRE_REFRELE(ire); 5529 } 5530 5531 static boolean_t 5532 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 5533 { 5534 udp_t *udp = Q_TO_UDP(wq); 5535 int err; 5536 cred_t *cred; 5537 cred_t *orig_cred; 5538 cred_t *effective_cred = NULL; 5539 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 5540 udp_stack_t *us = udp->udp_us; 5541 5542 /* 5543 * All Solaris components should pass a db_credp 5544 * for this message, hence we ASSERT. 5545 * On production kernels we return an error to be robust against 5546 * random streams modules sitting on top of us. 5547 */ 5548 cred = orig_cred = msg_getcred(mp, NULL); 5549 ASSERT(cred != NULL); 5550 if (cred == NULL) 5551 return (EINVAL); 5552 5553 /* 5554 * Verify the destination is allowed to receive packets at 5555 * the security label of the message data. tsol_check_dest() 5556 * may create a new effective cred for this message with a 5557 * modified label or label flags. Note that we use the 5558 * cred/label from the message to handle MLP. 5559 */ 5560 if ((err = tsol_check_dest(cred, dst, IPV6_VERSION, 5561 udp->udp_connp->conn_mac_exempt, &effective_cred)) != 0) 5562 goto done; 5563 if (effective_cred != NULL) 5564 cred = effective_cred; 5565 5566 /* 5567 * Calculate the security label to be placed in the text 5568 * of the message (if any). 5569 */ 5570 if ((err = tsol_compute_label_v6(cred, dst, opt_storage, 5571 us->us_netstack->netstack_ip)) != 0) 5572 goto done; 5573 5574 /* 5575 * Insert the security label in the cached ip options, 5576 * removing any old label that may exist. 5577 */ 5578 if ((err = tsol_update_sticky(&udp->udp_sticky_ipp, 5579 &udp->udp_label_len_v6, opt_storage)) != 0) 5580 goto done; 5581 5582 /* 5583 * Save the destination address and cred we used to 5584 * generate the security label text. 5585 */ 5586 if (cred != udp->udp_effective_cred) { 5587 if (udp->udp_effective_cred != NULL) 5588 crfree(udp->udp_effective_cred); 5589 crhold(cred); 5590 udp->udp_effective_cred = cred; 5591 } 5592 if (orig_cred != udp->udp_last_cred) { 5593 if (udp->udp_last_cred != NULL) 5594 crfree(udp->udp_last_cred); 5595 crhold(orig_cred); 5596 udp->udp_last_cred = orig_cred; 5597 } 5598 5599 done: 5600 if (effective_cred != NULL) 5601 crfree(effective_cred); 5602 5603 if (err != 0) { 5604 DTRACE_PROBE4( 5605 tx__ip__log__drop__updatelabel__udp6, 5606 char *, "queue(1) failed to update options(2) on mp(3)", 5607 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5608 } 5609 return (err); 5610 } 5611 5612 static int 5613 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5614 pid_t pid) 5615 { 5616 udp_t *udp = connp->conn_udp; 5617 udp_stack_t *us = udp->udp_us; 5618 ipaddr_t v4dst; 5619 in_port_t dstport; 5620 boolean_t mapped_addr; 5621 struct sockaddr_storage ss; 5622 sin_t *sin; 5623 sin6_t *sin6; 5624 struct sockaddr *addr; 5625 socklen_t addrlen; 5626 int error; 5627 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5628 5629 /* M_DATA for connected socket */ 5630 5631 ASSERT(udp->udp_issocket); 5632 UDP_DBGSTAT(us, udp_data_conn); 5633 5634 mutex_enter(&connp->conn_lock); 5635 if (udp->udp_state != TS_DATA_XFER) { 5636 mutex_exit(&connp->conn_lock); 5637 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5638 UDP_STAT(us, udp_out_err_notconn); 5639 freemsg(mp); 5640 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5641 "udp_wput_end: connp %p (%S)", connp, 5642 "not-connected; address required"); 5643 return (EDESTADDRREQ); 5644 } 5645 5646 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5647 if (mapped_addr) 5648 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5649 5650 /* Initialize addr and addrlen as if they're passed in */ 5651 if (udp->udp_family == AF_INET) { 5652 sin = (sin_t *)&ss; 5653 sin->sin_family = AF_INET; 5654 dstport = sin->sin_port = udp->udp_dstport; 5655 ASSERT(mapped_addr); 5656 sin->sin_addr.s_addr = v4dst; 5657 addr = (struct sockaddr *)sin; 5658 addrlen = sizeof (*sin); 5659 } else { 5660 sin6 = (sin6_t *)&ss; 5661 sin6->sin6_family = AF_INET6; 5662 dstport = sin6->sin6_port = udp->udp_dstport; 5663 sin6->sin6_flowinfo = udp->udp_flowinfo; 5664 sin6->sin6_addr = udp->udp_v6dst; 5665 sin6->sin6_scope_id = 0; 5666 sin6->__sin6_src_id = 0; 5667 addr = (struct sockaddr *)sin6; 5668 addrlen = sizeof (*sin6); 5669 } 5670 mutex_exit(&connp->conn_lock); 5671 5672 if (mapped_addr) { 5673 /* 5674 * Handle both AF_INET and AF_INET6; the latter 5675 * for IPV4 mapped destination addresses. Note 5676 * here that both addr and addrlen point to the 5677 * corresponding struct depending on the address 5678 * family of the socket. 5679 */ 5680 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5681 insert_spi, msg, cr, pid); 5682 } else { 5683 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5684 } 5685 if (error == 0) { 5686 ASSERT(mp == NULL); 5687 return (0); 5688 } 5689 5690 UDP_STAT(us, udp_out_err_output); 5691 ASSERT(mp != NULL); 5692 if (IPCL_IS_NONSTR(connp)) { 5693 freemsg(mp); 5694 return (error); 5695 } else { 5696 /* mp is freed by the following routine */ 5697 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5698 (t_scalar_t)addrlen, (t_scalar_t)error); 5699 return (0); 5700 } 5701 } 5702 5703 /* ARGSUSED */ 5704 static int 5705 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5706 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5707 { 5708 5709 udp_t *udp = connp->conn_udp; 5710 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5711 int error = 0; 5712 sin6_t *sin6; 5713 sin_t *sin; 5714 uint_t srcid; 5715 uint16_t port; 5716 ipaddr_t v4dst; 5717 5718 5719 ASSERT(addr != NULL); 5720 5721 switch (udp->udp_family) { 5722 case AF_INET6: 5723 sin6 = (sin6_t *)addr; 5724 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5725 /* 5726 * Destination is a non-IPv4-compatible IPv6 address. 5727 * Send out an IPv6 format packet. 5728 */ 5729 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5730 pid); 5731 if (error != 0) 5732 goto ud_error; 5733 5734 return (0); 5735 } 5736 /* 5737 * If the local address is not zero or a mapped address 5738 * return an error. It would be possible to send an IPv4 5739 * packet but the response would never make it back to the 5740 * application since it is bound to a non-mapped address. 5741 */ 5742 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5743 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5744 error = EADDRNOTAVAIL; 5745 goto ud_error; 5746 } 5747 /* Send IPv4 packet without modifying udp_ipversion */ 5748 /* Extract port and ipaddr */ 5749 port = sin6->sin6_port; 5750 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5751 srcid = sin6->__sin6_src_id; 5752 break; 5753 5754 case AF_INET: 5755 sin = (sin_t *)addr; 5756 /* Extract port and ipaddr */ 5757 port = sin->sin_port; 5758 v4dst = sin->sin_addr.s_addr; 5759 srcid = 0; 5760 break; 5761 } 5762 5763 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5764 msg, cr, pid); 5765 5766 if (error == 0) { 5767 ASSERT(mp == NULL); 5768 return (0); 5769 } 5770 5771 ud_error: 5772 ASSERT(mp != NULL); 5773 5774 return (error); 5775 } 5776 5777 /* 5778 * This routine handles all messages passed downstream. It either 5779 * consumes the message or passes it downstream; it never queues a 5780 * a message. 5781 * 5782 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 5783 * is valid when we are directly beneath the stream head, and thus sockfs 5784 * is able to bypass STREAMS and directly call us, passing along the sockaddr 5785 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 5786 * connected endpoints. 5787 */ 5788 void 5789 udp_wput(queue_t *q, mblk_t *mp) 5790 { 5791 conn_t *connp = Q_TO_CONN(q); 5792 udp_t *udp = connp->conn_udp; 5793 int error = 0; 5794 struct sockaddr *addr; 5795 socklen_t addrlen; 5796 udp_stack_t *us = udp->udp_us; 5797 5798 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 5799 "udp_wput_start: queue %p mp %p", q, mp); 5800 5801 /* 5802 * We directly handle several cases here: T_UNITDATA_REQ message 5803 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 5804 * socket. 5805 */ 5806 switch (DB_TYPE(mp)) { 5807 case M_DATA: 5808 /* 5809 * Quick check for error cases. Checks will be done again 5810 * under the lock later on 5811 */ 5812 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 5813 /* Not connected; address is required */ 5814 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5815 UDP_STAT(us, udp_out_err_notconn); 5816 freemsg(mp); 5817 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5818 "udp_wput_end: connp %p (%S)", connp, 5819 "not-connected; address required"); 5820 return; 5821 } 5822 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 5823 return; 5824 5825 case M_PROTO: 5826 case M_PCPROTO: { 5827 struct T_unitdata_req *tudr; 5828 5829 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 5830 tudr = (struct T_unitdata_req *)mp->b_rptr; 5831 5832 /* Handle valid T_UNITDATA_REQ here */ 5833 if (MBLKL(mp) >= sizeof (*tudr) && 5834 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 5835 if (mp->b_cont == NULL) { 5836 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5837 "udp_wput_end: q %p (%S)", q, "badaddr"); 5838 error = EPROTO; 5839 goto ud_error; 5840 } 5841 5842 if (!MBLKIN(mp, 0, tudr->DEST_offset + 5843 tudr->DEST_length)) { 5844 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5845 "udp_wput_end: q %p (%S)", q, "badaddr"); 5846 error = EADDRNOTAVAIL; 5847 goto ud_error; 5848 } 5849 /* 5850 * If a port has not been bound to the stream, fail. 5851 * This is not a problem when sockfs is directly 5852 * above us, because it will ensure that the socket 5853 * is first bound before allowing data to be sent. 5854 */ 5855 if (udp->udp_state == TS_UNBND) { 5856 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5857 "udp_wput_end: q %p (%S)", q, "outstate"); 5858 error = EPROTO; 5859 goto ud_error; 5860 } 5861 addr = (struct sockaddr *) 5862 &mp->b_rptr[tudr->DEST_offset]; 5863 addrlen = tudr->DEST_length; 5864 if (tudr->OPT_length != 0) 5865 UDP_STAT(us, udp_out_opt); 5866 break; 5867 } 5868 /* FALLTHRU */ 5869 } 5870 default: 5871 udp_wput_other(q, mp); 5872 return; 5873 } 5874 ASSERT(addr != NULL); 5875 5876 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 5877 -1); 5878 if (error != 0) { 5879 ud_error: 5880 UDP_STAT(us, udp_out_err_output); 5881 ASSERT(mp != NULL); 5882 /* mp is freed by the following routine */ 5883 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 5884 (t_scalar_t)error); 5885 } 5886 } 5887 5888 /* ARGSUSED */ 5889 static void 5890 udp_wput_fallback(queue_t *wq, mblk_t *mp) 5891 { 5892 #ifdef DEBUG 5893 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 5894 #endif 5895 freemsg(mp); 5896 } 5897 5898 5899 /* 5900 * udp_output_v6(): 5901 * Assumes that udp_wput did some sanity checking on the destination 5902 * address. 5903 */ 5904 static mblk_t * 5905 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 5906 struct nmsghdr *msg, cred_t *cr, pid_t pid) 5907 { 5908 ip6_t *ip6h; 5909 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 5910 mblk_t *mp1 = mp; 5911 mblk_t *mp2; 5912 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 5913 size_t ip_len; 5914 udpha_t *udph; 5915 udp_t *udp = connp->conn_udp; 5916 udp_stack_t *us = udp->udp_us; 5917 queue_t *q = connp->conn_wq; 5918 ip6_pkt_t ipp_s; /* For ancillary data options */ 5919 ip6_pkt_t *ipp = &ipp_s; 5920 ip6_pkt_t *tipp; /* temporary ipp */ 5921 uint32_t csum = 0; 5922 uint_t ignore = 0; 5923 uint_t option_exists = 0, is_sticky = 0; 5924 uint8_t *cp; 5925 uint8_t *nxthdr_ptr; 5926 in6_addr_t ip6_dst; 5927 in_port_t port; 5928 udpattrs_t attrs; 5929 boolean_t opt_present; 5930 ip6_hbh_t *hopoptsptr = NULL; 5931 uint_t hopoptslen = 0; 5932 boolean_t is_ancillary = B_FALSE; 5933 size_t sth_wroff = 0; 5934 ire_t *ire; 5935 boolean_t update_lastdst = B_FALSE; 5936 5937 *error = 0; 5938 5939 /* 5940 * If the local address is a mapped address return 5941 * an error. 5942 * It would be possible to send an IPv6 packet but the 5943 * response would never make it back to the application 5944 * since it is bound to a mapped address. 5945 */ 5946 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 5947 *error = EADDRNOTAVAIL; 5948 goto done; 5949 } 5950 5951 ipp->ipp_fields = 0; 5952 ipp->ipp_sticky_ignored = 0; 5953 5954 /* 5955 * If TPI options passed in, feed it for verification and handling 5956 */ 5957 attrs.udpattr_credset = B_FALSE; 5958 opt_present = B_FALSE; 5959 if (IPCL_IS_NONSTR(connp)) { 5960 if (msg->msg_controllen != 0) { 5961 attrs.udpattr_ipp6 = ipp; 5962 attrs.udpattr_mb = mp; 5963 5964 rw_enter(&udp->udp_rwlock, RW_WRITER); 5965 *error = process_auxiliary_options(connp, 5966 msg->msg_control, msg->msg_controllen, 5967 &attrs, &udp_opt_obj, udp_opt_set, cr); 5968 rw_exit(&udp->udp_rwlock); 5969 if (*error) 5970 goto done; 5971 ASSERT(*error == 0); 5972 opt_present = B_TRUE; 5973 } 5974 } else { 5975 if (DB_TYPE(mp) != M_DATA) { 5976 mp1 = mp->b_cont; 5977 if (((struct T_unitdata_req *) 5978 mp->b_rptr)->OPT_length != 0) { 5979 attrs.udpattr_ipp6 = ipp; 5980 attrs.udpattr_mb = mp; 5981 if (udp_unitdata_opt_process(q, mp, error, 5982 &attrs) < 0) { 5983 goto done; 5984 } 5985 ASSERT(*error == 0); 5986 opt_present = B_TRUE; 5987 } 5988 } 5989 } 5990 5991 /* 5992 * Determine whether we need to mark the mblk with the user's 5993 * credentials. 5994 * If labeled then sockfs would have already done this. 5995 */ 5996 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 5997 ire = connp->conn_ire_cache; 5998 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) || 5999 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6000 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6001 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 6002 mblk_setcred(mp, cr, pid); 6003 } 6004 6005 rw_enter(&udp->udp_rwlock, RW_READER); 6006 ignore = ipp->ipp_sticky_ignored; 6007 6008 /* mp1 points to the M_DATA mblk carrying the packet */ 6009 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6010 6011 if (sin6->sin6_scope_id != 0 && 6012 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6013 /* 6014 * IPPF_SCOPE_ID is special. It's neither a sticky 6015 * option nor ancillary data. It needs to be 6016 * explicitly set in options_exists. 6017 */ 6018 option_exists |= IPPF_SCOPE_ID; 6019 } 6020 6021 /* 6022 * Compute the destination address 6023 */ 6024 ip6_dst = sin6->sin6_addr; 6025 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6026 ip6_dst = ipv6_loopback; 6027 6028 port = sin6->sin6_port; 6029 6030 /* 6031 * Cluster and TSOL notes, Cluster check: 6032 * see comments in udp_output_v4(). 6033 */ 6034 mutex_enter(&connp->conn_lock); 6035 6036 if (cl_inet_connect2 != NULL && 6037 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6038 port != udp->udp_lastdstport)) { 6039 mutex_exit(&connp->conn_lock); 6040 *error = 0; 6041 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6042 if (*error != 0) { 6043 *error = EHOSTUNREACH; 6044 rw_exit(&udp->udp_rwlock); 6045 goto done; 6046 } 6047 update_lastdst = B_TRUE; 6048 mutex_enter(&connp->conn_lock); 6049 } 6050 6051 /* 6052 * If we're not going to the same destination as last time, then 6053 * recompute the label required. This is done in a separate routine to 6054 * avoid blowing up our stack here. 6055 * 6056 * TSOL Note: Since we are not in WRITER mode, UDP packets 6057 * to different destination may require different labels, 6058 * or worse, UDP packets to same IP address may require 6059 * different labels due to use of shared all-zones address. 6060 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6061 * and sticky ipp_hopoptslen are consistent for the current 6062 * destination and are updated atomically. 6063 */ 6064 if (is_system_labeled()) { 6065 cred_t *credp; 6066 pid_t cpid; 6067 6068 /* Using UDP MLP requires SCM_UCRED from user */ 6069 if (connp->conn_mlp_type != mlptSingle && 6070 !attrs.udpattr_credset) { 6071 DTRACE_PROBE4( 6072 tx__ip__log__info__output__udp6, 6073 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6074 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6075 *error = EINVAL; 6076 rw_exit(&udp->udp_rwlock); 6077 mutex_exit(&connp->conn_lock); 6078 goto done; 6079 } 6080 /* 6081 * update label option for this UDP socket if 6082 * - the destination has changed, 6083 * - the UDP socket is MLP, or 6084 * - the cred attached to the mblk changed. 6085 */ 6086 credp = msg_getcred(mp, &cpid); 6087 if (opt_present || 6088 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6089 connp->conn_mlp_type != mlptSingle || 6090 credp != udp->udp_last_cred) { 6091 if ((*error = udp_update_label_v6(q, mp, &ip6_dst)) 6092 != 0) { 6093 rw_exit(&udp->udp_rwlock); 6094 mutex_exit(&connp->conn_lock); 6095 goto done; 6096 } 6097 update_lastdst = B_TRUE; 6098 } 6099 /* 6100 * Attach the effective cred to the mblk to ensure future 6101 * routing decisions will be based on it's label. 6102 */ 6103 mblk_setcred(mp, udp->udp_effective_cred, cpid); 6104 } 6105 6106 if (update_lastdst) { 6107 udp->udp_v6lastdst = ip6_dst; 6108 udp->udp_lastdstport = port; 6109 } 6110 6111 /* 6112 * If there's a security label here, then we ignore any options the 6113 * user may try to set. We keep the peer's label as a hidden sticky 6114 * option. We make a private copy of this label before releasing the 6115 * lock so that label is kept consistent with the destination addr. 6116 */ 6117 if (udp->udp_label_len_v6 > 0) { 6118 ignore &= ~IPPF_HOPOPTS; 6119 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6120 } 6121 6122 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6123 /* No sticky options nor ancillary data. */ 6124 mutex_exit(&connp->conn_lock); 6125 goto no_options; 6126 } 6127 6128 /* 6129 * Go through the options figuring out where each is going to 6130 * come from and build two masks. The first mask indicates if 6131 * the option exists at all. The second mask indicates if the 6132 * option is sticky or ancillary. 6133 */ 6134 if (!(ignore & IPPF_HOPOPTS)) { 6135 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6136 option_exists |= IPPF_HOPOPTS; 6137 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6138 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6139 option_exists |= IPPF_HOPOPTS; 6140 is_sticky |= IPPF_HOPOPTS; 6141 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6142 hopoptsptr = kmem_alloc( 6143 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6144 if (hopoptsptr == NULL) { 6145 *error = ENOMEM; 6146 mutex_exit(&connp->conn_lock); 6147 goto done; 6148 } 6149 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6150 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6151 hopoptslen); 6152 udp_ip_hdr_len += hopoptslen; 6153 } 6154 } 6155 mutex_exit(&connp->conn_lock); 6156 6157 if (!(ignore & IPPF_RTHDR)) { 6158 if (ipp->ipp_fields & IPPF_RTHDR) { 6159 option_exists |= IPPF_RTHDR; 6160 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6161 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6162 option_exists |= IPPF_RTHDR; 6163 is_sticky |= IPPF_RTHDR; 6164 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6165 } 6166 } 6167 6168 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6169 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6170 option_exists |= IPPF_RTDSTOPTS; 6171 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6172 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6173 option_exists |= IPPF_RTDSTOPTS; 6174 is_sticky |= IPPF_RTDSTOPTS; 6175 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6176 } 6177 } 6178 6179 if (!(ignore & IPPF_DSTOPTS)) { 6180 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6181 option_exists |= IPPF_DSTOPTS; 6182 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6183 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6184 option_exists |= IPPF_DSTOPTS; 6185 is_sticky |= IPPF_DSTOPTS; 6186 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6187 } 6188 } 6189 6190 if (!(ignore & IPPF_IFINDEX)) { 6191 if (ipp->ipp_fields & IPPF_IFINDEX) { 6192 option_exists |= IPPF_IFINDEX; 6193 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6194 option_exists |= IPPF_IFINDEX; 6195 is_sticky |= IPPF_IFINDEX; 6196 } 6197 } 6198 6199 if (!(ignore & IPPF_ADDR)) { 6200 if (ipp->ipp_fields & IPPF_ADDR) { 6201 option_exists |= IPPF_ADDR; 6202 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6203 option_exists |= IPPF_ADDR; 6204 is_sticky |= IPPF_ADDR; 6205 } 6206 } 6207 6208 if (!(ignore & IPPF_DONTFRAG)) { 6209 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6210 option_exists |= IPPF_DONTFRAG; 6211 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6212 option_exists |= IPPF_DONTFRAG; 6213 is_sticky |= IPPF_DONTFRAG; 6214 } 6215 } 6216 6217 if (!(ignore & IPPF_USE_MIN_MTU)) { 6218 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6219 option_exists |= IPPF_USE_MIN_MTU; 6220 } else if (udp->udp_sticky_ipp.ipp_fields & 6221 IPPF_USE_MIN_MTU) { 6222 option_exists |= IPPF_USE_MIN_MTU; 6223 is_sticky |= IPPF_USE_MIN_MTU; 6224 } 6225 } 6226 6227 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6228 option_exists |= IPPF_HOPLIMIT; 6229 /* IPV6_HOPLIMIT can never be sticky */ 6230 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6231 6232 if (!(ignore & IPPF_UNICAST_HOPS) && 6233 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6234 option_exists |= IPPF_UNICAST_HOPS; 6235 is_sticky |= IPPF_UNICAST_HOPS; 6236 } 6237 6238 if (!(ignore & IPPF_MULTICAST_HOPS) && 6239 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6240 option_exists |= IPPF_MULTICAST_HOPS; 6241 is_sticky |= IPPF_MULTICAST_HOPS; 6242 } 6243 6244 if (!(ignore & IPPF_TCLASS)) { 6245 if (ipp->ipp_fields & IPPF_TCLASS) { 6246 option_exists |= IPPF_TCLASS; 6247 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6248 option_exists |= IPPF_TCLASS; 6249 is_sticky |= IPPF_TCLASS; 6250 } 6251 } 6252 6253 if (!(ignore & IPPF_NEXTHOP) && 6254 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6255 option_exists |= IPPF_NEXTHOP; 6256 is_sticky |= IPPF_NEXTHOP; 6257 } 6258 6259 no_options: 6260 6261 /* 6262 * If any options carried in the ip6i_t were specified, we 6263 * need to account for the ip6i_t in the data we'll be sending 6264 * down. 6265 */ 6266 if (option_exists & IPPF_HAS_IP6I) 6267 udp_ip_hdr_len += sizeof (ip6i_t); 6268 6269 /* check/fix buffer config, setup pointers into it */ 6270 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6271 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6272 !OK_32PTR(ip6h)) { 6273 6274 /* Try to get everything in a single mblk next time */ 6275 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6276 udp->udp_max_hdr_len = udp_ip_hdr_len; 6277 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6278 } 6279 6280 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6281 if (mp2 == NULL) { 6282 *error = ENOMEM; 6283 rw_exit(&udp->udp_rwlock); 6284 goto done; 6285 } 6286 mp2->b_wptr = DB_LIM(mp2); 6287 mp2->b_cont = mp1; 6288 mp1 = mp2; 6289 if (DB_TYPE(mp) != M_DATA) 6290 mp->b_cont = mp1; 6291 else 6292 mp = mp1; 6293 6294 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6295 } 6296 mp1->b_rptr = (unsigned char *)ip6h; 6297 ip6i = (ip6i_t *)ip6h; 6298 6299 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6300 if (option_exists & IPPF_HAS_IP6I) { 6301 ip6h = (ip6_t *)&ip6i[1]; 6302 ip6i->ip6i_flags = 0; 6303 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6304 6305 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6306 if (option_exists & IPPF_SCOPE_ID) { 6307 ip6i->ip6i_flags |= IP6I_IFINDEX; 6308 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6309 } else if (option_exists & IPPF_IFINDEX) { 6310 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6311 ASSERT(tipp->ipp_ifindex != 0); 6312 ip6i->ip6i_flags |= IP6I_IFINDEX; 6313 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6314 } 6315 6316 if (option_exists & IPPF_ADDR) { 6317 /* 6318 * Enable per-packet source address verification if 6319 * IPV6_PKTINFO specified the source address. 6320 * ip6_src is set in the transport's _wput function. 6321 */ 6322 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6323 } 6324 6325 if (option_exists & IPPF_DONTFRAG) { 6326 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6327 } 6328 6329 if (option_exists & IPPF_USE_MIN_MTU) { 6330 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6331 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6332 } 6333 6334 if (option_exists & IPPF_NEXTHOP) { 6335 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6336 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6337 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6338 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6339 } 6340 6341 /* 6342 * tell IP this is an ip6i_t private header 6343 */ 6344 ip6i->ip6i_nxt = IPPROTO_RAW; 6345 } 6346 6347 /* Initialize IPv6 header */ 6348 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6349 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6350 6351 /* Set the hoplimit of the outgoing packet. */ 6352 if (option_exists & IPPF_HOPLIMIT) { 6353 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 6354 ip6h->ip6_hops = ipp->ipp_hoplimit; 6355 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6356 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6357 ip6h->ip6_hops = udp->udp_multicast_ttl; 6358 if (option_exists & IPPF_MULTICAST_HOPS) 6359 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6360 } else { 6361 ip6h->ip6_hops = udp->udp_ttl; 6362 if (option_exists & IPPF_UNICAST_HOPS) 6363 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6364 } 6365 6366 if (option_exists & IPPF_ADDR) { 6367 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6368 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6369 ip6h->ip6_src = tipp->ipp_addr; 6370 } else { 6371 /* 6372 * The source address was not set using IPV6_PKTINFO. 6373 * First look at the bound source. 6374 * If unspecified fallback to __sin6_src_id. 6375 */ 6376 ip6h->ip6_src = udp->udp_v6src; 6377 if (sin6->__sin6_src_id != 0 && 6378 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6379 ip_srcid_find_id(sin6->__sin6_src_id, 6380 &ip6h->ip6_src, connp->conn_zoneid, 6381 us->us_netstack); 6382 } 6383 } 6384 6385 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6386 cp = (uint8_t *)&ip6h[1]; 6387 6388 /* 6389 * Here's where we have to start stringing together 6390 * any extension headers in the right order: 6391 * Hop-by-hop, destination, routing, and final destination opts. 6392 */ 6393 if (option_exists & IPPF_HOPOPTS) { 6394 /* Hop-by-hop options */ 6395 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6396 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6397 if (hopoptslen == 0) { 6398 hopoptsptr = tipp->ipp_hopopts; 6399 hopoptslen = tipp->ipp_hopoptslen; 6400 is_ancillary = B_TRUE; 6401 } 6402 6403 *nxthdr_ptr = IPPROTO_HOPOPTS; 6404 nxthdr_ptr = &hbh->ip6h_nxt; 6405 6406 bcopy(hopoptsptr, cp, hopoptslen); 6407 cp += hopoptslen; 6408 6409 if (hopoptsptr != NULL && !is_ancillary) { 6410 kmem_free(hopoptsptr, hopoptslen); 6411 hopoptsptr = NULL; 6412 hopoptslen = 0; 6413 } 6414 } 6415 /* 6416 * En-route destination options 6417 * Only do them if there's a routing header as well 6418 */ 6419 if (option_exists & IPPF_RTDSTOPTS) { 6420 ip6_dest_t *dst = (ip6_dest_t *)cp; 6421 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6422 6423 *nxthdr_ptr = IPPROTO_DSTOPTS; 6424 nxthdr_ptr = &dst->ip6d_nxt; 6425 6426 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6427 cp += tipp->ipp_rtdstoptslen; 6428 } 6429 /* 6430 * Routing header next 6431 */ 6432 if (option_exists & IPPF_RTHDR) { 6433 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6434 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6435 6436 *nxthdr_ptr = IPPROTO_ROUTING; 6437 nxthdr_ptr = &rt->ip6r_nxt; 6438 6439 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6440 cp += tipp->ipp_rthdrlen; 6441 } 6442 /* 6443 * Do ultimate destination options 6444 */ 6445 if (option_exists & IPPF_DSTOPTS) { 6446 ip6_dest_t *dest = (ip6_dest_t *)cp; 6447 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6448 6449 *nxthdr_ptr = IPPROTO_DSTOPTS; 6450 nxthdr_ptr = &dest->ip6d_nxt; 6451 6452 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6453 cp += tipp->ipp_dstoptslen; 6454 } 6455 /* 6456 * Now set the last header pointer to the proto passed in 6457 */ 6458 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6459 *nxthdr_ptr = IPPROTO_UDP; 6460 6461 /* Update UDP header */ 6462 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6463 udph->uha_dst_port = sin6->sin6_port; 6464 udph->uha_src_port = udp->udp_port; 6465 6466 /* 6467 * Copy in the destination address 6468 */ 6469 ip6h->ip6_dst = ip6_dst; 6470 6471 ip6h->ip6_vcf = 6472 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6473 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6474 6475 if (option_exists & IPPF_TCLASS) { 6476 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6477 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6478 tipp->ipp_tclass); 6479 } 6480 rw_exit(&udp->udp_rwlock); 6481 6482 if (option_exists & IPPF_RTHDR) { 6483 ip6_rthdr_t *rth; 6484 6485 /* 6486 * Perform any processing needed for source routing. 6487 * We know that all extension headers will be in the same mblk 6488 * as the IPv6 header. 6489 */ 6490 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6491 if (rth != NULL && rth->ip6r_segleft != 0) { 6492 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6493 /* 6494 * Drop packet - only support Type 0 routing. 6495 * Notify the application as well. 6496 */ 6497 *error = EPROTO; 6498 goto done; 6499 } 6500 6501 /* 6502 * rth->ip6r_len is twice the number of 6503 * addresses in the header. Thus it must be even. 6504 */ 6505 if (rth->ip6r_len & 0x1) { 6506 *error = EPROTO; 6507 goto done; 6508 } 6509 /* 6510 * Shuffle the routing header and ip6_dst 6511 * addresses, and get the checksum difference 6512 * between the first hop (in ip6_dst) and 6513 * the destination (in the last routing hdr entry). 6514 */ 6515 csum = ip_massage_options_v6(ip6h, rth, 6516 us->us_netstack); 6517 /* 6518 * Verify that the first hop isn't a mapped address. 6519 * Routers along the path need to do this verification 6520 * for subsequent hops. 6521 */ 6522 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6523 *error = EADDRNOTAVAIL; 6524 goto done; 6525 } 6526 6527 cp += (rth->ip6r_len + 1)*8; 6528 } 6529 } 6530 6531 /* count up length of UDP packet */ 6532 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6533 if ((mp2 = mp1->b_cont) != NULL) { 6534 do { 6535 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6536 ip_len += (uint32_t)MBLKL(mp2); 6537 } while ((mp2 = mp2->b_cont) != NULL); 6538 } 6539 6540 /* 6541 * If the size of the packet is greater than the maximum allowed by 6542 * ip, return an error. Passing this down could cause panics because 6543 * the size will have wrapped and be inconsistent with the msg size. 6544 */ 6545 if (ip_len > IP_MAXPACKET) { 6546 *error = EMSGSIZE; 6547 goto done; 6548 } 6549 6550 /* Store the UDP length. Subtract length of extension hdrs */ 6551 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6552 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6553 6554 /* 6555 * We make it easy for IP to include our pseudo header 6556 * by putting our length in uh_checksum, modified (if 6557 * we have a routing header) by the checksum difference 6558 * between the ultimate destination and first hop addresses. 6559 * Note: UDP over IPv6 must always checksum the packet. 6560 */ 6561 csum += udph->uha_length; 6562 csum = (csum & 0xFFFF) + (csum >> 16); 6563 udph->uha_checksum = (uint16_t)csum; 6564 6565 #ifdef _LITTLE_ENDIAN 6566 ip_len = htons(ip_len); 6567 #endif 6568 ip6h->ip6_plen = ip_len; 6569 6570 if (DB_TYPE(mp) != M_DATA) { 6571 cred_t *cr; 6572 pid_t cpid; 6573 6574 /* Move any cred from the T_UNITDATA_REQ to the packet */ 6575 cr = msg_extractcred(mp, &cpid); 6576 if (cr != NULL) { 6577 if (mp1->b_datap->db_credp != NULL) 6578 crfree(mp1->b_datap->db_credp); 6579 mp1->b_datap->db_credp = cr; 6580 mp1->b_datap->db_cpid = cpid; 6581 } 6582 6583 ASSERT(mp != mp1); 6584 freeb(mp); 6585 } 6586 6587 /* mp has been consumed and we'll return success */ 6588 ASSERT(*error == 0); 6589 mp = NULL; 6590 6591 /* We're done. Pass the packet to IP */ 6592 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6593 ip_output_v6(connp, mp1, q, IP_WPUT); 6594 6595 done: 6596 if (sth_wroff != 0) { 6597 (void) proto_set_tx_wroff(RD(q), connp, 6598 udp->udp_max_hdr_len + us->us_wroff_extra); 6599 } 6600 if (hopoptsptr != NULL && !is_ancillary) { 6601 kmem_free(hopoptsptr, hopoptslen); 6602 hopoptsptr = NULL; 6603 } 6604 if (*error != 0) { 6605 ASSERT(mp != NULL); 6606 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6607 } 6608 return (mp); 6609 } 6610 6611 6612 static int 6613 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6614 { 6615 sin_t *sin = (sin_t *)sa; 6616 sin6_t *sin6 = (sin6_t *)sa; 6617 6618 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6619 6620 if (udp->udp_state != TS_DATA_XFER) 6621 return (ENOTCONN); 6622 6623 switch (udp->udp_family) { 6624 case AF_INET: 6625 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6626 6627 if (*salenp < sizeof (sin_t)) 6628 return (EINVAL); 6629 6630 *salenp = sizeof (sin_t); 6631 *sin = sin_null; 6632 sin->sin_family = AF_INET; 6633 sin->sin_port = udp->udp_dstport; 6634 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6635 break; 6636 6637 case AF_INET6: 6638 if (*salenp < sizeof (sin6_t)) 6639 return (EINVAL); 6640 6641 *salenp = sizeof (sin6_t); 6642 *sin6 = sin6_null; 6643 sin6->sin6_family = AF_INET6; 6644 sin6->sin6_port = udp->udp_dstport; 6645 sin6->sin6_addr = udp->udp_v6dst; 6646 sin6->sin6_flowinfo = udp->udp_flowinfo; 6647 break; 6648 } 6649 6650 return (0); 6651 } 6652 6653 static int 6654 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6655 { 6656 sin_t *sin = (sin_t *)sa; 6657 sin6_t *sin6 = (sin6_t *)sa; 6658 6659 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6660 6661 switch (udp->udp_family) { 6662 case AF_INET: 6663 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6664 6665 if (*salenp < sizeof (sin_t)) 6666 return (EINVAL); 6667 6668 *salenp = sizeof (sin_t); 6669 *sin = sin_null; 6670 sin->sin_family = AF_INET; 6671 sin->sin_port = udp->udp_port; 6672 6673 /* 6674 * If udp_v6src is unspecified, we might be bound to broadcast 6675 * / multicast. Use udp_bound_v6src as local address instead 6676 * (that could also still be unspecified). 6677 */ 6678 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6679 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6680 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6681 } else { 6682 sin->sin_addr.s_addr = 6683 V4_PART_OF_V6(udp->udp_bound_v6src); 6684 } 6685 break; 6686 6687 case AF_INET6: 6688 if (*salenp < sizeof (sin6_t)) 6689 return (EINVAL); 6690 6691 *salenp = sizeof (sin6_t); 6692 *sin6 = sin6_null; 6693 sin6->sin6_family = AF_INET6; 6694 sin6->sin6_port = udp->udp_port; 6695 sin6->sin6_flowinfo = udp->udp_flowinfo; 6696 6697 /* 6698 * If udp_v6src is unspecified, we might be bound to broadcast 6699 * / multicast. Use udp_bound_v6src as local address instead 6700 * (that could also still be unspecified). 6701 */ 6702 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6703 sin6->sin6_addr = udp->udp_v6src; 6704 else 6705 sin6->sin6_addr = udp->udp_bound_v6src; 6706 break; 6707 } 6708 6709 return (0); 6710 } 6711 6712 /* 6713 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 6714 */ 6715 static void 6716 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6717 { 6718 void *data; 6719 mblk_t *datamp = mp->b_cont; 6720 udp_t *udp = Q_TO_UDP(q); 6721 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6722 6723 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6724 cmdp->cb_error = EPROTO; 6725 qreply(q, mp); 6726 return; 6727 } 6728 data = datamp->b_rptr; 6729 6730 rw_enter(&udp->udp_rwlock, RW_READER); 6731 switch (cmdp->cb_cmd) { 6732 case TI_GETPEERNAME: 6733 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6734 break; 6735 case TI_GETMYNAME: 6736 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6737 break; 6738 default: 6739 cmdp->cb_error = EINVAL; 6740 break; 6741 } 6742 rw_exit(&udp->udp_rwlock); 6743 6744 qreply(q, mp); 6745 } 6746 6747 static void 6748 udp_use_pure_tpi(udp_t *udp) 6749 { 6750 rw_enter(&udp->udp_rwlock, RW_WRITER); 6751 udp->udp_issocket = B_FALSE; 6752 rw_exit(&udp->udp_rwlock); 6753 6754 UDP_STAT(udp->udp_us, udp_sock_fallback); 6755 } 6756 6757 static void 6758 udp_wput_other(queue_t *q, mblk_t *mp) 6759 { 6760 uchar_t *rptr = mp->b_rptr; 6761 struct datab *db; 6762 struct iocblk *iocp; 6763 cred_t *cr; 6764 conn_t *connp = Q_TO_CONN(q); 6765 udp_t *udp = connp->conn_udp; 6766 udp_stack_t *us; 6767 6768 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6769 "udp_wput_other_start: q %p", q); 6770 6771 us = udp->udp_us; 6772 db = mp->b_datap; 6773 6774 switch (db->db_type) { 6775 case M_CMD: 6776 udp_wput_cmdblk(q, mp); 6777 return; 6778 6779 case M_PROTO: 6780 case M_PCPROTO: 6781 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6782 freemsg(mp); 6783 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6784 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6785 return; 6786 } 6787 switch (((t_primp_t)rptr)->type) { 6788 case T_ADDR_REQ: 6789 udp_addr_req(q, mp); 6790 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6791 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6792 return; 6793 case O_T_BIND_REQ: 6794 case T_BIND_REQ: 6795 udp_tpi_bind(q, mp); 6796 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6797 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 6798 return; 6799 case T_CONN_REQ: 6800 udp_tpi_connect(q, mp); 6801 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6802 "udp_wput_other_end: q %p (%S)", q, "connreq"); 6803 return; 6804 case T_CAPABILITY_REQ: 6805 udp_capability_req(q, mp); 6806 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6807 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 6808 return; 6809 case T_INFO_REQ: 6810 udp_info_req(q, mp); 6811 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6812 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 6813 return; 6814 case T_UNITDATA_REQ: 6815 /* 6816 * If a T_UNITDATA_REQ gets here, the address must 6817 * be bad. Valid T_UNITDATA_REQs are handled 6818 * in udp_wput. 6819 */ 6820 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 6821 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6822 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 6823 return; 6824 case T_UNBIND_REQ: 6825 udp_tpi_unbind(q, mp); 6826 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6827 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 6828 return; 6829 case T_SVR4_OPTMGMT_REQ: 6830 /* 6831 * All Solaris components should pass a db_credp 6832 * for this TPI message, hence we ASSERT. 6833 * But in case there is some other M_PROTO that looks 6834 * like a TPI message sent by some other kernel 6835 * component, we check and return an error. 6836 */ 6837 cr = msg_getcred(mp, NULL); 6838 ASSERT(cr != NULL); 6839 if (cr == NULL) { 6840 udp_err_ack(q, mp, TSYSERR, EINVAL); 6841 return; 6842 } 6843 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 6844 cr)) { 6845 (void) svr4_optcom_req(q, 6846 mp, cr, &udp_opt_obj, B_TRUE); 6847 } 6848 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6849 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6850 return; 6851 6852 case T_OPTMGMT_REQ: 6853 /* 6854 * All Solaris components should pass a db_credp 6855 * for this TPI message, hence we ASSERT. 6856 * But in case there is some other M_PROTO that looks 6857 * like a TPI message sent by some other kernel 6858 * component, we check and return an error. 6859 */ 6860 cr = msg_getcred(mp, NULL); 6861 ASSERT(cr != NULL); 6862 if (cr == NULL) { 6863 udp_err_ack(q, mp, TSYSERR, EINVAL); 6864 return; 6865 } 6866 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 6867 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6868 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6869 return; 6870 6871 case T_DISCON_REQ: 6872 udp_tpi_disconnect(q, mp); 6873 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6874 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 6875 return; 6876 6877 /* The following TPI message is not supported by udp. */ 6878 case O_T_CONN_RES: 6879 case T_CONN_RES: 6880 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6881 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6882 "udp_wput_other_end: q %p (%S)", q, 6883 "connres/disconreq"); 6884 return; 6885 6886 /* The following 3 TPI messages are illegal for udp. */ 6887 case T_DATA_REQ: 6888 case T_EXDATA_REQ: 6889 case T_ORDREL_REQ: 6890 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6891 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6892 "udp_wput_other_end: q %p (%S)", q, 6893 "data/exdata/ordrel"); 6894 return; 6895 default: 6896 break; 6897 } 6898 break; 6899 case M_FLUSH: 6900 if (*rptr & FLUSHW) 6901 flushq(q, FLUSHDATA); 6902 break; 6903 case M_IOCTL: 6904 iocp = (struct iocblk *)mp->b_rptr; 6905 switch (iocp->ioc_cmd) { 6906 case TI_GETPEERNAME: 6907 if (udp->udp_state != TS_DATA_XFER) { 6908 /* 6909 * If a default destination address has not 6910 * been associated with the stream, then we 6911 * don't know the peer's name. 6912 */ 6913 iocp->ioc_error = ENOTCONN; 6914 iocp->ioc_count = 0; 6915 mp->b_datap->db_type = M_IOCACK; 6916 qreply(q, mp); 6917 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6918 "udp_wput_other_end: q %p (%S)", q, 6919 "getpeername"); 6920 return; 6921 } 6922 /* FALLTHRU */ 6923 case TI_GETMYNAME: { 6924 /* 6925 * For TI_GETPEERNAME and TI_GETMYNAME, we first 6926 * need to copyin the user's strbuf structure. 6927 * Processing will continue in the M_IOCDATA case 6928 * below. 6929 */ 6930 mi_copyin(q, mp, NULL, 6931 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 6932 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6933 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 6934 return; 6935 } 6936 case ND_SET: 6937 /* nd_getset performs the necessary checking */ 6938 case ND_GET: 6939 if (nd_getset(q, us->us_nd, mp)) { 6940 qreply(q, mp); 6941 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6942 "udp_wput_other_end: q %p (%S)", q, "get"); 6943 return; 6944 } 6945 break; 6946 case _SIOCSOCKFALLBACK: 6947 /* 6948 * Either sockmod is about to be popped and the 6949 * socket would now be treated as a plain stream, 6950 * or a module is about to be pushed so we have 6951 * to follow pure TPI semantics. 6952 */ 6953 if (!udp->udp_issocket) { 6954 DB_TYPE(mp) = M_IOCNAK; 6955 iocp->ioc_error = EINVAL; 6956 } else { 6957 udp_use_pure_tpi(udp); 6958 6959 DB_TYPE(mp) = M_IOCACK; 6960 iocp->ioc_error = 0; 6961 } 6962 iocp->ioc_count = 0; 6963 iocp->ioc_rval = 0; 6964 qreply(q, mp); 6965 return; 6966 default: 6967 break; 6968 } 6969 break; 6970 case M_IOCDATA: 6971 udp_wput_iocdata(q, mp); 6972 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6973 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 6974 return; 6975 default: 6976 /* Unrecognized messages are passed through without change. */ 6977 break; 6978 } 6979 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6980 "udp_wput_other_end: q %p (%S)", q, "end"); 6981 ip_output(connp, mp, q, IP_WPUT); 6982 } 6983 6984 /* 6985 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 6986 * messages. 6987 */ 6988 static void 6989 udp_wput_iocdata(queue_t *q, mblk_t *mp) 6990 { 6991 mblk_t *mp1; 6992 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 6993 STRUCT_HANDLE(strbuf, sb); 6994 udp_t *udp = Q_TO_UDP(q); 6995 int error; 6996 uint_t addrlen; 6997 6998 /* Make sure it is one of ours. */ 6999 switch (iocp->ioc_cmd) { 7000 case TI_GETMYNAME: 7001 case TI_GETPEERNAME: 7002 break; 7003 default: 7004 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7005 return; 7006 } 7007 7008 switch (mi_copy_state(q, mp, &mp1)) { 7009 case -1: 7010 return; 7011 case MI_COPY_CASE(MI_COPY_IN, 1): 7012 break; 7013 case MI_COPY_CASE(MI_COPY_OUT, 1): 7014 /* 7015 * The address has been copied out, so now 7016 * copyout the strbuf. 7017 */ 7018 mi_copyout(q, mp); 7019 return; 7020 case MI_COPY_CASE(MI_COPY_OUT, 2): 7021 /* 7022 * The address and strbuf have been copied out. 7023 * We're done, so just acknowledge the original 7024 * M_IOCTL. 7025 */ 7026 mi_copy_done(q, mp, 0); 7027 return; 7028 default: 7029 /* 7030 * Something strange has happened, so acknowledge 7031 * the original M_IOCTL with an EPROTO error. 7032 */ 7033 mi_copy_done(q, mp, EPROTO); 7034 return; 7035 } 7036 7037 /* 7038 * Now we have the strbuf structure for TI_GETMYNAME 7039 * and TI_GETPEERNAME. Next we copyout the requested 7040 * address and then we'll copyout the strbuf. 7041 */ 7042 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7043 addrlen = udp->udp_family == AF_INET ? sizeof (sin_t) : sizeof (sin6_t); 7044 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7045 mi_copy_done(q, mp, EINVAL); 7046 return; 7047 } 7048 7049 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7050 7051 if (mp1 == NULL) 7052 return; 7053 7054 rw_enter(&udp->udp_rwlock, RW_READER); 7055 switch (iocp->ioc_cmd) { 7056 case TI_GETMYNAME: 7057 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); 7058 break; 7059 case TI_GETPEERNAME: 7060 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7061 break; 7062 } 7063 rw_exit(&udp->udp_rwlock); 7064 7065 if (error != 0) { 7066 mi_copy_done(q, mp, error); 7067 } else { 7068 mp1->b_wptr += addrlen; 7069 STRUCT_FSET(sb, len, addrlen); 7070 7071 /* Copy out the address */ 7072 mi_copyout(q, mp); 7073 } 7074 } 7075 7076 static int 7077 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7078 udpattrs_t *udpattrs) 7079 { 7080 struct T_unitdata_req *udreqp; 7081 int is_absreq_failure; 7082 cred_t *cr; 7083 7084 ASSERT(((t_primp_t)mp->b_rptr)->type); 7085 7086 /* 7087 * All Solaris components should pass a db_credp 7088 * for this TPI message, hence we should ASSERT. 7089 * However, RPC (svc_clts_ksend) does this odd thing where it 7090 * passes the options from a T_UNITDATA_IND unchanged in a 7091 * T_UNITDATA_REQ. While that is the right thing to do for 7092 * some options, SCM_UCRED being the key one, this also makes it 7093 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 7094 */ 7095 cr = msg_getcred(mp, NULL); 7096 if (cr == NULL) { 7097 cr = Q_TO_CONN(q)->conn_cred; 7098 } 7099 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7100 7101 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7102 udreqp->OPT_offset, cr, &udp_opt_obj, 7103 udpattrs, &is_absreq_failure); 7104 7105 if (*errorp != 0) { 7106 /* 7107 * Note: No special action needed in this 7108 * module for "is_absreq_failure" 7109 */ 7110 return (-1); /* failure */ 7111 } 7112 ASSERT(is_absreq_failure == 0); 7113 return (0); /* success */ 7114 } 7115 7116 void 7117 udp_ddi_g_init(void) 7118 { 7119 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7120 udp_opt_obj.odb_opt_arr_cnt); 7121 7122 /* 7123 * We want to be informed each time a stack is created or 7124 * destroyed in the kernel, so we can maintain the 7125 * set of udp_stack_t's. 7126 */ 7127 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7128 } 7129 7130 void 7131 udp_ddi_g_destroy(void) 7132 { 7133 netstack_unregister(NS_UDP); 7134 } 7135 7136 #define INET_NAME "ip" 7137 7138 /* 7139 * Initialize the UDP stack instance. 7140 */ 7141 static void * 7142 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7143 { 7144 udp_stack_t *us; 7145 udpparam_t *pa; 7146 int i; 7147 int error = 0; 7148 major_t major; 7149 7150 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7151 us->us_netstack = ns; 7152 7153 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7154 us->us_epriv_ports[0] = 2049; 7155 us->us_epriv_ports[1] = 4045; 7156 7157 /* 7158 * The smallest anonymous port in the priviledged port range which UDP 7159 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7160 */ 7161 us->us_min_anonpriv_port = 512; 7162 7163 us->us_bind_fanout_size = udp_bind_fanout_size; 7164 7165 /* Roundup variable that might have been modified in /etc/system */ 7166 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7167 /* Not a power of two. Round up to nearest power of two */ 7168 for (i = 0; i < 31; i++) { 7169 if (us->us_bind_fanout_size < (1 << i)) 7170 break; 7171 } 7172 us->us_bind_fanout_size = 1 << i; 7173 } 7174 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7175 sizeof (udp_fanout_t), KM_SLEEP); 7176 for (i = 0; i < us->us_bind_fanout_size; i++) { 7177 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7178 NULL); 7179 } 7180 7181 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7182 7183 us->us_param_arr = pa; 7184 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7185 7186 (void) udp_param_register(&us->us_nd, 7187 us->us_param_arr, A_CNT(udp_param_arr)); 7188 7189 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7190 us->us_mibkp = udp_kstat_init(stackid); 7191 7192 major = mod_name_to_major(INET_NAME); 7193 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7194 ASSERT(error == 0); 7195 return (us); 7196 } 7197 7198 /* 7199 * Free the UDP stack instance. 7200 */ 7201 static void 7202 udp_stack_fini(netstackid_t stackid, void *arg) 7203 { 7204 udp_stack_t *us = (udp_stack_t *)arg; 7205 int i; 7206 7207 for (i = 0; i < us->us_bind_fanout_size; i++) { 7208 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7209 } 7210 7211 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7212 sizeof (udp_fanout_t)); 7213 7214 us->us_bind_fanout = NULL; 7215 7216 nd_free(&us->us_nd); 7217 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7218 us->us_param_arr = NULL; 7219 7220 udp_kstat_fini(stackid, us->us_mibkp); 7221 us->us_mibkp = NULL; 7222 7223 udp_kstat2_fini(stackid, us->us_kstat); 7224 us->us_kstat = NULL; 7225 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7226 7227 ldi_ident_release(us->us_ldi_ident); 7228 kmem_free(us, sizeof (*us)); 7229 } 7230 7231 static void * 7232 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7233 { 7234 kstat_t *ksp; 7235 7236 udp_stat_t template = { 7237 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7238 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7239 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7240 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7241 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7242 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7243 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7244 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7245 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7246 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7247 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7248 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7249 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7250 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7251 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7252 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7253 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7254 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7255 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7256 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7257 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7258 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7259 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7260 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7261 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7262 #ifdef DEBUG 7263 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7264 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7265 #endif 7266 }; 7267 7268 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7269 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7270 KSTAT_FLAG_VIRTUAL, stackid); 7271 7272 if (ksp == NULL) 7273 return (NULL); 7274 7275 bcopy(&template, us_statisticsp, sizeof (template)); 7276 ksp->ks_data = (void *)us_statisticsp; 7277 ksp->ks_private = (void *)(uintptr_t)stackid; 7278 7279 kstat_install(ksp); 7280 return (ksp); 7281 } 7282 7283 static void 7284 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7285 { 7286 if (ksp != NULL) { 7287 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7288 kstat_delete_netstack(ksp, stackid); 7289 } 7290 } 7291 7292 static void * 7293 udp_kstat_init(netstackid_t stackid) 7294 { 7295 kstat_t *ksp; 7296 7297 udp_named_kstat_t template = { 7298 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7299 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7300 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7301 { "entrySize", KSTAT_DATA_INT32, 0 }, 7302 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7303 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7304 }; 7305 7306 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7307 KSTAT_TYPE_NAMED, 7308 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7309 7310 if (ksp == NULL || ksp->ks_data == NULL) 7311 return (NULL); 7312 7313 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7314 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7315 7316 bcopy(&template, ksp->ks_data, sizeof (template)); 7317 ksp->ks_update = udp_kstat_update; 7318 ksp->ks_private = (void *)(uintptr_t)stackid; 7319 7320 kstat_install(ksp); 7321 return (ksp); 7322 } 7323 7324 static void 7325 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7326 { 7327 if (ksp != NULL) { 7328 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7329 kstat_delete_netstack(ksp, stackid); 7330 } 7331 } 7332 7333 static int 7334 udp_kstat_update(kstat_t *kp, int rw) 7335 { 7336 udp_named_kstat_t *udpkp; 7337 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7338 netstack_t *ns; 7339 udp_stack_t *us; 7340 7341 if ((kp == NULL) || (kp->ks_data == NULL)) 7342 return (EIO); 7343 7344 if (rw == KSTAT_WRITE) 7345 return (EACCES); 7346 7347 ns = netstack_find_by_stackid(stackid); 7348 if (ns == NULL) 7349 return (-1); 7350 us = ns->netstack_udp; 7351 if (us == NULL) { 7352 netstack_rele(ns); 7353 return (-1); 7354 } 7355 udpkp = (udp_named_kstat_t *)kp->ks_data; 7356 7357 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7358 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7359 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7360 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7361 netstack_rele(ns); 7362 return (0); 7363 } 7364 7365 static size_t 7366 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7367 { 7368 udp_stack_t *us = udp->udp_us; 7369 7370 /* We add a bit of extra buffering */ 7371 size += size >> 1; 7372 if (size > us->us_max_buf) 7373 size = us->us_max_buf; 7374 7375 udp->udp_rcv_hiwat = size; 7376 return (size); 7377 } 7378 7379 /* 7380 * For the lower queue so that UDP can be a dummy mux. 7381 * Nobody should be sending 7382 * packets up this stream 7383 */ 7384 static void 7385 udp_lrput(queue_t *q, mblk_t *mp) 7386 { 7387 mblk_t *mp1; 7388 7389 switch (mp->b_datap->db_type) { 7390 case M_FLUSH: 7391 /* Turn around */ 7392 if (*mp->b_rptr & FLUSHW) { 7393 *mp->b_rptr &= ~FLUSHR; 7394 qreply(q, mp); 7395 return; 7396 } 7397 break; 7398 } 7399 /* Could receive messages that passed through ar_rput */ 7400 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7401 mp1->b_prev = mp1->b_next = NULL; 7402 freemsg(mp); 7403 } 7404 7405 /* 7406 * For the lower queue so that UDP can be a dummy mux. 7407 * Nobody should be sending packets down this stream. 7408 */ 7409 /* ARGSUSED */ 7410 void 7411 udp_lwput(queue_t *q, mblk_t *mp) 7412 { 7413 freemsg(mp); 7414 } 7415 7416 /* 7417 * Below routines for UDP socket module. 7418 */ 7419 7420 static conn_t * 7421 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7422 { 7423 udp_t *udp; 7424 conn_t *connp; 7425 zoneid_t zoneid; 7426 netstack_t *ns; 7427 udp_stack_t *us; 7428 7429 ns = netstack_find_by_cred(credp); 7430 ASSERT(ns != NULL); 7431 us = ns->netstack_udp; 7432 ASSERT(us != NULL); 7433 7434 /* 7435 * For exclusive stacks we set the zoneid to zero 7436 * to make UDP operate as if in the global zone. 7437 */ 7438 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7439 zoneid = GLOBAL_ZONEID; 7440 else 7441 zoneid = crgetzoneid(credp); 7442 7443 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7444 7445 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7446 if (connp == NULL) { 7447 netstack_rele(ns); 7448 return (NULL); 7449 } 7450 udp = connp->conn_udp; 7451 7452 /* 7453 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7454 * done by netstack_find_by_cred() 7455 */ 7456 netstack_rele(ns); 7457 7458 rw_enter(&udp->udp_rwlock, RW_WRITER); 7459 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7460 ASSERT(connp->conn_udp == udp); 7461 ASSERT(udp->udp_connp == connp); 7462 7463 /* Set the initial state of the stream and the privilege status. */ 7464 udp->udp_state = TS_UNBND; 7465 if (isv6) { 7466 udp->udp_family = AF_INET6; 7467 udp->udp_ipversion = IPV6_VERSION; 7468 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7469 udp->udp_ttl = us->us_ipv6_hoplimit; 7470 connp->conn_af_isv6 = B_TRUE; 7471 } else { 7472 udp->udp_family = AF_INET; 7473 udp->udp_ipversion = IPV4_VERSION; 7474 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7475 udp->udp_ttl = us->us_ipv4_ttl; 7476 connp->conn_af_isv6 = B_FALSE; 7477 } 7478 7479 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7480 udp->udp_pending_op = -1; 7481 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7482 connp->conn_zoneid = zoneid; 7483 7484 udp->udp_open_time = lbolt64; 7485 udp->udp_open_pid = curproc->p_pid; 7486 7487 /* 7488 * If the caller has the process-wide flag set, then default to MAC 7489 * exempt mode. This allows read-down to unlabeled hosts. 7490 */ 7491 if (getpflags(NET_MAC_AWARE, credp) != 0) 7492 connp->conn_mac_exempt = B_TRUE; 7493 7494 connp->conn_ulp_labeled = is_system_labeled(); 7495 7496 udp->udp_us = us; 7497 7498 connp->conn_recv = udp_input; 7499 crhold(credp); 7500 connp->conn_cred = credp; 7501 7502 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7503 7504 rw_exit(&udp->udp_rwlock); 7505 7506 return (connp); 7507 } 7508 7509 /* ARGSUSED */ 7510 sock_lower_handle_t 7511 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7512 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7513 { 7514 udp_t *udp = NULL; 7515 udp_stack_t *us; 7516 conn_t *connp; 7517 boolean_t isv6; 7518 7519 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7520 (proto != 0 && proto != IPPROTO_UDP)) { 7521 *errorp = EPROTONOSUPPORT; 7522 return (NULL); 7523 } 7524 7525 if (family == AF_INET6) 7526 isv6 = B_TRUE; 7527 else 7528 isv6 = B_FALSE; 7529 7530 connp = udp_do_open(credp, isv6, flags); 7531 if (connp == NULL) { 7532 *errorp = ENOMEM; 7533 return (NULL); 7534 } 7535 7536 udp = connp->conn_udp; 7537 ASSERT(udp != NULL); 7538 us = udp->udp_us; 7539 ASSERT(us != NULL); 7540 7541 udp->udp_issocket = B_TRUE; 7542 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7543 7544 /* Set flow control */ 7545 rw_enter(&udp->udp_rwlock, RW_WRITER); 7546 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7547 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7548 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 7549 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 7550 udp->udp_xmit_lowat = us->us_xmit_lowat; 7551 7552 if (udp->udp_family == AF_INET6) { 7553 /* Build initial header template for transmit */ 7554 if ((*errorp = udp_build_hdrs(udp)) != 0) { 7555 rw_exit(&udp->udp_rwlock); 7556 ipcl_conn_destroy(connp); 7557 return (NULL); 7558 } 7559 } 7560 rw_exit(&udp->udp_rwlock); 7561 7562 connp->conn_flow_cntrld = B_FALSE; 7563 7564 ASSERT(us->us_ldi_ident != NULL); 7565 7566 if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 7567 ip1dbg(("udp_create: create of IP helper stream failed\n")); 7568 udp_do_close(connp); 7569 return (NULL); 7570 } 7571 7572 /* Set the send flow control */ 7573 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 7574 connp->conn_wq->q_lowat = us->us_xmit_lowat; 7575 7576 mutex_enter(&connp->conn_lock); 7577 connp->conn_state_flags &= ~CONN_INCIPIENT; 7578 mutex_exit(&connp->conn_lock); 7579 7580 *errorp = 0; 7581 *smodep = SM_ATOMIC; 7582 *sock_downcalls = &sock_udp_downcalls; 7583 return ((sock_lower_handle_t)connp); 7584 } 7585 7586 /* ARGSUSED */ 7587 void 7588 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 7589 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 7590 { 7591 conn_t *connp = (conn_t *)proto_handle; 7592 udp_t *udp = connp->conn_udp; 7593 udp_stack_t *us = udp->udp_us; 7594 struct sock_proto_props sopp; 7595 7596 /* All Solaris components should pass a cred for this operation. */ 7597 ASSERT(cr != NULL); 7598 7599 connp->conn_upcalls = sock_upcalls; 7600 connp->conn_upper_handle = sock_handle; 7601 7602 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 7603 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 7604 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 7605 sopp.sopp_maxblk = INFPSZ; 7606 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 7607 sopp.sopp_maxaddrlen = sizeof (sin6_t); 7608 sopp.sopp_maxpsz = 7609 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 7610 UDP_MAXPACKET_IPV6; 7611 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 7612 udp_mod_info.mi_minpsz; 7613 7614 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 7615 &sopp); 7616 } 7617 7618 static void 7619 udp_do_close(conn_t *connp) 7620 { 7621 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 7622 7623 udp_quiesce_conn(connp); 7624 ip_quiesce_conn(connp); 7625 7626 if (!IPCL_IS_NONSTR(connp)) { 7627 ASSERT(connp->conn_wq != NULL); 7628 ASSERT(connp->conn_rq != NULL); 7629 qprocsoff(connp->conn_rq); 7630 } 7631 7632 udp_close_free(connp); 7633 7634 /* 7635 * Now we are truly single threaded on this stream, and can 7636 * delete the things hanging off the connp, and finally the connp. 7637 * We removed this connp from the fanout list, it cannot be 7638 * accessed thru the fanouts, and we already waited for the 7639 * conn_ref to drop to 0. We are already in close, so 7640 * there cannot be any other thread from the top. qprocsoff 7641 * has completed, and service has completed or won't run in 7642 * future. 7643 */ 7644 ASSERT(connp->conn_ref == 1); 7645 if (!IPCL_IS_NONSTR(connp)) { 7646 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 7647 } else { 7648 ip_free_helper_stream(connp); 7649 } 7650 7651 connp->conn_ref--; 7652 ipcl_conn_destroy(connp); 7653 } 7654 7655 /* ARGSUSED */ 7656 int 7657 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 7658 { 7659 conn_t *connp = (conn_t *)proto_handle; 7660 7661 /* All Solaris components should pass a cred for this operation. */ 7662 ASSERT(cr != NULL); 7663 7664 udp_do_close(connp); 7665 return (0); 7666 } 7667 7668 static int 7669 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 7670 boolean_t bind_to_req_port_only) 7671 { 7672 sin_t *sin; 7673 sin6_t *sin6; 7674 sin6_t sin6addr; 7675 in_port_t port; /* Host byte order */ 7676 in_port_t requested_port; /* Host byte order */ 7677 int count; 7678 in6_addr_t v6src; 7679 int loopmax; 7680 udp_fanout_t *udpf; 7681 in_port_t lport; /* Network byte order */ 7682 zoneid_t zoneid; 7683 udp_t *udp; 7684 boolean_t is_inaddr_any; 7685 mlp_type_t addrtype, mlptype; 7686 udp_stack_t *us; 7687 int error = 0; 7688 mblk_t *mp = NULL; 7689 7690 udp = connp->conn_udp; 7691 us = udp->udp_us; 7692 7693 if (udp->udp_state != TS_UNBND) { 7694 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7695 "udp_bind: bad state, %u", udp->udp_state); 7696 return (-TOUTSTATE); 7697 } 7698 7699 switch (len) { 7700 case 0: 7701 if (udp->udp_family == AF_INET) { 7702 sin = (sin_t *)&sin6addr; 7703 *sin = sin_null; 7704 sin->sin_family = AF_INET; 7705 sin->sin_addr.s_addr = INADDR_ANY; 7706 udp->udp_ipversion = IPV4_VERSION; 7707 } else { 7708 ASSERT(udp->udp_family == AF_INET6); 7709 sin6 = (sin6_t *)&sin6addr; 7710 *sin6 = sin6_null; 7711 sin6->sin6_family = AF_INET6; 7712 V6_SET_ZERO(sin6->sin6_addr); 7713 udp->udp_ipversion = IPV6_VERSION; 7714 } 7715 port = 0; 7716 break; 7717 7718 case sizeof (sin_t): /* Complete IPv4 address */ 7719 sin = (sin_t *)sa; 7720 7721 if (sin == NULL || !OK_32PTR((char *)sin)) 7722 return (EINVAL); 7723 7724 if (udp->udp_family != AF_INET || 7725 sin->sin_family != AF_INET) { 7726 return (EAFNOSUPPORT); 7727 } 7728 port = ntohs(sin->sin_port); 7729 break; 7730 7731 case sizeof (sin6_t): /* complete IPv6 address */ 7732 sin6 = (sin6_t *)sa; 7733 7734 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 7735 return (EINVAL); 7736 7737 if (udp->udp_family != AF_INET6 || 7738 sin6->sin6_family != AF_INET6) { 7739 return (EAFNOSUPPORT); 7740 } 7741 port = ntohs(sin6->sin6_port); 7742 break; 7743 7744 default: /* Invalid request */ 7745 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7746 "udp_bind: bad ADDR_length length %u", len); 7747 return (-TBADADDR); 7748 } 7749 7750 requested_port = port; 7751 7752 if (requested_port == 0 || !bind_to_req_port_only) 7753 bind_to_req_port_only = B_FALSE; 7754 else /* T_BIND_REQ and requested_port != 0 */ 7755 bind_to_req_port_only = B_TRUE; 7756 7757 if (requested_port == 0) { 7758 /* 7759 * If the application passed in zero for the port number, it 7760 * doesn't care which port number we bind to. Get one in the 7761 * valid range. 7762 */ 7763 if (udp->udp_anon_priv_bind) { 7764 port = udp_get_next_priv_port(udp); 7765 } else { 7766 port = udp_update_next_port(udp, 7767 us->us_next_port_to_try, B_TRUE); 7768 } 7769 } else { 7770 /* 7771 * If the port is in the well-known privileged range, 7772 * make sure the caller was privileged. 7773 */ 7774 int i; 7775 boolean_t priv = B_FALSE; 7776 7777 if (port < us->us_smallest_nonpriv_port) { 7778 priv = B_TRUE; 7779 } else { 7780 for (i = 0; i < us->us_num_epriv_ports; i++) { 7781 if (port == us->us_epriv_ports[i]) { 7782 priv = B_TRUE; 7783 break; 7784 } 7785 } 7786 } 7787 7788 if (priv) { 7789 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 7790 return (-TACCES); 7791 } 7792 } 7793 7794 if (port == 0) 7795 return (-TNOADDR); 7796 7797 /* 7798 * The state must be TS_UNBND. TPI mandates that users must send 7799 * TPI primitives only 1 at a time and wait for the response before 7800 * sending the next primitive. 7801 */ 7802 rw_enter(&udp->udp_rwlock, RW_WRITER); 7803 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 7804 rw_exit(&udp->udp_rwlock); 7805 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7806 "udp_bind: bad state, %u", udp->udp_state); 7807 return (-TOUTSTATE); 7808 } 7809 /* XXX how to remove the T_BIND_REQ? Should set it before calling */ 7810 udp->udp_pending_op = T_BIND_REQ; 7811 /* 7812 * Copy the source address into our udp structure. This address 7813 * may still be zero; if so, IP will fill in the correct address 7814 * each time an outbound packet is passed to it. Since the udp is 7815 * not yet in the bind hash list, we don't grab the uf_lock to 7816 * change udp_ipversion 7817 */ 7818 if (udp->udp_family == AF_INET) { 7819 ASSERT(sin != NULL); 7820 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7821 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 7822 udp->udp_ip_snd_options_len; 7823 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 7824 } else { 7825 ASSERT(sin6 != NULL); 7826 v6src = sin6->sin6_addr; 7827 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 7828 /* 7829 * no need to hold the uf_lock to set the udp_ipversion 7830 * since we are not yet in the fanout list 7831 */ 7832 udp->udp_ipversion = IPV4_VERSION; 7833 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 7834 UDPH_SIZE + udp->udp_ip_snd_options_len; 7835 } else { 7836 udp->udp_ipversion = IPV6_VERSION; 7837 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 7838 } 7839 } 7840 7841 /* 7842 * If udp_reuseaddr is not set, then we have to make sure that 7843 * the IP address and port number the application requested 7844 * (or we selected for the application) is not being used by 7845 * another stream. If another stream is already using the 7846 * requested IP address and port, the behavior depends on 7847 * "bind_to_req_port_only". If set the bind fails; otherwise we 7848 * search for any an unused port to bind to the the stream. 7849 * 7850 * As per the BSD semantics, as modified by the Deering multicast 7851 * changes, if udp_reuseaddr is set, then we allow multiple binds 7852 * to the same port independent of the local IP address. 7853 * 7854 * This is slightly different than in SunOS 4.X which did not 7855 * support IP multicast. Note that the change implemented by the 7856 * Deering multicast code effects all binds - not only binding 7857 * to IP multicast addresses. 7858 * 7859 * Note that when binding to port zero we ignore SO_REUSEADDR in 7860 * order to guarantee a unique port. 7861 */ 7862 7863 count = 0; 7864 if (udp->udp_anon_priv_bind) { 7865 /* 7866 * loopmax = (IPPORT_RESERVED-1) - 7867 * us->us_min_anonpriv_port + 1 7868 */ 7869 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 7870 } else { 7871 loopmax = us->us_largest_anon_port - 7872 us->us_smallest_anon_port + 1; 7873 } 7874 7875 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 7876 zoneid = connp->conn_zoneid; 7877 7878 for (;;) { 7879 udp_t *udp1; 7880 boolean_t found_exclbind = B_FALSE; 7881 7882 /* 7883 * Walk through the list of udp streams bound to 7884 * requested port with the same IP address. 7885 */ 7886 lport = htons(port); 7887 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 7888 us->us_bind_fanout_size)]; 7889 mutex_enter(&udpf->uf_lock); 7890 for (udp1 = udpf->uf_udp; udp1 != NULL; 7891 udp1 = udp1->udp_bind_hash) { 7892 if (lport != udp1->udp_port) 7893 continue; 7894 7895 /* 7896 * On a labeled system, we must treat bindings to ports 7897 * on shared IP addresses by sockets with MAC exemption 7898 * privilege as being in all zones, as there's 7899 * otherwise no way to identify the right receiver. 7900 */ 7901 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 7902 IPCL_ZONE_MATCH(connp, 7903 udp1->udp_connp->conn_zoneid)) && 7904 !connp->conn_mac_exempt && \ 7905 !udp1->udp_connp->conn_mac_exempt) 7906 continue; 7907 7908 /* 7909 * If UDP_EXCLBIND is set for either the bound or 7910 * binding endpoint, the semantics of bind 7911 * is changed according to the following chart. 7912 * 7913 * spec = specified address (v4 or v6) 7914 * unspec = unspecified address (v4 or v6) 7915 * A = specified addresses are different for endpoints 7916 * 7917 * bound bind to allowed? 7918 * ------------------------------------- 7919 * unspec unspec no 7920 * unspec spec no 7921 * spec unspec no 7922 * spec spec yes if A 7923 * 7924 * For labeled systems, SO_MAC_EXEMPT behaves the same 7925 * as UDP_EXCLBIND, except that zoneid is ignored. 7926 */ 7927 if (udp1->udp_exclbind || udp->udp_exclbind || 7928 udp1->udp_connp->conn_mac_exempt || 7929 connp->conn_mac_exempt) { 7930 if (V6_OR_V4_INADDR_ANY( 7931 udp1->udp_bound_v6src) || 7932 is_inaddr_any || 7933 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 7934 &v6src)) { 7935 found_exclbind = B_TRUE; 7936 break; 7937 } 7938 continue; 7939 } 7940 7941 /* 7942 * Check ipversion to allow IPv4 and IPv6 sockets to 7943 * have disjoint port number spaces. 7944 */ 7945 if (udp->udp_ipversion != udp1->udp_ipversion) { 7946 7947 /* 7948 * On the first time through the loop, if the 7949 * the user intentionally specified a 7950 * particular port number, then ignore any 7951 * bindings of the other protocol that may 7952 * conflict. This allows the user to bind IPv6 7953 * alone and get both v4 and v6, or bind both 7954 * both and get each seperately. On subsequent 7955 * times through the loop, we're checking a 7956 * port that we chose (not the user) and thus 7957 * we do not allow casual duplicate bindings. 7958 */ 7959 if (count == 0 && requested_port != 0) 7960 continue; 7961 } 7962 7963 /* 7964 * No difference depending on SO_REUSEADDR. 7965 * 7966 * If existing port is bound to a 7967 * non-wildcard IP address and 7968 * the requesting stream is bound to 7969 * a distinct different IP addresses 7970 * (non-wildcard, also), keep going. 7971 */ 7972 if (!is_inaddr_any && 7973 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 7974 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 7975 &v6src)) { 7976 continue; 7977 } 7978 break; 7979 } 7980 7981 if (!found_exclbind && 7982 (udp->udp_reuseaddr && requested_port != 0)) { 7983 break; 7984 } 7985 7986 if (udp1 == NULL) { 7987 /* 7988 * No other stream has this IP address 7989 * and port number. We can use it. 7990 */ 7991 break; 7992 } 7993 mutex_exit(&udpf->uf_lock); 7994 if (bind_to_req_port_only) { 7995 /* 7996 * We get here only when requested port 7997 * is bound (and only first of the for() 7998 * loop iteration). 7999 * 8000 * The semantics of this bind request 8001 * require it to fail so we return from 8002 * the routine (and exit the loop). 8003 * 8004 */ 8005 udp->udp_pending_op = -1; 8006 rw_exit(&udp->udp_rwlock); 8007 return (-TADDRBUSY); 8008 } 8009 8010 if (udp->udp_anon_priv_bind) { 8011 port = udp_get_next_priv_port(udp); 8012 } else { 8013 if ((count == 0) && (requested_port != 0)) { 8014 /* 8015 * If the application wants us to find 8016 * a port, get one to start with. Set 8017 * requested_port to 0, so that we will 8018 * update us->us_next_port_to_try below. 8019 */ 8020 port = udp_update_next_port(udp, 8021 us->us_next_port_to_try, B_TRUE); 8022 requested_port = 0; 8023 } else { 8024 port = udp_update_next_port(udp, port + 1, 8025 B_FALSE); 8026 } 8027 } 8028 8029 if (port == 0 || ++count >= loopmax) { 8030 /* 8031 * We've tried every possible port number and 8032 * there are none available, so send an error 8033 * to the user. 8034 */ 8035 udp->udp_pending_op = -1; 8036 rw_exit(&udp->udp_rwlock); 8037 return (-TNOADDR); 8038 } 8039 } 8040 8041 /* 8042 * Copy the source address into our udp structure. This address 8043 * may still be zero; if so, ip will fill in the correct address 8044 * each time an outbound packet is passed to it. 8045 * If we are binding to a broadcast or multicast address then 8046 * udp_post_ip_bind_connect will clear the source address 8047 * when udp_do_bind success. 8048 */ 8049 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8050 udp->udp_port = lport; 8051 /* 8052 * Now reset the the next anonymous port if the application requested 8053 * an anonymous port, or we handed out the next anonymous port. 8054 */ 8055 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8056 us->us_next_port_to_try = port + 1; 8057 } 8058 8059 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 8060 if (udp->udp_family == AF_INET) { 8061 sin->sin_port = udp->udp_port; 8062 } else { 8063 sin6->sin6_port = udp->udp_port; 8064 /* Rebuild the header template */ 8065 error = udp_build_hdrs(udp); 8066 if (error != 0) { 8067 udp->udp_pending_op = -1; 8068 rw_exit(&udp->udp_rwlock); 8069 mutex_exit(&udpf->uf_lock); 8070 return (error); 8071 } 8072 } 8073 udp->udp_state = TS_IDLE; 8074 udp_bind_hash_insert(udpf, udp); 8075 mutex_exit(&udpf->uf_lock); 8076 rw_exit(&udp->udp_rwlock); 8077 8078 if (cl_inet_bind) { 8079 /* 8080 * Running in cluster mode - register bind information 8081 */ 8082 if (udp->udp_ipversion == IPV4_VERSION) { 8083 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8084 IPPROTO_UDP, AF_INET, 8085 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8086 (in_port_t)udp->udp_port, NULL); 8087 } else { 8088 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8089 IPPROTO_UDP, AF_INET6, 8090 (uint8_t *)&(udp->udp_v6src), 8091 (in_port_t)udp->udp_port, NULL); 8092 } 8093 } 8094 8095 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8096 if (is_system_labeled() && (!connp->conn_anon_port || 8097 connp->conn_anon_mlp)) { 8098 uint16_t mlpport; 8099 zone_t *zone; 8100 8101 zone = crgetzone(cr); 8102 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 8103 mlptSingle; 8104 addrtype = tsol_mlp_addr_type( 8105 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 8106 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 8107 if (addrtype == mlptSingle) { 8108 rw_enter(&udp->udp_rwlock, RW_WRITER); 8109 udp->udp_pending_op = -1; 8110 rw_exit(&udp->udp_rwlock); 8111 connp->conn_anon_port = B_FALSE; 8112 connp->conn_mlp_type = mlptSingle; 8113 return (-TNOADDR); 8114 } 8115 mlpport = connp->conn_anon_port ? PMAPPORT : port; 8116 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8117 addrtype); 8118 8119 /* 8120 * It is a coding error to attempt to bind an MLP port 8121 * without first setting SOL_SOCKET/SCM_UCRED. 8122 */ 8123 if (mlptype != mlptSingle && 8124 connp->conn_mlp_type == mlptSingle) { 8125 rw_enter(&udp->udp_rwlock, RW_WRITER); 8126 udp->udp_pending_op = -1; 8127 rw_exit(&udp->udp_rwlock); 8128 connp->conn_anon_port = B_FALSE; 8129 connp->conn_mlp_type = mlptSingle; 8130 return (EINVAL); 8131 } 8132 8133 /* 8134 * It is an access violation to attempt to bind an MLP port 8135 * without NET_BINDMLP privilege. 8136 */ 8137 if (mlptype != mlptSingle && 8138 secpolicy_net_bindmlp(cr) != 0) { 8139 if (udp->udp_debug) { 8140 (void) strlog(UDP_MOD_ID, 0, 1, 8141 SL_ERROR|SL_TRACE, 8142 "udp_bind: no priv for multilevel port %d", 8143 mlpport); 8144 } 8145 rw_enter(&udp->udp_rwlock, RW_WRITER); 8146 udp->udp_pending_op = -1; 8147 rw_exit(&udp->udp_rwlock); 8148 connp->conn_anon_port = B_FALSE; 8149 connp->conn_mlp_type = mlptSingle; 8150 return (-TACCES); 8151 } 8152 8153 /* 8154 * If we're specifically binding a shared IP address and the 8155 * port is MLP on shared addresses, then check to see if this 8156 * zone actually owns the MLP. Reject if not. 8157 */ 8158 if (mlptype == mlptShared && addrtype == mlptShared) { 8159 /* 8160 * No need to handle exclusive-stack zones since 8161 * ALL_ZONES only applies to the shared stack. 8162 */ 8163 zoneid_t mlpzone; 8164 8165 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8166 htons(mlpport)); 8167 if (connp->conn_zoneid != mlpzone) { 8168 if (udp->udp_debug) { 8169 (void) strlog(UDP_MOD_ID, 0, 1, 8170 SL_ERROR|SL_TRACE, 8171 "udp_bind: attempt to bind port " 8172 "%d on shared addr in zone %d " 8173 "(should be %d)", 8174 mlpport, connp->conn_zoneid, 8175 mlpzone); 8176 } 8177 rw_enter(&udp->udp_rwlock, RW_WRITER); 8178 udp->udp_pending_op = -1; 8179 rw_exit(&udp->udp_rwlock); 8180 connp->conn_anon_port = B_FALSE; 8181 connp->conn_mlp_type = mlptSingle; 8182 return (-TACCES); 8183 } 8184 } 8185 if (connp->conn_anon_port) { 8186 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8187 port, B_TRUE); 8188 if (error != 0) { 8189 if (udp->udp_debug) { 8190 (void) strlog(UDP_MOD_ID, 0, 1, 8191 SL_ERROR|SL_TRACE, 8192 "udp_bind: cannot establish anon " 8193 "MLP for port %d", port); 8194 } 8195 rw_enter(&udp->udp_rwlock, RW_WRITER); 8196 udp->udp_pending_op = -1; 8197 rw_exit(&udp->udp_rwlock); 8198 connp->conn_anon_port = B_FALSE; 8199 connp->conn_mlp_type = mlptSingle; 8200 return (-TACCES); 8201 } 8202 } 8203 connp->conn_mlp_type = mlptype; 8204 } 8205 8206 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8207 /* 8208 * Append a request for an IRE if udp_v6src not 8209 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 8210 */ 8211 mp = allocb(sizeof (ire_t), BPRI_HI); 8212 if (!mp) { 8213 rw_enter(&udp->udp_rwlock, RW_WRITER); 8214 udp->udp_pending_op = -1; 8215 rw_exit(&udp->udp_rwlock); 8216 return (ENOMEM); 8217 } 8218 mp->b_wptr += sizeof (ire_t); 8219 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8220 } 8221 if (udp->udp_family == AF_INET6) { 8222 ASSERT(udp->udp_connp->conn_af_isv6); 8223 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8224 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8225 } else { 8226 ASSERT(!udp->udp_connp->conn_af_isv6); 8227 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8228 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8229 B_TRUE); 8230 } 8231 8232 (void) udp_post_ip_bind_connect(udp, mp, error); 8233 return (error); 8234 } 8235 8236 int 8237 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8238 socklen_t len, cred_t *cr) 8239 { 8240 int error; 8241 conn_t *connp; 8242 8243 /* All Solaris components should pass a cred for this operation. */ 8244 ASSERT(cr != NULL); 8245 8246 connp = (conn_t *)proto_handle; 8247 8248 if (sa == NULL) 8249 error = udp_do_unbind(connp); 8250 else 8251 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8252 8253 if (error < 0) { 8254 if (error == -TOUTSTATE) 8255 error = EINVAL; 8256 else 8257 error = proto_tlitosyserr(-error); 8258 } 8259 8260 return (error); 8261 } 8262 8263 static int 8264 udp_implicit_bind(conn_t *connp, cred_t *cr) 8265 { 8266 int error; 8267 8268 /* All Solaris components should pass a cred for this operation. */ 8269 ASSERT(cr != NULL); 8270 8271 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8272 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8273 } 8274 8275 /* 8276 * This routine removes a port number association from a stream. It 8277 * is called by udp_unbind and udp_tpi_unbind. 8278 */ 8279 static int 8280 udp_do_unbind(conn_t *connp) 8281 { 8282 udp_t *udp = connp->conn_udp; 8283 udp_fanout_t *udpf; 8284 udp_stack_t *us = udp->udp_us; 8285 8286 if (cl_inet_unbind != NULL) { 8287 /* 8288 * Running in cluster mode - register unbind information 8289 */ 8290 if (udp->udp_ipversion == IPV4_VERSION) { 8291 (*cl_inet_unbind)( 8292 connp->conn_netstack->netstack_stackid, 8293 IPPROTO_UDP, AF_INET, 8294 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8295 (in_port_t)udp->udp_port, NULL); 8296 } else { 8297 (*cl_inet_unbind)( 8298 connp->conn_netstack->netstack_stackid, 8299 IPPROTO_UDP, AF_INET6, 8300 (uint8_t *)&(udp->udp_v6src), 8301 (in_port_t)udp->udp_port, NULL); 8302 } 8303 } 8304 8305 rw_enter(&udp->udp_rwlock, RW_WRITER); 8306 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8307 rw_exit(&udp->udp_rwlock); 8308 return (-TOUTSTATE); 8309 } 8310 udp->udp_pending_op = T_UNBIND_REQ; 8311 rw_exit(&udp->udp_rwlock); 8312 8313 /* 8314 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8315 * and therefore ip_unbind must never return NULL. 8316 */ 8317 ip_unbind(connp); 8318 8319 /* 8320 * Once we're unbound from IP, the pending operation may be cleared 8321 * here. 8322 */ 8323 rw_enter(&udp->udp_rwlock, RW_WRITER); 8324 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8325 us->us_bind_fanout_size)]; 8326 8327 mutex_enter(&udpf->uf_lock); 8328 udp_bind_hash_remove(udp, B_TRUE); 8329 V6_SET_ZERO(udp->udp_v6src); 8330 V6_SET_ZERO(udp->udp_bound_v6src); 8331 udp->udp_port = 0; 8332 mutex_exit(&udpf->uf_lock); 8333 8334 udp->udp_pending_op = -1; 8335 udp->udp_state = TS_UNBND; 8336 if (udp->udp_family == AF_INET6) 8337 (void) udp_build_hdrs(udp); 8338 rw_exit(&udp->udp_rwlock); 8339 8340 return (0); 8341 } 8342 8343 static int 8344 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8345 { 8346 ire_t *ire; 8347 udp_fanout_t *udpf; 8348 udp_stack_t *us = udp->udp_us; 8349 8350 ASSERT(udp->udp_pending_op != -1); 8351 rw_enter(&udp->udp_rwlock, RW_WRITER); 8352 if (error == 0) { 8353 /* For udp_do_connect() success */ 8354 /* udp_do_bind() success will do nothing in here */ 8355 /* 8356 * If a broadcast/multicast address was bound, set 8357 * the source address to 0. 8358 * This ensures no datagrams with broadcast address 8359 * as source address are emitted (which would violate 8360 * RFC1122 - Hosts requirements) 8361 * 8362 * Note that when connecting the returned IRE is 8363 * for the destination address and we only perform 8364 * the broadcast check for the source address (it 8365 * is OK to connect to a broadcast/multicast address.) 8366 */ 8367 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8368 ire = (ire_t *)ire_mp->b_rptr; 8369 8370 /* 8371 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8372 * multicast local address. 8373 */ 8374 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8375 us->us_bind_fanout_size)]; 8376 if (ire->ire_type == IRE_BROADCAST && 8377 udp->udp_state != TS_DATA_XFER) { 8378 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8379 udp->udp_pending_op == O_T_BIND_REQ); 8380 /* 8381 * This was just a local bind to a broadcast 8382 * addr. 8383 */ 8384 mutex_enter(&udpf->uf_lock); 8385 V6_SET_ZERO(udp->udp_v6src); 8386 mutex_exit(&udpf->uf_lock); 8387 if (udp->udp_family == AF_INET6) 8388 (void) udp_build_hdrs(udp); 8389 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8390 if (udp->udp_family == AF_INET6) 8391 (void) udp_build_hdrs(udp); 8392 } 8393 } 8394 } else { 8395 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8396 us->us_bind_fanout_size)]; 8397 mutex_enter(&udpf->uf_lock); 8398 8399 if (udp->udp_state == TS_DATA_XFER) { 8400 /* Connect failed */ 8401 /* Revert back to the bound source */ 8402 udp->udp_v6src = udp->udp_bound_v6src; 8403 udp->udp_state = TS_IDLE; 8404 } else { 8405 /* For udp_do_bind() failed */ 8406 V6_SET_ZERO(udp->udp_v6src); 8407 V6_SET_ZERO(udp->udp_bound_v6src); 8408 udp->udp_state = TS_UNBND; 8409 udp_bind_hash_remove(udp, B_TRUE); 8410 udp->udp_port = 0; 8411 } 8412 mutex_exit(&udpf->uf_lock); 8413 if (udp->udp_family == AF_INET6) 8414 (void) udp_build_hdrs(udp); 8415 } 8416 udp->udp_pending_op = -1; 8417 rw_exit(&udp->udp_rwlock); 8418 if (ire_mp != NULL) 8419 freeb(ire_mp); 8420 return (error); 8421 } 8422 8423 /* 8424 * It associates a default destination address with the stream. 8425 */ 8426 static int 8427 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 8428 cred_t *cr) 8429 { 8430 sin6_t *sin6; 8431 sin_t *sin; 8432 in6_addr_t v6dst; 8433 ipaddr_t v4dst; 8434 uint16_t dstport; 8435 uint32_t flowinfo; 8436 mblk_t *ire_mp; 8437 udp_fanout_t *udpf; 8438 udp_t *udp, *udp1; 8439 ushort_t ipversion; 8440 udp_stack_t *us; 8441 int error; 8442 8443 udp = connp->conn_udp; 8444 us = udp->udp_us; 8445 8446 /* 8447 * Address has been verified by the caller 8448 */ 8449 switch (len) { 8450 default: 8451 /* 8452 * Should never happen 8453 */ 8454 return (EINVAL); 8455 8456 case sizeof (sin_t): 8457 sin = (sin_t *)sa; 8458 v4dst = sin->sin_addr.s_addr; 8459 dstport = sin->sin_port; 8460 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8461 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8462 ipversion = IPV4_VERSION; 8463 break; 8464 8465 case sizeof (sin6_t): 8466 sin6 = (sin6_t *)sa; 8467 v6dst = sin6->sin6_addr; 8468 dstport = sin6->sin6_port; 8469 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8470 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8471 ipversion = IPV4_VERSION; 8472 flowinfo = 0; 8473 } else { 8474 ipversion = IPV6_VERSION; 8475 flowinfo = sin6->sin6_flowinfo; 8476 } 8477 break; 8478 } 8479 8480 if (dstport == 0) 8481 return (-TBADADDR); 8482 8483 rw_enter(&udp->udp_rwlock, RW_WRITER); 8484 8485 /* 8486 * This UDP must have bound to a port already before doing a connect. 8487 * TPI mandates that users must send TPI primitives only 1 at a time 8488 * and wait for the response before sending the next primitive. 8489 */ 8490 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8491 rw_exit(&udp->udp_rwlock); 8492 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8493 "udp_connect: bad state, %u", udp->udp_state); 8494 return (-TOUTSTATE); 8495 } 8496 udp->udp_pending_op = T_CONN_REQ; 8497 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8498 8499 if (ipversion == IPV4_VERSION) { 8500 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8501 udp->udp_ip_snd_options_len; 8502 } else { 8503 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8504 } 8505 8506 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8507 us->us_bind_fanout_size)]; 8508 8509 mutex_enter(&udpf->uf_lock); 8510 if (udp->udp_state == TS_DATA_XFER) { 8511 /* Already connected - clear out state */ 8512 udp->udp_v6src = udp->udp_bound_v6src; 8513 udp->udp_state = TS_IDLE; 8514 } 8515 8516 /* 8517 * Create a default IP header with no IP options. 8518 */ 8519 udp->udp_dstport = dstport; 8520 udp->udp_ipversion = ipversion; 8521 if (ipversion == IPV4_VERSION) { 8522 /* 8523 * Interpret a zero destination to mean loopback. 8524 * Update the T_CONN_REQ (sin/sin6) since it is used to 8525 * generate the T_CONN_CON. 8526 */ 8527 if (v4dst == INADDR_ANY) { 8528 v4dst = htonl(INADDR_LOOPBACK); 8529 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8530 if (udp->udp_family == AF_INET) { 8531 sin->sin_addr.s_addr = v4dst; 8532 } else { 8533 sin6->sin6_addr = v6dst; 8534 } 8535 } 8536 udp->udp_v6dst = v6dst; 8537 udp->udp_flowinfo = 0; 8538 8539 /* 8540 * If the destination address is multicast and 8541 * an outgoing multicast interface has been set, 8542 * use the address of that interface as our 8543 * source address if no source address has been set. 8544 */ 8545 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8546 CLASSD(v4dst) && 8547 udp->udp_multicast_if_addr != INADDR_ANY) { 8548 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8549 &udp->udp_v6src); 8550 } 8551 } else { 8552 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8553 /* 8554 * Interpret a zero destination to mean loopback. 8555 * Update the T_CONN_REQ (sin/sin6) since it is used to 8556 * generate the T_CONN_CON. 8557 */ 8558 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 8559 v6dst = ipv6_loopback; 8560 sin6->sin6_addr = v6dst; 8561 } 8562 udp->udp_v6dst = v6dst; 8563 udp->udp_flowinfo = flowinfo; 8564 /* 8565 * If the destination address is multicast and 8566 * an outgoing multicast interface has been set, 8567 * then the ip bind logic will pick the correct source 8568 * address (i.e. matching the outgoing multicast interface). 8569 */ 8570 } 8571 8572 /* 8573 * Verify that the src/port/dst/port is unique for all 8574 * connections in TS_DATA_XFER 8575 */ 8576 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 8577 if (udp1->udp_state != TS_DATA_XFER) 8578 continue; 8579 if (udp->udp_port != udp1->udp_port || 8580 udp->udp_ipversion != udp1->udp_ipversion || 8581 dstport != udp1->udp_dstport || 8582 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 8583 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 8584 !(IPCL_ZONE_MATCH(udp->udp_connp, 8585 udp1->udp_connp->conn_zoneid) || 8586 IPCL_ZONE_MATCH(udp1->udp_connp, 8587 udp->udp_connp->conn_zoneid))) 8588 continue; 8589 mutex_exit(&udpf->uf_lock); 8590 udp->udp_pending_op = -1; 8591 rw_exit(&udp->udp_rwlock); 8592 return (-TBADADDR); 8593 } 8594 8595 if (cl_inet_connect2 != NULL) { 8596 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 8597 if (error != 0) { 8598 mutex_exit(&udpf->uf_lock); 8599 udp->udp_pending_op = -1; 8600 rw_exit(&udp->udp_rwlock); 8601 return (-TBADADDR); 8602 } 8603 } 8604 8605 udp->udp_state = TS_DATA_XFER; 8606 mutex_exit(&udpf->uf_lock); 8607 8608 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 8609 if (ire_mp == NULL) { 8610 mutex_enter(&udpf->uf_lock); 8611 udp->udp_state = TS_IDLE; 8612 udp->udp_pending_op = -1; 8613 mutex_exit(&udpf->uf_lock); 8614 rw_exit(&udp->udp_rwlock); 8615 return (ENOMEM); 8616 } 8617 8618 rw_exit(&udp->udp_rwlock); 8619 8620 ire_mp->b_wptr += sizeof (ire_t); 8621 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8622 8623 if (udp->udp_family == AF_INET) { 8624 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 8625 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 8626 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 8627 B_TRUE, B_TRUE, cr); 8628 } else { 8629 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 8630 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 8631 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr); 8632 } 8633 8634 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 8635 } 8636 8637 /* ARGSUSED */ 8638 static int 8639 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 8640 socklen_t len, sock_connid_t *id, cred_t *cr) 8641 { 8642 conn_t *connp = (conn_t *)proto_handle; 8643 udp_t *udp = connp->conn_udp; 8644 int error; 8645 boolean_t did_bind = B_FALSE; 8646 8647 /* All Solaris components should pass a cred for this operation. */ 8648 ASSERT(cr != NULL); 8649 8650 if (sa == NULL) { 8651 /* 8652 * Disconnect 8653 * Make sure we are connected 8654 */ 8655 if (udp->udp_state != TS_DATA_XFER) 8656 return (EINVAL); 8657 8658 error = udp_disconnect(connp); 8659 return (error); 8660 } 8661 8662 error = proto_verify_ip_addr(udp->udp_family, sa, len); 8663 if (error != 0) 8664 goto done; 8665 8666 /* do an implicit bind if necessary */ 8667 if (udp->udp_state == TS_UNBND) { 8668 error = udp_implicit_bind(connp, cr); 8669 /* 8670 * We could be racing with an actual bind, in which case 8671 * we would see EPROTO. We cross our fingers and try 8672 * to connect. 8673 */ 8674 if (!(error == 0 || error == EPROTO)) 8675 goto done; 8676 did_bind = B_TRUE; 8677 } 8678 /* 8679 * set SO_DGRAM_ERRIND 8680 */ 8681 udp->udp_dgram_errind = B_TRUE; 8682 8683 error = udp_do_connect(connp, sa, len, cr); 8684 8685 if (error != 0 && did_bind) { 8686 int unbind_err; 8687 8688 unbind_err = udp_do_unbind(connp); 8689 ASSERT(unbind_err == 0); 8690 } 8691 8692 if (error == 0) { 8693 *id = 0; 8694 (*connp->conn_upcalls->su_connected) 8695 (connp->conn_upper_handle, 0, NULL, -1); 8696 } else if (error < 0) { 8697 error = proto_tlitosyserr(-error); 8698 } 8699 8700 done: 8701 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 8702 /* 8703 * No need to hold locks to set state 8704 * after connect failure socket state is undefined 8705 * We set the state only to imitate old sockfs behavior 8706 */ 8707 udp->udp_state = TS_IDLE; 8708 } 8709 return (error); 8710 } 8711 8712 /* ARGSUSED */ 8713 int 8714 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 8715 cred_t *cr) 8716 { 8717 conn_t *connp = (conn_t *)proto_handle; 8718 udp_t *udp = connp->conn_udp; 8719 udp_stack_t *us = udp->udp_us; 8720 int error = 0; 8721 8722 ASSERT(DB_TYPE(mp) == M_DATA); 8723 8724 /* All Solaris components should pass a cred for this operation. */ 8725 ASSERT(cr != NULL); 8726 8727 /* If labeled then sockfs should have already set db_credp */ 8728 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 8729 8730 /* 8731 * If the socket is connected and no change in destination 8732 */ 8733 if (msg->msg_namelen == 0) { 8734 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 8735 if (error == EDESTADDRREQ) 8736 return (error); 8737 else 8738 return (udp->udp_dgram_errind ? error : 0); 8739 } 8740 8741 /* 8742 * Do an implicit bind if necessary. 8743 */ 8744 if (udp->udp_state == TS_UNBND) { 8745 error = udp_implicit_bind(connp, cr); 8746 /* 8747 * We could be racing with an actual bind, in which case 8748 * we would see EPROTO. We cross our fingers and try 8749 * to send. 8750 */ 8751 if (!(error == 0 || error == EPROTO)) { 8752 freemsg(mp); 8753 return (error); 8754 } 8755 } 8756 8757 rw_enter(&udp->udp_rwlock, RW_WRITER); 8758 8759 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 8760 rw_exit(&udp->udp_rwlock); 8761 freemsg(mp); 8762 return (EISCONN); 8763 } 8764 8765 8766 if (udp->udp_delayed_error != 0) { 8767 boolean_t match; 8768 8769 error = udp->udp_delayed_error; 8770 match = B_FALSE; 8771 udp->udp_delayed_error = 0; 8772 switch (udp->udp_family) { 8773 case AF_INET: { 8774 /* Compare just IP address and port */ 8775 sin_t *sin1 = (sin_t *)msg->msg_name; 8776 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 8777 8778 if (msg->msg_namelen == sizeof (sin_t) && 8779 sin1->sin_port == sin2->sin_port && 8780 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 8781 match = B_TRUE; 8782 8783 break; 8784 } 8785 case AF_INET6: { 8786 sin6_t *sin1 = (sin6_t *)msg->msg_name; 8787 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 8788 8789 if (msg->msg_namelen == sizeof (sin6_t) && 8790 sin1->sin6_port == sin2->sin6_port && 8791 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 8792 &sin2->sin6_addr)) 8793 match = B_TRUE; 8794 break; 8795 } 8796 default: 8797 ASSERT(0); 8798 } 8799 8800 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 8801 8802 if (match) { 8803 rw_exit(&udp->udp_rwlock); 8804 freemsg(mp); 8805 return (error); 8806 } 8807 } 8808 8809 error = proto_verify_ip_addr(udp->udp_family, 8810 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 8811 rw_exit(&udp->udp_rwlock); 8812 8813 if (error != 0) { 8814 freemsg(mp); 8815 return (error); 8816 } 8817 8818 error = udp_send_not_connected(connp, mp, 8819 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 8820 curproc->p_pid); 8821 if (error != 0) { 8822 UDP_STAT(us, udp_out_err_output); 8823 freemsg(mp); 8824 } 8825 return (udp->udp_dgram_errind ? error : 0); 8826 } 8827 8828 int 8829 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 8830 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb) 8831 { 8832 conn_t *connp = (conn_t *)proto_handle; 8833 udp_t *udp; 8834 struct T_capability_ack tca; 8835 struct sockaddr_in6 laddr, faddr; 8836 socklen_t laddrlen, faddrlen; 8837 short opts; 8838 struct stroptions *stropt; 8839 mblk_t *stropt_mp; 8840 int error; 8841 8842 udp = connp->conn_udp; 8843 8844 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 8845 8846 /* 8847 * setup the fallback stream that was allocated 8848 */ 8849 connp->conn_dev = (dev_t)RD(q)->q_ptr; 8850 connp->conn_minor_arena = WR(q)->q_ptr; 8851 8852 RD(q)->q_ptr = WR(q)->q_ptr = connp; 8853 8854 WR(q)->q_qinfo = &udp_winit; 8855 8856 connp->conn_rq = RD(q); 8857 connp->conn_wq = WR(q); 8858 8859 /* Notify stream head about options before sending up data */ 8860 stropt_mp->b_datap->db_type = M_SETOPTS; 8861 stropt_mp->b_wptr += sizeof (*stropt); 8862 stropt = (struct stroptions *)stropt_mp->b_rptr; 8863 stropt->so_flags = SO_WROFF | SO_HIWAT; 8864 stropt->so_wroff = 8865 (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); 8866 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 8867 putnext(RD(q), stropt_mp); 8868 8869 /* 8870 * Free the helper stream 8871 */ 8872 ip_free_helper_stream(connp); 8873 8874 if (!issocket) 8875 udp_use_pure_tpi(udp); 8876 8877 /* 8878 * Collect the information needed to sync with the sonode 8879 */ 8880 udp_do_capability_ack(udp, &tca, TC1_INFO); 8881 8882 laddrlen = faddrlen = sizeof (sin6_t); 8883 (void) udp_getsockname((sock_lower_handle_t)connp, 8884 (struct sockaddr *)&laddr, &laddrlen, CRED()); 8885 error = udp_getpeername((sock_lower_handle_t)connp, 8886 (struct sockaddr *)&faddr, &faddrlen, CRED()); 8887 if (error != 0) 8888 faddrlen = 0; 8889 8890 opts = 0; 8891 if (udp->udp_dgram_errind) 8892 opts |= SO_DGRAM_ERRIND; 8893 if (udp->udp_dontroute) 8894 opts |= SO_DONTROUTE; 8895 8896 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 8897 (struct sockaddr *)&laddr, laddrlen, 8898 (struct sockaddr *)&faddr, faddrlen, opts); 8899 8900 mutex_enter(&udp->udp_recv_lock); 8901 /* 8902 * Attempts to send data up during fallback will result in it being 8903 * queued in udp_t. Now we push up any queued packets. 8904 */ 8905 while (udp->udp_fallback_queue_head != NULL) { 8906 mblk_t *mp; 8907 mp = udp->udp_fallback_queue_head; 8908 udp->udp_fallback_queue_head = mp->b_next; 8909 mutex_exit(&udp->udp_recv_lock); 8910 mp->b_next = NULL; 8911 putnext(RD(q), mp); 8912 mutex_enter(&udp->udp_recv_lock); 8913 } 8914 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 8915 /* 8916 * No longer a streams less socket 8917 */ 8918 rw_enter(&udp->udp_rwlock, RW_WRITER); 8919 connp->conn_flags &= ~IPCL_NONSTR; 8920 rw_exit(&udp->udp_rwlock); 8921 8922 mutex_exit(&udp->udp_recv_lock); 8923 8924 ASSERT(connp->conn_ref >= 1); 8925 8926 return (0); 8927 } 8928 8929 static int 8930 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 8931 { 8932 sin_t *sin = (sin_t *)sa; 8933 sin6_t *sin6 = (sin6_t *)sa; 8934 8935 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 8936 ASSERT(udp != NULL); 8937 8938 if (udp->udp_state != TS_DATA_XFER) 8939 return (ENOTCONN); 8940 8941 switch (udp->udp_family) { 8942 case AF_INET: 8943 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8944 8945 if (*salenp < sizeof (sin_t)) 8946 return (EINVAL); 8947 8948 *salenp = sizeof (sin_t); 8949 *sin = sin_null; 8950 sin->sin_family = AF_INET; 8951 sin->sin_port = udp->udp_dstport; 8952 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 8953 break; 8954 case AF_INET6: 8955 if (*salenp < sizeof (sin6_t)) 8956 return (EINVAL); 8957 8958 *salenp = sizeof (sin6_t); 8959 *sin6 = sin6_null; 8960 sin6->sin6_family = AF_INET6; 8961 sin6->sin6_port = udp->udp_dstport; 8962 sin6->sin6_addr = udp->udp_v6dst; 8963 sin6->sin6_flowinfo = udp->udp_flowinfo; 8964 break; 8965 } 8966 8967 return (0); 8968 } 8969 8970 /* ARGSUSED */ 8971 int 8972 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8973 socklen_t *salenp, cred_t *cr) 8974 { 8975 conn_t *connp = (conn_t *)proto_handle; 8976 udp_t *udp = connp->conn_udp; 8977 int error; 8978 8979 /* All Solaris components should pass a cred for this operation. */ 8980 ASSERT(cr != NULL); 8981 8982 ASSERT(udp != NULL); 8983 8984 rw_enter(&udp->udp_rwlock, RW_READER); 8985 8986 error = udp_do_getpeername(udp, sa, salenp); 8987 8988 rw_exit(&udp->udp_rwlock); 8989 8990 return (error); 8991 } 8992 8993 static int 8994 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 8995 { 8996 sin_t *sin = (sin_t *)sa; 8997 sin6_t *sin6 = (sin6_t *)sa; 8998 8999 ASSERT(udp != NULL); 9000 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9001 9002 switch (udp->udp_family) { 9003 case AF_INET: 9004 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9005 9006 if (*salenp < sizeof (sin_t)) 9007 return (EINVAL); 9008 9009 *salenp = sizeof (sin_t); 9010 *sin = sin_null; 9011 sin->sin_family = AF_INET; 9012 if (udp->udp_state == TS_UNBND) { 9013 break; 9014 } 9015 sin->sin_port = udp->udp_port; 9016 9017 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9018 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9019 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9020 } else { 9021 /* 9022 * INADDR_ANY 9023 * udp_v6src is not set, we might be bound to 9024 * broadcast/multicast. Use udp_bound_v6src as 9025 * local address instead (that could 9026 * also still be INADDR_ANY) 9027 */ 9028 sin->sin_addr.s_addr = 9029 V4_PART_OF_V6(udp->udp_bound_v6src); 9030 } 9031 break; 9032 9033 case AF_INET6: 9034 if (*salenp < sizeof (sin6_t)) 9035 return (EINVAL); 9036 9037 *salenp = sizeof (sin6_t); 9038 *sin6 = sin6_null; 9039 sin6->sin6_family = AF_INET6; 9040 if (udp->udp_state == TS_UNBND) { 9041 break; 9042 } 9043 sin6->sin6_port = udp->udp_port; 9044 9045 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9046 sin6->sin6_addr = udp->udp_v6src; 9047 } else { 9048 /* 9049 * UNSPECIFIED 9050 * udp_v6src is not set, we might be bound to 9051 * broadcast/multicast. Use udp_bound_v6src as 9052 * local address instead (that could 9053 * also still be UNSPECIFIED) 9054 */ 9055 sin6->sin6_addr = udp->udp_bound_v6src; 9056 } 9057 } 9058 return (0); 9059 } 9060 9061 /* ARGSUSED */ 9062 int 9063 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9064 socklen_t *salenp, cred_t *cr) 9065 { 9066 conn_t *connp = (conn_t *)proto_handle; 9067 udp_t *udp = connp->conn_udp; 9068 int error; 9069 9070 /* All Solaris components should pass a cred for this operation. */ 9071 ASSERT(cr != NULL); 9072 9073 ASSERT(udp != NULL); 9074 rw_enter(&udp->udp_rwlock, RW_READER); 9075 9076 error = udp_do_getsockname(udp, sa, salenp); 9077 9078 rw_exit(&udp->udp_rwlock); 9079 9080 return (error); 9081 } 9082 9083 int 9084 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9085 void *optvalp, socklen_t *optlen, cred_t *cr) 9086 { 9087 conn_t *connp = (conn_t *)proto_handle; 9088 udp_t *udp = connp->conn_udp; 9089 int error; 9090 t_uscalar_t max_optbuf_len; 9091 void *optvalp_buf; 9092 int len; 9093 9094 /* All Solaris components should pass a cred for this operation. */ 9095 ASSERT(cr != NULL); 9096 9097 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9098 udp_opt_obj.odb_opt_des_arr, 9099 udp_opt_obj.odb_opt_arr_cnt, 9100 udp_opt_obj.odb_topmost_tpiprovider, 9101 B_FALSE, B_TRUE, cr); 9102 if (error != 0) { 9103 if (error < 0) 9104 error = proto_tlitosyserr(-error); 9105 return (error); 9106 } 9107 9108 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9109 rw_enter(&udp->udp_rwlock, RW_READER); 9110 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9111 rw_exit(&udp->udp_rwlock); 9112 9113 if (len < 0) { 9114 /* 9115 * Pass on to IP 9116 */ 9117 kmem_free(optvalp_buf, max_optbuf_len); 9118 return (ip_get_options(connp, level, option_name, 9119 optvalp, optlen, cr)); 9120 } else { 9121 /* 9122 * update optlen and copy option value 9123 */ 9124 t_uscalar_t size = MIN(len, *optlen); 9125 bcopy(optvalp_buf, optvalp, size); 9126 bcopy(&size, optlen, sizeof (size)); 9127 9128 kmem_free(optvalp_buf, max_optbuf_len); 9129 return (0); 9130 } 9131 } 9132 9133 int 9134 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9135 const void *optvalp, socklen_t optlen, cred_t *cr) 9136 { 9137 conn_t *connp = (conn_t *)proto_handle; 9138 udp_t *udp = connp->conn_udp; 9139 int error; 9140 9141 /* All Solaris components should pass a cred for this operation. */ 9142 ASSERT(cr != NULL); 9143 9144 error = proto_opt_check(level, option_name, optlen, NULL, 9145 udp_opt_obj.odb_opt_des_arr, 9146 udp_opt_obj.odb_opt_arr_cnt, 9147 udp_opt_obj.odb_topmost_tpiprovider, 9148 B_TRUE, B_FALSE, cr); 9149 9150 if (error != 0) { 9151 if (error < 0) 9152 error = proto_tlitosyserr(-error); 9153 return (error); 9154 } 9155 9156 rw_enter(&udp->udp_rwlock, RW_WRITER); 9157 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9158 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9159 NULL, cr); 9160 rw_exit(&udp->udp_rwlock); 9161 9162 if (error < 0) { 9163 /* 9164 * Pass on to ip 9165 */ 9166 error = ip_set_options(connp, level, option_name, optvalp, 9167 optlen, cr); 9168 } 9169 9170 return (error); 9171 } 9172 9173 void 9174 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9175 { 9176 conn_t *connp = (conn_t *)proto_handle; 9177 udp_t *udp = connp->conn_udp; 9178 9179 mutex_enter(&udp->udp_recv_lock); 9180 connp->conn_flow_cntrld = B_FALSE; 9181 mutex_exit(&udp->udp_recv_lock); 9182 } 9183 9184 /* ARGSUSED */ 9185 int 9186 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9187 { 9188 conn_t *connp = (conn_t *)proto_handle; 9189 9190 /* All Solaris components should pass a cred for this operation. */ 9191 ASSERT(cr != NULL); 9192 9193 /* shut down the send side */ 9194 if (how != SHUT_RD) 9195 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9196 SOCK_OPCTL_SHUT_SEND, 0); 9197 /* shut down the recv side */ 9198 if (how != SHUT_WR) 9199 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9200 SOCK_OPCTL_SHUT_RECV, 0); 9201 return (0); 9202 } 9203 9204 int 9205 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9206 int mode, int32_t *rvalp, cred_t *cr) 9207 { 9208 conn_t *connp = (conn_t *)proto_handle; 9209 int error; 9210 9211 /* All Solaris components should pass a cred for this operation. */ 9212 ASSERT(cr != NULL); 9213 9214 switch (cmd) { 9215 case ND_SET: 9216 case ND_GET: 9217 case _SIOCSOCKFALLBACK: 9218 case TI_GETPEERNAME: 9219 case TI_GETMYNAME: 9220 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9221 cmd)); 9222 error = EINVAL; 9223 break; 9224 default: 9225 /* 9226 * Pass on to IP using helper stream 9227 */ 9228 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9229 cmd, arg, mode, cr, rvalp); 9230 break; 9231 } 9232 return (error); 9233 } 9234 9235 /* ARGSUSED */ 9236 int 9237 udp_accept(sock_lower_handle_t lproto_handle, 9238 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9239 cred_t *cr) 9240 { 9241 return (EOPNOTSUPP); 9242 } 9243 9244 /* ARGSUSED */ 9245 int 9246 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9247 { 9248 return (EOPNOTSUPP); 9249 } 9250 9251 sock_downcalls_t sock_udp_downcalls = { 9252 udp_activate, /* sd_activate */ 9253 udp_accept, /* sd_accept */ 9254 udp_bind, /* sd_bind */ 9255 udp_listen, /* sd_listen */ 9256 udp_connect, /* sd_connect */ 9257 udp_getpeername, /* sd_getpeername */ 9258 udp_getsockname, /* sd_getsockname */ 9259 udp_getsockopt, /* sd_getsockopt */ 9260 udp_setsockopt, /* sd_setsockopt */ 9261 udp_send, /* sd_send */ 9262 NULL, /* sd_send_uio */ 9263 NULL, /* sd_recv_uio */ 9264 NULL, /* sd_poll */ 9265 udp_shutdown, /* sd_shutdown */ 9266 udp_clr_flowctrl, /* sd_setflowctrl */ 9267 udp_ioctl, /* sd_ioctl */ 9268 udp_close /* sd_close */ 9269 }; 9270