1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 #include <sys/ethernet.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 101 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 102 * We also use conn_lock when updating things that affect the IP classifier 103 * lookup. 104 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 105 * 106 * The fanout lock uf_lock: 107 * When a UDP endpoint is bound to a local port, it is inserted into 108 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 109 * The size of the array is controlled by the udp_bind_fanout_size variable. 110 * This variable can be changed in /etc/system if the default value is 111 * not large enough. Each bind hash bucket is protected by a per bucket 112 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 113 * structure and a few other fields in the udp_t. A UDP endpoint is removed 114 * from the bind hash list only when it is being unbound or being closed. 115 * The per bucket lock also protects a UDP endpoint's state changes. 116 * 117 * The udp_rwlock: 118 * This protects most of the other fields in the udp_t. The exact list of 119 * fields which are protected by each of the above locks is documented in 120 * the udp_t structure definition. 121 * 122 * Plumbing notes: 123 * UDP is always a device driver. For compatibility with mibopen() code 124 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 125 * dummy module. 126 * 127 * The above implies that we don't support any intermediate module to 128 * reside in between /dev/ip and udp -- in fact, we never supported such 129 * scenario in the past as the inter-layer communication semantics have 130 * always been private. 
131 */ 132 133 /* For /etc/system control */ 134 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 135 136 /* Option processing attrs */ 137 typedef struct udpattrs_s { 138 union { 139 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 140 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 141 } udpattr_ippu; 142 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 143 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 144 mblk_t *udpattr_mb; 145 boolean_t udpattr_credset; 146 } udpattrs_t; 147 148 static void udp_addr_req(queue_t *q, mblk_t *mp); 149 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 150 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 151 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 152 static int udp_build_hdrs(udp_t *udp); 153 static void udp_capability_req(queue_t *q, mblk_t *mp); 154 static int udp_tpi_close(queue_t *q, int flags); 155 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 156 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 157 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 158 int sys_error); 159 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 160 t_scalar_t tlierr, int unixerr); 161 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 162 cred_t *cr); 163 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 164 char *value, caddr_t cp, cred_t *cr); 165 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 166 char *value, caddr_t cp, cred_t *cr); 167 static void udp_icmp_error(conn_t *, mblk_t *); 168 static void udp_icmp_error_ipv6(conn_t *, mblk_t *); 169 static void udp_info_req(queue_t *q, mblk_t *mp); 170 static void udp_input(void *, mblk_t *, void *); 171 static void udp_lrput(queue_t *, mblk_t *); 172 static void udp_lwput(queue_t *, mblk_t *); 173 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 174 cred_t *credp, boolean_t isv6); 175 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 176 
cred_t *credp); 177 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 178 cred_t *credp); 179 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 180 int *errorp, udpattrs_t *udpattrs); 181 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 182 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 183 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 184 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 185 cred_t *cr); 186 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 187 ipha_t *ipha); 188 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 189 t_scalar_t destlen, t_scalar_t err); 190 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 191 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 192 boolean_t random); 193 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 194 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 195 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 196 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 197 static void udp_wput_other(queue_t *q, mblk_t *mp); 198 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 199 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 200 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 201 202 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 203 static void udp_stack_fini(netstackid_t stackid, void *arg); 204 205 static void *udp_kstat_init(netstackid_t stackid); 206 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 207 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 208 static void udp_kstat2_fini(netstackid_t, kstat_t *); 209 static int udp_kstat_update(kstat_t *kp, int rw); 210 211 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 212 213 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 
214 cred_t *, pid_t); 215 static void udp_ulp_recv(conn_t *, mblk_t *); 216 217 /* Common routine for TPI and socket module */ 218 static conn_t *udp_do_open(cred_t *, boolean_t, int); 219 static void udp_do_close(conn_t *); 220 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 221 boolean_t); 222 static int udp_do_unbind(conn_t *); 223 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 224 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 225 226 int udp_getsockname(sock_lower_handle_t, 227 struct sockaddr *, socklen_t *, cred_t *); 228 int udp_getpeername(sock_lower_handle_t, 229 struct sockaddr *, socklen_t *, cred_t *); 230 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 231 cred_t *cr); 232 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 233 234 #define UDP_RECV_HIWATER (56 * 1024) 235 #define UDP_RECV_LOWATER 128 236 #define UDP_XMIT_HIWATER (56 * 1024) 237 #define UDP_XMIT_LOWATER 1024 238 239 /* 240 * The following is defined in tcp.c 241 */ 242 extern int (*cl_inet_connect2)(netstackid_t stack_id, 243 uint8_t protocol, boolean_t is_outgoing, 244 sa_family_t addr_family, 245 uint8_t *laddrp, in_port_t lport, 246 uint8_t *faddrp, in_port_t fport, void *args); 247 248 /* 249 * Checks if the given destination addr/port is allowed out. 250 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 251 * Called for each connect() and for sendto()/sendmsg() to a different 252 * destination. 253 * For connect(), called in udp_connect(). 254 * For sendto()/sendmsg(), called in udp_output_v{4,6}(). 255 * 256 * This macro assumes that the cl_inet_connect2 hook is not NULL. 257 * Please check this before calling this macro. 
258 * 259 * void 260 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 261 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 262 */ 263 #define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ 264 (err) = 0; \ 265 /* \ 266 * Running in cluster mode - check and register active \ 267 * "connection" information \ 268 */ \ 269 if ((udp)->udp_ipversion == IPV4_VERSION) \ 270 (err) = (*cl_inet_connect2)( \ 271 (cp)->conn_netstack->netstack_stackid, \ 272 IPPROTO_UDP, is_outgoing, AF_INET, \ 273 (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ 274 (udp)->udp_port, \ 275 (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ 276 (in_port_t)(fport), NULL); \ 277 else \ 278 (err) = (*cl_inet_connect2)( \ 279 (cp)->conn_netstack->netstack_stackid, \ 280 IPPROTO_UDP, is_outgoing, AF_INET6, \ 281 (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ 282 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 283 } 284 285 static struct module_info udp_mod_info = { 286 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 287 }; 288 289 /* 290 * Entry points for UDP as a device. 291 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 292 */ 293 static struct qinit udp_rinitv4 = { 294 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 295 }; 296 297 static struct qinit udp_rinitv6 = { 298 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 299 }; 300 301 static struct qinit udp_winit = { 302 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info 303 }; 304 305 /* UDP entry point during fallback */ 306 struct qinit udp_fallback_sock_winit = { 307 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 308 }; 309 310 /* 311 * UDP needs to handle I_LINK and I_PLINK since ifconfig 312 * likes to use it as a place to hang the various streams. 
313 */ 314 static struct qinit udp_lrinit = { 315 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 316 }; 317 318 static struct qinit udp_lwinit = { 319 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 320 }; 321 322 /* For AF_INET aka /dev/udp */ 323 struct streamtab udpinfov4 = { 324 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 325 }; 326 327 /* For AF_INET6 aka /dev/udp6 */ 328 struct streamtab udpinfov6 = { 329 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 330 }; 331 332 static sin_t sin_null; /* Zero address for quick clears */ 333 static sin6_t sin6_null; /* Zero address for quick clears */ 334 335 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 336 337 /* Default structure copied into T_INFO_ACK messages */ 338 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 339 T_INFO_ACK, 340 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 341 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 342 T_INVALID, /* CDATA_size. udp does not support connect data. */ 343 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 344 sizeof (sin_t), /* ADDR_size. */ 345 0, /* OPT_size - not initialized here */ 346 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 347 T_CLTS, /* SERV_type. udp supports connection-less. */ 348 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 349 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 350 }; 351 352 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 353 354 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 355 T_INFO_ACK, 356 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 357 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 358 T_INVALID, /* CDATA_size. udp does not support connect data. */ 359 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 360 sizeof (sin6_t), /* ADDR_size. */ 361 0, /* OPT_size - not initialized here */ 362 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. 
headers */ 363 T_CLTS, /* SERV_type. udp supports connection-less. */ 364 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 365 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 366 }; 367 368 /* largest UDP port number */ 369 #define UDP_MAX_PORT 65535 370 371 /* 372 * Table of ND variables supported by udp. These are loaded into us_nd 373 * in udp_open. 374 * All of these are alterable, within the min/max values given, at run time. 375 */ 376 /* BEGIN CSTYLED */ 377 udpparam_t udp_param_arr[] = { 378 /*min max value name */ 379 { 0L, 256, 32, "udp_wroff_extra" }, 380 { 1L, 255, 255, "udp_ipv4_ttl" }, 381 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 382 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 383 { 0, 1, 1, "udp_do_checksum" }, 384 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 385 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 386 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 387 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 388 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 389 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 390 }; 391 /* END CSTYLED */ 392 393 /* Setable in /etc/system */ 394 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 395 uint32_t udp_random_anon_port = 1; 396 397 /* 398 * Hook functions to enable cluster networking. 399 * On non-clustered systems these vectors must always be NULL 400 */ 401 402 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 403 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 404 void *args) = NULL; 405 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 406 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 407 void *args) = NULL; 408 409 typedef union T_primitives *t_primp_t; 410 411 /* 412 * Return the next anonymous port in the privileged port range for 413 * bind checking. 
414 * 415 * Trusted Extension (TX) notes: TX allows administrator to mark or 416 * reserve ports as Multilevel ports (MLP). MLP has special function 417 * on TX systems. Once a port is made MLP, it's not available as 418 * ordinary port. This creates "holes" in the port name space. It 419 * may be necessary to skip the "holes" find a suitable anon port. 420 */ 421 static in_port_t 422 udp_get_next_priv_port(udp_t *udp) 423 { 424 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 425 in_port_t nextport; 426 boolean_t restart = B_FALSE; 427 udp_stack_t *us = udp->udp_us; 428 429 retry: 430 if (next_priv_port < us->us_min_anonpriv_port || 431 next_priv_port >= IPPORT_RESERVED) { 432 next_priv_port = IPPORT_RESERVED - 1; 433 if (restart) 434 return (0); 435 restart = B_TRUE; 436 } 437 438 if (is_system_labeled() && 439 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 440 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 441 next_priv_port = nextport; 442 goto retry; 443 } 444 445 return (next_priv_port--); 446 } 447 448 /* 449 * Hash list removal routine for udp_t structures. 450 */ 451 static void 452 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 453 { 454 udp_t *udpnext; 455 kmutex_t *lockp; 456 udp_stack_t *us = udp->udp_us; 457 458 if (udp->udp_ptpbhn == NULL) 459 return; 460 461 /* 462 * Extract the lock pointer in case there are concurrent 463 * hash_remove's for this instance. 
464 */ 465 ASSERT(udp->udp_port != 0); 466 if (!caller_holds_lock) { 467 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 468 us->us_bind_fanout_size)].uf_lock; 469 ASSERT(lockp != NULL); 470 mutex_enter(lockp); 471 } 472 if (udp->udp_ptpbhn != NULL) { 473 udpnext = udp->udp_bind_hash; 474 if (udpnext != NULL) { 475 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 476 udp->udp_bind_hash = NULL; 477 } 478 *udp->udp_ptpbhn = udpnext; 479 udp->udp_ptpbhn = NULL; 480 } 481 if (!caller_holds_lock) { 482 mutex_exit(lockp); 483 } 484 } 485 486 static void 487 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 488 { 489 udp_t **udpp; 490 udp_t *udpnext; 491 492 ASSERT(MUTEX_HELD(&uf->uf_lock)); 493 ASSERT(udp->udp_ptpbhn == NULL); 494 udpp = &uf->uf_udp; 495 udpnext = udpp[0]; 496 if (udpnext != NULL) { 497 /* 498 * If the new udp bound to the INADDR_ANY address 499 * and the first one in the list is not bound to 500 * INADDR_ANY we skip all entries until we find the 501 * first one bound to INADDR_ANY. 502 * This makes sure that applications binding to a 503 * specific address get preference over those binding to 504 * INADDR_ANY. 505 */ 506 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 507 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 508 while ((udpnext = udpp[0]) != NULL && 509 !V6_OR_V4_INADDR_ANY( 510 udpnext->udp_bound_v6src)) { 511 udpp = &(udpnext->udp_bind_hash); 512 } 513 if (udpnext != NULL) 514 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 515 } else { 516 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 517 } 518 } 519 udp->udp_bind_hash = udpnext; 520 udp->udp_ptpbhn = udpp; 521 udpp[0] = udp; 522 } 523 524 /* 525 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 526 * passed to udp_wput. 527 * It associates a port number and local address with the stream. 528 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 529 * protocol type (IPPROTO_UDP) placed in the message following the address. 
530 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 531 * (Called as writer.) 532 * 533 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 534 * without setting SO_REUSEADDR. This is needed so that they 535 * can be viewed as two independent transport protocols. 536 * However, anonymouns ports are allocated from the same range to avoid 537 * duplicating the us->us_next_port_to_try. 538 */ 539 static void 540 udp_tpi_bind(queue_t *q, mblk_t *mp) 541 { 542 sin_t *sin; 543 sin6_t *sin6; 544 mblk_t *mp1; 545 struct T_bind_req *tbr; 546 conn_t *connp; 547 udp_t *udp; 548 int error; 549 struct sockaddr *sa; 550 cred_t *cr; 551 552 /* 553 * All Solaris components should pass a db_credp 554 * for this TPI message, hence we ASSERT. 555 * But in case there is some other M_PROTO that looks 556 * like a TPI message sent by some other kernel 557 * component, we check and return an error. 558 */ 559 cr = msg_getcred(mp, NULL); 560 ASSERT(cr != NULL); 561 if (cr == NULL) { 562 udp_err_ack(q, mp, TSYSERR, EINVAL); 563 return; 564 } 565 566 connp = Q_TO_CONN(q); 567 udp = connp->conn_udp; 568 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 569 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 570 "udp_bind: bad req, len %u", 571 (uint_t)(mp->b_wptr - mp->b_rptr)); 572 udp_err_ack(q, mp, TPROTO, 0); 573 return; 574 } 575 if (udp->udp_state != TS_UNBND) { 576 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 577 "udp_bind: bad state, %u", udp->udp_state); 578 udp_err_ack(q, mp, TOUTSTATE, 0); 579 return; 580 } 581 /* 582 * Reallocate the message to make sure we have enough room for an 583 * address and the protocol type. 584 */ 585 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 586 if (!mp1) { 587 udp_err_ack(q, mp, TSYSERR, ENOMEM); 588 return; 589 } 590 591 mp = mp1; 592 593 /* Reset the message type in preparation for shipping it back. 
*/ 594 DB_TYPE(mp) = M_PCPROTO; 595 596 tbr = (struct T_bind_req *)mp->b_rptr; 597 switch (tbr->ADDR_length) { 598 case 0: /* Request for a generic port */ 599 tbr->ADDR_offset = sizeof (struct T_bind_req); 600 if (udp->udp_family == AF_INET) { 601 tbr->ADDR_length = sizeof (sin_t); 602 sin = (sin_t *)&tbr[1]; 603 *sin = sin_null; 604 sin->sin_family = AF_INET; 605 mp->b_wptr = (uchar_t *)&sin[1]; 606 sa = (struct sockaddr *)sin; 607 } else { 608 ASSERT(udp->udp_family == AF_INET6); 609 tbr->ADDR_length = sizeof (sin6_t); 610 sin6 = (sin6_t *)&tbr[1]; 611 *sin6 = sin6_null; 612 sin6->sin6_family = AF_INET6; 613 mp->b_wptr = (uchar_t *)&sin6[1]; 614 sa = (struct sockaddr *)sin6; 615 } 616 break; 617 618 case sizeof (sin_t): /* Complete IPv4 address */ 619 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 620 sizeof (sin_t)); 621 if (sa == NULL || !OK_32PTR((char *)sa)) { 622 udp_err_ack(q, mp, TSYSERR, EINVAL); 623 return; 624 } 625 if (udp->udp_family != AF_INET || 626 sa->sa_family != AF_INET) { 627 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 628 return; 629 } 630 break; 631 632 case sizeof (sin6_t): /* complete IPv6 address */ 633 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 634 sizeof (sin6_t)); 635 if (sa == NULL || !OK_32PTR((char *)sa)) { 636 udp_err_ack(q, mp, TSYSERR, EINVAL); 637 return; 638 } 639 if (udp->udp_family != AF_INET6 || 640 sa->sa_family != AF_INET6) { 641 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 642 return; 643 } 644 break; 645 646 default: /* Invalid request */ 647 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 648 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 649 udp_err_ack(q, mp, TBADADDR, 0); 650 return; 651 } 652 653 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 654 tbr->PRIM_type != O_T_BIND_REQ); 655 656 if (error != 0) { 657 if (error > 0) { 658 udp_err_ack(q, mp, TSYSERR, error); 659 } else { 660 udp_err_ack(q, mp, -error, 0); 661 } 662 } else { 663 tbr->PRIM_type = T_BIND_ACK; 664 
qreply(q, mp); 665 } 666 } 667 668 /* 669 * This routine handles each T_CONN_REQ message passed to udp. It 670 * associates a default destination address with the stream. 671 * 672 * This routine sends down a T_BIND_REQ to IP with the following mblks: 673 * T_BIND_REQ - specifying local and remote address/port 674 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 675 * T_OK_ACK - for the T_CONN_REQ 676 * T_CONN_CON - to keep the TPI user happy 677 * 678 * The connect completes in udp_do_connect. 679 * When a T_BIND_ACK is received information is extracted from the IRE 680 * and the two appended messages are sent to the TPI user. 681 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 682 * convert it to an error ack for the appropriate primitive. 683 */ 684 static void 685 udp_tpi_connect(queue_t *q, mblk_t *mp) 686 { 687 udp_t *udp; 688 conn_t *connp = Q_TO_CONN(q); 689 int error; 690 socklen_t len; 691 struct sockaddr *sa; 692 struct T_conn_req *tcr; 693 cred_t *cr; 694 695 /* 696 * All Solaris components should pass a db_credp 697 * for this TPI message, hence we ASSERT. 698 * But in case there is some other M_PROTO that looks 699 * like a TPI message sent by some other kernel 700 * component, we check and return an error. 701 */ 702 cr = msg_getcred(mp, NULL); 703 ASSERT(cr != NULL); 704 if (cr == NULL) { 705 udp_err_ack(q, mp, TSYSERR, EINVAL); 706 return; 707 } 708 709 udp = connp->conn_udp; 710 tcr = (struct T_conn_req *)mp->b_rptr; 711 712 /* A bit of sanity checking */ 713 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 714 udp_err_ack(q, mp, TPROTO, 0); 715 return; 716 } 717 718 if (tcr->OPT_length != 0) { 719 udp_err_ack(q, mp, TBADOPT, 0); 720 return; 721 } 722 723 /* 724 * Determine packet type based on type of address passed in 725 * the request should contain an IPv4 or IPv6 address. 
726 * Make sure that address family matches the type of 727 * family of the the address passed down 728 */ 729 len = tcr->DEST_length; 730 switch (tcr->DEST_length) { 731 default: 732 udp_err_ack(q, mp, TBADADDR, 0); 733 return; 734 735 case sizeof (sin_t): 736 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 737 sizeof (sin_t)); 738 break; 739 740 case sizeof (sin6_t): 741 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 742 sizeof (sin6_t)); 743 break; 744 } 745 746 error = proto_verify_ip_addr(udp->udp_family, sa, len); 747 if (error != 0) { 748 udp_err_ack(q, mp, TSYSERR, error); 749 return; 750 } 751 752 error = udp_do_connect(connp, sa, len, cr); 753 if (error != 0) { 754 if (error < 0) 755 udp_err_ack(q, mp, -error, 0); 756 else 757 udp_err_ack(q, mp, TSYSERR, error); 758 } else { 759 mblk_t *mp1; 760 /* 761 * We have to send a connection confirmation to 762 * keep TLI happy. 763 */ 764 if (udp->udp_family == AF_INET) { 765 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 766 sizeof (sin_t), NULL, 0); 767 } else { 768 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 769 sizeof (sin6_t), NULL, 0); 770 } 771 if (mp1 == NULL) { 772 udp_err_ack(q, mp, TSYSERR, ENOMEM); 773 return; 774 } 775 776 /* 777 * Send ok_ack for T_CONN_REQ 778 */ 779 mp = mi_tpi_ok_ack_alloc(mp); 780 if (mp == NULL) { 781 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 782 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 783 return; 784 } 785 786 putnext(connp->conn_rq, mp); 787 putnext(connp->conn_rq, mp1); 788 } 789 } 790 791 static int 792 udp_tpi_close(queue_t *q, int flags) 793 { 794 conn_t *connp; 795 796 if (flags & SO_FALLBACK) { 797 /* 798 * stream is being closed while in fallback 799 * simply free the resources that were allocated 800 */ 801 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 802 qprocsoff(q); 803 goto done; 804 } 805 806 connp = Q_TO_CONN(q); 807 udp_do_close(connp); 808 done: 809 q->q_ptr = WR(q)->q_ptr = NULL; 810 return (0); 811 } 812 813 /* 814 * Called in the close path to quiesce the conn 815 */ 816 void 817 udp_quiesce_conn(conn_t *connp) 818 { 819 udp_t *udp = connp->conn_udp; 820 821 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 822 /* 823 * Running in cluster mode - register unbind information 824 */ 825 if (udp->udp_ipversion == IPV4_VERSION) { 826 (*cl_inet_unbind)( 827 connp->conn_netstack->netstack_stackid, 828 IPPROTO_UDP, AF_INET, 829 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 830 (in_port_t)udp->udp_port, NULL); 831 } else { 832 (*cl_inet_unbind)( 833 connp->conn_netstack->netstack_stackid, 834 IPPROTO_UDP, AF_INET6, 835 (uint8_t *)(&(udp->udp_v6src)), 836 (in_port_t)udp->udp_port, NULL); 837 } 838 } 839 840 udp_bind_hash_remove(udp, B_FALSE); 841 842 } 843 844 void 845 udp_close_free(conn_t *connp) 846 { 847 udp_t *udp = connp->conn_udp; 848 849 /* If there are any options associated with the stream, free them. 
*/ 850 if (udp->udp_ip_snd_options != NULL) { 851 mi_free((char *)udp->udp_ip_snd_options); 852 udp->udp_ip_snd_options = NULL; 853 udp->udp_ip_snd_options_len = 0; 854 } 855 856 if (udp->udp_ip_rcv_options != NULL) { 857 mi_free((char *)udp->udp_ip_rcv_options); 858 udp->udp_ip_rcv_options = NULL; 859 udp->udp_ip_rcv_options_len = 0; 860 } 861 862 /* Free memory associated with sticky options */ 863 if (udp->udp_sticky_hdrs_len != 0) { 864 kmem_free(udp->udp_sticky_hdrs, 865 udp->udp_sticky_hdrs_len); 866 udp->udp_sticky_hdrs = NULL; 867 udp->udp_sticky_hdrs_len = 0; 868 } 869 if (udp->udp_last_cred != NULL) { 870 crfree(udp->udp_last_cred); 871 udp->udp_last_cred = NULL; 872 } 873 if (udp->udp_effective_cred != NULL) { 874 crfree(udp->udp_effective_cred); 875 udp->udp_effective_cred = NULL; 876 } 877 878 ip6_pkt_free(&udp->udp_sticky_ipp); 879 880 /* 881 * Clear any fields which the kmem_cache constructor clears. 882 * Only udp_connp needs to be preserved. 883 * TBD: We should make this more efficient to avoid clearing 884 * everything. 
885 */ 886 ASSERT(udp->udp_connp == connp); 887 bzero(udp, sizeof (udp_t)); 888 udp->udp_connp = connp; 889 } 890 891 static int 892 udp_do_disconnect(conn_t *connp) 893 { 894 udp_t *udp; 895 mblk_t *ire_mp; 896 udp_fanout_t *udpf; 897 udp_stack_t *us; 898 int error; 899 900 udp = connp->conn_udp; 901 us = udp->udp_us; 902 rw_enter(&udp->udp_rwlock, RW_WRITER); 903 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 904 rw_exit(&udp->udp_rwlock); 905 return (-TOUTSTATE); 906 } 907 udp->udp_pending_op = T_DISCON_REQ; 908 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 909 us->us_bind_fanout_size)]; 910 mutex_enter(&udpf->uf_lock); 911 udp->udp_v6src = udp->udp_bound_v6src; 912 udp->udp_state = TS_IDLE; 913 mutex_exit(&udpf->uf_lock); 914 915 if (udp->udp_family == AF_INET6) { 916 /* Rebuild the header template */ 917 error = udp_build_hdrs(udp); 918 if (error != 0) { 919 udp->udp_pending_op = -1; 920 rw_exit(&udp->udp_rwlock); 921 return (error); 922 } 923 } 924 925 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 926 if (ire_mp == NULL) { 927 mutex_enter(&udpf->uf_lock); 928 udp->udp_pending_op = -1; 929 mutex_exit(&udpf->uf_lock); 930 rw_exit(&udp->udp_rwlock); 931 return (ENOMEM); 932 } 933 934 rw_exit(&udp->udp_rwlock); 935 936 if (udp->udp_family == AF_INET6) { 937 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 938 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 939 } else { 940 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 941 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 942 } 943 944 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 945 } 946 947 948 static void 949 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 950 { 951 conn_t *connp = Q_TO_CONN(q); 952 int error; 953 954 /* 955 * Allocate the largest primitive we need to send back 956 * T_error_ack is > than T_ok_ack 957 */ 958 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 959 if (mp == NULL) { 960 /* Unable to reuse the T_DISCON_REQ for the 
ack. */ 961 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 962 return; 963 } 964 965 error = udp_do_disconnect(connp); 966 967 if (error != 0) { 968 if (error < 0) { 969 udp_err_ack(q, mp, -error, 0); 970 } else { 971 udp_err_ack(q, mp, TSYSERR, error); 972 } 973 } else { 974 mp = mi_tpi_ok_ack_alloc(mp); 975 ASSERT(mp != NULL); 976 qreply(q, mp); 977 } 978 } 979 980 int 981 udp_disconnect(conn_t *connp) 982 { 983 int error; 984 udp_t *udp = connp->conn_udp; 985 986 udp->udp_dgram_errind = B_FALSE; 987 988 error = udp_do_disconnect(connp); 989 990 if (error < 0) 991 error = proto_tlitosyserr(-error); 992 993 return (error); 994 } 995 996 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 997 static void 998 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 999 { 1000 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1001 qreply(q, mp); 1002 } 1003 1004 /* Shorthand to generate and send TPI error acks to our client */ 1005 static void 1006 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1007 int sys_error) 1008 { 1009 struct T_error_ack *teackp; 1010 1011 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1012 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1013 teackp = (struct T_error_ack *)mp->b_rptr; 1014 teackp->ERROR_prim = primitive; 1015 teackp->TLI_error = t_error; 1016 teackp->UNIX_error = sys_error; 1017 qreply(q, mp); 1018 } 1019 } 1020 1021 /*ARGSUSED*/ 1022 static int 1023 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1024 { 1025 int i; 1026 udp_t *udp = Q_TO_UDP(q); 1027 udp_stack_t *us = udp->udp_us; 1028 1029 for (i = 0; i < us->us_num_epriv_ports; i++) { 1030 if (us->us_epriv_ports[i] != 0) 1031 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1032 } 1033 return (0); 1034 } 1035 1036 /* ARGSUSED */ 1037 static int 1038 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1039 cred_t *cr) 1040 { 1041 long 
new_value; 1042 int i; 1043 udp_t *udp = Q_TO_UDP(q); 1044 udp_stack_t *us = udp->udp_us; 1045 1046 /* 1047 * Fail the request if the new value does not lie within the 1048 * port number limits. 1049 */ 1050 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1051 new_value <= 0 || new_value >= 65536) { 1052 return (EINVAL); 1053 } 1054 1055 /* Check if the value is already in the list */ 1056 for (i = 0; i < us->us_num_epriv_ports; i++) { 1057 if (new_value == us->us_epriv_ports[i]) { 1058 return (EEXIST); 1059 } 1060 } 1061 /* Find an empty slot */ 1062 for (i = 0; i < us->us_num_epriv_ports; i++) { 1063 if (us->us_epriv_ports[i] == 0) 1064 break; 1065 } 1066 if (i == us->us_num_epriv_ports) { 1067 return (EOVERFLOW); 1068 } 1069 1070 /* Set the new value */ 1071 us->us_epriv_ports[i] = (in_port_t)new_value; 1072 return (0); 1073 } 1074 1075 /* ARGSUSED */ 1076 static int 1077 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1078 cred_t *cr) 1079 { 1080 long new_value; 1081 int i; 1082 udp_t *udp = Q_TO_UDP(q); 1083 udp_stack_t *us = udp->udp_us; 1084 1085 /* 1086 * Fail the request if the new value does not lie within the 1087 * port number limits. 1088 */ 1089 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1090 new_value <= 0 || new_value >= 65536) { 1091 return (EINVAL); 1092 } 1093 1094 /* Check that the value is already in the list */ 1095 for (i = 0; i < us->us_num_epriv_ports; i++) { 1096 if (us->us_epriv_ports[i] == new_value) 1097 break; 1098 } 1099 if (i == us->us_num_epriv_ports) { 1100 return (ESRCH); 1101 } 1102 1103 /* Clear the value */ 1104 us->us_epriv_ports[i] = 0; 1105 return (0); 1106 } 1107 1108 /* At minimum we need 4 bytes of UDP header */ 1109 #define ICMP_MIN_UDP_HDR 4 1110 1111 /* 1112 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1113 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 
1114 * Assumes that IP has pulled up everything up to and including the ICMP header. 1115 */ 1116 static void 1117 udp_icmp_error(conn_t *connp, mblk_t *mp) 1118 { 1119 icmph_t *icmph; 1120 ipha_t *ipha; 1121 int iph_hdr_length; 1122 udpha_t *udpha; 1123 sin_t sin; 1124 sin6_t sin6; 1125 mblk_t *mp1; 1126 int error = 0; 1127 udp_t *udp = connp->conn_udp; 1128 1129 mp1 = NULL; 1130 ipha = (ipha_t *)mp->b_rptr; 1131 1132 ASSERT(OK_32PTR(mp->b_rptr)); 1133 1134 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1135 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1136 udp_icmp_error_ipv6(connp, mp); 1137 return; 1138 } 1139 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1140 1141 /* Skip past the outer IP and ICMP headers */ 1142 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1143 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1144 ipha = (ipha_t *)&icmph[1]; 1145 1146 /* Skip past the inner IP and find the ULP header */ 1147 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1148 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1149 1150 switch (icmph->icmph_type) { 1151 case ICMP_DEST_UNREACHABLE: 1152 switch (icmph->icmph_code) { 1153 case ICMP_FRAGMENTATION_NEEDED: 1154 /* 1155 * IP has already adjusted the path MTU. 1156 */ 1157 break; 1158 case ICMP_PORT_UNREACHABLE: 1159 case ICMP_PROTOCOL_UNREACHABLE: 1160 error = ECONNREFUSED; 1161 break; 1162 default: 1163 /* Transient errors */ 1164 break; 1165 } 1166 break; 1167 default: 1168 /* Transient errors */ 1169 break; 1170 } 1171 if (error == 0) { 1172 freemsg(mp); 1173 return; 1174 } 1175 1176 /* 1177 * Deliver T_UDERROR_IND when the application has asked for it. 1178 * The socket layer enables this automatically when connected. 
1179 */ 1180 if (!udp->udp_dgram_errind) { 1181 freemsg(mp); 1182 return; 1183 } 1184 1185 1186 switch (udp->udp_family) { 1187 case AF_INET: 1188 sin = sin_null; 1189 sin.sin_family = AF_INET; 1190 sin.sin_addr.s_addr = ipha->ipha_dst; 1191 sin.sin_port = udpha->uha_dst_port; 1192 if (IPCL_IS_NONSTR(connp)) { 1193 rw_enter(&udp->udp_rwlock, RW_WRITER); 1194 if (udp->udp_state == TS_DATA_XFER) { 1195 if (sin.sin_port == udp->udp_dstport && 1196 sin.sin_addr.s_addr == 1197 V4_PART_OF_V6(udp->udp_v6dst)) { 1198 rw_exit(&udp->udp_rwlock); 1199 (*connp->conn_upcalls->su_set_error) 1200 (connp->conn_upper_handle, error); 1201 goto done; 1202 } 1203 } else { 1204 udp->udp_delayed_error = error; 1205 *((sin_t *)&udp->udp_delayed_addr) = sin; 1206 } 1207 rw_exit(&udp->udp_rwlock); 1208 } else { 1209 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1210 NULL, 0, error); 1211 } 1212 break; 1213 case AF_INET6: 1214 sin6 = sin6_null; 1215 sin6.sin6_family = AF_INET6; 1216 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1217 sin6.sin6_port = udpha->uha_dst_port; 1218 if (IPCL_IS_NONSTR(connp)) { 1219 rw_enter(&udp->udp_rwlock, RW_WRITER); 1220 if (udp->udp_state == TS_DATA_XFER) { 1221 if (sin6.sin6_port == udp->udp_dstport && 1222 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1223 &udp->udp_v6dst)) { 1224 rw_exit(&udp->udp_rwlock); 1225 (*connp->conn_upcalls->su_set_error) 1226 (connp->conn_upper_handle, error); 1227 goto done; 1228 } 1229 } else { 1230 udp->udp_delayed_error = error; 1231 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1232 } 1233 rw_exit(&udp->udp_rwlock); 1234 } else { 1235 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1236 NULL, 0, error); 1237 } 1238 break; 1239 } 1240 if (mp1 != NULL) 1241 putnext(connp->conn_rq, mp1); 1242 done: 1243 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1244 freemsg(mp); 1245 } 1246 1247 /* 1248 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 
1249 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1250 * Assumes that IP has pulled up all the extension headers as well as the 1251 * ICMPv6 header. 1252 */ 1253 static void 1254 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1255 { 1256 icmp6_t *icmp6; 1257 ip6_t *ip6h, *outer_ip6h; 1258 uint16_t iph_hdr_length; 1259 uint8_t *nexthdrp; 1260 udpha_t *udpha; 1261 sin6_t sin6; 1262 mblk_t *mp1; 1263 int error = 0; 1264 udp_t *udp = connp->conn_udp; 1265 udp_stack_t *us = udp->udp_us; 1266 1267 outer_ip6h = (ip6_t *)mp->b_rptr; 1268 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1269 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1270 else 1271 iph_hdr_length = IPV6_HDR_LEN; 1272 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1273 ip6h = (ip6_t *)&icmp6[1]; 1274 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1275 freemsg(mp); 1276 return; 1277 } 1278 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1279 1280 switch (icmp6->icmp6_type) { 1281 case ICMP6_DST_UNREACH: 1282 switch (icmp6->icmp6_code) { 1283 case ICMP6_DST_UNREACH_NOPORT: 1284 error = ECONNREFUSED; 1285 break; 1286 case ICMP6_DST_UNREACH_ADMIN: 1287 case ICMP6_DST_UNREACH_NOROUTE: 1288 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1289 case ICMP6_DST_UNREACH_ADDR: 1290 /* Transient errors */ 1291 break; 1292 default: 1293 break; 1294 } 1295 break; 1296 case ICMP6_PACKET_TOO_BIG: { 1297 struct T_unitdata_ind *tudi; 1298 struct T_opthdr *toh; 1299 size_t udi_size; 1300 mblk_t *newmp; 1301 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1302 sizeof (struct ip6_mtuinfo); 1303 sin6_t *sin6; 1304 struct ip6_mtuinfo *mtuinfo; 1305 1306 /* 1307 * If the application has requested to receive path mtu 1308 * information, send up an empty message containing an 1309 * IPV6_PATHMTU ancillary data item. 
1310 */ 1311 if (!udp->udp_ipv6_recvpathmtu) 1312 break; 1313 1314 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1315 opt_length; 1316 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1317 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1318 break; 1319 } 1320 1321 /* 1322 * newmp->b_cont is left to NULL on purpose. This is an 1323 * empty message containing only ancillary data. 1324 */ 1325 newmp->b_datap->db_type = M_PROTO; 1326 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1327 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1328 tudi->PRIM_type = T_UNITDATA_IND; 1329 tudi->SRC_length = sizeof (sin6_t); 1330 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1331 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1332 tudi->OPT_length = opt_length; 1333 1334 sin6 = (sin6_t *)&tudi[1]; 1335 bzero(sin6, sizeof (sin6_t)); 1336 sin6->sin6_family = AF_INET6; 1337 sin6->sin6_addr = udp->udp_v6dst; 1338 1339 toh = (struct T_opthdr *)&sin6[1]; 1340 toh->level = IPPROTO_IPV6; 1341 toh->name = IPV6_PATHMTU; 1342 toh->len = opt_length; 1343 toh->status = 0; 1344 1345 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1346 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1347 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1348 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1349 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1350 /* 1351 * We've consumed everything we need from the original 1352 * message. Free it, then send our empty message. 
1353 */ 1354 freemsg(mp); 1355 udp_ulp_recv(connp, newmp); 1356 1357 return; 1358 } 1359 case ICMP6_TIME_EXCEEDED: 1360 /* Transient errors */ 1361 break; 1362 case ICMP6_PARAM_PROB: 1363 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1364 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1365 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1366 (uchar_t *)nexthdrp) { 1367 error = ECONNREFUSED; 1368 break; 1369 } 1370 break; 1371 } 1372 if (error == 0) { 1373 freemsg(mp); 1374 return; 1375 } 1376 1377 /* 1378 * Deliver T_UDERROR_IND when the application has asked for it. 1379 * The socket layer enables this automatically when connected. 1380 */ 1381 if (!udp->udp_dgram_errind) { 1382 freemsg(mp); 1383 return; 1384 } 1385 1386 sin6 = sin6_null; 1387 sin6.sin6_family = AF_INET6; 1388 sin6.sin6_addr = ip6h->ip6_dst; 1389 sin6.sin6_port = udpha->uha_dst_port; 1390 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1391 1392 if (IPCL_IS_NONSTR(connp)) { 1393 rw_enter(&udp->udp_rwlock, RW_WRITER); 1394 if (udp->udp_state == TS_DATA_XFER) { 1395 if (sin6.sin6_port == udp->udp_dstport && 1396 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1397 &udp->udp_v6dst)) { 1398 rw_exit(&udp->udp_rwlock); 1399 (*connp->conn_upcalls->su_set_error) 1400 (connp->conn_upper_handle, error); 1401 goto done; 1402 } 1403 } else { 1404 udp->udp_delayed_error = error; 1405 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1406 } 1407 rw_exit(&udp->udp_rwlock); 1408 } else { 1409 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1410 NULL, 0, error); 1411 if (mp1 != NULL) 1412 putnext(connp->conn_rq, mp1); 1413 } 1414 done: 1415 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1416 freemsg(mp); 1417 } 1418 1419 /* 1420 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1421 * The local address is filled in if endpoint is bound. 
The remote address 1422 * is filled in if remote address has been precified ("connected endpoint") 1423 * (The concept of connected CLTS sockets is alien to published TPI 1424 * but we support it anyway). 1425 */ 1426 static void 1427 udp_addr_req(queue_t *q, mblk_t *mp) 1428 { 1429 sin_t *sin; 1430 sin6_t *sin6; 1431 mblk_t *ackmp; 1432 struct T_addr_ack *taa; 1433 udp_t *udp = Q_TO_UDP(q); 1434 1435 /* Make it large enough for worst case */ 1436 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1437 2 * sizeof (sin6_t), 1); 1438 if (ackmp == NULL) { 1439 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1440 return; 1441 } 1442 taa = (struct T_addr_ack *)ackmp->b_rptr; 1443 1444 bzero(taa, sizeof (struct T_addr_ack)); 1445 ackmp->b_wptr = (uchar_t *)&taa[1]; 1446 1447 taa->PRIM_type = T_ADDR_ACK; 1448 ackmp->b_datap->db_type = M_PCPROTO; 1449 rw_enter(&udp->udp_rwlock, RW_READER); 1450 /* 1451 * Note: Following code assumes 32 bit alignment of basic 1452 * data structures like sin_t and struct T_addr_ack. 1453 */ 1454 if (udp->udp_state != TS_UNBND) { 1455 /* 1456 * Fill in local address first 1457 */ 1458 taa->LOCADDR_offset = sizeof (*taa); 1459 if (udp->udp_family == AF_INET) { 1460 taa->LOCADDR_length = sizeof (sin_t); 1461 sin = (sin_t *)&taa[1]; 1462 /* Fill zeroes and then initialize non-zero fields */ 1463 *sin = sin_null; 1464 sin->sin_family = AF_INET; 1465 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1466 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1467 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1468 sin->sin_addr.s_addr); 1469 } else { 1470 /* 1471 * INADDR_ANY 1472 * udp_v6src is not set, we might be bound to 1473 * broadcast/multicast. 
Use udp_bound_v6src as 1474 * local address instead (that could 1475 * also still be INADDR_ANY) 1476 */ 1477 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1478 sin->sin_addr.s_addr); 1479 } 1480 sin->sin_port = udp->udp_port; 1481 ackmp->b_wptr = (uchar_t *)&sin[1]; 1482 if (udp->udp_state == TS_DATA_XFER) { 1483 /* 1484 * connected, fill remote address too 1485 */ 1486 taa->REMADDR_length = sizeof (sin_t); 1487 /* assumed 32-bit alignment */ 1488 taa->REMADDR_offset = taa->LOCADDR_offset + 1489 taa->LOCADDR_length; 1490 1491 sin = (sin_t *)(ackmp->b_rptr + 1492 taa->REMADDR_offset); 1493 /* initialize */ 1494 *sin = sin_null; 1495 sin->sin_family = AF_INET; 1496 sin->sin_addr.s_addr = 1497 V4_PART_OF_V6(udp->udp_v6dst); 1498 sin->sin_port = udp->udp_dstport; 1499 ackmp->b_wptr = (uchar_t *)&sin[1]; 1500 } 1501 } else { 1502 taa->LOCADDR_length = sizeof (sin6_t); 1503 sin6 = (sin6_t *)&taa[1]; 1504 /* Fill zeroes and then initialize non-zero fields */ 1505 *sin6 = sin6_null; 1506 sin6->sin6_family = AF_INET6; 1507 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1508 sin6->sin6_addr = udp->udp_v6src; 1509 } else { 1510 /* 1511 * UNSPECIFIED 1512 * udp_v6src is not set, we might be bound to 1513 * broadcast/multicast. 
Use udp_bound_v6src as 1514 * local address instead (that could 1515 * also still be UNSPECIFIED) 1516 */ 1517 sin6->sin6_addr = 1518 udp->udp_bound_v6src; 1519 } 1520 sin6->sin6_port = udp->udp_port; 1521 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1522 if (udp->udp_state == TS_DATA_XFER) { 1523 /* 1524 * connected, fill remote address too 1525 */ 1526 taa->REMADDR_length = sizeof (sin6_t); 1527 /* assumed 32-bit alignment */ 1528 taa->REMADDR_offset = taa->LOCADDR_offset + 1529 taa->LOCADDR_length; 1530 1531 sin6 = (sin6_t *)(ackmp->b_rptr + 1532 taa->REMADDR_offset); 1533 /* initialize */ 1534 *sin6 = sin6_null; 1535 sin6->sin6_family = AF_INET6; 1536 sin6->sin6_addr = udp->udp_v6dst; 1537 sin6->sin6_port = udp->udp_dstport; 1538 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1539 } 1540 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1541 } 1542 } 1543 rw_exit(&udp->udp_rwlock); 1544 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1545 qreply(q, ackmp); 1546 } 1547 1548 static void 1549 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1550 { 1551 if (udp->udp_family == AF_INET) { 1552 *tap = udp_g_t_info_ack_ipv4; 1553 } else { 1554 *tap = udp_g_t_info_ack_ipv6; 1555 } 1556 tap->CURRENT_state = udp->udp_state; 1557 tap->OPT_size = udp_max_optsize; 1558 } 1559 1560 static void 1561 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1562 t_uscalar_t cap_bits1) 1563 { 1564 tcap->CAP_bits1 = 0; 1565 1566 if (cap_bits1 & TC1_INFO) { 1567 udp_copy_info(&tcap->INFO_ack, udp); 1568 tcap->CAP_bits1 |= TC1_INFO; 1569 } 1570 } 1571 1572 /* 1573 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1574 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1575 * udp_g_t_info_ack. The current state of the stream is copied from 1576 * udp_state. 
1577 */ 1578 static void 1579 udp_capability_req(queue_t *q, mblk_t *mp) 1580 { 1581 t_uscalar_t cap_bits1; 1582 struct T_capability_ack *tcap; 1583 udp_t *udp = Q_TO_UDP(q); 1584 1585 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1586 1587 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1588 mp->b_datap->db_type, T_CAPABILITY_ACK); 1589 if (!mp) 1590 return; 1591 1592 tcap = (struct T_capability_ack *)mp->b_rptr; 1593 udp_do_capability_ack(udp, tcap, cap_bits1); 1594 1595 qreply(q, mp); 1596 } 1597 1598 /* 1599 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1600 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1601 * The current state of the stream is copied from udp_state. 1602 */ 1603 static void 1604 udp_info_req(queue_t *q, mblk_t *mp) 1605 { 1606 udp_t *udp = Q_TO_UDP(q); 1607 1608 /* Create a T_INFO_ACK message. */ 1609 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1610 T_INFO_ACK); 1611 if (!mp) 1612 return; 1613 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1614 qreply(q, mp); 1615 } 1616 1617 /* For /dev/udp aka AF_INET open */ 1618 static int 1619 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1620 { 1621 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1622 } 1623 1624 /* For /dev/udp6 aka AF_INET6 open */ 1625 static int 1626 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1627 { 1628 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1629 } 1630 1631 /* 1632 * This is the open routine for udp. It allocates a udp_t structure for 1633 * the stream and, on the first open of the module, creates an ND table. 
1634 */ 1635 /*ARGSUSED2*/ 1636 static int 1637 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1638 boolean_t isv6) 1639 { 1640 int error; 1641 udp_t *udp; 1642 conn_t *connp; 1643 dev_t conn_dev; 1644 udp_stack_t *us; 1645 vmem_t *minor_arena; 1646 1647 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1648 1649 /* If the stream is already open, return immediately. */ 1650 if (q->q_ptr != NULL) 1651 return (0); 1652 1653 if (sflag == MODOPEN) 1654 return (EINVAL); 1655 1656 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1657 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1658 minor_arena = ip_minor_arena_la; 1659 } else { 1660 /* 1661 * Either minor numbers in the large arena were exhausted 1662 * or a non socket application is doing the open. 1663 * Try to allocate from the small arena. 1664 */ 1665 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1666 return (EBUSY); 1667 1668 minor_arena = ip_minor_arena_sa; 1669 } 1670 1671 if (flag & SO_FALLBACK) { 1672 /* 1673 * Non streams socket needs a stream to fallback to 1674 */ 1675 RD(q)->q_ptr = (void *)conn_dev; 1676 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1677 WR(q)->q_ptr = (void *)minor_arena; 1678 qprocson(q); 1679 return (0); 1680 } 1681 1682 connp = udp_do_open(credp, isv6, KM_SLEEP); 1683 if (connp == NULL) { 1684 inet_minor_free(minor_arena, conn_dev); 1685 return (ENOMEM); 1686 } 1687 udp = connp->conn_udp; 1688 us = udp->udp_us; 1689 1690 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1691 connp->conn_dev = conn_dev; 1692 connp->conn_minor_arena = minor_arena; 1693 1694 /* 1695 * Initialize the udp_t structure for this stream. 
1696 */ 1697 q->q_ptr = connp; 1698 WR(q)->q_ptr = connp; 1699 connp->conn_rq = q; 1700 connp->conn_wq = WR(q); 1701 1702 rw_enter(&udp->udp_rwlock, RW_WRITER); 1703 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1704 ASSERT(connp->conn_udp == udp); 1705 ASSERT(udp->udp_connp == connp); 1706 1707 if (flag & SO_SOCKSTR) { 1708 connp->conn_flags |= IPCL_SOCKET; 1709 udp->udp_issocket = B_TRUE; 1710 } 1711 1712 q->q_hiwat = us->us_recv_hiwat; 1713 WR(q)->q_hiwat = us->us_xmit_hiwat; 1714 WR(q)->q_lowat = us->us_xmit_lowat; 1715 1716 qprocson(q); 1717 1718 if (udp->udp_family == AF_INET6) { 1719 /* Build initial header template for transmit */ 1720 if ((error = udp_build_hdrs(udp)) != 0) { 1721 rw_exit(&udp->udp_rwlock); 1722 qprocsoff(q); 1723 inet_minor_free(minor_arena, conn_dev); 1724 ipcl_conn_destroy(connp); 1725 return (error); 1726 } 1727 } 1728 rw_exit(&udp->udp_rwlock); 1729 1730 /* Set the Stream head write offset and high watermark. */ 1731 (void) proto_set_tx_wroff(q, connp, 1732 udp->udp_max_hdr_len + us->us_wroff_extra); 1733 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */ 1734 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1735 1736 mutex_enter(&connp->conn_lock); 1737 connp->conn_state_flags &= ~CONN_INCIPIENT; 1738 mutex_exit(&connp->conn_lock); 1739 return (0); 1740 } 1741 1742 /* 1743 * Which UDP options OK to set through T_UNITDATA_REQ... 
1744 */ 1745 /* ARGSUSED */ 1746 static boolean_t 1747 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1748 { 1749 return (B_TRUE); 1750 } 1751 1752 /* 1753 * This routine gets default values of certain options whose default 1754 * values are maintained by protcol specific code 1755 */ 1756 /* ARGSUSED */ 1757 int 1758 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1759 { 1760 udp_t *udp = Q_TO_UDP(q); 1761 udp_stack_t *us = udp->udp_us; 1762 int *i1 = (int *)ptr; 1763 1764 switch (level) { 1765 case IPPROTO_IP: 1766 switch (name) { 1767 case IP_MULTICAST_TTL: 1768 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1769 return (sizeof (uchar_t)); 1770 case IP_MULTICAST_LOOP: 1771 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1772 return (sizeof (uchar_t)); 1773 } 1774 break; 1775 case IPPROTO_IPV6: 1776 switch (name) { 1777 case IPV6_MULTICAST_HOPS: 1778 *i1 = IP_DEFAULT_MULTICAST_TTL; 1779 return (sizeof (int)); 1780 case IPV6_MULTICAST_LOOP: 1781 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1782 return (sizeof (int)); 1783 case IPV6_UNICAST_HOPS: 1784 *i1 = us->us_ipv6_hoplimit; 1785 return (sizeof (int)); 1786 } 1787 break; 1788 } 1789 return (-1); 1790 } 1791 1792 /* 1793 * This routine retrieves the current status of socket options. 1794 * It returns the size of the option retrieved. 
1795 */ 1796 static int 1797 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1798 { 1799 udp_t *udp = connp->conn_udp; 1800 udp_stack_t *us = udp->udp_us; 1801 int *i1 = (int *)ptr; 1802 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 1803 int len; 1804 1805 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 1806 switch (level) { 1807 case SOL_SOCKET: 1808 switch (name) { 1809 case SO_DEBUG: 1810 *i1 = udp->udp_debug; 1811 break; /* goto sizeof (int) option return */ 1812 case SO_REUSEADDR: 1813 *i1 = udp->udp_reuseaddr; 1814 break; /* goto sizeof (int) option return */ 1815 case SO_TYPE: 1816 *i1 = SOCK_DGRAM; 1817 break; /* goto sizeof (int) option return */ 1818 1819 /* 1820 * The following three items are available here, 1821 * but are only meaningful to IP. 1822 */ 1823 case SO_DONTROUTE: 1824 *i1 = udp->udp_dontroute; 1825 break; /* goto sizeof (int) option return */ 1826 case SO_USELOOPBACK: 1827 *i1 = udp->udp_useloopback; 1828 break; /* goto sizeof (int) option return */ 1829 case SO_BROADCAST: 1830 *i1 = udp->udp_broadcast; 1831 break; /* goto sizeof (int) option return */ 1832 1833 case SO_SNDBUF: 1834 *i1 = udp->udp_xmit_hiwat; 1835 break; /* goto sizeof (int) option return */ 1836 case SO_RCVBUF: 1837 *i1 = udp->udp_rcv_disply_hiwat; 1838 break; /* goto sizeof (int) option return */ 1839 case SO_DGRAM_ERRIND: 1840 *i1 = udp->udp_dgram_errind; 1841 break; /* goto sizeof (int) option return */ 1842 case SO_RECVUCRED: 1843 *i1 = udp->udp_recvucred; 1844 break; /* goto sizeof (int) option return */ 1845 case SO_TIMESTAMP: 1846 *i1 = udp->udp_timestamp; 1847 break; /* goto sizeof (int) option return */ 1848 case SO_ANON_MLP: 1849 *i1 = connp->conn_anon_mlp; 1850 break; /* goto sizeof (int) option return */ 1851 case SO_MAC_EXEMPT: 1852 *i1 = connp->conn_mac_exempt; 1853 break; /* goto sizeof (int) option return */ 1854 case SO_ALLZONES: 1855 *i1 = connp->conn_allzones; 1856 break; /* goto sizeof (int) option return */ 1857 case SO_EXCLBIND: 1858 *i1 = 
udp->udp_exclbind ? SO_EXCLBIND : 0; 1859 break; 1860 case SO_PROTOTYPE: 1861 *i1 = IPPROTO_UDP; 1862 break; 1863 case SO_DOMAIN: 1864 *i1 = udp->udp_family; 1865 break; 1866 default: 1867 return (-1); 1868 } 1869 break; 1870 case IPPROTO_IP: 1871 if (udp->udp_family != AF_INET) 1872 return (-1); 1873 switch (name) { 1874 case IP_OPTIONS: 1875 case T_IP_OPTIONS: 1876 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 1877 if (len > 0) { 1878 bcopy(udp->udp_ip_rcv_options + 1879 udp->udp_label_len, ptr, len); 1880 } 1881 return (len); 1882 case IP_TOS: 1883 case T_IP_TOS: 1884 *i1 = (int)udp->udp_type_of_service; 1885 break; /* goto sizeof (int) option return */ 1886 case IP_TTL: 1887 *i1 = (int)udp->udp_ttl; 1888 break; /* goto sizeof (int) option return */ 1889 case IP_DHCPINIT_IF: 1890 return (-EINVAL); 1891 case IP_NEXTHOP: 1892 case IP_RECVPKTINFO: 1893 /* 1894 * This also handles IP_PKTINFO. 1895 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1896 * Differentiation is based on the size of the argument 1897 * passed in. 1898 * This option is handled in IP which will return an 1899 * error for IP_PKTINFO as it's not supported as a 1900 * sticky option. 
1901 */ 1902 return (-EINVAL); 1903 case IP_MULTICAST_IF: 1904 /* 0 address if not set */ 1905 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 1906 return (sizeof (ipaddr_t)); 1907 case IP_MULTICAST_TTL: 1908 *(uchar_t *)ptr = udp->udp_multicast_ttl; 1909 return (sizeof (uchar_t)); 1910 case IP_MULTICAST_LOOP: 1911 *ptr = connp->conn_multicast_loop; 1912 return (sizeof (uint8_t)); 1913 case IP_RECVOPTS: 1914 *i1 = udp->udp_recvopts; 1915 break; /* goto sizeof (int) option return */ 1916 case IP_RECVDSTADDR: 1917 *i1 = udp->udp_recvdstaddr; 1918 break; /* goto sizeof (int) option return */ 1919 case IP_RECVIF: 1920 *i1 = udp->udp_recvif; 1921 break; /* goto sizeof (int) option return */ 1922 case IP_RECVSLLA: 1923 *i1 = udp->udp_recvslla; 1924 break; /* goto sizeof (int) option return */ 1925 case IP_RECVTTL: 1926 *i1 = udp->udp_recvttl; 1927 break; /* goto sizeof (int) option return */ 1928 case IP_ADD_MEMBERSHIP: 1929 case IP_DROP_MEMBERSHIP: 1930 case IP_BLOCK_SOURCE: 1931 case IP_UNBLOCK_SOURCE: 1932 case IP_ADD_SOURCE_MEMBERSHIP: 1933 case IP_DROP_SOURCE_MEMBERSHIP: 1934 case MCAST_JOIN_GROUP: 1935 case MCAST_LEAVE_GROUP: 1936 case MCAST_BLOCK_SOURCE: 1937 case MCAST_UNBLOCK_SOURCE: 1938 case MCAST_JOIN_SOURCE_GROUP: 1939 case MCAST_LEAVE_SOURCE_GROUP: 1940 /* cannot "get" the value for these */ 1941 return (-1); 1942 case IP_BOUND_IF: 1943 /* Zero if not set */ 1944 *i1 = udp->udp_bound_if; 1945 break; /* goto sizeof (int) option return */ 1946 case IP_UNSPEC_SRC: 1947 *i1 = udp->udp_unspec_source; 1948 break; /* goto sizeof (int) option return */ 1949 case IP_BROADCAST_TTL: 1950 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1951 return (sizeof (uchar_t)); 1952 default: 1953 return (-1); 1954 } 1955 break; 1956 case IPPROTO_IPV6: 1957 if (udp->udp_family != AF_INET6) 1958 return (-1); 1959 switch (name) { 1960 case IPV6_UNICAST_HOPS: 1961 *i1 = (unsigned int)udp->udp_ttl; 1962 break; /* goto sizeof (int) option return */ 1963 case IPV6_MULTICAST_IF: 1964 /* 0 
index if not set */ 1965 *i1 = udp->udp_multicast_if_index; 1966 break; /* goto sizeof (int) option return */ 1967 case IPV6_MULTICAST_HOPS: 1968 *i1 = udp->udp_multicast_ttl; 1969 break; /* goto sizeof (int) option return */ 1970 case IPV6_MULTICAST_LOOP: 1971 *i1 = connp->conn_multicast_loop; 1972 break; /* goto sizeof (int) option return */ 1973 case IPV6_JOIN_GROUP: 1974 case IPV6_LEAVE_GROUP: 1975 case MCAST_JOIN_GROUP: 1976 case MCAST_LEAVE_GROUP: 1977 case MCAST_BLOCK_SOURCE: 1978 case MCAST_UNBLOCK_SOURCE: 1979 case MCAST_JOIN_SOURCE_GROUP: 1980 case MCAST_LEAVE_SOURCE_GROUP: 1981 /* cannot "get" the value for these */ 1982 return (-1); 1983 case IPV6_BOUND_IF: 1984 /* Zero if not set */ 1985 *i1 = udp->udp_bound_if; 1986 break; /* goto sizeof (int) option return */ 1987 case IPV6_UNSPEC_SRC: 1988 *i1 = udp->udp_unspec_source; 1989 break; /* goto sizeof (int) option return */ 1990 case IPV6_RECVPKTINFO: 1991 *i1 = udp->udp_ip_recvpktinfo; 1992 break; /* goto sizeof (int) option return */ 1993 case IPV6_RECVTCLASS: 1994 *i1 = udp->udp_ipv6_recvtclass; 1995 break; /* goto sizeof (int) option return */ 1996 case IPV6_RECVPATHMTU: 1997 *i1 = udp->udp_ipv6_recvpathmtu; 1998 break; /* goto sizeof (int) option return */ 1999 case IPV6_RECVHOPLIMIT: 2000 *i1 = udp->udp_ipv6_recvhoplimit; 2001 break; /* goto sizeof (int) option return */ 2002 case IPV6_RECVHOPOPTS: 2003 *i1 = udp->udp_ipv6_recvhopopts; 2004 break; /* goto sizeof (int) option return */ 2005 case IPV6_RECVDSTOPTS: 2006 *i1 = udp->udp_ipv6_recvdstopts; 2007 break; /* goto sizeof (int) option return */ 2008 case _OLD_IPV6_RECVDSTOPTS: 2009 *i1 = udp->udp_old_ipv6_recvdstopts; 2010 break; /* goto sizeof (int) option return */ 2011 case IPV6_RECVRTHDRDSTOPTS: 2012 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2013 break; /* goto sizeof (int) option return */ 2014 case IPV6_RECVRTHDR: 2015 *i1 = udp->udp_ipv6_recvrthdr; 2016 break; /* goto sizeof (int) option return */ 2017 case IPV6_PKTINFO: { 2018 /* XXX assumes 
that caller has room for max size! */ 2019 struct in6_pktinfo *pkti; 2020 2021 pkti = (struct in6_pktinfo *)ptr; 2022 if (ipp->ipp_fields & IPPF_IFINDEX) 2023 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2024 else 2025 pkti->ipi6_ifindex = 0; 2026 if (ipp->ipp_fields & IPPF_ADDR) 2027 pkti->ipi6_addr = ipp->ipp_addr; 2028 else 2029 pkti->ipi6_addr = ipv6_all_zeros; 2030 return (sizeof (struct in6_pktinfo)); 2031 } 2032 case IPV6_TCLASS: 2033 if (ipp->ipp_fields & IPPF_TCLASS) 2034 *i1 = ipp->ipp_tclass; 2035 else 2036 *i1 = IPV6_FLOW_TCLASS( 2037 IPV6_DEFAULT_VERS_AND_FLOW); 2038 break; /* goto sizeof (int) option return */ 2039 case IPV6_NEXTHOP: { 2040 sin6_t *sin6 = (sin6_t *)ptr; 2041 2042 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2043 return (0); 2044 *sin6 = sin6_null; 2045 sin6->sin6_family = AF_INET6; 2046 sin6->sin6_addr = ipp->ipp_nexthop; 2047 return (sizeof (sin6_t)); 2048 } 2049 case IPV6_HOPOPTS: 2050 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2051 return (0); 2052 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2053 return (0); 2054 /* 2055 * The cipso/label option is added by kernel. 2056 * User is not usually aware of this option. 2057 * We copy out the hbh opt after the label option. 
2058 */ 2059 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2060 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2061 if (udp->udp_label_len_v6 > 0) { 2062 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2063 ptr[1] = (ipp->ipp_hopoptslen - 2064 udp->udp_label_len_v6 + 7) / 8 - 1; 2065 } 2066 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2067 case IPV6_RTHDRDSTOPTS: 2068 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2069 return (0); 2070 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2071 return (ipp->ipp_rtdstoptslen); 2072 case IPV6_RTHDR: 2073 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2074 return (0); 2075 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2076 return (ipp->ipp_rthdrlen); 2077 case IPV6_DSTOPTS: 2078 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2079 return (0); 2080 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2081 return (ipp->ipp_dstoptslen); 2082 case IPV6_PATHMTU: 2083 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2084 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2085 us->us_netstack)); 2086 default: 2087 return (-1); 2088 } 2089 break; 2090 case IPPROTO_UDP: 2091 switch (name) { 2092 case UDP_ANONPRIVBIND: 2093 *i1 = udp->udp_anon_priv_bind; 2094 break; 2095 case UDP_EXCLBIND: 2096 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2097 break; 2098 case UDP_RCVHDR: 2099 *i1 = udp->udp_rcvhdr ? 1 : 0; 2100 break; 2101 case UDP_NAT_T_ENDPOINT: 2102 *i1 = udp->udp_nat_t_endpoint; 2103 break; 2104 default: 2105 return (-1); 2106 } 2107 break; 2108 default: 2109 return (-1); 2110 } 2111 return (sizeof (int)); 2112 } 2113 2114 int 2115 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2116 { 2117 udp_t *udp; 2118 int err; 2119 2120 udp = Q_TO_UDP(q); 2121 2122 rw_enter(&udp->udp_rwlock, RW_READER); 2123 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2124 rw_exit(&udp->udp_rwlock); 2125 return (err); 2126 } 2127 2128 /* 2129 * This routine sets socket options. 
 */
/*
 * udp_do_opt_set() validates and (unless 'checkonly' is set) applies one
 * option identified by (level, name) with value invalp[0 .. inlen).
 *
 * The caller must hold udp_rwlock as writer (asserted below).  The lock is
 * dropped and re-acquired around proto_set_rx_hiwat() (SO_RCVBUF) and
 * proto_set_tx_wroff() (IP_OPTIONS).
 *
 * 'thisdg_attrs', when non-NULL, is a udpattrs_t carrying per-datagram
 * (ancillary) state: udpattr_mb, udpattr_ipp4 and udpattr_ipp6.  For
 * IPPROTO_IPV6, a NULL 'attrs' or NULL udpattr_ipp6 selects the sticky
 * options in udp->udp_sticky_ipp instead.
 *
 * Return values:
 *	0	success; on the common path the input value is copied to
 *		'outvalp' and *outlenp is set to 'inlen'.  SO_RCVTIMEO and
 *		SO_SNDTIMEO return 0 directly without touching the outputs.
 *	> 0	errno value; most (not all) error paths zero *outlenp first.
 *	-EINVAL	"soft" error: the option is not handled at this level and
 *		*outlenp is left unmodified.
 *
 * PASS_OPT_TO_IP() is a macro defined outside this section.
 * NOTE(review): presumably it arranges for IP to process the option too;
 * check its definition before relying on statements that follow it.
 */
/* ARGSUSED */
static int
udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen,
    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr,
    void *thisdg_attrs, boolean_t checkonly)
{
	udpattrs_t *attrs = thisdg_attrs;
	int	*i1 = (int *)invalp;
	/*
	 * NOTE(review): *i1 is read here before any length check, while
	 * several IPv6 options below accept inlen == 0.  Confirm callers
	 * always supply at least sizeof (int) readable bytes at invalp.
	 */
	boolean_t onoff = (*i1 == 0) ? 0 : 1;
	udp_t	*udp = connp->conn_udp;
	udp_stack_t *us = udp->udp_us;
	int	error;
	uint_t	newlen;
	size_t	sth_wroff;

	ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
	/*
	 * For fixed length options, no sanity check
	 * of passed in length is done. It is assumed *_optcom_req()
	 * routines do the right thing.
	 */
	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_REUSEADDR:
			if (!checkonly) {
				udp->udp_reuseaddr = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case SO_DEBUG:
			if (!checkonly)
				udp->udp_debug = onoff;
			break;
		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			if (!checkonly) {
				udp->udp_dontroute = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case SO_USELOOPBACK:
			if (!checkonly) {
				udp->udp_useloopback = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case SO_BROADCAST:
			if (!checkonly) {
				udp->udp_broadcast = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;

		case SO_SNDBUF:
			/* Only an upper bound (us_max_buf) is enforced. */
			if (*i1 > us->us_max_buf) {
				*outlenp = 0;
				return (ENOBUFS);
			}
			if (!checkonly) {
				udp->udp_xmit_hiwat = *i1;
				connp->conn_wq->q_hiwat = *i1;
			}
			break;
		case SO_RCVBUF:
			if (*i1 > us->us_max_buf) {
				*outlenp = 0;
				return (ENOBUFS);
			}
			if (!checkonly) {
				int size;

				udp->udp_rcv_disply_hiwat = *i1;
				size = udp_set_rcv_hiwat(udp, *i1);
				/*
				 * udp_rwlock is released across the call and
				 * re-taken as writer before returning to the
				 * caller, who still expects to hold it.
				 */
				rw_exit(&udp->udp_rwlock);
				(void) proto_set_rx_hiwat(connp->conn_rq, connp,
				    size);
				rw_enter(&udp->udp_rwlock, RW_WRITER);
			}
			break;
		case SO_DGRAM_ERRIND:
			if (!checkonly)
				udp->udp_dgram_errind = onoff;
			break;
		case SO_RECVUCRED:
			if (!checkonly)
				udp->udp_recvucred = onoff;
			break;
		case SO_ALLZONES:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Do not modify *outlenp.
			 */
			return (-EINVAL);
		case SO_TIMESTAMP:
			if (!checkonly)
				udp->udp_timestamp = onoff;
			break;
		case SO_ANON_MLP:
			if (!checkonly) {
				connp->conn_anon_mlp = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case SO_MAC_EXEMPT:
			/*
			 * Requires secpolicy_net_mac_aware() to succeed and
			 * the endpoint to still be in state TS_UNBND.
			 */
			if (secpolicy_net_mac_aware(cr) != 0 ||
			    udp->udp_state != TS_UNBND)
				return (EACCES);
			if (!checkonly) {
				connp->conn_mac_exempt = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case SCM_UCRED: {
			struct ucred_s *ucr;
			/* This 'cr' shadows the function parameter. */
			cred_t *cr, *newcr;
			ts_label_t *tsl;

			/*
			 * Only sockets that have proper privileges and are
			 * bound to MLPs will have any other value here, so
			 * this implicitly tests for privilege to set label.
			 */
			if (connp->conn_mlp_type == mlptSingle)
				break;
			ucr = (struct ucred_s *)invalp;
			/*
			 * The value must be exactly ucredsize bytes and the
			 * label must lie after the fixed ucred_s header and
			 * within the supplied buffer.
			 */
			if (inlen != ucredsize ||
			    ucr->uc_labeloff < sizeof (*ucr) ||
			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
				return (EINVAL);
			if (!checkonly) {
				mblk_t *mb;
				pid_t  cpid;

				/* Needs the datagram's mblk; never sticky. */
				if (attrs == NULL ||
				    (mb = attrs->udpattr_mb) == NULL)
					return (EINVAL);
				/* Fall back to the conn's cred if none set. */
				if ((cr = msg_getcred(mb, &cpid)) == NULL)
					cr = udp->udp_connp->conn_cred;
				ASSERT(cr != NULL);
				if ((tsl = crgetlabel(cr)) == NULL)
					return (EINVAL);
				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
				    tsl->tsl_doi, KM_NOSLEEP);
				if (newcr == NULL)
					return (ENOSR);
				mblk_setcred(mb, newcr, cpid);
				attrs->udpattr_credset = B_TRUE;
				/* Release the local reference to newcr. */
				crfree(newcr);
			}
			break;
		}
		case SO_EXCLBIND:
			if (!checkonly)
				udp->udp_exclbind = onoff;
			break;
		case SO_RCVTIMEO:
		case SO_SNDTIMEO:
			/*
			 * Pass these two options in order for third part
			 * protocol usage. Here just return directly.
			 */
			return (0);
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	case IPPROTO_IP:
		/* IPPROTO_IP options are accepted on AF_INET endpoints only. */
		if (udp->udp_family != AF_INET) {
			*outlenp = 0;
			return (ENOPROTOOPT);
		}
		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/* Save options for use by IP. */
			newlen = inlen + udp->udp_label_len;
			/* Length must be a multiple of 4 and fit with label. */
			if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (checkonly)
				break;

			/*
			 * Update the stored options taking into account
			 * any CIPSO option which we should not overwrite.
			 */
			if (!tsol_option_set(&udp->udp_ip_snd_options,
			    &udp->udp_ip_snd_options_len,
			    udp->udp_label_len, invalp, inlen)) {
				*outlenp = 0;
				return (ENOMEM);
			}

			/*
			 * NOTE(review): this recomputation does not include
			 * the extra sizeof (uint32_t) that
			 * UDP_NAT_T_ENDPOINT adds to udp_max_hdr_len below;
			 * confirm that is intended when both are in use.
			 */
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
			/* Lock dropped across the call, as for SO_RCVBUF. */
			rw_exit(&udp->udp_rwlock);
			(void) proto_set_tx_wroff(connp->conn_rq, connp,
			    sth_wroff);
			rw_enter(&udp->udp_rwlock, RW_WRITER);
			break;

		case IP_TTL:
			if (!checkonly) {
				udp->udp_ttl = (uchar_t)*i1;
			}
			break;
		case IP_TOS:
		case T_IP_TOS:
			if (!checkonly) {
				udp->udp_type_of_service = (uchar_t)*i1;
			}
			break;
		case IP_MULTICAST_IF: {
			/*
			 * TODO should check OPTMGMT reply and undo this if
			 * there is an error.
			 */
			struct in_addr *inap = (struct in_addr *)invalp;
			if (!checkonly) {
				udp->udp_multicast_if_addr =
				    inap->s_addr;
				PASS_OPT_TO_IP(connp);
			}
			break;
		}
		case IP_MULTICAST_TTL:
			/* Only the first byte of the value is used. */
			if (!checkonly)
				udp->udp_multicast_ttl = *invalp;
			break;
		case IP_MULTICAST_LOOP:
			if (!checkonly) {
				connp->conn_multicast_loop = *invalp;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IP_RECVOPTS:
			if (!checkonly)
				udp->udp_recvopts = onoff;
			break;
		case IP_RECVDSTADDR:
			if (!checkonly)
				udp->udp_recvdstaddr = onoff;
			break;
		case IP_RECVIF:
			if (!checkonly) {
				udp->udp_recvif = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IP_RECVSLLA:
			if (!checkonly) {
				udp->udp_recvslla = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IP_RECVTTL:
			if (!checkonly)
				udp->udp_recvttl = onoff;
			break;
		case IP_PKTINFO: {
			/*
			 * This also handles IP_RECVPKTINFO.
			 * IP_PKTINFO and IP_RECVPKTINFO have same value.
			 * Differentiation is based on the size of the
			 * argument passed in.
			 */
			struct in_pktinfo *pktinfop;
			ip4_pkt_t *attr_pktinfop;

			/* No validation is performed in check-only mode. */
			if (checkonly)
				break;

			if (inlen == sizeof (int)) {
				/*
				 * This is IP_RECVPKTINFO option.
				 * Keep a local copy of whether this option is
				 * set or not and pass it down to IP for
				 * processing.
				 */

				udp->udp_ip_recvpktinfo = onoff;
				return (-EINVAL);
			}

			if (attrs == NULL ||
			    (attr_pktinfop = attrs->udpattr_ipp4) == NULL) {
				/*
				 * sticky option or no buffer to return
				 * the results.
				 */
				return (EINVAL);
			}

			if (inlen != sizeof (struct in_pktinfo))
				return (EINVAL);

			pktinfop = (struct in_pktinfo *)invalp;

			/*
			 * At least one of the values should be specified
			 */
			if (pktinfop->ipi_ifindex == 0 &&
			    pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) {
				return (EINVAL);
			}

			/* Record the request in the per-datagram attrs. */
			attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr;
			attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex;

			break;
		}
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case IP_SEC_OPT:
		case IP_NEXTHOP:
		case IP_DHCPINIT_IF:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Do not modify *outlenp.
			 */
			return (-EINVAL);
		case IP_BOUND_IF:
			if (!checkonly) {
				udp->udp_bound_if = *i1;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IP_UNSPEC_SRC:
			if (!checkonly) {
				udp->udp_unspec_source = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IP_BROADCAST_TTL:
			if (!checkonly)
				connp->conn_broadcast_ttl = *invalp;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	case IPPROTO_IPV6: {
		ip6_pkt_t		*ipp;
		boolean_t		sticky;

		/* IPPROTO_IPV6 options need an AF_INET6 endpoint. */
		if (udp->udp_family != AF_INET6) {
			*outlenp = 0;
			return (ENOPROTOOPT);
		}
		/*
		 * Deal with both sticky options and ancillary data
		 */
		sticky = B_FALSE;
		if (attrs == NULL || (ipp = attrs->udpattr_ipp6) ==
		    NULL) {
			/* sticky options, or none */
			ipp = &udp->udp_sticky_ipp;
			sticky = B_TRUE;
		}

		switch (name) {
		case IPV6_MULTICAST_IF:
			if (!checkonly) {
				udp->udp_multicast_if_index = *i1;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_UNICAST_HOPS:
			/* -1 means use default */
			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				if (*i1 == -1) {
					udp->udp_ttl = ipp->ipp_unicast_hops =
					    us->us_ipv6_hoplimit;
					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
					/* Pass modified value to IP. */
					*i1 = udp->udp_ttl;
				} else {
					udp->udp_ttl = ipp->ipp_unicast_hops =
					    (uint8_t)*i1;
					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
				}
				/* Rebuild the header template */
				error = udp_build_hdrs(udp);
				if (error != 0) {
					*outlenp = 0;
					return (error);
				}
			}
			break;
		case IPV6_MULTICAST_HOPS:
			/* -1 means use default */
			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				if (*i1 == -1) {
					udp->udp_multicast_ttl =
					    ipp->ipp_multicast_hops =
					    IP_DEFAULT_MULTICAST_TTL;
					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
					/* Pass modified value to IP. */
					*i1 = udp->udp_multicast_ttl;
				} else {
					udp->udp_multicast_ttl =
					    ipp->ipp_multicast_hops =
					    (uint8_t)*i1;
					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
				}
			}
			break;
		case IPV6_MULTICAST_LOOP:
			/* Strictly boolean: only 0 and 1 are accepted. */
			if (*i1 != 0 && *i1 != 1) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				connp->conn_multicast_loop = *i1;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Note: Do not modify *outlenp
			 */
			return (-EINVAL);
		case IPV6_BOUND_IF:
			if (!checkonly) {
				udp->udp_bound_if = *i1;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_UNSPEC_SRC:
			if (!checkonly) {
				udp->udp_unspec_source = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		/*
		 * Set boolean switches for ancillary data delivery
		 */
		case IPV6_RECVPKTINFO:
			if (!checkonly) {
				udp->udp_ip_recvpktinfo = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_RECVTCLASS:
			if (!checkonly) {
				udp->udp_ipv6_recvtclass = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_RECVPATHMTU:
			if (!checkonly) {
				udp->udp_ipv6_recvpathmtu = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_RECVHOPLIMIT:
			if (!checkonly) {
				udp->udp_ipv6_recvhoplimit = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_RECVHOPOPTS:
			if (!checkonly) {
				udp->udp_ipv6_recvhopopts = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_RECVDSTOPTS:
			if (!checkonly) {
				udp->udp_ipv6_recvdstopts = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case _OLD_IPV6_RECVDSTOPTS:
			if (!checkonly)
				udp->udp_old_ipv6_recvdstopts = onoff;
			break;
		case IPV6_RECVRTHDRDSTOPTS:
			if (!checkonly) {
				udp->udp_ipv6_recvrthdrdstopts = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_RECVRTHDR:
			if (!checkonly) {
				udp->udp_ipv6_recvrthdr = onoff;
				PASS_OPT_TO_IP(connp);
			}
			break;
		/*
		 * Set sticky options or ancillary data.
		 * If sticky options, (re)build any extension headers
		 * that might be needed as a result.
		 */
		case IPV6_PKTINFO:
			/*
			 * The source address and ifindex are verified
			 * in ip_opt_set(). For ancillary data the
			 * source address is checked in ip_wput_v6.
			 */
			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
				return (EINVAL);
			if (checkonly)
				break;

			/* A zero-length value clears the setting. */
			if (inlen == 0) {
				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
				ipp->ipp_sticky_ignored |=
				    (IPPF_IFINDEX|IPPF_ADDR);
			} else {
				struct in6_pktinfo *pkti;

				pkti = (struct in6_pktinfo *)invalp;
				ipp->ipp_ifindex = pkti->ipi6_ifindex;
				ipp->ipp_addr = pkti->ipi6_addr;
				if (ipp->ipp_ifindex != 0)
					ipp->ipp_fields |= IPPF_IFINDEX;
				else
					ipp->ipp_fields &= ~IPPF_IFINDEX;
				if (!IN6_IS_ADDR_UNSPECIFIED(
				    &ipp->ipp_addr))
					ipp->ipp_fields |= IPPF_ADDR;
				else
					ipp->ipp_fields &= ~IPPF_ADDR;
			}
			if (sticky) {
				error = udp_build_hdrs(udp);
				if (error != 0)
					return (error);
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_HOPLIMIT:
			/* Accepted as ancillary data only, never sticky. */
			if (sticky)
				return (EINVAL);
			if (inlen != 0 && inlen != sizeof (int))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
			} else {
				if (*i1 > 255 || *i1 < -1)
					return (EINVAL);
				/* -1 selects the stack default hop limit. */
				if (*i1 == -1)
					ipp->ipp_hoplimit =
					    us->us_ipv6_hoplimit;
				else
					ipp->ipp_hoplimit = *i1;
				ipp->ipp_fields |= IPPF_HOPLIMIT;
			}
			break;
		case IPV6_TCLASS:
			if (inlen != 0 && inlen != sizeof (int))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_TCLASS;
				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
			} else {
				if (*i1 > 255 || *i1 < -1)
					return (EINVAL);
				/* -1 is stored as traffic class 0. */
				if (*i1 == -1)
					ipp->ipp_tclass = 0;
				else
					ipp->ipp_tclass = *i1;
				ipp->ipp_fields |= IPPF_TCLASS;
			}
			if (sticky) {
				error = udp_build_hdrs(udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_NEXTHOP:
			/*
			 * IP will verify that the nexthop is reachable
			 * and fail for sticky options.
			 */
			if (inlen != 0 && inlen != sizeof (sin6_t))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_NEXTHOP;
				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
			} else {
				sin6_t *sin6 = (sin6_t *)invalp;

				if (sin6->sin6_family != AF_INET6) {
					return (EAFNOSUPPORT);
				}
				if (IN6_IS_ADDR_V4MAPPED(
				    &sin6->sin6_addr))
					return (EADDRNOTAVAIL);
				ipp->ipp_nexthop = sin6->sin6_addr;
				/* An unspecified address clears the flag. */
				if (!IN6_IS_ADDR_UNSPECIFIED(
				    &ipp->ipp_nexthop))
					ipp->ipp_fields |= IPPF_NEXTHOP;
				else
					ipp->ipp_fields &= ~IPPF_NEXTHOP;
			}
			if (sticky) {
				error = udp_build_hdrs(udp);
				if (error != 0)
					return (error);
				PASS_OPT_TO_IP(connp);
			}
			break;
		case IPV6_HOPOPTS: {
			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (hopts->ip6h_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			/*
			 * For sticky options the first udp_label_len_v6
			 * bytes are passed to optcom_pkt_set() as a length
			 * to account for; ancillary data passes 0.
			 */
			error = optcom_pkt_set(invalp, inlen, sticky,
			    (uchar_t **)&ipp->ipp_hopopts,
			    &ipp->ipp_hopoptslen,
			    sticky ? udp->udp_label_len_v6 : 0);
			if (error != 0)
				return (error);
			if (ipp->ipp_hopoptslen == 0) {
				ipp->ipp_fields &= ~IPPF_HOPOPTS;
				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
			} else {
				ipp->ipp_fields |= IPPF_HOPOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_RTHDRDSTOPTS: {
			ip6_dest_t *dopts = (ip6_dest_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (dopts->ip6d_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Only the sticky copy is freed here. */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
					kmem_free(ipp->ipp_rtdstopts,
					    ipp->ipp_rtdstoptslen);
					ipp->ipp_rtdstopts = NULL;
					ipp->ipp_rtdstoptslen = 0;
				}

				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_rtdstopts,
				    &ipp->ipp_rtdstoptslen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_DSTOPTS: {
			ip6_dest_t *dopts = (ip6_dest_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (dopts->ip6d_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				if (sticky &&
				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
					kmem_free(ipp->ipp_dstopts,
					    ipp->ipp_dstoptslen);
					ipp->ipp_dstopts = NULL;
					ipp->ipp_dstoptslen = 0;
				}
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_dstopts,
				    &ipp->ipp_dstoptslen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_DSTOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_RTHDR: {
			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (rt->ip6r_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				if (sticky &&
				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
					kmem_free(ipp->ipp_rthdr,
					    ipp->ipp_rthdrlen);
					ipp->ipp_rthdr = NULL;
					ipp->ipp_rthdrlen = 0;
				}
				ipp->ipp_fields &= ~IPPF_RTHDR;
				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_rthdr,
				    &ipp->ipp_rthdrlen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_RTHDR;
			}
			if (sticky) {
				error = udp_build_hdrs(udp);
				if (error != 0)
					return (error);
			}
			break;
		}

		case IPV6_DONTFRAG:
			if (checkonly)
				break;

			if (onoff) {
				ipp->ipp_fields |= IPPF_DONTFRAG;
			} else {
				ipp->ipp_fields &= ~IPPF_DONTFRAG;
			}
			break;

		case IPV6_USE_MIN_MTU:
			if (inlen != sizeof (int))
				return (EINVAL);

			/* Accepted values are -1, 0 and 1. */
			if (*i1 < -1 || *i1 > 1)
				return (EINVAL);

			if (checkonly)
				break;

			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
			ipp->ipp_use_min_mtu = *i1;
			break;

		case IPV6_SEC_OPT:
		case IPV6_SRC_PREFERENCES:
		case IPV6_V6ONLY:
			/* Handled at the IP level */
			return (-EINVAL);
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	}		/* end IPPROTO_IPV6 */
	case IPPROTO_UDP:
		switch (name) {
		case UDP_ANONPRIVBIND:
			/* Privilege is checked even in check-only mode. */
			if ((error = secpolicy_net_privaddr(cr, 0,
			    IPPROTO_UDP)) != 0) {
				*outlenp = 0;
				return (error);
			}
			if (!checkonly) {
				udp->udp_anon_priv_bind = onoff;
			}
			break;
		case UDP_EXCLBIND:
			if (!checkonly)
				udp->udp_exclbind = onoff;
			break;
		case UDP_RCVHDR:
			if (!checkonly)
				udp->udp_rcvhdr = onoff;
			break;
		case UDP_NAT_T_ENDPOINT:
			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
				*outlenp = 0;
				return (error);
			}

			/*
			 * Use udp_family instead so we can avoid ambiguitites
			 * with AF_INET6 sockets that may switch from IPv4
			 * to IPv6.
			 */
			if (udp->udp_family != AF_INET) {
				*outlenp = 0;
				return (EAFNOSUPPORT);
			}

			if (!checkonly) {
				int size;

				udp->udp_nat_t_endpoint = onoff;

				udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
				    UDPH_SIZE + udp->udp_ip_snd_options_len;

				/* Also, adjust wroff */
				if (onoff) {
					udp->udp_max_hdr_len +=
					    sizeof (uint32_t);
				}
				size = udp->udp_max_hdr_len +
				    us->us_wroff_extra;
				/*
				 * NOTE(review): unlike SO_RCVBUF and
				 * IP_OPTIONS above, udp_rwlock stays held
				 * across proto_set_tx_wroff() here; confirm
				 * that this difference is intentional.
				 */
				(void) proto_set_tx_wroff(connp->conn_rq, connp,
				    size);
			}
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		*outlenp = 0;
		return (EINVAL);
	}
	/*
	 * Common case of OK return with outval same as inval.
	 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;
	return (0);
}

/*
 * Translate 'optset_context' into the check-only flag and hand the request
 * to udp_do_opt_set().
 *
 *	SETFN_OPTCOM_CHECKONLY	validate only; a zero-length value returns
 *				success immediately with *outlenp = 0.
 *	SETFN_OPTCOM_NEGOTIATE	set the option.
 *	SETFN_UD_NEGOTIATE,
 *	SETFN_CONN_NEGOTIATE	set the option, but only if
 *				udp_opt_allow_udr_set() permits it for
 *				(level, name); otherwise EINVAL.
 *
 * Any other context yields EINVAL with *outlenp = 0.  Otherwise the result
 * of udp_do_opt_set() is returned.  This function takes no lock itself;
 * udp_do_opt_set() asserts that udp_rwlock is held as writer.
 */
int
udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name,
    uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    void *thisdg_attrs, cred_t *cr)
{
	int		error;
	boolean_t	checkonly;

	error = 0;
	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		checkonly = B_TRUE;
		/*
		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
		 * inlen != 0 implies value supplied and
		 * 	we have to "pretend" to set it.
		 * inlen == 0 implies that there is no
		 * 	value part in T_CHECK request and just validation
		 * done elsewhere should be enough, we just return here.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			goto done;
		}
		break;
	case SETFN_OPTCOM_NEGOTIATE:
		checkonly = B_FALSE;
		break;
	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		checkonly = B_FALSE;
		/*
		 * Negotiating local and "association-related" options
		 * through T_UNITDATA_REQ.
		 *
		 * Following routine can filter out ones we do not
		 * want to be "set" this way.
		 */
		if (!udp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			error = EINVAL;
			goto done;
		}
		break;
	default:
		/*
		 * We should never get here
		 */
		*outlenp = 0;
		error = EINVAL;
		goto done;
	}

	/* A check-only request reaching this point carries a value. */
	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp,
	    outvalp, cr, thisdg_attrs, checkonly);
done:
	return (error);
}

/*
 * Queue-based wrapper around udp_opt_set().
 *
 * Resolves the conn_t for queue 'q', holds udp_rwlock as writer around the
 * udp_opt_set() call and returns its result.  'mblk' is not used.
 */
/* ARGSUSED */
int
udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
    uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
{
	conn_t	*connp =  Q_TO_CONN(q);
	int error;
	udp_t	*udp = connp->conn_udp;

	rw_enter(&udp->udp_rwlock, RW_WRITER);
	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
	    outlenp, outvalp, thisdg_attrs, cr);
	rw_exit(&udp->udp_rwlock);
	return (error);
}

/*
 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
 * The headers include ip6i_t (if needed), ip6_t, any sticky extension
 * headers, and the udp header.
 * Returns failure if can't allocate memory.
3128 */ 3129 static int 3130 udp_build_hdrs(udp_t *udp) 3131 { 3132 udp_stack_t *us = udp->udp_us; 3133 uchar_t *hdrs; 3134 uint_t hdrs_len; 3135 ip6_t *ip6h; 3136 ip6i_t *ip6i; 3137 udpha_t *udpha; 3138 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3139 size_t sth_wroff; 3140 conn_t *connp = udp->udp_connp; 3141 3142 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3143 ASSERT(connp != NULL); 3144 3145 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3146 ASSERT(hdrs_len != 0); 3147 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3148 /* Need to reallocate */ 3149 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3150 if (hdrs == NULL) 3151 return (ENOMEM); 3152 3153 if (udp->udp_sticky_hdrs_len != 0) { 3154 kmem_free(udp->udp_sticky_hdrs, 3155 udp->udp_sticky_hdrs_len); 3156 } 3157 udp->udp_sticky_hdrs = hdrs; 3158 udp->udp_sticky_hdrs_len = hdrs_len; 3159 } 3160 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3161 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3162 3163 /* Set header fields not in ipp */ 3164 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3165 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3166 ip6h = (ip6_t *)&ip6i[1]; 3167 } else { 3168 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3169 } 3170 3171 if (!(ipp->ipp_fields & IPPF_ADDR)) 3172 ip6h->ip6_src = udp->udp_v6src; 3173 3174 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3175 udpha->uha_src_port = udp->udp_port; 3176 3177 /* Try to get everything in a single mblk */ 3178 if (hdrs_len > udp->udp_max_hdr_len) { 3179 udp->udp_max_hdr_len = hdrs_len; 3180 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3181 rw_exit(&udp->udp_rwlock); 3182 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3183 udp->udp_connp, sth_wroff); 3184 rw_enter(&udp->udp_rwlock, RW_WRITER); 3185 } 3186 return (0); 3187 } 3188 3189 /* 3190 * This routine retrieves the value of an ND variable in a udpparam_t 3191 * structure. It is called through nd_getset when a user reads the 3192 * variable. 
3193 */ 3194 /* ARGSUSED */ 3195 static int 3196 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3197 { 3198 udpparam_t *udppa = (udpparam_t *)cp; 3199 3200 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3201 return (0); 3202 } 3203 3204 /* 3205 * Walk through the param array specified registering each element with the 3206 * named dispatch (ND) handler. 3207 */ 3208 static boolean_t 3209 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3210 { 3211 for (; cnt-- > 0; udppa++) { 3212 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3213 if (!nd_load(ndp, udppa->udp_param_name, 3214 udp_param_get, udp_param_set, 3215 (caddr_t)udppa)) { 3216 nd_free(ndp); 3217 return (B_FALSE); 3218 } 3219 } 3220 } 3221 if (!nd_load(ndp, "udp_extra_priv_ports", 3222 udp_extra_priv_ports_get, NULL, NULL)) { 3223 nd_free(ndp); 3224 return (B_FALSE); 3225 } 3226 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3227 NULL, udp_extra_priv_ports_add, NULL)) { 3228 nd_free(ndp); 3229 return (B_FALSE); 3230 } 3231 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3232 NULL, udp_extra_priv_ports_del, NULL)) { 3233 nd_free(ndp); 3234 return (B_FALSE); 3235 } 3236 return (B_TRUE); 3237 } 3238 3239 /* This routine sets an ND variable in a udpparam_t structure. */ 3240 /* ARGSUSED */ 3241 static int 3242 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3243 { 3244 long new_value; 3245 udpparam_t *udppa = (udpparam_t *)cp; 3246 3247 /* 3248 * Fail the request if the new value does not lie within the 3249 * required bounds. 3250 */ 3251 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3252 new_value < udppa->udp_param_min || 3253 new_value > udppa->udp_param_max) { 3254 return (EINVAL); 3255 } 3256 3257 /* Set the new value */ 3258 udppa->udp_param_value = new_value; 3259 return (0); 3260 } 3261 3262 /* 3263 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3264 * T_opthdr) and return the number of bytes copied. 
'dbuf' may be NULL to 3265 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3266 * then it's assumed to be allocated to be large enough. 3267 * 3268 * Returns zero if trimming of the security option causes all options to go 3269 * away. 3270 */ 3271 static size_t 3272 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3273 { 3274 struct T_opthdr *toh; 3275 size_t hol = ipp->ipp_hopoptslen; 3276 ip6_hbh_t *dstopt = NULL; 3277 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3278 size_t tlen, olen, plen; 3279 boolean_t deleting; 3280 const struct ip6_opt *sopt, *lastpad; 3281 struct ip6_opt *dopt; 3282 3283 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3284 toh->level = IPPROTO_IPV6; 3285 toh->name = IPV6_HOPOPTS; 3286 toh->status = 0; 3287 dstopt = (ip6_hbh_t *)(toh + 1); 3288 } 3289 3290 /* 3291 * If labeling is enabled, then skip the label option 3292 * but get other options if there are any. 3293 */ 3294 if (is_system_labeled()) { 3295 dopt = NULL; 3296 if (dstopt != NULL) { 3297 /* will fill in ip6h_len later */ 3298 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3299 dopt = (struct ip6_opt *)(dstopt + 1); 3300 } 3301 sopt = (const struct ip6_opt *)(srcopt + 1); 3302 hol -= sizeof (*srcopt); 3303 tlen = sizeof (*dstopt); 3304 lastpad = NULL; 3305 deleting = B_FALSE; 3306 /* 3307 * This loop finds the first (lastpad pointer) of any number of 3308 * pads that preceeds the security option, then treats the 3309 * security option as though it were a pad, and then finds the 3310 * next non-pad option (or end of list). 3311 * 3312 * It then treats the entire block as one big pad. To preserve 3313 * alignment of any options that follow, or just the end of the 3314 * list, it computes a minimal new padding size that keeps the 3315 * same alignment for the next option. 3316 * 3317 * If it encounters just a sequence of pads with no security 3318 * option, those are copied as-is rather than collapsed. 
3319 * 3320 * Note that to handle the end of list case, the code makes one 3321 * loop with 'hol' set to zero. 3322 */ 3323 for (;;) { 3324 if (hol > 0) { 3325 if (sopt->ip6o_type == IP6OPT_PAD1) { 3326 if (lastpad == NULL) 3327 lastpad = sopt; 3328 sopt = (const struct ip6_opt *) 3329 &sopt->ip6o_len; 3330 hol--; 3331 continue; 3332 } 3333 olen = sopt->ip6o_len + sizeof (*sopt); 3334 if (olen > hol) 3335 olen = hol; 3336 if (sopt->ip6o_type == IP6OPT_PADN || 3337 sopt->ip6o_type == ip6opt_ls) { 3338 if (sopt->ip6o_type == ip6opt_ls) 3339 deleting = B_TRUE; 3340 if (lastpad == NULL) 3341 lastpad = sopt; 3342 sopt = (const struct ip6_opt *) 3343 ((const char *)sopt + olen); 3344 hol -= olen; 3345 continue; 3346 } 3347 } else { 3348 /* if nothing was copied at all, then delete */ 3349 if (tlen == sizeof (*dstopt)) 3350 return (0); 3351 /* last pass; pick up any trailing padding */ 3352 olen = 0; 3353 } 3354 if (deleting) { 3355 /* 3356 * compute aligning effect of deleted material 3357 * to reproduce with pad. 
3358 */ 3359 plen = ((const char *)sopt - 3360 (const char *)lastpad) & 7; 3361 tlen += plen; 3362 if (dopt != NULL) { 3363 if (plen == 1) { 3364 dopt->ip6o_type = IP6OPT_PAD1; 3365 } else if (plen > 1) { 3366 plen -= sizeof (*dopt); 3367 dopt->ip6o_type = IP6OPT_PADN; 3368 dopt->ip6o_len = plen; 3369 if (plen > 0) 3370 bzero(dopt + 1, plen); 3371 } 3372 dopt = (struct ip6_opt *) 3373 ((char *)dopt + plen); 3374 } 3375 deleting = B_FALSE; 3376 lastpad = NULL; 3377 } 3378 /* if there's uncopied padding, then copy that now */ 3379 if (lastpad != NULL) { 3380 olen += (const char *)sopt - 3381 (const char *)lastpad; 3382 sopt = lastpad; 3383 lastpad = NULL; 3384 } 3385 if (dopt != NULL && olen > 0) { 3386 bcopy(sopt, dopt, olen); 3387 dopt = (struct ip6_opt *)((char *)dopt + olen); 3388 } 3389 if (hol == 0) 3390 break; 3391 tlen += olen; 3392 sopt = (const struct ip6_opt *) 3393 ((const char *)sopt + olen); 3394 hol -= olen; 3395 } 3396 /* go back and patch up the length value, rounded upward */ 3397 if (dstopt != NULL) 3398 dstopt->ip6h_len = (tlen - 1) >> 3; 3399 } else { 3400 tlen = hol; 3401 if (dstopt != NULL) 3402 bcopy(srcopt, dstopt, hol); 3403 } 3404 3405 tlen += sizeof (*toh); 3406 if (toh != NULL) 3407 toh->len = tlen; 3408 3409 return (tlen); 3410 } 3411 3412 /* 3413 * Update udp_rcv_opt_len from the packet. 3414 * Called when options received, and when no options received but 3415 * udp_ip_recv_opt_len has previously recorded options. 
3416 */ 3417 static void 3418 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3419 { 3420 /* Save the options if any */ 3421 if (opt_len > 0) { 3422 if (opt_len > udp->udp_ip_rcv_options_len) { 3423 /* Need to allocate larger buffer */ 3424 if (udp->udp_ip_rcv_options_len != 0) 3425 mi_free((char *)udp->udp_ip_rcv_options); 3426 udp->udp_ip_rcv_options_len = 0; 3427 udp->udp_ip_rcv_options = 3428 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3429 if (udp->udp_ip_rcv_options != NULL) 3430 udp->udp_ip_rcv_options_len = opt_len; 3431 } 3432 if (udp->udp_ip_rcv_options_len != 0) { 3433 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3434 /* Adjust length if we are resusing the space */ 3435 udp->udp_ip_rcv_options_len = opt_len; 3436 } 3437 } else if (udp->udp_ip_rcv_options_len != 0) { 3438 /* Clear out previously recorded options */ 3439 mi_free((char *)udp->udp_ip_rcv_options); 3440 udp->udp_ip_rcv_options = NULL; 3441 udp->udp_ip_rcv_options_len = 0; 3442 } 3443 } 3444 3445 static mblk_t * 3446 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3447 { 3448 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3449 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3450 /* 3451 * fallback has started but messages have not been moved yet 3452 */ 3453 if (udp->udp_fallback_queue_head == NULL) { 3454 ASSERT(udp->udp_fallback_queue_tail == NULL); 3455 udp->udp_fallback_queue_head = mp; 3456 udp->udp_fallback_queue_tail = mp; 3457 } else { 3458 ASSERT(udp->udp_fallback_queue_tail != NULL); 3459 udp->udp_fallback_queue_tail->b_next = mp; 3460 udp->udp_fallback_queue_tail = mp; 3461 } 3462 return (NULL); 3463 } else { 3464 /* 3465 * Fallback completed, let the caller putnext() the mblk. 3466 */ 3467 return (mp); 3468 } 3469 } 3470 3471 /* 3472 * Deliver data to ULP. In case we have a socket, and it's falling back to 3473 * TPI, then we'll queue the mp for later processing. 
3474 */ 3475 static void 3476 udp_ulp_recv(conn_t *connp, mblk_t *mp) 3477 { 3478 if (IPCL_IS_NONSTR(connp)) { 3479 udp_t *udp = connp->conn_udp; 3480 int error; 3481 3482 if ((*connp->conn_upcalls->su_recv) 3483 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3484 NULL) < 0) { 3485 mutex_enter(&udp->udp_recv_lock); 3486 if (error == ENOSPC) { 3487 /* 3488 * let's confirm while holding the lock 3489 */ 3490 if ((*connp->conn_upcalls->su_recv) 3491 (connp->conn_upper_handle, NULL, 0, 0, 3492 &error, NULL) < 0) { 3493 ASSERT(error == ENOSPC); 3494 if (error == ENOSPC) { 3495 connp->conn_flow_cntrld = 3496 B_TRUE; 3497 } 3498 } 3499 mutex_exit(&udp->udp_recv_lock); 3500 } else { 3501 ASSERT(error == EOPNOTSUPP); 3502 mp = udp_queue_fallback(udp, mp); 3503 mutex_exit(&udp->udp_recv_lock); 3504 if (mp != NULL) 3505 putnext(connp->conn_rq, mp); 3506 } 3507 } 3508 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 3509 } else { 3510 putnext(connp->conn_rq, mp); 3511 } 3512 } 3513 3514 /* ARGSUSED2 */ 3515 static void 3516 udp_input(void *arg1, mblk_t *mp, void *arg2) 3517 { 3518 conn_t *connp = (conn_t *)arg1; 3519 struct T_unitdata_ind *tudi; 3520 uchar_t *rptr; /* Pointer to IP header */ 3521 int hdr_length; /* Length of IP+UDP headers */ 3522 int opt_len; 3523 int udi_size; /* Size of T_unitdata_ind */ 3524 int mp_len; 3525 udp_t *udp; 3526 udpha_t *udpha; 3527 int ipversion; 3528 ip6_pkt_t ipp; 3529 ip6_t *ip6h; 3530 ip6i_t *ip6i; 3531 mblk_t *mp1; 3532 mblk_t *options_mp = NULL; 3533 ip_pktinfo_t *pinfo = NULL; 3534 cred_t *cr = NULL; 3535 pid_t cpid; 3536 uint32_t udp_ip_rcv_options_len; 3537 udp_bits_t udp_bits; 3538 cred_t *rcr = connp->conn_cred; 3539 udp_stack_t *us; 3540 3541 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3542 3543 udp = connp->conn_udp; 3544 us = udp->udp_us; 3545 rptr = mp->b_rptr; 3546 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3547 ASSERT(OK_32PTR(rptr)); 3548 3549 /* 3550 * IP should have prepended the options data in an M_CTL 
3551 * Check M_CTL "type" to make sure are not here bcos of 3552 * a valid ICMP message 3553 */ 3554 if (DB_TYPE(mp) == M_CTL) { 3555 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3556 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3557 IN_PKTINFO) { 3558 /* 3559 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3560 * has been prepended to the packet by IP. We need to 3561 * extract the mblk and adjust the rptr 3562 */ 3563 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3564 options_mp = mp; 3565 mp = mp->b_cont; 3566 rptr = mp->b_rptr; 3567 UDP_STAT(us, udp_in_pktinfo); 3568 } else { 3569 /* 3570 * ICMP messages. 3571 */ 3572 udp_icmp_error(connp, mp); 3573 return; 3574 } 3575 } 3576 3577 mp_len = msgdsize(mp); 3578 /* 3579 * This is the inbound data path. 3580 * First, we check to make sure the IP version number is correct, 3581 * and then pull the IP and UDP headers into the first mblk. 3582 */ 3583 3584 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3585 ipp.ipp_fields = 0; 3586 3587 ipversion = IPH_HDR_VERSION(rptr); 3588 3589 rw_enter(&udp->udp_rwlock, RW_READER); 3590 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3591 udp_bits = udp->udp_bits; 3592 rw_exit(&udp->udp_rwlock); 3593 3594 switch (ipversion) { 3595 case IPV4_VERSION: 3596 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3597 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3598 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3599 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3600 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3601 udp->udp_family == AF_INET) { 3602 /* 3603 * Record/update udp_ip_rcv_options with the lock 3604 * held. Not needed for AF_INET6 sockets 3605 * since they don't support a getsockopt of IP_OPTIONS. 3606 */ 3607 rw_enter(&udp->udp_rwlock, RW_WRITER); 3608 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3609 opt_len); 3610 rw_exit(&udp->udp_rwlock); 3611 } 3612 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. 
*/ 3613 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3614 udp->udp_ip_recvpktinfo) { 3615 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3616 ipp.ipp_fields |= IPPF_IFINDEX; 3617 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3618 } 3619 } 3620 break; 3621 case IPV6_VERSION: 3622 /* 3623 * IPv6 packets can only be received by applications 3624 * that are prepared to receive IPv6 addresses. 3625 * The IP fanout must ensure this. 3626 */ 3627 ASSERT(udp->udp_family == AF_INET6); 3628 3629 ip6h = (ip6_t *)rptr; 3630 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3631 3632 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3633 uint8_t nexthdrp; 3634 /* Look for ifindex information */ 3635 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3636 ip6i = (ip6i_t *)ip6h; 3637 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3638 goto tossit; 3639 3640 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3641 ASSERT(ip6i->ip6i_ifindex != 0); 3642 ipp.ipp_fields |= IPPF_IFINDEX; 3643 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3644 } 3645 rptr = (uchar_t *)&ip6i[1]; 3646 mp->b_rptr = rptr; 3647 if (rptr == mp->b_wptr) { 3648 mp1 = mp->b_cont; 3649 freeb(mp); 3650 mp = mp1; 3651 rptr = mp->b_rptr; 3652 } 3653 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3654 goto tossit; 3655 ip6h = (ip6_t *)rptr; 3656 mp_len = msgdsize(mp); 3657 } 3658 /* 3659 * Find any potentially interesting extension headers 3660 * as well as the length of the IPv6 + extension 3661 * headers. 3662 */ 3663 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3664 UDPH_SIZE; 3665 ASSERT(nexthdrp == IPPROTO_UDP); 3666 } else { 3667 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3668 ip6i = NULL; 3669 } 3670 break; 3671 default: 3672 ASSERT(0); 3673 } 3674 3675 /* 3676 * IP inspected the UDP header thus all of it must be in the mblk. 3677 * UDP length check is performed for IPv6 packets and IPv4 packets 3678 * to check if the size of the packet as specified 3679 * by the header is the same as the physical size of the packet. 3680 * FIXME? Didn't IP already check this? 
3681 */ 3682 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3683 if ((MBLKL(mp) < hdr_length) || 3684 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3685 goto tossit; 3686 } 3687 3688 3689 /* Walk past the headers unless UDP_RCVHDR was set. */ 3690 if (!udp_bits.udpb_rcvhdr) { 3691 mp->b_rptr = rptr + hdr_length; 3692 mp_len -= hdr_length; 3693 } 3694 3695 /* 3696 * This is the inbound data path. Packets are passed upstream as 3697 * T_UNITDATA_IND messages with full IP headers still attached. 3698 */ 3699 if (udp->udp_family == AF_INET) { 3700 sin_t *sin; 3701 3702 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3703 3704 /* 3705 * Normally only send up the source address. 3706 * If IP_RECVDSTADDR is set we include the destination IP 3707 * address as an option. With IP_RECVOPTS we include all 3708 * the IP options. 3709 */ 3710 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3711 if (udp_bits.udpb_recvdstaddr) { 3712 udi_size += sizeof (struct T_opthdr) + 3713 sizeof (struct in_addr); 3714 UDP_STAT(us, udp_in_recvdstaddr); 3715 } 3716 3717 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3718 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3719 udi_size += sizeof (struct T_opthdr) + 3720 sizeof (struct in_pktinfo); 3721 UDP_STAT(us, udp_ip_rcvpktinfo); 3722 } 3723 3724 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3725 udi_size += sizeof (struct T_opthdr) + opt_len; 3726 UDP_STAT(us, udp_in_recvopts); 3727 } 3728 3729 /* 3730 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3731 * space accordingly 3732 */ 3733 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3734 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3735 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3736 UDP_STAT(us, udp_in_recvif); 3737 } 3738 3739 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3740 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3741 udi_size += sizeof (struct T_opthdr) + 3742 sizeof (struct sockaddr_dl); 3743 UDP_STAT(us, 
udp_in_recvslla); 3744 } 3745 3746 if ((udp_bits.udpb_recvucred) && 3747 (cr = msg_getcred(mp, &cpid)) != NULL) { 3748 udi_size += sizeof (struct T_opthdr) + ucredsize; 3749 UDP_STAT(us, udp_in_recvucred); 3750 } 3751 3752 /* 3753 * If SO_TIMESTAMP is set allocate the appropriate sized 3754 * buffer. Since gethrestime() expects a pointer aligned 3755 * argument, we allocate space necessary for extra 3756 * alignment (even though it might not be used). 3757 */ 3758 if (udp_bits.udpb_timestamp) { 3759 udi_size += sizeof (struct T_opthdr) + 3760 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3761 UDP_STAT(us, udp_in_timestamp); 3762 } 3763 3764 /* 3765 * If IP_RECVTTL is set allocate the appropriate sized buffer 3766 */ 3767 if (udp_bits.udpb_recvttl) { 3768 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3769 UDP_STAT(us, udp_in_recvttl); 3770 } 3771 3772 /* Allocate a message block for the T_UNITDATA_IND structure. */ 3773 mp1 = allocb(udi_size, BPRI_MED); 3774 if (mp1 == NULL) { 3775 freemsg(mp); 3776 if (options_mp != NULL) 3777 freeb(options_mp); 3778 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3779 return; 3780 } 3781 mp1->b_cont = mp; 3782 mp = mp1; 3783 mp->b_datap->db_type = M_PROTO; 3784 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3785 mp->b_wptr = (uchar_t *)tudi + udi_size; 3786 tudi->PRIM_type = T_UNITDATA_IND; 3787 tudi->SRC_length = sizeof (sin_t); 3788 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3789 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3790 sizeof (sin_t); 3791 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3792 tudi->OPT_length = udi_size; 3793 sin = (sin_t *)&tudi[1]; 3794 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3795 sin->sin_port = udpha->uha_src_port; 3796 sin->sin_family = udp->udp_family; 3797 *(uint32_t *)&sin->sin_zero[0] = 0; 3798 *(uint32_t *)&sin->sin_zero[4] = 0; 3799 3800 /* 3801 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 3802 * IP_RECVTTL has been set. 
3803 */ 3804 if (udi_size != 0) { 3805 /* 3806 * Copy in destination address before options to avoid 3807 * any padding issues. 3808 */ 3809 char *dstopt; 3810 3811 dstopt = (char *)&sin[1]; 3812 if (udp_bits.udpb_recvdstaddr) { 3813 struct T_opthdr *toh; 3814 ipaddr_t *dstptr; 3815 3816 toh = (struct T_opthdr *)dstopt; 3817 toh->level = IPPROTO_IP; 3818 toh->name = IP_RECVDSTADDR; 3819 toh->len = sizeof (struct T_opthdr) + 3820 sizeof (ipaddr_t); 3821 toh->status = 0; 3822 dstopt += sizeof (struct T_opthdr); 3823 dstptr = (ipaddr_t *)dstopt; 3824 *dstptr = ((ipha_t *)rptr)->ipha_dst; 3825 dstopt += sizeof (ipaddr_t); 3826 udi_size -= toh->len; 3827 } 3828 3829 if (udp_bits.udpb_recvopts && opt_len > 0) { 3830 struct T_opthdr *toh; 3831 3832 toh = (struct T_opthdr *)dstopt; 3833 toh->level = IPPROTO_IP; 3834 toh->name = IP_RECVOPTS; 3835 toh->len = sizeof (struct T_opthdr) + opt_len; 3836 toh->status = 0; 3837 dstopt += sizeof (struct T_opthdr); 3838 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 3839 opt_len); 3840 dstopt += opt_len; 3841 udi_size -= toh->len; 3842 } 3843 3844 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 3845 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3846 struct T_opthdr *toh; 3847 struct in_pktinfo *pktinfop; 3848 3849 toh = (struct T_opthdr *)dstopt; 3850 toh->level = IPPROTO_IP; 3851 toh->name = IP_PKTINFO; 3852 toh->len = sizeof (struct T_opthdr) + 3853 sizeof (*pktinfop); 3854 toh->status = 0; 3855 dstopt += sizeof (struct T_opthdr); 3856 pktinfop = (struct in_pktinfo *)dstopt; 3857 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3858 pktinfop->ipi_spec_dst = 3859 pinfo->ip_pkt_match_addr; 3860 pktinfop->ipi_addr.s_addr = 3861 ((ipha_t *)rptr)->ipha_dst; 3862 3863 dstopt += sizeof (struct in_pktinfo); 3864 udi_size -= toh->len; 3865 } 3866 3867 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3868 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3869 3870 struct T_opthdr *toh; 3871 struct sockaddr_dl *dstptr; 3872 3873 toh = (struct T_opthdr 
*)dstopt; 3874 toh->level = IPPROTO_IP; 3875 toh->name = IP_RECVSLLA; 3876 toh->len = sizeof (struct T_opthdr) + 3877 sizeof (struct sockaddr_dl); 3878 toh->status = 0; 3879 dstopt += sizeof (struct T_opthdr); 3880 dstptr = (struct sockaddr_dl *)dstopt; 3881 bcopy(&pinfo->ip_pkt_slla, dstptr, 3882 sizeof (struct sockaddr_dl)); 3883 dstopt += sizeof (struct sockaddr_dl); 3884 udi_size -= toh->len; 3885 } 3886 3887 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3888 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3889 3890 struct T_opthdr *toh; 3891 uint_t *dstptr; 3892 3893 toh = (struct T_opthdr *)dstopt; 3894 toh->level = IPPROTO_IP; 3895 toh->name = IP_RECVIF; 3896 toh->len = sizeof (struct T_opthdr) + 3897 sizeof (uint_t); 3898 toh->status = 0; 3899 dstopt += sizeof (struct T_opthdr); 3900 dstptr = (uint_t *)dstopt; 3901 *dstptr = pinfo->ip_pkt_ifindex; 3902 dstopt += sizeof (uint_t); 3903 udi_size -= toh->len; 3904 } 3905 3906 if (cr != NULL) { 3907 struct T_opthdr *toh; 3908 3909 toh = (struct T_opthdr *)dstopt; 3910 toh->level = SOL_SOCKET; 3911 toh->name = SCM_UCRED; 3912 toh->len = sizeof (struct T_opthdr) + ucredsize; 3913 toh->status = 0; 3914 dstopt += sizeof (struct T_opthdr); 3915 (void) cred2ucred(cr, cpid, dstopt, rcr); 3916 dstopt += ucredsize; 3917 udi_size -= toh->len; 3918 } 3919 3920 if (udp_bits.udpb_timestamp) { 3921 struct T_opthdr *toh; 3922 3923 toh = (struct T_opthdr *)dstopt; 3924 toh->level = SOL_SOCKET; 3925 toh->name = SCM_TIMESTAMP; 3926 toh->len = sizeof (struct T_opthdr) + 3927 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3928 toh->status = 0; 3929 dstopt += sizeof (struct T_opthdr); 3930 /* Align for gethrestime() */ 3931 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3932 sizeof (intptr_t)); 3933 gethrestime((timestruc_t *)dstopt); 3934 dstopt = (char *)toh + toh->len; 3935 udi_size -= toh->len; 3936 } 3937 3938 /* 3939 * CAUTION: 3940 * Due to aligment issues 3941 * Processing of IP_RECVTTL option 3942 * should always be the last. 
Adding 3943 * any option processing after this will 3944 * cause alignment panic. 3945 */ 3946 if (udp_bits.udpb_recvttl) { 3947 struct T_opthdr *toh; 3948 uint8_t *dstptr; 3949 3950 toh = (struct T_opthdr *)dstopt; 3951 toh->level = IPPROTO_IP; 3952 toh->name = IP_RECVTTL; 3953 toh->len = sizeof (struct T_opthdr) + 3954 sizeof (uint8_t); 3955 toh->status = 0; 3956 dstopt += sizeof (struct T_opthdr); 3957 dstptr = (uint8_t *)dstopt; 3958 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 3959 dstopt += sizeof (uint8_t); 3960 udi_size -= toh->len; 3961 } 3962 3963 /* Consumed all of allocated space */ 3964 ASSERT(udi_size == 0); 3965 } 3966 } else { 3967 sin6_t *sin6; 3968 3969 /* 3970 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 3971 * 3972 * Normally we only send up the address. If receiving of any 3973 * optional receive side information is enabled, we also send 3974 * that up as options. 3975 */ 3976 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3977 3978 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3979 IPPF_RTHDR|IPPF_IFINDEX)) { 3980 if ((udp_bits.udpb_ipv6_recvhopopts) && 3981 (ipp.ipp_fields & IPPF_HOPOPTS)) { 3982 size_t hlen; 3983 3984 UDP_STAT(us, udp_in_recvhopopts); 3985 hlen = copy_hop_opts(&ipp, NULL); 3986 if (hlen == 0) 3987 ipp.ipp_fields &= ~IPPF_HOPOPTS; 3988 udi_size += hlen; 3989 } 3990 if (((udp_bits.udpb_ipv6_recvdstopts) || 3991 udp_bits.udpb_old_ipv6_recvdstopts) && 3992 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3993 udi_size += sizeof (struct T_opthdr) + 3994 ipp.ipp_dstoptslen; 3995 UDP_STAT(us, udp_in_recvdstopts); 3996 } 3997 if ((((udp_bits.udpb_ipv6_recvdstopts) && 3998 udp_bits.udpb_ipv6_recvrthdr && 3999 (ipp.ipp_fields & IPPF_RTHDR)) || 4000 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4001 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4002 udi_size += sizeof (struct T_opthdr) + 4003 ipp.ipp_rtdstoptslen; 4004 UDP_STAT(us, udp_in_recvrtdstopts); 4005 } 4006 if ((udp_bits.udpb_ipv6_recvrthdr) && 4007 (ipp.ipp_fields & 
IPPF_RTHDR)) { 4008 udi_size += sizeof (struct T_opthdr) + 4009 ipp.ipp_rthdrlen; 4010 UDP_STAT(us, udp_in_recvrthdr); 4011 } 4012 if ((udp_bits.udpb_ip_recvpktinfo) && 4013 (ipp.ipp_fields & IPPF_IFINDEX)) { 4014 udi_size += sizeof (struct T_opthdr) + 4015 sizeof (struct in6_pktinfo); 4016 UDP_STAT(us, udp_in_recvpktinfo); 4017 } 4018 4019 } 4020 if ((udp_bits.udpb_recvucred) && 4021 (cr = msg_getcred(mp, &cpid)) != NULL) { 4022 udi_size += sizeof (struct T_opthdr) + ucredsize; 4023 UDP_STAT(us, udp_in_recvucred); 4024 } 4025 4026 /* 4027 * If SO_TIMESTAMP is set allocate the appropriate sized 4028 * buffer. Since gethrestime() expects a pointer aligned 4029 * argument, we allocate space necessary for extra 4030 * alignment (even though it might not be used). 4031 */ 4032 if (udp_bits.udpb_timestamp) { 4033 udi_size += sizeof (struct T_opthdr) + 4034 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4035 UDP_STAT(us, udp_in_timestamp); 4036 } 4037 4038 if (udp_bits.udpb_ipv6_recvhoplimit) { 4039 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4040 UDP_STAT(us, udp_in_recvhoplimit); 4041 } 4042 4043 if (udp_bits.udpb_ipv6_recvtclass) { 4044 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4045 UDP_STAT(us, udp_in_recvtclass); 4046 } 4047 4048 mp1 = allocb(udi_size, BPRI_MED); 4049 if (mp1 == NULL) { 4050 freemsg(mp); 4051 if (options_mp != NULL) 4052 freeb(options_mp); 4053 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4054 return; 4055 } 4056 mp1->b_cont = mp; 4057 mp = mp1; 4058 mp->b_datap->db_type = M_PROTO; 4059 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4060 mp->b_wptr = (uchar_t *)tudi + udi_size; 4061 tudi->PRIM_type = T_UNITDATA_IND; 4062 tudi->SRC_length = sizeof (sin6_t); 4063 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4064 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4065 sizeof (sin6_t); 4066 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4067 tudi->OPT_length = udi_size; 4068 sin6 = (sin6_t *)&tudi[1]; 4069 if (ipversion 
== IPV4_VERSION) { 4070 in6_addr_t v6dst; 4071 4072 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4073 &sin6->sin6_addr); 4074 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4075 &v6dst); 4076 sin6->sin6_flowinfo = 0; 4077 sin6->sin6_scope_id = 0; 4078 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4079 connp->conn_zoneid, us->us_netstack); 4080 } else { 4081 sin6->sin6_addr = ip6h->ip6_src; 4082 /* No sin6_flowinfo per API */ 4083 sin6->sin6_flowinfo = 0; 4084 /* For link-scope source pass up scope id */ 4085 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4086 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4087 sin6->sin6_scope_id = ipp.ipp_ifindex; 4088 else 4089 sin6->sin6_scope_id = 0; 4090 sin6->__sin6_src_id = ip_srcid_find_addr( 4091 &ip6h->ip6_dst, connp->conn_zoneid, 4092 us->us_netstack); 4093 } 4094 sin6->sin6_port = udpha->uha_src_port; 4095 sin6->sin6_family = udp->udp_family; 4096 4097 if (udi_size != 0) { 4098 uchar_t *dstopt; 4099 4100 dstopt = (uchar_t *)&sin6[1]; 4101 if ((udp_bits.udpb_ip_recvpktinfo) && 4102 (ipp.ipp_fields & IPPF_IFINDEX)) { 4103 struct T_opthdr *toh; 4104 struct in6_pktinfo *pkti; 4105 4106 toh = (struct T_opthdr *)dstopt; 4107 toh->level = IPPROTO_IPV6; 4108 toh->name = IPV6_PKTINFO; 4109 toh->len = sizeof (struct T_opthdr) + 4110 sizeof (*pkti); 4111 toh->status = 0; 4112 dstopt += sizeof (struct T_opthdr); 4113 pkti = (struct in6_pktinfo *)dstopt; 4114 if (ipversion == IPV6_VERSION) 4115 pkti->ipi6_addr = ip6h->ip6_dst; 4116 else 4117 IN6_IPADDR_TO_V4MAPPED( 4118 ((ipha_t *)rptr)->ipha_dst, 4119 &pkti->ipi6_addr); 4120 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4121 dstopt += sizeof (*pkti); 4122 udi_size -= toh->len; 4123 } 4124 if (udp_bits.udpb_ipv6_recvhoplimit) { 4125 struct T_opthdr *toh; 4126 4127 toh = (struct T_opthdr *)dstopt; 4128 toh->level = IPPROTO_IPV6; 4129 toh->name = IPV6_HOPLIMIT; 4130 toh->len = sizeof (struct T_opthdr) + 4131 sizeof (uint_t); 4132 toh->status = 0; 4133 dstopt += sizeof (struct T_opthdr); 4134 if 
(ipversion == IPV6_VERSION) 4135 *(uint_t *)dstopt = ip6h->ip6_hops; 4136 else 4137 *(uint_t *)dstopt = 4138 ((ipha_t *)rptr)->ipha_ttl; 4139 dstopt += sizeof (uint_t); 4140 udi_size -= toh->len; 4141 } 4142 if (udp_bits.udpb_ipv6_recvtclass) { 4143 struct T_opthdr *toh; 4144 4145 toh = (struct T_opthdr *)dstopt; 4146 toh->level = IPPROTO_IPV6; 4147 toh->name = IPV6_TCLASS; 4148 toh->len = sizeof (struct T_opthdr) + 4149 sizeof (uint_t); 4150 toh->status = 0; 4151 dstopt += sizeof (struct T_opthdr); 4152 if (ipversion == IPV6_VERSION) { 4153 *(uint_t *)dstopt = 4154 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4155 } else { 4156 ipha_t *ipha = (ipha_t *)rptr; 4157 *(uint_t *)dstopt = 4158 ipha->ipha_type_of_service; 4159 } 4160 dstopt += sizeof (uint_t); 4161 udi_size -= toh->len; 4162 } 4163 if ((udp_bits.udpb_ipv6_recvhopopts) && 4164 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4165 size_t hlen; 4166 4167 hlen = copy_hop_opts(&ipp, dstopt); 4168 dstopt += hlen; 4169 udi_size -= hlen; 4170 } 4171 if ((udp_bits.udpb_ipv6_recvdstopts) && 4172 (udp_bits.udpb_ipv6_recvrthdr) && 4173 (ipp.ipp_fields & IPPF_RTHDR) && 4174 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4175 struct T_opthdr *toh; 4176 4177 toh = (struct T_opthdr *)dstopt; 4178 toh->level = IPPROTO_IPV6; 4179 toh->name = IPV6_DSTOPTS; 4180 toh->len = sizeof (struct T_opthdr) + 4181 ipp.ipp_rtdstoptslen; 4182 toh->status = 0; 4183 dstopt += sizeof (struct T_opthdr); 4184 bcopy(ipp.ipp_rtdstopts, dstopt, 4185 ipp.ipp_rtdstoptslen); 4186 dstopt += ipp.ipp_rtdstoptslen; 4187 udi_size -= toh->len; 4188 } 4189 if ((udp_bits.udpb_ipv6_recvrthdr) && 4190 (ipp.ipp_fields & IPPF_RTHDR)) { 4191 struct T_opthdr *toh; 4192 4193 toh = (struct T_opthdr *)dstopt; 4194 toh->level = IPPROTO_IPV6; 4195 toh->name = IPV6_RTHDR; 4196 toh->len = sizeof (struct T_opthdr) + 4197 ipp.ipp_rthdrlen; 4198 toh->status = 0; 4199 dstopt += sizeof (struct T_opthdr); 4200 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4201 dstopt += ipp.ipp_rthdrlen; 4202 udi_size -= 
toh->len; 4203 } 4204 if ((udp_bits.udpb_ipv6_recvdstopts) && 4205 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4206 struct T_opthdr *toh; 4207 4208 toh = (struct T_opthdr *)dstopt; 4209 toh->level = IPPROTO_IPV6; 4210 toh->name = IPV6_DSTOPTS; 4211 toh->len = sizeof (struct T_opthdr) + 4212 ipp.ipp_dstoptslen; 4213 toh->status = 0; 4214 dstopt += sizeof (struct T_opthdr); 4215 bcopy(ipp.ipp_dstopts, dstopt, 4216 ipp.ipp_dstoptslen); 4217 dstopt += ipp.ipp_dstoptslen; 4218 udi_size -= toh->len; 4219 } 4220 if (cr != NULL) { 4221 struct T_opthdr *toh; 4222 4223 toh = (struct T_opthdr *)dstopt; 4224 toh->level = SOL_SOCKET; 4225 toh->name = SCM_UCRED; 4226 toh->len = sizeof (struct T_opthdr) + ucredsize; 4227 toh->status = 0; 4228 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4229 dstopt += toh->len; 4230 udi_size -= toh->len; 4231 } 4232 if (udp_bits.udpb_timestamp) { 4233 struct T_opthdr *toh; 4234 4235 toh = (struct T_opthdr *)dstopt; 4236 toh->level = SOL_SOCKET; 4237 toh->name = SCM_TIMESTAMP; 4238 toh->len = sizeof (struct T_opthdr) + 4239 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4240 toh->status = 0; 4241 dstopt += sizeof (struct T_opthdr); 4242 /* Align for gethrestime() */ 4243 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4244 sizeof (intptr_t)); 4245 gethrestime((timestruc_t *)dstopt); 4246 dstopt = (uchar_t *)toh + toh->len; 4247 udi_size -= toh->len; 4248 } 4249 4250 /* Consumed all of allocated space */ 4251 ASSERT(udi_size == 0); 4252 } 4253 #undef sin6 4254 /* No IP_RECVDSTADDR for IPv6. */ 4255 } 4256 4257 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4258 if (options_mp != NULL) 4259 freeb(options_mp); 4260 4261 udp_ulp_recv(connp, mp); 4262 4263 return; 4264 4265 tossit: 4266 freemsg(mp); 4267 if (options_mp != NULL) 4268 freeb(options_mp); 4269 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4270 } 4271 4272 /* 4273 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4274 * information that can be changing beneath us. 
4275 */ 4276 mblk_t * 4277 udp_snmp_get(queue_t *q, mblk_t *mpctl) 4278 { 4279 mblk_t *mpdata; 4280 mblk_t *mp_conn_ctl; 4281 mblk_t *mp_attr_ctl; 4282 mblk_t *mp6_conn_ctl; 4283 mblk_t *mp6_attr_ctl; 4284 mblk_t *mp_conn_tail; 4285 mblk_t *mp_attr_tail; 4286 mblk_t *mp6_conn_tail; 4287 mblk_t *mp6_attr_tail; 4288 struct opthdr *optp; 4289 mib2_udpEntry_t ude; 4290 mib2_udp6Entry_t ude6; 4291 mib2_transportMLPEntry_t mlp; 4292 int state; 4293 zoneid_t zoneid; 4294 int i; 4295 connf_t *connfp; 4296 conn_t *connp = Q_TO_CONN(q); 4297 int v4_conn_idx; 4298 int v6_conn_idx; 4299 boolean_t needattr; 4300 udp_t *udp; 4301 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4302 udp_stack_t *us = connp->conn_netstack->netstack_udp; 4303 mblk_t *mp2ctl; 4304 4305 /* 4306 * make a copy of the original message 4307 */ 4308 mp2ctl = copymsg(mpctl); 4309 4310 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 4311 if (mpctl == NULL || 4312 (mpdata = mpctl->b_cont) == NULL || 4313 (mp_conn_ctl = copymsg(mpctl)) == NULL || 4314 (mp_attr_ctl = copymsg(mpctl)) == NULL || 4315 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 4316 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 4317 freemsg(mp_conn_ctl); 4318 freemsg(mp_attr_ctl); 4319 freemsg(mp6_conn_ctl); 4320 freemsg(mpctl); 4321 freemsg(mp2ctl); 4322 return (0); 4323 } 4324 4325 zoneid = connp->conn_zoneid; 4326 4327 /* fixed length structure for IPv4 and IPv6 counters */ 4328 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 4329 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 4330 /* synchronize 64- and 32-bit counters */ 4331 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 4332 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 4333 4334 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4335 optp->level = MIB2_UDP; 4336 optp->name = 0; 4337 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 4338 sizeof (us->us_udp_mib)); 4339 optp->len = 
msgdsize(mpdata); 4340 qreply(q, mpctl); 4341 4342 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 4343 v4_conn_idx = v6_conn_idx = 0; 4344 4345 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4346 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4347 connp = NULL; 4348 4349 while ((connp = ipcl_get_next_conn(connfp, connp, 4350 IPCL_UDPCONN))) { 4351 udp = connp->conn_udp; 4352 if (zoneid != connp->conn_zoneid) 4353 continue; 4354 4355 /* 4356 * Note that the port numbers are sent in 4357 * host byte order 4358 */ 4359 4360 if (udp->udp_state == TS_UNBND) 4361 state = MIB2_UDP_unbound; 4362 else if (udp->udp_state == TS_IDLE) 4363 state = MIB2_UDP_idle; 4364 else if (udp->udp_state == TS_DATA_XFER) 4365 state = MIB2_UDP_connected; 4366 else 4367 state = MIB2_UDP_unknown; 4368 4369 needattr = B_FALSE; 4370 bzero(&mlp, sizeof (mlp)); 4371 if (connp->conn_mlp_type != mlptSingle) { 4372 if (connp->conn_mlp_type == mlptShared || 4373 connp->conn_mlp_type == mlptBoth) 4374 mlp.tme_flags |= MIB2_TMEF_SHARED; 4375 if (connp->conn_mlp_type == mlptPrivate || 4376 connp->conn_mlp_type == mlptBoth) 4377 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 4378 needattr = B_TRUE; 4379 } 4380 if (connp->conn_anon_mlp) { 4381 mlp.tme_flags |= MIB2_TMEF_ANONMLP; 4382 needattr = B_TRUE; 4383 } 4384 if (connp->conn_mac_exempt) { 4385 mlp.tme_flags |= MIB2_TMEF_MACEXEMPT; 4386 needattr = B_TRUE; 4387 } 4388 4389 /* 4390 * Create an IPv4 table entry for IPv4 entries and also 4391 * any IPv6 entries which are bound to in6addr_any 4392 * (i.e. anything a IPv4 peer could connect/send to). 
4393 */ 4394 if (udp->udp_ipversion == IPV4_VERSION || 4395 (udp->udp_state <= TS_IDLE && 4396 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 4397 ude.udpEntryInfo.ue_state = state; 4398 /* 4399 * If in6addr_any this will set it to 4400 * INADDR_ANY 4401 */ 4402 ude.udpLocalAddress = 4403 V4_PART_OF_V6(udp->udp_v6src); 4404 ude.udpLocalPort = ntohs(udp->udp_port); 4405 if (udp->udp_state == TS_DATA_XFER) { 4406 /* 4407 * Can potentially get here for 4408 * v6 socket if another process 4409 * (say, ping) has just done a 4410 * sendto(), changing the state 4411 * from the TS_IDLE above to 4412 * TS_DATA_XFER by the time we hit 4413 * this part of the code. 4414 */ 4415 ude.udpEntryInfo.ue_RemoteAddress = 4416 V4_PART_OF_V6(udp->udp_v6dst); 4417 ude.udpEntryInfo.ue_RemotePort = 4418 ntohs(udp->udp_dstport); 4419 } else { 4420 ude.udpEntryInfo.ue_RemoteAddress = 0; 4421 ude.udpEntryInfo.ue_RemotePort = 0; 4422 } 4423 4424 /* 4425 * We make the assumption that all udp_t 4426 * structs will be created within an address 4427 * region no larger than 32-bits. 4428 */ 4429 ude.udpInstance = (uint32_t)(uintptr_t)udp; 4430 ude.udpCreationProcess = 4431 (udp->udp_open_pid < 0) ? 
4432 MIB2_UNKNOWN_PROCESS : 4433 udp->udp_open_pid; 4434 ude.udpCreationTime = udp->udp_open_time; 4435 4436 (void) snmp_append_data2(mp_conn_ctl->b_cont, 4437 &mp_conn_tail, (char *)&ude, sizeof (ude)); 4438 mlp.tme_connidx = v4_conn_idx++; 4439 if (needattr) 4440 (void) snmp_append_data2( 4441 mp_attr_ctl->b_cont, &mp_attr_tail, 4442 (char *)&mlp, sizeof (mlp)); 4443 } 4444 if (udp->udp_ipversion == IPV6_VERSION) { 4445 ude6.udp6EntryInfo.ue_state = state; 4446 ude6.udp6LocalAddress = udp->udp_v6src; 4447 ude6.udp6LocalPort = ntohs(udp->udp_port); 4448 ude6.udp6IfIndex = udp->udp_bound_if; 4449 if (udp->udp_state == TS_DATA_XFER) { 4450 ude6.udp6EntryInfo.ue_RemoteAddress = 4451 udp->udp_v6dst; 4452 ude6.udp6EntryInfo.ue_RemotePort = 4453 ntohs(udp->udp_dstport); 4454 } else { 4455 ude6.udp6EntryInfo.ue_RemoteAddress = 4456 sin6_null.sin6_addr; 4457 ude6.udp6EntryInfo.ue_RemotePort = 0; 4458 } 4459 /* 4460 * We make the assumption that all udp_t 4461 * structs will be created within an address 4462 * region no larger than 32-bits. 4463 */ 4464 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 4465 ude6.udp6CreationProcess = 4466 (udp->udp_open_pid < 0) ? 4467 MIB2_UNKNOWN_PROCESS : 4468 udp->udp_open_pid; 4469 ude6.udp6CreationTime = udp->udp_open_time; 4470 4471 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 4472 &mp6_conn_tail, (char *)&ude6, 4473 sizeof (ude6)); 4474 mlp.tme_connidx = v6_conn_idx++; 4475 if (needattr) 4476 (void) snmp_append_data2( 4477 mp6_attr_ctl->b_cont, 4478 &mp6_attr_tail, (char *)&mlp, 4479 sizeof (mlp)); 4480 } 4481 } 4482 } 4483 4484 /* IPv4 UDP endpoints */ 4485 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 4486 sizeof (struct T_optmgmt_ack)]; 4487 optp->level = MIB2_UDP; 4488 optp->name = MIB2_UDP_ENTRY; 4489 optp->len = msgdsize(mp_conn_ctl->b_cont); 4490 qreply(q, mp_conn_ctl); 4491 4492 /* table of MLP attributes... 
*/ 4493 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 4494 sizeof (struct T_optmgmt_ack)]; 4495 optp->level = MIB2_UDP; 4496 optp->name = EXPER_XPORT_MLP; 4497 optp->len = msgdsize(mp_attr_ctl->b_cont); 4498 if (optp->len == 0) 4499 freemsg(mp_attr_ctl); 4500 else 4501 qreply(q, mp_attr_ctl); 4502 4503 /* IPv6 UDP endpoints */ 4504 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 4505 sizeof (struct T_optmgmt_ack)]; 4506 optp->level = MIB2_UDP6; 4507 optp->name = MIB2_UDP6_ENTRY; 4508 optp->len = msgdsize(mp6_conn_ctl->b_cont); 4509 qreply(q, mp6_conn_ctl); 4510 4511 /* table of MLP attributes... */ 4512 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 4513 sizeof (struct T_optmgmt_ack)]; 4514 optp->level = MIB2_UDP6; 4515 optp->name = EXPER_XPORT_MLP; 4516 optp->len = msgdsize(mp6_attr_ctl->b_cont); 4517 if (optp->len == 0) 4518 freemsg(mp6_attr_ctl); 4519 else 4520 qreply(q, mp6_attr_ctl); 4521 4522 return (mp2ctl); 4523 } 4524 4525 /* 4526 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 4527 * NOTE: Per MIB-II, UDP has no writable data. 4528 * TODO: If this ever actually tries to set anything, it needs to be 4529 * to do the appropriate locking. 4530 */ 4531 /* ARGSUSED */ 4532 int 4533 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4534 uchar_t *ptr, int len) 4535 { 4536 switch (level) { 4537 case MIB2_UDP: 4538 return (0); 4539 default: 4540 return (1); 4541 } 4542 } 4543 4544 /* 4545 * This routine creates a T_UDERROR_IND message and passes it upstream. 4546 * The address and options are copied from the T_UNITDATA_REQ message 4547 * passed in mp. This message is freed. 
4548 */ 4549 static void 4550 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4551 t_scalar_t err) 4552 { 4553 struct T_unitdata_req *tudr; 4554 mblk_t *mp1; 4555 uchar_t *optaddr; 4556 t_scalar_t optlen; 4557 4558 if (DB_TYPE(mp) == M_DATA) { 4559 ASSERT(destaddr != NULL && destlen != 0); 4560 optaddr = NULL; 4561 optlen = 0; 4562 } else { 4563 if ((mp->b_wptr < mp->b_rptr) || 4564 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4565 goto done; 4566 } 4567 tudr = (struct T_unitdata_req *)mp->b_rptr; 4568 destaddr = mp->b_rptr + tudr->DEST_offset; 4569 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4570 destaddr + tudr->DEST_length < mp->b_rptr || 4571 destaddr + tudr->DEST_length > mp->b_wptr) { 4572 goto done; 4573 } 4574 optaddr = mp->b_rptr + tudr->OPT_offset; 4575 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4576 optaddr + tudr->OPT_length < mp->b_rptr || 4577 optaddr + tudr->OPT_length > mp->b_wptr) { 4578 goto done; 4579 } 4580 destlen = tudr->DEST_length; 4581 optlen = tudr->OPT_length; 4582 } 4583 4584 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4585 (char *)optaddr, optlen, err); 4586 if (mp1 != NULL) 4587 qreply(q, mp1); 4588 4589 done: 4590 freemsg(mp); 4591 } 4592 4593 /* 4594 * This routine removes a port number association from a stream. It 4595 * is called by udp_wput to handle T_UNBIND_REQ messages. 4596 */ 4597 static void 4598 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4599 { 4600 conn_t *connp = Q_TO_CONN(q); 4601 int error; 4602 4603 error = udp_do_unbind(connp); 4604 if (error) { 4605 if (error < 0) 4606 udp_err_ack(q, mp, -error, 0); 4607 else 4608 udp_err_ack(q, mp, TSYSERR, error); 4609 return; 4610 } 4611 4612 mp = mi_tpi_ok_ack_alloc(mp); 4613 ASSERT(mp != NULL); 4614 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4615 qreply(q, mp); 4616 } 4617 4618 /* 4619 * Don't let port fall into the privileged range. 
4620 * Since the extra privileged ports can be arbitrary we also 4621 * ensure that we exclude those from consideration. 4622 * us->us_epriv_ports is not sorted thus we loop over it until 4623 * there are no changes. 4624 */ 4625 static in_port_t 4626 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4627 { 4628 int i; 4629 in_port_t nextport; 4630 boolean_t restart = B_FALSE; 4631 udp_stack_t *us = udp->udp_us; 4632 4633 if (random && udp_random_anon_port != 0) { 4634 (void) random_get_pseudo_bytes((uint8_t *)&port, 4635 sizeof (in_port_t)); 4636 /* 4637 * Unless changed by a sys admin, the smallest anon port 4638 * is 32768 and the largest anon port is 65535. It is 4639 * very likely (50%) for the random port to be smaller 4640 * than the smallest anon port. When that happens, 4641 * add port % (anon port range) to the smallest anon 4642 * port to get the random port. It should fall into the 4643 * valid anon port range. 4644 */ 4645 if (port < us->us_smallest_anon_port) { 4646 port = us->us_smallest_anon_port + 4647 port % (us->us_largest_anon_port - 4648 us->us_smallest_anon_port); 4649 } 4650 } 4651 4652 retry: 4653 if (port < us->us_smallest_anon_port) 4654 port = us->us_smallest_anon_port; 4655 4656 if (port > us->us_largest_anon_port) { 4657 port = us->us_smallest_anon_port; 4658 if (restart) 4659 return (0); 4660 restart = B_TRUE; 4661 } 4662 4663 if (port < us->us_smallest_nonpriv_port) 4664 port = us->us_smallest_nonpriv_port; 4665 4666 for (i = 0; i < us->us_num_epriv_ports; i++) { 4667 if (port == us->us_epriv_ports[i]) { 4668 port++; 4669 /* 4670 * Make sure that the port is in the 4671 * valid range. 
4672 */ 4673 goto retry; 4674 } 4675 } 4676 4677 if (is_system_labeled() && 4678 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4679 port, IPPROTO_UDP, B_TRUE)) != 0) { 4680 port = nextport; 4681 goto retry; 4682 } 4683 4684 return (port); 4685 } 4686 4687 static int 4688 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 4689 { 4690 int err; 4691 cred_t *cred; 4692 cred_t *orig_cred = NULL; 4693 cred_t *effective_cred = NULL; 4694 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4695 udp_t *udp = Q_TO_UDP(wq); 4696 udp_stack_t *us = udp->udp_us; 4697 4698 /* 4699 * All Solaris components should pass a db_credp 4700 * for this message, hence we ASSERT. 4701 * On production kernels we return an error to be robust against 4702 * random streams modules sitting on top of us. 4703 */ 4704 cred = orig_cred = msg_getcred(mp, NULL); 4705 ASSERT(cred != NULL); 4706 if (cred == NULL) 4707 return (EINVAL); 4708 4709 /* 4710 * Verify the destination is allowed to receive packets at 4711 * the security label of the message data. tsol_check_dest() 4712 * may create a new effective cred for this message with a 4713 * modified label or label flags. Note that we use the cred/label 4714 * from the message to handle MLP 4715 */ 4716 if ((err = tsol_check_dest(cred, &dst, IPV4_VERSION, 4717 udp->udp_connp->conn_mac_exempt, &effective_cred)) != 0) 4718 goto done; 4719 if (effective_cred != NULL) 4720 cred = effective_cred; 4721 4722 /* 4723 * Calculate the security label to be placed in the text 4724 * of the message (if any). 4725 */ 4726 if ((err = tsol_compute_label(cred, dst, opt_storage, 4727 us->us_netstack->netstack_ip)) != 0) 4728 goto done; 4729 4730 /* 4731 * Insert the security label in the cached ip options, 4732 * removing any old label that may exist. 
4733 */ 4734 if ((err = tsol_update_options(&udp->udp_ip_snd_options, 4735 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 4736 opt_storage)) != 0) 4737 goto done; 4738 4739 /* 4740 * Save the destination address and creds we used to 4741 * generate the security label text. 4742 */ 4743 if (cred != udp->udp_effective_cred) { 4744 if (udp->udp_effective_cred != NULL) 4745 crfree(udp->udp_effective_cred); 4746 crhold(cred); 4747 udp->udp_effective_cred = cred; 4748 } 4749 if (orig_cred != udp->udp_last_cred) { 4750 if (udp->udp_last_cred != NULL) 4751 crfree(udp->udp_last_cred); 4752 crhold(orig_cred); 4753 udp->udp_last_cred = orig_cred; 4754 } 4755 done: 4756 if (effective_cred != NULL) 4757 crfree(effective_cred); 4758 4759 if (err != 0) { 4760 DTRACE_PROBE4( 4761 tx__ip__log__info__updatelabel__udp, 4762 char *, "queue(1) failed to update options(2) on mp(3)", 4763 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4764 } 4765 return (err); 4766 } 4767 4768 static mblk_t * 4769 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 4770 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 4771 cred_t *cr, pid_t pid) 4772 { 4773 udp_t *udp = connp->conn_udp; 4774 mblk_t *mp1 = mp; 4775 mblk_t *mp2; 4776 ipha_t *ipha; 4777 int ip_hdr_length; 4778 uint32_t ip_len; 4779 udpha_t *udpha; 4780 boolean_t lock_held = B_FALSE; 4781 in_port_t uha_src_port; 4782 udpattrs_t attrs; 4783 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 4784 uint32_t ip_snd_opt_len = 0; 4785 ip4_pkt_t pktinfo; 4786 ip4_pkt_t *pktinfop = &pktinfo; 4787 ip_opt_info_t optinfo; 4788 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4789 udp_stack_t *us = udp->udp_us; 4790 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 4791 queue_t *q = connp->conn_wq; 4792 ire_t *ire; 4793 in6_addr_t v6dst; 4794 boolean_t update_lastdst = B_FALSE; 4795 4796 *error = 0; 4797 pktinfop->ip4_ill_index = 0; 4798 pktinfop->ip4_addr = INADDR_ANY; 4799 optinfo.ip_opt_flags = 0; 4800 
optinfo.ip_opt_ill_index = 0; 4801 4802 if (v4dst == INADDR_ANY) 4803 v4dst = htonl(INADDR_LOOPBACK); 4804 4805 /* 4806 * If options passed in, feed it for verification and handling 4807 */ 4808 attrs.udpattr_credset = B_FALSE; 4809 if (IPCL_IS_NONSTR(connp)) { 4810 if (msg->msg_controllen != 0) { 4811 attrs.udpattr_ipp4 = pktinfop; 4812 attrs.udpattr_mb = mp; 4813 4814 rw_enter(&udp->udp_rwlock, RW_WRITER); 4815 *error = process_auxiliary_options(connp, 4816 msg->msg_control, msg->msg_controllen, 4817 &attrs, &udp_opt_obj, udp_opt_set, cr); 4818 rw_exit(&udp->udp_rwlock); 4819 if (*error) 4820 goto done; 4821 } 4822 } else { 4823 if (DB_TYPE(mp) != M_DATA) { 4824 mp1 = mp->b_cont; 4825 if (((struct T_unitdata_req *) 4826 mp->b_rptr)->OPT_length != 0) { 4827 attrs.udpattr_ipp4 = pktinfop; 4828 attrs.udpattr_mb = mp; 4829 if (udp_unitdata_opt_process(q, mp, error, 4830 &attrs) < 0) 4831 goto done; 4832 /* 4833 * Note: success in processing options. 4834 * mp option buffer represented by 4835 * OPT_length/offset now potentially modified 4836 * and contain option setting results 4837 */ 4838 ASSERT(*error == 0); 4839 } 4840 } 4841 } 4842 4843 /* mp1 points to the M_DATA mblk carrying the packet */ 4844 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 4845 4846 /* 4847 * Determine whether we need to mark the mblk with the user's 4848 * credentials. 4849 * If labeled then sockfs would have already done this. 
4850 */ 4851 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 4852 4853 ire = connp->conn_ire_cache; 4854 if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) || 4855 (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { 4856 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 4857 mblk_setcred(mp, cr, pid); 4858 } 4859 4860 rw_enter(&udp->udp_rwlock, RW_READER); 4861 lock_held = B_TRUE; 4862 4863 /* 4864 * Cluster and TSOL note: 4865 * udp.udp_v6lastdst is shared by Cluster and TSOL 4866 * udp.udp_lastdstport is used by Cluster 4867 * 4868 * Both Cluster and TSOL need to update the dest addr and/or port. 4869 * Updating is done after both Cluster and TSOL checks, protected 4870 * by conn_lock. 4871 */ 4872 mutex_enter(&connp->conn_lock); 4873 4874 if (cl_inet_connect2 != NULL && 4875 (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 4876 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 4877 udp->udp_lastdstport != port)) { 4878 mutex_exit(&connp->conn_lock); 4879 *error = 0; 4880 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 4881 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); 4882 if (*error != 0) { 4883 *error = EHOSTUNREACH; 4884 goto done; 4885 } 4886 update_lastdst = B_TRUE; 4887 mutex_enter(&connp->conn_lock); 4888 } 4889 4890 /* 4891 * Check if our saved options are valid; update if not. 4892 * TSOL Note: Since we are not in WRITER mode, UDP packets 4893 * to different destination may require different labels, 4894 * or worse, UDP packets to same IP address may require 4895 * different labels due to use of shared all-zones address. 4896 * We use conn_lock to ensure that lastdst, ip_snd_options, 4897 * and ip_snd_options_len are consistent for the current 4898 * destination and are updated atomically. 
4899 */ 4900 if (is_system_labeled()) { 4901 cred_t *credp; 4902 pid_t cpid; 4903 4904 /* Using UDP MLP requires SCM_UCRED from user */ 4905 if (connp->conn_mlp_type != mlptSingle && 4906 !attrs.udpattr_credset) { 4907 mutex_exit(&connp->conn_lock); 4908 DTRACE_PROBE4( 4909 tx__ip__log__info__output__udp, 4910 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 4911 mblk_t *, mp, udpattrs_t *, &attrs, queue_t *, q); 4912 *error = EINVAL; 4913 goto done; 4914 } 4915 /* 4916 * Update label option for this UDP socket if 4917 * - the destination has changed, 4918 * - the UDP socket is MLP, or 4919 * - the cred attached to the mblk changed. 4920 */ 4921 credp = msg_getcred(mp, &cpid); 4922 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 4923 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 4924 connp->conn_mlp_type != mlptSingle || 4925 credp != udp->udp_last_cred) { 4926 if ((*error = udp_update_label(q, mp, v4dst)) != 0) { 4927 mutex_exit(&connp->conn_lock); 4928 goto done; 4929 } 4930 update_lastdst = B_TRUE; 4931 } 4932 4933 /* 4934 * Attach the effective cred to the mblk to ensure future 4935 * routing decisions will be based on it's label. 4936 */ 4937 mblk_setcred(mp, udp->udp_effective_cred, cpid); 4938 } 4939 if (update_lastdst) { 4940 IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); 4941 udp->udp_lastdstport = port; 4942 } 4943 if (udp->udp_ip_snd_options_len > 0) { 4944 ip_snd_opt_len = udp->udp_ip_snd_options_len; 4945 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 4946 } 4947 mutex_exit(&connp->conn_lock); 4948 4949 /* Add an IP header */ 4950 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 4951 (insert_spi ? 
sizeof (uint32_t) : 0); 4952 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 4953 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 4954 !OK_32PTR(ipha)) { 4955 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 4956 if (mp2 == NULL) { 4957 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 4958 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 4959 *error = ENOMEM; 4960 goto done; 4961 } 4962 mp2->b_wptr = DB_LIM(mp2); 4963 mp2->b_cont = mp1; 4964 mp1 = mp2; 4965 if (DB_TYPE(mp) != M_DATA) 4966 mp->b_cont = mp1; 4967 else 4968 mp = mp1; 4969 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 4970 } 4971 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0)); 4972 #ifdef _BIG_ENDIAN 4973 /* Set version, header length, and tos */ 4974 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4975 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 4976 udp->udp_type_of_service); 4977 /* Set ttl and protocol */ 4978 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 4979 #else 4980 /* Set version, header length, and tos */ 4981 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 4982 ((udp->udp_type_of_service << 8) | 4983 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 4984 /* Set ttl and protocol */ 4985 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 4986 #endif 4987 if (pktinfop->ip4_addr != INADDR_ANY) { 4988 ipha->ipha_src = pktinfop->ip4_addr; 4989 optinfo.ip_opt_flags = IP_VERIFY_SRC; 4990 } else { 4991 /* 4992 * Copy our address into the packet. If this is zero, 4993 * first look at __sin6_src_id for a hint. If we leave the 4994 * source as INADDR_ANY then ip will fill in the real source 4995 * address. 
4996 */ 4997 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 4998 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 4999 in6_addr_t v6src; 5000 5001 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5002 us->us_netstack); 5003 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5004 } 5005 } 5006 uha_src_port = udp->udp_port; 5007 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5008 rw_exit(&udp->udp_rwlock); 5009 lock_held = B_FALSE; 5010 } 5011 5012 if (pktinfop->ip4_ill_index != 0) { 5013 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5014 } 5015 5016 ipha->ipha_fragment_offset_and_flags = 0; 5017 ipha->ipha_ident = 0; 5018 5019 mp1->b_rptr = (uchar_t *)ipha; 5020 5021 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5022 (uintptr_t)UINT_MAX); 5023 5024 /* Determine length of packet */ 5025 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5026 if ((mp2 = mp1->b_cont) != NULL) { 5027 do { 5028 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5029 ip_len += (uint32_t)MBLKL(mp2); 5030 } while ((mp2 = mp2->b_cont) != NULL); 5031 } 5032 /* 5033 * If the size of the packet is greater than the maximum allowed by 5034 * ip, return an error. Passing this down could cause panics because 5035 * the size will have wrapped and be inconsistent with the msg size. 5036 */ 5037 if (ip_len > IP_MAXPACKET) { 5038 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5039 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5040 *error = EMSGSIZE; 5041 goto done; 5042 } 5043 ipha->ipha_length = htons((uint16_t)ip_len); 5044 ip_len -= ip_hdr_length; 5045 ip_len = htons((uint16_t)ip_len); 5046 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5047 5048 /* Insert all-0s SPI now. */ 5049 if (insert_spi) 5050 *((uint32_t *)(udpha + 1)) = 0; 5051 5052 /* 5053 * Copy in the destination address 5054 */ 5055 ipha->ipha_dst = v4dst; 5056 5057 /* 5058 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 
5059 */ 5060 if (CLASSD(v4dst)) 5061 ipha->ipha_ttl = udp->udp_multicast_ttl; 5062 5063 udpha->uha_dst_port = port; 5064 udpha->uha_src_port = uha_src_port; 5065 5066 if (ip_snd_opt_len > 0) { 5067 uint32_t cksum; 5068 5069 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5070 lock_held = B_FALSE; 5071 rw_exit(&udp->udp_rwlock); 5072 /* 5073 * Massage source route putting first source route in ipha_dst. 5074 * Ignore the destination in T_unitdata_req. 5075 * Create a checksum adjustment for a source route, if any. 5076 */ 5077 cksum = ip_massage_options(ipha, us->us_netstack); 5078 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5079 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5080 (ipha->ipha_dst & 0xFFFF); 5081 if ((int)cksum < 0) 5082 cksum--; 5083 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5084 /* 5085 * IP does the checksum if uha_checksum is non-zero, 5086 * We make it easy for IP to include our pseudo header 5087 * by putting our length in uha_checksum. 5088 */ 5089 cksum += ip_len; 5090 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5091 /* There might be a carry. */ 5092 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5093 #ifdef _LITTLE_ENDIAN 5094 if (us->us_do_checksum) 5095 ip_len = (cksum << 16) | ip_len; 5096 #else 5097 if (us->us_do_checksum) 5098 ip_len = (ip_len << 16) | cksum; 5099 else 5100 ip_len <<= 16; 5101 #endif 5102 } else { 5103 /* 5104 * IP does the checksum if uha_checksum is non-zero, 5105 * We make it easy for IP to include our pseudo header 5106 * by putting our length in uha_checksum. 
5107 */ 5108 if (us->us_do_checksum) 5109 ip_len |= (ip_len << 16); 5110 #ifndef _LITTLE_ENDIAN 5111 else 5112 ip_len <<= 16; 5113 #endif 5114 } 5115 ASSERT(!lock_held); 5116 /* Set UDP length and checksum */ 5117 *((uint32_t *)&udpha->uha_length) = ip_len; 5118 5119 if (DB_TYPE(mp) != M_DATA) { 5120 cred_t *cr; 5121 pid_t cpid; 5122 5123 /* Move any cred from the T_UNITDATA_REQ to the packet */ 5124 cr = msg_extractcred(mp, &cpid); 5125 if (cr != NULL) { 5126 if (mp1->b_datap->db_credp != NULL) 5127 crfree(mp1->b_datap->db_credp); 5128 mp1->b_datap->db_credp = cr; 5129 mp1->b_datap->db_cpid = cpid; 5130 } 5131 ASSERT(mp != mp1); 5132 freeb(mp); 5133 } 5134 5135 /* mp has been consumed and we'll return success */ 5136 ASSERT(*error == 0); 5137 mp = NULL; 5138 5139 /* We're done. Pass the packet to ip. */ 5140 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5141 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5142 "udp_wput_end: q %p (%S)", q, "end"); 5143 5144 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5145 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5146 connp->conn_dontroute || 5147 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5148 optinfo.ip_opt_ill_index != 0 || 5149 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5150 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5151 ipst->ips_ip_g_mrouter != NULL) { 5152 UDP_STAT(us, udp_ip_send); 5153 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5154 &optinfo); 5155 } else { 5156 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5157 } 5158 5159 done: 5160 if (lock_held) 5161 rw_exit(&udp->udp_rwlock); 5162 if (*error != 0) { 5163 ASSERT(mp != NULL); 5164 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5165 } 5166 return (mp); 5167 } 5168 5169 static void 5170 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5171 { 5172 conn_t *connp = udp->udp_connp; 5173 ipaddr_t src, dst; 5174 ire_t *ire; 5175 ipif_t *ipif = NULL; 5176 mblk_t *ire_fp_mp; 5177 boolean_t retry_caching; 5178 udp_stack_t 
*us = udp->udp_us; 5179 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5180 5181 dst = ipha->ipha_dst; 5182 src = ipha->ipha_src; 5183 ASSERT(ipha->ipha_ident == 0); 5184 5185 if (CLASSD(dst)) { 5186 int err; 5187 5188 ipif = conn_get_held_ipif(connp, 5189 &connp->conn_multicast_ipif, &err); 5190 5191 if (ipif == NULL || ipif->ipif_isv6 || 5192 (ipif->ipif_ill->ill_phyint->phyint_flags & 5193 PHYI_LOOPBACK)) { 5194 if (ipif != NULL) 5195 ipif_refrele(ipif); 5196 UDP_STAT(us, udp_ip_send); 5197 ip_output(connp, mp, q, IP_WPUT); 5198 return; 5199 } 5200 } 5201 5202 retry_caching = B_FALSE; 5203 mutex_enter(&connp->conn_lock); 5204 ire = connp->conn_ire_cache; 5205 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5206 5207 if (ire == NULL || ire->ire_addr != dst || 5208 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5209 retry_caching = B_TRUE; 5210 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5211 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5212 5213 ASSERT(ipif != NULL); 5214 if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) 5215 retry_caching = B_TRUE; 5216 } 5217 5218 if (!retry_caching) { 5219 ASSERT(ire != NULL); 5220 IRE_REFHOLD(ire); 5221 mutex_exit(&connp->conn_lock); 5222 } else { 5223 boolean_t cached = B_FALSE; 5224 5225 connp->conn_ire_cache = NULL; 5226 mutex_exit(&connp->conn_lock); 5227 5228 /* Release the old ire */ 5229 if (ire != NULL) { 5230 IRE_REFRELE_NOTR(ire); 5231 ire = NULL; 5232 } 5233 5234 if (CLASSD(dst)) { 5235 ASSERT(ipif != NULL); 5236 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5237 connp->conn_zoneid, msg_getlabel(mp), 5238 MATCH_IRE_ILL, ipst); 5239 } else { 5240 ASSERT(ipif == NULL); 5241 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5242 msg_getlabel(mp), ipst); 5243 } 5244 5245 if (ire == NULL) { 5246 if (ipif != NULL) 5247 ipif_refrele(ipif); 5248 UDP_STAT(us, udp_ire_null); 5249 ip_output(connp, mp, q, IP_WPUT); 5250 return; 5251 } 5252 IRE_REFHOLD_NOTR(ire); 5253 5254 mutex_enter(&connp->conn_lock); 5255 if 
(CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5256 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5257 irb_t *irb = ire->ire_bucket; 5258 5259 /* 5260 * IRE's created for non-connection oriented transports 5261 * are normally initialized with IRE_MARK_TEMPORARY set 5262 * in the ire_marks. These IRE's are preferentially 5263 * reaped when the hash chain length in the cache 5264 * bucket exceeds the maximum value specified in 5265 * ip[6]_ire_max_bucket_cnt. This can severely affect 5266 * UDP performance if IRE cache entries that we need 5267 * to reuse are continually removed. To remedy this, 5268 * when we cache the IRE in the conn_t, we remove the 5269 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5270 * set. 5271 */ 5272 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5273 rw_enter(&irb->irb_lock, RW_WRITER); 5274 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5275 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5276 irb->irb_tmp_ire_cnt--; 5277 } 5278 rw_exit(&irb->irb_lock); 5279 } 5280 connp->conn_ire_cache = ire; 5281 cached = B_TRUE; 5282 } 5283 mutex_exit(&connp->conn_lock); 5284 5285 /* 5286 * We can continue to use the ire but since it was not 5287 * cached, we should drop the extra reference. 5288 */ 5289 if (!cached) 5290 IRE_REFRELE_NOTR(ire); 5291 } 5292 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5293 ASSERT(!CLASSD(dst) || ipif != NULL); 5294 5295 /* 5296 * Check if we can take the fast-path. 
5297 * Note that "incomplete" ire's (where the link-layer for next hop 5298 * is not resolved, or where the fast-path header in nce_fp_mp is not 5299 * available yet) are sent down the legacy (slow) path 5300 */ 5301 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 5302 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 5303 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 5304 ((ire->ire_nce == NULL) || 5305 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 5306 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 5307 if (ipif != NULL) 5308 ipif_refrele(ipif); 5309 UDP_STAT(us, udp_ip_ire_send); 5310 IRE_REFRELE(ire); 5311 ip_output(connp, mp, q, IP_WPUT); 5312 return; 5313 } 5314 5315 if (src == INADDR_ANY && !connp->conn_unspec_src) { 5316 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 5317 ipha->ipha_src = ipif->ipif_src_addr; 5318 else 5319 ipha->ipha_src = ire->ire_src_addr; 5320 } 5321 5322 if (ipif != NULL) 5323 ipif_refrele(ipif); 5324 5325 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 5326 } 5327 5328 static void 5329 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 5330 { 5331 ipaddr_t src, dst; 5332 ill_t *ill; 5333 mblk_t *ire_fp_mp; 5334 uint_t ire_fp_mp_len; 5335 uint16_t *up; 5336 uint32_t cksum, hcksum_txflags; 5337 queue_t *dev_q; 5338 udp_t *udp = connp->conn_udp; 5339 ipha_t *ipha = (ipha_t *)mp->b_rptr; 5340 udp_stack_t *us = udp->udp_us; 5341 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5342 boolean_t ll_multicast = B_FALSE; 5343 boolean_t direct_send; 5344 5345 dev_q = ire->ire_stq->q_next; 5346 ASSERT(dev_q != NULL); 5347 5348 ill = ire_to_ill(ire); 5349 ASSERT(ill != NULL); 5350 5351 /* 5352 * For the direct send case, if resetting of conn_direct_blocked 5353 * was missed, it is still ok because the putq() would enable 5354 * the queue and write service will drain it out. 
5355 */ 5356 direct_send = ILL_DIRECT_CAPABLE(ill); 5357 5358 /* is queue flow controlled? */ 5359 if ((!direct_send) && (q->q_first != NULL || connp->conn_draining || 5360 DEV_Q_FLOW_BLOCKED(dev_q))) { 5361 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 5362 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5363 if (ipst->ips_ip_output_queue) { 5364 DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp); 5365 (void) putq(connp->conn_wq, mp); 5366 } else { 5367 freemsg(mp); 5368 } 5369 ire_refrele(ire); 5370 return; 5371 } 5372 5373 ire_fp_mp = ire->ire_nce->nce_fp_mp; 5374 ire_fp_mp_len = MBLKL(ire_fp_mp); 5375 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 5376 5377 dst = ipha->ipha_dst; 5378 src = ipha->ipha_src; 5379 5380 5381 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 5382 5383 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 5384 #ifndef _BIG_ENDIAN 5385 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 5386 #endif 5387 5388 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 5389 ASSERT(ill->ill_hcksum_capab != NULL); 5390 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 5391 } else { 5392 hcksum_txflags = 0; 5393 } 5394 5395 /* pseudo-header checksum (do it in parts for IP header checksum) */ 5396 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 5397 5398 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 5399 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 5400 if (*up != 0) { 5401 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 5402 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 5403 ntohs(ipha->ipha_length), cksum); 5404 5405 /* Software checksum? 
*/ 5406 if (DB_CKSUMFLAGS(mp) == 0) { 5407 UDP_STAT(us, udp_out_sw_cksum); 5408 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 5409 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 5410 } 5411 } 5412 5413 if (!CLASSD(dst)) { 5414 ipha->ipha_fragment_offset_and_flags |= 5415 (uint32_t)htons(ire->ire_frag_flag); 5416 } 5417 5418 /* Calculate IP header checksum if hardware isn't capable */ 5419 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 5420 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 5421 ((uint16_t *)ipha)[4]); 5422 } 5423 5424 if (CLASSD(dst)) { 5425 if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) { 5426 ip_multicast_loopback(q, ill, mp, 5427 connp->conn_multicast_loop ? 0 : 5428 IP_FF_NO_MCAST_LOOP, zoneid); 5429 } 5430 5431 /* If multicast TTL is 0 then we are done */ 5432 if (ipha->ipha_ttl == 0) { 5433 freemsg(mp); 5434 ire_refrele(ire); 5435 return; 5436 } 5437 ll_multicast = B_TRUE; 5438 } 5439 5440 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 5441 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 5442 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 5443 5444 UPDATE_OB_PKT_COUNT(ire); 5445 ire->ire_last_used_time = lbolt; 5446 5447 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 5448 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 5449 ntohs(ipha->ipha_length)); 5450 5451 DTRACE_PROBE4(ip4__physical__out__start, 5452 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 5453 FW_HOOKS(ipst->ips_ip4_physical_out_event, 5454 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 5455 ll_multicast, ipst); 5456 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 5457 if (ipst->ips_ipobs_enabled && mp != NULL) { 5458 zoneid_t szone; 5459 5460 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 5461 ipst, ALL_ZONES); 5462 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 5463 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst); 5464 } 5465 5466 if (mp == NULL) 5467 goto bail; 5468 5469 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 5470 void_ip_t *, ipha, 
__dtrace_ipsr_ill_t *, ill, 5471 ipha_t *, ipha, ip6_t *, NULL, int, 0); 5472 5473 if (direct_send) { 5474 uintptr_t cookie; 5475 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 5476 5477 cookie = idd->idd_tx_df(idd->idd_tx_dh, mp, 5478 (uintptr_t)connp, 0); 5479 if (cookie != NULL) { 5480 idl_tx_list_t *idl_txl; 5481 5482 /* 5483 * Flow controlled. 5484 */ 5485 DTRACE_PROBE2(non__null__cookie, uintptr_t, 5486 cookie, conn_t *, connp); 5487 idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)]; 5488 mutex_enter(&idl_txl->txl_lock); 5489 /* 5490 * Check again after holding txl_lock to see if Tx 5491 * ring is still blocked and only then insert the 5492 * connp into the drain list. 5493 */ 5494 if (connp->conn_direct_blocked || 5495 (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, 5496 cookie) == 0)) { 5497 mutex_exit(&idl_txl->txl_lock); 5498 goto bail; 5499 } 5500 if (idl_txl->txl_cookie != NULL && 5501 idl_txl->txl_cookie != cookie) { 5502 DTRACE_PROBE2(udp__xmit__collision, 5503 uintptr_t, cookie, 5504 uintptr_t, idl_txl->txl_cookie); 5505 UDP_STAT(us, udp_cookie_coll); 5506 } else { 5507 connp->conn_direct_blocked = B_TRUE; 5508 idl_txl->txl_cookie = cookie; 5509 conn_drain_insert(connp, idl_txl); 5510 DTRACE_PROBE1(udp__xmit__insert, 5511 conn_t *, connp); 5512 } 5513 mutex_exit(&idl_txl->txl_lock); 5514 } 5515 } else { 5516 DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp); 5517 putnext(ire->ire_stq, mp); 5518 } 5519 bail: 5520 IRE_REFRELE(ire); 5521 } 5522 5523 static boolean_t 5524 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 5525 { 5526 udp_t *udp = Q_TO_UDP(wq); 5527 int err; 5528 cred_t *cred; 5529 cred_t *orig_cred; 5530 cred_t *effective_cred = NULL; 5531 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 5532 udp_stack_t *us = udp->udp_us; 5533 5534 /* 5535 * All Solaris components should pass a db_credp 5536 * for this message, hence we ASSERT. 
5537 * On production kernels we return an error to be robust against 5538 * random streams modules sitting on top of us. 5539 */ 5540 cred = orig_cred = msg_getcred(mp, NULL); 5541 ASSERT(cred != NULL); 5542 if (cred == NULL) 5543 return (EINVAL); 5544 5545 /* 5546 * Verify the destination is allowed to receive packets at 5547 * the security label of the message data. tsol_check_dest() 5548 * may create a new effective cred for this message with a 5549 * modified label or label flags. Note that we use the 5550 * cred/label from the message to handle MLP. 5551 */ 5552 if ((err = tsol_check_dest(cred, dst, IPV6_VERSION, 5553 udp->udp_connp->conn_mac_exempt, &effective_cred)) != 0) 5554 goto done; 5555 if (effective_cred != NULL) 5556 cred = effective_cred; 5557 5558 /* 5559 * Calculate the security label to be placed in the text 5560 * of the message (if any). 5561 */ 5562 if ((err = tsol_compute_label_v6(cred, dst, opt_storage, 5563 us->us_netstack->netstack_ip)) != 0) 5564 goto done; 5565 5566 /* 5567 * Insert the security label in the cached ip options, 5568 * removing any old label that may exist. 5569 */ 5570 if ((err = tsol_update_sticky(&udp->udp_sticky_ipp, 5571 &udp->udp_label_len_v6, opt_storage)) != 0) 5572 goto done; 5573 5574 /* 5575 * Save the destination address and cred we used to 5576 * generate the security label text. 
5577 */ 5578 if (cred != udp->udp_effective_cred) { 5579 if (udp->udp_effective_cred != NULL) 5580 crfree(udp->udp_effective_cred); 5581 crhold(cred); 5582 udp->udp_effective_cred = cred; 5583 } 5584 if (orig_cred != udp->udp_last_cred) { 5585 if (udp->udp_last_cred != NULL) 5586 crfree(udp->udp_last_cred); 5587 crhold(orig_cred); 5588 udp->udp_last_cred = orig_cred; 5589 } 5590 5591 done: 5592 if (effective_cred != NULL) 5593 crfree(effective_cred); 5594 5595 if (err != 0) { 5596 DTRACE_PROBE4( 5597 tx__ip__log__drop__updatelabel__udp6, 5598 char *, "queue(1) failed to update options(2) on mp(3)", 5599 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5600 } 5601 return (err); 5602 } 5603 5604 static int 5605 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5606 pid_t pid) 5607 { 5608 udp_t *udp = connp->conn_udp; 5609 udp_stack_t *us = udp->udp_us; 5610 ipaddr_t v4dst; 5611 in_port_t dstport; 5612 boolean_t mapped_addr; 5613 struct sockaddr_storage ss; 5614 sin_t *sin; 5615 sin6_t *sin6; 5616 struct sockaddr *addr; 5617 socklen_t addrlen; 5618 int error; 5619 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5620 5621 /* M_DATA for connected socket */ 5622 5623 ASSERT(udp->udp_issocket); 5624 UDP_DBGSTAT(us, udp_data_conn); 5625 5626 mutex_enter(&connp->conn_lock); 5627 if (udp->udp_state != TS_DATA_XFER) { 5628 mutex_exit(&connp->conn_lock); 5629 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5630 UDP_STAT(us, udp_out_err_notconn); 5631 freemsg(mp); 5632 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5633 "udp_wput_end: connp %p (%S)", connp, 5634 "not-connected; address required"); 5635 return (EDESTADDRREQ); 5636 } 5637 5638 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5639 if (mapped_addr) 5640 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5641 5642 /* Initialize addr and addrlen as if they're passed in */ 5643 if (udp->udp_family == AF_INET) { 5644 sin = (sin_t *)&ss; 5645 sin->sin_family = AF_INET; 5646 dstport = sin->sin_port = 
udp->udp_dstport; 5647 ASSERT(mapped_addr); 5648 sin->sin_addr.s_addr = v4dst; 5649 addr = (struct sockaddr *)sin; 5650 addrlen = sizeof (*sin); 5651 } else { 5652 sin6 = (sin6_t *)&ss; 5653 sin6->sin6_family = AF_INET6; 5654 dstport = sin6->sin6_port = udp->udp_dstport; 5655 sin6->sin6_flowinfo = udp->udp_flowinfo; 5656 sin6->sin6_addr = udp->udp_v6dst; 5657 sin6->sin6_scope_id = 0; 5658 sin6->__sin6_src_id = 0; 5659 addr = (struct sockaddr *)sin6; 5660 addrlen = sizeof (*sin6); 5661 } 5662 mutex_exit(&connp->conn_lock); 5663 5664 if (mapped_addr) { 5665 /* 5666 * Handle both AF_INET and AF_INET6; the latter 5667 * for IPV4 mapped destination addresses. Note 5668 * here that both addr and addrlen point to the 5669 * corresponding struct depending on the address 5670 * family of the socket. 5671 */ 5672 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5673 insert_spi, msg, cr, pid); 5674 } else { 5675 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5676 } 5677 if (error == 0) { 5678 ASSERT(mp == NULL); 5679 return (0); 5680 } 5681 5682 UDP_STAT(us, udp_out_err_output); 5683 ASSERT(mp != NULL); 5684 if (IPCL_IS_NONSTR(connp)) { 5685 freemsg(mp); 5686 return (error); 5687 } else { 5688 /* mp is freed by the following routine */ 5689 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5690 (t_scalar_t)addrlen, (t_scalar_t)error); 5691 return (0); 5692 } 5693 } 5694 5695 /* ARGSUSED */ 5696 static int 5697 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5698 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5699 { 5700 5701 udp_t *udp = connp->conn_udp; 5702 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5703 int error = 0; 5704 sin6_t *sin6; 5705 sin_t *sin; 5706 uint_t srcid; 5707 uint16_t port; 5708 ipaddr_t v4dst; 5709 5710 5711 ASSERT(addr != NULL); 5712 5713 switch (udp->udp_family) { 5714 case AF_INET6: 5715 sin6 = (sin6_t *)addr; 5716 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5717 /* 5718 * Destination 
is a non-IPv4-compatible IPv6 address. 5719 * Send out an IPv6 format packet. 5720 */ 5721 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5722 pid); 5723 if (error != 0) 5724 goto ud_error; 5725 5726 return (0); 5727 } 5728 /* 5729 * If the local address is not zero or a mapped address 5730 * return an error. It would be possible to send an IPv4 5731 * packet but the response would never make it back to the 5732 * application since it is bound to a non-mapped address. 5733 */ 5734 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5735 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5736 error = EADDRNOTAVAIL; 5737 goto ud_error; 5738 } 5739 /* Send IPv4 packet without modifying udp_ipversion */ 5740 /* Extract port and ipaddr */ 5741 port = sin6->sin6_port; 5742 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5743 srcid = sin6->__sin6_src_id; 5744 break; 5745 5746 case AF_INET: 5747 sin = (sin_t *)addr; 5748 /* Extract port and ipaddr */ 5749 port = sin->sin_port; 5750 v4dst = sin->sin_addr.s_addr; 5751 srcid = 0; 5752 break; 5753 } 5754 5755 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5756 msg, cr, pid); 5757 5758 if (error == 0) { 5759 ASSERT(mp == NULL); 5760 return (0); 5761 } 5762 5763 ud_error: 5764 ASSERT(mp != NULL); 5765 5766 return (error); 5767 } 5768 5769 /* 5770 * This routine handles all messages passed downstream. It either 5771 * consumes the message or passes it downstream; it never queues a 5772 * a message. 5773 * 5774 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 5775 * is valid when we are directly beneath the stream head, and thus sockfs 5776 * is able to bypass STREAMS and directly call us, passing along the sockaddr 5777 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 5778 * connected endpoints. 
5779 */ 5780 void 5781 udp_wput(queue_t *q, mblk_t *mp) 5782 { 5783 conn_t *connp = Q_TO_CONN(q); 5784 udp_t *udp = connp->conn_udp; 5785 int error = 0; 5786 struct sockaddr *addr; 5787 socklen_t addrlen; 5788 udp_stack_t *us = udp->udp_us; 5789 5790 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 5791 "udp_wput_start: queue %p mp %p", q, mp); 5792 5793 /* 5794 * We directly handle several cases here: T_UNITDATA_REQ message 5795 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 5796 * socket. 5797 */ 5798 switch (DB_TYPE(mp)) { 5799 case M_DATA: 5800 /* 5801 * Quick check for error cases. Checks will be done again 5802 * under the lock later on 5803 */ 5804 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 5805 /* Not connected; address is required */ 5806 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5807 UDP_STAT(us, udp_out_err_notconn); 5808 freemsg(mp); 5809 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5810 "udp_wput_end: connp %p (%S)", connp, 5811 "not-connected; address required"); 5812 return; 5813 } 5814 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 5815 return; 5816 5817 case M_PROTO: 5818 case M_PCPROTO: { 5819 struct T_unitdata_req *tudr; 5820 5821 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 5822 tudr = (struct T_unitdata_req *)mp->b_rptr; 5823 5824 /* Handle valid T_UNITDATA_REQ here */ 5825 if (MBLKL(mp) >= sizeof (*tudr) && 5826 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 5827 if (mp->b_cont == NULL) { 5828 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5829 "udp_wput_end: q %p (%S)", q, "badaddr"); 5830 error = EPROTO; 5831 goto ud_error; 5832 } 5833 5834 if (!MBLKIN(mp, 0, tudr->DEST_offset + 5835 tudr->DEST_length)) { 5836 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5837 "udp_wput_end: q %p (%S)", q, "badaddr"); 5838 error = EADDRNOTAVAIL; 5839 goto ud_error; 5840 } 5841 /* 5842 * If a port has not been bound to the stream, fail. 
5843 * This is not a problem when sockfs is directly 5844 * above us, because it will ensure that the socket 5845 * is first bound before allowing data to be sent. 5846 */ 5847 if (udp->udp_state == TS_UNBND) { 5848 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5849 "udp_wput_end: q %p (%S)", q, "outstate"); 5850 error = EPROTO; 5851 goto ud_error; 5852 } 5853 addr = (struct sockaddr *) 5854 &mp->b_rptr[tudr->DEST_offset]; 5855 addrlen = tudr->DEST_length; 5856 if (tudr->OPT_length != 0) 5857 UDP_STAT(us, udp_out_opt); 5858 break; 5859 } 5860 /* FALLTHRU */ 5861 } 5862 default: 5863 udp_wput_other(q, mp); 5864 return; 5865 } 5866 ASSERT(addr != NULL); 5867 5868 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 5869 -1); 5870 if (error != 0) { 5871 ud_error: 5872 UDP_STAT(us, udp_out_err_output); 5873 ASSERT(mp != NULL); 5874 /* mp is freed by the following routine */ 5875 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 5876 (t_scalar_t)error); 5877 } 5878 } 5879 5880 /* ARGSUSED */ 5881 static void 5882 udp_wput_fallback(queue_t *wq, mblk_t *mp) 5883 { 5884 #ifdef DEBUG 5885 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 5886 #endif 5887 freemsg(mp); 5888 } 5889 5890 5891 /* 5892 * udp_output_v6(): 5893 * Assumes that udp_wput did some sanity checking on the destination 5894 * address. 
5895 */ 5896 static mblk_t * 5897 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 5898 struct nmsghdr *msg, cred_t *cr, pid_t pid) 5899 { 5900 ip6_t *ip6h; 5901 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 5902 mblk_t *mp1 = mp; 5903 mblk_t *mp2; 5904 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 5905 size_t ip_len; 5906 udpha_t *udph; 5907 udp_t *udp = connp->conn_udp; 5908 udp_stack_t *us = udp->udp_us; 5909 queue_t *q = connp->conn_wq; 5910 ip6_pkt_t ipp_s; /* For ancillary data options */ 5911 ip6_pkt_t *ipp = &ipp_s; 5912 ip6_pkt_t *tipp; /* temporary ipp */ 5913 uint32_t csum = 0; 5914 uint_t ignore = 0; 5915 uint_t option_exists = 0, is_sticky = 0; 5916 uint8_t *cp; 5917 uint8_t *nxthdr_ptr; 5918 in6_addr_t ip6_dst; 5919 in_port_t port; 5920 udpattrs_t attrs; 5921 boolean_t opt_present; 5922 ip6_hbh_t *hopoptsptr = NULL; 5923 uint_t hopoptslen = 0; 5924 boolean_t is_ancillary = B_FALSE; 5925 size_t sth_wroff = 0; 5926 ire_t *ire; 5927 boolean_t update_lastdst = B_FALSE; 5928 5929 *error = 0; 5930 5931 /* 5932 * If the local address is a mapped address return 5933 * an error. 5934 * It would be possible to send an IPv6 packet but the 5935 * response would never make it back to the application 5936 * since it is bound to a mapped address. 
5937 */ 5938 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 5939 *error = EADDRNOTAVAIL; 5940 goto done; 5941 } 5942 5943 ipp->ipp_fields = 0; 5944 ipp->ipp_sticky_ignored = 0; 5945 5946 /* 5947 * If TPI options passed in, feed it for verification and handling 5948 */ 5949 attrs.udpattr_credset = B_FALSE; 5950 opt_present = B_FALSE; 5951 if (IPCL_IS_NONSTR(connp)) { 5952 if (msg->msg_controllen != 0) { 5953 attrs.udpattr_ipp6 = ipp; 5954 attrs.udpattr_mb = mp; 5955 5956 rw_enter(&udp->udp_rwlock, RW_WRITER); 5957 *error = process_auxiliary_options(connp, 5958 msg->msg_control, msg->msg_controllen, 5959 &attrs, &udp_opt_obj, udp_opt_set, cr); 5960 rw_exit(&udp->udp_rwlock); 5961 if (*error) 5962 goto done; 5963 ASSERT(*error == 0); 5964 opt_present = B_TRUE; 5965 } 5966 } else { 5967 if (DB_TYPE(mp) != M_DATA) { 5968 mp1 = mp->b_cont; 5969 if (((struct T_unitdata_req *) 5970 mp->b_rptr)->OPT_length != 0) { 5971 attrs.udpattr_ipp6 = ipp; 5972 attrs.udpattr_mb = mp; 5973 if (udp_unitdata_opt_process(q, mp, error, 5974 &attrs) < 0) { 5975 goto done; 5976 } 5977 ASSERT(*error == 0); 5978 opt_present = B_TRUE; 5979 } 5980 } 5981 } 5982 5983 /* 5984 * Determine whether we need to mark the mblk with the user's 5985 * credentials. 5986 * If labeled then sockfs would have already done this. 
5987 */ 5988 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 5989 ire = connp->conn_ire_cache; 5990 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) || 5991 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 5992 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 5993 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 5994 mblk_setcred(mp, cr, pid); 5995 } 5996 5997 rw_enter(&udp->udp_rwlock, RW_READER); 5998 ignore = ipp->ipp_sticky_ignored; 5999 6000 /* mp1 points to the M_DATA mblk carrying the packet */ 6001 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6002 6003 if (sin6->sin6_scope_id != 0 && 6004 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6005 /* 6006 * IPPF_SCOPE_ID is special. It's neither a sticky 6007 * option nor ancillary data. It needs to be 6008 * explicitly set in options_exists. 6009 */ 6010 option_exists |= IPPF_SCOPE_ID; 6011 } 6012 6013 /* 6014 * Compute the destination address 6015 */ 6016 ip6_dst = sin6->sin6_addr; 6017 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6018 ip6_dst = ipv6_loopback; 6019 6020 port = sin6->sin6_port; 6021 6022 /* 6023 * Cluster and TSOL notes, Cluster check: 6024 * see comments in udp_output_v4(). 6025 */ 6026 mutex_enter(&connp->conn_lock); 6027 6028 if (cl_inet_connect2 != NULL && 6029 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6030 port != udp->udp_lastdstport)) { 6031 mutex_exit(&connp->conn_lock); 6032 *error = 0; 6033 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6034 if (*error != 0) { 6035 *error = EHOSTUNREACH; 6036 rw_exit(&udp->udp_rwlock); 6037 goto done; 6038 } 6039 update_lastdst = B_TRUE; 6040 mutex_enter(&connp->conn_lock); 6041 } 6042 6043 /* 6044 * If we're not going to the same destination as last time, then 6045 * recompute the label required. This is done in a separate routine to 6046 * avoid blowing up our stack here. 
6047 * 6048 * TSOL Note: Since we are not in WRITER mode, UDP packets 6049 * to different destination may require different labels, 6050 * or worse, UDP packets to same IP address may require 6051 * different labels due to use of shared all-zones address. 6052 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6053 * and sticky ipp_hopoptslen are consistent for the current 6054 * destination and are updated atomically. 6055 */ 6056 if (is_system_labeled()) { 6057 cred_t *credp; 6058 pid_t cpid; 6059 6060 /* Using UDP MLP requires SCM_UCRED from user */ 6061 if (connp->conn_mlp_type != mlptSingle && 6062 !attrs.udpattr_credset) { 6063 DTRACE_PROBE4( 6064 tx__ip__log__info__output__udp6, 6065 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6066 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6067 *error = EINVAL; 6068 rw_exit(&udp->udp_rwlock); 6069 mutex_exit(&connp->conn_lock); 6070 goto done; 6071 } 6072 /* 6073 * update label option for this UDP socket if 6074 * - the destination has changed, 6075 * - the UDP socket is MLP, or 6076 * - the cred attached to the mblk changed. 6077 */ 6078 credp = msg_getcred(mp, &cpid); 6079 if (opt_present || 6080 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6081 connp->conn_mlp_type != mlptSingle || 6082 credp != udp->udp_last_cred) { 6083 if ((*error = udp_update_label_v6(q, mp, &ip6_dst)) 6084 != 0) { 6085 rw_exit(&udp->udp_rwlock); 6086 mutex_exit(&connp->conn_lock); 6087 goto done; 6088 } 6089 update_lastdst = B_TRUE; 6090 } 6091 /* 6092 * Attach the effective cred to the mblk to ensure future 6093 * routing decisions will be based on it's label. 6094 */ 6095 mblk_setcred(mp, udp->udp_effective_cred, cpid); 6096 } 6097 6098 if (update_lastdst) { 6099 udp->udp_v6lastdst = ip6_dst; 6100 udp->udp_lastdstport = port; 6101 } 6102 6103 /* 6104 * If there's a security label here, then we ignore any options the 6105 * user may try to set. We keep the peer's label as a hidden sticky 6106 * option. 
We make a private copy of this label before releasing the 6107 * lock so that label is kept consistent with the destination addr. 6108 */ 6109 if (udp->udp_label_len_v6 > 0) { 6110 ignore &= ~IPPF_HOPOPTS; 6111 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6112 } 6113 6114 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6115 /* No sticky options nor ancillary data. */ 6116 mutex_exit(&connp->conn_lock); 6117 goto no_options; 6118 } 6119 6120 /* 6121 * Go through the options figuring out where each is going to 6122 * come from and build two masks. The first mask indicates if 6123 * the option exists at all. The second mask indicates if the 6124 * option is sticky or ancillary. 6125 */ 6126 if (!(ignore & IPPF_HOPOPTS)) { 6127 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6128 option_exists |= IPPF_HOPOPTS; 6129 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6130 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6131 option_exists |= IPPF_HOPOPTS; 6132 is_sticky |= IPPF_HOPOPTS; 6133 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6134 hopoptsptr = kmem_alloc( 6135 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6136 if (hopoptsptr == NULL) { 6137 *error = ENOMEM; 6138 mutex_exit(&connp->conn_lock); 6139 goto done; 6140 } 6141 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6142 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6143 hopoptslen); 6144 udp_ip_hdr_len += hopoptslen; 6145 } 6146 } 6147 mutex_exit(&connp->conn_lock); 6148 6149 if (!(ignore & IPPF_RTHDR)) { 6150 if (ipp->ipp_fields & IPPF_RTHDR) { 6151 option_exists |= IPPF_RTHDR; 6152 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6153 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6154 option_exists |= IPPF_RTHDR; 6155 is_sticky |= IPPF_RTHDR; 6156 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6157 } 6158 } 6159 6160 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6161 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6162 option_exists |= IPPF_RTDSTOPTS; 6163 udp_ip_hdr_len += 
ipp->ipp_rtdstoptslen; 6164 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6165 option_exists |= IPPF_RTDSTOPTS; 6166 is_sticky |= IPPF_RTDSTOPTS; 6167 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6168 } 6169 } 6170 6171 if (!(ignore & IPPF_DSTOPTS)) { 6172 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6173 option_exists |= IPPF_DSTOPTS; 6174 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6175 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6176 option_exists |= IPPF_DSTOPTS; 6177 is_sticky |= IPPF_DSTOPTS; 6178 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6179 } 6180 } 6181 6182 if (!(ignore & IPPF_IFINDEX)) { 6183 if (ipp->ipp_fields & IPPF_IFINDEX) { 6184 option_exists |= IPPF_IFINDEX; 6185 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6186 option_exists |= IPPF_IFINDEX; 6187 is_sticky |= IPPF_IFINDEX; 6188 } 6189 } 6190 6191 if (!(ignore & IPPF_ADDR)) { 6192 if (ipp->ipp_fields & IPPF_ADDR) { 6193 option_exists |= IPPF_ADDR; 6194 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6195 option_exists |= IPPF_ADDR; 6196 is_sticky |= IPPF_ADDR; 6197 } 6198 } 6199 6200 if (!(ignore & IPPF_DONTFRAG)) { 6201 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6202 option_exists |= IPPF_DONTFRAG; 6203 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6204 option_exists |= IPPF_DONTFRAG; 6205 is_sticky |= IPPF_DONTFRAG; 6206 } 6207 } 6208 6209 if (!(ignore & IPPF_USE_MIN_MTU)) { 6210 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6211 option_exists |= IPPF_USE_MIN_MTU; 6212 } else if (udp->udp_sticky_ipp.ipp_fields & 6213 IPPF_USE_MIN_MTU) { 6214 option_exists |= IPPF_USE_MIN_MTU; 6215 is_sticky |= IPPF_USE_MIN_MTU; 6216 } 6217 } 6218 6219 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6220 option_exists |= IPPF_HOPLIMIT; 6221 /* IPV6_HOPLIMIT can never be sticky */ 6222 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6223 6224 if (!(ignore & IPPF_UNICAST_HOPS) && 6225 
(udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6226 option_exists |= IPPF_UNICAST_HOPS; 6227 is_sticky |= IPPF_UNICAST_HOPS; 6228 } 6229 6230 if (!(ignore & IPPF_MULTICAST_HOPS) && 6231 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6232 option_exists |= IPPF_MULTICAST_HOPS; 6233 is_sticky |= IPPF_MULTICAST_HOPS; 6234 } 6235 6236 if (!(ignore & IPPF_TCLASS)) { 6237 if (ipp->ipp_fields & IPPF_TCLASS) { 6238 option_exists |= IPPF_TCLASS; 6239 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6240 option_exists |= IPPF_TCLASS; 6241 is_sticky |= IPPF_TCLASS; 6242 } 6243 } 6244 6245 if (!(ignore & IPPF_NEXTHOP) && 6246 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6247 option_exists |= IPPF_NEXTHOP; 6248 is_sticky |= IPPF_NEXTHOP; 6249 } 6250 6251 no_options: 6252 6253 /* 6254 * If any options carried in the ip6i_t were specified, we 6255 * need to account for the ip6i_t in the data we'll be sending 6256 * down. 6257 */ 6258 if (option_exists & IPPF_HAS_IP6I) 6259 udp_ip_hdr_len += sizeof (ip6i_t); 6260 6261 /* check/fix buffer config, setup pointers into it */ 6262 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6263 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6264 !OK_32PTR(ip6h)) { 6265 6266 /* Try to get everything in a single mblk next time */ 6267 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6268 udp->udp_max_hdr_len = udp_ip_hdr_len; 6269 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6270 } 6271 6272 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6273 if (mp2 == NULL) { 6274 *error = ENOMEM; 6275 rw_exit(&udp->udp_rwlock); 6276 goto done; 6277 } 6278 mp2->b_wptr = DB_LIM(mp2); 6279 mp2->b_cont = mp1; 6280 mp1 = mp2; 6281 if (DB_TYPE(mp) != M_DATA) 6282 mp->b_cont = mp1; 6283 else 6284 mp = mp1; 6285 6286 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6287 } 6288 mp1->b_rptr = (unsigned char *)ip6h; 6289 ip6i = (ip6i_t *)ip6h; 6290 6291 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 6292 if (option_exists & IPPF_HAS_IP6I) { 6293 ip6h = (ip6_t *)&ip6i[1]; 6294 ip6i->ip6i_flags = 0; 6295 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6296 6297 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6298 if (option_exists & IPPF_SCOPE_ID) { 6299 ip6i->ip6i_flags |= IP6I_IFINDEX; 6300 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6301 } else if (option_exists & IPPF_IFINDEX) { 6302 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6303 ASSERT(tipp->ipp_ifindex != 0); 6304 ip6i->ip6i_flags |= IP6I_IFINDEX; 6305 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6306 } 6307 6308 if (option_exists & IPPF_ADDR) { 6309 /* 6310 * Enable per-packet source address verification if 6311 * IPV6_PKTINFO specified the source address. 6312 * ip6_src is set in the transport's _wput function. 6313 */ 6314 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6315 } 6316 6317 if (option_exists & IPPF_DONTFRAG) { 6318 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6319 } 6320 6321 if (option_exists & IPPF_USE_MIN_MTU) { 6322 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6323 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6324 } 6325 6326 if (option_exists & IPPF_NEXTHOP) { 6327 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6328 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6329 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6330 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6331 } 6332 6333 /* 6334 * tell IP this is an ip6i_t private header 6335 */ 6336 ip6i->ip6i_nxt = IPPROTO_RAW; 6337 } 6338 6339 /* Initialize IPv6 header */ 6340 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6341 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6342 6343 /* Set the hoplimit of the outgoing packet. */ 6344 if (option_exists & IPPF_HOPLIMIT) { 6345 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 6346 ip6h->ip6_hops = ipp->ipp_hoplimit; 6347 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6348 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6349 ip6h->ip6_hops = udp->udp_multicast_ttl; 6350 if (option_exists & IPPF_MULTICAST_HOPS) 6351 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6352 } else { 6353 ip6h->ip6_hops = udp->udp_ttl; 6354 if (option_exists & IPPF_UNICAST_HOPS) 6355 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6356 } 6357 6358 if (option_exists & IPPF_ADDR) { 6359 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6360 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6361 ip6h->ip6_src = tipp->ipp_addr; 6362 } else { 6363 /* 6364 * The source address was not set using IPV6_PKTINFO. 6365 * First look at the bound source. 6366 * If unspecified fallback to __sin6_src_id. 6367 */ 6368 ip6h->ip6_src = udp->udp_v6src; 6369 if (sin6->__sin6_src_id != 0 && 6370 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6371 ip_srcid_find_id(sin6->__sin6_src_id, 6372 &ip6h->ip6_src, connp->conn_zoneid, 6373 us->us_netstack); 6374 } 6375 } 6376 6377 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6378 cp = (uint8_t *)&ip6h[1]; 6379 6380 /* 6381 * Here's where we have to start stringing together 6382 * any extension headers in the right order: 6383 * Hop-by-hop, destination, routing, and final destination opts. 
6384 */ 6385 if (option_exists & IPPF_HOPOPTS) { 6386 /* Hop-by-hop options */ 6387 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6388 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6389 if (hopoptslen == 0) { 6390 hopoptsptr = tipp->ipp_hopopts; 6391 hopoptslen = tipp->ipp_hopoptslen; 6392 is_ancillary = B_TRUE; 6393 } 6394 6395 *nxthdr_ptr = IPPROTO_HOPOPTS; 6396 nxthdr_ptr = &hbh->ip6h_nxt; 6397 6398 bcopy(hopoptsptr, cp, hopoptslen); 6399 cp += hopoptslen; 6400 6401 if (hopoptsptr != NULL && !is_ancillary) { 6402 kmem_free(hopoptsptr, hopoptslen); 6403 hopoptsptr = NULL; 6404 hopoptslen = 0; 6405 } 6406 } 6407 /* 6408 * En-route destination options 6409 * Only do them if there's a routing header as well 6410 */ 6411 if (option_exists & IPPF_RTDSTOPTS) { 6412 ip6_dest_t *dst = (ip6_dest_t *)cp; 6413 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6414 6415 *nxthdr_ptr = IPPROTO_DSTOPTS; 6416 nxthdr_ptr = &dst->ip6d_nxt; 6417 6418 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6419 cp += tipp->ipp_rtdstoptslen; 6420 } 6421 /* 6422 * Routing header next 6423 */ 6424 if (option_exists & IPPF_RTHDR) { 6425 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6426 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6427 6428 *nxthdr_ptr = IPPROTO_ROUTING; 6429 nxthdr_ptr = &rt->ip6r_nxt; 6430 6431 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6432 cp += tipp->ipp_rthdrlen; 6433 } 6434 /* 6435 * Do ultimate destination options 6436 */ 6437 if (option_exists & IPPF_DSTOPTS) { 6438 ip6_dest_t *dest = (ip6_dest_t *)cp; 6439 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6440 6441 *nxthdr_ptr = IPPROTO_DSTOPTS; 6442 nxthdr_ptr = &dest->ip6d_nxt; 6443 6444 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6445 cp += tipp->ipp_dstoptslen; 6446 } 6447 /* 6448 * Now set the last header pointer to the proto passed in 6449 */ 6450 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6451 *nxthdr_ptr = IPPROTO_UDP; 6452 6453 /* Update UDP header */ 6454 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 6455 udph->uha_dst_port = sin6->sin6_port; 6456 udph->uha_src_port = udp->udp_port; 6457 6458 /* 6459 * Copy in the destination address 6460 */ 6461 ip6h->ip6_dst = ip6_dst; 6462 6463 ip6h->ip6_vcf = 6464 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6465 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6466 6467 if (option_exists & IPPF_TCLASS) { 6468 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6469 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6470 tipp->ipp_tclass); 6471 } 6472 rw_exit(&udp->udp_rwlock); 6473 6474 if (option_exists & IPPF_RTHDR) { 6475 ip6_rthdr_t *rth; 6476 6477 /* 6478 * Perform any processing needed for source routing. 6479 * We know that all extension headers will be in the same mblk 6480 * as the IPv6 header. 6481 */ 6482 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6483 if (rth != NULL && rth->ip6r_segleft != 0) { 6484 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6485 /* 6486 * Drop packet - only support Type 0 routing. 6487 * Notify the application as well. 6488 */ 6489 *error = EPROTO; 6490 goto done; 6491 } 6492 6493 /* 6494 * rth->ip6r_len is twice the number of 6495 * addresses in the header. Thus it must be even. 6496 */ 6497 if (rth->ip6r_len & 0x1) { 6498 *error = EPROTO; 6499 goto done; 6500 } 6501 /* 6502 * Shuffle the routing header and ip6_dst 6503 * addresses, and get the checksum difference 6504 * between the first hop (in ip6_dst) and 6505 * the destination (in the last routing hdr entry). 6506 */ 6507 csum = ip_massage_options_v6(ip6h, rth, 6508 us->us_netstack); 6509 /* 6510 * Verify that the first hop isn't a mapped address. 6511 * Routers along the path need to do this verification 6512 * for subsequent hops. 
6513 */ 6514 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6515 *error = EADDRNOTAVAIL; 6516 goto done; 6517 } 6518 6519 cp += (rth->ip6r_len + 1)*8; 6520 } 6521 } 6522 6523 /* count up length of UDP packet */ 6524 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6525 if ((mp2 = mp1->b_cont) != NULL) { 6526 do { 6527 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6528 ip_len += (uint32_t)MBLKL(mp2); 6529 } while ((mp2 = mp2->b_cont) != NULL); 6530 } 6531 6532 /* 6533 * If the size of the packet is greater than the maximum allowed by 6534 * ip, return an error. Passing this down could cause panics because 6535 * the size will have wrapped and be inconsistent with the msg size. 6536 */ 6537 if (ip_len > IP_MAXPACKET) { 6538 *error = EMSGSIZE; 6539 goto done; 6540 } 6541 6542 /* Store the UDP length. Subtract length of extension hdrs */ 6543 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6544 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6545 6546 /* 6547 * We make it easy for IP to include our pseudo header 6548 * by putting our length in uh_checksum, modified (if 6549 * we have a routing header) by the checksum difference 6550 * between the ultimate destination and first hop addresses. 6551 * Note: UDP over IPv6 must always checksum the packet. 
6552 */ 6553 csum += udph->uha_length; 6554 csum = (csum & 0xFFFF) + (csum >> 16); 6555 udph->uha_checksum = (uint16_t)csum; 6556 6557 #ifdef _LITTLE_ENDIAN 6558 ip_len = htons(ip_len); 6559 #endif 6560 ip6h->ip6_plen = ip_len; 6561 6562 if (DB_TYPE(mp) != M_DATA) { 6563 cred_t *cr; 6564 pid_t cpid; 6565 6566 /* Move any cred from the T_UNITDATA_REQ to the packet */ 6567 cr = msg_extractcred(mp, &cpid); 6568 if (cr != NULL) { 6569 if (mp1->b_datap->db_credp != NULL) 6570 crfree(mp1->b_datap->db_credp); 6571 mp1->b_datap->db_credp = cr; 6572 mp1->b_datap->db_cpid = cpid; 6573 } 6574 6575 ASSERT(mp != mp1); 6576 freeb(mp); 6577 } 6578 6579 /* mp has been consumed and we'll return success */ 6580 ASSERT(*error == 0); 6581 mp = NULL; 6582 6583 /* We're done. Pass the packet to IP */ 6584 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6585 ip_output_v6(connp, mp1, q, IP_WPUT); 6586 6587 done: 6588 if (sth_wroff != 0) { 6589 (void) proto_set_tx_wroff(RD(q), connp, 6590 udp->udp_max_hdr_len + us->us_wroff_extra); 6591 } 6592 if (hopoptsptr != NULL && !is_ancillary) { 6593 kmem_free(hopoptsptr, hopoptslen); 6594 hopoptsptr = NULL; 6595 } 6596 if (*error != 0) { 6597 ASSERT(mp != NULL); 6598 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6599 } 6600 return (mp); 6601 } 6602 6603 6604 static int 6605 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6606 { 6607 sin_t *sin = (sin_t *)sa; 6608 sin6_t *sin6 = (sin6_t *)sa; 6609 6610 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6611 6612 if (udp->udp_state != TS_DATA_XFER) 6613 return (ENOTCONN); 6614 6615 switch (udp->udp_family) { 6616 case AF_INET: 6617 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6618 6619 if (*salenp < sizeof (sin_t)) 6620 return (EINVAL); 6621 6622 *salenp = sizeof (sin_t); 6623 *sin = sin_null; 6624 sin->sin_family = AF_INET; 6625 sin->sin_port = udp->udp_dstport; 6626 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6627 break; 6628 6629 case AF_INET6: 6630 if (*salenp < sizeof (sin6_t)) 6631 
return (EINVAL); 6632 6633 *salenp = sizeof (sin6_t); 6634 *sin6 = sin6_null; 6635 sin6->sin6_family = AF_INET6; 6636 sin6->sin6_port = udp->udp_dstport; 6637 sin6->sin6_addr = udp->udp_v6dst; 6638 sin6->sin6_flowinfo = udp->udp_flowinfo; 6639 break; 6640 } 6641 6642 return (0); 6643 } 6644 6645 static int 6646 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6647 { 6648 sin_t *sin = (sin_t *)sa; 6649 sin6_t *sin6 = (sin6_t *)sa; 6650 6651 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6652 6653 switch (udp->udp_family) { 6654 case AF_INET: 6655 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6656 6657 if (*salenp < sizeof (sin_t)) 6658 return (EINVAL); 6659 6660 *salenp = sizeof (sin_t); 6661 *sin = sin_null; 6662 sin->sin_family = AF_INET; 6663 sin->sin_port = udp->udp_port; 6664 6665 /* 6666 * If udp_v6src is unspecified, we might be bound to broadcast 6667 * / multicast. Use udp_bound_v6src as local address instead 6668 * (that could also still be unspecified). 6669 */ 6670 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6671 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6672 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6673 } else { 6674 sin->sin_addr.s_addr = 6675 V4_PART_OF_V6(udp->udp_bound_v6src); 6676 } 6677 break; 6678 6679 case AF_INET6: 6680 if (*salenp < sizeof (sin6_t)) 6681 return (EINVAL); 6682 6683 *salenp = sizeof (sin6_t); 6684 *sin6 = sin6_null; 6685 sin6->sin6_family = AF_INET6; 6686 sin6->sin6_port = udp->udp_port; 6687 sin6->sin6_flowinfo = udp->udp_flowinfo; 6688 6689 /* 6690 * If udp_v6src is unspecified, we might be bound to broadcast 6691 * / multicast. Use udp_bound_v6src as local address instead 6692 * (that could also still be unspecified). 6693 */ 6694 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6695 sin6->sin6_addr = udp->udp_v6src; 6696 else 6697 sin6->sin6_addr = udp->udp_bound_v6src; 6698 break; 6699 } 6700 6701 return (0); 6702 } 6703 6704 /* 6705 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
6706 */ 6707 static void 6708 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6709 { 6710 void *data; 6711 mblk_t *datamp = mp->b_cont; 6712 udp_t *udp = Q_TO_UDP(q); 6713 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6714 6715 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6716 cmdp->cb_error = EPROTO; 6717 qreply(q, mp); 6718 return; 6719 } 6720 data = datamp->b_rptr; 6721 6722 rw_enter(&udp->udp_rwlock, RW_READER); 6723 switch (cmdp->cb_cmd) { 6724 case TI_GETPEERNAME: 6725 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6726 break; 6727 case TI_GETMYNAME: 6728 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6729 break; 6730 default: 6731 cmdp->cb_error = EINVAL; 6732 break; 6733 } 6734 rw_exit(&udp->udp_rwlock); 6735 6736 qreply(q, mp); 6737 } 6738 6739 static void 6740 udp_use_pure_tpi(udp_t *udp) 6741 { 6742 rw_enter(&udp->udp_rwlock, RW_WRITER); 6743 udp->udp_issocket = B_FALSE; 6744 rw_exit(&udp->udp_rwlock); 6745 6746 UDP_STAT(udp->udp_us, udp_sock_fallback); 6747 } 6748 6749 static void 6750 udp_wput_other(queue_t *q, mblk_t *mp) 6751 { 6752 uchar_t *rptr = mp->b_rptr; 6753 struct datab *db; 6754 struct iocblk *iocp; 6755 cred_t *cr; 6756 conn_t *connp = Q_TO_CONN(q); 6757 udp_t *udp = connp->conn_udp; 6758 udp_stack_t *us; 6759 6760 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6761 "udp_wput_other_start: q %p", q); 6762 6763 us = udp->udp_us; 6764 db = mp->b_datap; 6765 6766 switch (db->db_type) { 6767 case M_CMD: 6768 udp_wput_cmdblk(q, mp); 6769 return; 6770 6771 case M_PROTO: 6772 case M_PCPROTO: 6773 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6774 freemsg(mp); 6775 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6776 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6777 return; 6778 } 6779 switch (((t_primp_t)rptr)->type) { 6780 case T_ADDR_REQ: 6781 udp_addr_req(q, mp); 6782 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6783 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6784 return; 6785 case O_T_BIND_REQ: 6786 case T_BIND_REQ: 
6787 udp_tpi_bind(q, mp); 6788 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6789 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 6790 return; 6791 case T_CONN_REQ: 6792 udp_tpi_connect(q, mp); 6793 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6794 "udp_wput_other_end: q %p (%S)", q, "connreq"); 6795 return; 6796 case T_CAPABILITY_REQ: 6797 udp_capability_req(q, mp); 6798 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6799 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 6800 return; 6801 case T_INFO_REQ: 6802 udp_info_req(q, mp); 6803 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6804 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 6805 return; 6806 case T_UNITDATA_REQ: 6807 /* 6808 * If a T_UNITDATA_REQ gets here, the address must 6809 * be bad. Valid T_UNITDATA_REQs are handled 6810 * in udp_wput. 6811 */ 6812 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 6813 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6814 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 6815 return; 6816 case T_UNBIND_REQ: 6817 udp_tpi_unbind(q, mp); 6818 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6819 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 6820 return; 6821 case T_SVR4_OPTMGMT_REQ: 6822 /* 6823 * All Solaris components should pass a db_credp 6824 * for this TPI message, hence we ASSERT. 6825 * But in case there is some other M_PROTO that looks 6826 * like a TPI message sent by some other kernel 6827 * component, we check and return an error. 6828 */ 6829 cr = msg_getcred(mp, NULL); 6830 ASSERT(cr != NULL); 6831 if (cr == NULL) { 6832 udp_err_ack(q, mp, TSYSERR, EINVAL); 6833 return; 6834 } 6835 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 6836 cr)) { 6837 (void) svr4_optcom_req(q, 6838 mp, cr, &udp_opt_obj, B_TRUE); 6839 } 6840 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6841 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6842 return; 6843 6844 case T_OPTMGMT_REQ: 6845 /* 6846 * All Solaris components should pass a db_credp 6847 * for this TPI message, hence we ASSERT. 
6848 * But in case there is some other M_PROTO that looks 6849 * like a TPI message sent by some other kernel 6850 * component, we check and return an error. 6851 */ 6852 cr = msg_getcred(mp, NULL); 6853 ASSERT(cr != NULL); 6854 if (cr == NULL) { 6855 udp_err_ack(q, mp, TSYSERR, EINVAL); 6856 return; 6857 } 6858 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 6859 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6860 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6861 return; 6862 6863 case T_DISCON_REQ: 6864 udp_tpi_disconnect(q, mp); 6865 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6866 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 6867 return; 6868 6869 /* The following TPI message is not supported by udp. */ 6870 case O_T_CONN_RES: 6871 case T_CONN_RES: 6872 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6873 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6874 "udp_wput_other_end: q %p (%S)", q, 6875 "connres/disconreq"); 6876 return; 6877 6878 /* The following 3 TPI messages are illegal for udp. */ 6879 case T_DATA_REQ: 6880 case T_EXDATA_REQ: 6881 case T_ORDREL_REQ: 6882 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6883 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6884 "udp_wput_other_end: q %p (%S)", q, 6885 "data/exdata/ordrel"); 6886 return; 6887 default: 6888 break; 6889 } 6890 break; 6891 case M_FLUSH: 6892 if (*rptr & FLUSHW) 6893 flushq(q, FLUSHDATA); 6894 break; 6895 case M_IOCTL: 6896 iocp = (struct iocblk *)mp->b_rptr; 6897 switch (iocp->ioc_cmd) { 6898 case TI_GETPEERNAME: 6899 if (udp->udp_state != TS_DATA_XFER) { 6900 /* 6901 * If a default destination address has not 6902 * been associated with the stream, then we 6903 * don't know the peer's name. 
6904 */ 6905 iocp->ioc_error = ENOTCONN; 6906 iocp->ioc_count = 0; 6907 mp->b_datap->db_type = M_IOCACK; 6908 qreply(q, mp); 6909 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6910 "udp_wput_other_end: q %p (%S)", q, 6911 "getpeername"); 6912 return; 6913 } 6914 /* FALLTHRU */ 6915 case TI_GETMYNAME: { 6916 /* 6917 * For TI_GETPEERNAME and TI_GETMYNAME, we first 6918 * need to copyin the user's strbuf structure. 6919 * Processing will continue in the M_IOCDATA case 6920 * below. 6921 */ 6922 mi_copyin(q, mp, NULL, 6923 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 6924 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6925 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 6926 return; 6927 } 6928 case ND_SET: 6929 /* nd_getset performs the necessary checking */ 6930 case ND_GET: 6931 if (nd_getset(q, us->us_nd, mp)) { 6932 qreply(q, mp); 6933 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6934 "udp_wput_other_end: q %p (%S)", q, "get"); 6935 return; 6936 } 6937 break; 6938 case _SIOCSOCKFALLBACK: 6939 /* 6940 * Either sockmod is about to be popped and the 6941 * socket would now be treated as a plain stream, 6942 * or a module is about to be pushed so we have 6943 * to follow pure TPI semantics. 6944 */ 6945 if (!udp->udp_issocket) { 6946 DB_TYPE(mp) = M_IOCNAK; 6947 iocp->ioc_error = EINVAL; 6948 } else { 6949 udp_use_pure_tpi(udp); 6950 6951 DB_TYPE(mp) = M_IOCACK; 6952 iocp->ioc_error = 0; 6953 } 6954 iocp->ioc_count = 0; 6955 iocp->ioc_rval = 0; 6956 qreply(q, mp); 6957 return; 6958 default: 6959 break; 6960 } 6961 break; 6962 case M_IOCDATA: 6963 udp_wput_iocdata(q, mp); 6964 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6965 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 6966 return; 6967 default: 6968 /* Unrecognized messages are passed through without change. 
*/ 6969 break; 6970 } 6971 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6972 "udp_wput_other_end: q %p (%S)", q, "end"); 6973 ip_output(connp, mp, q, IP_WPUT); 6974 } 6975 6976 /* 6977 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 6978 * messages. 6979 */ 6980 static void 6981 udp_wput_iocdata(queue_t *q, mblk_t *mp) 6982 { 6983 mblk_t *mp1; 6984 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 6985 STRUCT_HANDLE(strbuf, sb); 6986 udp_t *udp = Q_TO_UDP(q); 6987 int error; 6988 uint_t addrlen; 6989 6990 /* Make sure it is one of ours. */ 6991 switch (iocp->ioc_cmd) { 6992 case TI_GETMYNAME: 6993 case TI_GETPEERNAME: 6994 break; 6995 default: 6996 ip_output(udp->udp_connp, mp, q, IP_WPUT); 6997 return; 6998 } 6999 7000 switch (mi_copy_state(q, mp, &mp1)) { 7001 case -1: 7002 return; 7003 case MI_COPY_CASE(MI_COPY_IN, 1): 7004 break; 7005 case MI_COPY_CASE(MI_COPY_OUT, 1): 7006 /* 7007 * The address has been copied out, so now 7008 * copyout the strbuf. 7009 */ 7010 mi_copyout(q, mp); 7011 return; 7012 case MI_COPY_CASE(MI_COPY_OUT, 2): 7013 /* 7014 * The address and strbuf have been copied out. 7015 * We're done, so just acknowledge the original 7016 * M_IOCTL. 7017 */ 7018 mi_copy_done(q, mp, 0); 7019 return; 7020 default: 7021 /* 7022 * Something strange has happened, so acknowledge 7023 * the original M_IOCTL with an EPROTO error. 7024 */ 7025 mi_copy_done(q, mp, EPROTO); 7026 return; 7027 } 7028 7029 /* 7030 * Now we have the strbuf structure for TI_GETMYNAME 7031 * and TI_GETPEERNAME. Next we copyout the requested 7032 * address and then we'll copyout the strbuf. 7033 */ 7034 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7035 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); 7036 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7037 mi_copy_done(q, mp, EINVAL); 7038 return; 7039 } 7040 7041 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7042 7043 if (mp1 == NULL) 7044 return; 7045 7046 rw_enter(&udp->udp_rwlock, RW_READER); 7047 switch (iocp->ioc_cmd) { 7048 case TI_GETMYNAME: 7049 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); 7050 break; 7051 case TI_GETPEERNAME: 7052 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7053 break; 7054 } 7055 rw_exit(&udp->udp_rwlock); 7056 7057 if (error != 0) { 7058 mi_copy_done(q, mp, error); 7059 } else { 7060 mp1->b_wptr += addrlen; 7061 STRUCT_FSET(sb, len, addrlen); 7062 7063 /* Copy out the address */ 7064 mi_copyout(q, mp); 7065 } 7066 } 7067 7068 static int 7069 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7070 udpattrs_t *udpattrs) 7071 { 7072 struct T_unitdata_req *udreqp; 7073 int is_absreq_failure; 7074 cred_t *cr; 7075 7076 ASSERT(((t_primp_t)mp->b_rptr)->type); 7077 7078 /* 7079 * All Solaris components should pass a db_credp 7080 * for this TPI message, hence we should ASSERT. 7081 * However, RPC (svc_clts_ksend) does this odd thing where it 7082 * passes the options from a T_UNITDATA_IND unchanged in a 7083 * T_UNITDATA_REQ. While that is the right thing to do for 7084 * some options, SCM_UCRED being the key one, this also makes it 7085 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 
7086 */ 7087 cr = msg_getcred(mp, NULL); 7088 if (cr == NULL) { 7089 cr = Q_TO_CONN(q)->conn_cred; 7090 } 7091 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7092 7093 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7094 udreqp->OPT_offset, cr, &udp_opt_obj, 7095 udpattrs, &is_absreq_failure); 7096 7097 if (*errorp != 0) { 7098 /* 7099 * Note: No special action needed in this 7100 * module for "is_absreq_failure" 7101 */ 7102 return (-1); /* failure */ 7103 } 7104 ASSERT(is_absreq_failure == 0); 7105 return (0); /* success */ 7106 } 7107 7108 void 7109 udp_ddi_g_init(void) 7110 { 7111 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7112 udp_opt_obj.odb_opt_arr_cnt); 7113 7114 /* 7115 * We want to be informed each time a stack is created or 7116 * destroyed in the kernel, so we can maintain the 7117 * set of udp_stack_t's. 7118 */ 7119 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7120 } 7121 7122 void 7123 udp_ddi_g_destroy(void) 7124 { 7125 netstack_unregister(NS_UDP); 7126 } 7127 7128 #define INET_NAME "ip" 7129 7130 /* 7131 * Initialize the UDP stack instance. 7132 */ 7133 static void * 7134 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7135 { 7136 udp_stack_t *us; 7137 udpparam_t *pa; 7138 int i; 7139 int error = 0; 7140 major_t major; 7141 7142 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7143 us->us_netstack = ns; 7144 7145 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7146 us->us_epriv_ports[0] = 2049; 7147 us->us_epriv_ports[1] = 4045; 7148 7149 /* 7150 * The smallest anonymous port in the priviledged port range which UDP 7151 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7152 */ 7153 us->us_min_anonpriv_port = 512; 7154 7155 us->us_bind_fanout_size = udp_bind_fanout_size; 7156 7157 /* Roundup variable that might have been modified in /etc/system */ 7158 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7159 /* Not a power of two. 
Round up to nearest power of two */ 7160 for (i = 0; i < 31; i++) { 7161 if (us->us_bind_fanout_size < (1 << i)) 7162 break; 7163 } 7164 us->us_bind_fanout_size = 1 << i; 7165 } 7166 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7167 sizeof (udp_fanout_t), KM_SLEEP); 7168 for (i = 0; i < us->us_bind_fanout_size; i++) { 7169 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7170 NULL); 7171 } 7172 7173 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7174 7175 us->us_param_arr = pa; 7176 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7177 7178 (void) udp_param_register(&us->us_nd, 7179 us->us_param_arr, A_CNT(udp_param_arr)); 7180 7181 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7182 us->us_mibkp = udp_kstat_init(stackid); 7183 7184 major = mod_name_to_major(INET_NAME); 7185 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7186 ASSERT(error == 0); 7187 return (us); 7188 } 7189 7190 /* 7191 * Free the UDP stack instance. 
7192 */ 7193 static void 7194 udp_stack_fini(netstackid_t stackid, void *arg) 7195 { 7196 udp_stack_t *us = (udp_stack_t *)arg; 7197 int i; 7198 7199 for (i = 0; i < us->us_bind_fanout_size; i++) { 7200 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7201 } 7202 7203 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7204 sizeof (udp_fanout_t)); 7205 7206 us->us_bind_fanout = NULL; 7207 7208 nd_free(&us->us_nd); 7209 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7210 us->us_param_arr = NULL; 7211 7212 udp_kstat_fini(stackid, us->us_mibkp); 7213 us->us_mibkp = NULL; 7214 7215 udp_kstat2_fini(stackid, us->us_kstat); 7216 us->us_kstat = NULL; 7217 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7218 7219 ldi_ident_release(us->us_ldi_ident); 7220 kmem_free(us, sizeof (*us)); 7221 } 7222 7223 static void * 7224 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7225 { 7226 kstat_t *ksp; 7227 7228 udp_stat_t template = { 7229 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7230 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7231 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7232 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7233 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7234 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7235 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7236 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7237 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7238 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7239 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7240 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7241 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7242 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7243 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7244 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7245 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7246 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7247 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7248 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7249 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7250 { "udp_in_recvrthdr", 
KSTAT_DATA_UINT64 }, 7251 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7252 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7253 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7254 #ifdef DEBUG 7255 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7256 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7257 #endif 7258 }; 7259 7260 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7261 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7262 KSTAT_FLAG_VIRTUAL, stackid); 7263 7264 if (ksp == NULL) 7265 return (NULL); 7266 7267 bcopy(&template, us_statisticsp, sizeof (template)); 7268 ksp->ks_data = (void *)us_statisticsp; 7269 ksp->ks_private = (void *)(uintptr_t)stackid; 7270 7271 kstat_install(ksp); 7272 return (ksp); 7273 } 7274 7275 static void 7276 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7277 { 7278 if (ksp != NULL) { 7279 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7280 kstat_delete_netstack(ksp, stackid); 7281 } 7282 } 7283 7284 static void * 7285 udp_kstat_init(netstackid_t stackid) 7286 { 7287 kstat_t *ksp; 7288 7289 udp_named_kstat_t template = { 7290 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7291 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7292 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7293 { "entrySize", KSTAT_DATA_INT32, 0 }, 7294 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7295 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7296 }; 7297 7298 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7299 KSTAT_TYPE_NAMED, 7300 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7301 7302 if (ksp == NULL || ksp->ks_data == NULL) 7303 return (NULL); 7304 7305 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7306 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7307 7308 bcopy(&template, ksp->ks_data, sizeof (template)); 7309 ksp->ks_update = udp_kstat_update; 7310 ksp->ks_private = (void *)(uintptr_t)stackid; 7311 7312 kstat_install(ksp); 7313 return (ksp); 7314 } 7315 7316 static void 7317 udp_kstat_fini(netstackid_t 
stackid, kstat_t *ksp) 7318 { 7319 if (ksp != NULL) { 7320 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7321 kstat_delete_netstack(ksp, stackid); 7322 } 7323 } 7324 7325 static int 7326 udp_kstat_update(kstat_t *kp, int rw) 7327 { 7328 udp_named_kstat_t *udpkp; 7329 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7330 netstack_t *ns; 7331 udp_stack_t *us; 7332 7333 if ((kp == NULL) || (kp->ks_data == NULL)) 7334 return (EIO); 7335 7336 if (rw == KSTAT_WRITE) 7337 return (EACCES); 7338 7339 ns = netstack_find_by_stackid(stackid); 7340 if (ns == NULL) 7341 return (-1); 7342 us = ns->netstack_udp; 7343 if (us == NULL) { 7344 netstack_rele(ns); 7345 return (-1); 7346 } 7347 udpkp = (udp_named_kstat_t *)kp->ks_data; 7348 7349 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7350 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7351 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7352 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7353 netstack_rele(ns); 7354 return (0); 7355 } 7356 7357 static size_t 7358 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7359 { 7360 udp_stack_t *us = udp->udp_us; 7361 7362 /* We add a bit of extra buffering */ 7363 size += size >> 1; 7364 if (size > us->us_max_buf) 7365 size = us->us_max_buf; 7366 7367 udp->udp_rcv_hiwat = size; 7368 return (size); 7369 } 7370 7371 /* 7372 * For the lower queue so that UDP can be a dummy mux. 
7373 * Nobody should be sending 7374 * packets up this stream 7375 */ 7376 static void 7377 udp_lrput(queue_t *q, mblk_t *mp) 7378 { 7379 mblk_t *mp1; 7380 7381 switch (mp->b_datap->db_type) { 7382 case M_FLUSH: 7383 /* Turn around */ 7384 if (*mp->b_rptr & FLUSHW) { 7385 *mp->b_rptr &= ~FLUSHR; 7386 qreply(q, mp); 7387 return; 7388 } 7389 break; 7390 } 7391 /* Could receive messages that passed through ar_rput */ 7392 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7393 mp1->b_prev = mp1->b_next = NULL; 7394 freemsg(mp); 7395 } 7396 7397 /* 7398 * For the lower queue so that UDP can be a dummy mux. 7399 * Nobody should be sending packets down this stream. 7400 */ 7401 /* ARGSUSED */ 7402 void 7403 udp_lwput(queue_t *q, mblk_t *mp) 7404 { 7405 freemsg(mp); 7406 } 7407 7408 /* 7409 * Below routines for UDP socket module. 7410 */ 7411 7412 static conn_t * 7413 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7414 { 7415 udp_t *udp; 7416 conn_t *connp; 7417 zoneid_t zoneid; 7418 netstack_t *ns; 7419 udp_stack_t *us; 7420 7421 ns = netstack_find_by_cred(credp); 7422 ASSERT(ns != NULL); 7423 us = ns->netstack_udp; 7424 ASSERT(us != NULL); 7425 7426 /* 7427 * For exclusive stacks we set the zoneid to zero 7428 * to make UDP operate as if in the global zone. 7429 */ 7430 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7431 zoneid = GLOBAL_ZONEID; 7432 else 7433 zoneid = crgetzoneid(credp); 7434 7435 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7436 7437 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7438 if (connp == NULL) { 7439 netstack_rele(ns); 7440 return (NULL); 7441 } 7442 udp = connp->conn_udp; 7443 7444 /* 7445 * ipcl_conn_create did a netstack_hold. 
Undo the hold that was 7446 * done by netstack_find_by_cred() 7447 */ 7448 netstack_rele(ns); 7449 7450 rw_enter(&udp->udp_rwlock, RW_WRITER); 7451 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7452 ASSERT(connp->conn_udp == udp); 7453 ASSERT(udp->udp_connp == connp); 7454 7455 /* Set the initial state of the stream and the privilege status. */ 7456 udp->udp_state = TS_UNBND; 7457 if (isv6) { 7458 udp->udp_family = AF_INET6; 7459 udp->udp_ipversion = IPV6_VERSION; 7460 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7461 udp->udp_ttl = us->us_ipv6_hoplimit; 7462 connp->conn_af_isv6 = B_TRUE; 7463 connp->conn_flags |= IPCL_ISV6; 7464 } else { 7465 udp->udp_family = AF_INET; 7466 udp->udp_ipversion = IPV4_VERSION; 7467 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7468 udp->udp_ttl = us->us_ipv4_ttl; 7469 connp->conn_af_isv6 = B_FALSE; 7470 connp->conn_flags &= ~IPCL_ISV6; 7471 } 7472 7473 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7474 udp->udp_pending_op = -1; 7475 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7476 connp->conn_zoneid = zoneid; 7477 7478 udp->udp_open_time = lbolt64; 7479 udp->udp_open_pid = curproc->p_pid; 7480 7481 /* 7482 * If the caller has the process-wide flag set, then default to MAC 7483 * exempt mode. This allows read-down to unlabeled hosts. 
7484 */ 7485 if (getpflags(NET_MAC_AWARE, credp) != 0) 7486 connp->conn_mac_exempt = B_TRUE; 7487 7488 connp->conn_ulp_labeled = is_system_labeled(); 7489 7490 udp->udp_us = us; 7491 7492 connp->conn_recv = udp_input; 7493 crhold(credp); 7494 connp->conn_cred = credp; 7495 7496 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7497 7498 rw_exit(&udp->udp_rwlock); 7499 7500 return (connp); 7501 } 7502 7503 /* ARGSUSED */ 7504 sock_lower_handle_t 7505 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7506 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7507 { 7508 udp_t *udp = NULL; 7509 udp_stack_t *us; 7510 conn_t *connp; 7511 boolean_t isv6; 7512 7513 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7514 (proto != 0 && proto != IPPROTO_UDP)) { 7515 *errorp = EPROTONOSUPPORT; 7516 return (NULL); 7517 } 7518 7519 if (family == AF_INET6) 7520 isv6 = B_TRUE; 7521 else 7522 isv6 = B_FALSE; 7523 7524 connp = udp_do_open(credp, isv6, flags); 7525 if (connp == NULL) { 7526 *errorp = ENOMEM; 7527 return (NULL); 7528 } 7529 7530 udp = connp->conn_udp; 7531 ASSERT(udp != NULL); 7532 us = udp->udp_us; 7533 ASSERT(us != NULL); 7534 7535 udp->udp_issocket = B_TRUE; 7536 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7537 7538 /* Set flow control */ 7539 rw_enter(&udp->udp_rwlock, RW_WRITER); 7540 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7541 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7542 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 7543 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 7544 udp->udp_xmit_lowat = us->us_xmit_lowat; 7545 7546 if (udp->udp_family == AF_INET6) { 7547 /* Build initial header template for transmit */ 7548 if ((*errorp = udp_build_hdrs(udp)) != 0) { 7549 rw_exit(&udp->udp_rwlock); 7550 ipcl_conn_destroy(connp); 7551 return (NULL); 7552 } 7553 } 7554 rw_exit(&udp->udp_rwlock); 7555 7556 connp->conn_flow_cntrld = B_FALSE; 7557 7558 ASSERT(us->us_ldi_ident != NULL); 7559 7560 if ((*errorp = 
ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 7561 ip1dbg(("udp_create: create of IP helper stream failed\n")); 7562 udp_do_close(connp); 7563 return (NULL); 7564 } 7565 7566 /* Set the send flow control */ 7567 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 7568 connp->conn_wq->q_lowat = us->us_xmit_lowat; 7569 7570 mutex_enter(&connp->conn_lock); 7571 connp->conn_state_flags &= ~CONN_INCIPIENT; 7572 mutex_exit(&connp->conn_lock); 7573 7574 *errorp = 0; 7575 *smodep = SM_ATOMIC; 7576 *sock_downcalls = &sock_udp_downcalls; 7577 return ((sock_lower_handle_t)connp); 7578 } 7579 7580 /* ARGSUSED */ 7581 void 7582 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 7583 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 7584 { 7585 conn_t *connp = (conn_t *)proto_handle; 7586 udp_t *udp = connp->conn_udp; 7587 udp_stack_t *us = udp->udp_us; 7588 struct sock_proto_props sopp; 7589 7590 /* All Solaris components should pass a cred for this operation. */ 7591 ASSERT(cr != NULL); 7592 7593 connp->conn_upcalls = sock_upcalls; 7594 connp->conn_upper_handle = sock_handle; 7595 7596 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 7597 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 7598 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 7599 sopp.sopp_maxblk = INFPSZ; 7600 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 7601 sopp.sopp_maxaddrlen = sizeof (sin6_t); 7602 sopp.sopp_maxpsz = 7603 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 7604 UDP_MAXPACKET_IPV6; 7605 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : 7606 udp_mod_info.mi_minpsz; 7607 7608 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 7609 &sopp); 7610 } 7611 7612 static void 7613 udp_do_close(conn_t *connp) 7614 { 7615 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 7616 7617 udp_quiesce_conn(connp); 7618 ip_quiesce_conn(connp); 7619 7620 if (!IPCL_IS_NONSTR(connp)) { 7621 ASSERT(connp->conn_wq != NULL); 7622 ASSERT(connp->conn_rq != NULL); 7623 qprocsoff(connp->conn_rq); 7624 } 7625 7626 udp_close_free(connp); 7627 7628 /* 7629 * Now we are truly single threaded on this stream, and can 7630 * delete the things hanging off the connp, and finally the connp. 7631 * We removed this connp from the fanout list, it cannot be 7632 * accessed thru the fanouts, and we already waited for the 7633 * conn_ref to drop to 0. We are already in close, so 7634 * there cannot be any other thread from the top. qprocsoff 7635 * has completed, and service has completed or won't run in 7636 * future. 7637 */ 7638 ASSERT(connp->conn_ref == 1); 7639 if (!IPCL_IS_NONSTR(connp)) { 7640 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 7641 } else { 7642 ip_free_helper_stream(connp); 7643 } 7644 7645 connp->conn_ref--; 7646 ipcl_conn_destroy(connp); 7647 } 7648 7649 /* ARGSUSED */ 7650 int 7651 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 7652 { 7653 conn_t *connp = (conn_t *)proto_handle; 7654 7655 /* All Solaris components should pass a cred for this operation. 
*/ 7656 ASSERT(cr != NULL); 7657 7658 udp_do_close(connp); 7659 return (0); 7660 } 7661 7662 static int 7663 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 7664 boolean_t bind_to_req_port_only) 7665 { 7666 sin_t *sin; 7667 sin6_t *sin6; 7668 sin6_t sin6addr; 7669 in_port_t port; /* Host byte order */ 7670 in_port_t requested_port; /* Host byte order */ 7671 int count; 7672 in6_addr_t v6src; 7673 int loopmax; 7674 udp_fanout_t *udpf; 7675 in_port_t lport; /* Network byte order */ 7676 zoneid_t zoneid; 7677 udp_t *udp; 7678 boolean_t is_inaddr_any; 7679 mlp_type_t addrtype, mlptype; 7680 udp_stack_t *us; 7681 int error = 0; 7682 mblk_t *mp = NULL; 7683 7684 udp = connp->conn_udp; 7685 us = udp->udp_us; 7686 7687 if (udp->udp_state != TS_UNBND) { 7688 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7689 "udp_bind: bad state, %u", udp->udp_state); 7690 return (-TOUTSTATE); 7691 } 7692 7693 switch (len) { 7694 case 0: 7695 if (udp->udp_family == AF_INET) { 7696 sin = (sin_t *)&sin6addr; 7697 *sin = sin_null; 7698 sin->sin_family = AF_INET; 7699 sin->sin_addr.s_addr = INADDR_ANY; 7700 udp->udp_ipversion = IPV4_VERSION; 7701 } else { 7702 ASSERT(udp->udp_family == AF_INET6); 7703 sin6 = (sin6_t *)&sin6addr; 7704 *sin6 = sin6_null; 7705 sin6->sin6_family = AF_INET6; 7706 V6_SET_ZERO(sin6->sin6_addr); 7707 udp->udp_ipversion = IPV6_VERSION; 7708 } 7709 port = 0; 7710 break; 7711 7712 case sizeof (sin_t): /* Complete IPv4 address */ 7713 sin = (sin_t *)sa; 7714 7715 if (sin == NULL || !OK_32PTR((char *)sin)) 7716 return (EINVAL); 7717 7718 if (udp->udp_family != AF_INET || 7719 sin->sin_family != AF_INET) { 7720 return (EAFNOSUPPORT); 7721 } 7722 port = ntohs(sin->sin_port); 7723 break; 7724 7725 case sizeof (sin6_t): /* complete IPv6 address */ 7726 sin6 = (sin6_t *)sa; 7727 7728 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 7729 return (EINVAL); 7730 7731 if (udp->udp_family != AF_INET6 || 7732 sin6->sin6_family != AF_INET6) { 7733 return 
(EAFNOSUPPORT); 7734 } 7735 port = ntohs(sin6->sin6_port); 7736 break; 7737 7738 default: /* Invalid request */ 7739 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7740 "udp_bind: bad ADDR_length length %u", len); 7741 return (-TBADADDR); 7742 } 7743 7744 requested_port = port; 7745 7746 if (requested_port == 0 || !bind_to_req_port_only) 7747 bind_to_req_port_only = B_FALSE; 7748 else /* T_BIND_REQ and requested_port != 0 */ 7749 bind_to_req_port_only = B_TRUE; 7750 7751 if (requested_port == 0) { 7752 /* 7753 * If the application passed in zero for the port number, it 7754 * doesn't care which port number we bind to. Get one in the 7755 * valid range. 7756 */ 7757 if (udp->udp_anon_priv_bind) { 7758 port = udp_get_next_priv_port(udp); 7759 } else { 7760 port = udp_update_next_port(udp, 7761 us->us_next_port_to_try, B_TRUE); 7762 } 7763 } else { 7764 /* 7765 * If the port is in the well-known privileged range, 7766 * make sure the caller was privileged. 7767 */ 7768 int i; 7769 boolean_t priv = B_FALSE; 7770 7771 if (port < us->us_smallest_nonpriv_port) { 7772 priv = B_TRUE; 7773 } else { 7774 for (i = 0; i < us->us_num_epriv_ports; i++) { 7775 if (port == us->us_epriv_ports[i]) { 7776 priv = B_TRUE; 7777 break; 7778 } 7779 } 7780 } 7781 7782 if (priv) { 7783 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 7784 return (-TACCES); 7785 } 7786 } 7787 7788 if (port == 0) 7789 return (-TNOADDR); 7790 7791 /* 7792 * The state must be TS_UNBND. TPI mandates that users must send 7793 * TPI primitives only 1 at a time and wait for the response before 7794 * sending the next primitive. 7795 */ 7796 rw_enter(&udp->udp_rwlock, RW_WRITER); 7797 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 7798 rw_exit(&udp->udp_rwlock); 7799 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7800 "udp_bind: bad state, %u", udp->udp_state); 7801 return (-TOUTSTATE); 7802 } 7803 /* XXX how to remove the T_BIND_REQ? 
Should set it before calling */ 7804 udp->udp_pending_op = T_BIND_REQ; 7805 /* 7806 * Copy the source address into our udp structure. This address 7807 * may still be zero; if so, IP will fill in the correct address 7808 * each time an outbound packet is passed to it. Since the udp is 7809 * not yet in the bind hash list, we don't grab the uf_lock to 7810 * change udp_ipversion 7811 */ 7812 if (udp->udp_family == AF_INET) { 7813 ASSERT(sin != NULL); 7814 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7815 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 7816 udp->udp_ip_snd_options_len; 7817 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 7818 } else { 7819 ASSERT(sin6 != NULL); 7820 v6src = sin6->sin6_addr; 7821 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 7822 /* 7823 * no need to hold the uf_lock to set the udp_ipversion 7824 * since we are not yet in the fanout list 7825 */ 7826 udp->udp_ipversion = IPV4_VERSION; 7827 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 7828 UDPH_SIZE + udp->udp_ip_snd_options_len; 7829 } else { 7830 udp->udp_ipversion = IPV6_VERSION; 7831 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 7832 } 7833 } 7834 7835 /* 7836 * If udp_reuseaddr is not set, then we have to make sure that 7837 * the IP address and port number the application requested 7838 * (or we selected for the application) is not being used by 7839 * another stream. If another stream is already using the 7840 * requested IP address and port, the behavior depends on 7841 * "bind_to_req_port_only". If set the bind fails; otherwise we 7842 * search for any an unused port to bind to the the stream. 7843 * 7844 * As per the BSD semantics, as modified by the Deering multicast 7845 * changes, if udp_reuseaddr is set, then we allow multiple binds 7846 * to the same port independent of the local IP address. 7847 * 7848 * This is slightly different than in SunOS 4.X which did not 7849 * support IP multicast. 
Note that the change implemented by the 7850 * Deering multicast code effects all binds - not only binding 7851 * to IP multicast addresses. 7852 * 7853 * Note that when binding to port zero we ignore SO_REUSEADDR in 7854 * order to guarantee a unique port. 7855 */ 7856 7857 count = 0; 7858 if (udp->udp_anon_priv_bind) { 7859 /* 7860 * loopmax = (IPPORT_RESERVED-1) - 7861 * us->us_min_anonpriv_port + 1 7862 */ 7863 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 7864 } else { 7865 loopmax = us->us_largest_anon_port - 7866 us->us_smallest_anon_port + 1; 7867 } 7868 7869 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 7870 zoneid = connp->conn_zoneid; 7871 7872 for (;;) { 7873 udp_t *udp1; 7874 boolean_t found_exclbind = B_FALSE; 7875 7876 /* 7877 * Walk through the list of udp streams bound to 7878 * requested port with the same IP address. 7879 */ 7880 lport = htons(port); 7881 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 7882 us->us_bind_fanout_size)]; 7883 mutex_enter(&udpf->uf_lock); 7884 for (udp1 = udpf->uf_udp; udp1 != NULL; 7885 udp1 = udp1->udp_bind_hash) { 7886 if (lport != udp1->udp_port) 7887 continue; 7888 7889 /* 7890 * On a labeled system, we must treat bindings to ports 7891 * on shared IP addresses by sockets with MAC exemption 7892 * privilege as being in all zones, as there's 7893 * otherwise no way to identify the right receiver. 7894 */ 7895 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 7896 IPCL_ZONE_MATCH(connp, 7897 udp1->udp_connp->conn_zoneid)) && 7898 !connp->conn_mac_exempt && \ 7899 !udp1->udp_connp->conn_mac_exempt) 7900 continue; 7901 7902 /* 7903 * If UDP_EXCLBIND is set for either the bound or 7904 * binding endpoint, the semantics of bind 7905 * is changed according to the following chart. 7906 * 7907 * spec = specified address (v4 or v6) 7908 * unspec = unspecified address (v4 or v6) 7909 * A = specified addresses are different for endpoints 7910 * 7911 * bound bind to allowed? 
7912 * ------------------------------------- 7913 * unspec unspec no 7914 * unspec spec no 7915 * spec unspec no 7916 * spec spec yes if A 7917 * 7918 * For labeled systems, SO_MAC_EXEMPT behaves the same 7919 * as UDP_EXCLBIND, except that zoneid is ignored. 7920 */ 7921 if (udp1->udp_exclbind || udp->udp_exclbind || 7922 udp1->udp_connp->conn_mac_exempt || 7923 connp->conn_mac_exempt) { 7924 if (V6_OR_V4_INADDR_ANY( 7925 udp1->udp_bound_v6src) || 7926 is_inaddr_any || 7927 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 7928 &v6src)) { 7929 found_exclbind = B_TRUE; 7930 break; 7931 } 7932 continue; 7933 } 7934 7935 /* 7936 * Check ipversion to allow IPv4 and IPv6 sockets to 7937 * have disjoint port number spaces. 7938 */ 7939 if (udp->udp_ipversion != udp1->udp_ipversion) { 7940 7941 /* 7942 * On the first time through the loop, if the 7943 * the user intentionally specified a 7944 * particular port number, then ignore any 7945 * bindings of the other protocol that may 7946 * conflict. This allows the user to bind IPv6 7947 * alone and get both v4 and v6, or bind both 7948 * both and get each seperately. On subsequent 7949 * times through the loop, we're checking a 7950 * port that we chose (not the user) and thus 7951 * we do not allow casual duplicate bindings. 7952 */ 7953 if (count == 0 && requested_port != 0) 7954 continue; 7955 } 7956 7957 /* 7958 * No difference depending on SO_REUSEADDR. 7959 * 7960 * If existing port is bound to a 7961 * non-wildcard IP address and 7962 * the requesting stream is bound to 7963 * a distinct different IP addresses 7964 * (non-wildcard, also), keep going. 
7965 */ 7966 if (!is_inaddr_any && 7967 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 7968 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 7969 &v6src)) { 7970 continue; 7971 } 7972 break; 7973 } 7974 7975 if (!found_exclbind && 7976 (udp->udp_reuseaddr && requested_port != 0)) { 7977 break; 7978 } 7979 7980 if (udp1 == NULL) { 7981 /* 7982 * No other stream has this IP address 7983 * and port number. We can use it. 7984 */ 7985 break; 7986 } 7987 mutex_exit(&udpf->uf_lock); 7988 if (bind_to_req_port_only) { 7989 /* 7990 * We get here only when requested port 7991 * is bound (and only first of the for() 7992 * loop iteration). 7993 * 7994 * The semantics of this bind request 7995 * require it to fail so we return from 7996 * the routine (and exit the loop). 7997 * 7998 */ 7999 udp->udp_pending_op = -1; 8000 rw_exit(&udp->udp_rwlock); 8001 return (-TADDRBUSY); 8002 } 8003 8004 if (udp->udp_anon_priv_bind) { 8005 port = udp_get_next_priv_port(udp); 8006 } else { 8007 if ((count == 0) && (requested_port != 0)) { 8008 /* 8009 * If the application wants us to find 8010 * a port, get one to start with. Set 8011 * requested_port to 0, so that we will 8012 * update us->us_next_port_to_try below. 8013 */ 8014 port = udp_update_next_port(udp, 8015 us->us_next_port_to_try, B_TRUE); 8016 requested_port = 0; 8017 } else { 8018 port = udp_update_next_port(udp, port + 1, 8019 B_FALSE); 8020 } 8021 } 8022 8023 if (port == 0 || ++count >= loopmax) { 8024 /* 8025 * We've tried every possible port number and 8026 * there are none available, so send an error 8027 * to the user. 8028 */ 8029 udp->udp_pending_op = -1; 8030 rw_exit(&udp->udp_rwlock); 8031 return (-TNOADDR); 8032 } 8033 } 8034 8035 /* 8036 * Copy the source address into our udp structure. This address 8037 * may still be zero; if so, ip will fill in the correct address 8038 * each time an outbound packet is passed to it. 
8039 * If we are binding to a broadcast or multicast address then 8040 * udp_post_ip_bind_connect will clear the source address 8041 * when udp_do_bind success. 8042 */ 8043 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8044 udp->udp_port = lport; 8045 /* 8046 * Now reset the the next anonymous port if the application requested 8047 * an anonymous port, or we handed out the next anonymous port. 8048 */ 8049 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8050 us->us_next_port_to_try = port + 1; 8051 } 8052 8053 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 8054 if (udp->udp_family == AF_INET) { 8055 sin->sin_port = udp->udp_port; 8056 } else { 8057 sin6->sin6_port = udp->udp_port; 8058 /* Rebuild the header template */ 8059 error = udp_build_hdrs(udp); 8060 if (error != 0) { 8061 udp->udp_pending_op = -1; 8062 rw_exit(&udp->udp_rwlock); 8063 mutex_exit(&udpf->uf_lock); 8064 return (error); 8065 } 8066 } 8067 udp->udp_state = TS_IDLE; 8068 udp_bind_hash_insert(udpf, udp); 8069 mutex_exit(&udpf->uf_lock); 8070 rw_exit(&udp->udp_rwlock); 8071 8072 if (cl_inet_bind) { 8073 /* 8074 * Running in cluster mode - register bind information 8075 */ 8076 if (udp->udp_ipversion == IPV4_VERSION) { 8077 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8078 IPPROTO_UDP, AF_INET, 8079 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8080 (in_port_t)udp->udp_port, NULL); 8081 } else { 8082 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8083 IPPROTO_UDP, AF_INET6, 8084 (uint8_t *)&(udp->udp_v6src), 8085 (in_port_t)udp->udp_port, NULL); 8086 } 8087 } 8088 8089 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8090 if (is_system_labeled() && (!connp->conn_anon_port || 8091 connp->conn_anon_mlp)) { 8092 uint16_t mlpport; 8093 zone_t *zone; 8094 8095 zone = crgetzone(cr); 8096 connp->conn_mlp_type = udp->udp_recvucred ? 
mlptBoth : 8097 mlptSingle; 8098 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 8099 &v6src, us->us_netstack->netstack_ip); 8100 if (addrtype == mlptSingle) { 8101 rw_enter(&udp->udp_rwlock, RW_WRITER); 8102 udp->udp_pending_op = -1; 8103 rw_exit(&udp->udp_rwlock); 8104 connp->conn_anon_port = B_FALSE; 8105 connp->conn_mlp_type = mlptSingle; 8106 return (-TNOADDR); 8107 } 8108 mlpport = connp->conn_anon_port ? PMAPPORT : port; 8109 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8110 addrtype); 8111 8112 /* 8113 * It is a coding error to attempt to bind an MLP port 8114 * without first setting SOL_SOCKET/SCM_UCRED. 8115 */ 8116 if (mlptype != mlptSingle && 8117 connp->conn_mlp_type == mlptSingle) { 8118 rw_enter(&udp->udp_rwlock, RW_WRITER); 8119 udp->udp_pending_op = -1; 8120 rw_exit(&udp->udp_rwlock); 8121 connp->conn_anon_port = B_FALSE; 8122 connp->conn_mlp_type = mlptSingle; 8123 return (EINVAL); 8124 } 8125 8126 /* 8127 * It is an access violation to attempt to bind an MLP port 8128 * without NET_BINDMLP privilege. 8129 */ 8130 if (mlptype != mlptSingle && 8131 secpolicy_net_bindmlp(cr) != 0) { 8132 if (udp->udp_debug) { 8133 (void) strlog(UDP_MOD_ID, 0, 1, 8134 SL_ERROR|SL_TRACE, 8135 "udp_bind: no priv for multilevel port %d", 8136 mlpport); 8137 } 8138 rw_enter(&udp->udp_rwlock, RW_WRITER); 8139 udp->udp_pending_op = -1; 8140 rw_exit(&udp->udp_rwlock); 8141 connp->conn_anon_port = B_FALSE; 8142 connp->conn_mlp_type = mlptSingle; 8143 return (-TACCES); 8144 } 8145 8146 /* 8147 * If we're specifically binding a shared IP address and the 8148 * port is MLP on shared addresses, then check to see if this 8149 * zone actually owns the MLP. Reject if not. 8150 */ 8151 if (mlptype == mlptShared && addrtype == mlptShared) { 8152 /* 8153 * No need to handle exclusive-stack zones since 8154 * ALL_ZONES only applies to the shared stack. 
8155 */ 8156 zoneid_t mlpzone; 8157 8158 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8159 htons(mlpport)); 8160 if (connp->conn_zoneid != mlpzone) { 8161 if (udp->udp_debug) { 8162 (void) strlog(UDP_MOD_ID, 0, 1, 8163 SL_ERROR|SL_TRACE, 8164 "udp_bind: attempt to bind port " 8165 "%d on shared addr in zone %d " 8166 "(should be %d)", 8167 mlpport, connp->conn_zoneid, 8168 mlpzone); 8169 } 8170 rw_enter(&udp->udp_rwlock, RW_WRITER); 8171 udp->udp_pending_op = -1; 8172 rw_exit(&udp->udp_rwlock); 8173 connp->conn_anon_port = B_FALSE; 8174 connp->conn_mlp_type = mlptSingle; 8175 return (-TACCES); 8176 } 8177 } 8178 if (connp->conn_anon_port) { 8179 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8180 port, B_TRUE); 8181 if (error != 0) { 8182 if (udp->udp_debug) { 8183 (void) strlog(UDP_MOD_ID, 0, 1, 8184 SL_ERROR|SL_TRACE, 8185 "udp_bind: cannot establish anon " 8186 "MLP for port %d", port); 8187 } 8188 rw_enter(&udp->udp_rwlock, RW_WRITER); 8189 udp->udp_pending_op = -1; 8190 rw_exit(&udp->udp_rwlock); 8191 connp->conn_anon_port = B_FALSE; 8192 connp->conn_mlp_type = mlptSingle; 8193 return (-TACCES); 8194 } 8195 } 8196 connp->conn_mlp_type = mlptype; 8197 } 8198 8199 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8200 /* 8201 * Append a request for an IRE if udp_v6src not 8202 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 
8203 */ 8204 mp = allocb(sizeof (ire_t), BPRI_HI); 8205 if (!mp) { 8206 rw_enter(&udp->udp_rwlock, RW_WRITER); 8207 udp->udp_pending_op = -1; 8208 rw_exit(&udp->udp_rwlock); 8209 return (ENOMEM); 8210 } 8211 mp->b_wptr += sizeof (ire_t); 8212 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8213 } 8214 if (udp->udp_family == AF_INET6) { 8215 ASSERT(udp->udp_connp->conn_af_isv6); 8216 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8217 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8218 } else { 8219 ASSERT(!udp->udp_connp->conn_af_isv6); 8220 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8221 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8222 B_TRUE); 8223 } 8224 8225 (void) udp_post_ip_bind_connect(udp, mp, error); 8226 return (error); 8227 } 8228 8229 int 8230 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8231 socklen_t len, cred_t *cr) 8232 { 8233 int error; 8234 conn_t *connp; 8235 8236 /* All Solaris components should pass a cred for this operation. */ 8237 ASSERT(cr != NULL); 8238 8239 connp = (conn_t *)proto_handle; 8240 8241 if (sa == NULL) 8242 error = udp_do_unbind(connp); 8243 else 8244 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8245 8246 if (error < 0) { 8247 if (error == -TOUTSTATE) 8248 error = EINVAL; 8249 else 8250 error = proto_tlitosyserr(-error); 8251 } 8252 8253 return (error); 8254 } 8255 8256 static int 8257 udp_implicit_bind(conn_t *connp, cred_t *cr) 8258 { 8259 int error; 8260 8261 /* All Solaris components should pass a cred for this operation. */ 8262 ASSERT(cr != NULL); 8263 8264 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8265 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8266 } 8267 8268 /* 8269 * This routine removes a port number association from a stream. It 8270 * is called by udp_unbind and udp_tpi_unbind. 
8271 */ 8272 static int 8273 udp_do_unbind(conn_t *connp) 8274 { 8275 udp_t *udp = connp->conn_udp; 8276 udp_fanout_t *udpf; 8277 udp_stack_t *us = udp->udp_us; 8278 8279 if (cl_inet_unbind != NULL) { 8280 /* 8281 * Running in cluster mode - register unbind information 8282 */ 8283 if (udp->udp_ipversion == IPV4_VERSION) { 8284 (*cl_inet_unbind)( 8285 connp->conn_netstack->netstack_stackid, 8286 IPPROTO_UDP, AF_INET, 8287 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8288 (in_port_t)udp->udp_port, NULL); 8289 } else { 8290 (*cl_inet_unbind)( 8291 connp->conn_netstack->netstack_stackid, 8292 IPPROTO_UDP, AF_INET6, 8293 (uint8_t *)&(udp->udp_v6src), 8294 (in_port_t)udp->udp_port, NULL); 8295 } 8296 } 8297 8298 rw_enter(&udp->udp_rwlock, RW_WRITER); 8299 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8300 rw_exit(&udp->udp_rwlock); 8301 return (-TOUTSTATE); 8302 } 8303 udp->udp_pending_op = T_UNBIND_REQ; 8304 rw_exit(&udp->udp_rwlock); 8305 8306 /* 8307 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8308 * and therefore ip_unbind must never return NULL. 8309 */ 8310 ip_unbind(connp); 8311 8312 /* 8313 * Once we're unbound from IP, the pending operation may be cleared 8314 * here. 
8315 */ 8316 rw_enter(&udp->udp_rwlock, RW_WRITER); 8317 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8318 us->us_bind_fanout_size)]; 8319 8320 mutex_enter(&udpf->uf_lock); 8321 udp_bind_hash_remove(udp, B_TRUE); 8322 V6_SET_ZERO(udp->udp_v6src); 8323 V6_SET_ZERO(udp->udp_bound_v6src); 8324 udp->udp_port = 0; 8325 mutex_exit(&udpf->uf_lock); 8326 8327 udp->udp_pending_op = -1; 8328 udp->udp_state = TS_UNBND; 8329 if (udp->udp_family == AF_INET6) 8330 (void) udp_build_hdrs(udp); 8331 rw_exit(&udp->udp_rwlock); 8332 8333 return (0); 8334 } 8335 8336 static int 8337 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8338 { 8339 ire_t *ire; 8340 udp_fanout_t *udpf; 8341 udp_stack_t *us = udp->udp_us; 8342 8343 ASSERT(udp->udp_pending_op != -1); 8344 rw_enter(&udp->udp_rwlock, RW_WRITER); 8345 if (error == 0) { 8346 /* For udp_do_connect() success */ 8347 /* udp_do_bind() success will do nothing in here */ 8348 /* 8349 * If a broadcast/multicast address was bound, set 8350 * the source address to 0. 8351 * This ensures no datagrams with broadcast address 8352 * as source address are emitted (which would violate 8353 * RFC1122 - Hosts requirements) 8354 * 8355 * Note that when connecting the returned IRE is 8356 * for the destination address and we only perform 8357 * the broadcast check for the source address (it 8358 * is OK to connect to a broadcast/multicast address.) 8359 */ 8360 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8361 ire = (ire_t *)ire_mp->b_rptr; 8362 8363 /* 8364 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8365 * multicast local address. 8366 */ 8367 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8368 us->us_bind_fanout_size)]; 8369 if (ire->ire_type == IRE_BROADCAST && 8370 udp->udp_state != TS_DATA_XFER) { 8371 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8372 udp->udp_pending_op == O_T_BIND_REQ); 8373 /* 8374 * This was just a local bind to a broadcast 8375 * addr. 
8376 */ 8377 mutex_enter(&udpf->uf_lock); 8378 V6_SET_ZERO(udp->udp_v6src); 8379 mutex_exit(&udpf->uf_lock); 8380 if (udp->udp_family == AF_INET6) 8381 (void) udp_build_hdrs(udp); 8382 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8383 if (udp->udp_family == AF_INET6) 8384 (void) udp_build_hdrs(udp); 8385 } 8386 } 8387 } else { 8388 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8389 us->us_bind_fanout_size)]; 8390 mutex_enter(&udpf->uf_lock); 8391 8392 if (udp->udp_state == TS_DATA_XFER) { 8393 /* Connect failed */ 8394 /* Revert back to the bound source */ 8395 udp->udp_v6src = udp->udp_bound_v6src; 8396 udp->udp_state = TS_IDLE; 8397 } else { 8398 /* For udp_do_bind() failed */ 8399 V6_SET_ZERO(udp->udp_v6src); 8400 V6_SET_ZERO(udp->udp_bound_v6src); 8401 udp->udp_state = TS_UNBND; 8402 udp_bind_hash_remove(udp, B_TRUE); 8403 udp->udp_port = 0; 8404 } 8405 mutex_exit(&udpf->uf_lock); 8406 if (udp->udp_family == AF_INET6) 8407 (void) udp_build_hdrs(udp); 8408 } 8409 udp->udp_pending_op = -1; 8410 rw_exit(&udp->udp_rwlock); 8411 if (ire_mp != NULL) 8412 freeb(ire_mp); 8413 return (error); 8414 } 8415 8416 /* 8417 * It associates a default destination address with the stream. 
8418 */ 8419 static int 8420 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 8421 cred_t *cr) 8422 { 8423 sin6_t *sin6; 8424 sin_t *sin; 8425 in6_addr_t v6dst; 8426 ipaddr_t v4dst; 8427 uint16_t dstport; 8428 uint32_t flowinfo; 8429 mblk_t *ire_mp; 8430 udp_fanout_t *udpf; 8431 udp_t *udp, *udp1; 8432 ushort_t ipversion; 8433 udp_stack_t *us; 8434 int error; 8435 8436 udp = connp->conn_udp; 8437 us = udp->udp_us; 8438 8439 /* 8440 * Address has been verified by the caller 8441 */ 8442 switch (len) { 8443 default: 8444 /* 8445 * Should never happen 8446 */ 8447 return (EINVAL); 8448 8449 case sizeof (sin_t): 8450 sin = (sin_t *)sa; 8451 v4dst = sin->sin_addr.s_addr; 8452 dstport = sin->sin_port; 8453 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8454 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8455 ipversion = IPV4_VERSION; 8456 break; 8457 8458 case sizeof (sin6_t): 8459 sin6 = (sin6_t *)sa; 8460 v6dst = sin6->sin6_addr; 8461 dstport = sin6->sin6_port; 8462 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8463 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8464 ipversion = IPV4_VERSION; 8465 flowinfo = 0; 8466 } else { 8467 ipversion = IPV6_VERSION; 8468 flowinfo = sin6->sin6_flowinfo; 8469 } 8470 break; 8471 } 8472 8473 if (dstport == 0) 8474 return (-TBADADDR); 8475 8476 rw_enter(&udp->udp_rwlock, RW_WRITER); 8477 8478 /* 8479 * This UDP must have bound to a port already before doing a connect. 8480 * TPI mandates that users must send TPI primitives only 1 at a time 8481 * and wait for the response before sending the next primitive. 
8482 */ 8483 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8484 rw_exit(&udp->udp_rwlock); 8485 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8486 "udp_connect: bad state, %u", udp->udp_state); 8487 return (-TOUTSTATE); 8488 } 8489 udp->udp_pending_op = T_CONN_REQ; 8490 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8491 8492 if (ipversion == IPV4_VERSION) { 8493 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8494 udp->udp_ip_snd_options_len; 8495 } else { 8496 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8497 } 8498 8499 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8500 us->us_bind_fanout_size)]; 8501 8502 mutex_enter(&udpf->uf_lock); 8503 if (udp->udp_state == TS_DATA_XFER) { 8504 /* Already connected - clear out state */ 8505 udp->udp_v6src = udp->udp_bound_v6src; 8506 udp->udp_state = TS_IDLE; 8507 } 8508 8509 /* 8510 * Create a default IP header with no IP options. 8511 */ 8512 udp->udp_dstport = dstport; 8513 udp->udp_ipversion = ipversion; 8514 if (ipversion == IPV4_VERSION) { 8515 /* 8516 * Interpret a zero destination to mean loopback. 8517 * Update the T_CONN_REQ (sin/sin6) since it is used to 8518 * generate the T_CONN_CON. 8519 */ 8520 if (v4dst == INADDR_ANY) { 8521 v4dst = htonl(INADDR_LOOPBACK); 8522 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8523 if (udp->udp_family == AF_INET) { 8524 sin->sin_addr.s_addr = v4dst; 8525 } else { 8526 sin6->sin6_addr = v6dst; 8527 } 8528 } 8529 udp->udp_v6dst = v6dst; 8530 udp->udp_flowinfo = 0; 8531 8532 /* 8533 * If the destination address is multicast and 8534 * an outgoing multicast interface has been set, 8535 * use the address of that interface as our 8536 * source address if no source address has been set. 
8537 */ 8538 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8539 CLASSD(v4dst) && 8540 udp->udp_multicast_if_addr != INADDR_ANY) { 8541 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8542 &udp->udp_v6src); 8543 } 8544 } else { 8545 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8546 /* 8547 * Interpret a zero destination to mean loopback. 8548 * Update the T_CONN_REQ (sin/sin6) since it is used to 8549 * generate the T_CONN_CON. 8550 */ 8551 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 8552 v6dst = ipv6_loopback; 8553 sin6->sin6_addr = v6dst; 8554 } 8555 udp->udp_v6dst = v6dst; 8556 udp->udp_flowinfo = flowinfo; 8557 /* 8558 * If the destination address is multicast and 8559 * an outgoing multicast interface has been set, 8560 * then the ip bind logic will pick the correct source 8561 * address (i.e. matching the outgoing multicast interface). 8562 */ 8563 } 8564 8565 /* 8566 * Verify that the src/port/dst/port is unique for all 8567 * connections in TS_DATA_XFER 8568 */ 8569 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 8570 if (udp1->udp_state != TS_DATA_XFER) 8571 continue; 8572 if (udp->udp_port != udp1->udp_port || 8573 udp->udp_ipversion != udp1->udp_ipversion || 8574 dstport != udp1->udp_dstport || 8575 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 8576 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 8577 !(IPCL_ZONE_MATCH(udp->udp_connp, 8578 udp1->udp_connp->conn_zoneid) || 8579 IPCL_ZONE_MATCH(udp1->udp_connp, 8580 udp->udp_connp->conn_zoneid))) 8581 continue; 8582 mutex_exit(&udpf->uf_lock); 8583 udp->udp_pending_op = -1; 8584 rw_exit(&udp->udp_rwlock); 8585 return (-TBADADDR); 8586 } 8587 8588 if (cl_inet_connect2 != NULL) { 8589 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 8590 if (error != 0) { 8591 mutex_exit(&udpf->uf_lock); 8592 udp->udp_pending_op = -1; 8593 rw_exit(&udp->udp_rwlock); 8594 return (-TBADADDR); 8595 } 8596 } 8597 8598 udp->udp_state = TS_DATA_XFER; 8599 
mutex_exit(&udpf->uf_lock); 8600 8601 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 8602 if (ire_mp == NULL) { 8603 mutex_enter(&udpf->uf_lock); 8604 udp->udp_state = TS_IDLE; 8605 udp->udp_pending_op = -1; 8606 mutex_exit(&udpf->uf_lock); 8607 rw_exit(&udp->udp_rwlock); 8608 return (ENOMEM); 8609 } 8610 8611 rw_exit(&udp->udp_rwlock); 8612 8613 ire_mp->b_wptr += sizeof (ire_t); 8614 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8615 8616 if (udp->udp_family == AF_INET) { 8617 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 8618 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 8619 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 8620 B_TRUE, B_TRUE, cr); 8621 } else { 8622 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 8623 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 8624 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr); 8625 } 8626 8627 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 8628 } 8629 8630 /* ARGSUSED */ 8631 static int 8632 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 8633 socklen_t len, sock_connid_t *id, cred_t *cr) 8634 { 8635 conn_t *connp = (conn_t *)proto_handle; 8636 udp_t *udp = connp->conn_udp; 8637 int error; 8638 boolean_t did_bind = B_FALSE; 8639 8640 /* All Solaris components should pass a cred for this operation. */ 8641 ASSERT(cr != NULL); 8642 8643 if (sa == NULL) { 8644 /* 8645 * Disconnect 8646 * Make sure we are connected 8647 */ 8648 if (udp->udp_state != TS_DATA_XFER) 8649 return (EINVAL); 8650 8651 error = udp_disconnect(connp); 8652 return (error); 8653 } 8654 8655 error = proto_verify_ip_addr(udp->udp_family, sa, len); 8656 if (error != 0) 8657 goto done; 8658 8659 /* do an implicit bind if necessary */ 8660 if (udp->udp_state == TS_UNBND) { 8661 error = udp_implicit_bind(connp, cr); 8662 /* 8663 * We could be racing with an actual bind, in which case 8664 * we would see EPROTO. We cross our fingers and try 8665 * to connect. 
8666 */ 8667 if (!(error == 0 || error == EPROTO)) 8668 goto done; 8669 did_bind = B_TRUE; 8670 } 8671 /* 8672 * set SO_DGRAM_ERRIND 8673 */ 8674 udp->udp_dgram_errind = B_TRUE; 8675 8676 error = udp_do_connect(connp, sa, len, cr); 8677 8678 if (error != 0 && did_bind) { 8679 int unbind_err; 8680 8681 unbind_err = udp_do_unbind(connp); 8682 ASSERT(unbind_err == 0); 8683 } 8684 8685 if (error == 0) { 8686 *id = 0; 8687 (*connp->conn_upcalls->su_connected) 8688 (connp->conn_upper_handle, 0, NULL, -1); 8689 } else if (error < 0) { 8690 error = proto_tlitosyserr(-error); 8691 } 8692 8693 done: 8694 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 8695 /* 8696 * No need to hold locks to set state 8697 * after connect failure socket state is undefined 8698 * We set the state only to imitate old sockfs behavior 8699 */ 8700 udp->udp_state = TS_IDLE; 8701 } 8702 return (error); 8703 } 8704 8705 /* ARGSUSED */ 8706 int 8707 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 8708 cred_t *cr) 8709 { 8710 conn_t *connp = (conn_t *)proto_handle; 8711 udp_t *udp = connp->conn_udp; 8712 udp_stack_t *us = udp->udp_us; 8713 int error = 0; 8714 8715 ASSERT(DB_TYPE(mp) == M_DATA); 8716 8717 /* All Solaris components should pass a cred for this operation. */ 8718 ASSERT(cr != NULL); 8719 8720 /* If labeled then sockfs should have already set db_credp */ 8721 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 8722 8723 /* 8724 * If the socket is connected and no change in destination 8725 */ 8726 if (msg->msg_namelen == 0) { 8727 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 8728 if (error == EDESTADDRREQ) 8729 return (error); 8730 else 8731 return (udp->udp_dgram_errind ? error : 0); 8732 } 8733 8734 /* 8735 * Do an implicit bind if necessary. 8736 */ 8737 if (udp->udp_state == TS_UNBND) { 8738 error = udp_implicit_bind(connp, cr); 8739 /* 8740 * We could be racing with an actual bind, in which case 8741 * we would see EPROTO. 
We cross our fingers and try 8742 * to send. 8743 */ 8744 if (!(error == 0 || error == EPROTO)) { 8745 freemsg(mp); 8746 return (error); 8747 } 8748 } 8749 8750 rw_enter(&udp->udp_rwlock, RW_WRITER); 8751 8752 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 8753 rw_exit(&udp->udp_rwlock); 8754 freemsg(mp); 8755 return (EISCONN); 8756 } 8757 8758 8759 if (udp->udp_delayed_error != 0) { 8760 boolean_t match; 8761 8762 error = udp->udp_delayed_error; 8763 match = B_FALSE; 8764 udp->udp_delayed_error = 0; 8765 switch (udp->udp_family) { 8766 case AF_INET: { 8767 /* Compare just IP address and port */ 8768 sin_t *sin1 = (sin_t *)msg->msg_name; 8769 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 8770 8771 if (msg->msg_namelen == sizeof (sin_t) && 8772 sin1->sin_port == sin2->sin_port && 8773 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 8774 match = B_TRUE; 8775 8776 break; 8777 } 8778 case AF_INET6: { 8779 sin6_t *sin1 = (sin6_t *)msg->msg_name; 8780 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 8781 8782 if (msg->msg_namelen == sizeof (sin6_t) && 8783 sin1->sin6_port == sin2->sin6_port && 8784 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 8785 &sin2->sin6_addr)) 8786 match = B_TRUE; 8787 break; 8788 } 8789 default: 8790 ASSERT(0); 8791 } 8792 8793 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 8794 8795 if (match) { 8796 rw_exit(&udp->udp_rwlock); 8797 freemsg(mp); 8798 return (error); 8799 } 8800 } 8801 8802 error = proto_verify_ip_addr(udp->udp_family, 8803 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 8804 rw_exit(&udp->udp_rwlock); 8805 8806 if (error != 0) { 8807 freemsg(mp); 8808 return (error); 8809 } 8810 8811 error = udp_send_not_connected(connp, mp, 8812 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 8813 curproc->p_pid); 8814 if (error != 0) { 8815 UDP_STAT(us, udp_out_err_output); 8816 freemsg(mp); 8817 } 8818 return (udp->udp_dgram_errind ? 
error : 0); 8819 } 8820 8821 int 8822 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 8823 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb) 8824 { 8825 conn_t *connp = (conn_t *)proto_handle; 8826 udp_t *udp; 8827 struct T_capability_ack tca; 8828 struct sockaddr_in6 laddr, faddr; 8829 socklen_t laddrlen, faddrlen; 8830 short opts; 8831 struct stroptions *stropt; 8832 mblk_t *stropt_mp; 8833 int error; 8834 8835 udp = connp->conn_udp; 8836 8837 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 8838 8839 /* 8840 * setup the fallback stream that was allocated 8841 */ 8842 connp->conn_dev = (dev_t)RD(q)->q_ptr; 8843 connp->conn_minor_arena = WR(q)->q_ptr; 8844 8845 RD(q)->q_ptr = WR(q)->q_ptr = connp; 8846 8847 WR(q)->q_qinfo = &udp_winit; 8848 8849 connp->conn_rq = RD(q); 8850 connp->conn_wq = WR(q); 8851 8852 /* Notify stream head about options before sending up data */ 8853 stropt_mp->b_datap->db_type = M_SETOPTS; 8854 stropt_mp->b_wptr += sizeof (*stropt); 8855 stropt = (struct stroptions *)stropt_mp->b_rptr; 8856 stropt->so_flags = SO_WROFF | SO_HIWAT; 8857 stropt->so_wroff = 8858 (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); 8859 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 8860 putnext(RD(q), stropt_mp); 8861 8862 /* 8863 * Free the helper stream 8864 */ 8865 ip_free_helper_stream(connp); 8866 8867 if (!issocket) 8868 udp_use_pure_tpi(udp); 8869 8870 /* 8871 * Collect the information needed to sync with the sonode 8872 */ 8873 udp_do_capability_ack(udp, &tca, TC1_INFO); 8874 8875 laddrlen = faddrlen = sizeof (sin6_t); 8876 (void) udp_getsockname((sock_lower_handle_t)connp, 8877 (struct sockaddr *)&laddr, &laddrlen, CRED()); 8878 error = udp_getpeername((sock_lower_handle_t)connp, 8879 (struct sockaddr *)&faddr, &faddrlen, CRED()); 8880 if (error != 0) 8881 faddrlen = 0; 8882 8883 opts = 0; 8884 if (udp->udp_dgram_errind) 8885 opts |= SO_DGRAM_ERRIND; 8886 if (udp->udp_dontroute) 8887 opts |= SO_DONTROUTE; 
8888 8889 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 8890 (struct sockaddr *)&laddr, laddrlen, 8891 (struct sockaddr *)&faddr, faddrlen, opts); 8892 8893 mutex_enter(&udp->udp_recv_lock); 8894 /* 8895 * Attempts to send data up during fallback will result in it being 8896 * queued in udp_t. Now we push up any queued packets. 8897 */ 8898 while (udp->udp_fallback_queue_head != NULL) { 8899 mblk_t *mp; 8900 mp = udp->udp_fallback_queue_head; 8901 udp->udp_fallback_queue_head = mp->b_next; 8902 mutex_exit(&udp->udp_recv_lock); 8903 mp->b_next = NULL; 8904 putnext(RD(q), mp); 8905 mutex_enter(&udp->udp_recv_lock); 8906 } 8907 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 8908 /* 8909 * No longer a streams less socket 8910 */ 8911 rw_enter(&udp->udp_rwlock, RW_WRITER); 8912 connp->conn_flags &= ~IPCL_NONSTR; 8913 rw_exit(&udp->udp_rwlock); 8914 8915 mutex_exit(&udp->udp_recv_lock); 8916 8917 ASSERT(connp->conn_ref >= 1); 8918 8919 return (0); 8920 } 8921 8922 static int 8923 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 8924 { 8925 sin_t *sin = (sin_t *)sa; 8926 sin6_t *sin6 = (sin6_t *)sa; 8927 8928 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 8929 ASSERT(udp != NULL); 8930 8931 if (udp->udp_state != TS_DATA_XFER) 8932 return (ENOTCONN); 8933 8934 switch (udp->udp_family) { 8935 case AF_INET: 8936 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8937 8938 if (*salenp < sizeof (sin_t)) 8939 return (EINVAL); 8940 8941 *salenp = sizeof (sin_t); 8942 *sin = sin_null; 8943 sin->sin_family = AF_INET; 8944 sin->sin_port = udp->udp_dstport; 8945 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 8946 break; 8947 case AF_INET6: 8948 if (*salenp < sizeof (sin6_t)) 8949 return (EINVAL); 8950 8951 *salenp = sizeof (sin6_t); 8952 *sin6 = sin6_null; 8953 sin6->sin6_family = AF_INET6; 8954 sin6->sin6_port = udp->udp_dstport; 8955 sin6->sin6_addr = udp->udp_v6dst; 8956 sin6->sin6_flowinfo = udp->udp_flowinfo; 8957 break; 8958 } 8959 8960 return 
(0); 8961 } 8962 8963 /* ARGSUSED */ 8964 int 8965 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8966 socklen_t *salenp, cred_t *cr) 8967 { 8968 conn_t *connp = (conn_t *)proto_handle; 8969 udp_t *udp = connp->conn_udp; 8970 int error; 8971 8972 /* All Solaris components should pass a cred for this operation. */ 8973 ASSERT(cr != NULL); 8974 8975 ASSERT(udp != NULL); 8976 8977 rw_enter(&udp->udp_rwlock, RW_READER); 8978 8979 error = udp_do_getpeername(udp, sa, salenp); 8980 8981 rw_exit(&udp->udp_rwlock); 8982 8983 return (error); 8984 } 8985 8986 static int 8987 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 8988 { 8989 sin_t *sin = (sin_t *)sa; 8990 sin6_t *sin6 = (sin6_t *)sa; 8991 8992 ASSERT(udp != NULL); 8993 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 8994 8995 switch (udp->udp_family) { 8996 case AF_INET: 8997 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8998 8999 if (*salenp < sizeof (sin_t)) 9000 return (EINVAL); 9001 9002 *salenp = sizeof (sin_t); 9003 *sin = sin_null; 9004 sin->sin_family = AF_INET; 9005 if (udp->udp_state == TS_UNBND) { 9006 break; 9007 } 9008 sin->sin_port = udp->udp_port; 9009 9010 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9011 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9012 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9013 } else { 9014 /* 9015 * INADDR_ANY 9016 * udp_v6src is not set, we might be bound to 9017 * broadcast/multicast. 
Use udp_bound_v6src as 9018 * local address instead (that could 9019 * also still be INADDR_ANY) 9020 */ 9021 sin->sin_addr.s_addr = 9022 V4_PART_OF_V6(udp->udp_bound_v6src); 9023 } 9024 break; 9025 9026 case AF_INET6: 9027 if (*salenp < sizeof (sin6_t)) 9028 return (EINVAL); 9029 9030 *salenp = sizeof (sin6_t); 9031 *sin6 = sin6_null; 9032 sin6->sin6_family = AF_INET6; 9033 if (udp->udp_state == TS_UNBND) { 9034 break; 9035 } 9036 sin6->sin6_port = udp->udp_port; 9037 9038 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9039 sin6->sin6_addr = udp->udp_v6src; 9040 } else { 9041 /* 9042 * UNSPECIFIED 9043 * udp_v6src is not set, we might be bound to 9044 * broadcast/multicast. Use udp_bound_v6src as 9045 * local address instead (that could 9046 * also still be UNSPECIFIED) 9047 */ 9048 sin6->sin6_addr = udp->udp_bound_v6src; 9049 } 9050 } 9051 return (0); 9052 } 9053 9054 /* ARGSUSED */ 9055 int 9056 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9057 socklen_t *salenp, cred_t *cr) 9058 { 9059 conn_t *connp = (conn_t *)proto_handle; 9060 udp_t *udp = connp->conn_udp; 9061 int error; 9062 9063 /* All Solaris components should pass a cred for this operation. */ 9064 ASSERT(cr != NULL); 9065 9066 ASSERT(udp != NULL); 9067 rw_enter(&udp->udp_rwlock, RW_READER); 9068 9069 error = udp_do_getsockname(udp, sa, salenp); 9070 9071 rw_exit(&udp->udp_rwlock); 9072 9073 return (error); 9074 } 9075 9076 int 9077 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9078 void *optvalp, socklen_t *optlen, cred_t *cr) 9079 { 9080 conn_t *connp = (conn_t *)proto_handle; 9081 udp_t *udp = connp->conn_udp; 9082 int error; 9083 t_uscalar_t max_optbuf_len; 9084 void *optvalp_buf; 9085 int len; 9086 9087 /* All Solaris components should pass a cred for this operation. 
*/ 9088 ASSERT(cr != NULL); 9089 9090 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9091 udp_opt_obj.odb_opt_des_arr, 9092 udp_opt_obj.odb_opt_arr_cnt, 9093 udp_opt_obj.odb_topmost_tpiprovider, 9094 B_FALSE, B_TRUE, cr); 9095 if (error != 0) { 9096 if (error < 0) 9097 error = proto_tlitosyserr(-error); 9098 return (error); 9099 } 9100 9101 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9102 rw_enter(&udp->udp_rwlock, RW_READER); 9103 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9104 rw_exit(&udp->udp_rwlock); 9105 9106 if (len < 0) { 9107 /* 9108 * Pass on to IP 9109 */ 9110 kmem_free(optvalp_buf, max_optbuf_len); 9111 return (ip_get_options(connp, level, option_name, 9112 optvalp, optlen, cr)); 9113 } else { 9114 /* 9115 * update optlen and copy option value 9116 */ 9117 t_uscalar_t size = MIN(len, *optlen); 9118 bcopy(optvalp_buf, optvalp, size); 9119 bcopy(&size, optlen, sizeof (size)); 9120 9121 kmem_free(optvalp_buf, max_optbuf_len); 9122 return (0); 9123 } 9124 } 9125 9126 int 9127 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9128 const void *optvalp, socklen_t optlen, cred_t *cr) 9129 { 9130 conn_t *connp = (conn_t *)proto_handle; 9131 udp_t *udp = connp->conn_udp; 9132 int error; 9133 9134 /* All Solaris components should pass a cred for this operation. 
*/ 9135 ASSERT(cr != NULL); 9136 9137 error = proto_opt_check(level, option_name, optlen, NULL, 9138 udp_opt_obj.odb_opt_des_arr, 9139 udp_opt_obj.odb_opt_arr_cnt, 9140 udp_opt_obj.odb_topmost_tpiprovider, 9141 B_TRUE, B_FALSE, cr); 9142 9143 if (error != 0) { 9144 if (error < 0) 9145 error = proto_tlitosyserr(-error); 9146 return (error); 9147 } 9148 9149 rw_enter(&udp->udp_rwlock, RW_WRITER); 9150 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9151 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9152 NULL, cr); 9153 rw_exit(&udp->udp_rwlock); 9154 9155 if (error < 0) { 9156 /* 9157 * Pass on to ip 9158 */ 9159 error = ip_set_options(connp, level, option_name, optvalp, 9160 optlen, cr); 9161 } 9162 9163 return (error); 9164 } 9165 9166 void 9167 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9168 { 9169 conn_t *connp = (conn_t *)proto_handle; 9170 udp_t *udp = connp->conn_udp; 9171 9172 mutex_enter(&udp->udp_recv_lock); 9173 connp->conn_flow_cntrld = B_FALSE; 9174 mutex_exit(&udp->udp_recv_lock); 9175 } 9176 9177 /* ARGSUSED */ 9178 int 9179 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9180 { 9181 conn_t *connp = (conn_t *)proto_handle; 9182 9183 /* All Solaris components should pass a cred for this operation. */ 9184 ASSERT(cr != NULL); 9185 9186 /* shut down the send side */ 9187 if (how != SHUT_RD) 9188 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9189 SOCK_OPCTL_SHUT_SEND, 0); 9190 /* shut down the recv side */ 9191 if (how != SHUT_WR) 9192 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9193 SOCK_OPCTL_SHUT_RECV, 0); 9194 return (0); 9195 } 9196 9197 int 9198 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9199 int mode, int32_t *rvalp, cred_t *cr) 9200 { 9201 conn_t *connp = (conn_t *)proto_handle; 9202 int error; 9203 9204 /* All Solaris components should pass a cred for this operation. 
*/ 9205 ASSERT(cr != NULL); 9206 9207 switch (cmd) { 9208 case ND_SET: 9209 case ND_GET: 9210 case _SIOCSOCKFALLBACK: 9211 case TI_GETPEERNAME: 9212 case TI_GETMYNAME: 9213 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9214 cmd)); 9215 error = EINVAL; 9216 break; 9217 default: 9218 /* 9219 * Pass on to IP using helper stream 9220 */ 9221 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9222 cmd, arg, mode, cr, rvalp); 9223 break; 9224 } 9225 return (error); 9226 } 9227 9228 /* ARGSUSED */ 9229 int 9230 udp_accept(sock_lower_handle_t lproto_handle, 9231 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9232 cred_t *cr) 9233 { 9234 return (EOPNOTSUPP); 9235 } 9236 9237 /* ARGSUSED */ 9238 int 9239 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9240 { 9241 return (EOPNOTSUPP); 9242 } 9243 9244 sock_downcalls_t sock_udp_downcalls = { 9245 udp_activate, /* sd_activate */ 9246 udp_accept, /* sd_accept */ 9247 udp_bind, /* sd_bind */ 9248 udp_listen, /* sd_listen */ 9249 udp_connect, /* sd_connect */ 9250 udp_getpeername, /* sd_getpeername */ 9251 udp_getsockname, /* sd_getsockname */ 9252 udp_getsockopt, /* sd_getsockopt */ 9253 udp_setsockopt, /* sd_setsockopt */ 9254 udp_send, /* sd_send */ 9255 NULL, /* sd_send_uio */ 9256 NULL, /* sd_recv_uio */ 9257 NULL, /* sd_poll */ 9258 udp_shutdown, /* sd_shutdown */ 9259 udp_clr_flowctrl, /* sd_setflowctrl */ 9260 udp_ioctl, /* sd_ioctl */ 9261 udp_close /* sd_close */ 9262 }; 9263