1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 #include <sys/ethernet.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 101 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 102 * We also use conn_lock when updating things that affect the IP classifier 103 * lookup. 104 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 105 * 106 * The fanout lock uf_lock: 107 * When a UDP endpoint is bound to a local port, it is inserted into 108 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 109 * The size of the array is controlled by the udp_bind_fanout_size variable. 110 * This variable can be changed in /etc/system if the default value is 111 * not large enough. Each bind hash bucket is protected by a per bucket 112 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 113 * structure and a few other fields in the udp_t. A UDP endpoint is removed 114 * from the bind hash list only when it is being unbound or being closed. 115 * The per bucket lock also protects a UDP endpoint's state changes. 116 * 117 * The udp_rwlock: 118 * This protects most of the other fields in the udp_t. The exact list of 119 * fields which are protected by each of the above locks is documented in 120 * the udp_t structure definition. 121 * 122 * Plumbing notes: 123 * UDP is always a device driver. For compatibility with mibopen() code 124 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 125 * dummy module. 126 * 127 * The above implies that we don't support any intermediate module to 128 * reside in between /dev/ip and udp -- in fact, we never supported such 129 * scenario in the past as the inter-layer communication semantics have 130 * always been private. 
 */

/*
 * For /etc/system control.
 * Initial size of the bind hash fanout array (see the synchronization
 * notes on the fanout lock).
 */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;

/*
 * Option processing attrs.
 *
 * The two packet-attribute pointers share storage through a union, so only
 * one of them is meaningful at a time; the #defines below let callers name
 * the union members directly on a udpattrs_t.
 */
typedef struct udpattrs_s {
	union {
		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
		ip4_pkt_t	*udpattr_ipp4;	/* For V4 */
	} udpattr_ippu;
#define	udpattr_ipp6 udpattr_ippu.udpattr_ipp6
#define	udpattr_ipp4 udpattr_ippu.udpattr_ipp4
	mblk_t		*udpattr_mb;
	boolean_t	udpattr_credset;
} udpattrs_t;

/* Forward declarations of the file-local routines. */
static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static int	udp_build_hdrs(udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_tpi_close(queue_t *q, int flags);
static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(conn_t *, mblk_t *);
static void	udp_icmp_error_ipv6(conn_t *, mblk_t *);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static void	udp_input(void *, mblk_t *, void *);
static void	udp_lrput(queue_t *, mblk_t *);
static void	udp_lwput(queue_t *, mblk_t *);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp, boolean_t isv6);
static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
		    cred_t *cr);
static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp,
		    ipha_t *ipha);
static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
		    t_scalar_t destlen, t_scalar_t err);
static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
    boolean_t random);
static mblk_t	*udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
		    int *, boolean_t, struct nmsghdr *, cred_t *, pid_t);
static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
		    int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	udp_wput_fallback(queue_t *q, mblk_t *mp);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

/* Per-netstack setup and teardown */
static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	udp_stack_fini(netstackid_t stackid, void *arg);

/* kstat support */
static void	*udp_kstat_init(netstackid_t stackid);
static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
static void	udp_kstat2_fini(netstackid_t, kstat_t *);
static int	udp_kstat_update(kstat_t *kp, int rw);

static void	udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);

static int	udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *,
		    cred_t *, pid_t);
static void	udp_ulp_recv(conn_t *, mblk_t *);

/* Common routine for TPI and socket module */
static conn_t	*udp_do_open(cred_t *, boolean_t, int);
static void	udp_do_close(conn_t *);
static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
    boolean_t);
static int	udp_do_unbind(conn_t *);
static int	udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *);
static int	udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *);

/* Not static: these two have external linkage. */
int		udp_getsockname(sock_lower_handle_t,
    struct sockaddr *, socklen_t *, cred_t *);
int		udp_getpeername(sock_lower_handle_t,
    struct sockaddr *, socklen_t *, cred_t *);
static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
    cred_t *cr);
static int	udp_post_ip_bind_connect(udp_t *, mblk_t *, int);

/*
 * Default queue watermarks, in bytes.  The receive pair seeds udp_mod_info;
 * all four are used as defaults/limits in udp_param_arr.
 */
#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024

/*
 * The following is defined in tcp.c
 */
extern int	(*cl_inet_connect2)(netstackid_t stack_id,
		    uint8_t protocol, boolean_t is_outgoing,
		    sa_family_t addr_family,
		    uint8_t *laddrp, in_port_t lport,
		    uint8_t *faddrp, in_port_t fport, void *args);

/*
 * Checks if the given destination addr/port is allowed out.
 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
 * Called for each connect() and for sendto()/sendmsg() to a different
 * destination.
 * For connect(), called in udp_connect().
 * For sendto()/sendmsg(), called in udp_output_v{4,6}().
 *
 * This macro assumes that the cl_inet_connect2 hook is not NULL.
 * Please check this before calling this macro.
 *
 * The macro expands to a bare brace block (not do { } while (0)), so it must
 * not be used as the unbraced body of an if/else.  For IPv4 endpoints only
 * the last 32-bit word of the local and foreign in6_addr_t is handed to the
 * hook; for IPv6 the full addresses are passed.  The hook's return value is
 * stored in err.
 *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, udp_t *udp, boolean_t is_outgoing,
 *    in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
 */
#define	CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) {	\
	(err) = 0;							\
	/*								\
	 * Running in cluster mode - check and register active		\
	 * "connection" information					\
	 */								\
	if ((udp)->udp_ipversion == IPV4_VERSION)			\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, is_outgoing, AF_INET,			\
		    (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]),	\
		    (udp)->udp_port,					\
		    (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]),		\
		    (in_port_t)(fport), NULL);				\
	else								\
		(err) = (*cl_inet_connect2)(				\
		    (cp)->conn_netstack->netstack_stackid,		\
		    IPPROTO_UDP, is_outgoing, AF_INET6,			\
		    (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port,	\
		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
}

/*
 * STREAMS module information shared by every qinit below.  The last two
 * values are the receive high and low watermarks.
 */
static struct module_info udp_mod_info = {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
};

/* Write side, shared by the IPv4 and IPv6 devices. */
static struct qinit udp_winit = {
	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info
};

/* UDP entry point during fallback */
struct qinit udp_fallback_sock_winit = {
	(pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};

static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

/* Largest UDP payload over IPv4 with no IP options. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest UDP payload over IPv6 with no extension headers. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of udp_g_t_info_ack_ipv4. */
static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 * Each row is { minimum, maximum, default value, ND variable name }.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /*min		max		value		name */
 { 0L,		256,		32,		"udp_wroff_extra" },
 { 1L,		255,		255,		"udp_ipv4_ttl" },
 { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
 { 0,		1,		1,		"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30),	UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
 { 0,		     (1<<30),	UDP_XMIT_LOWATER,	"udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30),	UDP_RECV_HIWATER,	"udp_recv_hiwat"},
 { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
};
/* END CSTYLED */

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;

/* Convenience pointer type for the union of all TPI primitives. */
typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
414 * 415 * Trusted Extension (TX) notes: TX allows administrator to mark or 416 * reserve ports as Multilevel ports (MLP). MLP has special function 417 * on TX systems. Once a port is made MLP, it's not available as 418 * ordinary port. This creates "holes" in the port name space. It 419 * may be necessary to skip the "holes" find a suitable anon port. 420 */ 421 static in_port_t 422 udp_get_next_priv_port(udp_t *udp) 423 { 424 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 425 in_port_t nextport; 426 boolean_t restart = B_FALSE; 427 udp_stack_t *us = udp->udp_us; 428 429 retry: 430 if (next_priv_port < us->us_min_anonpriv_port || 431 next_priv_port >= IPPORT_RESERVED) { 432 next_priv_port = IPPORT_RESERVED - 1; 433 if (restart) 434 return (0); 435 restart = B_TRUE; 436 } 437 438 if (is_system_labeled() && 439 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 440 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 441 next_priv_port = nextport; 442 goto retry; 443 } 444 445 return (next_priv_port--); 446 } 447 448 /* 449 * Hash list removal routine for udp_t structures. 450 */ 451 static void 452 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 453 { 454 udp_t *udpnext; 455 kmutex_t *lockp; 456 udp_stack_t *us = udp->udp_us; 457 458 if (udp->udp_ptpbhn == NULL) 459 return; 460 461 /* 462 * Extract the lock pointer in case there are concurrent 463 * hash_remove's for this instance. 
464 */ 465 ASSERT(udp->udp_port != 0); 466 if (!caller_holds_lock) { 467 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 468 us->us_bind_fanout_size)].uf_lock; 469 ASSERT(lockp != NULL); 470 mutex_enter(lockp); 471 } 472 if (udp->udp_ptpbhn != NULL) { 473 udpnext = udp->udp_bind_hash; 474 if (udpnext != NULL) { 475 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 476 udp->udp_bind_hash = NULL; 477 } 478 *udp->udp_ptpbhn = udpnext; 479 udp->udp_ptpbhn = NULL; 480 } 481 if (!caller_holds_lock) { 482 mutex_exit(lockp); 483 } 484 } 485 486 static void 487 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 488 { 489 udp_t **udpp; 490 udp_t *udpnext; 491 492 ASSERT(MUTEX_HELD(&uf->uf_lock)); 493 ASSERT(udp->udp_ptpbhn == NULL); 494 udpp = &uf->uf_udp; 495 udpnext = udpp[0]; 496 if (udpnext != NULL) { 497 /* 498 * If the new udp bound to the INADDR_ANY address 499 * and the first one in the list is not bound to 500 * INADDR_ANY we skip all entries until we find the 501 * first one bound to INADDR_ANY. 502 * This makes sure that applications binding to a 503 * specific address get preference over those binding to 504 * INADDR_ANY. 505 */ 506 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 507 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 508 while ((udpnext = udpp[0]) != NULL && 509 !V6_OR_V4_INADDR_ANY( 510 udpnext->udp_bound_v6src)) { 511 udpp = &(udpnext->udp_bind_hash); 512 } 513 if (udpnext != NULL) 514 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 515 } else { 516 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 517 } 518 } 519 udp->udp_bind_hash = udpnext; 520 udp->udp_ptpbhn = udpp; 521 udpp[0] = udp; 522 } 523 524 /* 525 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 526 * passed to udp_wput. 527 * It associates a port number and local address with the stream. 528 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 529 * protocol type (IPPROTO_UDP) placed in the message following the address. 
530 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 531 * (Called as writer.) 532 * 533 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 534 * without setting SO_REUSEADDR. This is needed so that they 535 * can be viewed as two independent transport protocols. 536 * However, anonymouns ports are allocated from the same range to avoid 537 * duplicating the us->us_next_port_to_try. 538 */ 539 static void 540 udp_tpi_bind(queue_t *q, mblk_t *mp) 541 { 542 sin_t *sin; 543 sin6_t *sin6; 544 mblk_t *mp1; 545 struct T_bind_req *tbr; 546 conn_t *connp; 547 udp_t *udp; 548 int error; 549 struct sockaddr *sa; 550 cred_t *cr; 551 552 /* 553 * All Solaris components should pass a db_credp 554 * for this TPI message, hence we ASSERT. 555 * But in case there is some other M_PROTO that looks 556 * like a TPI message sent by some other kernel 557 * component, we check and return an error. 558 */ 559 cr = msg_getcred(mp, NULL); 560 ASSERT(cr != NULL); 561 if (cr == NULL) { 562 udp_err_ack(q, mp, TSYSERR, EINVAL); 563 return; 564 } 565 566 connp = Q_TO_CONN(q); 567 udp = connp->conn_udp; 568 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 569 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 570 "udp_bind: bad req, len %u", 571 (uint_t)(mp->b_wptr - mp->b_rptr)); 572 udp_err_ack(q, mp, TPROTO, 0); 573 return; 574 } 575 if (udp->udp_state != TS_UNBND) { 576 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 577 "udp_bind: bad state, %u", udp->udp_state); 578 udp_err_ack(q, mp, TOUTSTATE, 0); 579 return; 580 } 581 /* 582 * Reallocate the message to make sure we have enough room for an 583 * address and the protocol type. 584 */ 585 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 586 if (!mp1) { 587 udp_err_ack(q, mp, TSYSERR, ENOMEM); 588 return; 589 } 590 591 mp = mp1; 592 593 /* Reset the message type in preparation for shipping it back. 
*/ 594 DB_TYPE(mp) = M_PCPROTO; 595 596 tbr = (struct T_bind_req *)mp->b_rptr; 597 switch (tbr->ADDR_length) { 598 case 0: /* Request for a generic port */ 599 tbr->ADDR_offset = sizeof (struct T_bind_req); 600 if (udp->udp_family == AF_INET) { 601 tbr->ADDR_length = sizeof (sin_t); 602 sin = (sin_t *)&tbr[1]; 603 *sin = sin_null; 604 sin->sin_family = AF_INET; 605 mp->b_wptr = (uchar_t *)&sin[1]; 606 sa = (struct sockaddr *)sin; 607 } else { 608 ASSERT(udp->udp_family == AF_INET6); 609 tbr->ADDR_length = sizeof (sin6_t); 610 sin6 = (sin6_t *)&tbr[1]; 611 *sin6 = sin6_null; 612 sin6->sin6_family = AF_INET6; 613 mp->b_wptr = (uchar_t *)&sin6[1]; 614 sa = (struct sockaddr *)sin6; 615 } 616 break; 617 618 case sizeof (sin_t): /* Complete IPv4 address */ 619 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 620 sizeof (sin_t)); 621 if (sa == NULL || !OK_32PTR((char *)sa)) { 622 udp_err_ack(q, mp, TSYSERR, EINVAL); 623 return; 624 } 625 if (udp->udp_family != AF_INET || 626 sa->sa_family != AF_INET) { 627 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 628 return; 629 } 630 break; 631 632 case sizeof (sin6_t): /* complete IPv6 address */ 633 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 634 sizeof (sin6_t)); 635 if (sa == NULL || !OK_32PTR((char *)sa)) { 636 udp_err_ack(q, mp, TSYSERR, EINVAL); 637 return; 638 } 639 if (udp->udp_family != AF_INET6 || 640 sa->sa_family != AF_INET6) { 641 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 642 return; 643 } 644 break; 645 646 default: /* Invalid request */ 647 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 648 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 649 udp_err_ack(q, mp, TBADADDR, 0); 650 return; 651 } 652 653 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 654 tbr->PRIM_type != O_T_BIND_REQ); 655 656 if (error != 0) { 657 if (error > 0) { 658 udp_err_ack(q, mp, TSYSERR, error); 659 } else { 660 udp_err_ack(q, mp, -error, 0); 661 } 662 } else { 663 tbr->PRIM_type = T_BIND_ACK; 664 
qreply(q, mp); 665 } 666 } 667 668 /* 669 * This routine handles each T_CONN_REQ message passed to udp. It 670 * associates a default destination address with the stream. 671 * 672 * This routine sends down a T_BIND_REQ to IP with the following mblks: 673 * T_BIND_REQ - specifying local and remote address/port 674 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 675 * T_OK_ACK - for the T_CONN_REQ 676 * T_CONN_CON - to keep the TPI user happy 677 * 678 * The connect completes in udp_do_connect. 679 * When a T_BIND_ACK is received information is extracted from the IRE 680 * and the two appended messages are sent to the TPI user. 681 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 682 * convert it to an error ack for the appropriate primitive. 683 */ 684 static void 685 udp_tpi_connect(queue_t *q, mblk_t *mp) 686 { 687 udp_t *udp; 688 conn_t *connp = Q_TO_CONN(q); 689 int error; 690 socklen_t len; 691 struct sockaddr *sa; 692 struct T_conn_req *tcr; 693 cred_t *cr; 694 695 /* 696 * All Solaris components should pass a db_credp 697 * for this TPI message, hence we ASSERT. 698 * But in case there is some other M_PROTO that looks 699 * like a TPI message sent by some other kernel 700 * component, we check and return an error. 701 */ 702 cr = msg_getcred(mp, NULL); 703 ASSERT(cr != NULL); 704 if (cr == NULL) { 705 udp_err_ack(q, mp, TSYSERR, EINVAL); 706 return; 707 } 708 709 udp = connp->conn_udp; 710 tcr = (struct T_conn_req *)mp->b_rptr; 711 712 /* A bit of sanity checking */ 713 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 714 udp_err_ack(q, mp, TPROTO, 0); 715 return; 716 } 717 718 if (tcr->OPT_length != 0) { 719 udp_err_ack(q, mp, TBADOPT, 0); 720 return; 721 } 722 723 /* 724 * Determine packet type based on type of address passed in 725 * the request should contain an IPv4 or IPv6 address. 
726 * Make sure that address family matches the type of 727 * family of the the address passed down 728 */ 729 len = tcr->DEST_length; 730 switch (tcr->DEST_length) { 731 default: 732 udp_err_ack(q, mp, TBADADDR, 0); 733 return; 734 735 case sizeof (sin_t): 736 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 737 sizeof (sin_t)); 738 break; 739 740 case sizeof (sin6_t): 741 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 742 sizeof (sin6_t)); 743 break; 744 } 745 746 error = proto_verify_ip_addr(udp->udp_family, sa, len); 747 if (error != 0) { 748 udp_err_ack(q, mp, TSYSERR, error); 749 return; 750 } 751 752 error = udp_do_connect(connp, sa, len, cr); 753 if (error != 0) { 754 if (error < 0) 755 udp_err_ack(q, mp, -error, 0); 756 else 757 udp_err_ack(q, mp, TSYSERR, error); 758 } else { 759 mblk_t *mp1; 760 /* 761 * We have to send a connection confirmation to 762 * keep TLI happy. 763 */ 764 if (udp->udp_family == AF_INET) { 765 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 766 sizeof (sin_t), NULL, 0); 767 } else { 768 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 769 sizeof (sin6_t), NULL, 0); 770 } 771 if (mp1 == NULL) { 772 udp_err_ack(q, mp, TSYSERR, ENOMEM); 773 return; 774 } 775 776 /* 777 * Send ok_ack for T_CONN_REQ 778 */ 779 mp = mi_tpi_ok_ack_alloc(mp); 780 if (mp == NULL) { 781 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 782 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 783 return; 784 } 785 786 putnext(connp->conn_rq, mp); 787 putnext(connp->conn_rq, mp1); 788 } 789 } 790 791 static int 792 udp_tpi_close(queue_t *q, int flags) 793 { 794 conn_t *connp; 795 796 if (flags & SO_FALLBACK) { 797 /* 798 * stream is being closed while in fallback 799 * simply free the resources that were allocated 800 */ 801 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 802 qprocsoff(q); 803 goto done; 804 } 805 806 connp = Q_TO_CONN(q); 807 udp_do_close(connp); 808 done: 809 q->q_ptr = WR(q)->q_ptr = NULL; 810 return (0); 811 } 812 813 /* 814 * Called in the close path to quiesce the conn 815 */ 816 void 817 udp_quiesce_conn(conn_t *connp) 818 { 819 udp_t *udp = connp->conn_udp; 820 821 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 822 /* 823 * Running in cluster mode - register unbind information 824 */ 825 if (udp->udp_ipversion == IPV4_VERSION) { 826 (*cl_inet_unbind)( 827 connp->conn_netstack->netstack_stackid, 828 IPPROTO_UDP, AF_INET, 829 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 830 (in_port_t)udp->udp_port, NULL); 831 } else { 832 (*cl_inet_unbind)( 833 connp->conn_netstack->netstack_stackid, 834 IPPROTO_UDP, AF_INET6, 835 (uint8_t *)(&(udp->udp_v6src)), 836 (in_port_t)udp->udp_port, NULL); 837 } 838 } 839 840 udp_bind_hash_remove(udp, B_FALSE); 841 842 } 843 844 void 845 udp_close_free(conn_t *connp) 846 { 847 udp_t *udp = connp->conn_udp; 848 849 /* If there are any options associated with the stream, free them. 
*/ 850 if (udp->udp_ip_snd_options != NULL) { 851 mi_free((char *)udp->udp_ip_snd_options); 852 udp->udp_ip_snd_options = NULL; 853 udp->udp_ip_snd_options_len = 0; 854 } 855 856 if (udp->udp_ip_rcv_options != NULL) { 857 mi_free((char *)udp->udp_ip_rcv_options); 858 udp->udp_ip_rcv_options = NULL; 859 udp->udp_ip_rcv_options_len = 0; 860 } 861 862 /* Free memory associated with sticky options */ 863 if (udp->udp_sticky_hdrs_len != 0) { 864 kmem_free(udp->udp_sticky_hdrs, 865 udp->udp_sticky_hdrs_len); 866 udp->udp_sticky_hdrs = NULL; 867 udp->udp_sticky_hdrs_len = 0; 868 } 869 if (udp->udp_last_cred != NULL) { 870 crfree(udp->udp_last_cred); 871 udp->udp_last_cred = NULL; 872 } 873 if (udp->udp_effective_cred != NULL) { 874 crfree(udp->udp_effective_cred); 875 udp->udp_effective_cred = NULL; 876 } 877 878 ip6_pkt_free(&udp->udp_sticky_ipp); 879 880 /* 881 * Clear any fields which the kmem_cache constructor clears. 882 * Only udp_connp needs to be preserved. 883 * TBD: We should make this more efficient to avoid clearing 884 * everything. 
885 */ 886 ASSERT(udp->udp_connp == connp); 887 bzero(udp, sizeof (udp_t)); 888 udp->udp_connp = connp; 889 } 890 891 static int 892 udp_do_disconnect(conn_t *connp) 893 { 894 udp_t *udp; 895 mblk_t *ire_mp; 896 udp_fanout_t *udpf; 897 udp_stack_t *us; 898 int error; 899 900 udp = connp->conn_udp; 901 us = udp->udp_us; 902 rw_enter(&udp->udp_rwlock, RW_WRITER); 903 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 904 rw_exit(&udp->udp_rwlock); 905 return (-TOUTSTATE); 906 } 907 udp->udp_pending_op = T_DISCON_REQ; 908 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 909 us->us_bind_fanout_size)]; 910 mutex_enter(&udpf->uf_lock); 911 udp->udp_v6src = udp->udp_bound_v6src; 912 udp->udp_state = TS_IDLE; 913 mutex_exit(&udpf->uf_lock); 914 915 if (udp->udp_family == AF_INET6) { 916 /* Rebuild the header template */ 917 error = udp_build_hdrs(udp); 918 if (error != 0) { 919 udp->udp_pending_op = -1; 920 rw_exit(&udp->udp_rwlock); 921 return (error); 922 } 923 } 924 925 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 926 if (ire_mp == NULL) { 927 mutex_enter(&udpf->uf_lock); 928 udp->udp_pending_op = -1; 929 mutex_exit(&udpf->uf_lock); 930 rw_exit(&udp->udp_rwlock); 931 return (ENOMEM); 932 } 933 934 rw_exit(&udp->udp_rwlock); 935 936 if (udp->udp_family == AF_INET6) { 937 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 938 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 939 } else { 940 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 941 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 942 } 943 944 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 945 } 946 947 948 static void 949 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 950 { 951 conn_t *connp = Q_TO_CONN(q); 952 int error; 953 954 /* 955 * Allocate the largest primitive we need to send back 956 * T_error_ack is > than T_ok_ack 957 */ 958 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 959 if (mp == NULL) { 960 /* Unable to reuse the T_DISCON_REQ for the 
ack. */ 961 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 962 return; 963 } 964 965 error = udp_do_disconnect(connp); 966 967 if (error != 0) { 968 if (error < 0) { 969 udp_err_ack(q, mp, -error, 0); 970 } else { 971 udp_err_ack(q, mp, TSYSERR, error); 972 } 973 } else { 974 mp = mi_tpi_ok_ack_alloc(mp); 975 ASSERT(mp != NULL); 976 qreply(q, mp); 977 } 978 } 979 980 int 981 udp_disconnect(conn_t *connp) 982 { 983 int error; 984 udp_t *udp = connp->conn_udp; 985 986 udp->udp_dgram_errind = B_FALSE; 987 988 error = udp_do_disconnect(connp); 989 990 if (error < 0) 991 error = proto_tlitosyserr(-error); 992 993 return (error); 994 } 995 996 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 997 static void 998 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 999 { 1000 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1001 qreply(q, mp); 1002 } 1003 1004 /* Shorthand to generate and send TPI error acks to our client */ 1005 static void 1006 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1007 int sys_error) 1008 { 1009 struct T_error_ack *teackp; 1010 1011 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1012 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1013 teackp = (struct T_error_ack *)mp->b_rptr; 1014 teackp->ERROR_prim = primitive; 1015 teackp->TLI_error = t_error; 1016 teackp->UNIX_error = sys_error; 1017 qreply(q, mp); 1018 } 1019 } 1020 1021 /*ARGSUSED*/ 1022 static int 1023 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1024 { 1025 int i; 1026 udp_t *udp = Q_TO_UDP(q); 1027 udp_stack_t *us = udp->udp_us; 1028 1029 for (i = 0; i < us->us_num_epriv_ports; i++) { 1030 if (us->us_epriv_ports[i] != 0) 1031 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1032 } 1033 return (0); 1034 } 1035 1036 /* ARGSUSED */ 1037 static int 1038 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1039 cred_t *cr) 1040 { 1041 long 
new_value; 1042 int i; 1043 udp_t *udp = Q_TO_UDP(q); 1044 udp_stack_t *us = udp->udp_us; 1045 1046 /* 1047 * Fail the request if the new value does not lie within the 1048 * port number limits. 1049 */ 1050 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1051 new_value <= 0 || new_value >= 65536) { 1052 return (EINVAL); 1053 } 1054 1055 /* Check if the value is already in the list */ 1056 for (i = 0; i < us->us_num_epriv_ports; i++) { 1057 if (new_value == us->us_epriv_ports[i]) { 1058 return (EEXIST); 1059 } 1060 } 1061 /* Find an empty slot */ 1062 for (i = 0; i < us->us_num_epriv_ports; i++) { 1063 if (us->us_epriv_ports[i] == 0) 1064 break; 1065 } 1066 if (i == us->us_num_epriv_ports) { 1067 return (EOVERFLOW); 1068 } 1069 1070 /* Set the new value */ 1071 us->us_epriv_ports[i] = (in_port_t)new_value; 1072 return (0); 1073 } 1074 1075 /* ARGSUSED */ 1076 static int 1077 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1078 cred_t *cr) 1079 { 1080 long new_value; 1081 int i; 1082 udp_t *udp = Q_TO_UDP(q); 1083 udp_stack_t *us = udp->udp_us; 1084 1085 /* 1086 * Fail the request if the new value does not lie within the 1087 * port number limits. 1088 */ 1089 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1090 new_value <= 0 || new_value >= 65536) { 1091 return (EINVAL); 1092 } 1093 1094 /* Check that the value is already in the list */ 1095 for (i = 0; i < us->us_num_epriv_ports; i++) { 1096 if (us->us_epriv_ports[i] == new_value) 1097 break; 1098 } 1099 if (i == us->us_num_epriv_ports) { 1100 return (ESRCH); 1101 } 1102 1103 /* Clear the value */ 1104 us->us_epriv_ports[i] = 0; 1105 return (0); 1106 } 1107 1108 /* At minimum we need 4 bytes of UDP header */ 1109 #define ICMP_MIN_UDP_HDR 4 1110 1111 /* 1112 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1113 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 
* Assumes that IP has pulled up everything up to and including the ICMP header.
 *
 * Only port/protocol unreachable is treated as a permanent error
 * (ECONNREFUSED); every other type/code is dropped.  The message is always
 * consumed: it is either handed to udp_icmp_error_ipv6() or freed here.
 */
static void
udp_icmp_error(conn_t *connp, mblk_t *mp)
{
	udp_t		*udp = connp->conn_udp;
	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
	icmph_t		*icmph;
	udpha_t		*udpha;
	mblk_t		*ind_mp = NULL;
	sin_t		sin;
	sin6_t		sin6;
	int		hdr_len;
	int		error = 0;

	ASSERT(OK_32PTR(mp->b_rptr));

	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
		udp_icmp_error_ipv6(connp, mp);
		return;
	}
	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);

	/* Skip past the outer IP and ICMP headers */
	hdr_len = IPH_HDR_LENGTH(ipha);
	icmph = (icmph_t *)&mp->b_rptr[hdr_len];
	ipha = (ipha_t *)&icmph[1];

	/* Skip past the inner IP and find the ULP header */
	hdr_len = IPH_HDR_LENGTH(ipha);
	udpha = (udpha_t *)((char *)ipha + hdr_len);

	/*
	 * Port and protocol unreachable are the only permanent errors.
	 * ICMP_FRAGMENTATION_NEEDED needs nothing here (IP has already
	 * adjusted the path MTU) and everything else is transient.
	 */
	if (icmph->icmph_type == ICMP_DEST_UNREACHABLE &&
	    (icmph->icmph_code == ICMP_PORT_UNREACHABLE ||
	    icmph->icmph_code == ICMP_PROTOCOL_UNREACHABLE)) {
		error = ECONNREFUSED;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (error == 0 || !udp->udp_dgram_errind) {
		freemsg(mp);
		return;
	}

	switch (udp->udp_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = ipha->ipha_dst;
		sin.sin_port = udpha->uha_dst_port;
		if (!IPCL_IS_NONSTR(connp)) {
			ind_mp = mi_tpi_uderror_ind((char *)&sin,
			    sizeof (sin_t), NULL, 0, error);
			break;
		}
		rw_enter(&udp->udp_rwlock, RW_WRITER);
		if (udp->udp_state != TS_DATA_XFER) {
			/* Not connected: record the error and its address */
			udp->udp_delayed_error = error;
			*((sin_t *)&udp->udp_delayed_addr) = sin;
		} else if (sin.sin_port == udp->udp_dstport &&
		    sin.sin_addr.s_addr == V4_PART_OF_V6(udp->udp_v6dst)) {
			/* Connected and the error is for our peer */
			rw_exit(&udp->udp_rwlock);
			(*connp->conn_upcalls->su_set_error)
			    (connp->conn_upper_handle, error);
			goto done;
		}
		rw_exit(&udp->udp_rwlock);
		break;
	case AF_INET6:
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
		sin6.sin6_port = udpha->uha_dst_port;
		if (!IPCL_IS_NONSTR(connp)) {
			ind_mp = mi_tpi_uderror_ind((char *)&sin6,
			    sizeof (sin6_t), NULL, 0, error);
			break;
		}
		rw_enter(&udp->udp_rwlock, RW_WRITER);
		if (udp->udp_state != TS_DATA_XFER) {
			/* Not connected: record the error and its address */
			udp->udp_delayed_error = error;
			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
		} else if (sin6.sin6_port == udp->udp_dstport &&
		    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, &udp->udp_v6dst)) {
			/* Connected and the error is for our peer */
			rw_exit(&udp->udp_rwlock);
			(*connp->conn_upcalls->su_set_error)
			    (connp->conn_upper_handle, error);
			goto done;
		}
		rw_exit(&udp->udp_rwlock);
		break;
	}
	if (ind_mp != NULL)
		putnext(connp->conn_rq, ind_mp);
done:
	ASSERT(!RW_ISWRITER(&udp->udp_rwlock));
	freemsg(mp);
}

/*
 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
1249 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1250 * Assumes that IP has pulled up all the extension headers as well as the 1251 * ICMPv6 header. 1252 */ 1253 static void 1254 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1255 { 1256 icmp6_t *icmp6; 1257 ip6_t *ip6h, *outer_ip6h; 1258 uint16_t iph_hdr_length; 1259 uint8_t *nexthdrp; 1260 udpha_t *udpha; 1261 sin6_t sin6; 1262 mblk_t *mp1; 1263 int error = 0; 1264 udp_t *udp = connp->conn_udp; 1265 udp_stack_t *us = udp->udp_us; 1266 1267 outer_ip6h = (ip6_t *)mp->b_rptr; 1268 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1269 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1270 else 1271 iph_hdr_length = IPV6_HDR_LEN; 1272 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1273 ip6h = (ip6_t *)&icmp6[1]; 1274 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1275 freemsg(mp); 1276 return; 1277 } 1278 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1279 1280 switch (icmp6->icmp6_type) { 1281 case ICMP6_DST_UNREACH: 1282 switch (icmp6->icmp6_code) { 1283 case ICMP6_DST_UNREACH_NOPORT: 1284 error = ECONNREFUSED; 1285 break; 1286 case ICMP6_DST_UNREACH_ADMIN: 1287 case ICMP6_DST_UNREACH_NOROUTE: 1288 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1289 case ICMP6_DST_UNREACH_ADDR: 1290 /* Transient errors */ 1291 break; 1292 default: 1293 break; 1294 } 1295 break; 1296 case ICMP6_PACKET_TOO_BIG: { 1297 struct T_unitdata_ind *tudi; 1298 struct T_opthdr *toh; 1299 size_t udi_size; 1300 mblk_t *newmp; 1301 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1302 sizeof (struct ip6_mtuinfo); 1303 sin6_t *sin6; 1304 struct ip6_mtuinfo *mtuinfo; 1305 1306 /* 1307 * If the application has requested to receive path mtu 1308 * information, send up an empty message containing an 1309 * IPV6_PATHMTU ancillary data item. 
1310 */ 1311 if (!udp->udp_ipv6_recvpathmtu) 1312 break; 1313 1314 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1315 opt_length; 1316 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1317 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1318 break; 1319 } 1320 1321 /* 1322 * newmp->b_cont is left to NULL on purpose. This is an 1323 * empty message containing only ancillary data. 1324 */ 1325 newmp->b_datap->db_type = M_PROTO; 1326 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1327 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1328 tudi->PRIM_type = T_UNITDATA_IND; 1329 tudi->SRC_length = sizeof (sin6_t); 1330 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1331 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1332 tudi->OPT_length = opt_length; 1333 1334 sin6 = (sin6_t *)&tudi[1]; 1335 bzero(sin6, sizeof (sin6_t)); 1336 sin6->sin6_family = AF_INET6; 1337 sin6->sin6_addr = udp->udp_v6dst; 1338 1339 toh = (struct T_opthdr *)&sin6[1]; 1340 toh->level = IPPROTO_IPV6; 1341 toh->name = IPV6_PATHMTU; 1342 toh->len = opt_length; 1343 toh->status = 0; 1344 1345 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1346 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1347 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1348 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1349 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1350 /* 1351 * We've consumed everything we need from the original 1352 * message. Free it, then send our empty message. 
1353 */ 1354 freemsg(mp); 1355 udp_ulp_recv(connp, newmp); 1356 1357 return; 1358 } 1359 case ICMP6_TIME_EXCEEDED: 1360 /* Transient errors */ 1361 break; 1362 case ICMP6_PARAM_PROB: 1363 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1364 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1365 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1366 (uchar_t *)nexthdrp) { 1367 error = ECONNREFUSED; 1368 break; 1369 } 1370 break; 1371 } 1372 if (error == 0) { 1373 freemsg(mp); 1374 return; 1375 } 1376 1377 /* 1378 * Deliver T_UDERROR_IND when the application has asked for it. 1379 * The socket layer enables this automatically when connected. 1380 */ 1381 if (!udp->udp_dgram_errind) { 1382 freemsg(mp); 1383 return; 1384 } 1385 1386 sin6 = sin6_null; 1387 sin6.sin6_family = AF_INET6; 1388 sin6.sin6_addr = ip6h->ip6_dst; 1389 sin6.sin6_port = udpha->uha_dst_port; 1390 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1391 1392 if (IPCL_IS_NONSTR(connp)) { 1393 rw_enter(&udp->udp_rwlock, RW_WRITER); 1394 if (udp->udp_state == TS_DATA_XFER) { 1395 if (sin6.sin6_port == udp->udp_dstport && 1396 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1397 &udp->udp_v6dst)) { 1398 rw_exit(&udp->udp_rwlock); 1399 (*connp->conn_upcalls->su_set_error) 1400 (connp->conn_upper_handle, error); 1401 goto done; 1402 } 1403 } else { 1404 udp->udp_delayed_error = error; 1405 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1406 } 1407 rw_exit(&udp->udp_rwlock); 1408 } else { 1409 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1410 NULL, 0, error); 1411 if (mp1 != NULL) 1412 putnext(connp->conn_rq, mp1); 1413 } 1414 done: 1415 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1416 freemsg(mp); 1417 } 1418 1419 /* 1420 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1421 * The local address is filled in if endpoint is bound. 
The remote address 1422 * is filled in if remote address has been precified ("connected endpoint") 1423 * (The concept of connected CLTS sockets is alien to published TPI 1424 * but we support it anyway). 1425 */ 1426 static void 1427 udp_addr_req(queue_t *q, mblk_t *mp) 1428 { 1429 sin_t *sin; 1430 sin6_t *sin6; 1431 mblk_t *ackmp; 1432 struct T_addr_ack *taa; 1433 udp_t *udp = Q_TO_UDP(q); 1434 1435 /* Make it large enough for worst case */ 1436 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1437 2 * sizeof (sin6_t), 1); 1438 if (ackmp == NULL) { 1439 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1440 return; 1441 } 1442 taa = (struct T_addr_ack *)ackmp->b_rptr; 1443 1444 bzero(taa, sizeof (struct T_addr_ack)); 1445 ackmp->b_wptr = (uchar_t *)&taa[1]; 1446 1447 taa->PRIM_type = T_ADDR_ACK; 1448 ackmp->b_datap->db_type = M_PCPROTO; 1449 rw_enter(&udp->udp_rwlock, RW_READER); 1450 /* 1451 * Note: Following code assumes 32 bit alignment of basic 1452 * data structures like sin_t and struct T_addr_ack. 1453 */ 1454 if (udp->udp_state != TS_UNBND) { 1455 /* 1456 * Fill in local address first 1457 */ 1458 taa->LOCADDR_offset = sizeof (*taa); 1459 if (udp->udp_family == AF_INET) { 1460 taa->LOCADDR_length = sizeof (sin_t); 1461 sin = (sin_t *)&taa[1]; 1462 /* Fill zeroes and then initialize non-zero fields */ 1463 *sin = sin_null; 1464 sin->sin_family = AF_INET; 1465 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1466 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1467 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1468 sin->sin_addr.s_addr); 1469 } else { 1470 /* 1471 * INADDR_ANY 1472 * udp_v6src is not set, we might be bound to 1473 * broadcast/multicast. 
Use udp_bound_v6src as 1474 * local address instead (that could 1475 * also still be INADDR_ANY) 1476 */ 1477 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1478 sin->sin_addr.s_addr); 1479 } 1480 sin->sin_port = udp->udp_port; 1481 ackmp->b_wptr = (uchar_t *)&sin[1]; 1482 if (udp->udp_state == TS_DATA_XFER) { 1483 /* 1484 * connected, fill remote address too 1485 */ 1486 taa->REMADDR_length = sizeof (sin_t); 1487 /* assumed 32-bit alignment */ 1488 taa->REMADDR_offset = taa->LOCADDR_offset + 1489 taa->LOCADDR_length; 1490 1491 sin = (sin_t *)(ackmp->b_rptr + 1492 taa->REMADDR_offset); 1493 /* initialize */ 1494 *sin = sin_null; 1495 sin->sin_family = AF_INET; 1496 sin->sin_addr.s_addr = 1497 V4_PART_OF_V6(udp->udp_v6dst); 1498 sin->sin_port = udp->udp_dstport; 1499 ackmp->b_wptr = (uchar_t *)&sin[1]; 1500 } 1501 } else { 1502 taa->LOCADDR_length = sizeof (sin6_t); 1503 sin6 = (sin6_t *)&taa[1]; 1504 /* Fill zeroes and then initialize non-zero fields */ 1505 *sin6 = sin6_null; 1506 sin6->sin6_family = AF_INET6; 1507 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1508 sin6->sin6_addr = udp->udp_v6src; 1509 } else { 1510 /* 1511 * UNSPECIFIED 1512 * udp_v6src is not set, we might be bound to 1513 * broadcast/multicast. 
Use udp_bound_v6src as 1514 * local address instead (that could 1515 * also still be UNSPECIFIED) 1516 */ 1517 sin6->sin6_addr = 1518 udp->udp_bound_v6src; 1519 } 1520 sin6->sin6_port = udp->udp_port; 1521 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1522 if (udp->udp_state == TS_DATA_XFER) { 1523 /* 1524 * connected, fill remote address too 1525 */ 1526 taa->REMADDR_length = sizeof (sin6_t); 1527 /* assumed 32-bit alignment */ 1528 taa->REMADDR_offset = taa->LOCADDR_offset + 1529 taa->LOCADDR_length; 1530 1531 sin6 = (sin6_t *)(ackmp->b_rptr + 1532 taa->REMADDR_offset); 1533 /* initialize */ 1534 *sin6 = sin6_null; 1535 sin6->sin6_family = AF_INET6; 1536 sin6->sin6_addr = udp->udp_v6dst; 1537 sin6->sin6_port = udp->udp_dstport; 1538 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1539 } 1540 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1541 } 1542 } 1543 rw_exit(&udp->udp_rwlock); 1544 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1545 qreply(q, ackmp); 1546 } 1547 1548 static void 1549 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1550 { 1551 if (udp->udp_family == AF_INET) { 1552 *tap = udp_g_t_info_ack_ipv4; 1553 } else { 1554 *tap = udp_g_t_info_ack_ipv6; 1555 } 1556 tap->CURRENT_state = udp->udp_state; 1557 tap->OPT_size = udp_max_optsize; 1558 } 1559 1560 static void 1561 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1562 t_uscalar_t cap_bits1) 1563 { 1564 tcap->CAP_bits1 = 0; 1565 1566 if (cap_bits1 & TC1_INFO) { 1567 udp_copy_info(&tcap->INFO_ack, udp); 1568 tcap->CAP_bits1 |= TC1_INFO; 1569 } 1570 } 1571 1572 /* 1573 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1574 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1575 * udp_g_t_info_ack. The current state of the stream is copied from 1576 * udp_state. 
1577 */ 1578 static void 1579 udp_capability_req(queue_t *q, mblk_t *mp) 1580 { 1581 t_uscalar_t cap_bits1; 1582 struct T_capability_ack *tcap; 1583 udp_t *udp = Q_TO_UDP(q); 1584 1585 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1586 1587 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1588 mp->b_datap->db_type, T_CAPABILITY_ACK); 1589 if (!mp) 1590 return; 1591 1592 tcap = (struct T_capability_ack *)mp->b_rptr; 1593 udp_do_capability_ack(udp, tcap, cap_bits1); 1594 1595 qreply(q, mp); 1596 } 1597 1598 /* 1599 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1600 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1601 * The current state of the stream is copied from udp_state. 1602 */ 1603 static void 1604 udp_info_req(queue_t *q, mblk_t *mp) 1605 { 1606 udp_t *udp = Q_TO_UDP(q); 1607 1608 /* Create a T_INFO_ACK message. */ 1609 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1610 T_INFO_ACK); 1611 if (!mp) 1612 return; 1613 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1614 qreply(q, mp); 1615 } 1616 1617 /* For /dev/udp aka AF_INET open */ 1618 static int 1619 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1620 { 1621 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1622 } 1623 1624 /* For /dev/udp6 aka AF_INET6 open */ 1625 static int 1626 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1627 { 1628 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1629 } 1630 1631 /* 1632 * This is the open routine for udp. It allocates a udp_t structure for 1633 * the stream and, on the first open of the module, creates an ND table. 
1634 */ 1635 /*ARGSUSED2*/ 1636 static int 1637 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1638 boolean_t isv6) 1639 { 1640 int error; 1641 udp_t *udp; 1642 conn_t *connp; 1643 dev_t conn_dev; 1644 udp_stack_t *us; 1645 vmem_t *minor_arena; 1646 1647 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1648 1649 /* If the stream is already open, return immediately. */ 1650 if (q->q_ptr != NULL) 1651 return (0); 1652 1653 if (sflag == MODOPEN) 1654 return (EINVAL); 1655 1656 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1657 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1658 minor_arena = ip_minor_arena_la; 1659 } else { 1660 /* 1661 * Either minor numbers in the large arena were exhausted 1662 * or a non socket application is doing the open. 1663 * Try to allocate from the small arena. 1664 */ 1665 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1666 return (EBUSY); 1667 1668 minor_arena = ip_minor_arena_sa; 1669 } 1670 1671 if (flag & SO_FALLBACK) { 1672 /* 1673 * Non streams socket needs a stream to fallback to 1674 */ 1675 RD(q)->q_ptr = (void *)conn_dev; 1676 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1677 WR(q)->q_ptr = (void *)minor_arena; 1678 qprocson(q); 1679 return (0); 1680 } 1681 1682 connp = udp_do_open(credp, isv6, KM_SLEEP); 1683 if (connp == NULL) { 1684 inet_minor_free(minor_arena, conn_dev); 1685 return (ENOMEM); 1686 } 1687 udp = connp->conn_udp; 1688 us = udp->udp_us; 1689 1690 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1691 connp->conn_dev = conn_dev; 1692 connp->conn_minor_arena = minor_arena; 1693 1694 /* 1695 * Initialize the udp_t structure for this stream. 
1696 */ 1697 q->q_ptr = connp; 1698 WR(q)->q_ptr = connp; 1699 connp->conn_rq = q; 1700 connp->conn_wq = WR(q); 1701 1702 rw_enter(&udp->udp_rwlock, RW_WRITER); 1703 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1704 ASSERT(connp->conn_udp == udp); 1705 ASSERT(udp->udp_connp == connp); 1706 1707 if (flag & SO_SOCKSTR) { 1708 connp->conn_flags |= IPCL_SOCKET; 1709 udp->udp_issocket = B_TRUE; 1710 } 1711 1712 q->q_hiwat = us->us_recv_hiwat; 1713 WR(q)->q_hiwat = us->us_xmit_hiwat; 1714 WR(q)->q_lowat = us->us_xmit_lowat; 1715 1716 qprocson(q); 1717 1718 if (udp->udp_family == AF_INET6) { 1719 /* Build initial header template for transmit */ 1720 if ((error = udp_build_hdrs(udp)) != 0) { 1721 rw_exit(&udp->udp_rwlock); 1722 qprocsoff(q); 1723 inet_minor_free(minor_arena, conn_dev); 1724 ipcl_conn_destroy(connp); 1725 return (error); 1726 } 1727 } 1728 rw_exit(&udp->udp_rwlock); 1729 1730 /* Set the Stream head write offset and high watermark. */ 1731 (void) proto_set_tx_wroff(q, connp, 1732 udp->udp_max_hdr_len + us->us_wroff_extra); 1733 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */ 1734 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1735 1736 mutex_enter(&connp->conn_lock); 1737 connp->conn_state_flags &= ~CONN_INCIPIENT; 1738 mutex_exit(&connp->conn_lock); 1739 return (0); 1740 } 1741 1742 /* 1743 * Which UDP options OK to set through T_UNITDATA_REQ... 
1744 */ 1745 /* ARGSUSED */ 1746 static boolean_t 1747 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1748 { 1749 return (B_TRUE); 1750 } 1751 1752 /* 1753 * This routine gets default values of certain options whose default 1754 * values are maintained by protcol specific code 1755 */ 1756 /* ARGSUSED */ 1757 int 1758 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1759 { 1760 udp_t *udp = Q_TO_UDP(q); 1761 udp_stack_t *us = udp->udp_us; 1762 int *i1 = (int *)ptr; 1763 1764 switch (level) { 1765 case IPPROTO_IP: 1766 switch (name) { 1767 case IP_MULTICAST_TTL: 1768 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1769 return (sizeof (uchar_t)); 1770 case IP_MULTICAST_LOOP: 1771 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1772 return (sizeof (uchar_t)); 1773 } 1774 break; 1775 case IPPROTO_IPV6: 1776 switch (name) { 1777 case IPV6_MULTICAST_HOPS: 1778 *i1 = IP_DEFAULT_MULTICAST_TTL; 1779 return (sizeof (int)); 1780 case IPV6_MULTICAST_LOOP: 1781 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1782 return (sizeof (int)); 1783 case IPV6_UNICAST_HOPS: 1784 *i1 = us->us_ipv6_hoplimit; 1785 return (sizeof (int)); 1786 } 1787 break; 1788 } 1789 return (-1); 1790 } 1791 1792 /* 1793 * This routine retrieves the current status of socket options. 1794 * It returns the size of the option retrieved. 
1795 */ 1796 static int 1797 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1798 { 1799 udp_t *udp = connp->conn_udp; 1800 udp_stack_t *us = udp->udp_us; 1801 int *i1 = (int *)ptr; 1802 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 1803 int len; 1804 1805 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 1806 switch (level) { 1807 case SOL_SOCKET: 1808 switch (name) { 1809 case SO_DEBUG: 1810 *i1 = udp->udp_debug; 1811 break; /* goto sizeof (int) option return */ 1812 case SO_REUSEADDR: 1813 *i1 = udp->udp_reuseaddr; 1814 break; /* goto sizeof (int) option return */ 1815 case SO_TYPE: 1816 *i1 = SOCK_DGRAM; 1817 break; /* goto sizeof (int) option return */ 1818 1819 /* 1820 * The following three items are available here, 1821 * but are only meaningful to IP. 1822 */ 1823 case SO_DONTROUTE: 1824 *i1 = udp->udp_dontroute; 1825 break; /* goto sizeof (int) option return */ 1826 case SO_USELOOPBACK: 1827 *i1 = udp->udp_useloopback; 1828 break; /* goto sizeof (int) option return */ 1829 case SO_BROADCAST: 1830 *i1 = udp->udp_broadcast; 1831 break; /* goto sizeof (int) option return */ 1832 1833 case SO_SNDBUF: 1834 *i1 = udp->udp_xmit_hiwat; 1835 break; /* goto sizeof (int) option return */ 1836 case SO_RCVBUF: 1837 *i1 = udp->udp_rcv_disply_hiwat; 1838 break; /* goto sizeof (int) option return */ 1839 case SO_DGRAM_ERRIND: 1840 *i1 = udp->udp_dgram_errind; 1841 break; /* goto sizeof (int) option return */ 1842 case SO_RECVUCRED: 1843 *i1 = udp->udp_recvucred; 1844 break; /* goto sizeof (int) option return */ 1845 case SO_TIMESTAMP: 1846 *i1 = udp->udp_timestamp; 1847 break; /* goto sizeof (int) option return */ 1848 case SO_ANON_MLP: 1849 *i1 = connp->conn_anon_mlp; 1850 break; /* goto sizeof (int) option return */ 1851 case SO_MAC_EXEMPT: 1852 *i1 = (connp->conn_mac_mode == CONN_MAC_AWARE); 1853 break; 1854 case SO_MAC_IMPLICIT: 1855 *i1 = (connp->conn_mac_mode == CONN_MAC_IMPLICIT); 1856 break; 1857 case SO_ALLZONES: 1858 *i1 = connp->conn_allzones; 1859 break; /* goto 
sizeof (int) option return */ 1860 case SO_EXCLBIND: 1861 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 1862 break; 1863 case SO_PROTOTYPE: 1864 *i1 = IPPROTO_UDP; 1865 break; 1866 case SO_DOMAIN: 1867 *i1 = udp->udp_family; 1868 break; 1869 default: 1870 return (-1); 1871 } 1872 break; 1873 case IPPROTO_IP: 1874 if (udp->udp_family != AF_INET) 1875 return (-1); 1876 switch (name) { 1877 case IP_OPTIONS: 1878 case T_IP_OPTIONS: 1879 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 1880 if (len > 0) { 1881 bcopy(udp->udp_ip_rcv_options + 1882 udp->udp_label_len, ptr, len); 1883 } 1884 return (len); 1885 case IP_TOS: 1886 case T_IP_TOS: 1887 *i1 = (int)udp->udp_type_of_service; 1888 break; /* goto sizeof (int) option return */ 1889 case IP_TTL: 1890 *i1 = (int)udp->udp_ttl; 1891 break; /* goto sizeof (int) option return */ 1892 case IP_DHCPINIT_IF: 1893 return (-EINVAL); 1894 case IP_NEXTHOP: 1895 case IP_RECVPKTINFO: 1896 /* 1897 * This also handles IP_PKTINFO. 1898 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 1899 * Differentiation is based on the size of the argument 1900 * passed in. 1901 * This option is handled in IP which will return an 1902 * error for IP_PKTINFO as it's not supported as a 1903 * sticky option. 
1904 */ 1905 return (-EINVAL); 1906 case IP_MULTICAST_IF: 1907 /* 0 address if not set */ 1908 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 1909 return (sizeof (ipaddr_t)); 1910 case IP_MULTICAST_TTL: 1911 *(uchar_t *)ptr = udp->udp_multicast_ttl; 1912 return (sizeof (uchar_t)); 1913 case IP_MULTICAST_LOOP: 1914 *ptr = connp->conn_multicast_loop; 1915 return (sizeof (uint8_t)); 1916 case IP_RECVOPTS: 1917 *i1 = udp->udp_recvopts; 1918 break; /* goto sizeof (int) option return */ 1919 case IP_RECVDSTADDR: 1920 *i1 = udp->udp_recvdstaddr; 1921 break; /* goto sizeof (int) option return */ 1922 case IP_RECVIF: 1923 *i1 = udp->udp_recvif; 1924 break; /* goto sizeof (int) option return */ 1925 case IP_RECVSLLA: 1926 *i1 = udp->udp_recvslla; 1927 break; /* goto sizeof (int) option return */ 1928 case IP_RECVTTL: 1929 *i1 = udp->udp_recvttl; 1930 break; /* goto sizeof (int) option return */ 1931 case IP_ADD_MEMBERSHIP: 1932 case IP_DROP_MEMBERSHIP: 1933 case IP_BLOCK_SOURCE: 1934 case IP_UNBLOCK_SOURCE: 1935 case IP_ADD_SOURCE_MEMBERSHIP: 1936 case IP_DROP_SOURCE_MEMBERSHIP: 1937 case MCAST_JOIN_GROUP: 1938 case MCAST_LEAVE_GROUP: 1939 case MCAST_BLOCK_SOURCE: 1940 case MCAST_UNBLOCK_SOURCE: 1941 case MCAST_JOIN_SOURCE_GROUP: 1942 case MCAST_LEAVE_SOURCE_GROUP: 1943 /* cannot "get" the value for these */ 1944 return (-1); 1945 case IP_BOUND_IF: 1946 /* Zero if not set */ 1947 *i1 = udp->udp_bound_if; 1948 break; /* goto sizeof (int) option return */ 1949 case IP_UNSPEC_SRC: 1950 *i1 = udp->udp_unspec_source; 1951 break; /* goto sizeof (int) option return */ 1952 case IP_BROADCAST_TTL: 1953 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 1954 return (sizeof (uchar_t)); 1955 default: 1956 return (-1); 1957 } 1958 break; 1959 case IPPROTO_IPV6: 1960 if (udp->udp_family != AF_INET6) 1961 return (-1); 1962 switch (name) { 1963 case IPV6_UNICAST_HOPS: 1964 *i1 = (unsigned int)udp->udp_ttl; 1965 break; /* goto sizeof (int) option return */ 1966 case IPV6_MULTICAST_IF: 1967 /* 0 
index if not set */ 1968 *i1 = udp->udp_multicast_if_index; 1969 break; /* goto sizeof (int) option return */ 1970 case IPV6_MULTICAST_HOPS: 1971 *i1 = udp->udp_multicast_ttl; 1972 break; /* goto sizeof (int) option return */ 1973 case IPV6_MULTICAST_LOOP: 1974 *i1 = connp->conn_multicast_loop; 1975 break; /* goto sizeof (int) option return */ 1976 case IPV6_JOIN_GROUP: 1977 case IPV6_LEAVE_GROUP: 1978 case MCAST_JOIN_GROUP: 1979 case MCAST_LEAVE_GROUP: 1980 case MCAST_BLOCK_SOURCE: 1981 case MCAST_UNBLOCK_SOURCE: 1982 case MCAST_JOIN_SOURCE_GROUP: 1983 case MCAST_LEAVE_SOURCE_GROUP: 1984 /* cannot "get" the value for these */ 1985 return (-1); 1986 case IPV6_BOUND_IF: 1987 /* Zero if not set */ 1988 *i1 = udp->udp_bound_if; 1989 break; /* goto sizeof (int) option return */ 1990 case IPV6_UNSPEC_SRC: 1991 *i1 = udp->udp_unspec_source; 1992 break; /* goto sizeof (int) option return */ 1993 case IPV6_RECVPKTINFO: 1994 *i1 = udp->udp_ip_recvpktinfo; 1995 break; /* goto sizeof (int) option return */ 1996 case IPV6_RECVTCLASS: 1997 *i1 = udp->udp_ipv6_recvtclass; 1998 break; /* goto sizeof (int) option return */ 1999 case IPV6_RECVPATHMTU: 2000 *i1 = udp->udp_ipv6_recvpathmtu; 2001 break; /* goto sizeof (int) option return */ 2002 case IPV6_RECVHOPLIMIT: 2003 *i1 = udp->udp_ipv6_recvhoplimit; 2004 break; /* goto sizeof (int) option return */ 2005 case IPV6_RECVHOPOPTS: 2006 *i1 = udp->udp_ipv6_recvhopopts; 2007 break; /* goto sizeof (int) option return */ 2008 case IPV6_RECVDSTOPTS: 2009 *i1 = udp->udp_ipv6_recvdstopts; 2010 break; /* goto sizeof (int) option return */ 2011 case _OLD_IPV6_RECVDSTOPTS: 2012 *i1 = udp->udp_old_ipv6_recvdstopts; 2013 break; /* goto sizeof (int) option return */ 2014 case IPV6_RECVRTHDRDSTOPTS: 2015 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2016 break; /* goto sizeof (int) option return */ 2017 case IPV6_RECVRTHDR: 2018 *i1 = udp->udp_ipv6_recvrthdr; 2019 break; /* goto sizeof (int) option return */ 2020 case IPV6_PKTINFO: { 2021 /* XXX assumes 
that caller has room for max size! */ 2022 struct in6_pktinfo *pkti; 2023 2024 pkti = (struct in6_pktinfo *)ptr; 2025 if (ipp->ipp_fields & IPPF_IFINDEX) 2026 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2027 else 2028 pkti->ipi6_ifindex = 0; 2029 if (ipp->ipp_fields & IPPF_ADDR) 2030 pkti->ipi6_addr = ipp->ipp_addr; 2031 else 2032 pkti->ipi6_addr = ipv6_all_zeros; 2033 return (sizeof (struct in6_pktinfo)); 2034 } 2035 case IPV6_TCLASS: 2036 if (ipp->ipp_fields & IPPF_TCLASS) 2037 *i1 = ipp->ipp_tclass; 2038 else 2039 *i1 = IPV6_FLOW_TCLASS( 2040 IPV6_DEFAULT_VERS_AND_FLOW); 2041 break; /* goto sizeof (int) option return */ 2042 case IPV6_NEXTHOP: { 2043 sin6_t *sin6 = (sin6_t *)ptr; 2044 2045 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2046 return (0); 2047 *sin6 = sin6_null; 2048 sin6->sin6_family = AF_INET6; 2049 sin6->sin6_addr = ipp->ipp_nexthop; 2050 return (sizeof (sin6_t)); 2051 } 2052 case IPV6_HOPOPTS: 2053 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2054 return (0); 2055 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2056 return (0); 2057 /* 2058 * The cipso/label option is added by kernel. 2059 * User is not usually aware of this option. 2060 * We copy out the hbh opt after the label option. 
2061 */ 2062 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2063 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2064 if (udp->udp_label_len_v6 > 0) { 2065 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2066 ptr[1] = (ipp->ipp_hopoptslen - 2067 udp->udp_label_len_v6 + 7) / 8 - 1; 2068 } 2069 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2070 case IPV6_RTHDRDSTOPTS: 2071 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2072 return (0); 2073 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2074 return (ipp->ipp_rtdstoptslen); 2075 case IPV6_RTHDR: 2076 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2077 return (0); 2078 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2079 return (ipp->ipp_rthdrlen); 2080 case IPV6_DSTOPTS: 2081 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2082 return (0); 2083 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2084 return (ipp->ipp_dstoptslen); 2085 case IPV6_PATHMTU: 2086 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2087 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2088 us->us_netstack)); 2089 default: 2090 return (-1); 2091 } 2092 break; 2093 case IPPROTO_UDP: 2094 switch (name) { 2095 case UDP_ANONPRIVBIND: 2096 *i1 = udp->udp_anon_priv_bind; 2097 break; 2098 case UDP_EXCLBIND: 2099 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2100 break; 2101 case UDP_RCVHDR: 2102 *i1 = udp->udp_rcvhdr ? 1 : 0; 2103 break; 2104 case UDP_NAT_T_ENDPOINT: 2105 *i1 = udp->udp_nat_t_endpoint; 2106 break; 2107 default: 2108 return (-1); 2109 } 2110 break; 2111 default: 2112 return (-1); 2113 } 2114 return (sizeof (int)); 2115 } 2116 2117 int 2118 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2119 { 2120 udp_t *udp; 2121 int err; 2122 2123 udp = Q_TO_UDP(q); 2124 2125 rw_enter(&udp->udp_rwlock, RW_READER); 2126 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2127 rw_exit(&udp->udp_rwlock); 2128 return (err); 2129 } 2130 2131 /* 2132 * This routine sets socket options. 
2133 */ 2134 /* ARGSUSED */ 2135 static int 2136 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2137 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2138 void *thisdg_attrs, boolean_t checkonly) 2139 { 2140 udpattrs_t *attrs = thisdg_attrs; 2141 int *i1 = (int *)invalp; 2142 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2143 udp_t *udp = connp->conn_udp; 2144 udp_stack_t *us = udp->udp_us; 2145 int error; 2146 uint_t newlen; 2147 size_t sth_wroff; 2148 2149 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2150 /* 2151 * For fixed length options, no sanity check 2152 * of passed in length is done. It is assumed *_optcom_req() 2153 * routines do the right thing. 2154 */ 2155 switch (level) { 2156 case SOL_SOCKET: 2157 switch (name) { 2158 case SO_REUSEADDR: 2159 if (!checkonly) { 2160 udp->udp_reuseaddr = onoff; 2161 PASS_OPT_TO_IP(connp); 2162 } 2163 break; 2164 case SO_DEBUG: 2165 if (!checkonly) 2166 udp->udp_debug = onoff; 2167 break; 2168 /* 2169 * The following three items are available here, 2170 * but are only meaningful to IP. 
2171 */ 2172 case SO_DONTROUTE: 2173 if (!checkonly) { 2174 udp->udp_dontroute = onoff; 2175 PASS_OPT_TO_IP(connp); 2176 } 2177 break; 2178 case SO_USELOOPBACK: 2179 if (!checkonly) { 2180 udp->udp_useloopback = onoff; 2181 PASS_OPT_TO_IP(connp); 2182 } 2183 break; 2184 case SO_BROADCAST: 2185 if (!checkonly) { 2186 udp->udp_broadcast = onoff; 2187 PASS_OPT_TO_IP(connp); 2188 } 2189 break; 2190 2191 case SO_SNDBUF: 2192 if (*i1 > us->us_max_buf) { 2193 *outlenp = 0; 2194 return (ENOBUFS); 2195 } 2196 if (!checkonly) { 2197 udp->udp_xmit_hiwat = *i1; 2198 connp->conn_wq->q_hiwat = *i1; 2199 } 2200 break; 2201 case SO_RCVBUF: 2202 if (*i1 > us->us_max_buf) { 2203 *outlenp = 0; 2204 return (ENOBUFS); 2205 } 2206 if (!checkonly) { 2207 int size; 2208 2209 udp->udp_rcv_disply_hiwat = *i1; 2210 size = udp_set_rcv_hiwat(udp, *i1); 2211 rw_exit(&udp->udp_rwlock); 2212 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2213 size); 2214 rw_enter(&udp->udp_rwlock, RW_WRITER); 2215 } 2216 break; 2217 case SO_DGRAM_ERRIND: 2218 if (!checkonly) 2219 udp->udp_dgram_errind = onoff; 2220 break; 2221 case SO_RECVUCRED: 2222 if (!checkonly) 2223 udp->udp_recvucred = onoff; 2224 break; 2225 case SO_ALLZONES: 2226 /* 2227 * "soft" error (negative) 2228 * option not handled at this level 2229 * Do not modify *outlenp. 2230 */ 2231 return (-EINVAL); 2232 case SO_TIMESTAMP: 2233 if (!checkonly) 2234 udp->udp_timestamp = onoff; 2235 break; 2236 case SO_ANON_MLP: 2237 case SO_MAC_EXEMPT: 2238 case SO_MAC_IMPLICIT: 2239 PASS_OPT_TO_IP(connp); 2240 break; 2241 case SCM_UCRED: { 2242 struct ucred_s *ucr; 2243 cred_t *cr, *newcr; 2244 ts_label_t *tsl; 2245 2246 /* 2247 * Only sockets that have proper privileges and are 2248 * bound to MLPs will have any other value here, so 2249 * this implicitly tests for privilege to set label. 
2250 */ 2251 if (connp->conn_mlp_type == mlptSingle) 2252 break; 2253 ucr = (struct ucred_s *)invalp; 2254 if (inlen != ucredsize || 2255 ucr->uc_labeloff < sizeof (*ucr) || 2256 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2257 return (EINVAL); 2258 if (!checkonly) { 2259 mblk_t *mb; 2260 pid_t cpid; 2261 2262 if (attrs == NULL || 2263 (mb = attrs->udpattr_mb) == NULL) 2264 return (EINVAL); 2265 if ((cr = msg_getcred(mb, &cpid)) == NULL) 2266 cr = udp->udp_connp->conn_cred; 2267 ASSERT(cr != NULL); 2268 if ((tsl = crgetlabel(cr)) == NULL) 2269 return (EINVAL); 2270 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2271 tsl->tsl_doi, KM_NOSLEEP); 2272 if (newcr == NULL) 2273 return (ENOSR); 2274 mblk_setcred(mb, newcr, cpid); 2275 attrs->udpattr_credset = B_TRUE; 2276 crfree(newcr); 2277 } 2278 break; 2279 } 2280 case SO_EXCLBIND: 2281 if (!checkonly) 2282 udp->udp_exclbind = onoff; 2283 break; 2284 case SO_RCVTIMEO: 2285 case SO_SNDTIMEO: 2286 /* 2287 * Pass these two options in order for third part 2288 * protocol usage. Here just return directly. 2289 */ 2290 return (0); 2291 default: 2292 *outlenp = 0; 2293 return (EINVAL); 2294 } 2295 break; 2296 case IPPROTO_IP: 2297 if (udp->udp_family != AF_INET) { 2298 *outlenp = 0; 2299 return (ENOPROTOOPT); 2300 } 2301 switch (name) { 2302 case IP_OPTIONS: 2303 case T_IP_OPTIONS: 2304 /* Save options for use by IP. */ 2305 newlen = inlen + udp->udp_label_len; 2306 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2307 *outlenp = 0; 2308 return (EINVAL); 2309 } 2310 if (checkonly) 2311 break; 2312 2313 /* 2314 * Update the stored options taking into account 2315 * any CIPSO option which we should not overwrite. 
2316 */ 2317 if (!tsol_option_set(&udp->udp_ip_snd_options, 2318 &udp->udp_ip_snd_options_len, 2319 udp->udp_label_len, invalp, inlen)) { 2320 *outlenp = 0; 2321 return (ENOMEM); 2322 } 2323 2324 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2325 UDPH_SIZE + udp->udp_ip_snd_options_len; 2326 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2327 rw_exit(&udp->udp_rwlock); 2328 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2329 sth_wroff); 2330 rw_enter(&udp->udp_rwlock, RW_WRITER); 2331 break; 2332 2333 case IP_TTL: 2334 if (!checkonly) { 2335 udp->udp_ttl = (uchar_t)*i1; 2336 } 2337 break; 2338 case IP_TOS: 2339 case T_IP_TOS: 2340 if (!checkonly) { 2341 udp->udp_type_of_service = (uchar_t)*i1; 2342 } 2343 break; 2344 case IP_MULTICAST_IF: { 2345 /* 2346 * TODO should check OPTMGMT reply and undo this if 2347 * there is an error. 2348 */ 2349 struct in_addr *inap = (struct in_addr *)invalp; 2350 if (!checkonly) { 2351 udp->udp_multicast_if_addr = 2352 inap->s_addr; 2353 PASS_OPT_TO_IP(connp); 2354 } 2355 break; 2356 } 2357 case IP_MULTICAST_TTL: 2358 if (!checkonly) 2359 udp->udp_multicast_ttl = *invalp; 2360 break; 2361 case IP_MULTICAST_LOOP: 2362 if (!checkonly) { 2363 connp->conn_multicast_loop = *invalp; 2364 PASS_OPT_TO_IP(connp); 2365 } 2366 break; 2367 case IP_RECVOPTS: 2368 if (!checkonly) 2369 udp->udp_recvopts = onoff; 2370 break; 2371 case IP_RECVDSTADDR: 2372 if (!checkonly) 2373 udp->udp_recvdstaddr = onoff; 2374 break; 2375 case IP_RECVIF: 2376 if (!checkonly) { 2377 udp->udp_recvif = onoff; 2378 PASS_OPT_TO_IP(connp); 2379 } 2380 break; 2381 case IP_RECVSLLA: 2382 if (!checkonly) { 2383 udp->udp_recvslla = onoff; 2384 PASS_OPT_TO_IP(connp); 2385 } 2386 break; 2387 case IP_RECVTTL: 2388 if (!checkonly) 2389 udp->udp_recvttl = onoff; 2390 break; 2391 case IP_PKTINFO: { 2392 /* 2393 * This also handles IP_RECVPKTINFO. 2394 * IP_PKTINFO and IP_RECVPKTINFO have same value. 
2395 * Differentiation is based on the size of the 2396 * argument passed in. 2397 */ 2398 struct in_pktinfo *pktinfop; 2399 ip4_pkt_t *attr_pktinfop; 2400 2401 if (checkonly) 2402 break; 2403 2404 if (inlen == sizeof (int)) { 2405 /* 2406 * This is IP_RECVPKTINFO option. 2407 * Keep a local copy of whether this option is 2408 * set or not and pass it down to IP for 2409 * processing. 2410 */ 2411 2412 udp->udp_ip_recvpktinfo = onoff; 2413 return (-EINVAL); 2414 } 2415 2416 if (attrs == NULL || 2417 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2418 /* 2419 * sticky option or no buffer to return 2420 * the results. 2421 */ 2422 return (EINVAL); 2423 } 2424 2425 if (inlen != sizeof (struct in_pktinfo)) 2426 return (EINVAL); 2427 2428 pktinfop = (struct in_pktinfo *)invalp; 2429 2430 /* 2431 * At least one of the values should be specified 2432 */ 2433 if (pktinfop->ipi_ifindex == 0 && 2434 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2435 return (EINVAL); 2436 } 2437 2438 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2439 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2440 2441 break; 2442 } 2443 case IP_ADD_MEMBERSHIP: 2444 case IP_DROP_MEMBERSHIP: 2445 case IP_BLOCK_SOURCE: 2446 case IP_UNBLOCK_SOURCE: 2447 case IP_ADD_SOURCE_MEMBERSHIP: 2448 case IP_DROP_SOURCE_MEMBERSHIP: 2449 case MCAST_JOIN_GROUP: 2450 case MCAST_LEAVE_GROUP: 2451 case MCAST_BLOCK_SOURCE: 2452 case MCAST_UNBLOCK_SOURCE: 2453 case MCAST_JOIN_SOURCE_GROUP: 2454 case MCAST_LEAVE_SOURCE_GROUP: 2455 case IP_SEC_OPT: 2456 case IP_NEXTHOP: 2457 case IP_DHCPINIT_IF: 2458 /* 2459 * "soft" error (negative) 2460 * option not handled at this level 2461 * Do not modify *outlenp. 
2462 */ 2463 return (-EINVAL); 2464 case IP_BOUND_IF: 2465 if (!checkonly) { 2466 udp->udp_bound_if = *i1; 2467 PASS_OPT_TO_IP(connp); 2468 } 2469 break; 2470 case IP_UNSPEC_SRC: 2471 if (!checkonly) { 2472 udp->udp_unspec_source = onoff; 2473 PASS_OPT_TO_IP(connp); 2474 } 2475 break; 2476 case IP_BROADCAST_TTL: 2477 if (!checkonly) 2478 connp->conn_broadcast_ttl = *invalp; 2479 break; 2480 default: 2481 *outlenp = 0; 2482 return (EINVAL); 2483 } 2484 break; 2485 case IPPROTO_IPV6: { 2486 ip6_pkt_t *ipp; 2487 boolean_t sticky; 2488 2489 if (udp->udp_family != AF_INET6) { 2490 *outlenp = 0; 2491 return (ENOPROTOOPT); 2492 } 2493 /* 2494 * Deal with both sticky options and ancillary data 2495 */ 2496 sticky = B_FALSE; 2497 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2498 NULL) { 2499 /* sticky options, or none */ 2500 ipp = &udp->udp_sticky_ipp; 2501 sticky = B_TRUE; 2502 } 2503 2504 switch (name) { 2505 case IPV6_MULTICAST_IF: 2506 if (!checkonly) { 2507 udp->udp_multicast_if_index = *i1; 2508 PASS_OPT_TO_IP(connp); 2509 } 2510 break; 2511 case IPV6_UNICAST_HOPS: 2512 /* -1 means use default */ 2513 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2514 *outlenp = 0; 2515 return (EINVAL); 2516 } 2517 if (!checkonly) { 2518 if (*i1 == -1) { 2519 udp->udp_ttl = ipp->ipp_unicast_hops = 2520 us->us_ipv6_hoplimit; 2521 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2522 /* Pass modified value to IP. 
*/ 2523 *i1 = udp->udp_ttl; 2524 } else { 2525 udp->udp_ttl = ipp->ipp_unicast_hops = 2526 (uint8_t)*i1; 2527 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2528 } 2529 /* Rebuild the header template */ 2530 error = udp_build_hdrs(udp); 2531 if (error != 0) { 2532 *outlenp = 0; 2533 return (error); 2534 } 2535 } 2536 break; 2537 case IPV6_MULTICAST_HOPS: 2538 /* -1 means use default */ 2539 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2540 *outlenp = 0; 2541 return (EINVAL); 2542 } 2543 if (!checkonly) { 2544 if (*i1 == -1) { 2545 udp->udp_multicast_ttl = 2546 ipp->ipp_multicast_hops = 2547 IP_DEFAULT_MULTICAST_TTL; 2548 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2549 /* Pass modified value to IP. */ 2550 *i1 = udp->udp_multicast_ttl; 2551 } else { 2552 udp->udp_multicast_ttl = 2553 ipp->ipp_multicast_hops = 2554 (uint8_t)*i1; 2555 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2556 } 2557 } 2558 break; 2559 case IPV6_MULTICAST_LOOP: 2560 if (*i1 != 0 && *i1 != 1) { 2561 *outlenp = 0; 2562 return (EINVAL); 2563 } 2564 if (!checkonly) { 2565 connp->conn_multicast_loop = *i1; 2566 PASS_OPT_TO_IP(connp); 2567 } 2568 break; 2569 case IPV6_JOIN_GROUP: 2570 case IPV6_LEAVE_GROUP: 2571 case MCAST_JOIN_GROUP: 2572 case MCAST_LEAVE_GROUP: 2573 case MCAST_BLOCK_SOURCE: 2574 case MCAST_UNBLOCK_SOURCE: 2575 case MCAST_JOIN_SOURCE_GROUP: 2576 case MCAST_LEAVE_SOURCE_GROUP: 2577 /* 2578 * "soft" error (negative) 2579 * option not handled at this level 2580 * Note: Do not modify *outlenp 2581 */ 2582 return (-EINVAL); 2583 case IPV6_BOUND_IF: 2584 if (!checkonly) { 2585 udp->udp_bound_if = *i1; 2586 PASS_OPT_TO_IP(connp); 2587 } 2588 break; 2589 case IPV6_UNSPEC_SRC: 2590 if (!checkonly) { 2591 udp->udp_unspec_source = onoff; 2592 PASS_OPT_TO_IP(connp); 2593 } 2594 break; 2595 /* 2596 * Set boolean switches for ancillary data delivery 2597 */ 2598 case IPV6_RECVPKTINFO: 2599 if (!checkonly) { 2600 udp->udp_ip_recvpktinfo = onoff; 2601 PASS_OPT_TO_IP(connp); 2602 } 2603 break; 2604 case IPV6_RECVTCLASS: 
2605 if (!checkonly) { 2606 udp->udp_ipv6_recvtclass = onoff; 2607 PASS_OPT_TO_IP(connp); 2608 } 2609 break; 2610 case IPV6_RECVPATHMTU: 2611 if (!checkonly) { 2612 udp->udp_ipv6_recvpathmtu = onoff; 2613 PASS_OPT_TO_IP(connp); 2614 } 2615 break; 2616 case IPV6_RECVHOPLIMIT: 2617 if (!checkonly) { 2618 udp->udp_ipv6_recvhoplimit = onoff; 2619 PASS_OPT_TO_IP(connp); 2620 } 2621 break; 2622 case IPV6_RECVHOPOPTS: 2623 if (!checkonly) { 2624 udp->udp_ipv6_recvhopopts = onoff; 2625 PASS_OPT_TO_IP(connp); 2626 } 2627 break; 2628 case IPV6_RECVDSTOPTS: 2629 if (!checkonly) { 2630 udp->udp_ipv6_recvdstopts = onoff; 2631 PASS_OPT_TO_IP(connp); 2632 } 2633 break; 2634 case _OLD_IPV6_RECVDSTOPTS: 2635 if (!checkonly) 2636 udp->udp_old_ipv6_recvdstopts = onoff; 2637 break; 2638 case IPV6_RECVRTHDRDSTOPTS: 2639 if (!checkonly) { 2640 udp->udp_ipv6_recvrthdrdstopts = onoff; 2641 PASS_OPT_TO_IP(connp); 2642 } 2643 break; 2644 case IPV6_RECVRTHDR: 2645 if (!checkonly) { 2646 udp->udp_ipv6_recvrthdr = onoff; 2647 PASS_OPT_TO_IP(connp); 2648 } 2649 break; 2650 /* 2651 * Set sticky options or ancillary data. 2652 * If sticky options, (re)build any extension headers 2653 * that might be needed as a result. 2654 */ 2655 case IPV6_PKTINFO: 2656 /* 2657 * The source address and ifindex are verified 2658 * in ip_opt_set(). For ancillary data the 2659 * source address is checked in ip_wput_v6. 
2660 */ 2661 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2662 return (EINVAL); 2663 if (checkonly) 2664 break; 2665 2666 if (inlen == 0) { 2667 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2668 ipp->ipp_sticky_ignored |= 2669 (IPPF_IFINDEX|IPPF_ADDR); 2670 } else { 2671 struct in6_pktinfo *pkti; 2672 2673 pkti = (struct in6_pktinfo *)invalp; 2674 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2675 ipp->ipp_addr = pkti->ipi6_addr; 2676 if (ipp->ipp_ifindex != 0) 2677 ipp->ipp_fields |= IPPF_IFINDEX; 2678 else 2679 ipp->ipp_fields &= ~IPPF_IFINDEX; 2680 if (!IN6_IS_ADDR_UNSPECIFIED( 2681 &ipp->ipp_addr)) 2682 ipp->ipp_fields |= IPPF_ADDR; 2683 else 2684 ipp->ipp_fields &= ~IPPF_ADDR; 2685 } 2686 if (sticky) { 2687 error = udp_build_hdrs(udp); 2688 if (error != 0) 2689 return (error); 2690 PASS_OPT_TO_IP(connp); 2691 } 2692 break; 2693 case IPV6_HOPLIMIT: 2694 if (sticky) 2695 return (EINVAL); 2696 if (inlen != 0 && inlen != sizeof (int)) 2697 return (EINVAL); 2698 if (checkonly) 2699 break; 2700 2701 if (inlen == 0) { 2702 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2703 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2704 } else { 2705 if (*i1 > 255 || *i1 < -1) 2706 return (EINVAL); 2707 if (*i1 == -1) 2708 ipp->ipp_hoplimit = 2709 us->us_ipv6_hoplimit; 2710 else 2711 ipp->ipp_hoplimit = *i1; 2712 ipp->ipp_fields |= IPPF_HOPLIMIT; 2713 } 2714 break; 2715 case IPV6_TCLASS: 2716 if (inlen != 0 && inlen != sizeof (int)) 2717 return (EINVAL); 2718 if (checkonly) 2719 break; 2720 2721 if (inlen == 0) { 2722 ipp->ipp_fields &= ~IPPF_TCLASS; 2723 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2724 } else { 2725 if (*i1 > 255 || *i1 < -1) 2726 return (EINVAL); 2727 if (*i1 == -1) 2728 ipp->ipp_tclass = 0; 2729 else 2730 ipp->ipp_tclass = *i1; 2731 ipp->ipp_fields |= IPPF_TCLASS; 2732 } 2733 if (sticky) { 2734 error = udp_build_hdrs(udp); 2735 if (error != 0) 2736 return (error); 2737 } 2738 break; 2739 case IPV6_NEXTHOP: 2740 /* 2741 * IP will verify that the nexthop is reachable 2742 * 
and fail for sticky options. 2743 */ 2744 if (inlen != 0 && inlen != sizeof (sin6_t)) 2745 return (EINVAL); 2746 if (checkonly) 2747 break; 2748 2749 if (inlen == 0) { 2750 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2751 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2752 } else { 2753 sin6_t *sin6 = (sin6_t *)invalp; 2754 2755 if (sin6->sin6_family != AF_INET6) { 2756 return (EAFNOSUPPORT); 2757 } 2758 if (IN6_IS_ADDR_V4MAPPED( 2759 &sin6->sin6_addr)) 2760 return (EADDRNOTAVAIL); 2761 ipp->ipp_nexthop = sin6->sin6_addr; 2762 if (!IN6_IS_ADDR_UNSPECIFIED( 2763 &ipp->ipp_nexthop)) 2764 ipp->ipp_fields |= IPPF_NEXTHOP; 2765 else 2766 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2767 } 2768 if (sticky) { 2769 error = udp_build_hdrs(udp); 2770 if (error != 0) 2771 return (error); 2772 PASS_OPT_TO_IP(connp); 2773 } 2774 break; 2775 case IPV6_HOPOPTS: { 2776 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2777 /* 2778 * Sanity checks - minimum size, size a multiple of 2779 * eight bytes, and matching size passed in. 2780 */ 2781 if (inlen != 0 && 2782 inlen != (8 * (hopts->ip6h_len + 1))) 2783 return (EINVAL); 2784 2785 if (checkonly) 2786 break; 2787 2788 error = optcom_pkt_set(invalp, inlen, sticky, 2789 (uchar_t **)&ipp->ipp_hopopts, 2790 &ipp->ipp_hopoptslen, 2791 sticky ? udp->udp_label_len_v6 : 0); 2792 if (error != 0) 2793 return (error); 2794 if (ipp->ipp_hopoptslen == 0) { 2795 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2796 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2797 } else { 2798 ipp->ipp_fields |= IPPF_HOPOPTS; 2799 } 2800 if (sticky) { 2801 error = udp_build_hdrs(udp); 2802 if (error != 0) 2803 return (error); 2804 } 2805 break; 2806 } 2807 case IPV6_RTHDRDSTOPTS: { 2808 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2809 2810 /* 2811 * Sanity checks - minimum size, size a multiple of 2812 * eight bytes, and matching size passed in. 
2813 */ 2814 if (inlen != 0 && 2815 inlen != (8 * (dopts->ip6d_len + 1))) 2816 return (EINVAL); 2817 2818 if (checkonly) 2819 break; 2820 2821 if (inlen == 0) { 2822 if (sticky && 2823 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2824 kmem_free(ipp->ipp_rtdstopts, 2825 ipp->ipp_rtdstoptslen); 2826 ipp->ipp_rtdstopts = NULL; 2827 ipp->ipp_rtdstoptslen = 0; 2828 } 2829 2830 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2831 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2832 } else { 2833 error = optcom_pkt_set(invalp, inlen, sticky, 2834 (uchar_t **)&ipp->ipp_rtdstopts, 2835 &ipp->ipp_rtdstoptslen, 0); 2836 if (error != 0) 2837 return (error); 2838 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2839 } 2840 if (sticky) { 2841 error = udp_build_hdrs(udp); 2842 if (error != 0) 2843 return (error); 2844 } 2845 break; 2846 } 2847 case IPV6_DSTOPTS: { 2848 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2849 2850 /* 2851 * Sanity checks - minimum size, size a multiple of 2852 * eight bytes, and matching size passed in. 2853 */ 2854 if (inlen != 0 && 2855 inlen != (8 * (dopts->ip6d_len + 1))) 2856 return (EINVAL); 2857 2858 if (checkonly) 2859 break; 2860 2861 if (inlen == 0) { 2862 if (sticky && 2863 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 2864 kmem_free(ipp->ipp_dstopts, 2865 ipp->ipp_dstoptslen); 2866 ipp->ipp_dstopts = NULL; 2867 ipp->ipp_dstoptslen = 0; 2868 } 2869 ipp->ipp_fields &= ~IPPF_DSTOPTS; 2870 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 2871 } else { 2872 error = optcom_pkt_set(invalp, inlen, sticky, 2873 (uchar_t **)&ipp->ipp_dstopts, 2874 &ipp->ipp_dstoptslen, 0); 2875 if (error != 0) 2876 return (error); 2877 ipp->ipp_fields |= IPPF_DSTOPTS; 2878 } 2879 if (sticky) { 2880 error = udp_build_hdrs(udp); 2881 if (error != 0) 2882 return (error); 2883 } 2884 break; 2885 } 2886 case IPV6_RTHDR: { 2887 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 2888 2889 /* 2890 * Sanity checks - minimum size, size a multiple of 2891 * eight bytes, and matching size passed in. 
2892 */ 2893 if (inlen != 0 && 2894 inlen != (8 * (rt->ip6r_len + 1))) 2895 return (EINVAL); 2896 2897 if (checkonly) 2898 break; 2899 2900 if (inlen == 0) { 2901 if (sticky && 2902 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 2903 kmem_free(ipp->ipp_rthdr, 2904 ipp->ipp_rthdrlen); 2905 ipp->ipp_rthdr = NULL; 2906 ipp->ipp_rthdrlen = 0; 2907 } 2908 ipp->ipp_fields &= ~IPPF_RTHDR; 2909 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 2910 } else { 2911 error = optcom_pkt_set(invalp, inlen, sticky, 2912 (uchar_t **)&ipp->ipp_rthdr, 2913 &ipp->ipp_rthdrlen, 0); 2914 if (error != 0) 2915 return (error); 2916 ipp->ipp_fields |= IPPF_RTHDR; 2917 } 2918 if (sticky) { 2919 error = udp_build_hdrs(udp); 2920 if (error != 0) 2921 return (error); 2922 } 2923 break; 2924 } 2925 2926 case IPV6_DONTFRAG: 2927 if (checkonly) 2928 break; 2929 2930 if (onoff) { 2931 ipp->ipp_fields |= IPPF_DONTFRAG; 2932 } else { 2933 ipp->ipp_fields &= ~IPPF_DONTFRAG; 2934 } 2935 break; 2936 2937 case IPV6_USE_MIN_MTU: 2938 if (inlen != sizeof (int)) 2939 return (EINVAL); 2940 2941 if (*i1 < -1 || *i1 > 1) 2942 return (EINVAL); 2943 2944 if (checkonly) 2945 break; 2946 2947 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 2948 ipp->ipp_use_min_mtu = *i1; 2949 break; 2950 2951 case IPV6_SEC_OPT: 2952 case IPV6_SRC_PREFERENCES: 2953 case IPV6_V6ONLY: 2954 /* Handled at the IP level */ 2955 return (-EINVAL); 2956 default: 2957 *outlenp = 0; 2958 return (EINVAL); 2959 } 2960 break; 2961 } /* end IPPROTO_IPV6 */ 2962 case IPPROTO_UDP: 2963 switch (name) { 2964 case UDP_ANONPRIVBIND: 2965 if ((error = secpolicy_net_privaddr(cr, 0, 2966 IPPROTO_UDP)) != 0) { 2967 *outlenp = 0; 2968 return (error); 2969 } 2970 if (!checkonly) { 2971 udp->udp_anon_priv_bind = onoff; 2972 } 2973 break; 2974 case UDP_EXCLBIND: 2975 if (!checkonly) 2976 udp->udp_exclbind = onoff; 2977 break; 2978 case UDP_RCVHDR: 2979 if (!checkonly) 2980 udp->udp_rcvhdr = onoff; 2981 break; 2982 case UDP_NAT_T_ENDPOINT: 2983 if ((error = secpolicy_ip_config(cr, 
B_FALSE)) != 0) { 2984 *outlenp = 0; 2985 return (error); 2986 } 2987 2988 /* 2989 * Use udp_family instead so we can avoid ambiguitites 2990 * with AF_INET6 sockets that may switch from IPv4 2991 * to IPv6. 2992 */ 2993 if (udp->udp_family != AF_INET) { 2994 *outlenp = 0; 2995 return (EAFNOSUPPORT); 2996 } 2997 2998 if (!checkonly) { 2999 int size; 3000 3001 udp->udp_nat_t_endpoint = onoff; 3002 3003 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3004 UDPH_SIZE + udp->udp_ip_snd_options_len; 3005 3006 /* Also, adjust wroff */ 3007 if (onoff) { 3008 udp->udp_max_hdr_len += 3009 sizeof (uint32_t); 3010 } 3011 size = udp->udp_max_hdr_len + 3012 us->us_wroff_extra; 3013 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3014 size); 3015 } 3016 break; 3017 default: 3018 *outlenp = 0; 3019 return (EINVAL); 3020 } 3021 break; 3022 default: 3023 *outlenp = 0; 3024 return (EINVAL); 3025 } 3026 /* 3027 * Common case of OK return with outval same as inval. 3028 */ 3029 if (invalp != outvalp) { 3030 /* don't trust bcopy for identical src/dst */ 3031 (void) bcopy(invalp, outvalp, inlen); 3032 } 3033 *outlenp = inlen; 3034 return (0); 3035 } 3036 3037 int 3038 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3039 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3040 void *thisdg_attrs, cred_t *cr) 3041 { 3042 int error; 3043 boolean_t checkonly; 3044 3045 error = 0; 3046 switch (optset_context) { 3047 case SETFN_OPTCOM_CHECKONLY: 3048 checkonly = B_TRUE; 3049 /* 3050 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3051 * inlen != 0 implies value supplied and 3052 * we have to "pretend" to set it. 3053 * inlen == 0 implies that there is no 3054 * value part in T_CHECK request and just validation 3055 * done elsewhere should be enough, we just return here. 
3056 */ 3057 if (inlen == 0) { 3058 *outlenp = 0; 3059 goto done; 3060 } 3061 break; 3062 case SETFN_OPTCOM_NEGOTIATE: 3063 checkonly = B_FALSE; 3064 break; 3065 case SETFN_UD_NEGOTIATE: 3066 case SETFN_CONN_NEGOTIATE: 3067 checkonly = B_FALSE; 3068 /* 3069 * Negotiating local and "association-related" options 3070 * through T_UNITDATA_REQ. 3071 * 3072 * Following routine can filter out ones we do not 3073 * want to be "set" this way. 3074 */ 3075 if (!udp_opt_allow_udr_set(level, name)) { 3076 *outlenp = 0; 3077 error = EINVAL; 3078 goto done; 3079 } 3080 break; 3081 default: 3082 /* 3083 * We should never get here 3084 */ 3085 *outlenp = 0; 3086 error = EINVAL; 3087 goto done; 3088 } 3089 3090 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3091 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3092 3093 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3094 outvalp, cr, thisdg_attrs, checkonly); 3095 done: 3096 return (error); 3097 } 3098 3099 /* ARGSUSED */ 3100 int 3101 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3102 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3103 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3104 { 3105 conn_t *connp = Q_TO_CONN(q); 3106 int error; 3107 udp_t *udp = connp->conn_udp; 3108 3109 rw_enter(&udp->udp_rwlock, RW_WRITER); 3110 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3111 outlenp, outvalp, thisdg_attrs, cr); 3112 rw_exit(&udp->udp_rwlock); 3113 return (error); 3114 } 3115 3116 /* 3117 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3118 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3119 * headers, and the udp header. 3120 * Returns failure if can't allocate memory. 
3121 */ 3122 static int 3123 udp_build_hdrs(udp_t *udp) 3124 { 3125 udp_stack_t *us = udp->udp_us; 3126 uchar_t *hdrs; 3127 uint_t hdrs_len; 3128 ip6_t *ip6h; 3129 ip6i_t *ip6i; 3130 udpha_t *udpha; 3131 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3132 size_t sth_wroff; 3133 conn_t *connp = udp->udp_connp; 3134 3135 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3136 ASSERT(connp != NULL); 3137 3138 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3139 ASSERT(hdrs_len != 0); 3140 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3141 /* Need to reallocate */ 3142 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3143 if (hdrs == NULL) 3144 return (ENOMEM); 3145 3146 if (udp->udp_sticky_hdrs_len != 0) { 3147 kmem_free(udp->udp_sticky_hdrs, 3148 udp->udp_sticky_hdrs_len); 3149 } 3150 udp->udp_sticky_hdrs = hdrs; 3151 udp->udp_sticky_hdrs_len = hdrs_len; 3152 } 3153 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3154 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3155 3156 /* Set header fields not in ipp */ 3157 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3158 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3159 ip6h = (ip6_t *)&ip6i[1]; 3160 } else { 3161 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3162 } 3163 3164 if (!(ipp->ipp_fields & IPPF_ADDR)) 3165 ip6h->ip6_src = udp->udp_v6src; 3166 3167 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3168 udpha->uha_src_port = udp->udp_port; 3169 3170 /* Try to get everything in a single mblk */ 3171 if (hdrs_len > udp->udp_max_hdr_len) { 3172 udp->udp_max_hdr_len = hdrs_len; 3173 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3174 rw_exit(&udp->udp_rwlock); 3175 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3176 udp->udp_connp, sth_wroff); 3177 rw_enter(&udp->udp_rwlock, RW_WRITER); 3178 } 3179 return (0); 3180 } 3181 3182 /* 3183 * This routine retrieves the value of an ND variable in a udpparam_t 3184 * structure. It is called through nd_getset when a user reads the 3185 * variable. 
3186 */ 3187 /* ARGSUSED */ 3188 static int 3189 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3190 { 3191 udpparam_t *udppa = (udpparam_t *)cp; 3192 3193 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3194 return (0); 3195 } 3196 3197 /* 3198 * Walk through the param array specified registering each element with the 3199 * named dispatch (ND) handler. 3200 */ 3201 static boolean_t 3202 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3203 { 3204 for (; cnt-- > 0; udppa++) { 3205 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3206 if (!nd_load(ndp, udppa->udp_param_name, 3207 udp_param_get, udp_param_set, 3208 (caddr_t)udppa)) { 3209 nd_free(ndp); 3210 return (B_FALSE); 3211 } 3212 } 3213 } 3214 if (!nd_load(ndp, "udp_extra_priv_ports", 3215 udp_extra_priv_ports_get, NULL, NULL)) { 3216 nd_free(ndp); 3217 return (B_FALSE); 3218 } 3219 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3220 NULL, udp_extra_priv_ports_add, NULL)) { 3221 nd_free(ndp); 3222 return (B_FALSE); 3223 } 3224 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3225 NULL, udp_extra_priv_ports_del, NULL)) { 3226 nd_free(ndp); 3227 return (B_FALSE); 3228 } 3229 return (B_TRUE); 3230 } 3231 3232 /* This routine sets an ND variable in a udpparam_t structure. */ 3233 /* ARGSUSED */ 3234 static int 3235 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3236 { 3237 long new_value; 3238 udpparam_t *udppa = (udpparam_t *)cp; 3239 3240 /* 3241 * Fail the request if the new value does not lie within the 3242 * required bounds. 3243 */ 3244 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3245 new_value < udppa->udp_param_min || 3246 new_value > udppa->udp_param_max) { 3247 return (EINVAL); 3248 } 3249 3250 /* Set the new value */ 3251 udppa->udp_param_value = new_value; 3252 return (0); 3253 } 3254 3255 /* 3256 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3257 * T_opthdr) and return the number of bytes copied. 
'dbuf' may be NULL to 3258 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3259 * then it's assumed to be allocated to be large enough. 3260 * 3261 * Returns zero if trimming of the security option causes all options to go 3262 * away. 3263 */ 3264 static size_t 3265 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3266 { 3267 struct T_opthdr *toh; 3268 size_t hol = ipp->ipp_hopoptslen; 3269 ip6_hbh_t *dstopt = NULL; 3270 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3271 size_t tlen, olen, plen; 3272 boolean_t deleting; 3273 const struct ip6_opt *sopt, *lastpad; 3274 struct ip6_opt *dopt; 3275 3276 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3277 toh->level = IPPROTO_IPV6; 3278 toh->name = IPV6_HOPOPTS; 3279 toh->status = 0; 3280 dstopt = (ip6_hbh_t *)(toh + 1); 3281 } 3282 3283 /* 3284 * If labeling is enabled, then skip the label option 3285 * but get other options if there are any. 3286 */ 3287 if (is_system_labeled()) { 3288 dopt = NULL; 3289 if (dstopt != NULL) { 3290 /* will fill in ip6h_len later */ 3291 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3292 dopt = (struct ip6_opt *)(dstopt + 1); 3293 } 3294 sopt = (const struct ip6_opt *)(srcopt + 1); 3295 hol -= sizeof (*srcopt); 3296 tlen = sizeof (*dstopt); 3297 lastpad = NULL; 3298 deleting = B_FALSE; 3299 /* 3300 * This loop finds the first (lastpad pointer) of any number of 3301 * pads that preceeds the security option, then treats the 3302 * security option as though it were a pad, and then finds the 3303 * next non-pad option (or end of list). 3304 * 3305 * It then treats the entire block as one big pad. To preserve 3306 * alignment of any options that follow, or just the end of the 3307 * list, it computes a minimal new padding size that keeps the 3308 * same alignment for the next option. 3309 * 3310 * If it encounters just a sequence of pads with no security 3311 * option, those are copied as-is rather than collapsed. 
3312 * 3313 * Note that to handle the end of list case, the code makes one 3314 * loop with 'hol' set to zero. 3315 */ 3316 for (;;) { 3317 if (hol > 0) { 3318 if (sopt->ip6o_type == IP6OPT_PAD1) { 3319 if (lastpad == NULL) 3320 lastpad = sopt; 3321 sopt = (const struct ip6_opt *) 3322 &sopt->ip6o_len; 3323 hol--; 3324 continue; 3325 } 3326 olen = sopt->ip6o_len + sizeof (*sopt); 3327 if (olen > hol) 3328 olen = hol; 3329 if (sopt->ip6o_type == IP6OPT_PADN || 3330 sopt->ip6o_type == ip6opt_ls) { 3331 if (sopt->ip6o_type == ip6opt_ls) 3332 deleting = B_TRUE; 3333 if (lastpad == NULL) 3334 lastpad = sopt; 3335 sopt = (const struct ip6_opt *) 3336 ((const char *)sopt + olen); 3337 hol -= olen; 3338 continue; 3339 } 3340 } else { 3341 /* if nothing was copied at all, then delete */ 3342 if (tlen == sizeof (*dstopt)) 3343 return (0); 3344 /* last pass; pick up any trailing padding */ 3345 olen = 0; 3346 } 3347 if (deleting) { 3348 /* 3349 * compute aligning effect of deleted material 3350 * to reproduce with pad. 
3351 */ 3352 plen = ((const char *)sopt - 3353 (const char *)lastpad) & 7; 3354 tlen += plen; 3355 if (dopt != NULL) { 3356 if (plen == 1) { 3357 dopt->ip6o_type = IP6OPT_PAD1; 3358 } else if (plen > 1) { 3359 plen -= sizeof (*dopt); 3360 dopt->ip6o_type = IP6OPT_PADN; 3361 dopt->ip6o_len = plen; 3362 if (plen > 0) 3363 bzero(dopt + 1, plen); 3364 } 3365 dopt = (struct ip6_opt *) 3366 ((char *)dopt + plen); 3367 } 3368 deleting = B_FALSE; 3369 lastpad = NULL; 3370 } 3371 /* if there's uncopied padding, then copy that now */ 3372 if (lastpad != NULL) { 3373 olen += (const char *)sopt - 3374 (const char *)lastpad; 3375 sopt = lastpad; 3376 lastpad = NULL; 3377 } 3378 if (dopt != NULL && olen > 0) { 3379 bcopy(sopt, dopt, olen); 3380 dopt = (struct ip6_opt *)((char *)dopt + olen); 3381 } 3382 if (hol == 0) 3383 break; 3384 tlen += olen; 3385 sopt = (const struct ip6_opt *) 3386 ((const char *)sopt + olen); 3387 hol -= olen; 3388 } 3389 /* go back and patch up the length value, rounded upward */ 3390 if (dstopt != NULL) 3391 dstopt->ip6h_len = (tlen - 1) >> 3; 3392 } else { 3393 tlen = hol; 3394 if (dstopt != NULL) 3395 bcopy(srcopt, dstopt, hol); 3396 } 3397 3398 tlen += sizeof (*toh); 3399 if (toh != NULL) 3400 toh->len = tlen; 3401 3402 return (tlen); 3403 } 3404 3405 /* 3406 * Update udp_rcv_opt_len from the packet. 3407 * Called when options received, and when no options received but 3408 * udp_ip_recv_opt_len has previously recorded options. 
3409 */ 3410 static void 3411 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3412 { 3413 /* Save the options if any */ 3414 if (opt_len > 0) { 3415 if (opt_len > udp->udp_ip_rcv_options_len) { 3416 /* Need to allocate larger buffer */ 3417 if (udp->udp_ip_rcv_options_len != 0) 3418 mi_free((char *)udp->udp_ip_rcv_options); 3419 udp->udp_ip_rcv_options_len = 0; 3420 udp->udp_ip_rcv_options = 3421 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3422 if (udp->udp_ip_rcv_options != NULL) 3423 udp->udp_ip_rcv_options_len = opt_len; 3424 } 3425 if (udp->udp_ip_rcv_options_len != 0) { 3426 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3427 /* Adjust length if we are resusing the space */ 3428 udp->udp_ip_rcv_options_len = opt_len; 3429 } 3430 } else if (udp->udp_ip_rcv_options_len != 0) { 3431 /* Clear out previously recorded options */ 3432 mi_free((char *)udp->udp_ip_rcv_options); 3433 udp->udp_ip_rcv_options = NULL; 3434 udp->udp_ip_rcv_options_len = 0; 3435 } 3436 } 3437 3438 static mblk_t * 3439 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3440 { 3441 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3442 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3443 /* 3444 * fallback has started but messages have not been moved yet 3445 */ 3446 if (udp->udp_fallback_queue_head == NULL) { 3447 ASSERT(udp->udp_fallback_queue_tail == NULL); 3448 udp->udp_fallback_queue_head = mp; 3449 udp->udp_fallback_queue_tail = mp; 3450 } else { 3451 ASSERT(udp->udp_fallback_queue_tail != NULL); 3452 udp->udp_fallback_queue_tail->b_next = mp; 3453 udp->udp_fallback_queue_tail = mp; 3454 } 3455 return (NULL); 3456 } else { 3457 /* 3458 * Fallback completed, let the caller putnext() the mblk. 3459 */ 3460 return (mp); 3461 } 3462 } 3463 3464 /* 3465 * Deliver data to ULP. In case we have a socket, and it's falling back to 3466 * TPI, then we'll queue the mp for later processing. 
3467 */ 3468 static void 3469 udp_ulp_recv(conn_t *connp, mblk_t *mp) 3470 { 3471 if (IPCL_IS_NONSTR(connp)) { 3472 udp_t *udp = connp->conn_udp; 3473 int error; 3474 3475 if ((*connp->conn_upcalls->su_recv) 3476 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3477 NULL) < 0) { 3478 mutex_enter(&udp->udp_recv_lock); 3479 if (error == ENOSPC) { 3480 /* 3481 * let's confirm while holding the lock 3482 */ 3483 if ((*connp->conn_upcalls->su_recv) 3484 (connp->conn_upper_handle, NULL, 0, 0, 3485 &error, NULL) < 0) { 3486 ASSERT(error == ENOSPC); 3487 if (error == ENOSPC) { 3488 connp->conn_flow_cntrld = 3489 B_TRUE; 3490 } 3491 } 3492 mutex_exit(&udp->udp_recv_lock); 3493 } else { 3494 ASSERT(error == EOPNOTSUPP); 3495 mp = udp_queue_fallback(udp, mp); 3496 mutex_exit(&udp->udp_recv_lock); 3497 if (mp != NULL) 3498 putnext(connp->conn_rq, mp); 3499 } 3500 } 3501 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 3502 } else { 3503 putnext(connp->conn_rq, mp); 3504 } 3505 } 3506 3507 /* ARGSUSED2 */ 3508 static void 3509 udp_input(void *arg1, mblk_t *mp, void *arg2) 3510 { 3511 conn_t *connp = (conn_t *)arg1; 3512 struct T_unitdata_ind *tudi; 3513 uchar_t *rptr; /* Pointer to IP header */ 3514 int hdr_length; /* Length of IP+UDP headers */ 3515 int opt_len; 3516 int udi_size; /* Size of T_unitdata_ind */ 3517 int mp_len; 3518 udp_t *udp; 3519 udpha_t *udpha; 3520 int ipversion; 3521 ip6_pkt_t ipp; 3522 ip6_t *ip6h; 3523 ip6i_t *ip6i; 3524 mblk_t *mp1; 3525 mblk_t *options_mp = NULL; 3526 ip_pktinfo_t *pinfo = NULL; 3527 cred_t *cr = NULL; 3528 pid_t cpid; 3529 uint32_t udp_ip_rcv_options_len; 3530 udp_bits_t udp_bits; 3531 cred_t *rcr = connp->conn_cred; 3532 udp_stack_t *us; 3533 3534 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3535 3536 udp = connp->conn_udp; 3537 us = udp->udp_us; 3538 rptr = mp->b_rptr; 3539 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3540 ASSERT(OK_32PTR(rptr)); 3541 3542 /* 3543 * IP should have prepended the options data in an M_CTL 
3544 * Check M_CTL "type" to make sure are not here bcos of 3545 * a valid ICMP message 3546 */ 3547 if (DB_TYPE(mp) == M_CTL) { 3548 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3549 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3550 IN_PKTINFO) { 3551 /* 3552 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3553 * has been prepended to the packet by IP. We need to 3554 * extract the mblk and adjust the rptr 3555 */ 3556 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3557 options_mp = mp; 3558 mp = mp->b_cont; 3559 rptr = mp->b_rptr; 3560 UDP_STAT(us, udp_in_pktinfo); 3561 } else { 3562 /* 3563 * ICMP messages. 3564 */ 3565 udp_icmp_error(connp, mp); 3566 return; 3567 } 3568 } 3569 3570 mp_len = msgdsize(mp); 3571 /* 3572 * This is the inbound data path. 3573 * First, we check to make sure the IP version number is correct, 3574 * and then pull the IP and UDP headers into the first mblk. 3575 */ 3576 3577 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3578 ipp.ipp_fields = 0; 3579 3580 ipversion = IPH_HDR_VERSION(rptr); 3581 3582 rw_enter(&udp->udp_rwlock, RW_READER); 3583 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3584 udp_bits = udp->udp_bits; 3585 rw_exit(&udp->udp_rwlock); 3586 3587 switch (ipversion) { 3588 case IPV4_VERSION: 3589 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3590 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3591 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3592 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3593 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3594 udp->udp_family == AF_INET) { 3595 /* 3596 * Record/update udp_ip_rcv_options with the lock 3597 * held. Not needed for AF_INET6 sockets 3598 * since they don't support a getsockopt of IP_OPTIONS. 3599 */ 3600 rw_enter(&udp->udp_rwlock, RW_WRITER); 3601 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3602 opt_len); 3603 rw_exit(&udp->udp_rwlock); 3604 } 3605 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. 
*/ 3606 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3607 udp->udp_ip_recvpktinfo) { 3608 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3609 ipp.ipp_fields |= IPPF_IFINDEX; 3610 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3611 } 3612 } 3613 break; 3614 case IPV6_VERSION: 3615 /* 3616 * IPv6 packets can only be received by applications 3617 * that are prepared to receive IPv6 addresses. 3618 * The IP fanout must ensure this. 3619 */ 3620 ASSERT(udp->udp_family == AF_INET6); 3621 3622 ip6h = (ip6_t *)rptr; 3623 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3624 3625 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3626 uint8_t nexthdrp; 3627 /* Look for ifindex information */ 3628 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3629 ip6i = (ip6i_t *)ip6h; 3630 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3631 goto tossit; 3632 3633 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3634 ASSERT(ip6i->ip6i_ifindex != 0); 3635 ipp.ipp_fields |= IPPF_IFINDEX; 3636 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3637 } 3638 rptr = (uchar_t *)&ip6i[1]; 3639 mp->b_rptr = rptr; 3640 if (rptr == mp->b_wptr) { 3641 mp1 = mp->b_cont; 3642 freeb(mp); 3643 mp = mp1; 3644 rptr = mp->b_rptr; 3645 } 3646 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3647 goto tossit; 3648 ip6h = (ip6_t *)rptr; 3649 mp_len = msgdsize(mp); 3650 } 3651 /* 3652 * Find any potentially interesting extension headers 3653 * as well as the length of the IPv6 + extension 3654 * headers. 3655 */ 3656 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3657 UDPH_SIZE; 3658 ASSERT(nexthdrp == IPPROTO_UDP); 3659 } else { 3660 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3661 ip6i = NULL; 3662 } 3663 break; 3664 default: 3665 ASSERT(0); 3666 } 3667 3668 /* 3669 * IP inspected the UDP header thus all of it must be in the mblk. 3670 * UDP length check is performed for IPv6 packets and IPv4 packets 3671 * to check if the size of the packet as specified 3672 * by the header is the same as the physical size of the packet. 3673 * FIXME? Didn't IP already check this? 
3674 */ 3675 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3676 if ((MBLKL(mp) < hdr_length) || 3677 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3678 goto tossit; 3679 } 3680 3681 3682 /* Walk past the headers unless UDP_RCVHDR was set. */ 3683 if (!udp_bits.udpb_rcvhdr) { 3684 mp->b_rptr = rptr + hdr_length; 3685 mp_len -= hdr_length; 3686 } 3687 3688 /* 3689 * This is the inbound data path. Packets are passed upstream as 3690 * T_UNITDATA_IND messages with full IP headers still attached. 3691 */ 3692 if (udp->udp_family == AF_INET) { 3693 sin_t *sin; 3694 3695 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3696 3697 /* 3698 * Normally only send up the source address. 3699 * If IP_RECVDSTADDR is set we include the destination IP 3700 * address as an option. With IP_RECVOPTS we include all 3701 * the IP options. 3702 */ 3703 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3704 if (udp_bits.udpb_recvdstaddr) { 3705 udi_size += sizeof (struct T_opthdr) + 3706 sizeof (struct in_addr); 3707 UDP_STAT(us, udp_in_recvdstaddr); 3708 } 3709 3710 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3711 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3712 udi_size += sizeof (struct T_opthdr) + 3713 sizeof (struct in_pktinfo); 3714 UDP_STAT(us, udp_ip_rcvpktinfo); 3715 } 3716 3717 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3718 udi_size += sizeof (struct T_opthdr) + opt_len; 3719 UDP_STAT(us, udp_in_recvopts); 3720 } 3721 3722 /* 3723 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3724 * space accordingly 3725 */ 3726 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3727 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3728 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3729 UDP_STAT(us, udp_in_recvif); 3730 } 3731 3732 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3733 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3734 udi_size += sizeof (struct T_opthdr) + 3735 sizeof (struct sockaddr_dl); 3736 UDP_STAT(us, 
udp_in_recvslla); 3737 } 3738 3739 if ((udp_bits.udpb_recvucred) && 3740 (cr = msg_getcred(mp, &cpid)) != NULL) { 3741 udi_size += sizeof (struct T_opthdr) + ucredsize; 3742 UDP_STAT(us, udp_in_recvucred); 3743 } 3744 3745 /* 3746 * If SO_TIMESTAMP is set allocate the appropriate sized 3747 * buffer. Since gethrestime() expects a pointer aligned 3748 * argument, we allocate space necessary for extra 3749 * alignment (even though it might not be used). 3750 */ 3751 if (udp_bits.udpb_timestamp) { 3752 udi_size += sizeof (struct T_opthdr) + 3753 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3754 UDP_STAT(us, udp_in_timestamp); 3755 } 3756 3757 /* 3758 * If IP_RECVTTL is set allocate the appropriate sized buffer 3759 */ 3760 if (udp_bits.udpb_recvttl) { 3761 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3762 UDP_STAT(us, udp_in_recvttl); 3763 } 3764 3765 /* Allocate a message block for the T_UNITDATA_IND structure. */ 3766 mp1 = allocb(udi_size, BPRI_MED); 3767 if (mp1 == NULL) { 3768 freemsg(mp); 3769 if (options_mp != NULL) 3770 freeb(options_mp); 3771 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3772 return; 3773 } 3774 mp1->b_cont = mp; 3775 mp = mp1; 3776 mp->b_datap->db_type = M_PROTO; 3777 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3778 mp->b_wptr = (uchar_t *)tudi + udi_size; 3779 tudi->PRIM_type = T_UNITDATA_IND; 3780 tudi->SRC_length = sizeof (sin_t); 3781 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3782 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3783 sizeof (sin_t); 3784 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3785 tudi->OPT_length = udi_size; 3786 sin = (sin_t *)&tudi[1]; 3787 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3788 sin->sin_port = udpha->uha_src_port; 3789 sin->sin_family = udp->udp_family; 3790 *(uint32_t *)&sin->sin_zero[0] = 0; 3791 *(uint32_t *)&sin->sin_zero[4] = 0; 3792 3793 /* 3794 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 3795 * IP_RECVTTL has been set. 
3796 */ 3797 if (udi_size != 0) { 3798 /* 3799 * Copy in destination address before options to avoid 3800 * any padding issues. 3801 */ 3802 char *dstopt; 3803 3804 dstopt = (char *)&sin[1]; 3805 if (udp_bits.udpb_recvdstaddr) { 3806 struct T_opthdr *toh; 3807 ipaddr_t *dstptr; 3808 3809 toh = (struct T_opthdr *)dstopt; 3810 toh->level = IPPROTO_IP; 3811 toh->name = IP_RECVDSTADDR; 3812 toh->len = sizeof (struct T_opthdr) + 3813 sizeof (ipaddr_t); 3814 toh->status = 0; 3815 dstopt += sizeof (struct T_opthdr); 3816 dstptr = (ipaddr_t *)dstopt; 3817 *dstptr = ((ipha_t *)rptr)->ipha_dst; 3818 dstopt += sizeof (ipaddr_t); 3819 udi_size -= toh->len; 3820 } 3821 3822 if (udp_bits.udpb_recvopts && opt_len > 0) { 3823 struct T_opthdr *toh; 3824 3825 toh = (struct T_opthdr *)dstopt; 3826 toh->level = IPPROTO_IP; 3827 toh->name = IP_RECVOPTS; 3828 toh->len = sizeof (struct T_opthdr) + opt_len; 3829 toh->status = 0; 3830 dstopt += sizeof (struct T_opthdr); 3831 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 3832 opt_len); 3833 dstopt += opt_len; 3834 udi_size -= toh->len; 3835 } 3836 3837 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 3838 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3839 struct T_opthdr *toh; 3840 struct in_pktinfo *pktinfop; 3841 3842 toh = (struct T_opthdr *)dstopt; 3843 toh->level = IPPROTO_IP; 3844 toh->name = IP_PKTINFO; 3845 toh->len = sizeof (struct T_opthdr) + 3846 sizeof (*pktinfop); 3847 toh->status = 0; 3848 dstopt += sizeof (struct T_opthdr); 3849 pktinfop = (struct in_pktinfo *)dstopt; 3850 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3851 pktinfop->ipi_spec_dst = 3852 pinfo->ip_pkt_match_addr; 3853 pktinfop->ipi_addr.s_addr = 3854 ((ipha_t *)rptr)->ipha_dst; 3855 3856 dstopt += sizeof (struct in_pktinfo); 3857 udi_size -= toh->len; 3858 } 3859 3860 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3861 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3862 3863 struct T_opthdr *toh; 3864 struct sockaddr_dl *dstptr; 3865 3866 toh = (struct T_opthdr 
*)dstopt; 3867 toh->level = IPPROTO_IP; 3868 toh->name = IP_RECVSLLA; 3869 toh->len = sizeof (struct T_opthdr) + 3870 sizeof (struct sockaddr_dl); 3871 toh->status = 0; 3872 dstopt += sizeof (struct T_opthdr); 3873 dstptr = (struct sockaddr_dl *)dstopt; 3874 bcopy(&pinfo->ip_pkt_slla, dstptr, 3875 sizeof (struct sockaddr_dl)); 3876 dstopt += sizeof (struct sockaddr_dl); 3877 udi_size -= toh->len; 3878 } 3879 3880 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3881 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3882 3883 struct T_opthdr *toh; 3884 uint_t *dstptr; 3885 3886 toh = (struct T_opthdr *)dstopt; 3887 toh->level = IPPROTO_IP; 3888 toh->name = IP_RECVIF; 3889 toh->len = sizeof (struct T_opthdr) + 3890 sizeof (uint_t); 3891 toh->status = 0; 3892 dstopt += sizeof (struct T_opthdr); 3893 dstptr = (uint_t *)dstopt; 3894 *dstptr = pinfo->ip_pkt_ifindex; 3895 dstopt += sizeof (uint_t); 3896 udi_size -= toh->len; 3897 } 3898 3899 if (cr != NULL) { 3900 struct T_opthdr *toh; 3901 3902 toh = (struct T_opthdr *)dstopt; 3903 toh->level = SOL_SOCKET; 3904 toh->name = SCM_UCRED; 3905 toh->len = sizeof (struct T_opthdr) + ucredsize; 3906 toh->status = 0; 3907 dstopt += sizeof (struct T_opthdr); 3908 (void) cred2ucred(cr, cpid, dstopt, rcr); 3909 dstopt += ucredsize; 3910 udi_size -= toh->len; 3911 } 3912 3913 if (udp_bits.udpb_timestamp) { 3914 struct T_opthdr *toh; 3915 3916 toh = (struct T_opthdr *)dstopt; 3917 toh->level = SOL_SOCKET; 3918 toh->name = SCM_TIMESTAMP; 3919 toh->len = sizeof (struct T_opthdr) + 3920 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3921 toh->status = 0; 3922 dstopt += sizeof (struct T_opthdr); 3923 /* Align for gethrestime() */ 3924 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 3925 sizeof (intptr_t)); 3926 gethrestime((timestruc_t *)dstopt); 3927 dstopt = (char *)toh + toh->len; 3928 udi_size -= toh->len; 3929 } 3930 3931 /* 3932 * CAUTION: 3933 * Due to aligment issues 3934 * Processing of IP_RECVTTL option 3935 * should always be the last. 
Adding 3936 * any option processing after this will 3937 * cause alignment panic. 3938 */ 3939 if (udp_bits.udpb_recvttl) { 3940 struct T_opthdr *toh; 3941 uint8_t *dstptr; 3942 3943 toh = (struct T_opthdr *)dstopt; 3944 toh->level = IPPROTO_IP; 3945 toh->name = IP_RECVTTL; 3946 toh->len = sizeof (struct T_opthdr) + 3947 sizeof (uint8_t); 3948 toh->status = 0; 3949 dstopt += sizeof (struct T_opthdr); 3950 dstptr = (uint8_t *)dstopt; 3951 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 3952 dstopt += sizeof (uint8_t); 3953 udi_size -= toh->len; 3954 } 3955 3956 /* Consumed all of allocated space */ 3957 ASSERT(udi_size == 0); 3958 } 3959 } else { 3960 sin6_t *sin6; 3961 3962 /* 3963 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 3964 * 3965 * Normally we only send up the address. If receiving of any 3966 * optional receive side information is enabled, we also send 3967 * that up as options. 3968 */ 3969 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 3970 3971 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 3972 IPPF_RTHDR|IPPF_IFINDEX)) { 3973 if ((udp_bits.udpb_ipv6_recvhopopts) && 3974 (ipp.ipp_fields & IPPF_HOPOPTS)) { 3975 size_t hlen; 3976 3977 UDP_STAT(us, udp_in_recvhopopts); 3978 hlen = copy_hop_opts(&ipp, NULL); 3979 if (hlen == 0) 3980 ipp.ipp_fields &= ~IPPF_HOPOPTS; 3981 udi_size += hlen; 3982 } 3983 if (((udp_bits.udpb_ipv6_recvdstopts) || 3984 udp_bits.udpb_old_ipv6_recvdstopts) && 3985 (ipp.ipp_fields & IPPF_DSTOPTS)) { 3986 udi_size += sizeof (struct T_opthdr) + 3987 ipp.ipp_dstoptslen; 3988 UDP_STAT(us, udp_in_recvdstopts); 3989 } 3990 if ((((udp_bits.udpb_ipv6_recvdstopts) && 3991 udp_bits.udpb_ipv6_recvrthdr && 3992 (ipp.ipp_fields & IPPF_RTHDR)) || 3993 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 3994 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 3995 udi_size += sizeof (struct T_opthdr) + 3996 ipp.ipp_rtdstoptslen; 3997 UDP_STAT(us, udp_in_recvrtdstopts); 3998 } 3999 if ((udp_bits.udpb_ipv6_recvrthdr) && 4000 (ipp.ipp_fields & 
IPPF_RTHDR)) { 4001 udi_size += sizeof (struct T_opthdr) + 4002 ipp.ipp_rthdrlen; 4003 UDP_STAT(us, udp_in_recvrthdr); 4004 } 4005 if ((udp_bits.udpb_ip_recvpktinfo) && 4006 (ipp.ipp_fields & IPPF_IFINDEX)) { 4007 udi_size += sizeof (struct T_opthdr) + 4008 sizeof (struct in6_pktinfo); 4009 UDP_STAT(us, udp_in_recvpktinfo); 4010 } 4011 4012 } 4013 if ((udp_bits.udpb_recvucred) && 4014 (cr = msg_getcred(mp, &cpid)) != NULL) { 4015 udi_size += sizeof (struct T_opthdr) + ucredsize; 4016 UDP_STAT(us, udp_in_recvucred); 4017 } 4018 4019 /* 4020 * If SO_TIMESTAMP is set allocate the appropriate sized 4021 * buffer. Since gethrestime() expects a pointer aligned 4022 * argument, we allocate space necessary for extra 4023 * alignment (even though it might not be used). 4024 */ 4025 if (udp_bits.udpb_timestamp) { 4026 udi_size += sizeof (struct T_opthdr) + 4027 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4028 UDP_STAT(us, udp_in_timestamp); 4029 } 4030 4031 if (udp_bits.udpb_ipv6_recvhoplimit) { 4032 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4033 UDP_STAT(us, udp_in_recvhoplimit); 4034 } 4035 4036 if (udp_bits.udpb_ipv6_recvtclass) { 4037 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4038 UDP_STAT(us, udp_in_recvtclass); 4039 } 4040 4041 mp1 = allocb(udi_size, BPRI_MED); 4042 if (mp1 == NULL) { 4043 freemsg(mp); 4044 if (options_mp != NULL) 4045 freeb(options_mp); 4046 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4047 return; 4048 } 4049 mp1->b_cont = mp; 4050 mp = mp1; 4051 mp->b_datap->db_type = M_PROTO; 4052 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4053 mp->b_wptr = (uchar_t *)tudi + udi_size; 4054 tudi->PRIM_type = T_UNITDATA_IND; 4055 tudi->SRC_length = sizeof (sin6_t); 4056 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4057 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4058 sizeof (sin6_t); 4059 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4060 tudi->OPT_length = udi_size; 4061 sin6 = (sin6_t *)&tudi[1]; 4062 if (ipversion 
== IPV4_VERSION) { 4063 in6_addr_t v6dst; 4064 4065 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4066 &sin6->sin6_addr); 4067 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4068 &v6dst); 4069 sin6->sin6_flowinfo = 0; 4070 sin6->sin6_scope_id = 0; 4071 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4072 connp->conn_zoneid, us->us_netstack); 4073 } else { 4074 sin6->sin6_addr = ip6h->ip6_src; 4075 /* No sin6_flowinfo per API */ 4076 sin6->sin6_flowinfo = 0; 4077 /* For link-scope source pass up scope id */ 4078 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4079 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4080 sin6->sin6_scope_id = ipp.ipp_ifindex; 4081 else 4082 sin6->sin6_scope_id = 0; 4083 sin6->__sin6_src_id = ip_srcid_find_addr( 4084 &ip6h->ip6_dst, connp->conn_zoneid, 4085 us->us_netstack); 4086 } 4087 sin6->sin6_port = udpha->uha_src_port; 4088 sin6->sin6_family = udp->udp_family; 4089 4090 if (udi_size != 0) { 4091 uchar_t *dstopt; 4092 4093 dstopt = (uchar_t *)&sin6[1]; 4094 if ((udp_bits.udpb_ip_recvpktinfo) && 4095 (ipp.ipp_fields & IPPF_IFINDEX)) { 4096 struct T_opthdr *toh; 4097 struct in6_pktinfo *pkti; 4098 4099 toh = (struct T_opthdr *)dstopt; 4100 toh->level = IPPROTO_IPV6; 4101 toh->name = IPV6_PKTINFO; 4102 toh->len = sizeof (struct T_opthdr) + 4103 sizeof (*pkti); 4104 toh->status = 0; 4105 dstopt += sizeof (struct T_opthdr); 4106 pkti = (struct in6_pktinfo *)dstopt; 4107 if (ipversion == IPV6_VERSION) 4108 pkti->ipi6_addr = ip6h->ip6_dst; 4109 else 4110 IN6_IPADDR_TO_V4MAPPED( 4111 ((ipha_t *)rptr)->ipha_dst, 4112 &pkti->ipi6_addr); 4113 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4114 dstopt += sizeof (*pkti); 4115 udi_size -= toh->len; 4116 } 4117 if (udp_bits.udpb_ipv6_recvhoplimit) { 4118 struct T_opthdr *toh; 4119 4120 toh = (struct T_opthdr *)dstopt; 4121 toh->level = IPPROTO_IPV6; 4122 toh->name = IPV6_HOPLIMIT; 4123 toh->len = sizeof (struct T_opthdr) + 4124 sizeof (uint_t); 4125 toh->status = 0; 4126 dstopt += sizeof (struct T_opthdr); 4127 if 
(ipversion == IPV6_VERSION) 4128 *(uint_t *)dstopt = ip6h->ip6_hops; 4129 else 4130 *(uint_t *)dstopt = 4131 ((ipha_t *)rptr)->ipha_ttl; 4132 dstopt += sizeof (uint_t); 4133 udi_size -= toh->len; 4134 } 4135 if (udp_bits.udpb_ipv6_recvtclass) { 4136 struct T_opthdr *toh; 4137 4138 toh = (struct T_opthdr *)dstopt; 4139 toh->level = IPPROTO_IPV6; 4140 toh->name = IPV6_TCLASS; 4141 toh->len = sizeof (struct T_opthdr) + 4142 sizeof (uint_t); 4143 toh->status = 0; 4144 dstopt += sizeof (struct T_opthdr); 4145 if (ipversion == IPV6_VERSION) { 4146 *(uint_t *)dstopt = 4147 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4148 } else { 4149 ipha_t *ipha = (ipha_t *)rptr; 4150 *(uint_t *)dstopt = 4151 ipha->ipha_type_of_service; 4152 } 4153 dstopt += sizeof (uint_t); 4154 udi_size -= toh->len; 4155 } 4156 if ((udp_bits.udpb_ipv6_recvhopopts) && 4157 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4158 size_t hlen; 4159 4160 hlen = copy_hop_opts(&ipp, dstopt); 4161 dstopt += hlen; 4162 udi_size -= hlen; 4163 } 4164 if ((udp_bits.udpb_ipv6_recvdstopts) && 4165 (udp_bits.udpb_ipv6_recvrthdr) && 4166 (ipp.ipp_fields & IPPF_RTHDR) && 4167 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4168 struct T_opthdr *toh; 4169 4170 toh = (struct T_opthdr *)dstopt; 4171 toh->level = IPPROTO_IPV6; 4172 toh->name = IPV6_DSTOPTS; 4173 toh->len = sizeof (struct T_opthdr) + 4174 ipp.ipp_rtdstoptslen; 4175 toh->status = 0; 4176 dstopt += sizeof (struct T_opthdr); 4177 bcopy(ipp.ipp_rtdstopts, dstopt, 4178 ipp.ipp_rtdstoptslen); 4179 dstopt += ipp.ipp_rtdstoptslen; 4180 udi_size -= toh->len; 4181 } 4182 if ((udp_bits.udpb_ipv6_recvrthdr) && 4183 (ipp.ipp_fields & IPPF_RTHDR)) { 4184 struct T_opthdr *toh; 4185 4186 toh = (struct T_opthdr *)dstopt; 4187 toh->level = IPPROTO_IPV6; 4188 toh->name = IPV6_RTHDR; 4189 toh->len = sizeof (struct T_opthdr) + 4190 ipp.ipp_rthdrlen; 4191 toh->status = 0; 4192 dstopt += sizeof (struct T_opthdr); 4193 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4194 dstopt += ipp.ipp_rthdrlen; 4195 udi_size -= 
toh->len; 4196 } 4197 if ((udp_bits.udpb_ipv6_recvdstopts) && 4198 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4199 struct T_opthdr *toh; 4200 4201 toh = (struct T_opthdr *)dstopt; 4202 toh->level = IPPROTO_IPV6; 4203 toh->name = IPV6_DSTOPTS; 4204 toh->len = sizeof (struct T_opthdr) + 4205 ipp.ipp_dstoptslen; 4206 toh->status = 0; 4207 dstopt += sizeof (struct T_opthdr); 4208 bcopy(ipp.ipp_dstopts, dstopt, 4209 ipp.ipp_dstoptslen); 4210 dstopt += ipp.ipp_dstoptslen; 4211 udi_size -= toh->len; 4212 } 4213 if (cr != NULL) { 4214 struct T_opthdr *toh; 4215 4216 toh = (struct T_opthdr *)dstopt; 4217 toh->level = SOL_SOCKET; 4218 toh->name = SCM_UCRED; 4219 toh->len = sizeof (struct T_opthdr) + ucredsize; 4220 toh->status = 0; 4221 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4222 dstopt += toh->len; 4223 udi_size -= toh->len; 4224 } 4225 if (udp_bits.udpb_timestamp) { 4226 struct T_opthdr *toh; 4227 4228 toh = (struct T_opthdr *)dstopt; 4229 toh->level = SOL_SOCKET; 4230 toh->name = SCM_TIMESTAMP; 4231 toh->len = sizeof (struct T_opthdr) + 4232 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4233 toh->status = 0; 4234 dstopt += sizeof (struct T_opthdr); 4235 /* Align for gethrestime() */ 4236 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4237 sizeof (intptr_t)); 4238 gethrestime((timestruc_t *)dstopt); 4239 dstopt = (uchar_t *)toh + toh->len; 4240 udi_size -= toh->len; 4241 } 4242 4243 /* Consumed all of allocated space */ 4244 ASSERT(udi_size == 0); 4245 } 4246 #undef sin6 4247 /* No IP_RECVDSTADDR for IPv6. */ 4248 } 4249 4250 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4251 if (options_mp != NULL) 4252 freeb(options_mp); 4253 4254 udp_ulp_recv(connp, mp); 4255 4256 return; 4257 4258 tossit: 4259 freemsg(mp); 4260 if (options_mp != NULL) 4261 freeb(options_mp); 4262 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4263 } 4264 4265 /* 4266 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4267 * information that can be changing beneath us. 
4268 */ 4269 mblk_t * 4270 udp_snmp_get(queue_t *q, mblk_t *mpctl) 4271 { 4272 mblk_t *mpdata; 4273 mblk_t *mp_conn_ctl; 4274 mblk_t *mp_attr_ctl; 4275 mblk_t *mp6_conn_ctl; 4276 mblk_t *mp6_attr_ctl; 4277 mblk_t *mp_conn_tail; 4278 mblk_t *mp_attr_tail; 4279 mblk_t *mp6_conn_tail; 4280 mblk_t *mp6_attr_tail; 4281 struct opthdr *optp; 4282 mib2_udpEntry_t ude; 4283 mib2_udp6Entry_t ude6; 4284 mib2_transportMLPEntry_t mlp; 4285 int state; 4286 zoneid_t zoneid; 4287 int i; 4288 connf_t *connfp; 4289 conn_t *connp = Q_TO_CONN(q); 4290 int v4_conn_idx; 4291 int v6_conn_idx; 4292 boolean_t needattr; 4293 udp_t *udp; 4294 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4295 udp_stack_t *us = connp->conn_netstack->netstack_udp; 4296 mblk_t *mp2ctl; 4297 4298 /* 4299 * make a copy of the original message 4300 */ 4301 mp2ctl = copymsg(mpctl); 4302 4303 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 4304 if (mpctl == NULL || 4305 (mpdata = mpctl->b_cont) == NULL || 4306 (mp_conn_ctl = copymsg(mpctl)) == NULL || 4307 (mp_attr_ctl = copymsg(mpctl)) == NULL || 4308 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 4309 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 4310 freemsg(mp_conn_ctl); 4311 freemsg(mp_attr_ctl); 4312 freemsg(mp6_conn_ctl); 4313 freemsg(mpctl); 4314 freemsg(mp2ctl); 4315 return (0); 4316 } 4317 4318 zoneid = connp->conn_zoneid; 4319 4320 /* fixed length structure for IPv4 and IPv6 counters */ 4321 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 4322 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 4323 /* synchronize 64- and 32-bit counters */ 4324 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 4325 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 4326 4327 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4328 optp->level = MIB2_UDP; 4329 optp->name = 0; 4330 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 4331 sizeof (us->us_udp_mib)); 4332 optp->len = 
msgdsize(mpdata); 4333 qreply(q, mpctl); 4334 4335 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 4336 v4_conn_idx = v6_conn_idx = 0; 4337 4338 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4339 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4340 connp = NULL; 4341 4342 while ((connp = ipcl_get_next_conn(connfp, connp, 4343 IPCL_UDPCONN))) { 4344 udp = connp->conn_udp; 4345 if (zoneid != connp->conn_zoneid) 4346 continue; 4347 4348 /* 4349 * Note that the port numbers are sent in 4350 * host byte order 4351 */ 4352 4353 if (udp->udp_state == TS_UNBND) 4354 state = MIB2_UDP_unbound; 4355 else if (udp->udp_state == TS_IDLE) 4356 state = MIB2_UDP_idle; 4357 else if (udp->udp_state == TS_DATA_XFER) 4358 state = MIB2_UDP_connected; 4359 else 4360 state = MIB2_UDP_unknown; 4361 4362 needattr = B_FALSE; 4363 bzero(&mlp, sizeof (mlp)); 4364 if (connp->conn_mlp_type != mlptSingle) { 4365 if (connp->conn_mlp_type == mlptShared || 4366 connp->conn_mlp_type == mlptBoth) 4367 mlp.tme_flags |= MIB2_TMEF_SHARED; 4368 if (connp->conn_mlp_type == mlptPrivate || 4369 connp->conn_mlp_type == mlptBoth) 4370 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 4371 needattr = B_TRUE; 4372 } 4373 if (connp->conn_anon_mlp) { 4374 mlp.tme_flags |= MIB2_TMEF_ANONMLP; 4375 needattr = B_TRUE; 4376 } 4377 switch (connp->conn_mac_mode) { 4378 case CONN_MAC_DEFAULT: 4379 break; 4380 case CONN_MAC_AWARE: 4381 mlp.tme_flags |= MIB2_TMEF_MACEXEMPT; 4382 needattr = B_TRUE; 4383 break; 4384 case CONN_MAC_IMPLICIT: 4385 mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT; 4386 needattr = B_TRUE; 4387 break; 4388 } 4389 4390 /* 4391 * Create an IPv4 table entry for IPv4 entries and also 4392 * any IPv6 entries which are bound to in6addr_any 4393 * (i.e. anything a IPv4 peer could connect/send to). 
4394 */ 4395 if (udp->udp_ipversion == IPV4_VERSION || 4396 (udp->udp_state <= TS_IDLE && 4397 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 4398 ude.udpEntryInfo.ue_state = state; 4399 /* 4400 * If in6addr_any this will set it to 4401 * INADDR_ANY 4402 */ 4403 ude.udpLocalAddress = 4404 V4_PART_OF_V6(udp->udp_v6src); 4405 ude.udpLocalPort = ntohs(udp->udp_port); 4406 if (udp->udp_state == TS_DATA_XFER) { 4407 /* 4408 * Can potentially get here for 4409 * v6 socket if another process 4410 * (say, ping) has just done a 4411 * sendto(), changing the state 4412 * from the TS_IDLE above to 4413 * TS_DATA_XFER by the time we hit 4414 * this part of the code. 4415 */ 4416 ude.udpEntryInfo.ue_RemoteAddress = 4417 V4_PART_OF_V6(udp->udp_v6dst); 4418 ude.udpEntryInfo.ue_RemotePort = 4419 ntohs(udp->udp_dstport); 4420 } else { 4421 ude.udpEntryInfo.ue_RemoteAddress = 0; 4422 ude.udpEntryInfo.ue_RemotePort = 0; 4423 } 4424 4425 /* 4426 * We make the assumption that all udp_t 4427 * structs will be created within an address 4428 * region no larger than 32-bits. 4429 */ 4430 ude.udpInstance = (uint32_t)(uintptr_t)udp; 4431 ude.udpCreationProcess = 4432 (udp->udp_open_pid < 0) ? 
4433 MIB2_UNKNOWN_PROCESS : 4434 udp->udp_open_pid; 4435 ude.udpCreationTime = udp->udp_open_time; 4436 4437 (void) snmp_append_data2(mp_conn_ctl->b_cont, 4438 &mp_conn_tail, (char *)&ude, sizeof (ude)); 4439 mlp.tme_connidx = v4_conn_idx++; 4440 if (needattr) 4441 (void) snmp_append_data2( 4442 mp_attr_ctl->b_cont, &mp_attr_tail, 4443 (char *)&mlp, sizeof (mlp)); 4444 } 4445 if (udp->udp_ipversion == IPV6_VERSION) { 4446 ude6.udp6EntryInfo.ue_state = state; 4447 ude6.udp6LocalAddress = udp->udp_v6src; 4448 ude6.udp6LocalPort = ntohs(udp->udp_port); 4449 ude6.udp6IfIndex = udp->udp_bound_if; 4450 if (udp->udp_state == TS_DATA_XFER) { 4451 ude6.udp6EntryInfo.ue_RemoteAddress = 4452 udp->udp_v6dst; 4453 ude6.udp6EntryInfo.ue_RemotePort = 4454 ntohs(udp->udp_dstport); 4455 } else { 4456 ude6.udp6EntryInfo.ue_RemoteAddress = 4457 sin6_null.sin6_addr; 4458 ude6.udp6EntryInfo.ue_RemotePort = 0; 4459 } 4460 /* 4461 * We make the assumption that all udp_t 4462 * structs will be created within an address 4463 * region no larger than 32-bits. 4464 */ 4465 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 4466 ude6.udp6CreationProcess = 4467 (udp->udp_open_pid < 0) ? 4468 MIB2_UNKNOWN_PROCESS : 4469 udp->udp_open_pid; 4470 ude6.udp6CreationTime = udp->udp_open_time; 4471 4472 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 4473 &mp6_conn_tail, (char *)&ude6, 4474 sizeof (ude6)); 4475 mlp.tme_connidx = v6_conn_idx++; 4476 if (needattr) 4477 (void) snmp_append_data2( 4478 mp6_attr_ctl->b_cont, 4479 &mp6_attr_tail, (char *)&mlp, 4480 sizeof (mlp)); 4481 } 4482 } 4483 } 4484 4485 /* IPv4 UDP endpoints */ 4486 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 4487 sizeof (struct T_optmgmt_ack)]; 4488 optp->level = MIB2_UDP; 4489 optp->name = MIB2_UDP_ENTRY; 4490 optp->len = msgdsize(mp_conn_ctl->b_cont); 4491 qreply(q, mp_conn_ctl); 4492 4493 /* table of MLP attributes... 
*/ 4494 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 4495 sizeof (struct T_optmgmt_ack)]; 4496 optp->level = MIB2_UDP; 4497 optp->name = EXPER_XPORT_MLP; 4498 optp->len = msgdsize(mp_attr_ctl->b_cont); 4499 if (optp->len == 0) 4500 freemsg(mp_attr_ctl); 4501 else 4502 qreply(q, mp_attr_ctl); 4503 4504 /* IPv6 UDP endpoints */ 4505 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 4506 sizeof (struct T_optmgmt_ack)]; 4507 optp->level = MIB2_UDP6; 4508 optp->name = MIB2_UDP6_ENTRY; 4509 optp->len = msgdsize(mp6_conn_ctl->b_cont); 4510 qreply(q, mp6_conn_ctl); 4511 4512 /* table of MLP attributes... */ 4513 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 4514 sizeof (struct T_optmgmt_ack)]; 4515 optp->level = MIB2_UDP6; 4516 optp->name = EXPER_XPORT_MLP; 4517 optp->len = msgdsize(mp6_attr_ctl->b_cont); 4518 if (optp->len == 0) 4519 freemsg(mp6_attr_ctl); 4520 else 4521 qreply(q, mp6_attr_ctl); 4522 4523 return (mp2ctl); 4524 } 4525 4526 /* 4527 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 4528 * NOTE: Per MIB-II, UDP has no writable data. 4529 * TODO: If this ever actually tries to set anything, it needs to be 4530 * to do the appropriate locking. 4531 */ 4532 /* ARGSUSED */ 4533 int 4534 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4535 uchar_t *ptr, int len) 4536 { 4537 switch (level) { 4538 case MIB2_UDP: 4539 return (0); 4540 default: 4541 return (1); 4542 } 4543 } 4544 4545 /* 4546 * This routine creates a T_UDERROR_IND message and passes it upstream. 4547 * The address and options are copied from the T_UNITDATA_REQ message 4548 * passed in mp. This message is freed. 
4549 */ 4550 static void 4551 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4552 t_scalar_t err) 4553 { 4554 struct T_unitdata_req *tudr; 4555 mblk_t *mp1; 4556 uchar_t *optaddr; 4557 t_scalar_t optlen; 4558 4559 if (DB_TYPE(mp) == M_DATA) { 4560 ASSERT(destaddr != NULL && destlen != 0); 4561 optaddr = NULL; 4562 optlen = 0; 4563 } else { 4564 if ((mp->b_wptr < mp->b_rptr) || 4565 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4566 goto done; 4567 } 4568 tudr = (struct T_unitdata_req *)mp->b_rptr; 4569 destaddr = mp->b_rptr + tudr->DEST_offset; 4570 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4571 destaddr + tudr->DEST_length < mp->b_rptr || 4572 destaddr + tudr->DEST_length > mp->b_wptr) { 4573 goto done; 4574 } 4575 optaddr = mp->b_rptr + tudr->OPT_offset; 4576 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4577 optaddr + tudr->OPT_length < mp->b_rptr || 4578 optaddr + tudr->OPT_length > mp->b_wptr) { 4579 goto done; 4580 } 4581 destlen = tudr->DEST_length; 4582 optlen = tudr->OPT_length; 4583 } 4584 4585 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4586 (char *)optaddr, optlen, err); 4587 if (mp1 != NULL) 4588 qreply(q, mp1); 4589 4590 done: 4591 freemsg(mp); 4592 } 4593 4594 /* 4595 * This routine removes a port number association from a stream. It 4596 * is called by udp_wput to handle T_UNBIND_REQ messages. 4597 */ 4598 static void 4599 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4600 { 4601 conn_t *connp = Q_TO_CONN(q); 4602 int error; 4603 4604 error = udp_do_unbind(connp); 4605 if (error) { 4606 if (error < 0) 4607 udp_err_ack(q, mp, -error, 0); 4608 else 4609 udp_err_ack(q, mp, TSYSERR, error); 4610 return; 4611 } 4612 4613 mp = mi_tpi_ok_ack_alloc(mp); 4614 ASSERT(mp != NULL); 4615 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4616 qreply(q, mp); 4617 } 4618 4619 /* 4620 * Don't let port fall into the privileged range. 
4621 * Since the extra privileged ports can be arbitrary we also 4622 * ensure that we exclude those from consideration. 4623 * us->us_epriv_ports is not sorted thus we loop over it until 4624 * there are no changes. 4625 */ 4626 static in_port_t 4627 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4628 { 4629 int i; 4630 in_port_t nextport; 4631 boolean_t restart = B_FALSE; 4632 udp_stack_t *us = udp->udp_us; 4633 4634 if (random && udp_random_anon_port != 0) { 4635 (void) random_get_pseudo_bytes((uint8_t *)&port, 4636 sizeof (in_port_t)); 4637 /* 4638 * Unless changed by a sys admin, the smallest anon port 4639 * is 32768 and the largest anon port is 65535. It is 4640 * very likely (50%) for the random port to be smaller 4641 * than the smallest anon port. When that happens, 4642 * add port % (anon port range) to the smallest anon 4643 * port to get the random port. It should fall into the 4644 * valid anon port range. 4645 */ 4646 if (port < us->us_smallest_anon_port) { 4647 port = us->us_smallest_anon_port + 4648 port % (us->us_largest_anon_port - 4649 us->us_smallest_anon_port); 4650 } 4651 } 4652 4653 retry: 4654 if (port < us->us_smallest_anon_port) 4655 port = us->us_smallest_anon_port; 4656 4657 if (port > us->us_largest_anon_port) { 4658 port = us->us_smallest_anon_port; 4659 if (restart) 4660 return (0); 4661 restart = B_TRUE; 4662 } 4663 4664 if (port < us->us_smallest_nonpriv_port) 4665 port = us->us_smallest_nonpriv_port; 4666 4667 for (i = 0; i < us->us_num_epriv_ports; i++) { 4668 if (port == us->us_epriv_ports[i]) { 4669 port++; 4670 /* 4671 * Make sure that the port is in the 4672 * valid range. 
4673 */ 4674 goto retry; 4675 } 4676 } 4677 4678 if (is_system_labeled() && 4679 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4680 port, IPPROTO_UDP, B_TRUE)) != 0) { 4681 port = nextport; 4682 goto retry; 4683 } 4684 4685 return (port); 4686 } 4687 4688 static int 4689 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 4690 { 4691 int err; 4692 cred_t *cred; 4693 cred_t *orig_cred = NULL; 4694 cred_t *effective_cred = NULL; 4695 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4696 udp_t *udp = Q_TO_UDP(wq); 4697 udp_stack_t *us = udp->udp_us; 4698 4699 /* 4700 * All Solaris components should pass a db_credp 4701 * for this message, hence we ASSERT. 4702 * On production kernels we return an error to be robust against 4703 * random streams modules sitting on top of us. 4704 */ 4705 cred = orig_cred = msg_getcred(mp, NULL); 4706 ASSERT(cred != NULL); 4707 if (cred == NULL) 4708 return (EINVAL); 4709 4710 /* 4711 * Verify the destination is allowed to receive packets at 4712 * the security label of the message data. tsol_check_dest() 4713 * may create a new effective cred for this message with a 4714 * modified label or label flags. Note that we use the cred/label 4715 * from the message to handle MLP 4716 */ 4717 if ((err = tsol_check_dest(cred, &dst, IPV4_VERSION, 4718 udp->udp_connp->conn_mac_mode, &effective_cred)) != 0) 4719 goto done; 4720 if (effective_cred != NULL) 4721 cred = effective_cred; 4722 4723 /* 4724 * Calculate the security label to be placed in the text 4725 * of the message (if any). 4726 */ 4727 if ((err = tsol_compute_label(cred, dst, opt_storage, 4728 us->us_netstack->netstack_ip)) != 0) 4729 goto done; 4730 4731 /* 4732 * Insert the security label in the cached ip options, 4733 * removing any old label that may exist. 
/*
 * Build and send one IPv4 UDP datagram for connp.
 *
 * mp is either a bare M_DATA payload or a TPI message (read here as a
 * struct T_unitdata_req) whose b_cont is the payload.  The IP header, any
 * cached IP send options, the UDP header and (if insert_spi) a zeroed
 * 32-bit word are laid out immediately in front of the payload: in place
 * when the payload mblk is unshared, has enough headroom and is 32-bit
 * aligned, otherwise in a newly allocated mblk linked ahead of it.
 * The finished packet is passed to ip_output_options() or to
 * udp_send_data().
 *
 * v4dst	destination address; INADDR_ANY is replaced by loopback.
 * port		destination port, stored unmodified in uha_dst_port
 *		(presumably already in network byte order -- confirm
 *		against callers).
 * srcid	if non-zero and the endpoint's own source is INADDR_ANY,
 *		looked up with ip_srcid_find_id() to pick a source address.
 * error	out: 0 on success, otherwise an errno value.
 * insert_spi	reserve and zero a uint32_t directly after the UDP header.
 * msg		for non-STREAMS endpoints, supplies the ancillary data
 *		(msg_control/msg_controllen) to process as options.
 * cr, pid	credential and pid attached to mp when it carries none and
 *		the destination is multicast or does not match a usable
 *		cached ire; cr is also passed to the option processing.
 *
 * Returns NULL when the message was consumed (*error == 0).  Otherwise
 * returns a non-NULL message for the caller to dispose of, with *error
 * set, after bumping udpOutErrors.
 */
static mblk_t *
udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
    uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg,
    cred_t *cr, pid_t pid)
{
	udp_t		*udp = connp->conn_udp;
	mblk_t		*mp1 = mp;
	mblk_t		*mp2;
	ipha_t		*ipha;
	int		ip_hdr_length;
	uint32_t	ip_len;
	udpha_t		*udpha;
	boolean_t	lock_held = B_FALSE;
	in_port_t	uha_src_port;
	udpattrs_t	attrs;
	uchar_t		ip_snd_opt[IP_MAX_OPT_LENGTH];
	uint32_t	ip_snd_opt_len = 0;
	ip4_pkt_t	pktinfo;
	ip4_pkt_t	*pktinfop = &pktinfo;
	ip_opt_info_t	optinfo;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t	*us = udp->udp_us;
	ipsec_stack_t	*ipss = ipst->ips_netstack->netstack_ipsec;
	queue_t		*q = connp->conn_wq;
	ire_t		*ire;
	in6_addr_t	v6dst;
	boolean_t	update_lastdst = B_FALSE;

	*error = 0;
	pktinfop->ip4_ill_index = 0;
	pktinfop->ip4_addr = INADDR_ANY;
	optinfo.ip_opt_flags = 0;
	optinfo.ip_opt_ill_index = 0;

	if (v4dst == INADDR_ANY)
		v4dst = htonl(INADDR_LOOPBACK);

	/*
	 * If options passed in, feed it for verification and handling
	 */
	attrs.udpattr_credset = B_FALSE;
	if (IPCL_IS_NONSTR(connp)) {
		if (msg->msg_controllen != 0) {
			attrs.udpattr_ipp4 = pktinfop;
			attrs.udpattr_mb = mp;

			rw_enter(&udp->udp_rwlock, RW_WRITER);
			*error = process_auxiliary_options(connp,
			    msg->msg_control, msg->msg_controllen,
			    &attrs, &udp_opt_obj, udp_opt_set, cr);
			rw_exit(&udp->udp_rwlock);
			if (*error)
				goto done;
		}
	} else {
		if (DB_TYPE(mp) != M_DATA) {
			mp1 = mp->b_cont;
			if (((struct T_unitdata_req *)
			    mp->b_rptr)->OPT_length != 0) {
				attrs.udpattr_ipp4 = pktinfop;
				attrs.udpattr_mb = mp;
				if (udp_unitdata_opt_process(q, mp, error,
				    &attrs) < 0)
					goto done;
				/*
				 * Note: success in processing options.
				 * mp option buffer represented by
				 * OPT_length/offset now potentially modified
				 * and contain option setting results
				 */
				ASSERT(*error == 0);
			}
		}
	}

	/* mp1 points to the M_DATA mblk carrying the packet */
	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);

	/*
	 * Determine whether we need to mark the mblk with the user's
	 * credentials.
	 * If labeled then sockfs would have already done this.
	 */
	ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL);

	ire = connp->conn_ire_cache;
	if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) ||
	    (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) {
		if (cr != NULL && msg_getcred(mp, NULL) == NULL)
			mblk_setcred(mp, cr, pid);
	}

	/* From here on, "goto done" drops udp_rwlock while lock_held is set. */
	rw_enter(&udp->udp_rwlock, RW_READER);
	lock_held = B_TRUE;

	/*
	 * Cluster and TSOL note:
	 *    udp.udp_v6lastdst		is shared by Cluster and TSOL
	 *    udp.udp_lastdstport	is used by Cluster
	 *
	 * Both Cluster and TSOL need to update the dest addr and/or port.
	 * Updating is done after both Cluster and TSOL checks, protected
	 * by conn_lock.
	 */
	mutex_enter(&connp->conn_lock);

	if (cl_inet_connect2 != NULL &&
	    (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
	    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
	    udp->udp_lastdstport != port)) {
		/* conn_lock is dropped around the cluster callout. */
		mutex_exit(&connp->conn_lock);
		*error = 0;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error);
		if (*error != 0) {
			*error = EHOSTUNREACH;
			goto done;
		}
		update_lastdst = B_TRUE;
		mutex_enter(&connp->conn_lock);
	}

	/*
	 * Check if our saved options are valid; update if not.
	 * TSOL Note: Since we are not in WRITER mode, UDP packets
	 * to different destination may require different labels,
	 * or worse, UDP packets to same IP address may require
	 * different labels due to use of shared all-zones address.
	 * We use conn_lock to ensure that lastdst, ip_snd_options,
	 * and ip_snd_options_len are consistent for the current
	 * destination and are updated atomically.
	 */
	if (is_system_labeled()) {
		cred_t	*credp;
		pid_t	cpid;

		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !attrs.udpattr_credset) {
			mutex_exit(&connp->conn_lock);
			DTRACE_PROBE4(
			    tx__ip__log__info__output__udp,
			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
			    mblk_t *, mp, udpattrs_t *, &attrs, queue_t *, q);
			*error = EINVAL;
			goto done;
		}
		/*
		 * Update label option for this UDP socket if
		 * - the destination has changed,
		 * - the UDP socket is MLP, or
		 * - the cred attached to the mblk changed.
		 */
		credp = msg_getcred(mp, &cpid);
		if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
		    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst ||
		    connp->conn_mlp_type != mlptSingle ||
		    credp != udp->udp_last_cred) {
			if ((*error = udp_update_label(q, mp, v4dst)) != 0) {
				mutex_exit(&connp->conn_lock);
				goto done;
			}
			update_lastdst = B_TRUE;
		}

		/*
		 * Attach the effective cred to the mblk to ensure future
		 * routing decisions will be based on its label.
		 */
		mblk_setcred(mp, udp->udp_effective_cred, cpid);
	}
	if (update_lastdst) {
		IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst);
		udp->udp_lastdstport = port;
	}
	/*
	 * Copy the cached send options into a local buffer while conn_lock
	 * is held, so they can be used after the lock is dropped.
	 */
	if (udp->udp_ip_snd_options_len > 0) {
		ip_snd_opt_len = udp->udp_ip_snd_options_len;
		bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
	}
	mutex_exit(&connp->conn_lock);

	/* Add an IP header */
	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len +
	    (insert_spi ? sizeof (uint32_t) : 0);
	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
	/*
	 * The headers can only be written in place if the data block is not
	 * shared, there is room before b_rptr, and the result is 32-bit
	 * aligned.  Otherwise put them in a new mblk ahead of the payload.
	 */
	if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
	    !OK_32PTR(ipha)) {
		mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO);
		if (mp2 == NULL) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "allocbfail2");
			*error = ENOMEM;
			goto done;
		}
		mp2->b_wptr = DB_LIM(mp2);
		mp2->b_cont = mp1;
		mp1 = mp2;
		if (DB_TYPE(mp) != M_DATA)
			mp->b_cont = mp1;
		else
			mp = mp1;
		ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
	}
	/* From here on ip_hdr_length covers only the IP header and options. */
	ip_hdr_length -= (UDPH_SIZE + (insert_spi ? sizeof (uint32_t) : 0));
#ifdef	_BIG_ENDIAN
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
	    udp->udp_type_of_service);
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
#else
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((udp->udp_type_of_service << 8) |
	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
#endif
	if (pktinfop->ip4_addr != INADDR_ANY) {
		/* A source address was supplied through the options. */
		ipha->ipha_src = pktinfop->ip4_addr;
		optinfo.ip_opt_flags = IP_VERIFY_SRC;
	} else {
		/*
		 * Copy our address into the packet.  If this is zero,
		 * first look at __sin6_src_id for a hint. If we leave the
		 * source as INADDR_ANY then ip will fill in the real source
		 * address.
		 */
		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
		if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
			in6_addr_t v6src;

			ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid,
			    us->us_netstack);
			IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
		}
	}
	uha_src_port = udp->udp_port;
	/*
	 * Without IP options the lock is released here; with options it is
	 * released further down, after the options are copied in.
	 */
	if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) {
		rw_exit(&udp->udp_rwlock);
		lock_held = B_FALSE;
	}

	if (pktinfop->ip4_ill_index != 0) {
		optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index;
	}

	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ident = 0;

	mp1->b_rptr = (uchar_t *)ipha;

	ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
	    (uintptr_t)UINT_MAX);

	/* Determine length of packet */
	ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
	if ((mp2 = mp1->b_cont) != NULL) {
		do {
			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
			ip_len += (uint32_t)MBLKL(mp2);
		} while ((mp2 = mp2->b_cont) != NULL);
	}
	/*
	 * If the size of the packet is greater than the maximum allowed by
	 * ip, return an error. Passing this down could cause panics because
	 * the size will have wrapped and be inconsistent with the msg size.
	 */
	if (ip_len > IP_MAXPACKET) {
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
		    "udp_wput_end: q %p (%S)", q, "IP length exceeded");
		*error = EMSGSIZE;
		goto done;
	}
	ipha->ipha_length = htons((uint16_t)ip_len);
	/* ip_len now becomes the UDP length, converted with htons(). */
	ip_len -= ip_hdr_length;
	ip_len = htons((uint16_t)ip_len);
	udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);

	/* Insert all-0s SPI now. */
	if (insert_spi)
		*((uint32_t *)(udpha + 1)) = 0;

	/*
	 * Copy in the destination address
	 */
	ipha->ipha_dst = v4dst;

	/*
	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
	 */
	if (CLASSD(v4dst))
		ipha->ipha_ttl = udp->udp_multicast_ttl;

	udpha->uha_dst_port = port;
	udpha->uha_src_port = uha_src_port;

	if (ip_snd_opt_len > 0) {
		uint32_t	cksum;

		bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
		lock_held = B_FALSE;
		rw_exit(&udp->udp_rwlock);
		/*
		 * Massage source route putting first source route in ipha_dst.
		 * Ignore the destination in T_unitdata_req.
		 * Create a checksum adjustment for a source route, if any.
		 */
		cksum = ip_massage_options(ipha, us->us_netstack);
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
		    (ipha->ipha_dst & 0xFFFF);
		if ((int)cksum < 0)
			cksum--;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		cksum += ip_len;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/* There might be a carry. */
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
#ifdef _LITTLE_ENDIAN
		if (us->us_do_checksum)
			ip_len = (cksum << 16) | ip_len;
#else
		if (us->us_do_checksum)
			ip_len = (ip_len << 16) | cksum;
		else
			ip_len <<= 16;
#endif
	} else {
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		if (us->us_do_checksum)
			ip_len |= (ip_len << 16);
#ifndef _LITTLE_ENDIAN
		else
			ip_len <<= 16;
#endif
	}
	ASSERT(!lock_held);
	/*
	 * Set UDP length and checksum.  A single 32-bit store starting at
	 * uha_length writes both fields, which is why ip_len was packed per
	 * byte order above.
	 */
	*((uint32_t *)&udpha->uha_length) = ip_len;

	if (DB_TYPE(mp) != M_DATA) {
		/* NOTE: this local cr shadows the cr parameter. */
		cred_t *cr;
		pid_t cpid;

		/* Move any cred from the T_UNITDATA_REQ to the packet */
		cr = msg_extractcred(mp, &cpid);
		if (cr != NULL) {
			if (mp1->b_datap->db_credp != NULL)
				crfree(mp1->b_datap->db_credp);
			mp1->b_datap->db_credp = cr;
			mp1->b_datap->db_cpid = cpid;
		}
		ASSERT(mp != mp1);
		/* Only the leading TPI mblk is freed; mp1 goes on to IP. */
		freeb(mp);
	}

	/* mp has been consumed and we'll return success */
	ASSERT(*error == 0);
	mp = NULL;

	/* We're done.  Pass the packet to ip. */
	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
	    "udp_wput_end: q %p (%S)", q, "end");

	/*
	 * If any of the conditions below holds, hand the packet to
	 * ip_output_options(); otherwise use udp_send_data().
	 */
	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
	    CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) ||
	    connp->conn_dontroute ||
	    connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 ||
	    optinfo.ip_opt_ill_index != 0 ||
	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
	    IPP_ENABLED(IPP_LOCAL_OUT, ipst) ||
	    ipst->ips_ip_g_mrouter != NULL) {
		UDP_STAT(us, udp_ip_send);
		ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT,
		    &optinfo);
	} else {
		udp_send_data(udp, connp->conn_wq, mp1, ipha);
	}

done:
	if (lock_held)
		rw_exit(&udp->udp_rwlock);
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
	}
	return (mp);
}
*us = udp->udp_us; 5180 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5181 5182 dst = ipha->ipha_dst; 5183 src = ipha->ipha_src; 5184 ASSERT(ipha->ipha_ident == 0); 5185 5186 if (CLASSD(dst)) { 5187 int err; 5188 5189 ipif = conn_get_held_ipif(connp, 5190 &connp->conn_multicast_ipif, &err); 5191 5192 if (ipif == NULL || ipif->ipif_isv6 || 5193 (ipif->ipif_ill->ill_phyint->phyint_flags & 5194 PHYI_LOOPBACK)) { 5195 if (ipif != NULL) 5196 ipif_refrele(ipif); 5197 UDP_STAT(us, udp_ip_send); 5198 ip_output(connp, mp, q, IP_WPUT); 5199 return; 5200 } 5201 } 5202 5203 retry_caching = B_FALSE; 5204 mutex_enter(&connp->conn_lock); 5205 ire = connp->conn_ire_cache; 5206 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5207 5208 if (ire == NULL || ire->ire_addr != dst || 5209 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5210 retry_caching = B_TRUE; 5211 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5212 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5213 5214 ASSERT(ipif != NULL); 5215 if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) 5216 retry_caching = B_TRUE; 5217 } 5218 5219 if (!retry_caching) { 5220 ASSERT(ire != NULL); 5221 IRE_REFHOLD(ire); 5222 mutex_exit(&connp->conn_lock); 5223 } else { 5224 boolean_t cached = B_FALSE; 5225 5226 connp->conn_ire_cache = NULL; 5227 mutex_exit(&connp->conn_lock); 5228 5229 /* Release the old ire */ 5230 if (ire != NULL) { 5231 IRE_REFRELE_NOTR(ire); 5232 ire = NULL; 5233 } 5234 5235 if (CLASSD(dst)) { 5236 ASSERT(ipif != NULL); 5237 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5238 connp->conn_zoneid, msg_getlabel(mp), 5239 MATCH_IRE_ILL, ipst); 5240 } else { 5241 ASSERT(ipif == NULL); 5242 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5243 msg_getlabel(mp), ipst); 5244 } 5245 5246 if (ire == NULL) { 5247 if (ipif != NULL) 5248 ipif_refrele(ipif); 5249 UDP_STAT(us, udp_ire_null); 5250 ip_output(connp, mp, q, IP_WPUT); 5251 return; 5252 } 5253 IRE_REFHOLD_NOTR(ire); 5254 5255 mutex_enter(&connp->conn_lock); 5256 if 
(CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5257 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5258 irb_t *irb = ire->ire_bucket; 5259 5260 /* 5261 * IRE's created for non-connection oriented transports 5262 * are normally initialized with IRE_MARK_TEMPORARY set 5263 * in the ire_marks. These IRE's are preferentially 5264 * reaped when the hash chain length in the cache 5265 * bucket exceeds the maximum value specified in 5266 * ip[6]_ire_max_bucket_cnt. This can severely affect 5267 * UDP performance if IRE cache entries that we need 5268 * to reuse are continually removed. To remedy this, 5269 * when we cache the IRE in the conn_t, we remove the 5270 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5271 * set. 5272 */ 5273 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5274 rw_enter(&irb->irb_lock, RW_WRITER); 5275 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5276 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5277 irb->irb_tmp_ire_cnt--; 5278 } 5279 rw_exit(&irb->irb_lock); 5280 } 5281 connp->conn_ire_cache = ire; 5282 cached = B_TRUE; 5283 } 5284 mutex_exit(&connp->conn_lock); 5285 5286 /* 5287 * We can continue to use the ire but since it was not 5288 * cached, we should drop the extra reference. 5289 */ 5290 if (!cached) 5291 IRE_REFRELE_NOTR(ire); 5292 } 5293 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5294 ASSERT(!CLASSD(dst) || ipif != NULL); 5295 5296 /* 5297 * Check if we can take the fast-path. 
/*
 * Transmit an IPv4 UDP packet over the given ire, prepending the prebuilt
 * link-layer header found in ire->ire_nce->nce_fp_mp.
 *
 * On entry mp->b_rptr points at the IP header, which must carry no options
 * (asserted below), and mp must have enough headroom for the link-layer
 * header (asserted below).  This routine fills in the IP ident, completes
 * the UDP checksum when the checksum field is non-zero, sets the
 * fragmentation flags for non-multicast destinations, computes the IP
 * header checksum unless the mblk is flagged HCK_IPV4_HDRCKSUM, runs the
 * physical-out firewall hooks and the observability hook, and then sends
 * the packet either through the ill's direct transmit entry point or with
 * putnext() on ire->ire_stq.
 *
 * q		queue checked for backlog (q_first) and passed to
 *		ip_multicast_loopback().
 * mp		the packet; consumed on every path.
 * ire		route to send on; the reference held on it by the caller
 *		is released on every path out of this function.
 * connp	the sending connection.
 * zoneid	zone passed to ip_multicast_loopback().
 */
static void
udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid)
{
	ipaddr_t	src, dst;
	ill_t		*ill;
	mblk_t		*ire_fp_mp;
	uint_t		ire_fp_mp_len;
	uint16_t	*up;
	uint32_t	cksum, hcksum_txflags;
	queue_t		*dev_q;
	udp_t		*udp = connp->conn_udp;
	ipha_t		*ipha = (ipha_t *)mp->b_rptr;
	udp_stack_t	*us = udp->udp_us;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	boolean_t	ll_multicast = B_FALSE;
	boolean_t	direct_send;

	dev_q = ire->ire_stq->q_next;
	ASSERT(dev_q != NULL);

	ill = ire_to_ill(ire);
	ASSERT(ill != NULL);

	/*
	 * For the direct send case, if resetting of conn_direct_blocked
	 * was missed, it is still ok because the putq() would enable
	 * the queue and write service will drain it out.
	 */
	direct_send = ILL_DIRECT_CAPABLE(ill);

	/* is queue flow controlled? */
	if ((!direct_send) && (q->q_first != NULL || connp->conn_draining ||
	    DEV_Q_FLOW_BLOCKED(dev_q))) {
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests);
		BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards);
		/*
		 * Either queue the packet on the conn's write queue or
		 * drop it, depending on ips_ip_output_queue.
		 */
		if (ipst->ips_ip_output_queue) {
			DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp);
			(void) putq(connp->conn_wq, mp);
		} else {
			freemsg(mp);
		}
		ire_refrele(ire);
		return;
	}

	ire_fp_mp = ire->ire_nce->nce_fp_mp;
	ire_fp_mp_len = MBLKL(ire_fp_mp);
	ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len);

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;


	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);

	/* Take the next ident from the ire; byte-swap it on little-endian. */
	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
#ifndef _BIG_ENDIAN
	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
#endif

	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		ASSERT(ill->ill_hcksum_capab != NULL);
		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
	} else {
		hcksum_txflags = 0;
	}

	/* pseudo-header checksum (do it in parts for IP header checksum) */
	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);

	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
	/* A zero UDP checksum field is left alone. */
	if (*up != 0) {
		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
		    ntohs(ipha->ipha_length), cksum);

		/* Software checksum? */
		if (DB_CKSUMFLAGS(mp) == 0) {
			UDP_STAT(us, udp_out_sw_cksum);
			UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes,
			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
		}
	}

	if (!CLASSD(dst)) {
		ipha->ipha_fragment_offset_and_flags |=
		    (uint32_t)htons(ire->ire_frag_flag);
	}

	/* Calculate IP header checksum if hardware isn't capable */
	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
		    ((uint16_t *)ipha)[4]);
	}

	if (CLASSD(dst)) {
		/*
		 * If ilm_lookup_ill() finds this group on the ill, also
		 * pass the packet to ip_multicast_loopback().
		 */
		if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) {
			ip_multicast_loopback(q, ill, mp,
			    connp->conn_multicast_loop ? 0 :
			    IP_FF_NO_MCAST_LOOP, zoneid);
		}

		/* If multicast TTL is 0 then we are done */
		if (ipha->ipha_ttl == 0) {
			freemsg(mp);
			ire_refrele(ire);
			return;
		}
		ll_multicast = B_TRUE;
	}

	/* Prepend the link-layer header in the headroom before the IP header. */
	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);

	UPDATE_OB_PKT_COUNT(ire);
	ire->ire_last_used_time = lbolt;

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits);
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
	    ntohs(ipha->ipha_length));

	/*
	 * mp is tested for NULL after the hooks, so FW_HOOKS is expected to
	 * be able to consume the packet and clear mp.
	 */
	DTRACE_PROBE4(ip4__physical__out__start,
	    ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp);
	FW_HOOKS(ipst->ips_ip4_physical_out_event,
	    ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp,
	    ll_multicast, ipst);
	DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
	if (ipst->ips_ip4_observe.he_interested && mp != NULL) {
		zoneid_t szone;

		/*
		 * Both of these functions expect b_rptr to be
		 * where the IP header starts, so advance past the
		 * link layer header if present.
		 */
		mp->b_rptr += ire_fp_mp_len;
		szone = ip_get_zoneid_v4(ipha->ipha_src, mp,
		    ipst, ALL_ZONES);
		ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone,
		    ALL_ZONES, ill, ipst);
		mp->b_rptr -= ire_fp_mp_len;
	}

	if (mp == NULL)
		goto bail;

	DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL,
	    void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill,
	    ipha_t *, ipha, ip6_t *, NULL, int, 0);

	if (direct_send) {
		uintptr_t cookie;
		ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct;

		/* A non-zero cookie from the transmit call means flow control. */
		cookie = idd->idd_tx_df(idd->idd_tx_dh, mp,
		    (uintptr_t)connp, 0);
		if (cookie != NULL) {
			idl_tx_list_t *idl_txl;

			/*
			 * Flow controlled.
			 */
			DTRACE_PROBE2(non__null__cookie, uintptr_t,
			    cookie, conn_t *, connp);
			idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)];
			mutex_enter(&idl_txl->txl_lock);
			/*
			 * Check again after holding txl_lock to see if Tx
			 * ring is still blocked and only then insert the
			 * connp into the drain list.
			 */
			if (connp->conn_direct_blocked ||
			    (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh,
			    cookie) == 0)) {
				mutex_exit(&idl_txl->txl_lock);
				goto bail;
			}
			/*
			 * The list slot already holds a different cookie:
			 * count the collision and do not insert the conn.
			 */
			if (idl_txl->txl_cookie != NULL &&
			    idl_txl->txl_cookie != cookie) {
				DTRACE_PROBE2(udp__xmit__collision,
				    uintptr_t, cookie,
				    uintptr_t, idl_txl->txl_cookie);
				UDP_STAT(us, udp_cookie_coll);
			} else {
				connp->conn_direct_blocked = B_TRUE;
				idl_txl->txl_cookie = cookie;
				conn_drain_insert(connp, idl_txl);
				DTRACE_PROBE1(udp__xmit__insert,
				    conn_t *, connp);
			}
			mutex_exit(&idl_txl->txl_lock);
		}
	} else {
		DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp);
		putnext(ire->ire_stq, mp);
	}
bail:
	IRE_REFRELE(ire);
}
5559 */ 5560 if ((err = tsol_check_dest(cred, dst, IPV6_VERSION, 5561 udp->udp_connp->conn_mac_mode, &effective_cred)) != 0) 5562 goto done; 5563 if (effective_cred != NULL) 5564 cred = effective_cred; 5565 5566 /* 5567 * Calculate the security label to be placed in the text 5568 * of the message (if any). 5569 */ 5570 if ((err = tsol_compute_label_v6(cred, dst, opt_storage, 5571 us->us_netstack->netstack_ip)) != 0) 5572 goto done; 5573 5574 /* 5575 * Insert the security label in the cached ip options, 5576 * removing any old label that may exist. 5577 */ 5578 if ((err = tsol_update_sticky(&udp->udp_sticky_ipp, 5579 &udp->udp_label_len_v6, opt_storage)) != 0) 5580 goto done; 5581 5582 /* 5583 * Save the destination address and cred we used to 5584 * generate the security label text. 5585 */ 5586 if (cred != udp->udp_effective_cred) { 5587 if (udp->udp_effective_cred != NULL) 5588 crfree(udp->udp_effective_cred); 5589 crhold(cred); 5590 udp->udp_effective_cred = cred; 5591 } 5592 if (orig_cred != udp->udp_last_cred) { 5593 if (udp->udp_last_cred != NULL) 5594 crfree(udp->udp_last_cred); 5595 crhold(orig_cred); 5596 udp->udp_last_cred = orig_cred; 5597 } 5598 5599 done: 5600 if (effective_cred != NULL) 5601 crfree(effective_cred); 5602 5603 if (err != 0) { 5604 DTRACE_PROBE4( 5605 tx__ip__log__drop__updatelabel__udp6, 5606 char *, "queue(1) failed to update options(2) on mp(3)", 5607 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5608 } 5609 return (err); 5610 } 5611 5612 static int 5613 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5614 pid_t pid) 5615 { 5616 udp_t *udp = connp->conn_udp; 5617 udp_stack_t *us = udp->udp_us; 5618 ipaddr_t v4dst; 5619 in_port_t dstport; 5620 boolean_t mapped_addr; 5621 struct sockaddr_storage ss; 5622 sin_t *sin; 5623 sin6_t *sin6; 5624 struct sockaddr *addr; 5625 socklen_t addrlen; 5626 int error; 5627 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5628 5629 /* M_DATA for connected socket */ 5630 
5631 ASSERT(udp->udp_issocket); 5632 UDP_DBGSTAT(us, udp_data_conn); 5633 5634 mutex_enter(&connp->conn_lock); 5635 if (udp->udp_state != TS_DATA_XFER) { 5636 mutex_exit(&connp->conn_lock); 5637 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5638 UDP_STAT(us, udp_out_err_notconn); 5639 freemsg(mp); 5640 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5641 "udp_wput_end: connp %p (%S)", connp, 5642 "not-connected; address required"); 5643 return (EDESTADDRREQ); 5644 } 5645 5646 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5647 if (mapped_addr) 5648 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5649 5650 /* Initialize addr and addrlen as if they're passed in */ 5651 if (udp->udp_family == AF_INET) { 5652 sin = (sin_t *)&ss; 5653 sin->sin_family = AF_INET; 5654 dstport = sin->sin_port = udp->udp_dstport; 5655 ASSERT(mapped_addr); 5656 sin->sin_addr.s_addr = v4dst; 5657 addr = (struct sockaddr *)sin; 5658 addrlen = sizeof (*sin); 5659 } else { 5660 sin6 = (sin6_t *)&ss; 5661 sin6->sin6_family = AF_INET6; 5662 dstport = sin6->sin6_port = udp->udp_dstport; 5663 sin6->sin6_flowinfo = udp->udp_flowinfo; 5664 sin6->sin6_addr = udp->udp_v6dst; 5665 sin6->sin6_scope_id = 0; 5666 sin6->__sin6_src_id = 0; 5667 addr = (struct sockaddr *)sin6; 5668 addrlen = sizeof (*sin6); 5669 } 5670 mutex_exit(&connp->conn_lock); 5671 5672 if (mapped_addr) { 5673 /* 5674 * Handle both AF_INET and AF_INET6; the latter 5675 * for IPV4 mapped destination addresses. Note 5676 * here that both addr and addrlen point to the 5677 * corresponding struct depending on the address 5678 * family of the socket. 
5679 */ 5680 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5681 insert_spi, msg, cr, pid); 5682 } else { 5683 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5684 } 5685 if (error == 0) { 5686 ASSERT(mp == NULL); 5687 return (0); 5688 } 5689 5690 UDP_STAT(us, udp_out_err_output); 5691 ASSERT(mp != NULL); 5692 if (IPCL_IS_NONSTR(connp)) { 5693 freemsg(mp); 5694 return (error); 5695 } else { 5696 /* mp is freed by the following routine */ 5697 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5698 (t_scalar_t)addrlen, (t_scalar_t)error); 5699 return (0); 5700 } 5701 } 5702 5703 /* ARGSUSED */ 5704 static int 5705 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5706 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5707 { 5708 5709 udp_t *udp = connp->conn_udp; 5710 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5711 int error = 0; 5712 sin6_t *sin6; 5713 sin_t *sin; 5714 uint_t srcid; 5715 uint16_t port; 5716 ipaddr_t v4dst; 5717 5718 5719 ASSERT(addr != NULL); 5720 5721 switch (udp->udp_family) { 5722 case AF_INET6: 5723 sin6 = (sin6_t *)addr; 5724 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5725 /* 5726 * Destination is a non-IPv4-compatible IPv6 address. 5727 * Send out an IPv6 format packet. 5728 */ 5729 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5730 pid); 5731 if (error != 0) 5732 goto ud_error; 5733 5734 return (0); 5735 } 5736 /* 5737 * If the local address is not zero or a mapped address 5738 * return an error. It would be possible to send an IPv4 5739 * packet but the response would never make it back to the 5740 * application since it is bound to a non-mapped address. 
5741 */ 5742 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5743 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5744 error = EADDRNOTAVAIL; 5745 goto ud_error; 5746 } 5747 /* Send IPv4 packet without modifying udp_ipversion */ 5748 /* Extract port and ipaddr */ 5749 port = sin6->sin6_port; 5750 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5751 srcid = sin6->__sin6_src_id; 5752 break; 5753 5754 case AF_INET: 5755 sin = (sin_t *)addr; 5756 /* Extract port and ipaddr */ 5757 port = sin->sin_port; 5758 v4dst = sin->sin_addr.s_addr; 5759 srcid = 0; 5760 break; 5761 } 5762 5763 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5764 msg, cr, pid); 5765 5766 if (error == 0) { 5767 ASSERT(mp == NULL); 5768 return (0); 5769 } 5770 5771 ud_error: 5772 ASSERT(mp != NULL); 5773 5774 return (error); 5775 } 5776 5777 /* 5778 * This routine handles all messages passed downstream. It either 5779 * consumes the message or passes it downstream; it never queues a 5780 * a message. 5781 * 5782 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 5783 * is valid when we are directly beneath the stream head, and thus sockfs 5784 * is able to bypass STREAMS and directly call us, passing along the sockaddr 5785 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 5786 * connected endpoints. 5787 */ 5788 void 5789 udp_wput(queue_t *q, mblk_t *mp) 5790 { 5791 conn_t *connp = Q_TO_CONN(q); 5792 udp_t *udp = connp->conn_udp; 5793 int error = 0; 5794 struct sockaddr *addr; 5795 socklen_t addrlen; 5796 udp_stack_t *us = udp->udp_us; 5797 5798 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 5799 "udp_wput_start: queue %p mp %p", q, mp); 5800 5801 /* 5802 * We directly handle several cases here: T_UNITDATA_REQ message 5803 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 5804 * socket. 5805 */ 5806 switch (DB_TYPE(mp)) { 5807 case M_DATA: 5808 /* 5809 * Quick check for error cases. 
Checks will be done again 5810 * under the lock later on 5811 */ 5812 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 5813 /* Not connected; address is required */ 5814 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5815 UDP_STAT(us, udp_out_err_notconn); 5816 freemsg(mp); 5817 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5818 "udp_wput_end: connp %p (%S)", connp, 5819 "not-connected; address required"); 5820 return; 5821 } 5822 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 5823 return; 5824 5825 case M_PROTO: 5826 case M_PCPROTO: { 5827 struct T_unitdata_req *tudr; 5828 5829 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 5830 tudr = (struct T_unitdata_req *)mp->b_rptr; 5831 5832 /* Handle valid T_UNITDATA_REQ here */ 5833 if (MBLKL(mp) >= sizeof (*tudr) && 5834 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 5835 if (mp->b_cont == NULL) { 5836 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5837 "udp_wput_end: q %p (%S)", q, "badaddr"); 5838 error = EPROTO; 5839 goto ud_error; 5840 } 5841 5842 if (!MBLKIN(mp, 0, tudr->DEST_offset + 5843 tudr->DEST_length)) { 5844 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5845 "udp_wput_end: q %p (%S)", q, "badaddr"); 5846 error = EADDRNOTAVAIL; 5847 goto ud_error; 5848 } 5849 /* 5850 * If a port has not been bound to the stream, fail. 5851 * This is not a problem when sockfs is directly 5852 * above us, because it will ensure that the socket 5853 * is first bound before allowing data to be sent. 
5854 */ 5855 if (udp->udp_state == TS_UNBND) { 5856 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5857 "udp_wput_end: q %p (%S)", q, "outstate"); 5858 error = EPROTO; 5859 goto ud_error; 5860 } 5861 addr = (struct sockaddr *) 5862 &mp->b_rptr[tudr->DEST_offset]; 5863 addrlen = tudr->DEST_length; 5864 if (tudr->OPT_length != 0) 5865 UDP_STAT(us, udp_out_opt); 5866 break; 5867 } 5868 /* FALLTHRU */ 5869 } 5870 default: 5871 udp_wput_other(q, mp); 5872 return; 5873 } 5874 ASSERT(addr != NULL); 5875 5876 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 5877 -1); 5878 if (error != 0) { 5879 ud_error: 5880 UDP_STAT(us, udp_out_err_output); 5881 ASSERT(mp != NULL); 5882 /* mp is freed by the following routine */ 5883 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 5884 (t_scalar_t)error); 5885 } 5886 } 5887 5888 /* ARGSUSED */ 5889 static void 5890 udp_wput_fallback(queue_t *wq, mblk_t *mp) 5891 { 5892 #ifdef DEBUG 5893 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 5894 #endif 5895 freemsg(mp); 5896 } 5897 5898 5899 /* 5900 * udp_output_v6(): 5901 * Assumes that udp_wput did some sanity checking on the destination 5902 * address. 
5903 */ 5904 static mblk_t * 5905 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 5906 struct nmsghdr *msg, cred_t *cr, pid_t pid) 5907 { 5908 ip6_t *ip6h; 5909 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 5910 mblk_t *mp1 = mp; 5911 mblk_t *mp2; 5912 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 5913 size_t ip_len; 5914 udpha_t *udph; 5915 udp_t *udp = connp->conn_udp; 5916 udp_stack_t *us = udp->udp_us; 5917 queue_t *q = connp->conn_wq; 5918 ip6_pkt_t ipp_s; /* For ancillary data options */ 5919 ip6_pkt_t *ipp = &ipp_s; 5920 ip6_pkt_t *tipp; /* temporary ipp */ 5921 uint32_t csum = 0; 5922 uint_t ignore = 0; 5923 uint_t option_exists = 0, is_sticky = 0; 5924 uint8_t *cp; 5925 uint8_t *nxthdr_ptr; 5926 in6_addr_t ip6_dst; 5927 in_port_t port; 5928 udpattrs_t attrs; 5929 boolean_t opt_present; 5930 ip6_hbh_t *hopoptsptr = NULL; 5931 uint_t hopoptslen = 0; 5932 boolean_t is_ancillary = B_FALSE; 5933 size_t sth_wroff = 0; 5934 ire_t *ire; 5935 boolean_t update_lastdst = B_FALSE; 5936 5937 *error = 0; 5938 5939 /* 5940 * If the local address is a mapped address return 5941 * an error. 5942 * It would be possible to send an IPv6 packet but the 5943 * response would never make it back to the application 5944 * since it is bound to a mapped address. 
5945 */ 5946 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 5947 *error = EADDRNOTAVAIL; 5948 goto done; 5949 } 5950 5951 ipp->ipp_fields = 0; 5952 ipp->ipp_sticky_ignored = 0; 5953 5954 /* 5955 * If TPI options passed in, feed it for verification and handling 5956 */ 5957 attrs.udpattr_credset = B_FALSE; 5958 opt_present = B_FALSE; 5959 if (IPCL_IS_NONSTR(connp)) { 5960 if (msg->msg_controllen != 0) { 5961 attrs.udpattr_ipp6 = ipp; 5962 attrs.udpattr_mb = mp; 5963 5964 rw_enter(&udp->udp_rwlock, RW_WRITER); 5965 *error = process_auxiliary_options(connp, 5966 msg->msg_control, msg->msg_controllen, 5967 &attrs, &udp_opt_obj, udp_opt_set, cr); 5968 rw_exit(&udp->udp_rwlock); 5969 if (*error) 5970 goto done; 5971 ASSERT(*error == 0); 5972 opt_present = B_TRUE; 5973 } 5974 } else { 5975 if (DB_TYPE(mp) != M_DATA) { 5976 mp1 = mp->b_cont; 5977 if (((struct T_unitdata_req *) 5978 mp->b_rptr)->OPT_length != 0) { 5979 attrs.udpattr_ipp6 = ipp; 5980 attrs.udpattr_mb = mp; 5981 if (udp_unitdata_opt_process(q, mp, error, 5982 &attrs) < 0) { 5983 goto done; 5984 } 5985 ASSERT(*error == 0); 5986 opt_present = B_TRUE; 5987 } 5988 } 5989 } 5990 5991 /* 5992 * Determine whether we need to mark the mblk with the user's 5993 * credentials. 5994 * If labeled then sockfs would have already done this. 
5995 */ 5996 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 5997 ire = connp->conn_ire_cache; 5998 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) || 5999 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6000 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6001 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 6002 mblk_setcred(mp, cr, pid); 6003 } 6004 6005 rw_enter(&udp->udp_rwlock, RW_READER); 6006 ignore = ipp->ipp_sticky_ignored; 6007 6008 /* mp1 points to the M_DATA mblk carrying the packet */ 6009 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6010 6011 if (sin6->sin6_scope_id != 0 && 6012 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6013 /* 6014 * IPPF_SCOPE_ID is special. It's neither a sticky 6015 * option nor ancillary data. It needs to be 6016 * explicitly set in options_exists. 6017 */ 6018 option_exists |= IPPF_SCOPE_ID; 6019 } 6020 6021 /* 6022 * Compute the destination address 6023 */ 6024 ip6_dst = sin6->sin6_addr; 6025 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6026 ip6_dst = ipv6_loopback; 6027 6028 port = sin6->sin6_port; 6029 6030 /* 6031 * Cluster and TSOL notes, Cluster check: 6032 * see comments in udp_output_v4(). 6033 */ 6034 mutex_enter(&connp->conn_lock); 6035 6036 if (cl_inet_connect2 != NULL && 6037 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6038 port != udp->udp_lastdstport)) { 6039 mutex_exit(&connp->conn_lock); 6040 *error = 0; 6041 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6042 if (*error != 0) { 6043 *error = EHOSTUNREACH; 6044 rw_exit(&udp->udp_rwlock); 6045 goto done; 6046 } 6047 update_lastdst = B_TRUE; 6048 mutex_enter(&connp->conn_lock); 6049 } 6050 6051 /* 6052 * If we're not going to the same destination as last time, then 6053 * recompute the label required. This is done in a separate routine to 6054 * avoid blowing up our stack here. 
6055 * 6056 * TSOL Note: Since we are not in WRITER mode, UDP packets 6057 * to different destination may require different labels, 6058 * or worse, UDP packets to same IP address may require 6059 * different labels due to use of shared all-zones address. 6060 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6061 * and sticky ipp_hopoptslen are consistent for the current 6062 * destination and are updated atomically. 6063 */ 6064 if (is_system_labeled()) { 6065 cred_t *credp; 6066 pid_t cpid; 6067 6068 /* Using UDP MLP requires SCM_UCRED from user */ 6069 if (connp->conn_mlp_type != mlptSingle && 6070 !attrs.udpattr_credset) { 6071 DTRACE_PROBE4( 6072 tx__ip__log__info__output__udp6, 6073 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6074 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6075 *error = EINVAL; 6076 rw_exit(&udp->udp_rwlock); 6077 mutex_exit(&connp->conn_lock); 6078 goto done; 6079 } 6080 /* 6081 * update label option for this UDP socket if 6082 * - the destination has changed, 6083 * - the UDP socket is MLP, or 6084 * - the cred attached to the mblk changed. 6085 */ 6086 credp = msg_getcred(mp, &cpid); 6087 if (opt_present || 6088 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6089 connp->conn_mlp_type != mlptSingle || 6090 credp != udp->udp_last_cred) { 6091 if ((*error = udp_update_label_v6(q, mp, &ip6_dst)) 6092 != 0) { 6093 rw_exit(&udp->udp_rwlock); 6094 mutex_exit(&connp->conn_lock); 6095 goto done; 6096 } 6097 update_lastdst = B_TRUE; 6098 } 6099 /* 6100 * Attach the effective cred to the mblk to ensure future 6101 * routing decisions will be based on it's label. 6102 */ 6103 mblk_setcred(mp, udp->udp_effective_cred, cpid); 6104 } 6105 6106 if (update_lastdst) { 6107 udp->udp_v6lastdst = ip6_dst; 6108 udp->udp_lastdstport = port; 6109 } 6110 6111 /* 6112 * If there's a security label here, then we ignore any options the 6113 * user may try to set. We keep the peer's label as a hidden sticky 6114 * option. 
We make a private copy of this label before releasing the 6115 * lock so that label is kept consistent with the destination addr. 6116 */ 6117 if (udp->udp_label_len_v6 > 0) { 6118 ignore &= ~IPPF_HOPOPTS; 6119 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6120 } 6121 6122 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6123 /* No sticky options nor ancillary data. */ 6124 mutex_exit(&connp->conn_lock); 6125 goto no_options; 6126 } 6127 6128 /* 6129 * Go through the options figuring out where each is going to 6130 * come from and build two masks. The first mask indicates if 6131 * the option exists at all. The second mask indicates if the 6132 * option is sticky or ancillary. 6133 */ 6134 if (!(ignore & IPPF_HOPOPTS)) { 6135 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6136 option_exists |= IPPF_HOPOPTS; 6137 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6138 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6139 option_exists |= IPPF_HOPOPTS; 6140 is_sticky |= IPPF_HOPOPTS; 6141 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6142 hopoptsptr = kmem_alloc( 6143 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6144 if (hopoptsptr == NULL) { 6145 *error = ENOMEM; 6146 mutex_exit(&connp->conn_lock); 6147 goto done; 6148 } 6149 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6150 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6151 hopoptslen); 6152 udp_ip_hdr_len += hopoptslen; 6153 } 6154 } 6155 mutex_exit(&connp->conn_lock); 6156 6157 if (!(ignore & IPPF_RTHDR)) { 6158 if (ipp->ipp_fields & IPPF_RTHDR) { 6159 option_exists |= IPPF_RTHDR; 6160 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6161 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6162 option_exists |= IPPF_RTHDR; 6163 is_sticky |= IPPF_RTHDR; 6164 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6165 } 6166 } 6167 6168 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6169 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6170 option_exists |= IPPF_RTDSTOPTS; 6171 udp_ip_hdr_len += 
ipp->ipp_rtdstoptslen; 6172 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6173 option_exists |= IPPF_RTDSTOPTS; 6174 is_sticky |= IPPF_RTDSTOPTS; 6175 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6176 } 6177 } 6178 6179 if (!(ignore & IPPF_DSTOPTS)) { 6180 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6181 option_exists |= IPPF_DSTOPTS; 6182 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6183 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6184 option_exists |= IPPF_DSTOPTS; 6185 is_sticky |= IPPF_DSTOPTS; 6186 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6187 } 6188 } 6189 6190 if (!(ignore & IPPF_IFINDEX)) { 6191 if (ipp->ipp_fields & IPPF_IFINDEX) { 6192 option_exists |= IPPF_IFINDEX; 6193 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6194 option_exists |= IPPF_IFINDEX; 6195 is_sticky |= IPPF_IFINDEX; 6196 } 6197 } 6198 6199 if (!(ignore & IPPF_ADDR)) { 6200 if (ipp->ipp_fields & IPPF_ADDR) { 6201 option_exists |= IPPF_ADDR; 6202 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6203 option_exists |= IPPF_ADDR; 6204 is_sticky |= IPPF_ADDR; 6205 } 6206 } 6207 6208 if (!(ignore & IPPF_DONTFRAG)) { 6209 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6210 option_exists |= IPPF_DONTFRAG; 6211 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6212 option_exists |= IPPF_DONTFRAG; 6213 is_sticky |= IPPF_DONTFRAG; 6214 } 6215 } 6216 6217 if (!(ignore & IPPF_USE_MIN_MTU)) { 6218 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6219 option_exists |= IPPF_USE_MIN_MTU; 6220 } else if (udp->udp_sticky_ipp.ipp_fields & 6221 IPPF_USE_MIN_MTU) { 6222 option_exists |= IPPF_USE_MIN_MTU; 6223 is_sticky |= IPPF_USE_MIN_MTU; 6224 } 6225 } 6226 6227 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6228 option_exists |= IPPF_HOPLIMIT; 6229 /* IPV6_HOPLIMIT can never be sticky */ 6230 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6231 6232 if (!(ignore & IPPF_UNICAST_HOPS) && 6233 
(udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6234 option_exists |= IPPF_UNICAST_HOPS; 6235 is_sticky |= IPPF_UNICAST_HOPS; 6236 } 6237 6238 if (!(ignore & IPPF_MULTICAST_HOPS) && 6239 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6240 option_exists |= IPPF_MULTICAST_HOPS; 6241 is_sticky |= IPPF_MULTICAST_HOPS; 6242 } 6243 6244 if (!(ignore & IPPF_TCLASS)) { 6245 if (ipp->ipp_fields & IPPF_TCLASS) { 6246 option_exists |= IPPF_TCLASS; 6247 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6248 option_exists |= IPPF_TCLASS; 6249 is_sticky |= IPPF_TCLASS; 6250 } 6251 } 6252 6253 if (!(ignore & IPPF_NEXTHOP) && 6254 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6255 option_exists |= IPPF_NEXTHOP; 6256 is_sticky |= IPPF_NEXTHOP; 6257 } 6258 6259 no_options: 6260 6261 /* 6262 * If any options carried in the ip6i_t were specified, we 6263 * need to account for the ip6i_t in the data we'll be sending 6264 * down. 6265 */ 6266 if (option_exists & IPPF_HAS_IP6I) 6267 udp_ip_hdr_len += sizeof (ip6i_t); 6268 6269 /* check/fix buffer config, setup pointers into it */ 6270 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6271 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6272 !OK_32PTR(ip6h)) { 6273 6274 /* Try to get everything in a single mblk next time */ 6275 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6276 udp->udp_max_hdr_len = udp_ip_hdr_len; 6277 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6278 } 6279 6280 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6281 if (mp2 == NULL) { 6282 *error = ENOMEM; 6283 rw_exit(&udp->udp_rwlock); 6284 goto done; 6285 } 6286 mp2->b_wptr = DB_LIM(mp2); 6287 mp2->b_cont = mp1; 6288 mp1 = mp2; 6289 if (DB_TYPE(mp) != M_DATA) 6290 mp->b_cont = mp1; 6291 else 6292 mp = mp1; 6293 6294 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6295 } 6296 mp1->b_rptr = (unsigned char *)ip6h; 6297 ip6i = (ip6i_t *)ip6h; 6298 6299 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 6300 if (option_exists & IPPF_HAS_IP6I) { 6301 ip6h = (ip6_t *)&ip6i[1]; 6302 ip6i->ip6i_flags = 0; 6303 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6304 6305 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6306 if (option_exists & IPPF_SCOPE_ID) { 6307 ip6i->ip6i_flags |= IP6I_IFINDEX; 6308 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6309 } else if (option_exists & IPPF_IFINDEX) { 6310 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6311 ASSERT(tipp->ipp_ifindex != 0); 6312 ip6i->ip6i_flags |= IP6I_IFINDEX; 6313 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6314 } 6315 6316 if (option_exists & IPPF_ADDR) { 6317 /* 6318 * Enable per-packet source address verification if 6319 * IPV6_PKTINFO specified the source address. 6320 * ip6_src is set in the transport's _wput function. 6321 */ 6322 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6323 } 6324 6325 if (option_exists & IPPF_DONTFRAG) { 6326 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6327 } 6328 6329 if (option_exists & IPPF_USE_MIN_MTU) { 6330 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6331 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6332 } 6333 6334 if (option_exists & IPPF_NEXTHOP) { 6335 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6336 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6337 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6338 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6339 } 6340 6341 /* 6342 * tell IP this is an ip6i_t private header 6343 */ 6344 ip6i->ip6i_nxt = IPPROTO_RAW; 6345 } 6346 6347 /* Initialize IPv6 header */ 6348 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6349 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6350 6351 /* Set the hoplimit of the outgoing packet. */ 6352 if (option_exists & IPPF_HOPLIMIT) { 6353 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 6354 ip6h->ip6_hops = ipp->ipp_hoplimit; 6355 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6356 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6357 ip6h->ip6_hops = udp->udp_multicast_ttl; 6358 if (option_exists & IPPF_MULTICAST_HOPS) 6359 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6360 } else { 6361 ip6h->ip6_hops = udp->udp_ttl; 6362 if (option_exists & IPPF_UNICAST_HOPS) 6363 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6364 } 6365 6366 if (option_exists & IPPF_ADDR) { 6367 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6368 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6369 ip6h->ip6_src = tipp->ipp_addr; 6370 } else { 6371 /* 6372 * The source address was not set using IPV6_PKTINFO. 6373 * First look at the bound source. 6374 * If unspecified fallback to __sin6_src_id. 6375 */ 6376 ip6h->ip6_src = udp->udp_v6src; 6377 if (sin6->__sin6_src_id != 0 && 6378 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6379 ip_srcid_find_id(sin6->__sin6_src_id, 6380 &ip6h->ip6_src, connp->conn_zoneid, 6381 us->us_netstack); 6382 } 6383 } 6384 6385 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6386 cp = (uint8_t *)&ip6h[1]; 6387 6388 /* 6389 * Here's where we have to start stringing together 6390 * any extension headers in the right order: 6391 * Hop-by-hop, destination, routing, and final destination opts. 
6392 */ 6393 if (option_exists & IPPF_HOPOPTS) { 6394 /* Hop-by-hop options */ 6395 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6396 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6397 if (hopoptslen == 0) { 6398 hopoptsptr = tipp->ipp_hopopts; 6399 hopoptslen = tipp->ipp_hopoptslen; 6400 is_ancillary = B_TRUE; 6401 } 6402 6403 *nxthdr_ptr = IPPROTO_HOPOPTS; 6404 nxthdr_ptr = &hbh->ip6h_nxt; 6405 6406 bcopy(hopoptsptr, cp, hopoptslen); 6407 cp += hopoptslen; 6408 6409 if (hopoptsptr != NULL && !is_ancillary) { 6410 kmem_free(hopoptsptr, hopoptslen); 6411 hopoptsptr = NULL; 6412 hopoptslen = 0; 6413 } 6414 } 6415 /* 6416 * En-route destination options 6417 * Only do them if there's a routing header as well 6418 */ 6419 if (option_exists & IPPF_RTDSTOPTS) { 6420 ip6_dest_t *dst = (ip6_dest_t *)cp; 6421 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6422 6423 *nxthdr_ptr = IPPROTO_DSTOPTS; 6424 nxthdr_ptr = &dst->ip6d_nxt; 6425 6426 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6427 cp += tipp->ipp_rtdstoptslen; 6428 } 6429 /* 6430 * Routing header next 6431 */ 6432 if (option_exists & IPPF_RTHDR) { 6433 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6434 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6435 6436 *nxthdr_ptr = IPPROTO_ROUTING; 6437 nxthdr_ptr = &rt->ip6r_nxt; 6438 6439 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6440 cp += tipp->ipp_rthdrlen; 6441 } 6442 /* 6443 * Do ultimate destination options 6444 */ 6445 if (option_exists & IPPF_DSTOPTS) { 6446 ip6_dest_t *dest = (ip6_dest_t *)cp; 6447 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6448 6449 *nxthdr_ptr = IPPROTO_DSTOPTS; 6450 nxthdr_ptr = &dest->ip6d_nxt; 6451 6452 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6453 cp += tipp->ipp_dstoptslen; 6454 } 6455 /* 6456 * Now set the last header pointer to the proto passed in 6457 */ 6458 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6459 *nxthdr_ptr = IPPROTO_UDP; 6460 6461 /* Update UDP header */ 6462 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 6463 udph->uha_dst_port = sin6->sin6_port; 6464 udph->uha_src_port = udp->udp_port; 6465 6466 /* 6467 * Copy in the destination address 6468 */ 6469 ip6h->ip6_dst = ip6_dst; 6470 6471 ip6h->ip6_vcf = 6472 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6473 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6474 6475 if (option_exists & IPPF_TCLASS) { 6476 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6477 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6478 tipp->ipp_tclass); 6479 } 6480 rw_exit(&udp->udp_rwlock); 6481 6482 if (option_exists & IPPF_RTHDR) { 6483 ip6_rthdr_t *rth; 6484 6485 /* 6486 * Perform any processing needed for source routing. 6487 * We know that all extension headers will be in the same mblk 6488 * as the IPv6 header. 6489 */ 6490 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6491 if (rth != NULL && rth->ip6r_segleft != 0) { 6492 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6493 /* 6494 * Drop packet - only support Type 0 routing. 6495 * Notify the application as well. 6496 */ 6497 *error = EPROTO; 6498 goto done; 6499 } 6500 6501 /* 6502 * rth->ip6r_len is twice the number of 6503 * addresses in the header. Thus it must be even. 6504 */ 6505 if (rth->ip6r_len & 0x1) { 6506 *error = EPROTO; 6507 goto done; 6508 } 6509 /* 6510 * Shuffle the routing header and ip6_dst 6511 * addresses, and get the checksum difference 6512 * between the first hop (in ip6_dst) and 6513 * the destination (in the last routing hdr entry). 6514 */ 6515 csum = ip_massage_options_v6(ip6h, rth, 6516 us->us_netstack); 6517 /* 6518 * Verify that the first hop isn't a mapped address. 6519 * Routers along the path need to do this verification 6520 * for subsequent hops. 
6521 */ 6522 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6523 *error = EADDRNOTAVAIL; 6524 goto done; 6525 } 6526 6527 cp += (rth->ip6r_len + 1)*8; 6528 } 6529 } 6530 6531 /* count up length of UDP packet */ 6532 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6533 if ((mp2 = mp1->b_cont) != NULL) { 6534 do { 6535 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6536 ip_len += (uint32_t)MBLKL(mp2); 6537 } while ((mp2 = mp2->b_cont) != NULL); 6538 } 6539 6540 /* 6541 * If the size of the packet is greater than the maximum allowed by 6542 * ip, return an error. Passing this down could cause panics because 6543 * the size will have wrapped and be inconsistent with the msg size. 6544 */ 6545 if (ip_len > IP_MAXPACKET) { 6546 *error = EMSGSIZE; 6547 goto done; 6548 } 6549 6550 /* Store the UDP length. Subtract length of extension hdrs */ 6551 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6552 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6553 6554 /* 6555 * We make it easy for IP to include our pseudo header 6556 * by putting our length in uh_checksum, modified (if 6557 * we have a routing header) by the checksum difference 6558 * between the ultimate destination and first hop addresses. 6559 * Note: UDP over IPv6 must always checksum the packet. 
6560 */ 6561 csum += udph->uha_length; 6562 csum = (csum & 0xFFFF) + (csum >> 16); 6563 udph->uha_checksum = (uint16_t)csum; 6564 6565 #ifdef _LITTLE_ENDIAN 6566 ip_len = htons(ip_len); 6567 #endif 6568 ip6h->ip6_plen = ip_len; 6569 6570 if (DB_TYPE(mp) != M_DATA) { 6571 cred_t *cr; 6572 pid_t cpid; 6573 6574 /* Move any cred from the T_UNITDATA_REQ to the packet */ 6575 cr = msg_extractcred(mp, &cpid); 6576 if (cr != NULL) { 6577 if (mp1->b_datap->db_credp != NULL) 6578 crfree(mp1->b_datap->db_credp); 6579 mp1->b_datap->db_credp = cr; 6580 mp1->b_datap->db_cpid = cpid; 6581 } 6582 6583 ASSERT(mp != mp1); 6584 freeb(mp); 6585 } 6586 6587 /* mp has been consumed and we'll return success */ 6588 ASSERT(*error == 0); 6589 mp = NULL; 6590 6591 /* We're done. Pass the packet to IP */ 6592 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6593 ip_output_v6(connp, mp1, q, IP_WPUT); 6594 6595 done: 6596 if (sth_wroff != 0) { 6597 (void) proto_set_tx_wroff(RD(q), connp, 6598 udp->udp_max_hdr_len + us->us_wroff_extra); 6599 } 6600 if (hopoptsptr != NULL && !is_ancillary) { 6601 kmem_free(hopoptsptr, hopoptslen); 6602 hopoptsptr = NULL; 6603 } 6604 if (*error != 0) { 6605 ASSERT(mp != NULL); 6606 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6607 } 6608 return (mp); 6609 } 6610 6611 6612 static int 6613 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6614 { 6615 sin_t *sin = (sin_t *)sa; 6616 sin6_t *sin6 = (sin6_t *)sa; 6617 6618 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6619 6620 if (udp->udp_state != TS_DATA_XFER) 6621 return (ENOTCONN); 6622 6623 switch (udp->udp_family) { 6624 case AF_INET: 6625 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6626 6627 if (*salenp < sizeof (sin_t)) 6628 return (EINVAL); 6629 6630 *salenp = sizeof (sin_t); 6631 *sin = sin_null; 6632 sin->sin_family = AF_INET; 6633 sin->sin_port = udp->udp_dstport; 6634 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6635 break; 6636 6637 case AF_INET6: 6638 if (*salenp < sizeof (sin6_t)) 6639 
return (EINVAL); 6640 6641 *salenp = sizeof (sin6_t); 6642 *sin6 = sin6_null; 6643 sin6->sin6_family = AF_INET6; 6644 sin6->sin6_port = udp->udp_dstport; 6645 sin6->sin6_addr = udp->udp_v6dst; 6646 sin6->sin6_flowinfo = udp->udp_flowinfo; 6647 break; 6648 } 6649 6650 return (0); 6651 } 6652 6653 static int 6654 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6655 { 6656 sin_t *sin = (sin_t *)sa; 6657 sin6_t *sin6 = (sin6_t *)sa; 6658 6659 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6660 6661 switch (udp->udp_family) { 6662 case AF_INET: 6663 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6664 6665 if (*salenp < sizeof (sin_t)) 6666 return (EINVAL); 6667 6668 *salenp = sizeof (sin_t); 6669 *sin = sin_null; 6670 sin->sin_family = AF_INET; 6671 sin->sin_port = udp->udp_port; 6672 6673 /* 6674 * If udp_v6src is unspecified, we might be bound to broadcast 6675 * / multicast. Use udp_bound_v6src as local address instead 6676 * (that could also still be unspecified). 6677 */ 6678 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6679 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6680 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6681 } else { 6682 sin->sin_addr.s_addr = 6683 V4_PART_OF_V6(udp->udp_bound_v6src); 6684 } 6685 break; 6686 6687 case AF_INET6: 6688 if (*salenp < sizeof (sin6_t)) 6689 return (EINVAL); 6690 6691 *salenp = sizeof (sin6_t); 6692 *sin6 = sin6_null; 6693 sin6->sin6_family = AF_INET6; 6694 sin6->sin6_port = udp->udp_port; 6695 sin6->sin6_flowinfo = udp->udp_flowinfo; 6696 6697 /* 6698 * If udp_v6src is unspecified, we might be bound to broadcast 6699 * / multicast. Use udp_bound_v6src as local address instead 6700 * (that could also still be unspecified). 6701 */ 6702 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6703 sin6->sin6_addr = udp->udp_v6src; 6704 else 6705 sin6->sin6_addr = udp->udp_bound_v6src; 6706 break; 6707 } 6708 6709 return (0); 6710 } 6711 6712 /* 6713 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
6714 */ 6715 static void 6716 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6717 { 6718 void *data; 6719 mblk_t *datamp = mp->b_cont; 6720 udp_t *udp = Q_TO_UDP(q); 6721 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6722 6723 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6724 cmdp->cb_error = EPROTO; 6725 qreply(q, mp); 6726 return; 6727 } 6728 data = datamp->b_rptr; 6729 6730 rw_enter(&udp->udp_rwlock, RW_READER); 6731 switch (cmdp->cb_cmd) { 6732 case TI_GETPEERNAME: 6733 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6734 break; 6735 case TI_GETMYNAME: 6736 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6737 break; 6738 default: 6739 cmdp->cb_error = EINVAL; 6740 break; 6741 } 6742 rw_exit(&udp->udp_rwlock); 6743 6744 qreply(q, mp); 6745 } 6746 6747 static void 6748 udp_use_pure_tpi(udp_t *udp) 6749 { 6750 rw_enter(&udp->udp_rwlock, RW_WRITER); 6751 udp->udp_issocket = B_FALSE; 6752 rw_exit(&udp->udp_rwlock); 6753 6754 UDP_STAT(udp->udp_us, udp_sock_fallback); 6755 } 6756 6757 static void 6758 udp_wput_other(queue_t *q, mblk_t *mp) 6759 { 6760 uchar_t *rptr = mp->b_rptr; 6761 struct datab *db; 6762 struct iocblk *iocp; 6763 cred_t *cr; 6764 conn_t *connp = Q_TO_CONN(q); 6765 udp_t *udp = connp->conn_udp; 6766 udp_stack_t *us; 6767 6768 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6769 "udp_wput_other_start: q %p", q); 6770 6771 us = udp->udp_us; 6772 db = mp->b_datap; 6773 6774 switch (db->db_type) { 6775 case M_CMD: 6776 udp_wput_cmdblk(q, mp); 6777 return; 6778 6779 case M_PROTO: 6780 case M_PCPROTO: 6781 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6782 freemsg(mp); 6783 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6784 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6785 return; 6786 } 6787 switch (((t_primp_t)rptr)->type) { 6788 case T_ADDR_REQ: 6789 udp_addr_req(q, mp); 6790 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6791 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6792 return; 6793 case O_T_BIND_REQ: 6794 case T_BIND_REQ: 
6795 udp_tpi_bind(q, mp); 6796 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6797 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 6798 return; 6799 case T_CONN_REQ: 6800 udp_tpi_connect(q, mp); 6801 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6802 "udp_wput_other_end: q %p (%S)", q, "connreq"); 6803 return; 6804 case T_CAPABILITY_REQ: 6805 udp_capability_req(q, mp); 6806 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6807 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 6808 return; 6809 case T_INFO_REQ: 6810 udp_info_req(q, mp); 6811 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6812 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 6813 return; 6814 case T_UNITDATA_REQ: 6815 /* 6816 * If a T_UNITDATA_REQ gets here, the address must 6817 * be bad. Valid T_UNITDATA_REQs are handled 6818 * in udp_wput. 6819 */ 6820 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 6821 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6822 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 6823 return; 6824 case T_UNBIND_REQ: 6825 udp_tpi_unbind(q, mp); 6826 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6827 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 6828 return; 6829 case T_SVR4_OPTMGMT_REQ: 6830 /* 6831 * All Solaris components should pass a db_credp 6832 * for this TPI message, hence we ASSERT. 6833 * But in case there is some other M_PROTO that looks 6834 * like a TPI message sent by some other kernel 6835 * component, we check and return an error. 6836 */ 6837 cr = msg_getcred(mp, NULL); 6838 ASSERT(cr != NULL); 6839 if (cr == NULL) { 6840 udp_err_ack(q, mp, TSYSERR, EINVAL); 6841 return; 6842 } 6843 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 6844 cr)) { 6845 (void) svr4_optcom_req(q, 6846 mp, cr, &udp_opt_obj, B_TRUE); 6847 } 6848 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6849 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6850 return; 6851 6852 case T_OPTMGMT_REQ: 6853 /* 6854 * All Solaris components should pass a db_credp 6855 * for this TPI message, hence we ASSERT. 
6856 * But in case there is some other M_PROTO that looks 6857 * like a TPI message sent by some other kernel 6858 * component, we check and return an error. 6859 */ 6860 cr = msg_getcred(mp, NULL); 6861 ASSERT(cr != NULL); 6862 if (cr == NULL) { 6863 udp_err_ack(q, mp, TSYSERR, EINVAL); 6864 return; 6865 } 6866 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 6867 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6868 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6869 return; 6870 6871 case T_DISCON_REQ: 6872 udp_tpi_disconnect(q, mp); 6873 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6874 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 6875 return; 6876 6877 /* The following TPI message is not supported by udp. */ 6878 case O_T_CONN_RES: 6879 case T_CONN_RES: 6880 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6881 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6882 "udp_wput_other_end: q %p (%S)", q, 6883 "connres/disconreq"); 6884 return; 6885 6886 /* The following 3 TPI messages are illegal for udp. */ 6887 case T_DATA_REQ: 6888 case T_EXDATA_REQ: 6889 case T_ORDREL_REQ: 6890 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6891 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6892 "udp_wput_other_end: q %p (%S)", q, 6893 "data/exdata/ordrel"); 6894 return; 6895 default: 6896 break; 6897 } 6898 break; 6899 case M_FLUSH: 6900 if (*rptr & FLUSHW) 6901 flushq(q, FLUSHDATA); 6902 break; 6903 case M_IOCTL: 6904 iocp = (struct iocblk *)mp->b_rptr; 6905 switch (iocp->ioc_cmd) { 6906 case TI_GETPEERNAME: 6907 if (udp->udp_state != TS_DATA_XFER) { 6908 /* 6909 * If a default destination address has not 6910 * been associated with the stream, then we 6911 * don't know the peer's name. 
6912 */ 6913 iocp->ioc_error = ENOTCONN; 6914 iocp->ioc_count = 0; 6915 mp->b_datap->db_type = M_IOCACK; 6916 qreply(q, mp); 6917 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6918 "udp_wput_other_end: q %p (%S)", q, 6919 "getpeername"); 6920 return; 6921 } 6922 /* FALLTHRU */ 6923 case TI_GETMYNAME: { 6924 /* 6925 * For TI_GETPEERNAME and TI_GETMYNAME, we first 6926 * need to copyin the user's strbuf structure. 6927 * Processing will continue in the M_IOCDATA case 6928 * below. 6929 */ 6930 mi_copyin(q, mp, NULL, 6931 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 6932 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6933 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 6934 return; 6935 } 6936 case ND_SET: 6937 /* nd_getset performs the necessary checking */ 6938 case ND_GET: 6939 if (nd_getset(q, us->us_nd, mp)) { 6940 qreply(q, mp); 6941 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6942 "udp_wput_other_end: q %p (%S)", q, "get"); 6943 return; 6944 } 6945 break; 6946 case _SIOCSOCKFALLBACK: 6947 /* 6948 * Either sockmod is about to be popped and the 6949 * socket would now be treated as a plain stream, 6950 * or a module is about to be pushed so we have 6951 * to follow pure TPI semantics. 6952 */ 6953 if (!udp->udp_issocket) { 6954 DB_TYPE(mp) = M_IOCNAK; 6955 iocp->ioc_error = EINVAL; 6956 } else { 6957 udp_use_pure_tpi(udp); 6958 6959 DB_TYPE(mp) = M_IOCACK; 6960 iocp->ioc_error = 0; 6961 } 6962 iocp->ioc_count = 0; 6963 iocp->ioc_rval = 0; 6964 qreply(q, mp); 6965 return; 6966 default: 6967 break; 6968 } 6969 break; 6970 case M_IOCDATA: 6971 udp_wput_iocdata(q, mp); 6972 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6973 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 6974 return; 6975 default: 6976 /* Unrecognized messages are passed through without change. 
*/ 6977 break; 6978 } 6979 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6980 "udp_wput_other_end: q %p (%S)", q, "end"); 6981 ip_output(connp, mp, q, IP_WPUT); 6982 } 6983 6984 /* 6985 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 6986 * messages. 6987 */ 6988 static void 6989 udp_wput_iocdata(queue_t *q, mblk_t *mp) 6990 { 6991 mblk_t *mp1; 6992 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 6993 STRUCT_HANDLE(strbuf, sb); 6994 udp_t *udp = Q_TO_UDP(q); 6995 int error; 6996 uint_t addrlen; 6997 6998 /* Make sure it is one of ours. */ 6999 switch (iocp->ioc_cmd) { 7000 case TI_GETMYNAME: 7001 case TI_GETPEERNAME: 7002 break; 7003 default: 7004 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7005 return; 7006 } 7007 7008 switch (mi_copy_state(q, mp, &mp1)) { 7009 case -1: 7010 return; 7011 case MI_COPY_CASE(MI_COPY_IN, 1): 7012 break; 7013 case MI_COPY_CASE(MI_COPY_OUT, 1): 7014 /* 7015 * The address has been copied out, so now 7016 * copyout the strbuf. 7017 */ 7018 mi_copyout(q, mp); 7019 return; 7020 case MI_COPY_CASE(MI_COPY_OUT, 2): 7021 /* 7022 * The address and strbuf have been copied out. 7023 * We're done, so just acknowledge the original 7024 * M_IOCTL. 7025 */ 7026 mi_copy_done(q, mp, 0); 7027 return; 7028 default: 7029 /* 7030 * Something strange has happened, so acknowledge 7031 * the original M_IOCTL with an EPROTO error. 7032 */ 7033 mi_copy_done(q, mp, EPROTO); 7034 return; 7035 } 7036 7037 /* 7038 * Now we have the strbuf structure for TI_GETMYNAME 7039 * and TI_GETPEERNAME. Next we copyout the requested 7040 * address and then we'll copyout the strbuf. 7041 */ 7042 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7043 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); 7044 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7045 mi_copy_done(q, mp, EINVAL); 7046 return; 7047 } 7048 7049 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7050 7051 if (mp1 == NULL) 7052 return; 7053 7054 rw_enter(&udp->udp_rwlock, RW_READER); 7055 switch (iocp->ioc_cmd) { 7056 case TI_GETMYNAME: 7057 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); 7058 break; 7059 case TI_GETPEERNAME: 7060 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7061 break; 7062 } 7063 rw_exit(&udp->udp_rwlock); 7064 7065 if (error != 0) { 7066 mi_copy_done(q, mp, error); 7067 } else { 7068 mp1->b_wptr += addrlen; 7069 STRUCT_FSET(sb, len, addrlen); 7070 7071 /* Copy out the address */ 7072 mi_copyout(q, mp); 7073 } 7074 } 7075 7076 static int 7077 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7078 udpattrs_t *udpattrs) 7079 { 7080 struct T_unitdata_req *udreqp; 7081 int is_absreq_failure; 7082 cred_t *cr; 7083 7084 ASSERT(((t_primp_t)mp->b_rptr)->type); 7085 7086 /* 7087 * All Solaris components should pass a db_credp 7088 * for this TPI message, hence we should ASSERT. 7089 * However, RPC (svc_clts_ksend) does this odd thing where it 7090 * passes the options from a T_UNITDATA_IND unchanged in a 7091 * T_UNITDATA_REQ. While that is the right thing to do for 7092 * some options, SCM_UCRED being the key one, this also makes it 7093 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 
7094 */ 7095 cr = msg_getcred(mp, NULL); 7096 if (cr == NULL) { 7097 cr = Q_TO_CONN(q)->conn_cred; 7098 } 7099 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7100 7101 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7102 udreqp->OPT_offset, cr, &udp_opt_obj, 7103 udpattrs, &is_absreq_failure); 7104 7105 if (*errorp != 0) { 7106 /* 7107 * Note: No special action needed in this 7108 * module for "is_absreq_failure" 7109 */ 7110 return (-1); /* failure */ 7111 } 7112 ASSERT(is_absreq_failure == 0); 7113 return (0); /* success */ 7114 } 7115 7116 void 7117 udp_ddi_g_init(void) 7118 { 7119 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7120 udp_opt_obj.odb_opt_arr_cnt); 7121 7122 /* 7123 * We want to be informed each time a stack is created or 7124 * destroyed in the kernel, so we can maintain the 7125 * set of udp_stack_t's. 7126 */ 7127 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7128 } 7129 7130 void 7131 udp_ddi_g_destroy(void) 7132 { 7133 netstack_unregister(NS_UDP); 7134 } 7135 7136 #define INET_NAME "ip" 7137 7138 /* 7139 * Initialize the UDP stack instance. 7140 */ 7141 static void * 7142 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7143 { 7144 udp_stack_t *us; 7145 udpparam_t *pa; 7146 int i; 7147 int error = 0; 7148 major_t major; 7149 7150 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7151 us->us_netstack = ns; 7152 7153 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7154 us->us_epriv_ports[0] = 2049; 7155 us->us_epriv_ports[1] = 4045; 7156 7157 /* 7158 * The smallest anonymous port in the priviledged port range which UDP 7159 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7160 */ 7161 us->us_min_anonpriv_port = 512; 7162 7163 us->us_bind_fanout_size = udp_bind_fanout_size; 7164 7165 /* Roundup variable that might have been modified in /etc/system */ 7166 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7167 /* Not a power of two. 
Round up to nearest power of two */ 7168 for (i = 0; i < 31; i++) { 7169 if (us->us_bind_fanout_size < (1 << i)) 7170 break; 7171 } 7172 us->us_bind_fanout_size = 1 << i; 7173 } 7174 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7175 sizeof (udp_fanout_t), KM_SLEEP); 7176 for (i = 0; i < us->us_bind_fanout_size; i++) { 7177 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7178 NULL); 7179 } 7180 7181 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7182 7183 us->us_param_arr = pa; 7184 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7185 7186 (void) udp_param_register(&us->us_nd, 7187 us->us_param_arr, A_CNT(udp_param_arr)); 7188 7189 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7190 us->us_mibkp = udp_kstat_init(stackid); 7191 7192 major = mod_name_to_major(INET_NAME); 7193 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7194 ASSERT(error == 0); 7195 return (us); 7196 } 7197 7198 /* 7199 * Free the UDP stack instance. 
7200 */ 7201 static void 7202 udp_stack_fini(netstackid_t stackid, void *arg) 7203 { 7204 udp_stack_t *us = (udp_stack_t *)arg; 7205 int i; 7206 7207 for (i = 0; i < us->us_bind_fanout_size; i++) { 7208 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7209 } 7210 7211 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7212 sizeof (udp_fanout_t)); 7213 7214 us->us_bind_fanout = NULL; 7215 7216 nd_free(&us->us_nd); 7217 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7218 us->us_param_arr = NULL; 7219 7220 udp_kstat_fini(stackid, us->us_mibkp); 7221 us->us_mibkp = NULL; 7222 7223 udp_kstat2_fini(stackid, us->us_kstat); 7224 us->us_kstat = NULL; 7225 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7226 7227 ldi_ident_release(us->us_ldi_ident); 7228 kmem_free(us, sizeof (*us)); 7229 } 7230 7231 static void * 7232 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7233 { 7234 kstat_t *ksp; 7235 7236 udp_stat_t template = { 7237 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7238 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7239 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7240 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7241 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7242 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7243 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7244 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7245 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7246 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7247 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7248 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7249 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7250 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7251 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7252 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7253 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7254 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7255 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7256 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7257 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7258 { "udp_in_recvrthdr", 
KSTAT_DATA_UINT64 }, 7259 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7260 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7261 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7262 #ifdef DEBUG 7263 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7264 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7265 #endif 7266 }; 7267 7268 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7269 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7270 KSTAT_FLAG_VIRTUAL, stackid); 7271 7272 if (ksp == NULL) 7273 return (NULL); 7274 7275 bcopy(&template, us_statisticsp, sizeof (template)); 7276 ksp->ks_data = (void *)us_statisticsp; 7277 ksp->ks_private = (void *)(uintptr_t)stackid; 7278 7279 kstat_install(ksp); 7280 return (ksp); 7281 } 7282 7283 static void 7284 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7285 { 7286 if (ksp != NULL) { 7287 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7288 kstat_delete_netstack(ksp, stackid); 7289 } 7290 } 7291 7292 static void * 7293 udp_kstat_init(netstackid_t stackid) 7294 { 7295 kstat_t *ksp; 7296 7297 udp_named_kstat_t template = { 7298 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7299 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7300 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7301 { "entrySize", KSTAT_DATA_INT32, 0 }, 7302 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7303 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7304 }; 7305 7306 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7307 KSTAT_TYPE_NAMED, 7308 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7309 7310 if (ksp == NULL || ksp->ks_data == NULL) 7311 return (NULL); 7312 7313 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7314 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7315 7316 bcopy(&template, ksp->ks_data, sizeof (template)); 7317 ksp->ks_update = udp_kstat_update; 7318 ksp->ks_private = (void *)(uintptr_t)stackid; 7319 7320 kstat_install(ksp); 7321 return (ksp); 7322 } 7323 7324 static void 7325 udp_kstat_fini(netstackid_t 
stackid, kstat_t *ksp) 7326 { 7327 if (ksp != NULL) { 7328 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7329 kstat_delete_netstack(ksp, stackid); 7330 } 7331 } 7332 7333 static int 7334 udp_kstat_update(kstat_t *kp, int rw) 7335 { 7336 udp_named_kstat_t *udpkp; 7337 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7338 netstack_t *ns; 7339 udp_stack_t *us; 7340 7341 if ((kp == NULL) || (kp->ks_data == NULL)) 7342 return (EIO); 7343 7344 if (rw == KSTAT_WRITE) 7345 return (EACCES); 7346 7347 ns = netstack_find_by_stackid(stackid); 7348 if (ns == NULL) 7349 return (-1); 7350 us = ns->netstack_udp; 7351 if (us == NULL) { 7352 netstack_rele(ns); 7353 return (-1); 7354 } 7355 udpkp = (udp_named_kstat_t *)kp->ks_data; 7356 7357 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7358 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7359 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7360 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7361 netstack_rele(ns); 7362 return (0); 7363 } 7364 7365 static size_t 7366 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7367 { 7368 udp_stack_t *us = udp->udp_us; 7369 7370 /* We add a bit of extra buffering */ 7371 size += size >> 1; 7372 if (size > us->us_max_buf) 7373 size = us->us_max_buf; 7374 7375 udp->udp_rcv_hiwat = size; 7376 return (size); 7377 } 7378 7379 /* 7380 * For the lower queue so that UDP can be a dummy mux. 
7381 * Nobody should be sending 7382 * packets up this stream 7383 */ 7384 static void 7385 udp_lrput(queue_t *q, mblk_t *mp) 7386 { 7387 mblk_t *mp1; 7388 7389 switch (mp->b_datap->db_type) { 7390 case M_FLUSH: 7391 /* Turn around */ 7392 if (*mp->b_rptr & FLUSHW) { 7393 *mp->b_rptr &= ~FLUSHR; 7394 qreply(q, mp); 7395 return; 7396 } 7397 break; 7398 } 7399 /* Could receive messages that passed through ar_rput */ 7400 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7401 mp1->b_prev = mp1->b_next = NULL; 7402 freemsg(mp); 7403 } 7404 7405 /* 7406 * For the lower queue so that UDP can be a dummy mux. 7407 * Nobody should be sending packets down this stream. 7408 */ 7409 /* ARGSUSED */ 7410 void 7411 udp_lwput(queue_t *q, mblk_t *mp) 7412 { 7413 freemsg(mp); 7414 } 7415 7416 /* 7417 * Below routines for UDP socket module. 7418 */ 7419 7420 static conn_t * 7421 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7422 { 7423 udp_t *udp; 7424 conn_t *connp; 7425 zoneid_t zoneid; 7426 netstack_t *ns; 7427 udp_stack_t *us; 7428 7429 ns = netstack_find_by_cred(credp); 7430 ASSERT(ns != NULL); 7431 us = ns->netstack_udp; 7432 ASSERT(us != NULL); 7433 7434 /* 7435 * For exclusive stacks we set the zoneid to zero 7436 * to make UDP operate as if in the global zone. 7437 */ 7438 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7439 zoneid = GLOBAL_ZONEID; 7440 else 7441 zoneid = crgetzoneid(credp); 7442 7443 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7444 7445 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7446 if (connp == NULL) { 7447 netstack_rele(ns); 7448 return (NULL); 7449 } 7450 udp = connp->conn_udp; 7451 7452 /* 7453 * ipcl_conn_create did a netstack_hold. 
Undo the hold that was 7454 * done by netstack_find_by_cred() 7455 */ 7456 netstack_rele(ns); 7457 7458 rw_enter(&udp->udp_rwlock, RW_WRITER); 7459 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7460 ASSERT(connp->conn_udp == udp); 7461 ASSERT(udp->udp_connp == connp); 7462 7463 /* Set the initial state of the stream and the privilege status. */ 7464 udp->udp_state = TS_UNBND; 7465 if (isv6) { 7466 udp->udp_family = AF_INET6; 7467 udp->udp_ipversion = IPV6_VERSION; 7468 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7469 udp->udp_ttl = us->us_ipv6_hoplimit; 7470 connp->conn_af_isv6 = B_TRUE; 7471 } else { 7472 udp->udp_family = AF_INET; 7473 udp->udp_ipversion = IPV4_VERSION; 7474 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7475 udp->udp_ttl = us->us_ipv4_ttl; 7476 connp->conn_af_isv6 = B_FALSE; 7477 } 7478 7479 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7480 udp->udp_pending_op = -1; 7481 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7482 connp->conn_zoneid = zoneid; 7483 7484 udp->udp_open_time = lbolt64; 7485 udp->udp_open_pid = curproc->p_pid; 7486 7487 /* 7488 * If the caller has the process-wide flag set, then default to MAC 7489 * exempt mode. This allows read-down to unlabeled hosts. 
7490 */ 7491 if (getpflags(NET_MAC_AWARE, credp) != 0) 7492 connp->conn_mac_mode = CONN_MAC_AWARE; 7493 7494 connp->conn_ulp_labeled = is_system_labeled(); 7495 7496 udp->udp_us = us; 7497 7498 connp->conn_recv = udp_input; 7499 crhold(credp); 7500 connp->conn_cred = credp; 7501 7502 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7503 7504 rw_exit(&udp->udp_rwlock); 7505 7506 return (connp); 7507 } 7508 7509 /* ARGSUSED */ 7510 sock_lower_handle_t 7511 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7512 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7513 { 7514 udp_t *udp = NULL; 7515 udp_stack_t *us; 7516 conn_t *connp; 7517 boolean_t isv6; 7518 7519 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7520 (proto != 0 && proto != IPPROTO_UDP)) { 7521 *errorp = EPROTONOSUPPORT; 7522 return (NULL); 7523 } 7524 7525 if (family == AF_INET6) 7526 isv6 = B_TRUE; 7527 else 7528 isv6 = B_FALSE; 7529 7530 connp = udp_do_open(credp, isv6, flags); 7531 if (connp == NULL) { 7532 *errorp = ENOMEM; 7533 return (NULL); 7534 } 7535 7536 udp = connp->conn_udp; 7537 ASSERT(udp != NULL); 7538 us = udp->udp_us; 7539 ASSERT(us != NULL); 7540 7541 udp->udp_issocket = B_TRUE; 7542 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7543 7544 /* Set flow control */ 7545 rw_enter(&udp->udp_rwlock, RW_WRITER); 7546 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7547 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7548 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 7549 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 7550 udp->udp_xmit_lowat = us->us_xmit_lowat; 7551 7552 if (udp->udp_family == AF_INET6) { 7553 /* Build initial header template for transmit */ 7554 if ((*errorp = udp_build_hdrs(udp)) != 0) { 7555 rw_exit(&udp->udp_rwlock); 7556 ipcl_conn_destroy(connp); 7557 return (NULL); 7558 } 7559 } 7560 rw_exit(&udp->udp_rwlock); 7561 7562 connp->conn_flow_cntrld = B_FALSE; 7563 7564 ASSERT(us->us_ldi_ident != NULL); 7565 7566 if 
((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 7567 ip1dbg(("udp_create: create of IP helper stream failed\n")); 7568 udp_do_close(connp); 7569 return (NULL); 7570 } 7571 7572 /* Set the send flow control */ 7573 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 7574 connp->conn_wq->q_lowat = us->us_xmit_lowat; 7575 7576 mutex_enter(&connp->conn_lock); 7577 connp->conn_state_flags &= ~CONN_INCIPIENT; 7578 mutex_exit(&connp->conn_lock); 7579 7580 *errorp = 0; 7581 *smodep = SM_ATOMIC; 7582 *sock_downcalls = &sock_udp_downcalls; 7583 return ((sock_lower_handle_t)connp); 7584 } 7585 7586 /* ARGSUSED */ 7587 void 7588 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 7589 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 7590 { 7591 conn_t *connp = (conn_t *)proto_handle; 7592 udp_t *udp = connp->conn_udp; 7593 udp_stack_t *us = udp->udp_us; 7594 struct sock_proto_props sopp; 7595 7596 /* All Solaris components should pass a cred for this operation. */ 7597 ASSERT(cr != NULL); 7598 7599 connp->conn_upcalls = sock_upcalls; 7600 connp->conn_upper_handle = sock_handle; 7601 7602 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 7603 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 7604 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 7605 sopp.sopp_maxblk = INFPSZ; 7606 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 7607 sopp.sopp_maxaddrlen = sizeof (sin6_t); 7608 sopp.sopp_maxpsz = 7609 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 7610 UDP_MAXPACKET_IPV6; 7611 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : 7612 udp_mod_info.mi_minpsz; 7613 7614 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 7615 &sopp); 7616 } 7617 7618 static void 7619 udp_do_close(conn_t *connp) 7620 { 7621 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 7622 7623 udp_quiesce_conn(connp); 7624 ip_quiesce_conn(connp); 7625 7626 if (!IPCL_IS_NONSTR(connp)) { 7627 ASSERT(connp->conn_wq != NULL); 7628 ASSERT(connp->conn_rq != NULL); 7629 qprocsoff(connp->conn_rq); 7630 } 7631 7632 udp_close_free(connp); 7633 7634 /* 7635 * Now we are truly single threaded on this stream, and can 7636 * delete the things hanging off the connp, and finally the connp. 7637 * We removed this connp from the fanout list, it cannot be 7638 * accessed thru the fanouts, and we already waited for the 7639 * conn_ref to drop to 0. We are already in close, so 7640 * there cannot be any other thread from the top. qprocsoff 7641 * has completed, and service has completed or won't run in 7642 * future. 7643 */ 7644 ASSERT(connp->conn_ref == 1); 7645 if (!IPCL_IS_NONSTR(connp)) { 7646 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 7647 } else { 7648 ip_free_helper_stream(connp); 7649 } 7650 7651 connp->conn_ref--; 7652 ipcl_conn_destroy(connp); 7653 } 7654 7655 /* ARGSUSED */ 7656 int 7657 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 7658 { 7659 conn_t *connp = (conn_t *)proto_handle; 7660 7661 /* All Solaris components should pass a cred for this operation. 
*/ 7662 ASSERT(cr != NULL); 7663 7664 udp_do_close(connp); 7665 return (0); 7666 } 7667 7668 static int 7669 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 7670 boolean_t bind_to_req_port_only) 7671 { 7672 sin_t *sin; 7673 sin6_t *sin6; 7674 sin6_t sin6addr; 7675 in_port_t port; /* Host byte order */ 7676 in_port_t requested_port; /* Host byte order */ 7677 int count; 7678 in6_addr_t v6src; 7679 int loopmax; 7680 udp_fanout_t *udpf; 7681 in_port_t lport; /* Network byte order */ 7682 udp_t *udp; 7683 boolean_t is_inaddr_any; 7684 mlp_type_t addrtype, mlptype; 7685 udp_stack_t *us; 7686 int error = 0; 7687 mblk_t *mp = NULL; 7688 7689 udp = connp->conn_udp; 7690 us = udp->udp_us; 7691 7692 if (udp->udp_state != TS_UNBND) { 7693 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7694 "udp_bind: bad state, %u", udp->udp_state); 7695 return (-TOUTSTATE); 7696 } 7697 7698 switch (len) { 7699 case 0: 7700 if (udp->udp_family == AF_INET) { 7701 sin = (sin_t *)&sin6addr; 7702 *sin = sin_null; 7703 sin->sin_family = AF_INET; 7704 sin->sin_addr.s_addr = INADDR_ANY; 7705 udp->udp_ipversion = IPV4_VERSION; 7706 } else { 7707 ASSERT(udp->udp_family == AF_INET6); 7708 sin6 = (sin6_t *)&sin6addr; 7709 *sin6 = sin6_null; 7710 sin6->sin6_family = AF_INET6; 7711 V6_SET_ZERO(sin6->sin6_addr); 7712 udp->udp_ipversion = IPV6_VERSION; 7713 } 7714 port = 0; 7715 break; 7716 7717 case sizeof (sin_t): /* Complete IPv4 address */ 7718 sin = (sin_t *)sa; 7719 7720 if (sin == NULL || !OK_32PTR((char *)sin)) 7721 return (EINVAL); 7722 7723 if (udp->udp_family != AF_INET || 7724 sin->sin_family != AF_INET) { 7725 return (EAFNOSUPPORT); 7726 } 7727 port = ntohs(sin->sin_port); 7728 break; 7729 7730 case sizeof (sin6_t): /* complete IPv6 address */ 7731 sin6 = (sin6_t *)sa; 7732 7733 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 7734 return (EINVAL); 7735 7736 if (udp->udp_family != AF_INET6 || 7737 sin6->sin6_family != AF_INET6) { 7738 return (EAFNOSUPPORT); 7739 } 
7740 port = ntohs(sin6->sin6_port); 7741 break; 7742 7743 default: /* Invalid request */ 7744 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7745 "udp_bind: bad ADDR_length length %u", len); 7746 return (-TBADADDR); 7747 } 7748 7749 requested_port = port; 7750 7751 if (requested_port == 0 || !bind_to_req_port_only) 7752 bind_to_req_port_only = B_FALSE; 7753 else /* T_BIND_REQ and requested_port != 0 */ 7754 bind_to_req_port_only = B_TRUE; 7755 7756 if (requested_port == 0) { 7757 /* 7758 * If the application passed in zero for the port number, it 7759 * doesn't care which port number we bind to. Get one in the 7760 * valid range. 7761 */ 7762 if (udp->udp_anon_priv_bind) { 7763 port = udp_get_next_priv_port(udp); 7764 } else { 7765 port = udp_update_next_port(udp, 7766 us->us_next_port_to_try, B_TRUE); 7767 } 7768 } else { 7769 /* 7770 * If the port is in the well-known privileged range, 7771 * make sure the caller was privileged. 7772 */ 7773 int i; 7774 boolean_t priv = B_FALSE; 7775 7776 if (port < us->us_smallest_nonpriv_port) { 7777 priv = B_TRUE; 7778 } else { 7779 for (i = 0; i < us->us_num_epriv_ports; i++) { 7780 if (port == us->us_epriv_ports[i]) { 7781 priv = B_TRUE; 7782 break; 7783 } 7784 } 7785 } 7786 7787 if (priv) { 7788 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 7789 return (-TACCES); 7790 } 7791 } 7792 7793 if (port == 0) 7794 return (-TNOADDR); 7795 7796 /* 7797 * The state must be TS_UNBND. TPI mandates that users must send 7798 * TPI primitives only 1 at a time and wait for the response before 7799 * sending the next primitive. 7800 */ 7801 rw_enter(&udp->udp_rwlock, RW_WRITER); 7802 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 7803 rw_exit(&udp->udp_rwlock); 7804 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7805 "udp_bind: bad state, %u", udp->udp_state); 7806 return (-TOUTSTATE); 7807 } 7808 /* XXX how to remove the T_BIND_REQ? 
Should set it before calling */ 7809 udp->udp_pending_op = T_BIND_REQ; 7810 /* 7811 * Copy the source address into our udp structure. This address 7812 * may still be zero; if so, IP will fill in the correct address 7813 * each time an outbound packet is passed to it. Since the udp is 7814 * not yet in the bind hash list, we don't grab the uf_lock to 7815 * change udp_ipversion 7816 */ 7817 if (udp->udp_family == AF_INET) { 7818 ASSERT(sin != NULL); 7819 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7820 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 7821 udp->udp_ip_snd_options_len; 7822 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 7823 } else { 7824 ASSERT(sin6 != NULL); 7825 v6src = sin6->sin6_addr; 7826 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 7827 /* 7828 * no need to hold the uf_lock to set the udp_ipversion 7829 * since we are not yet in the fanout list 7830 */ 7831 udp->udp_ipversion = IPV4_VERSION; 7832 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 7833 UDPH_SIZE + udp->udp_ip_snd_options_len; 7834 } else { 7835 udp->udp_ipversion = IPV6_VERSION; 7836 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 7837 } 7838 } 7839 7840 /* 7841 * If udp_reuseaddr is not set, then we have to make sure that 7842 * the IP address and port number the application requested 7843 * (or we selected for the application) is not being used by 7844 * another stream. If another stream is already using the 7845 * requested IP address and port, the behavior depends on 7846 * "bind_to_req_port_only". If set the bind fails; otherwise we 7847 * search for any an unused port to bind to the the stream. 7848 * 7849 * As per the BSD semantics, as modified by the Deering multicast 7850 * changes, if udp_reuseaddr is set, then we allow multiple binds 7851 * to the same port independent of the local IP address. 7852 * 7853 * This is slightly different than in SunOS 4.X which did not 7854 * support IP multicast. 
Note that the change implemented by the 7855 * Deering multicast code effects all binds - not only binding 7856 * to IP multicast addresses. 7857 * 7858 * Note that when binding to port zero we ignore SO_REUSEADDR in 7859 * order to guarantee a unique port. 7860 */ 7861 7862 count = 0; 7863 if (udp->udp_anon_priv_bind) { 7864 /* 7865 * loopmax = (IPPORT_RESERVED-1) - 7866 * us->us_min_anonpriv_port + 1 7867 */ 7868 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 7869 } else { 7870 loopmax = us->us_largest_anon_port - 7871 us->us_smallest_anon_port + 1; 7872 } 7873 7874 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 7875 7876 for (;;) { 7877 udp_t *udp1; 7878 boolean_t found_exclbind = B_FALSE; 7879 7880 /* 7881 * Walk through the list of udp streams bound to 7882 * requested port with the same IP address. 7883 */ 7884 lport = htons(port); 7885 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 7886 us->us_bind_fanout_size)]; 7887 mutex_enter(&udpf->uf_lock); 7888 for (udp1 = udpf->uf_udp; udp1 != NULL; 7889 udp1 = udp1->udp_bind_hash) { 7890 if (lport != udp1->udp_port) 7891 continue; 7892 7893 /* 7894 * On a labeled system, we must treat bindings to ports 7895 * on shared IP addresses by sockets with MAC exemption 7896 * privilege as being in all zones, as there's 7897 * otherwise no way to identify the right receiver. 7898 */ 7899 if (!IPCL_BIND_ZONE_MATCH(udp1->udp_connp, connp)) 7900 continue; 7901 7902 /* 7903 * If UDP_EXCLBIND is set for either the bound or 7904 * binding endpoint, the semantics of bind 7905 * is changed according to the following chart. 7906 * 7907 * spec = specified address (v4 or v6) 7908 * unspec = unspecified address (v4 or v6) 7909 * A = specified addresses are different for endpoints 7910 * 7911 * bound bind to allowed? 
7912 * ------------------------------------- 7913 * unspec unspec no 7914 * unspec spec no 7915 * spec unspec no 7916 * spec spec yes if A 7917 * 7918 * For labeled systems, SO_MAC_EXEMPT behaves the same 7919 * as UDP_EXCLBIND, except that zoneid is ignored. 7920 */ 7921 if (udp1->udp_exclbind || udp->udp_exclbind || 7922 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 7923 if (V6_OR_V4_INADDR_ANY( 7924 udp1->udp_bound_v6src) || 7925 is_inaddr_any || 7926 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 7927 &v6src)) { 7928 found_exclbind = B_TRUE; 7929 break; 7930 } 7931 continue; 7932 } 7933 7934 /* 7935 * Check ipversion to allow IPv4 and IPv6 sockets to 7936 * have disjoint port number spaces. 7937 */ 7938 if (udp->udp_ipversion != udp1->udp_ipversion) { 7939 7940 /* 7941 * On the first time through the loop, if the 7942 * the user intentionally specified a 7943 * particular port number, then ignore any 7944 * bindings of the other protocol that may 7945 * conflict. This allows the user to bind IPv6 7946 * alone and get both v4 and v6, or bind both 7947 * both and get each seperately. On subsequent 7948 * times through the loop, we're checking a 7949 * port that we chose (not the user) and thus 7950 * we do not allow casual duplicate bindings. 7951 */ 7952 if (count == 0 && requested_port != 0) 7953 continue; 7954 } 7955 7956 /* 7957 * No difference depending on SO_REUSEADDR. 7958 * 7959 * If existing port is bound to a 7960 * non-wildcard IP address and 7961 * the requesting stream is bound to 7962 * a distinct different IP addresses 7963 * (non-wildcard, also), keep going. 7964 */ 7965 if (!is_inaddr_any && 7966 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 7967 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 7968 &v6src)) { 7969 continue; 7970 } 7971 break; 7972 } 7973 7974 if (!found_exclbind && 7975 (udp->udp_reuseaddr && requested_port != 0)) { 7976 break; 7977 } 7978 7979 if (udp1 == NULL) { 7980 /* 7981 * No other stream has this IP address 7982 * and port number. 
We can use it. 7983 */ 7984 break; 7985 } 7986 mutex_exit(&udpf->uf_lock); 7987 if (bind_to_req_port_only) { 7988 /* 7989 * We get here only when requested port 7990 * is bound (and only first of the for() 7991 * loop iteration). 7992 * 7993 * The semantics of this bind request 7994 * require it to fail so we return from 7995 * the routine (and exit the loop). 7996 * 7997 */ 7998 udp->udp_pending_op = -1; 7999 rw_exit(&udp->udp_rwlock); 8000 return (-TADDRBUSY); 8001 } 8002 8003 if (udp->udp_anon_priv_bind) { 8004 port = udp_get_next_priv_port(udp); 8005 } else { 8006 if ((count == 0) && (requested_port != 0)) { 8007 /* 8008 * If the application wants us to find 8009 * a port, get one to start with. Set 8010 * requested_port to 0, so that we will 8011 * update us->us_next_port_to_try below. 8012 */ 8013 port = udp_update_next_port(udp, 8014 us->us_next_port_to_try, B_TRUE); 8015 requested_port = 0; 8016 } else { 8017 port = udp_update_next_port(udp, port + 1, 8018 B_FALSE); 8019 } 8020 } 8021 8022 if (port == 0 || ++count >= loopmax) { 8023 /* 8024 * We've tried every possible port number and 8025 * there are none available, so send an error 8026 * to the user. 8027 */ 8028 udp->udp_pending_op = -1; 8029 rw_exit(&udp->udp_rwlock); 8030 return (-TNOADDR); 8031 } 8032 } 8033 8034 /* 8035 * Copy the source address into our udp structure. This address 8036 * may still be zero; if so, ip will fill in the correct address 8037 * each time an outbound packet is passed to it. 8038 * If we are binding to a broadcast or multicast address then 8039 * udp_post_ip_bind_connect will clear the source address 8040 * when udp_do_bind success. 8041 */ 8042 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8043 udp->udp_port = lport; 8044 /* 8045 * Now reset the the next anonymous port if the application requested 8046 * an anonymous port, or we handed out the next anonymous port. 
8047 */ 8048 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8049 us->us_next_port_to_try = port + 1; 8050 } 8051 8052 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 8053 if (udp->udp_family == AF_INET) { 8054 sin->sin_port = udp->udp_port; 8055 } else { 8056 sin6->sin6_port = udp->udp_port; 8057 /* Rebuild the header template */ 8058 error = udp_build_hdrs(udp); 8059 if (error != 0) { 8060 udp->udp_pending_op = -1; 8061 rw_exit(&udp->udp_rwlock); 8062 mutex_exit(&udpf->uf_lock); 8063 return (error); 8064 } 8065 } 8066 udp->udp_state = TS_IDLE; 8067 udp_bind_hash_insert(udpf, udp); 8068 mutex_exit(&udpf->uf_lock); 8069 rw_exit(&udp->udp_rwlock); 8070 8071 if (cl_inet_bind) { 8072 /* 8073 * Running in cluster mode - register bind information 8074 */ 8075 if (udp->udp_ipversion == IPV4_VERSION) { 8076 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8077 IPPROTO_UDP, AF_INET, 8078 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8079 (in_port_t)udp->udp_port, NULL); 8080 } else { 8081 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8082 IPPROTO_UDP, AF_INET6, 8083 (uint8_t *)&(udp->udp_v6src), 8084 (in_port_t)udp->udp_port, NULL); 8085 } 8086 } 8087 8088 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8089 if (is_system_labeled() && (!connp->conn_anon_port || 8090 connp->conn_anon_mlp)) { 8091 uint16_t mlpport; 8092 zone_t *zone; 8093 8094 zone = crgetzone(cr); 8095 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 8096 mlptSingle; 8097 addrtype = tsol_mlp_addr_type( 8098 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 8099 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 8100 if (addrtype == mlptSingle) { 8101 rw_enter(&udp->udp_rwlock, RW_WRITER); 8102 udp->udp_pending_op = -1; 8103 rw_exit(&udp->udp_rwlock); 8104 connp->conn_anon_port = B_FALSE; 8105 connp->conn_mlp_type = mlptSingle; 8106 return (-TNOADDR); 8107 } 8108 mlpport = connp->conn_anon_port ? 
PMAPPORT : port; 8109 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8110 addrtype); 8111 8112 /* 8113 * It is a coding error to attempt to bind an MLP port 8114 * without first setting SOL_SOCKET/SCM_UCRED. 8115 */ 8116 if (mlptype != mlptSingle && 8117 connp->conn_mlp_type == mlptSingle) { 8118 rw_enter(&udp->udp_rwlock, RW_WRITER); 8119 udp->udp_pending_op = -1; 8120 rw_exit(&udp->udp_rwlock); 8121 connp->conn_anon_port = B_FALSE; 8122 connp->conn_mlp_type = mlptSingle; 8123 return (EINVAL); 8124 } 8125 8126 /* 8127 * It is an access violation to attempt to bind an MLP port 8128 * without NET_BINDMLP privilege. 8129 */ 8130 if (mlptype != mlptSingle && 8131 secpolicy_net_bindmlp(cr) != 0) { 8132 if (udp->udp_debug) { 8133 (void) strlog(UDP_MOD_ID, 0, 1, 8134 SL_ERROR|SL_TRACE, 8135 "udp_bind: no priv for multilevel port %d", 8136 mlpport); 8137 } 8138 rw_enter(&udp->udp_rwlock, RW_WRITER); 8139 udp->udp_pending_op = -1; 8140 rw_exit(&udp->udp_rwlock); 8141 connp->conn_anon_port = B_FALSE; 8142 connp->conn_mlp_type = mlptSingle; 8143 return (-TACCES); 8144 } 8145 8146 /* 8147 * If we're specifically binding a shared IP address and the 8148 * port is MLP on shared addresses, then check to see if this 8149 * zone actually owns the MLP. Reject if not. 8150 */ 8151 if (mlptype == mlptShared && addrtype == mlptShared) { 8152 /* 8153 * No need to handle exclusive-stack zones since 8154 * ALL_ZONES only applies to the shared stack. 
8155 */ 8156 zoneid_t mlpzone; 8157 8158 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8159 htons(mlpport)); 8160 if (connp->conn_zoneid != mlpzone) { 8161 if (udp->udp_debug) { 8162 (void) strlog(UDP_MOD_ID, 0, 1, 8163 SL_ERROR|SL_TRACE, 8164 "udp_bind: attempt to bind port " 8165 "%d on shared addr in zone %d " 8166 "(should be %d)", 8167 mlpport, connp->conn_zoneid, 8168 mlpzone); 8169 } 8170 rw_enter(&udp->udp_rwlock, RW_WRITER); 8171 udp->udp_pending_op = -1; 8172 rw_exit(&udp->udp_rwlock); 8173 connp->conn_anon_port = B_FALSE; 8174 connp->conn_mlp_type = mlptSingle; 8175 return (-TACCES); 8176 } 8177 } 8178 if (connp->conn_anon_port) { 8179 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8180 port, B_TRUE); 8181 if (error != 0) { 8182 if (udp->udp_debug) { 8183 (void) strlog(UDP_MOD_ID, 0, 1, 8184 SL_ERROR|SL_TRACE, 8185 "udp_bind: cannot establish anon " 8186 "MLP for port %d", port); 8187 } 8188 rw_enter(&udp->udp_rwlock, RW_WRITER); 8189 udp->udp_pending_op = -1; 8190 rw_exit(&udp->udp_rwlock); 8191 connp->conn_anon_port = B_FALSE; 8192 connp->conn_mlp_type = mlptSingle; 8193 return (-TACCES); 8194 } 8195 } 8196 connp->conn_mlp_type = mlptype; 8197 } 8198 8199 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8200 /* 8201 * Append a request for an IRE if udp_v6src not 8202 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 
8203 */ 8204 mp = allocb(sizeof (ire_t), BPRI_HI); 8205 if (!mp) { 8206 rw_enter(&udp->udp_rwlock, RW_WRITER); 8207 udp->udp_pending_op = -1; 8208 rw_exit(&udp->udp_rwlock); 8209 return (ENOMEM); 8210 } 8211 mp->b_wptr += sizeof (ire_t); 8212 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8213 } 8214 if (udp->udp_family == AF_INET6) { 8215 ASSERT(udp->udp_connp->conn_af_isv6); 8216 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8217 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8218 } else { 8219 ASSERT(!udp->udp_connp->conn_af_isv6); 8220 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8221 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8222 B_TRUE); 8223 } 8224 8225 (void) udp_post_ip_bind_connect(udp, mp, error); 8226 return (error); 8227 } 8228 8229 int 8230 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8231 socklen_t len, cred_t *cr) 8232 { 8233 int error; 8234 conn_t *connp; 8235 8236 /* All Solaris components should pass a cred for this operation. */ 8237 ASSERT(cr != NULL); 8238 8239 connp = (conn_t *)proto_handle; 8240 8241 if (sa == NULL) 8242 error = udp_do_unbind(connp); 8243 else 8244 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8245 8246 if (error < 0) { 8247 if (error == -TOUTSTATE) 8248 error = EINVAL; 8249 else 8250 error = proto_tlitosyserr(-error); 8251 } 8252 8253 return (error); 8254 } 8255 8256 static int 8257 udp_implicit_bind(conn_t *connp, cred_t *cr) 8258 { 8259 int error; 8260 8261 /* All Solaris components should pass a cred for this operation. */ 8262 ASSERT(cr != NULL); 8263 8264 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8265 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8266 } 8267 8268 /* 8269 * This routine removes a port number association from a stream. It 8270 * is called by udp_unbind and udp_tpi_unbind. 
8271 */ 8272 static int 8273 udp_do_unbind(conn_t *connp) 8274 { 8275 udp_t *udp = connp->conn_udp; 8276 udp_fanout_t *udpf; 8277 udp_stack_t *us = udp->udp_us; 8278 8279 if (cl_inet_unbind != NULL) { 8280 /* 8281 * Running in cluster mode - register unbind information 8282 */ 8283 if (udp->udp_ipversion == IPV4_VERSION) { 8284 (*cl_inet_unbind)( 8285 connp->conn_netstack->netstack_stackid, 8286 IPPROTO_UDP, AF_INET, 8287 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8288 (in_port_t)udp->udp_port, NULL); 8289 } else { 8290 (*cl_inet_unbind)( 8291 connp->conn_netstack->netstack_stackid, 8292 IPPROTO_UDP, AF_INET6, 8293 (uint8_t *)&(udp->udp_v6src), 8294 (in_port_t)udp->udp_port, NULL); 8295 } 8296 } 8297 8298 rw_enter(&udp->udp_rwlock, RW_WRITER); 8299 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8300 rw_exit(&udp->udp_rwlock); 8301 return (-TOUTSTATE); 8302 } 8303 udp->udp_pending_op = T_UNBIND_REQ; 8304 rw_exit(&udp->udp_rwlock); 8305 8306 /* 8307 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8308 * and therefore ip_unbind must never return NULL. 8309 */ 8310 ip_unbind(connp); 8311 8312 /* 8313 * Once we're unbound from IP, the pending operation may be cleared 8314 * here. 
8315 */ 8316 rw_enter(&udp->udp_rwlock, RW_WRITER); 8317 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8318 us->us_bind_fanout_size)]; 8319 8320 mutex_enter(&udpf->uf_lock); 8321 udp_bind_hash_remove(udp, B_TRUE); 8322 V6_SET_ZERO(udp->udp_v6src); 8323 V6_SET_ZERO(udp->udp_bound_v6src); 8324 udp->udp_port = 0; 8325 mutex_exit(&udpf->uf_lock); 8326 8327 udp->udp_pending_op = -1; 8328 udp->udp_state = TS_UNBND; 8329 if (udp->udp_family == AF_INET6) 8330 (void) udp_build_hdrs(udp); 8331 rw_exit(&udp->udp_rwlock); 8332 8333 return (0); 8334 } 8335 8336 static int 8337 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8338 { 8339 ire_t *ire; 8340 udp_fanout_t *udpf; 8341 udp_stack_t *us = udp->udp_us; 8342 8343 ASSERT(udp->udp_pending_op != -1); 8344 rw_enter(&udp->udp_rwlock, RW_WRITER); 8345 if (error == 0) { 8346 /* For udp_do_connect() success */ 8347 /* udp_do_bind() success will do nothing in here */ 8348 /* 8349 * If a broadcast/multicast address was bound, set 8350 * the source address to 0. 8351 * This ensures no datagrams with broadcast address 8352 * as source address are emitted (which would violate 8353 * RFC1122 - Hosts requirements) 8354 * 8355 * Note that when connecting the returned IRE is 8356 * for the destination address and we only perform 8357 * the broadcast check for the source address (it 8358 * is OK to connect to a broadcast/multicast address.) 8359 */ 8360 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8361 ire = (ire_t *)ire_mp->b_rptr; 8362 8363 /* 8364 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8365 * multicast local address. 8366 */ 8367 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8368 us->us_bind_fanout_size)]; 8369 if (ire->ire_type == IRE_BROADCAST && 8370 udp->udp_state != TS_DATA_XFER) { 8371 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8372 udp->udp_pending_op == O_T_BIND_REQ); 8373 /* 8374 * This was just a local bind to a broadcast 8375 * addr. 
8376 */ 8377 mutex_enter(&udpf->uf_lock); 8378 V6_SET_ZERO(udp->udp_v6src); 8379 mutex_exit(&udpf->uf_lock); 8380 if (udp->udp_family == AF_INET6) 8381 (void) udp_build_hdrs(udp); 8382 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8383 if (udp->udp_family == AF_INET6) 8384 (void) udp_build_hdrs(udp); 8385 } 8386 } 8387 } else { 8388 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8389 us->us_bind_fanout_size)]; 8390 mutex_enter(&udpf->uf_lock); 8391 8392 if (udp->udp_state == TS_DATA_XFER) { 8393 /* Connect failed */ 8394 /* Revert back to the bound source */ 8395 udp->udp_v6src = udp->udp_bound_v6src; 8396 udp->udp_state = TS_IDLE; 8397 } else { 8398 /* For udp_do_bind() failed */ 8399 V6_SET_ZERO(udp->udp_v6src); 8400 V6_SET_ZERO(udp->udp_bound_v6src); 8401 udp->udp_state = TS_UNBND; 8402 udp_bind_hash_remove(udp, B_TRUE); 8403 udp->udp_port = 0; 8404 } 8405 mutex_exit(&udpf->uf_lock); 8406 if (udp->udp_family == AF_INET6) 8407 (void) udp_build_hdrs(udp); 8408 } 8409 udp->udp_pending_op = -1; 8410 rw_exit(&udp->udp_rwlock); 8411 if (ire_mp != NULL) 8412 freeb(ire_mp); 8413 return (error); 8414 } 8415 8416 /* 8417 * It associates a default destination address with the stream. 
8418 */ 8419 static int 8420 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 8421 cred_t *cr) 8422 { 8423 sin6_t *sin6; 8424 sin_t *sin; 8425 in6_addr_t v6dst; 8426 ipaddr_t v4dst; 8427 uint16_t dstport; 8428 uint32_t flowinfo; 8429 mblk_t *ire_mp; 8430 udp_fanout_t *udpf; 8431 udp_t *udp, *udp1; 8432 ushort_t ipversion; 8433 udp_stack_t *us; 8434 int error; 8435 8436 udp = connp->conn_udp; 8437 us = udp->udp_us; 8438 8439 /* 8440 * Address has been verified by the caller 8441 */ 8442 switch (len) { 8443 default: 8444 /* 8445 * Should never happen 8446 */ 8447 return (EINVAL); 8448 8449 case sizeof (sin_t): 8450 sin = (sin_t *)sa; 8451 v4dst = sin->sin_addr.s_addr; 8452 dstport = sin->sin_port; 8453 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8454 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8455 ipversion = IPV4_VERSION; 8456 break; 8457 8458 case sizeof (sin6_t): 8459 sin6 = (sin6_t *)sa; 8460 v6dst = sin6->sin6_addr; 8461 dstport = sin6->sin6_port; 8462 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8463 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8464 ipversion = IPV4_VERSION; 8465 flowinfo = 0; 8466 } else { 8467 ipversion = IPV6_VERSION; 8468 flowinfo = sin6->sin6_flowinfo; 8469 } 8470 break; 8471 } 8472 8473 if (dstport == 0) 8474 return (-TBADADDR); 8475 8476 rw_enter(&udp->udp_rwlock, RW_WRITER); 8477 8478 /* 8479 * This UDP must have bound to a port already before doing a connect. 8480 * TPI mandates that users must send TPI primitives only 1 at a time 8481 * and wait for the response before sending the next primitive. 
8482 */ 8483 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8484 rw_exit(&udp->udp_rwlock); 8485 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8486 "udp_connect: bad state, %u", udp->udp_state); 8487 return (-TOUTSTATE); 8488 } 8489 udp->udp_pending_op = T_CONN_REQ; 8490 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8491 8492 if (ipversion == IPV4_VERSION) { 8493 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8494 udp->udp_ip_snd_options_len; 8495 } else { 8496 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8497 } 8498 8499 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8500 us->us_bind_fanout_size)]; 8501 8502 mutex_enter(&udpf->uf_lock); 8503 if (udp->udp_state == TS_DATA_XFER) { 8504 /* Already connected - clear out state */ 8505 udp->udp_v6src = udp->udp_bound_v6src; 8506 udp->udp_state = TS_IDLE; 8507 } 8508 8509 /* 8510 * Create a default IP header with no IP options. 8511 */ 8512 udp->udp_dstport = dstport; 8513 udp->udp_ipversion = ipversion; 8514 if (ipversion == IPV4_VERSION) { 8515 /* 8516 * Interpret a zero destination to mean loopback. 8517 * Update the T_CONN_REQ (sin/sin6) since it is used to 8518 * generate the T_CONN_CON. 8519 */ 8520 if (v4dst == INADDR_ANY) { 8521 v4dst = htonl(INADDR_LOOPBACK); 8522 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8523 if (udp->udp_family == AF_INET) { 8524 sin->sin_addr.s_addr = v4dst; 8525 } else { 8526 sin6->sin6_addr = v6dst; 8527 } 8528 } 8529 udp->udp_v6dst = v6dst; 8530 udp->udp_flowinfo = 0; 8531 8532 /* 8533 * If the destination address is multicast and 8534 * an outgoing multicast interface has been set, 8535 * use the address of that interface as our 8536 * source address if no source address has been set. 
8537 */ 8538 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8539 CLASSD(v4dst) && 8540 udp->udp_multicast_if_addr != INADDR_ANY) { 8541 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8542 &udp->udp_v6src); 8543 } 8544 } else { 8545 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8546 /* 8547 * Interpret a zero destination to mean loopback. 8548 * Update the T_CONN_REQ (sin/sin6) since it is used to 8549 * generate the T_CONN_CON. 8550 */ 8551 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 8552 v6dst = ipv6_loopback; 8553 sin6->sin6_addr = v6dst; 8554 } 8555 udp->udp_v6dst = v6dst; 8556 udp->udp_flowinfo = flowinfo; 8557 /* 8558 * If the destination address is multicast and 8559 * an outgoing multicast interface has been set, 8560 * then the ip bind logic will pick the correct source 8561 * address (i.e. matching the outgoing multicast interface). 8562 */ 8563 } 8564 8565 /* 8566 * Verify that the src/port/dst/port is unique for all 8567 * connections in TS_DATA_XFER 8568 */ 8569 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 8570 if (udp1->udp_state != TS_DATA_XFER) 8571 continue; 8572 if (udp->udp_port != udp1->udp_port || 8573 udp->udp_ipversion != udp1->udp_ipversion || 8574 dstport != udp1->udp_dstport || 8575 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 8576 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 8577 !(IPCL_ZONE_MATCH(udp->udp_connp, 8578 udp1->udp_connp->conn_zoneid) || 8579 IPCL_ZONE_MATCH(udp1->udp_connp, 8580 udp->udp_connp->conn_zoneid))) 8581 continue; 8582 mutex_exit(&udpf->uf_lock); 8583 udp->udp_pending_op = -1; 8584 rw_exit(&udp->udp_rwlock); 8585 return (-TBADADDR); 8586 } 8587 8588 if (cl_inet_connect2 != NULL) { 8589 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 8590 if (error != 0) { 8591 mutex_exit(&udpf->uf_lock); 8592 udp->udp_pending_op = -1; 8593 rw_exit(&udp->udp_rwlock); 8594 return (-TBADADDR); 8595 } 8596 } 8597 8598 udp->udp_state = TS_DATA_XFER; 8599 
mutex_exit(&udpf->uf_lock); 8600 8601 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 8602 if (ire_mp == NULL) { 8603 mutex_enter(&udpf->uf_lock); 8604 udp->udp_state = TS_IDLE; 8605 udp->udp_pending_op = -1; 8606 mutex_exit(&udpf->uf_lock); 8607 rw_exit(&udp->udp_rwlock); 8608 return (ENOMEM); 8609 } 8610 8611 rw_exit(&udp->udp_rwlock); 8612 8613 ire_mp->b_wptr += sizeof (ire_t); 8614 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8615 8616 if (udp->udp_family == AF_INET) { 8617 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 8618 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 8619 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 8620 B_TRUE, B_TRUE, cr); 8621 } else { 8622 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 8623 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 8624 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr); 8625 } 8626 8627 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 8628 } 8629 8630 /* ARGSUSED */ 8631 static int 8632 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 8633 socklen_t len, sock_connid_t *id, cred_t *cr) 8634 { 8635 conn_t *connp = (conn_t *)proto_handle; 8636 udp_t *udp = connp->conn_udp; 8637 int error; 8638 boolean_t did_bind = B_FALSE; 8639 8640 /* All Solaris components should pass a cred for this operation. */ 8641 ASSERT(cr != NULL); 8642 8643 if (sa == NULL) { 8644 /* 8645 * Disconnect 8646 * Make sure we are connected 8647 */ 8648 if (udp->udp_state != TS_DATA_XFER) 8649 return (EINVAL); 8650 8651 error = udp_disconnect(connp); 8652 return (error); 8653 } 8654 8655 error = proto_verify_ip_addr(udp->udp_family, sa, len); 8656 if (error != 0) 8657 goto done; 8658 8659 /* do an implicit bind if necessary */ 8660 if (udp->udp_state == TS_UNBND) { 8661 error = udp_implicit_bind(connp, cr); 8662 /* 8663 * We could be racing with an actual bind, in which case 8664 * we would see EPROTO. We cross our fingers and try 8665 * to connect. 
8666 */ 8667 if (!(error == 0 || error == EPROTO)) 8668 goto done; 8669 did_bind = B_TRUE; 8670 } 8671 /* 8672 * set SO_DGRAM_ERRIND 8673 */ 8674 udp->udp_dgram_errind = B_TRUE; 8675 8676 error = udp_do_connect(connp, sa, len, cr); 8677 8678 if (error != 0 && did_bind) { 8679 int unbind_err; 8680 8681 unbind_err = udp_do_unbind(connp); 8682 ASSERT(unbind_err == 0); 8683 } 8684 8685 if (error == 0) { 8686 *id = 0; 8687 (*connp->conn_upcalls->su_connected) 8688 (connp->conn_upper_handle, 0, NULL, -1); 8689 } else if (error < 0) { 8690 error = proto_tlitosyserr(-error); 8691 } 8692 8693 done: 8694 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 8695 /* 8696 * No need to hold locks to set state 8697 * after connect failure socket state is undefined 8698 * We set the state only to imitate old sockfs behavior 8699 */ 8700 udp->udp_state = TS_IDLE; 8701 } 8702 return (error); 8703 } 8704 8705 /* ARGSUSED */ 8706 int 8707 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 8708 cred_t *cr) 8709 { 8710 conn_t *connp = (conn_t *)proto_handle; 8711 udp_t *udp = connp->conn_udp; 8712 udp_stack_t *us = udp->udp_us; 8713 int error = 0; 8714 8715 ASSERT(DB_TYPE(mp) == M_DATA); 8716 8717 /* All Solaris components should pass a cred for this operation. */ 8718 ASSERT(cr != NULL); 8719 8720 /* If labeled then sockfs should have already set db_credp */ 8721 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 8722 8723 /* 8724 * If the socket is connected and no change in destination 8725 */ 8726 if (msg->msg_namelen == 0) { 8727 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 8728 if (error == EDESTADDRREQ) 8729 return (error); 8730 else 8731 return (udp->udp_dgram_errind ? error : 0); 8732 } 8733 8734 /* 8735 * Do an implicit bind if necessary. 8736 */ 8737 if (udp->udp_state == TS_UNBND) { 8738 error = udp_implicit_bind(connp, cr); 8739 /* 8740 * We could be racing with an actual bind, in which case 8741 * we would see EPROTO. 
We cross our fingers and try 8742 * to send. 8743 */ 8744 if (!(error == 0 || error == EPROTO)) { 8745 freemsg(mp); 8746 return (error); 8747 } 8748 } 8749 8750 rw_enter(&udp->udp_rwlock, RW_WRITER); 8751 8752 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 8753 rw_exit(&udp->udp_rwlock); 8754 freemsg(mp); 8755 return (EISCONN); 8756 } 8757 8758 8759 if (udp->udp_delayed_error != 0) { 8760 boolean_t match; 8761 8762 error = udp->udp_delayed_error; 8763 match = B_FALSE; 8764 udp->udp_delayed_error = 0; 8765 switch (udp->udp_family) { 8766 case AF_INET: { 8767 /* Compare just IP address and port */ 8768 sin_t *sin1 = (sin_t *)msg->msg_name; 8769 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 8770 8771 if (msg->msg_namelen == sizeof (sin_t) && 8772 sin1->sin_port == sin2->sin_port && 8773 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 8774 match = B_TRUE; 8775 8776 break; 8777 } 8778 case AF_INET6: { 8779 sin6_t *sin1 = (sin6_t *)msg->msg_name; 8780 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 8781 8782 if (msg->msg_namelen == sizeof (sin6_t) && 8783 sin1->sin6_port == sin2->sin6_port && 8784 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 8785 &sin2->sin6_addr)) 8786 match = B_TRUE; 8787 break; 8788 } 8789 default: 8790 ASSERT(0); 8791 } 8792 8793 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 8794 8795 if (match) { 8796 rw_exit(&udp->udp_rwlock); 8797 freemsg(mp); 8798 return (error); 8799 } 8800 } 8801 8802 error = proto_verify_ip_addr(udp->udp_family, 8803 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 8804 rw_exit(&udp->udp_rwlock); 8805 8806 if (error != 0) { 8807 freemsg(mp); 8808 return (error); 8809 } 8810 8811 error = udp_send_not_connected(connp, mp, 8812 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 8813 curproc->p_pid); 8814 if (error != 0) { 8815 UDP_STAT(us, udp_out_err_output); 8816 freemsg(mp); 8817 } 8818 return (udp->udp_dgram_errind ? 
error : 0); 8819 } 8820 8821 int 8822 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 8823 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb) 8824 { 8825 conn_t *connp = (conn_t *)proto_handle; 8826 udp_t *udp; 8827 struct T_capability_ack tca; 8828 struct sockaddr_in6 laddr, faddr; 8829 socklen_t laddrlen, faddrlen; 8830 short opts; 8831 struct stroptions *stropt; 8832 mblk_t *stropt_mp; 8833 int error; 8834 8835 udp = connp->conn_udp; 8836 8837 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 8838 8839 /* 8840 * setup the fallback stream that was allocated 8841 */ 8842 connp->conn_dev = (dev_t)RD(q)->q_ptr; 8843 connp->conn_minor_arena = WR(q)->q_ptr; 8844 8845 RD(q)->q_ptr = WR(q)->q_ptr = connp; 8846 8847 WR(q)->q_qinfo = &udp_winit; 8848 8849 connp->conn_rq = RD(q); 8850 connp->conn_wq = WR(q); 8851 8852 /* Notify stream head about options before sending up data */ 8853 stropt_mp->b_datap->db_type = M_SETOPTS; 8854 stropt_mp->b_wptr += sizeof (*stropt); 8855 stropt = (struct stroptions *)stropt_mp->b_rptr; 8856 stropt->so_flags = SO_WROFF | SO_HIWAT; 8857 stropt->so_wroff = 8858 (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); 8859 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 8860 putnext(RD(q), stropt_mp); 8861 8862 /* 8863 * Free the helper stream 8864 */ 8865 ip_free_helper_stream(connp); 8866 8867 if (!issocket) 8868 udp_use_pure_tpi(udp); 8869 8870 /* 8871 * Collect the information needed to sync with the sonode 8872 */ 8873 udp_do_capability_ack(udp, &tca, TC1_INFO); 8874 8875 laddrlen = faddrlen = sizeof (sin6_t); 8876 (void) udp_getsockname((sock_lower_handle_t)connp, 8877 (struct sockaddr *)&laddr, &laddrlen, CRED()); 8878 error = udp_getpeername((sock_lower_handle_t)connp, 8879 (struct sockaddr *)&faddr, &faddrlen, CRED()); 8880 if (error != 0) 8881 faddrlen = 0; 8882 8883 opts = 0; 8884 if (udp->udp_dgram_errind) 8885 opts |= SO_DGRAM_ERRIND; 8886 if (udp->udp_dontroute) 8887 opts |= SO_DONTROUTE; 
8888 8889 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 8890 (struct sockaddr *)&laddr, laddrlen, 8891 (struct sockaddr *)&faddr, faddrlen, opts); 8892 8893 mutex_enter(&udp->udp_recv_lock); 8894 /* 8895 * Attempts to send data up during fallback will result in it being 8896 * queued in udp_t. Now we push up any queued packets. 8897 */ 8898 while (udp->udp_fallback_queue_head != NULL) { 8899 mblk_t *mp; 8900 mp = udp->udp_fallback_queue_head; 8901 udp->udp_fallback_queue_head = mp->b_next; 8902 mutex_exit(&udp->udp_recv_lock); 8903 mp->b_next = NULL; 8904 putnext(RD(q), mp); 8905 mutex_enter(&udp->udp_recv_lock); 8906 } 8907 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 8908 /* 8909 * No longer a streams less socket 8910 */ 8911 rw_enter(&udp->udp_rwlock, RW_WRITER); 8912 connp->conn_flags &= ~IPCL_NONSTR; 8913 rw_exit(&udp->udp_rwlock); 8914 8915 mutex_exit(&udp->udp_recv_lock); 8916 8917 ASSERT(connp->conn_ref >= 1); 8918 8919 return (0); 8920 } 8921 8922 static int 8923 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 8924 { 8925 sin_t *sin = (sin_t *)sa; 8926 sin6_t *sin6 = (sin6_t *)sa; 8927 8928 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 8929 ASSERT(udp != NULL); 8930 8931 if (udp->udp_state != TS_DATA_XFER) 8932 return (ENOTCONN); 8933 8934 switch (udp->udp_family) { 8935 case AF_INET: 8936 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8937 8938 if (*salenp < sizeof (sin_t)) 8939 return (EINVAL); 8940 8941 *salenp = sizeof (sin_t); 8942 *sin = sin_null; 8943 sin->sin_family = AF_INET; 8944 sin->sin_port = udp->udp_dstport; 8945 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 8946 break; 8947 case AF_INET6: 8948 if (*salenp < sizeof (sin6_t)) 8949 return (EINVAL); 8950 8951 *salenp = sizeof (sin6_t); 8952 *sin6 = sin6_null; 8953 sin6->sin6_family = AF_INET6; 8954 sin6->sin6_port = udp->udp_dstport; 8955 sin6->sin6_addr = udp->udp_v6dst; 8956 sin6->sin6_flowinfo = udp->udp_flowinfo; 8957 break; 8958 } 8959 8960 return 
(0); 8961 } 8962 8963 /* ARGSUSED */ 8964 int 8965 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8966 socklen_t *salenp, cred_t *cr) 8967 { 8968 conn_t *connp = (conn_t *)proto_handle; 8969 udp_t *udp = connp->conn_udp; 8970 int error; 8971 8972 /* All Solaris components should pass a cred for this operation. */ 8973 ASSERT(cr != NULL); 8974 8975 ASSERT(udp != NULL); 8976 8977 rw_enter(&udp->udp_rwlock, RW_READER); 8978 8979 error = udp_do_getpeername(udp, sa, salenp); 8980 8981 rw_exit(&udp->udp_rwlock); 8982 8983 return (error); 8984 } 8985 8986 static int 8987 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 8988 { 8989 sin_t *sin = (sin_t *)sa; 8990 sin6_t *sin6 = (sin6_t *)sa; 8991 8992 ASSERT(udp != NULL); 8993 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 8994 8995 switch (udp->udp_family) { 8996 case AF_INET: 8997 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8998 8999 if (*salenp < sizeof (sin_t)) 9000 return (EINVAL); 9001 9002 *salenp = sizeof (sin_t); 9003 *sin = sin_null; 9004 sin->sin_family = AF_INET; 9005 if (udp->udp_state == TS_UNBND) { 9006 break; 9007 } 9008 sin->sin_port = udp->udp_port; 9009 9010 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9011 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9012 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9013 } else { 9014 /* 9015 * INADDR_ANY 9016 * udp_v6src is not set, we might be bound to 9017 * broadcast/multicast. 
Use udp_bound_v6src as 9018 * local address instead (that could 9019 * also still be INADDR_ANY) 9020 */ 9021 sin->sin_addr.s_addr = 9022 V4_PART_OF_V6(udp->udp_bound_v6src); 9023 } 9024 break; 9025 9026 case AF_INET6: 9027 if (*salenp < sizeof (sin6_t)) 9028 return (EINVAL); 9029 9030 *salenp = sizeof (sin6_t); 9031 *sin6 = sin6_null; 9032 sin6->sin6_family = AF_INET6; 9033 if (udp->udp_state == TS_UNBND) { 9034 break; 9035 } 9036 sin6->sin6_port = udp->udp_port; 9037 9038 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9039 sin6->sin6_addr = udp->udp_v6src; 9040 } else { 9041 /* 9042 * UNSPECIFIED 9043 * udp_v6src is not set, we might be bound to 9044 * broadcast/multicast. Use udp_bound_v6src as 9045 * local address instead (that could 9046 * also still be UNSPECIFIED) 9047 */ 9048 sin6->sin6_addr = udp->udp_bound_v6src; 9049 } 9050 } 9051 return (0); 9052 } 9053 9054 /* ARGSUSED */ 9055 int 9056 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9057 socklen_t *salenp, cred_t *cr) 9058 { 9059 conn_t *connp = (conn_t *)proto_handle; 9060 udp_t *udp = connp->conn_udp; 9061 int error; 9062 9063 /* All Solaris components should pass a cred for this operation. */ 9064 ASSERT(cr != NULL); 9065 9066 ASSERT(udp != NULL); 9067 rw_enter(&udp->udp_rwlock, RW_READER); 9068 9069 error = udp_do_getsockname(udp, sa, salenp); 9070 9071 rw_exit(&udp->udp_rwlock); 9072 9073 return (error); 9074 } 9075 9076 int 9077 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9078 void *optvalp, socklen_t *optlen, cred_t *cr) 9079 { 9080 conn_t *connp = (conn_t *)proto_handle; 9081 udp_t *udp = connp->conn_udp; 9082 int error; 9083 t_uscalar_t max_optbuf_len; 9084 void *optvalp_buf; 9085 int len; 9086 9087 /* All Solaris components should pass a cred for this operation. 
*/ 9088 ASSERT(cr != NULL); 9089 9090 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9091 udp_opt_obj.odb_opt_des_arr, 9092 udp_opt_obj.odb_opt_arr_cnt, 9093 udp_opt_obj.odb_topmost_tpiprovider, 9094 B_FALSE, B_TRUE, cr); 9095 if (error != 0) { 9096 if (error < 0) 9097 error = proto_tlitosyserr(-error); 9098 return (error); 9099 } 9100 9101 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9102 rw_enter(&udp->udp_rwlock, RW_READER); 9103 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9104 rw_exit(&udp->udp_rwlock); 9105 9106 if (len < 0) { 9107 /* 9108 * Pass on to IP 9109 */ 9110 kmem_free(optvalp_buf, max_optbuf_len); 9111 return (ip_get_options(connp, level, option_name, 9112 optvalp, optlen, cr)); 9113 } else { 9114 /* 9115 * update optlen and copy option value 9116 */ 9117 t_uscalar_t size = MIN(len, *optlen); 9118 bcopy(optvalp_buf, optvalp, size); 9119 bcopy(&size, optlen, sizeof (size)); 9120 9121 kmem_free(optvalp_buf, max_optbuf_len); 9122 return (0); 9123 } 9124 } 9125 9126 int 9127 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9128 const void *optvalp, socklen_t optlen, cred_t *cr) 9129 { 9130 conn_t *connp = (conn_t *)proto_handle; 9131 udp_t *udp = connp->conn_udp; 9132 int error; 9133 9134 /* All Solaris components should pass a cred for this operation. 
*/ 9135 ASSERT(cr != NULL); 9136 9137 error = proto_opt_check(level, option_name, optlen, NULL, 9138 udp_opt_obj.odb_opt_des_arr, 9139 udp_opt_obj.odb_opt_arr_cnt, 9140 udp_opt_obj.odb_topmost_tpiprovider, 9141 B_TRUE, B_FALSE, cr); 9142 9143 if (error != 0) { 9144 if (error < 0) 9145 error = proto_tlitosyserr(-error); 9146 return (error); 9147 } 9148 9149 rw_enter(&udp->udp_rwlock, RW_WRITER); 9150 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9151 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9152 NULL, cr); 9153 rw_exit(&udp->udp_rwlock); 9154 9155 if (error < 0) { 9156 /* 9157 * Pass on to ip 9158 */ 9159 error = ip_set_options(connp, level, option_name, optvalp, 9160 optlen, cr); 9161 } 9162 9163 return (error); 9164 } 9165 9166 void 9167 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9168 { 9169 conn_t *connp = (conn_t *)proto_handle; 9170 udp_t *udp = connp->conn_udp; 9171 9172 mutex_enter(&udp->udp_recv_lock); 9173 connp->conn_flow_cntrld = B_FALSE; 9174 mutex_exit(&udp->udp_recv_lock); 9175 } 9176 9177 /* ARGSUSED */ 9178 int 9179 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9180 { 9181 conn_t *connp = (conn_t *)proto_handle; 9182 9183 /* All Solaris components should pass a cred for this operation. */ 9184 ASSERT(cr != NULL); 9185 9186 /* shut down the send side */ 9187 if (how != SHUT_RD) 9188 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9189 SOCK_OPCTL_SHUT_SEND, 0); 9190 /* shut down the recv side */ 9191 if (how != SHUT_WR) 9192 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9193 SOCK_OPCTL_SHUT_RECV, 0); 9194 return (0); 9195 } 9196 9197 int 9198 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9199 int mode, int32_t *rvalp, cred_t *cr) 9200 { 9201 conn_t *connp = (conn_t *)proto_handle; 9202 int error; 9203 9204 /* All Solaris components should pass a cred for this operation. 
*/ 9205 ASSERT(cr != NULL); 9206 9207 switch (cmd) { 9208 case ND_SET: 9209 case ND_GET: 9210 case _SIOCSOCKFALLBACK: 9211 case TI_GETPEERNAME: 9212 case TI_GETMYNAME: 9213 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9214 cmd)); 9215 error = EINVAL; 9216 break; 9217 default: 9218 /* 9219 * Pass on to IP using helper stream 9220 */ 9221 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9222 cmd, arg, mode, cr, rvalp); 9223 break; 9224 } 9225 return (error); 9226 } 9227 9228 /* ARGSUSED */ 9229 int 9230 udp_accept(sock_lower_handle_t lproto_handle, 9231 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9232 cred_t *cr) 9233 { 9234 return (EOPNOTSUPP); 9235 } 9236 9237 /* ARGSUSED */ 9238 int 9239 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9240 { 9241 return (EOPNOTSUPP); 9242 } 9243 9244 sock_downcalls_t sock_udp_downcalls = { 9245 udp_activate, /* sd_activate */ 9246 udp_accept, /* sd_accept */ 9247 udp_bind, /* sd_bind */ 9248 udp_listen, /* sd_listen */ 9249 udp_connect, /* sd_connect */ 9250 udp_getpeername, /* sd_getpeername */ 9251 udp_getsockname, /* sd_getsockname */ 9252 udp_getsockopt, /* sd_getsockopt */ 9253 udp_setsockopt, /* sd_setsockopt */ 9254 udp_send, /* sd_send */ 9255 NULL, /* sd_send_uio */ 9256 NULL, /* sd_recv_uio */ 9257 NULL, /* sd_poll */ 9258 udp_shutdown, /* sd_shutdown */ 9259 udp_clr_flowctrl, /* sd_setflowctrl */ 9260 udp_ioctl, /* sd_ioctl */ 9261 udp_close /* sd_close */ 9262 }; 9263