1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 #include <sys/ethernet.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 101 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 102 * We also use conn_lock when updating things that affect the IP classifier 103 * lookup. 104 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 105 * 106 * The fanout lock uf_lock: 107 * When a UDP endpoint is bound to a local port, it is inserted into 108 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 109 * The size of the array is controlled by the udp_bind_fanout_size variable. 110 * This variable can be changed in /etc/system if the default value is 111 * not large enough. Each bind hash bucket is protected by a per bucket 112 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 113 * structure and a few other fields in the udp_t. A UDP endpoint is removed 114 * from the bind hash list only when it is being unbound or being closed. 115 * The per bucket lock also protects a UDP endpoint's state changes. 116 * 117 * The udp_rwlock: 118 * This protects most of the other fields in the udp_t. The exact list of 119 * fields which are protected by each of the above locks is documented in 120 * the udp_t structure definition. 121 * 122 * Plumbing notes: 123 * UDP is always a device driver. For compatibility with mibopen() code 124 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 125 * dummy module. 126 * 127 * The above implies that we don't support any intermediate module to 128 * reside in between /dev/ip and udp -- in fact, we never supported such 129 * scenario in the past as the inter-layer communication semantics have 130 * always been private. 
131 */ 132 133 /* For /etc/system control */ 134 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 135 136 /* Option processing attrs */ 137 typedef struct udpattrs_s { 138 union { 139 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 140 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 141 } udpattr_ippu; 142 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 143 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 144 mblk_t *udpattr_mb; 145 boolean_t udpattr_credset; 146 } udpattrs_t; 147 148 static void udp_addr_req(queue_t *q, mblk_t *mp); 149 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 150 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 151 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 152 static int udp_build_hdrs(udp_t *udp); 153 static void udp_capability_req(queue_t *q, mblk_t *mp); 154 static int udp_tpi_close(queue_t *q, int flags); 155 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 156 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 157 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 158 int sys_error); 159 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 160 t_scalar_t tlierr, int unixerr); 161 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 162 cred_t *cr); 163 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 164 char *value, caddr_t cp, cred_t *cr); 165 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 166 char *value, caddr_t cp, cred_t *cr); 167 static void udp_icmp_error(conn_t *, mblk_t *); 168 static void udp_icmp_error_ipv6(conn_t *, mblk_t *); 169 static void udp_info_req(queue_t *q, mblk_t *mp); 170 static void udp_input(void *, mblk_t *, void *); 171 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 172 t_scalar_t addr_length); 173 static void udp_lrput(queue_t *, mblk_t *); 174 static void udp_lwput(queue_t *, mblk_t *); 175 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 176 cred_t *credp, 
boolean_t isv6); 177 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 178 cred_t *credp); 179 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 180 cred_t *credp); 181 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 182 int *errorp, udpattrs_t *udpattrs); 183 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 184 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 185 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 186 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 187 cred_t *cr); 188 static int udp_rinfop(queue_t *q, infod_t *dp); 189 static int udp_rrw(queue_t *q, struiod_t *dp); 190 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 191 ipha_t *ipha); 192 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 193 t_scalar_t destlen, t_scalar_t err); 194 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 195 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 196 boolean_t random); 197 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 198 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 199 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 200 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 201 static void udp_wput_other(queue_t *q, mblk_t *mp); 202 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 203 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 204 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 205 206 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 207 static void udp_stack_fini(netstackid_t stackid, void *arg); 208 209 static void *udp_kstat_init(netstackid_t stackid); 210 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 211 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 212 static void udp_kstat2_fini(netstackid_t, kstat_t *); 213 static int 
udp_kstat_update(kstat_t *kp, int rw); 214 215 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 216 uint_t pkt_len); 217 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 218 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 219 220 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 221 cred_t *, pid_t); 222 static void udp_ulp_recv(conn_t *, mblk_t *); 223 224 /* Common routine for TPI and socket module */ 225 static conn_t *udp_do_open(cred_t *, boolean_t, int); 226 static void udp_do_close(conn_t *); 227 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 228 boolean_t); 229 static int udp_do_unbind(conn_t *); 230 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 231 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 232 233 int udp_getsockname(sock_lower_handle_t, 234 struct sockaddr *, socklen_t *, cred_t *); 235 int udp_getpeername(sock_lower_handle_t, 236 struct sockaddr *, socklen_t *, cred_t *); 237 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 238 cred_t *cr); 239 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 240 241 #define UDP_RECV_HIWATER (56 * 1024) 242 #define UDP_RECV_LOWATER 128 243 #define UDP_XMIT_HIWATER (56 * 1024) 244 #define UDP_XMIT_LOWATER 1024 245 246 /* 247 * The following is defined in tcp.c 248 */ 249 extern int (*cl_inet_connect2)(netstackid_t stack_id, 250 uint8_t protocol, boolean_t is_outgoing, 251 sa_family_t addr_family, 252 uint8_t *laddrp, in_port_t lport, 253 uint8_t *faddrp, in_port_t fport, void *args); 254 255 /* 256 * Checks if the given destination addr/port is allowed out. 257 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 258 * Called for each connect() and for sendto()/sendmsg() to a different 259 * destination. 260 * For connect(), called in udp_connect(). 261 * For sendto()/sendmsg(), called in udp_output_v{4,6}(). 
262 * 263 * This macro assumes that the cl_inet_connect2 hook is not NULL. 264 * Please check this before calling this macro. 265 * 266 * void 267 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 268 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 269 */ 270 #define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ 271 (err) = 0; \ 272 /* \ 273 * Running in cluster mode - check and register active \ 274 * "connection" information \ 275 */ \ 276 if ((udp)->udp_ipversion == IPV4_VERSION) \ 277 (err) = (*cl_inet_connect2)( \ 278 (cp)->conn_netstack->netstack_stackid, \ 279 IPPROTO_UDP, is_outgoing, AF_INET, \ 280 (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ 281 (udp)->udp_port, \ 282 (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ 283 (in_port_t)(fport), NULL); \ 284 else \ 285 (err) = (*cl_inet_connect2)( \ 286 (cp)->conn_netstack->netstack_stackid, \ 287 IPPROTO_UDP, is_outgoing, AF_INET6, \ 288 (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ 289 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 290 } 291 292 static struct module_info udp_mod_info = { 293 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 294 }; 295 296 /* 297 * Entry points for UDP as a device. 298 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 
299 */ 300 static struct qinit udp_rinitv4 = { 301 NULL, NULL, udp_openv4, udp_tpi_close, NULL, 302 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 303 }; 304 305 static struct qinit udp_rinitv6 = { 306 NULL, NULL, udp_openv6, udp_tpi_close, NULL, 307 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 308 }; 309 310 static struct qinit udp_winit = { 311 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, 312 &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE 313 }; 314 315 /* UDP entry point during fallback */ 316 struct qinit udp_fallback_sock_winit = { 317 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 318 }; 319 320 /* 321 * UDP needs to handle I_LINK and I_PLINK since ifconfig 322 * likes to use it as a place to hang the various streams. 323 */ 324 static struct qinit udp_lrinit = { 325 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, 326 &udp_mod_info 327 }; 328 329 static struct qinit udp_lwinit = { 330 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, 331 &udp_mod_info 332 }; 333 334 /* For AF_INET aka /dev/udp */ 335 struct streamtab udpinfov4 = { 336 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 337 }; 338 339 /* For AF_INET6 aka /dev/udp6 */ 340 struct streamtab udpinfov6 = { 341 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 342 }; 343 344 static sin_t sin_null; /* Zero address for quick clears */ 345 static sin6_t sin6_null; /* Zero address for quick clears */ 346 347 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 348 349 /* Default structure copied into T_INFO_ACK messages */ 350 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 351 T_INFO_ACK, 352 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 353 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 354 T_INVALID, /* CDATA_size. udp does not support connect data. */ 355 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 356 sizeof (sin_t), /* ADDR_size. 
*/ 357 0, /* OPT_size - not initialized here */ 358 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 359 T_CLTS, /* SERV_type. udp supports connection-less. */ 360 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 361 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 362 }; 363 364 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 365 366 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 367 T_INFO_ACK, 368 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 369 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 370 T_INVALID, /* CDATA_size. udp does not support connect data. */ 371 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 372 sizeof (sin6_t), /* ADDR_size. */ 373 0, /* OPT_size - not initialized here */ 374 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 375 T_CLTS, /* SERV_type. udp supports connection-less. */ 376 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 377 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 378 }; 379 380 /* largest UDP port number */ 381 #define UDP_MAX_PORT 65535 382 383 /* 384 * Table of ND variables supported by udp. These are loaded into us_nd 385 * in udp_open. 386 * All of these are alterable, within the min/max values given, at run time. 
387 */ 388 /* BEGIN CSTYLED */ 389 udpparam_t udp_param_arr[] = { 390 /*min max value name */ 391 { 0L, 256, 32, "udp_wroff_extra" }, 392 { 1L, 255, 255, "udp_ipv4_ttl" }, 393 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 394 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 395 { 0, 1, 1, "udp_do_checksum" }, 396 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 397 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 398 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 399 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 400 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 401 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 402 }; 403 /* END CSTYLED */ 404 405 /* Setable in /etc/system */ 406 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 407 uint32_t udp_random_anon_port = 1; 408 409 /* 410 * Hook functions to enable cluster networking. 411 * On non-clustered systems these vectors must always be NULL 412 */ 413 414 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 415 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 416 void *args) = NULL; 417 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 418 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 419 void *args) = NULL; 420 421 typedef union T_primitives *t_primp_t; 422 423 /* 424 * Return the next anonymous port in the privileged port range for 425 * bind checking. 426 * 427 * Trusted Extension (TX) notes: TX allows administrator to mark or 428 * reserve ports as Multilevel ports (MLP). MLP has special function 429 * on TX systems. Once a port is made MLP, it's not available as 430 * ordinary port. This creates "holes" in the port name space. It 431 * may be necessary to skip the "holes" find a suitable anon port. 
432 */ 433 static in_port_t 434 udp_get_next_priv_port(udp_t *udp) 435 { 436 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 437 in_port_t nextport; 438 boolean_t restart = B_FALSE; 439 udp_stack_t *us = udp->udp_us; 440 441 retry: 442 if (next_priv_port < us->us_min_anonpriv_port || 443 next_priv_port >= IPPORT_RESERVED) { 444 next_priv_port = IPPORT_RESERVED - 1; 445 if (restart) 446 return (0); 447 restart = B_TRUE; 448 } 449 450 if (is_system_labeled() && 451 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 452 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 453 next_priv_port = nextport; 454 goto retry; 455 } 456 457 return (next_priv_port--); 458 } 459 460 /* 461 * Hash list removal routine for udp_t structures. 462 */ 463 static void 464 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 465 { 466 udp_t *udpnext; 467 kmutex_t *lockp; 468 udp_stack_t *us = udp->udp_us; 469 470 if (udp->udp_ptpbhn == NULL) 471 return; 472 473 /* 474 * Extract the lock pointer in case there are concurrent 475 * hash_remove's for this instance. 
476 */ 477 ASSERT(udp->udp_port != 0); 478 if (!caller_holds_lock) { 479 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 480 us->us_bind_fanout_size)].uf_lock; 481 ASSERT(lockp != NULL); 482 mutex_enter(lockp); 483 } 484 if (udp->udp_ptpbhn != NULL) { 485 udpnext = udp->udp_bind_hash; 486 if (udpnext != NULL) { 487 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 488 udp->udp_bind_hash = NULL; 489 } 490 *udp->udp_ptpbhn = udpnext; 491 udp->udp_ptpbhn = NULL; 492 } 493 if (!caller_holds_lock) { 494 mutex_exit(lockp); 495 } 496 } 497 498 static void 499 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 500 { 501 udp_t **udpp; 502 udp_t *udpnext; 503 504 ASSERT(MUTEX_HELD(&uf->uf_lock)); 505 ASSERT(udp->udp_ptpbhn == NULL); 506 udpp = &uf->uf_udp; 507 udpnext = udpp[0]; 508 if (udpnext != NULL) { 509 /* 510 * If the new udp bound to the INADDR_ANY address 511 * and the first one in the list is not bound to 512 * INADDR_ANY we skip all entries until we find the 513 * first one bound to INADDR_ANY. 514 * This makes sure that applications binding to a 515 * specific address get preference over those binding to 516 * INADDR_ANY. 517 */ 518 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 519 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 520 while ((udpnext = udpp[0]) != NULL && 521 !V6_OR_V4_INADDR_ANY( 522 udpnext->udp_bound_v6src)) { 523 udpp = &(udpnext->udp_bind_hash); 524 } 525 if (udpnext != NULL) 526 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 527 } else { 528 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 529 } 530 } 531 udp->udp_bind_hash = udpnext; 532 udp->udp_ptpbhn = udpp; 533 udpp[0] = udp; 534 } 535 536 /* 537 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 538 * passed to udp_wput. 539 * It associates a port number and local address with the stream. 540 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 541 * protocol type (IPPROTO_UDP) placed in the message following the address. 
542 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 543 * (Called as writer.) 544 * 545 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 546 * without setting SO_REUSEADDR. This is needed so that they 547 * can be viewed as two independent transport protocols. 548 * However, anonymouns ports are allocated from the same range to avoid 549 * duplicating the us->us_next_port_to_try. 550 */ 551 static void 552 udp_tpi_bind(queue_t *q, mblk_t *mp) 553 { 554 sin_t *sin; 555 sin6_t *sin6; 556 mblk_t *mp1; 557 struct T_bind_req *tbr; 558 conn_t *connp; 559 udp_t *udp; 560 int error; 561 struct sockaddr *sa; 562 cred_t *cr; 563 564 /* 565 * All Solaris components should pass a db_credp 566 * for this TPI message, hence we ASSERT. 567 * But in case there is some other M_PROTO that looks 568 * like a TPI message sent by some other kernel 569 * component, we check and return an error. 570 */ 571 cr = msg_getcred(mp, NULL); 572 ASSERT(cr != NULL); 573 if (cr == NULL) { 574 udp_err_ack(q, mp, TSYSERR, EINVAL); 575 return; 576 } 577 578 connp = Q_TO_CONN(q); 579 udp = connp->conn_udp; 580 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 581 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 582 "udp_bind: bad req, len %u", 583 (uint_t)(mp->b_wptr - mp->b_rptr)); 584 udp_err_ack(q, mp, TPROTO, 0); 585 return; 586 } 587 if (udp->udp_state != TS_UNBND) { 588 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 589 "udp_bind: bad state, %u", udp->udp_state); 590 udp_err_ack(q, mp, TOUTSTATE, 0); 591 return; 592 } 593 /* 594 * Reallocate the message to make sure we have enough room for an 595 * address and the protocol type. 596 */ 597 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 598 if (!mp1) { 599 udp_err_ack(q, mp, TSYSERR, ENOMEM); 600 return; 601 } 602 603 mp = mp1; 604 605 /* Reset the message type in preparation for shipping it back. 
*/ 606 DB_TYPE(mp) = M_PCPROTO; 607 608 tbr = (struct T_bind_req *)mp->b_rptr; 609 switch (tbr->ADDR_length) { 610 case 0: /* Request for a generic port */ 611 tbr->ADDR_offset = sizeof (struct T_bind_req); 612 if (udp->udp_family == AF_INET) { 613 tbr->ADDR_length = sizeof (sin_t); 614 sin = (sin_t *)&tbr[1]; 615 *sin = sin_null; 616 sin->sin_family = AF_INET; 617 mp->b_wptr = (uchar_t *)&sin[1]; 618 sa = (struct sockaddr *)sin; 619 } else { 620 ASSERT(udp->udp_family == AF_INET6); 621 tbr->ADDR_length = sizeof (sin6_t); 622 sin6 = (sin6_t *)&tbr[1]; 623 *sin6 = sin6_null; 624 sin6->sin6_family = AF_INET6; 625 mp->b_wptr = (uchar_t *)&sin6[1]; 626 sa = (struct sockaddr *)sin6; 627 } 628 break; 629 630 case sizeof (sin_t): /* Complete IPv4 address */ 631 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 632 sizeof (sin_t)); 633 if (sa == NULL || !OK_32PTR((char *)sa)) { 634 udp_err_ack(q, mp, TSYSERR, EINVAL); 635 return; 636 } 637 if (udp->udp_family != AF_INET || 638 sa->sa_family != AF_INET) { 639 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 640 return; 641 } 642 break; 643 644 case sizeof (sin6_t): /* complete IPv6 address */ 645 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 646 sizeof (sin6_t)); 647 if (sa == NULL || !OK_32PTR((char *)sa)) { 648 udp_err_ack(q, mp, TSYSERR, EINVAL); 649 return; 650 } 651 if (udp->udp_family != AF_INET6 || 652 sa->sa_family != AF_INET6) { 653 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 654 return; 655 } 656 break; 657 658 default: /* Invalid request */ 659 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 660 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 661 udp_err_ack(q, mp, TBADADDR, 0); 662 return; 663 } 664 665 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 666 tbr->PRIM_type != O_T_BIND_REQ); 667 668 if (error != 0) { 669 if (error > 0) { 670 udp_err_ack(q, mp, TSYSERR, error); 671 } else { 672 udp_err_ack(q, mp, -error, 0); 673 } 674 } else { 675 tbr->PRIM_type = T_BIND_ACK; 676 
qreply(q, mp); 677 } 678 } 679 680 /* 681 * This routine handles each T_CONN_REQ message passed to udp. It 682 * associates a default destination address with the stream. 683 * 684 * This routine sends down a T_BIND_REQ to IP with the following mblks: 685 * T_BIND_REQ - specifying local and remote address/port 686 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 687 * T_OK_ACK - for the T_CONN_REQ 688 * T_CONN_CON - to keep the TPI user happy 689 * 690 * The connect completes in udp_do_connect. 691 * When a T_BIND_ACK is received information is extracted from the IRE 692 * and the two appended messages are sent to the TPI user. 693 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 694 * convert it to an error ack for the appropriate primitive. 695 */ 696 static void 697 udp_tpi_connect(queue_t *q, mblk_t *mp) 698 { 699 udp_t *udp; 700 conn_t *connp = Q_TO_CONN(q); 701 int error; 702 socklen_t len; 703 struct sockaddr *sa; 704 struct T_conn_req *tcr; 705 cred_t *cr; 706 707 /* 708 * All Solaris components should pass a db_credp 709 * for this TPI message, hence we ASSERT. 710 * But in case there is some other M_PROTO that looks 711 * like a TPI message sent by some other kernel 712 * component, we check and return an error. 713 */ 714 cr = msg_getcred(mp, NULL); 715 ASSERT(cr != NULL); 716 if (cr == NULL) { 717 udp_err_ack(q, mp, TSYSERR, EINVAL); 718 return; 719 } 720 721 udp = connp->conn_udp; 722 tcr = (struct T_conn_req *)mp->b_rptr; 723 724 /* A bit of sanity checking */ 725 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 726 udp_err_ack(q, mp, TPROTO, 0); 727 return; 728 } 729 730 if (tcr->OPT_length != 0) { 731 udp_err_ack(q, mp, TBADOPT, 0); 732 return; 733 } 734 735 /* 736 * Determine packet type based on type of address passed in 737 * the request should contain an IPv4 or IPv6 address. 
738 * Make sure that address family matches the type of 739 * family of the the address passed down 740 */ 741 len = tcr->DEST_length; 742 switch (tcr->DEST_length) { 743 default: 744 udp_err_ack(q, mp, TBADADDR, 0); 745 return; 746 747 case sizeof (sin_t): 748 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 749 sizeof (sin_t)); 750 break; 751 752 case sizeof (sin6_t): 753 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 754 sizeof (sin6_t)); 755 break; 756 } 757 758 error = proto_verify_ip_addr(udp->udp_family, sa, len); 759 if (error != 0) { 760 udp_err_ack(q, mp, TSYSERR, error); 761 return; 762 } 763 764 error = udp_do_connect(connp, sa, len, cr); 765 if (error != 0) { 766 if (error < 0) 767 udp_err_ack(q, mp, -error, 0); 768 else 769 udp_err_ack(q, mp, TSYSERR, error); 770 } else { 771 mblk_t *mp1; 772 /* 773 * We have to send a connection confirmation to 774 * keep TLI happy. 775 */ 776 if (udp->udp_family == AF_INET) { 777 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 778 sizeof (sin_t), NULL, 0); 779 } else { 780 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 781 sizeof (sin6_t), NULL, 0); 782 } 783 if (mp1 == NULL) { 784 udp_err_ack(q, mp, TSYSERR, ENOMEM); 785 return; 786 } 787 788 /* 789 * Send ok_ack for T_CONN_REQ 790 */ 791 mp = mi_tpi_ok_ack_alloc(mp); 792 if (mp == NULL) { 793 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 794 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 795 return; 796 } 797 798 putnext(connp->conn_rq, mp); 799 putnext(connp->conn_rq, mp1); 800 } 801 } 802 803 static int 804 udp_tpi_close(queue_t *q, int flags) 805 { 806 conn_t *connp; 807 808 if (flags & SO_FALLBACK) { 809 /* 810 * stream is being closed while in fallback 811 * simply free the resources that were allocated 812 */ 813 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 814 qprocsoff(q); 815 goto done; 816 } 817 818 connp = Q_TO_CONN(q); 819 udp_do_close(connp); 820 done: 821 q->q_ptr = WR(q)->q_ptr = NULL; 822 return (0); 823 } 824 825 /* 826 * Called in the close path to quiesce the conn 827 */ 828 void 829 udp_quiesce_conn(conn_t *connp) 830 { 831 udp_t *udp = connp->conn_udp; 832 833 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 834 /* 835 * Running in cluster mode - register unbind information 836 */ 837 if (udp->udp_ipversion == IPV4_VERSION) { 838 (*cl_inet_unbind)( 839 connp->conn_netstack->netstack_stackid, 840 IPPROTO_UDP, AF_INET, 841 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 842 (in_port_t)udp->udp_port, NULL); 843 } else { 844 (*cl_inet_unbind)( 845 connp->conn_netstack->netstack_stackid, 846 IPPROTO_UDP, AF_INET6, 847 (uint8_t *)(&(udp->udp_v6src)), 848 (in_port_t)udp->udp_port, NULL); 849 } 850 } 851 852 udp_bind_hash_remove(udp, B_FALSE); 853 854 } 855 856 void 857 udp_close_free(conn_t *connp) 858 { 859 udp_t *udp = connp->conn_udp; 860 861 /* If there are any options associated with the stream, free them. 
*/ 862 if (udp->udp_ip_snd_options != NULL) { 863 mi_free((char *)udp->udp_ip_snd_options); 864 udp->udp_ip_snd_options = NULL; 865 udp->udp_ip_snd_options_len = 0; 866 } 867 868 if (udp->udp_ip_rcv_options != NULL) { 869 mi_free((char *)udp->udp_ip_rcv_options); 870 udp->udp_ip_rcv_options = NULL; 871 udp->udp_ip_rcv_options_len = 0; 872 } 873 874 /* Free memory associated with sticky options */ 875 if (udp->udp_sticky_hdrs_len != 0) { 876 kmem_free(udp->udp_sticky_hdrs, 877 udp->udp_sticky_hdrs_len); 878 udp->udp_sticky_hdrs = NULL; 879 udp->udp_sticky_hdrs_len = 0; 880 } 881 882 ip6_pkt_free(&udp->udp_sticky_ipp); 883 884 /* 885 * Clear any fields which the kmem_cache constructor clears. 886 * Only udp_connp needs to be preserved. 887 * TBD: We should make this more efficient to avoid clearing 888 * everything. 889 */ 890 ASSERT(udp->udp_connp == connp); 891 bzero(udp, sizeof (udp_t)); 892 udp->udp_connp = connp; 893 } 894 895 static int 896 udp_do_disconnect(conn_t *connp) 897 { 898 udp_t *udp; 899 mblk_t *ire_mp; 900 udp_fanout_t *udpf; 901 udp_stack_t *us; 902 int error; 903 904 udp = connp->conn_udp; 905 us = udp->udp_us; 906 rw_enter(&udp->udp_rwlock, RW_WRITER); 907 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 908 rw_exit(&udp->udp_rwlock); 909 return (-TOUTSTATE); 910 } 911 udp->udp_pending_op = T_DISCON_REQ; 912 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 913 us->us_bind_fanout_size)]; 914 mutex_enter(&udpf->uf_lock); 915 udp->udp_v6src = udp->udp_bound_v6src; 916 udp->udp_state = TS_IDLE; 917 mutex_exit(&udpf->uf_lock); 918 919 if (udp->udp_family == AF_INET6) { 920 /* Rebuild the header template */ 921 error = udp_build_hdrs(udp); 922 if (error != 0) { 923 udp->udp_pending_op = -1; 924 rw_exit(&udp->udp_rwlock); 925 return (error); 926 } 927 } 928 929 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 930 if (ire_mp == NULL) { 931 mutex_enter(&udpf->uf_lock); 932 udp->udp_pending_op = -1; 933 mutex_exit(&udpf->uf_lock); 934 
rw_exit(&udp->udp_rwlock); 935 return (ENOMEM); 936 } 937 938 rw_exit(&udp->udp_rwlock); 939 940 if (udp->udp_family == AF_INET6) { 941 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 942 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 943 } else { 944 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 945 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 946 } 947 948 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 949 } 950 951 952 static void 953 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 954 { 955 conn_t *connp = Q_TO_CONN(q); 956 int error; 957 958 /* 959 * Allocate the largest primitive we need to send back 960 * T_error_ack is > than T_ok_ack 961 */ 962 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 963 if (mp == NULL) { 964 /* Unable to reuse the T_DISCON_REQ for the ack. */ 965 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 966 return; 967 } 968 969 error = udp_do_disconnect(connp); 970 971 if (error != 0) { 972 if (error < 0) { 973 udp_err_ack(q, mp, -error, 0); 974 } else { 975 udp_err_ack(q, mp, TSYSERR, error); 976 } 977 } else { 978 mp = mi_tpi_ok_ack_alloc(mp); 979 ASSERT(mp != NULL); 980 qreply(q, mp); 981 } 982 } 983 984 int 985 udp_disconnect(conn_t *connp) 986 { 987 int error; 988 udp_t *udp = connp->conn_udp; 989 990 udp->udp_dgram_errind = B_FALSE; 991 992 error = udp_do_disconnect(connp); 993 994 if (error < 0) 995 error = proto_tlitosyserr(-error); 996 997 return (error); 998 } 999 1000 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 1001 static void 1002 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1003 { 1004 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1005 qreply(q, mp); 1006 } 1007 1008 /* Shorthand to generate and send TPI error acks to our client */ 1009 static void 1010 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1011 int sys_error) 1012 { 1013 struct T_error_ack *teackp; 1014 1015 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1016 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1017 teackp = (struct T_error_ack *)mp->b_rptr; 1018 teackp->ERROR_prim = primitive; 1019 teackp->TLI_error = t_error; 1020 teackp->UNIX_error = sys_error; 1021 qreply(q, mp); 1022 } 1023 } 1024 1025 /*ARGSUSED*/ 1026 static int 1027 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1028 { 1029 int i; 1030 udp_t *udp = Q_TO_UDP(q); 1031 udp_stack_t *us = udp->udp_us; 1032 1033 for (i = 0; i < us->us_num_epriv_ports; i++) { 1034 if (us->us_epriv_ports[i] != 0) 1035 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1036 } 1037 return (0); 1038 } 1039 1040 /* ARGSUSED */ 1041 static int 1042 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1043 cred_t *cr) 1044 { 1045 long new_value; 1046 int i; 1047 udp_t *udp = Q_TO_UDP(q); 1048 udp_stack_t *us = udp->udp_us; 1049 1050 /* 1051 * Fail the request if the new value does not lie within the 1052 * port number limits. 
1053 */ 1054 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1055 new_value <= 0 || new_value >= 65536) { 1056 return (EINVAL); 1057 } 1058 1059 /* Check if the value is already in the list */ 1060 for (i = 0; i < us->us_num_epriv_ports; i++) { 1061 if (new_value == us->us_epriv_ports[i]) { 1062 return (EEXIST); 1063 } 1064 } 1065 /* Find an empty slot */ 1066 for (i = 0; i < us->us_num_epriv_ports; i++) { 1067 if (us->us_epriv_ports[i] == 0) 1068 break; 1069 } 1070 if (i == us->us_num_epriv_ports) { 1071 return (EOVERFLOW); 1072 } 1073 1074 /* Set the new value */ 1075 us->us_epriv_ports[i] = (in_port_t)new_value; 1076 return (0); 1077 } 1078 1079 /* ARGSUSED */ 1080 static int 1081 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1082 cred_t *cr) 1083 { 1084 long new_value; 1085 int i; 1086 udp_t *udp = Q_TO_UDP(q); 1087 udp_stack_t *us = udp->udp_us; 1088 1089 /* 1090 * Fail the request if the new value does not lie within the 1091 * port number limits. 1092 */ 1093 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1094 new_value <= 0 || new_value >= 65536) { 1095 return (EINVAL); 1096 } 1097 1098 /* Check that the value is already in the list */ 1099 for (i = 0; i < us->us_num_epriv_ports; i++) { 1100 if (us->us_epriv_ports[i] == new_value) 1101 break; 1102 } 1103 if (i == us->us_num_epriv_ports) { 1104 return (ESRCH); 1105 } 1106 1107 /* Clear the value */ 1108 us->us_epriv_ports[i] = 0; 1109 return (0); 1110 } 1111 1112 /* At minimum we need 4 bytes of UDP header */ 1113 #define ICMP_MIN_UDP_HDR 4 1114 1115 /* 1116 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1117 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1118 * Assumes that IP has pulled up everything up to and including the ICMP header. 
1119 */ 1120 static void 1121 udp_icmp_error(conn_t *connp, mblk_t *mp) 1122 { 1123 icmph_t *icmph; 1124 ipha_t *ipha; 1125 int iph_hdr_length; 1126 udpha_t *udpha; 1127 sin_t sin; 1128 sin6_t sin6; 1129 mblk_t *mp1; 1130 int error = 0; 1131 udp_t *udp = connp->conn_udp; 1132 1133 mp1 = NULL; 1134 ipha = (ipha_t *)mp->b_rptr; 1135 1136 ASSERT(OK_32PTR(mp->b_rptr)); 1137 1138 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1139 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1140 udp_icmp_error_ipv6(connp, mp); 1141 return; 1142 } 1143 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1144 1145 /* Skip past the outer IP and ICMP headers */ 1146 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1147 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1148 ipha = (ipha_t *)&icmph[1]; 1149 1150 /* Skip past the inner IP and find the ULP header */ 1151 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1152 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1153 1154 switch (icmph->icmph_type) { 1155 case ICMP_DEST_UNREACHABLE: 1156 switch (icmph->icmph_code) { 1157 case ICMP_FRAGMENTATION_NEEDED: 1158 /* 1159 * IP has already adjusted the path MTU. 1160 */ 1161 break; 1162 case ICMP_PORT_UNREACHABLE: 1163 case ICMP_PROTOCOL_UNREACHABLE: 1164 error = ECONNREFUSED; 1165 break; 1166 default: 1167 /* Transient errors */ 1168 break; 1169 } 1170 break; 1171 default: 1172 /* Transient errors */ 1173 break; 1174 } 1175 if (error == 0) { 1176 freemsg(mp); 1177 return; 1178 } 1179 1180 /* 1181 * Deliver T_UDERROR_IND when the application has asked for it. 1182 * The socket layer enables this automatically when connected. 
1183 */ 1184 if (!udp->udp_dgram_errind) { 1185 freemsg(mp); 1186 return; 1187 } 1188 1189 1190 switch (udp->udp_family) { 1191 case AF_INET: 1192 sin = sin_null; 1193 sin.sin_family = AF_INET; 1194 sin.sin_addr.s_addr = ipha->ipha_dst; 1195 sin.sin_port = udpha->uha_dst_port; 1196 if (IPCL_IS_NONSTR(connp)) { 1197 rw_enter(&udp->udp_rwlock, RW_WRITER); 1198 if (udp->udp_state == TS_DATA_XFER) { 1199 if (sin.sin_port == udp->udp_dstport && 1200 sin.sin_addr.s_addr == 1201 V4_PART_OF_V6(udp->udp_v6dst)) { 1202 rw_exit(&udp->udp_rwlock); 1203 (*connp->conn_upcalls->su_set_error) 1204 (connp->conn_upper_handle, error); 1205 goto done; 1206 } 1207 } else { 1208 udp->udp_delayed_error = error; 1209 *((sin_t *)&udp->udp_delayed_addr) = sin; 1210 } 1211 rw_exit(&udp->udp_rwlock); 1212 } else { 1213 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1214 NULL, 0, error); 1215 } 1216 break; 1217 case AF_INET6: 1218 sin6 = sin6_null; 1219 sin6.sin6_family = AF_INET6; 1220 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1221 sin6.sin6_port = udpha->uha_dst_port; 1222 if (IPCL_IS_NONSTR(connp)) { 1223 rw_enter(&udp->udp_rwlock, RW_WRITER); 1224 if (udp->udp_state == TS_DATA_XFER) { 1225 if (sin6.sin6_port == udp->udp_dstport && 1226 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1227 &udp->udp_v6dst)) { 1228 rw_exit(&udp->udp_rwlock); 1229 (*connp->conn_upcalls->su_set_error) 1230 (connp->conn_upper_handle, error); 1231 goto done; 1232 } 1233 } else { 1234 udp->udp_delayed_error = error; 1235 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1236 } 1237 rw_exit(&udp->udp_rwlock); 1238 } else { 1239 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1240 NULL, 0, error); 1241 } 1242 break; 1243 } 1244 if (mp1 != NULL) 1245 putnext(connp->conn_rq, mp1); 1246 done: 1247 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1248 freemsg(mp); 1249 } 1250 1251 /* 1252 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 
1253 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1254 * Assumes that IP has pulled up all the extension headers as well as the 1255 * ICMPv6 header. 1256 */ 1257 static void 1258 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1259 { 1260 icmp6_t *icmp6; 1261 ip6_t *ip6h, *outer_ip6h; 1262 uint16_t iph_hdr_length; 1263 uint8_t *nexthdrp; 1264 udpha_t *udpha; 1265 sin6_t sin6; 1266 mblk_t *mp1; 1267 int error = 0; 1268 udp_t *udp = connp->conn_udp; 1269 udp_stack_t *us = udp->udp_us; 1270 1271 outer_ip6h = (ip6_t *)mp->b_rptr; 1272 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1273 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1274 else 1275 iph_hdr_length = IPV6_HDR_LEN; 1276 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1277 ip6h = (ip6_t *)&icmp6[1]; 1278 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1279 freemsg(mp); 1280 return; 1281 } 1282 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1283 1284 switch (icmp6->icmp6_type) { 1285 case ICMP6_DST_UNREACH: 1286 switch (icmp6->icmp6_code) { 1287 case ICMP6_DST_UNREACH_NOPORT: 1288 error = ECONNREFUSED; 1289 break; 1290 case ICMP6_DST_UNREACH_ADMIN: 1291 case ICMP6_DST_UNREACH_NOROUTE: 1292 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1293 case ICMP6_DST_UNREACH_ADDR: 1294 /* Transient errors */ 1295 break; 1296 default: 1297 break; 1298 } 1299 break; 1300 case ICMP6_PACKET_TOO_BIG: { 1301 struct T_unitdata_ind *tudi; 1302 struct T_opthdr *toh; 1303 size_t udi_size; 1304 mblk_t *newmp; 1305 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1306 sizeof (struct ip6_mtuinfo); 1307 sin6_t *sin6; 1308 struct ip6_mtuinfo *mtuinfo; 1309 1310 /* 1311 * If the application has requested to receive path mtu 1312 * information, send up an empty message containing an 1313 * IPV6_PATHMTU ancillary data item. 
1314 */ 1315 if (!udp->udp_ipv6_recvpathmtu) 1316 break; 1317 1318 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1319 opt_length; 1320 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1321 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1322 break; 1323 } 1324 1325 /* 1326 * newmp->b_cont is left to NULL on purpose. This is an 1327 * empty message containing only ancillary data. 1328 */ 1329 newmp->b_datap->db_type = M_PROTO; 1330 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1331 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1332 tudi->PRIM_type = T_UNITDATA_IND; 1333 tudi->SRC_length = sizeof (sin6_t); 1334 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1335 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1336 tudi->OPT_length = opt_length; 1337 1338 sin6 = (sin6_t *)&tudi[1]; 1339 bzero(sin6, sizeof (sin6_t)); 1340 sin6->sin6_family = AF_INET6; 1341 sin6->sin6_addr = udp->udp_v6dst; 1342 1343 toh = (struct T_opthdr *)&sin6[1]; 1344 toh->level = IPPROTO_IPV6; 1345 toh->name = IPV6_PATHMTU; 1346 toh->len = opt_length; 1347 toh->status = 0; 1348 1349 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1350 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1351 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1352 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1353 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1354 /* 1355 * We've consumed everything we need from the original 1356 * message. Free it, then send our empty message. 
1357 */ 1358 freemsg(mp); 1359 udp_ulp_recv(connp, newmp); 1360 1361 return; 1362 } 1363 case ICMP6_TIME_EXCEEDED: 1364 /* Transient errors */ 1365 break; 1366 case ICMP6_PARAM_PROB: 1367 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1368 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1369 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1370 (uchar_t *)nexthdrp) { 1371 error = ECONNREFUSED; 1372 break; 1373 } 1374 break; 1375 } 1376 if (error == 0) { 1377 freemsg(mp); 1378 return; 1379 } 1380 1381 /* 1382 * Deliver T_UDERROR_IND when the application has asked for it. 1383 * The socket layer enables this automatically when connected. 1384 */ 1385 if (!udp->udp_dgram_errind) { 1386 freemsg(mp); 1387 return; 1388 } 1389 1390 sin6 = sin6_null; 1391 sin6.sin6_family = AF_INET6; 1392 sin6.sin6_addr = ip6h->ip6_dst; 1393 sin6.sin6_port = udpha->uha_dst_port; 1394 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1395 1396 if (IPCL_IS_NONSTR(connp)) { 1397 rw_enter(&udp->udp_rwlock, RW_WRITER); 1398 if (udp->udp_state == TS_DATA_XFER) { 1399 if (sin6.sin6_port == udp->udp_dstport && 1400 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1401 &udp->udp_v6dst)) { 1402 rw_exit(&udp->udp_rwlock); 1403 (*connp->conn_upcalls->su_set_error) 1404 (connp->conn_upper_handle, error); 1405 goto done; 1406 } 1407 } else { 1408 udp->udp_delayed_error = error; 1409 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1410 } 1411 rw_exit(&udp->udp_rwlock); 1412 } else { 1413 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1414 NULL, 0, error); 1415 if (mp1 != NULL) 1416 putnext(connp->conn_rq, mp1); 1417 } 1418 done: 1419 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1420 freemsg(mp); 1421 } 1422 1423 /* 1424 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1425 * The local address is filled in if endpoint is bound. 
The remote address 1426 * is filled in if remote address has been precified ("connected endpoint") 1427 * (The concept of connected CLTS sockets is alien to published TPI 1428 * but we support it anyway). 1429 */ 1430 static void 1431 udp_addr_req(queue_t *q, mblk_t *mp) 1432 { 1433 sin_t *sin; 1434 sin6_t *sin6; 1435 mblk_t *ackmp; 1436 struct T_addr_ack *taa; 1437 udp_t *udp = Q_TO_UDP(q); 1438 1439 /* Make it large enough for worst case */ 1440 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1441 2 * sizeof (sin6_t), 1); 1442 if (ackmp == NULL) { 1443 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1444 return; 1445 } 1446 taa = (struct T_addr_ack *)ackmp->b_rptr; 1447 1448 bzero(taa, sizeof (struct T_addr_ack)); 1449 ackmp->b_wptr = (uchar_t *)&taa[1]; 1450 1451 taa->PRIM_type = T_ADDR_ACK; 1452 ackmp->b_datap->db_type = M_PCPROTO; 1453 rw_enter(&udp->udp_rwlock, RW_READER); 1454 /* 1455 * Note: Following code assumes 32 bit alignment of basic 1456 * data structures like sin_t and struct T_addr_ack. 1457 */ 1458 if (udp->udp_state != TS_UNBND) { 1459 /* 1460 * Fill in local address first 1461 */ 1462 taa->LOCADDR_offset = sizeof (*taa); 1463 if (udp->udp_family == AF_INET) { 1464 taa->LOCADDR_length = sizeof (sin_t); 1465 sin = (sin_t *)&taa[1]; 1466 /* Fill zeroes and then initialize non-zero fields */ 1467 *sin = sin_null; 1468 sin->sin_family = AF_INET; 1469 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1470 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1471 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1472 sin->sin_addr.s_addr); 1473 } else { 1474 /* 1475 * INADDR_ANY 1476 * udp_v6src is not set, we might be bound to 1477 * broadcast/multicast. 
Use udp_bound_v6src as 1478 * local address instead (that could 1479 * also still be INADDR_ANY) 1480 */ 1481 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1482 sin->sin_addr.s_addr); 1483 } 1484 sin->sin_port = udp->udp_port; 1485 ackmp->b_wptr = (uchar_t *)&sin[1]; 1486 if (udp->udp_state == TS_DATA_XFER) { 1487 /* 1488 * connected, fill remote address too 1489 */ 1490 taa->REMADDR_length = sizeof (sin_t); 1491 /* assumed 32-bit alignment */ 1492 taa->REMADDR_offset = taa->LOCADDR_offset + 1493 taa->LOCADDR_length; 1494 1495 sin = (sin_t *)(ackmp->b_rptr + 1496 taa->REMADDR_offset); 1497 /* initialize */ 1498 *sin = sin_null; 1499 sin->sin_family = AF_INET; 1500 sin->sin_addr.s_addr = 1501 V4_PART_OF_V6(udp->udp_v6dst); 1502 sin->sin_port = udp->udp_dstport; 1503 ackmp->b_wptr = (uchar_t *)&sin[1]; 1504 } 1505 } else { 1506 taa->LOCADDR_length = sizeof (sin6_t); 1507 sin6 = (sin6_t *)&taa[1]; 1508 /* Fill zeroes and then initialize non-zero fields */ 1509 *sin6 = sin6_null; 1510 sin6->sin6_family = AF_INET6; 1511 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1512 sin6->sin6_addr = udp->udp_v6src; 1513 } else { 1514 /* 1515 * UNSPECIFIED 1516 * udp_v6src is not set, we might be bound to 1517 * broadcast/multicast. 
Use udp_bound_v6src as 1518 * local address instead (that could 1519 * also still be UNSPECIFIED) 1520 */ 1521 sin6->sin6_addr = 1522 udp->udp_bound_v6src; 1523 } 1524 sin6->sin6_port = udp->udp_port; 1525 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1526 if (udp->udp_state == TS_DATA_XFER) { 1527 /* 1528 * connected, fill remote address too 1529 */ 1530 taa->REMADDR_length = sizeof (sin6_t); 1531 /* assumed 32-bit alignment */ 1532 taa->REMADDR_offset = taa->LOCADDR_offset + 1533 taa->LOCADDR_length; 1534 1535 sin6 = (sin6_t *)(ackmp->b_rptr + 1536 taa->REMADDR_offset); 1537 /* initialize */ 1538 *sin6 = sin6_null; 1539 sin6->sin6_family = AF_INET6; 1540 sin6->sin6_addr = udp->udp_v6dst; 1541 sin6->sin6_port = udp->udp_dstport; 1542 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1543 } 1544 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1545 } 1546 } 1547 rw_exit(&udp->udp_rwlock); 1548 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1549 qreply(q, ackmp); 1550 } 1551 1552 static void 1553 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1554 { 1555 if (udp->udp_family == AF_INET) { 1556 *tap = udp_g_t_info_ack_ipv4; 1557 } else { 1558 *tap = udp_g_t_info_ack_ipv6; 1559 } 1560 tap->CURRENT_state = udp->udp_state; 1561 tap->OPT_size = udp_max_optsize; 1562 } 1563 1564 static void 1565 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1566 t_uscalar_t cap_bits1) 1567 { 1568 tcap->CAP_bits1 = 0; 1569 1570 if (cap_bits1 & TC1_INFO) { 1571 udp_copy_info(&tcap->INFO_ack, udp); 1572 tcap->CAP_bits1 |= TC1_INFO; 1573 } 1574 } 1575 1576 /* 1577 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1578 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1579 * udp_g_t_info_ack. The current state of the stream is copied from 1580 * udp_state. 
1581 */ 1582 static void 1583 udp_capability_req(queue_t *q, mblk_t *mp) 1584 { 1585 t_uscalar_t cap_bits1; 1586 struct T_capability_ack *tcap; 1587 udp_t *udp = Q_TO_UDP(q); 1588 1589 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1590 1591 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1592 mp->b_datap->db_type, T_CAPABILITY_ACK); 1593 if (!mp) 1594 return; 1595 1596 tcap = (struct T_capability_ack *)mp->b_rptr; 1597 udp_do_capability_ack(udp, tcap, cap_bits1); 1598 1599 qreply(q, mp); 1600 } 1601 1602 /* 1603 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1604 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1605 * The current state of the stream is copied from udp_state. 1606 */ 1607 static void 1608 udp_info_req(queue_t *q, mblk_t *mp) 1609 { 1610 udp_t *udp = Q_TO_UDP(q); 1611 1612 /* Create a T_INFO_ACK message. */ 1613 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1614 T_INFO_ACK); 1615 if (!mp) 1616 return; 1617 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1618 qreply(q, mp); 1619 } 1620 1621 /* 1622 * IP recognizes seven kinds of bind requests: 1623 * 1624 * - A zero-length address binds only to the protocol number. 1625 * 1626 * - A 4-byte address is treated as a request to 1627 * validate that the address is a valid local IPv4 1628 * address, appropriate for an application to bind to. 1629 * IP does the verification, but does not make any note 1630 * of the address at this time. 1631 * 1632 * - A 16-byte address contains is treated as a request 1633 * to validate a local IPv6 address, as the 4-byte 1634 * address case above. 1635 * 1636 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1637 * use it for the inbound fanout of packets. 1638 * 1639 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1640 * use it for the inbound fanout of packets. 
1641 * 1642 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1643 * information consisting of local and remote addresses 1644 * and ports. In this case, the addresses are both 1645 * validated as appropriate for this operation, and, if 1646 * so, the information is retained for use in the 1647 * inbound fanout. 1648 * 1649 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1650 * fanout information, like the 12-byte case above. 1651 * 1652 * IP will also fill in the IRE request mblk with information 1653 * regarding our peer. In all cases, we notify IP of our protocol 1654 * type by appending a single protocol byte to the bind request. 1655 */ 1656 static mblk_t * 1657 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 1658 { 1659 char *cp; 1660 mblk_t *mp; 1661 struct T_bind_req *tbr; 1662 ipa_conn_t *ac; 1663 ipa6_conn_t *ac6; 1664 sin_t *sin; 1665 sin6_t *sin6; 1666 1667 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1668 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 1669 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1670 if (!mp) 1671 return (mp); 1672 mp->b_datap->db_type = M_PROTO; 1673 tbr = (struct T_bind_req *)mp->b_rptr; 1674 tbr->PRIM_type = bind_prim; 1675 tbr->ADDR_offset = sizeof (*tbr); 1676 tbr->CONIND_number = 0; 1677 tbr->ADDR_length = addr_length; 1678 cp = (char *)&tbr[1]; 1679 switch (addr_length) { 1680 case sizeof (ipa_conn_t): 1681 ASSERT(udp->udp_family == AF_INET); 1682 /* Append a request for an IRE */ 1683 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1684 if (!mp->b_cont) { 1685 freemsg(mp); 1686 return (NULL); 1687 } 1688 mp->b_cont->b_wptr += sizeof (ire_t); 1689 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1690 1691 /* cp known to be 32 bit aligned */ 1692 ac = (ipa_conn_t *)cp; 1693 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 1694 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 1695 ac->ac_fport = udp->udp_dstport; 1696 ac->ac_lport = udp->udp_port; 1697 
break; 1698 1699 case sizeof (ipa6_conn_t): 1700 ASSERT(udp->udp_family == AF_INET6); 1701 /* Append a request for an IRE */ 1702 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1703 if (!mp->b_cont) { 1704 freemsg(mp); 1705 return (NULL); 1706 } 1707 mp->b_cont->b_wptr += sizeof (ire_t); 1708 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1709 1710 /* cp known to be 32 bit aligned */ 1711 ac6 = (ipa6_conn_t *)cp; 1712 ac6->ac6_laddr = udp->udp_v6src; 1713 ac6->ac6_faddr = udp->udp_v6dst; 1714 ac6->ac6_fport = udp->udp_dstport; 1715 ac6->ac6_lport = udp->udp_port; 1716 break; 1717 1718 case sizeof (sin_t): 1719 ASSERT(udp->udp_family == AF_INET); 1720 /* Append a request for an IRE */ 1721 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1722 if (!mp->b_cont) { 1723 freemsg(mp); 1724 return (NULL); 1725 } 1726 mp->b_cont->b_wptr += sizeof (ire_t); 1727 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1728 1729 sin = (sin_t *)cp; 1730 *sin = sin_null; 1731 sin->sin_family = AF_INET; 1732 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 1733 sin->sin_port = udp->udp_port; 1734 break; 1735 1736 case sizeof (sin6_t): 1737 ASSERT(udp->udp_family == AF_INET6); 1738 /* Append a request for an IRE */ 1739 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1740 if (!mp->b_cont) { 1741 freemsg(mp); 1742 return (NULL); 1743 } 1744 mp->b_cont->b_wptr += sizeof (ire_t); 1745 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1746 1747 sin6 = (sin6_t *)cp; 1748 *sin6 = sin6_null; 1749 sin6->sin6_family = AF_INET6; 1750 sin6->sin6_addr = udp->udp_bound_v6src; 1751 sin6->sin6_port = udp->udp_port; 1752 break; 1753 } 1754 /* Add protocol number to end */ 1755 cp[addr_length] = (char)IPPROTO_UDP; 1756 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1757 return (mp); 1758 } 1759 1760 /* For /dev/udp aka AF_INET open */ 1761 static int 1762 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1763 { 1764 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1765 } 
1766 1767 /* For /dev/udp6 aka AF_INET6 open */ 1768 static int 1769 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1770 { 1771 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1772 } 1773 1774 /* 1775 * This is the open routine for udp. It allocates a udp_t structure for 1776 * the stream and, on the first open of the module, creates an ND table. 1777 */ 1778 /*ARGSUSED2*/ 1779 static int 1780 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1781 boolean_t isv6) 1782 { 1783 int error; 1784 udp_t *udp; 1785 conn_t *connp; 1786 dev_t conn_dev; 1787 udp_stack_t *us; 1788 vmem_t *minor_arena; 1789 1790 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1791 1792 /* If the stream is already open, return immediately. */ 1793 if (q->q_ptr != NULL) 1794 return (0); 1795 1796 if (sflag == MODOPEN) 1797 return (EINVAL); 1798 1799 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1800 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1801 minor_arena = ip_minor_arena_la; 1802 } else { 1803 /* 1804 * Either minor numbers in the large arena were exhausted 1805 * or a non socket application is doing the open. 1806 * Try to allocate from the small arena. 
1807 */ 1808 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1809 return (EBUSY); 1810 1811 minor_arena = ip_minor_arena_sa; 1812 } 1813 1814 if (flag & SO_FALLBACK) { 1815 /* 1816 * Non streams socket needs a stream to fallback to 1817 */ 1818 RD(q)->q_ptr = (void *)conn_dev; 1819 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1820 WR(q)->q_ptr = (void *)minor_arena; 1821 qprocson(q); 1822 return (0); 1823 } 1824 1825 connp = udp_do_open(credp, isv6, KM_SLEEP); 1826 if (connp == NULL) { 1827 inet_minor_free(minor_arena, conn_dev); 1828 return (ENOMEM); 1829 } 1830 udp = connp->conn_udp; 1831 us = udp->udp_us; 1832 1833 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1834 connp->conn_dev = conn_dev; 1835 connp->conn_minor_arena = minor_arena; 1836 1837 /* 1838 * Initialize the udp_t structure for this stream. 1839 */ 1840 q->q_ptr = connp; 1841 WR(q)->q_ptr = connp; 1842 connp->conn_rq = q; 1843 connp->conn_wq = WR(q); 1844 1845 rw_enter(&udp->udp_rwlock, RW_WRITER); 1846 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1847 ASSERT(connp->conn_udp == udp); 1848 ASSERT(udp->udp_connp == connp); 1849 1850 if (flag & SO_SOCKSTR) { 1851 connp->conn_flags |= IPCL_SOCKET; 1852 udp->udp_issocket = B_TRUE; 1853 udp->udp_direct_sockfs = B_TRUE; 1854 } 1855 1856 q->q_hiwat = us->us_recv_hiwat; 1857 WR(q)->q_hiwat = us->us_xmit_hiwat; 1858 WR(q)->q_lowat = us->us_xmit_lowat; 1859 1860 qprocson(q); 1861 1862 if (udp->udp_family == AF_INET6) { 1863 /* Build initial header template for transmit */ 1864 if ((error = udp_build_hdrs(udp)) != 0) { 1865 rw_exit(&udp->udp_rwlock); 1866 qprocsoff(q); 1867 inet_minor_free(minor_arena, conn_dev); 1868 ipcl_conn_destroy(connp); 1869 return (error); 1870 } 1871 } 1872 rw_exit(&udp->udp_rwlock); 1873 1874 /* Set the Stream head write offset and high watermark. */ 1875 (void) proto_set_tx_wroff(q, connp, 1876 udp->udp_max_hdr_len + us->us_wroff_extra); 1877 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? 
*/ 1878 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1879 1880 mutex_enter(&connp->conn_lock); 1881 connp->conn_state_flags &= ~CONN_INCIPIENT; 1882 mutex_exit(&connp->conn_lock); 1883 return (0); 1884 } 1885 1886 /* 1887 * Which UDP options OK to set through T_UNITDATA_REQ... 1888 */ 1889 /* ARGSUSED */ 1890 static boolean_t 1891 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1892 { 1893 return (B_TRUE); 1894 } 1895 1896 /* 1897 * This routine gets default values of certain options whose default 1898 * values are maintained by protcol specific code 1899 */ 1900 /* ARGSUSED */ 1901 int 1902 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1903 { 1904 udp_t *udp = Q_TO_UDP(q); 1905 udp_stack_t *us = udp->udp_us; 1906 int *i1 = (int *)ptr; 1907 1908 switch (level) { 1909 case IPPROTO_IP: 1910 switch (name) { 1911 case IP_MULTICAST_TTL: 1912 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1913 return (sizeof (uchar_t)); 1914 case IP_MULTICAST_LOOP: 1915 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1916 return (sizeof (uchar_t)); 1917 } 1918 break; 1919 case IPPROTO_IPV6: 1920 switch (name) { 1921 case IPV6_MULTICAST_HOPS: 1922 *i1 = IP_DEFAULT_MULTICAST_TTL; 1923 return (sizeof (int)); 1924 case IPV6_MULTICAST_LOOP: 1925 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1926 return (sizeof (int)); 1927 case IPV6_UNICAST_HOPS: 1928 *i1 = us->us_ipv6_hoplimit; 1929 return (sizeof (int)); 1930 } 1931 break; 1932 } 1933 return (-1); 1934 } 1935 1936 /* 1937 * This routine retrieves the current status of socket options. 1938 * It returns the size of the option retrieved. 
1939 */ 1940 static int 1941 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1942 { 1943 udp_t *udp = connp->conn_udp; 1944 udp_stack_t *us = udp->udp_us; 1945 int *i1 = (int *)ptr; 1946 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 1947 int len; 1948 1949 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 1950 switch (level) { 1951 case SOL_SOCKET: 1952 switch (name) { 1953 case SO_DEBUG: 1954 *i1 = udp->udp_debug; 1955 break; /* goto sizeof (int) option return */ 1956 case SO_REUSEADDR: 1957 *i1 = udp->udp_reuseaddr; 1958 break; /* goto sizeof (int) option return */ 1959 case SO_TYPE: 1960 *i1 = SOCK_DGRAM; 1961 break; /* goto sizeof (int) option return */ 1962 1963 /* 1964 * The following three items are available here, 1965 * but are only meaningful to IP. 1966 */ 1967 case SO_DONTROUTE: 1968 *i1 = udp->udp_dontroute; 1969 break; /* goto sizeof (int) option return */ 1970 case SO_USELOOPBACK: 1971 *i1 = udp->udp_useloopback; 1972 break; /* goto sizeof (int) option return */ 1973 case SO_BROADCAST: 1974 *i1 = udp->udp_broadcast; 1975 break; /* goto sizeof (int) option return */ 1976 1977 case SO_SNDBUF: 1978 *i1 = udp->udp_xmit_hiwat; 1979 break; /* goto sizeof (int) option return */ 1980 case SO_RCVBUF: 1981 *i1 = udp->udp_rcv_disply_hiwat; 1982 break; /* goto sizeof (int) option return */ 1983 case SO_DGRAM_ERRIND: 1984 *i1 = udp->udp_dgram_errind; 1985 break; /* goto sizeof (int) option return */ 1986 case SO_RECVUCRED: 1987 *i1 = udp->udp_recvucred; 1988 break; /* goto sizeof (int) option return */ 1989 case SO_TIMESTAMP: 1990 *i1 = udp->udp_timestamp; 1991 break; /* goto sizeof (int) option return */ 1992 case SO_ANON_MLP: 1993 *i1 = connp->conn_anon_mlp; 1994 break; /* goto sizeof (int) option return */ 1995 case SO_MAC_EXEMPT: 1996 *i1 = connp->conn_mac_exempt; 1997 break; /* goto sizeof (int) option return */ 1998 case SO_ALLZONES: 1999 *i1 = connp->conn_allzones; 2000 break; /* goto sizeof (int) option return */ 2001 case SO_EXCLBIND: 2002 *i1 = 
udp->udp_exclbind ? SO_EXCLBIND : 0; 2003 break; 2004 case SO_PROTOTYPE: 2005 *i1 = IPPROTO_UDP; 2006 break; 2007 case SO_DOMAIN: 2008 *i1 = udp->udp_family; 2009 break; 2010 default: 2011 return (-1); 2012 } 2013 break; 2014 case IPPROTO_IP: 2015 if (udp->udp_family != AF_INET) 2016 return (-1); 2017 switch (name) { 2018 case IP_OPTIONS: 2019 case T_IP_OPTIONS: 2020 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2021 if (len > 0) { 2022 bcopy(udp->udp_ip_rcv_options + 2023 udp->udp_label_len, ptr, len); 2024 } 2025 return (len); 2026 case IP_TOS: 2027 case T_IP_TOS: 2028 *i1 = (int)udp->udp_type_of_service; 2029 break; /* goto sizeof (int) option return */ 2030 case IP_TTL: 2031 *i1 = (int)udp->udp_ttl; 2032 break; /* goto sizeof (int) option return */ 2033 case IP_DHCPINIT_IF: 2034 return (-EINVAL); 2035 case IP_NEXTHOP: 2036 case IP_RECVPKTINFO: 2037 /* 2038 * This also handles IP_PKTINFO. 2039 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2040 * Differentiation is based on the size of the argument 2041 * passed in. 2042 * This option is handled in IP which will return an 2043 * error for IP_PKTINFO as it's not supported as a 2044 * sticky option. 
2045 */ 2046 return (-EINVAL); 2047 case IP_MULTICAST_IF: 2048 /* 0 address if not set */ 2049 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2050 return (sizeof (ipaddr_t)); 2051 case IP_MULTICAST_TTL: 2052 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2053 return (sizeof (uchar_t)); 2054 case IP_MULTICAST_LOOP: 2055 *ptr = connp->conn_multicast_loop; 2056 return (sizeof (uint8_t)); 2057 case IP_RECVOPTS: 2058 *i1 = udp->udp_recvopts; 2059 break; /* goto sizeof (int) option return */ 2060 case IP_RECVDSTADDR: 2061 *i1 = udp->udp_recvdstaddr; 2062 break; /* goto sizeof (int) option return */ 2063 case IP_RECVIF: 2064 *i1 = udp->udp_recvif; 2065 break; /* goto sizeof (int) option return */ 2066 case IP_RECVSLLA: 2067 *i1 = udp->udp_recvslla; 2068 break; /* goto sizeof (int) option return */ 2069 case IP_RECVTTL: 2070 *i1 = udp->udp_recvttl; 2071 break; /* goto sizeof (int) option return */ 2072 case IP_ADD_MEMBERSHIP: 2073 case IP_DROP_MEMBERSHIP: 2074 case IP_BLOCK_SOURCE: 2075 case IP_UNBLOCK_SOURCE: 2076 case IP_ADD_SOURCE_MEMBERSHIP: 2077 case IP_DROP_SOURCE_MEMBERSHIP: 2078 case MCAST_JOIN_GROUP: 2079 case MCAST_LEAVE_GROUP: 2080 case MCAST_BLOCK_SOURCE: 2081 case MCAST_UNBLOCK_SOURCE: 2082 case MCAST_JOIN_SOURCE_GROUP: 2083 case MCAST_LEAVE_SOURCE_GROUP: 2084 /* cannot "get" the value for these */ 2085 return (-1); 2086 case IP_BOUND_IF: 2087 /* Zero if not set */ 2088 *i1 = udp->udp_bound_if; 2089 break; /* goto sizeof (int) option return */ 2090 case IP_UNSPEC_SRC: 2091 *i1 = udp->udp_unspec_source; 2092 break; /* goto sizeof (int) option return */ 2093 case IP_BROADCAST_TTL: 2094 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2095 return (sizeof (uchar_t)); 2096 default: 2097 return (-1); 2098 } 2099 break; 2100 case IPPROTO_IPV6: 2101 if (udp->udp_family != AF_INET6) 2102 return (-1); 2103 switch (name) { 2104 case IPV6_UNICAST_HOPS: 2105 *i1 = (unsigned int)udp->udp_ttl; 2106 break; /* goto sizeof (int) option return */ 2107 case IPV6_MULTICAST_IF: 2108 /* 0 
index if not set */ 2109 *i1 = udp->udp_multicast_if_index; 2110 break; /* goto sizeof (int) option return */ 2111 case IPV6_MULTICAST_HOPS: 2112 *i1 = udp->udp_multicast_ttl; 2113 break; /* goto sizeof (int) option return */ 2114 case IPV6_MULTICAST_LOOP: 2115 *i1 = connp->conn_multicast_loop; 2116 break; /* goto sizeof (int) option return */ 2117 case IPV6_JOIN_GROUP: 2118 case IPV6_LEAVE_GROUP: 2119 case MCAST_JOIN_GROUP: 2120 case MCAST_LEAVE_GROUP: 2121 case MCAST_BLOCK_SOURCE: 2122 case MCAST_UNBLOCK_SOURCE: 2123 case MCAST_JOIN_SOURCE_GROUP: 2124 case MCAST_LEAVE_SOURCE_GROUP: 2125 /* cannot "get" the value for these */ 2126 return (-1); 2127 case IPV6_BOUND_IF: 2128 /* Zero if not set */ 2129 *i1 = udp->udp_bound_if; 2130 break; /* goto sizeof (int) option return */ 2131 case IPV6_UNSPEC_SRC: 2132 *i1 = udp->udp_unspec_source; 2133 break; /* goto sizeof (int) option return */ 2134 case IPV6_RECVPKTINFO: 2135 *i1 = udp->udp_ip_recvpktinfo; 2136 break; /* goto sizeof (int) option return */ 2137 case IPV6_RECVTCLASS: 2138 *i1 = udp->udp_ipv6_recvtclass; 2139 break; /* goto sizeof (int) option return */ 2140 case IPV6_RECVPATHMTU: 2141 *i1 = udp->udp_ipv6_recvpathmtu; 2142 break; /* goto sizeof (int) option return */ 2143 case IPV6_RECVHOPLIMIT: 2144 *i1 = udp->udp_ipv6_recvhoplimit; 2145 break; /* goto sizeof (int) option return */ 2146 case IPV6_RECVHOPOPTS: 2147 *i1 = udp->udp_ipv6_recvhopopts; 2148 break; /* goto sizeof (int) option return */ 2149 case IPV6_RECVDSTOPTS: 2150 *i1 = udp->udp_ipv6_recvdstopts; 2151 break; /* goto sizeof (int) option return */ 2152 case _OLD_IPV6_RECVDSTOPTS: 2153 *i1 = udp->udp_old_ipv6_recvdstopts; 2154 break; /* goto sizeof (int) option return */ 2155 case IPV6_RECVRTHDRDSTOPTS: 2156 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2157 break; /* goto sizeof (int) option return */ 2158 case IPV6_RECVRTHDR: 2159 *i1 = udp->udp_ipv6_recvrthdr; 2160 break; /* goto sizeof (int) option return */ 2161 case IPV6_PKTINFO: { 2162 /* XXX assumes 
that caller has room for max size! */ 2163 struct in6_pktinfo *pkti; 2164 2165 pkti = (struct in6_pktinfo *)ptr; 2166 if (ipp->ipp_fields & IPPF_IFINDEX) 2167 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2168 else 2169 pkti->ipi6_ifindex = 0; 2170 if (ipp->ipp_fields & IPPF_ADDR) 2171 pkti->ipi6_addr = ipp->ipp_addr; 2172 else 2173 pkti->ipi6_addr = ipv6_all_zeros; 2174 return (sizeof (struct in6_pktinfo)); 2175 } 2176 case IPV6_TCLASS: 2177 if (ipp->ipp_fields & IPPF_TCLASS) 2178 *i1 = ipp->ipp_tclass; 2179 else 2180 *i1 = IPV6_FLOW_TCLASS( 2181 IPV6_DEFAULT_VERS_AND_FLOW); 2182 break; /* goto sizeof (int) option return */ 2183 case IPV6_NEXTHOP: { 2184 sin6_t *sin6 = (sin6_t *)ptr; 2185 2186 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2187 return (0); 2188 *sin6 = sin6_null; 2189 sin6->sin6_family = AF_INET6; 2190 sin6->sin6_addr = ipp->ipp_nexthop; 2191 return (sizeof (sin6_t)); 2192 } 2193 case IPV6_HOPOPTS: 2194 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2195 return (0); 2196 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2197 return (0); 2198 /* 2199 * The cipso/label option is added by kernel. 2200 * User is not usually aware of this option. 2201 * We copy out the hbh opt after the label option. 
2202 */ 2203 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2204 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2205 if (udp->udp_label_len_v6 > 0) { 2206 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2207 ptr[1] = (ipp->ipp_hopoptslen - 2208 udp->udp_label_len_v6 + 7) / 8 - 1; 2209 } 2210 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2211 case IPV6_RTHDRDSTOPTS: 2212 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2213 return (0); 2214 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2215 return (ipp->ipp_rtdstoptslen); 2216 case IPV6_RTHDR: 2217 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2218 return (0); 2219 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2220 return (ipp->ipp_rthdrlen); 2221 case IPV6_DSTOPTS: 2222 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2223 return (0); 2224 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2225 return (ipp->ipp_dstoptslen); 2226 case IPV6_PATHMTU: 2227 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2228 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2229 us->us_netstack)); 2230 default: 2231 return (-1); 2232 } 2233 break; 2234 case IPPROTO_UDP: 2235 switch (name) { 2236 case UDP_ANONPRIVBIND: 2237 *i1 = udp->udp_anon_priv_bind; 2238 break; 2239 case UDP_EXCLBIND: 2240 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2241 break; 2242 case UDP_RCVHDR: 2243 *i1 = udp->udp_rcvhdr ? 1 : 0; 2244 break; 2245 case UDP_NAT_T_ENDPOINT: 2246 *i1 = udp->udp_nat_t_endpoint; 2247 break; 2248 default: 2249 return (-1); 2250 } 2251 break; 2252 default: 2253 return (-1); 2254 } 2255 return (sizeof (int)); 2256 } 2257 2258 int 2259 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2260 { 2261 udp_t *udp; 2262 int err; 2263 2264 udp = Q_TO_UDP(q); 2265 2266 rw_enter(&udp->udp_rwlock, RW_READER); 2267 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2268 rw_exit(&udp->udp_rwlock); 2269 return (err); 2270 } 2271 2272 /* 2273 * This routine sets socket options. 
2274 */ 2275 /* ARGSUSED */ 2276 static int 2277 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2278 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2279 void *thisdg_attrs, boolean_t checkonly) 2280 { 2281 udpattrs_t *attrs = thisdg_attrs; 2282 int *i1 = (int *)invalp; 2283 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2284 udp_t *udp = connp->conn_udp; 2285 udp_stack_t *us = udp->udp_us; 2286 int error; 2287 uint_t newlen; 2288 size_t sth_wroff; 2289 2290 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2291 /* 2292 * For fixed length options, no sanity check 2293 * of passed in length is done. It is assumed *_optcom_req() 2294 * routines do the right thing. 2295 */ 2296 switch (level) { 2297 case SOL_SOCKET: 2298 switch (name) { 2299 case SO_REUSEADDR: 2300 if (!checkonly) { 2301 udp->udp_reuseaddr = onoff; 2302 PASS_OPT_TO_IP(connp); 2303 } 2304 break; 2305 case SO_DEBUG: 2306 if (!checkonly) 2307 udp->udp_debug = onoff; 2308 break; 2309 /* 2310 * The following three items are available here, 2311 * but are only meaningful to IP. 
2312 */ 2313 case SO_DONTROUTE: 2314 if (!checkonly) { 2315 udp->udp_dontroute = onoff; 2316 PASS_OPT_TO_IP(connp); 2317 } 2318 break; 2319 case SO_USELOOPBACK: 2320 if (!checkonly) { 2321 udp->udp_useloopback = onoff; 2322 PASS_OPT_TO_IP(connp); 2323 } 2324 break; 2325 case SO_BROADCAST: 2326 if (!checkonly) { 2327 udp->udp_broadcast = onoff; 2328 PASS_OPT_TO_IP(connp); 2329 } 2330 break; 2331 2332 case SO_SNDBUF: 2333 if (*i1 > us->us_max_buf) { 2334 *outlenp = 0; 2335 return (ENOBUFS); 2336 } 2337 if (!checkonly) { 2338 udp->udp_xmit_hiwat = *i1; 2339 connp->conn_wq->q_hiwat = *i1; 2340 } 2341 break; 2342 case SO_RCVBUF: 2343 if (*i1 > us->us_max_buf) { 2344 *outlenp = 0; 2345 return (ENOBUFS); 2346 } 2347 if (!checkonly) { 2348 int size; 2349 2350 udp->udp_rcv_disply_hiwat = *i1; 2351 size = udp_set_rcv_hiwat(udp, *i1); 2352 rw_exit(&udp->udp_rwlock); 2353 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2354 size); 2355 rw_enter(&udp->udp_rwlock, RW_WRITER); 2356 } 2357 break; 2358 case SO_DGRAM_ERRIND: 2359 if (!checkonly) 2360 udp->udp_dgram_errind = onoff; 2361 break; 2362 case SO_RECVUCRED: 2363 if (!checkonly) 2364 udp->udp_recvucred = onoff; 2365 break; 2366 case SO_ALLZONES: 2367 /* 2368 * "soft" error (negative) 2369 * option not handled at this level 2370 * Do not modify *outlenp. 
2371 */ 2372 return (-EINVAL); 2373 case SO_TIMESTAMP: 2374 if (!checkonly) 2375 udp->udp_timestamp = onoff; 2376 break; 2377 case SO_ANON_MLP: 2378 if (!checkonly) { 2379 connp->conn_anon_mlp = onoff; 2380 PASS_OPT_TO_IP(connp); 2381 } 2382 break; 2383 case SO_MAC_EXEMPT: 2384 if (secpolicy_net_mac_aware(cr) != 0 || 2385 udp->udp_state != TS_UNBND) 2386 return (EACCES); 2387 if (!checkonly) { 2388 connp->conn_mac_exempt = onoff; 2389 PASS_OPT_TO_IP(connp); 2390 } 2391 break; 2392 case SCM_UCRED: { 2393 struct ucred_s *ucr; 2394 cred_t *cr, *newcr; 2395 ts_label_t *tsl; 2396 2397 /* 2398 * Only sockets that have proper privileges and are 2399 * bound to MLPs will have any other value here, so 2400 * this implicitly tests for privilege to set label. 2401 */ 2402 if (connp->conn_mlp_type == mlptSingle) 2403 break; 2404 ucr = (struct ucred_s *)invalp; 2405 if (inlen != ucredsize || 2406 ucr->uc_labeloff < sizeof (*ucr) || 2407 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2408 return (EINVAL); 2409 if (!checkonly) { 2410 mblk_t *mb; 2411 pid_t cpid; 2412 2413 if (attrs == NULL || 2414 (mb = attrs->udpattr_mb) == NULL) 2415 return (EINVAL); 2416 if ((cr = msg_getcred(mb, &cpid)) == NULL) 2417 cr = udp->udp_connp->conn_cred; 2418 ASSERT(cr != NULL); 2419 if ((tsl = crgetlabel(cr)) == NULL) 2420 return (EINVAL); 2421 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2422 tsl->tsl_doi, KM_NOSLEEP); 2423 if (newcr == NULL) 2424 return (ENOSR); 2425 mblk_setcred(mb, newcr, cpid); 2426 attrs->udpattr_credset = B_TRUE; 2427 crfree(newcr); 2428 } 2429 break; 2430 } 2431 case SO_EXCLBIND: 2432 if (!checkonly) 2433 udp->udp_exclbind = onoff; 2434 break; 2435 case SO_RCVTIMEO: 2436 case SO_SNDTIMEO: 2437 /* 2438 * Pass these two options in order for third part 2439 * protocol usage. Here just return directly. 
2440 */ 2441 return (0); 2442 default: 2443 *outlenp = 0; 2444 return (EINVAL); 2445 } 2446 break; 2447 case IPPROTO_IP: 2448 if (udp->udp_family != AF_INET) { 2449 *outlenp = 0; 2450 return (ENOPROTOOPT); 2451 } 2452 switch (name) { 2453 case IP_OPTIONS: 2454 case T_IP_OPTIONS: 2455 /* Save options for use by IP. */ 2456 newlen = inlen + udp->udp_label_len; 2457 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2458 *outlenp = 0; 2459 return (EINVAL); 2460 } 2461 if (checkonly) 2462 break; 2463 2464 /* 2465 * Update the stored options taking into account 2466 * any CIPSO option which we should not overwrite. 2467 */ 2468 if (!tsol_option_set(&udp->udp_ip_snd_options, 2469 &udp->udp_ip_snd_options_len, 2470 udp->udp_label_len, invalp, inlen)) { 2471 *outlenp = 0; 2472 return (ENOMEM); 2473 } 2474 2475 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2476 UDPH_SIZE + udp->udp_ip_snd_options_len; 2477 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2478 rw_exit(&udp->udp_rwlock); 2479 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2480 sth_wroff); 2481 rw_enter(&udp->udp_rwlock, RW_WRITER); 2482 break; 2483 2484 case IP_TTL: 2485 if (!checkonly) { 2486 udp->udp_ttl = (uchar_t)*i1; 2487 } 2488 break; 2489 case IP_TOS: 2490 case T_IP_TOS: 2491 if (!checkonly) { 2492 udp->udp_type_of_service = (uchar_t)*i1; 2493 } 2494 break; 2495 case IP_MULTICAST_IF: { 2496 /* 2497 * TODO should check OPTMGMT reply and undo this if 2498 * there is an error. 
2499 */ 2500 struct in_addr *inap = (struct in_addr *)invalp; 2501 if (!checkonly) { 2502 udp->udp_multicast_if_addr = 2503 inap->s_addr; 2504 PASS_OPT_TO_IP(connp); 2505 } 2506 break; 2507 } 2508 case IP_MULTICAST_TTL: 2509 if (!checkonly) 2510 udp->udp_multicast_ttl = *invalp; 2511 break; 2512 case IP_MULTICAST_LOOP: 2513 if (!checkonly) { 2514 connp->conn_multicast_loop = *invalp; 2515 PASS_OPT_TO_IP(connp); 2516 } 2517 break; 2518 case IP_RECVOPTS: 2519 if (!checkonly) 2520 udp->udp_recvopts = onoff; 2521 break; 2522 case IP_RECVDSTADDR: 2523 if (!checkonly) 2524 udp->udp_recvdstaddr = onoff; 2525 break; 2526 case IP_RECVIF: 2527 if (!checkonly) { 2528 udp->udp_recvif = onoff; 2529 PASS_OPT_TO_IP(connp); 2530 } 2531 break; 2532 case IP_RECVSLLA: 2533 if (!checkonly) { 2534 udp->udp_recvslla = onoff; 2535 PASS_OPT_TO_IP(connp); 2536 } 2537 break; 2538 case IP_RECVTTL: 2539 if (!checkonly) 2540 udp->udp_recvttl = onoff; 2541 break; 2542 case IP_PKTINFO: { 2543 /* 2544 * This also handles IP_RECVPKTINFO. 2545 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2546 * Differentiation is based on the size of the 2547 * argument passed in. 2548 */ 2549 struct in_pktinfo *pktinfop; 2550 ip4_pkt_t *attr_pktinfop; 2551 2552 if (checkonly) 2553 break; 2554 2555 if (inlen == sizeof (int)) { 2556 /* 2557 * This is IP_RECVPKTINFO option. 2558 * Keep a local copy of whether this option is 2559 * set or not and pass it down to IP for 2560 * processing. 2561 */ 2562 2563 udp->udp_ip_recvpktinfo = onoff; 2564 return (-EINVAL); 2565 } 2566 2567 if (attrs == NULL || 2568 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2569 /* 2570 * sticky option or no buffer to return 2571 * the results. 
2572 */ 2573 return (EINVAL); 2574 } 2575 2576 if (inlen != sizeof (struct in_pktinfo)) 2577 return (EINVAL); 2578 2579 pktinfop = (struct in_pktinfo *)invalp; 2580 2581 /* 2582 * At least one of the values should be specified 2583 */ 2584 if (pktinfop->ipi_ifindex == 0 && 2585 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2586 return (EINVAL); 2587 } 2588 2589 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2590 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2591 2592 break; 2593 } 2594 case IP_ADD_MEMBERSHIP: 2595 case IP_DROP_MEMBERSHIP: 2596 case IP_BLOCK_SOURCE: 2597 case IP_UNBLOCK_SOURCE: 2598 case IP_ADD_SOURCE_MEMBERSHIP: 2599 case IP_DROP_SOURCE_MEMBERSHIP: 2600 case MCAST_JOIN_GROUP: 2601 case MCAST_LEAVE_GROUP: 2602 case MCAST_BLOCK_SOURCE: 2603 case MCAST_UNBLOCK_SOURCE: 2604 case MCAST_JOIN_SOURCE_GROUP: 2605 case MCAST_LEAVE_SOURCE_GROUP: 2606 case IP_SEC_OPT: 2607 case IP_NEXTHOP: 2608 case IP_DHCPINIT_IF: 2609 /* 2610 * "soft" error (negative) 2611 * option not handled at this level 2612 * Do not modify *outlenp. 
2613 */ 2614 return (-EINVAL); 2615 case IP_BOUND_IF: 2616 if (!checkonly) { 2617 udp->udp_bound_if = *i1; 2618 PASS_OPT_TO_IP(connp); 2619 } 2620 break; 2621 case IP_UNSPEC_SRC: 2622 if (!checkonly) { 2623 udp->udp_unspec_source = onoff; 2624 PASS_OPT_TO_IP(connp); 2625 } 2626 break; 2627 case IP_BROADCAST_TTL: 2628 if (!checkonly) 2629 connp->conn_broadcast_ttl = *invalp; 2630 break; 2631 default: 2632 *outlenp = 0; 2633 return (EINVAL); 2634 } 2635 break; 2636 case IPPROTO_IPV6: { 2637 ip6_pkt_t *ipp; 2638 boolean_t sticky; 2639 2640 if (udp->udp_family != AF_INET6) { 2641 *outlenp = 0; 2642 return (ENOPROTOOPT); 2643 } 2644 /* 2645 * Deal with both sticky options and ancillary data 2646 */ 2647 sticky = B_FALSE; 2648 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2649 NULL) { 2650 /* sticky options, or none */ 2651 ipp = &udp->udp_sticky_ipp; 2652 sticky = B_TRUE; 2653 } 2654 2655 switch (name) { 2656 case IPV6_MULTICAST_IF: 2657 if (!checkonly) { 2658 udp->udp_multicast_if_index = *i1; 2659 PASS_OPT_TO_IP(connp); 2660 } 2661 break; 2662 case IPV6_UNICAST_HOPS: 2663 /* -1 means use default */ 2664 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2665 *outlenp = 0; 2666 return (EINVAL); 2667 } 2668 if (!checkonly) { 2669 if (*i1 == -1) { 2670 udp->udp_ttl = ipp->ipp_unicast_hops = 2671 us->us_ipv6_hoplimit; 2672 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2673 /* Pass modified value to IP. 
*/ 2674 *i1 = udp->udp_ttl; 2675 } else { 2676 udp->udp_ttl = ipp->ipp_unicast_hops = 2677 (uint8_t)*i1; 2678 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2679 } 2680 /* Rebuild the header template */ 2681 error = udp_build_hdrs(udp); 2682 if (error != 0) { 2683 *outlenp = 0; 2684 return (error); 2685 } 2686 } 2687 break; 2688 case IPV6_MULTICAST_HOPS: 2689 /* -1 means use default */ 2690 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2691 *outlenp = 0; 2692 return (EINVAL); 2693 } 2694 if (!checkonly) { 2695 if (*i1 == -1) { 2696 udp->udp_multicast_ttl = 2697 ipp->ipp_multicast_hops = 2698 IP_DEFAULT_MULTICAST_TTL; 2699 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2700 /* Pass modified value to IP. */ 2701 *i1 = udp->udp_multicast_ttl; 2702 } else { 2703 udp->udp_multicast_ttl = 2704 ipp->ipp_multicast_hops = 2705 (uint8_t)*i1; 2706 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2707 } 2708 } 2709 break; 2710 case IPV6_MULTICAST_LOOP: 2711 if (*i1 != 0 && *i1 != 1) { 2712 *outlenp = 0; 2713 return (EINVAL); 2714 } 2715 if (!checkonly) { 2716 connp->conn_multicast_loop = *i1; 2717 PASS_OPT_TO_IP(connp); 2718 } 2719 break; 2720 case IPV6_JOIN_GROUP: 2721 case IPV6_LEAVE_GROUP: 2722 case MCAST_JOIN_GROUP: 2723 case MCAST_LEAVE_GROUP: 2724 case MCAST_BLOCK_SOURCE: 2725 case MCAST_UNBLOCK_SOURCE: 2726 case MCAST_JOIN_SOURCE_GROUP: 2727 case MCAST_LEAVE_SOURCE_GROUP: 2728 /* 2729 * "soft" error (negative) 2730 * option not handled at this level 2731 * Note: Do not modify *outlenp 2732 */ 2733 return (-EINVAL); 2734 case IPV6_BOUND_IF: 2735 if (!checkonly) { 2736 udp->udp_bound_if = *i1; 2737 PASS_OPT_TO_IP(connp); 2738 } 2739 break; 2740 case IPV6_UNSPEC_SRC: 2741 if (!checkonly) { 2742 udp->udp_unspec_source = onoff; 2743 PASS_OPT_TO_IP(connp); 2744 } 2745 break; 2746 /* 2747 * Set boolean switches for ancillary data delivery 2748 */ 2749 case IPV6_RECVPKTINFO: 2750 if (!checkonly) { 2751 udp->udp_ip_recvpktinfo = onoff; 2752 PASS_OPT_TO_IP(connp); 2753 } 2754 break; 2755 case IPV6_RECVTCLASS: 
2756 if (!checkonly) { 2757 udp->udp_ipv6_recvtclass = onoff; 2758 PASS_OPT_TO_IP(connp); 2759 } 2760 break; 2761 case IPV6_RECVPATHMTU: 2762 if (!checkonly) { 2763 udp->udp_ipv6_recvpathmtu = onoff; 2764 PASS_OPT_TO_IP(connp); 2765 } 2766 break; 2767 case IPV6_RECVHOPLIMIT: 2768 if (!checkonly) { 2769 udp->udp_ipv6_recvhoplimit = onoff; 2770 PASS_OPT_TO_IP(connp); 2771 } 2772 break; 2773 case IPV6_RECVHOPOPTS: 2774 if (!checkonly) { 2775 udp->udp_ipv6_recvhopopts = onoff; 2776 PASS_OPT_TO_IP(connp); 2777 } 2778 break; 2779 case IPV6_RECVDSTOPTS: 2780 if (!checkonly) { 2781 udp->udp_ipv6_recvdstopts = onoff; 2782 PASS_OPT_TO_IP(connp); 2783 } 2784 break; 2785 case _OLD_IPV6_RECVDSTOPTS: 2786 if (!checkonly) 2787 udp->udp_old_ipv6_recvdstopts = onoff; 2788 break; 2789 case IPV6_RECVRTHDRDSTOPTS: 2790 if (!checkonly) { 2791 udp->udp_ipv6_recvrthdrdstopts = onoff; 2792 PASS_OPT_TO_IP(connp); 2793 } 2794 break; 2795 case IPV6_RECVRTHDR: 2796 if (!checkonly) { 2797 udp->udp_ipv6_recvrthdr = onoff; 2798 PASS_OPT_TO_IP(connp); 2799 } 2800 break; 2801 /* 2802 * Set sticky options or ancillary data. 2803 * If sticky options, (re)build any extension headers 2804 * that might be needed as a result. 2805 */ 2806 case IPV6_PKTINFO: 2807 /* 2808 * The source address and ifindex are verified 2809 * in ip_opt_set(). For ancillary data the 2810 * source address is checked in ip_wput_v6. 
2811 */ 2812 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2813 return (EINVAL); 2814 if (checkonly) 2815 break; 2816 2817 if (inlen == 0) { 2818 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2819 ipp->ipp_sticky_ignored |= 2820 (IPPF_IFINDEX|IPPF_ADDR); 2821 } else { 2822 struct in6_pktinfo *pkti; 2823 2824 pkti = (struct in6_pktinfo *)invalp; 2825 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2826 ipp->ipp_addr = pkti->ipi6_addr; 2827 if (ipp->ipp_ifindex != 0) 2828 ipp->ipp_fields |= IPPF_IFINDEX; 2829 else 2830 ipp->ipp_fields &= ~IPPF_IFINDEX; 2831 if (!IN6_IS_ADDR_UNSPECIFIED( 2832 &ipp->ipp_addr)) 2833 ipp->ipp_fields |= IPPF_ADDR; 2834 else 2835 ipp->ipp_fields &= ~IPPF_ADDR; 2836 } 2837 if (sticky) { 2838 error = udp_build_hdrs(udp); 2839 if (error != 0) 2840 return (error); 2841 PASS_OPT_TO_IP(connp); 2842 } 2843 break; 2844 case IPV6_HOPLIMIT: 2845 if (sticky) 2846 return (EINVAL); 2847 if (inlen != 0 && inlen != sizeof (int)) 2848 return (EINVAL); 2849 if (checkonly) 2850 break; 2851 2852 if (inlen == 0) { 2853 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2854 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2855 } else { 2856 if (*i1 > 255 || *i1 < -1) 2857 return (EINVAL); 2858 if (*i1 == -1) 2859 ipp->ipp_hoplimit = 2860 us->us_ipv6_hoplimit; 2861 else 2862 ipp->ipp_hoplimit = *i1; 2863 ipp->ipp_fields |= IPPF_HOPLIMIT; 2864 } 2865 break; 2866 case IPV6_TCLASS: 2867 if (inlen != 0 && inlen != sizeof (int)) 2868 return (EINVAL); 2869 if (checkonly) 2870 break; 2871 2872 if (inlen == 0) { 2873 ipp->ipp_fields &= ~IPPF_TCLASS; 2874 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2875 } else { 2876 if (*i1 > 255 || *i1 < -1) 2877 return (EINVAL); 2878 if (*i1 == -1) 2879 ipp->ipp_tclass = 0; 2880 else 2881 ipp->ipp_tclass = *i1; 2882 ipp->ipp_fields |= IPPF_TCLASS; 2883 } 2884 if (sticky) { 2885 error = udp_build_hdrs(udp); 2886 if (error != 0) 2887 return (error); 2888 } 2889 break; 2890 case IPV6_NEXTHOP: 2891 /* 2892 * IP will verify that the nexthop is reachable 2893 * 
and fail for sticky options. 2894 */ 2895 if (inlen != 0 && inlen != sizeof (sin6_t)) 2896 return (EINVAL); 2897 if (checkonly) 2898 break; 2899 2900 if (inlen == 0) { 2901 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2902 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2903 } else { 2904 sin6_t *sin6 = (sin6_t *)invalp; 2905 2906 if (sin6->sin6_family != AF_INET6) { 2907 return (EAFNOSUPPORT); 2908 } 2909 if (IN6_IS_ADDR_V4MAPPED( 2910 &sin6->sin6_addr)) 2911 return (EADDRNOTAVAIL); 2912 ipp->ipp_nexthop = sin6->sin6_addr; 2913 if (!IN6_IS_ADDR_UNSPECIFIED( 2914 &ipp->ipp_nexthop)) 2915 ipp->ipp_fields |= IPPF_NEXTHOP; 2916 else 2917 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2918 } 2919 if (sticky) { 2920 error = udp_build_hdrs(udp); 2921 if (error != 0) 2922 return (error); 2923 PASS_OPT_TO_IP(connp); 2924 } 2925 break; 2926 case IPV6_HOPOPTS: { 2927 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2928 /* 2929 * Sanity checks - minimum size, size a multiple of 2930 * eight bytes, and matching size passed in. 2931 */ 2932 if (inlen != 0 && 2933 inlen != (8 * (hopts->ip6h_len + 1))) 2934 return (EINVAL); 2935 2936 if (checkonly) 2937 break; 2938 2939 error = optcom_pkt_set(invalp, inlen, sticky, 2940 (uchar_t **)&ipp->ipp_hopopts, 2941 &ipp->ipp_hopoptslen, 2942 sticky ? udp->udp_label_len_v6 : 0); 2943 if (error != 0) 2944 return (error); 2945 if (ipp->ipp_hopoptslen == 0) { 2946 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2947 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2948 } else { 2949 ipp->ipp_fields |= IPPF_HOPOPTS; 2950 } 2951 if (sticky) { 2952 error = udp_build_hdrs(udp); 2953 if (error != 0) 2954 return (error); 2955 } 2956 break; 2957 } 2958 case IPV6_RTHDRDSTOPTS: { 2959 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2960 2961 /* 2962 * Sanity checks - minimum size, size a multiple of 2963 * eight bytes, and matching size passed in. 
2964 */ 2965 if (inlen != 0 && 2966 inlen != (8 * (dopts->ip6d_len + 1))) 2967 return (EINVAL); 2968 2969 if (checkonly) 2970 break; 2971 2972 if (inlen == 0) { 2973 if (sticky && 2974 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2975 kmem_free(ipp->ipp_rtdstopts, 2976 ipp->ipp_rtdstoptslen); 2977 ipp->ipp_rtdstopts = NULL; 2978 ipp->ipp_rtdstoptslen = 0; 2979 } 2980 2981 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2982 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2983 } else { 2984 error = optcom_pkt_set(invalp, inlen, sticky, 2985 (uchar_t **)&ipp->ipp_rtdstopts, 2986 &ipp->ipp_rtdstoptslen, 0); 2987 if (error != 0) 2988 return (error); 2989 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2990 } 2991 if (sticky) { 2992 error = udp_build_hdrs(udp); 2993 if (error != 0) 2994 return (error); 2995 } 2996 break; 2997 } 2998 case IPV6_DSTOPTS: { 2999 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3000 3001 /* 3002 * Sanity checks - minimum size, size a multiple of 3003 * eight bytes, and matching size passed in. 3004 */ 3005 if (inlen != 0 && 3006 inlen != (8 * (dopts->ip6d_len + 1))) 3007 return (EINVAL); 3008 3009 if (checkonly) 3010 break; 3011 3012 if (inlen == 0) { 3013 if (sticky && 3014 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3015 kmem_free(ipp->ipp_dstopts, 3016 ipp->ipp_dstoptslen); 3017 ipp->ipp_dstopts = NULL; 3018 ipp->ipp_dstoptslen = 0; 3019 } 3020 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3021 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3022 } else { 3023 error = optcom_pkt_set(invalp, inlen, sticky, 3024 (uchar_t **)&ipp->ipp_dstopts, 3025 &ipp->ipp_dstoptslen, 0); 3026 if (error != 0) 3027 return (error); 3028 ipp->ipp_fields |= IPPF_DSTOPTS; 3029 } 3030 if (sticky) { 3031 error = udp_build_hdrs(udp); 3032 if (error != 0) 3033 return (error); 3034 } 3035 break; 3036 } 3037 case IPV6_RTHDR: { 3038 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3039 3040 /* 3041 * Sanity checks - minimum size, size a multiple of 3042 * eight bytes, and matching size passed in. 
3043 */ 3044 if (inlen != 0 && 3045 inlen != (8 * (rt->ip6r_len + 1))) 3046 return (EINVAL); 3047 3048 if (checkonly) 3049 break; 3050 3051 if (inlen == 0) { 3052 if (sticky && 3053 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3054 kmem_free(ipp->ipp_rthdr, 3055 ipp->ipp_rthdrlen); 3056 ipp->ipp_rthdr = NULL; 3057 ipp->ipp_rthdrlen = 0; 3058 } 3059 ipp->ipp_fields &= ~IPPF_RTHDR; 3060 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3061 } else { 3062 error = optcom_pkt_set(invalp, inlen, sticky, 3063 (uchar_t **)&ipp->ipp_rthdr, 3064 &ipp->ipp_rthdrlen, 0); 3065 if (error != 0) 3066 return (error); 3067 ipp->ipp_fields |= IPPF_RTHDR; 3068 } 3069 if (sticky) { 3070 error = udp_build_hdrs(udp); 3071 if (error != 0) 3072 return (error); 3073 } 3074 break; 3075 } 3076 3077 case IPV6_DONTFRAG: 3078 if (checkonly) 3079 break; 3080 3081 if (onoff) { 3082 ipp->ipp_fields |= IPPF_DONTFRAG; 3083 } else { 3084 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3085 } 3086 break; 3087 3088 case IPV6_USE_MIN_MTU: 3089 if (inlen != sizeof (int)) 3090 return (EINVAL); 3091 3092 if (*i1 < -1 || *i1 > 1) 3093 return (EINVAL); 3094 3095 if (checkonly) 3096 break; 3097 3098 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3099 ipp->ipp_use_min_mtu = *i1; 3100 break; 3101 3102 case IPV6_SEC_OPT: 3103 case IPV6_SRC_PREFERENCES: 3104 case IPV6_V6ONLY: 3105 /* Handled at the IP level */ 3106 return (-EINVAL); 3107 default: 3108 *outlenp = 0; 3109 return (EINVAL); 3110 } 3111 break; 3112 } /* end IPPROTO_IPV6 */ 3113 case IPPROTO_UDP: 3114 switch (name) { 3115 case UDP_ANONPRIVBIND: 3116 if ((error = secpolicy_net_privaddr(cr, 0, 3117 IPPROTO_UDP)) != 0) { 3118 *outlenp = 0; 3119 return (error); 3120 } 3121 if (!checkonly) { 3122 udp->udp_anon_priv_bind = onoff; 3123 } 3124 break; 3125 case UDP_EXCLBIND: 3126 if (!checkonly) 3127 udp->udp_exclbind = onoff; 3128 break; 3129 case UDP_RCVHDR: 3130 if (!checkonly) 3131 udp->udp_rcvhdr = onoff; 3132 break; 3133 case UDP_NAT_T_ENDPOINT: 3134 if ((error = secpolicy_ip_config(cr, 
B_FALSE)) != 0) { 3135 *outlenp = 0; 3136 return (error); 3137 } 3138 3139 /* 3140 * Use udp_family instead so we can avoid ambiguitites 3141 * with AF_INET6 sockets that may switch from IPv4 3142 * to IPv6. 3143 */ 3144 if (udp->udp_family != AF_INET) { 3145 *outlenp = 0; 3146 return (EAFNOSUPPORT); 3147 } 3148 3149 if (!checkonly) { 3150 int size; 3151 3152 udp->udp_nat_t_endpoint = onoff; 3153 3154 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3155 UDPH_SIZE + udp->udp_ip_snd_options_len; 3156 3157 /* Also, adjust wroff */ 3158 if (onoff) { 3159 udp->udp_max_hdr_len += 3160 sizeof (uint32_t); 3161 } 3162 size = udp->udp_max_hdr_len + 3163 us->us_wroff_extra; 3164 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3165 size); 3166 } 3167 break; 3168 default: 3169 *outlenp = 0; 3170 return (EINVAL); 3171 } 3172 break; 3173 default: 3174 *outlenp = 0; 3175 return (EINVAL); 3176 } 3177 /* 3178 * Common case of OK return with outval same as inval. 3179 */ 3180 if (invalp != outvalp) { 3181 /* don't trust bcopy for identical src/dst */ 3182 (void) bcopy(invalp, outvalp, inlen); 3183 } 3184 *outlenp = inlen; 3185 return (0); 3186 } 3187 3188 int 3189 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3190 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3191 void *thisdg_attrs, cred_t *cr) 3192 { 3193 int error; 3194 boolean_t checkonly; 3195 3196 error = 0; 3197 switch (optset_context) { 3198 case SETFN_OPTCOM_CHECKONLY: 3199 checkonly = B_TRUE; 3200 /* 3201 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3202 * inlen != 0 implies value supplied and 3203 * we have to "pretend" to set it. 3204 * inlen == 0 implies that there is no 3205 * value part in T_CHECK request and just validation 3206 * done elsewhere should be enough, we just return here. 
3207 */ 3208 if (inlen == 0) { 3209 *outlenp = 0; 3210 goto done; 3211 } 3212 break; 3213 case SETFN_OPTCOM_NEGOTIATE: 3214 checkonly = B_FALSE; 3215 break; 3216 case SETFN_UD_NEGOTIATE: 3217 case SETFN_CONN_NEGOTIATE: 3218 checkonly = B_FALSE; 3219 /* 3220 * Negotiating local and "association-related" options 3221 * through T_UNITDATA_REQ. 3222 * 3223 * Following routine can filter out ones we do not 3224 * want to be "set" this way. 3225 */ 3226 if (!udp_opt_allow_udr_set(level, name)) { 3227 *outlenp = 0; 3228 error = EINVAL; 3229 goto done; 3230 } 3231 break; 3232 default: 3233 /* 3234 * We should never get here 3235 */ 3236 *outlenp = 0; 3237 error = EINVAL; 3238 goto done; 3239 } 3240 3241 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3242 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3243 3244 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3245 outvalp, cr, thisdg_attrs, checkonly); 3246 done: 3247 return (error); 3248 } 3249 3250 /* ARGSUSED */ 3251 int 3252 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3253 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3254 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3255 { 3256 conn_t *connp = Q_TO_CONN(q); 3257 int error; 3258 udp_t *udp = connp->conn_udp; 3259 3260 rw_enter(&udp->udp_rwlock, RW_WRITER); 3261 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3262 outlenp, outvalp, thisdg_attrs, cr); 3263 rw_exit(&udp->udp_rwlock); 3264 return (error); 3265 } 3266 3267 /* 3268 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3269 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3270 * headers, and the udp header. 3271 * Returns failure if can't allocate memory. 
3272 */ 3273 static int 3274 udp_build_hdrs(udp_t *udp) 3275 { 3276 udp_stack_t *us = udp->udp_us; 3277 uchar_t *hdrs; 3278 uint_t hdrs_len; 3279 ip6_t *ip6h; 3280 ip6i_t *ip6i; 3281 udpha_t *udpha; 3282 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3283 size_t sth_wroff; 3284 conn_t *connp = udp->udp_connp; 3285 3286 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3287 ASSERT(connp != NULL); 3288 3289 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3290 ASSERT(hdrs_len != 0); 3291 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3292 /* Need to reallocate */ 3293 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3294 if (hdrs == NULL) 3295 return (ENOMEM); 3296 3297 if (udp->udp_sticky_hdrs_len != 0) { 3298 kmem_free(udp->udp_sticky_hdrs, 3299 udp->udp_sticky_hdrs_len); 3300 } 3301 udp->udp_sticky_hdrs = hdrs; 3302 udp->udp_sticky_hdrs_len = hdrs_len; 3303 } 3304 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3305 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3306 3307 /* Set header fields not in ipp */ 3308 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3309 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3310 ip6h = (ip6_t *)&ip6i[1]; 3311 } else { 3312 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3313 } 3314 3315 if (!(ipp->ipp_fields & IPPF_ADDR)) 3316 ip6h->ip6_src = udp->udp_v6src; 3317 3318 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3319 udpha->uha_src_port = udp->udp_port; 3320 3321 /* Try to get everything in a single mblk */ 3322 if (hdrs_len > udp->udp_max_hdr_len) { 3323 udp->udp_max_hdr_len = hdrs_len; 3324 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3325 rw_exit(&udp->udp_rwlock); 3326 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3327 udp->udp_connp, sth_wroff); 3328 rw_enter(&udp->udp_rwlock, RW_WRITER); 3329 } 3330 return (0); 3331 } 3332 3333 /* 3334 * This routine retrieves the value of an ND variable in a udpparam_t 3335 * structure. It is called through nd_getset when a user reads the 3336 * variable. 
3337 */ 3338 /* ARGSUSED */ 3339 static int 3340 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3341 { 3342 udpparam_t *udppa = (udpparam_t *)cp; 3343 3344 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3345 return (0); 3346 } 3347 3348 /* 3349 * Walk through the param array specified registering each element with the 3350 * named dispatch (ND) handler. 3351 */ 3352 static boolean_t 3353 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3354 { 3355 for (; cnt-- > 0; udppa++) { 3356 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3357 if (!nd_load(ndp, udppa->udp_param_name, 3358 udp_param_get, udp_param_set, 3359 (caddr_t)udppa)) { 3360 nd_free(ndp); 3361 return (B_FALSE); 3362 } 3363 } 3364 } 3365 if (!nd_load(ndp, "udp_extra_priv_ports", 3366 udp_extra_priv_ports_get, NULL, NULL)) { 3367 nd_free(ndp); 3368 return (B_FALSE); 3369 } 3370 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3371 NULL, udp_extra_priv_ports_add, NULL)) { 3372 nd_free(ndp); 3373 return (B_FALSE); 3374 } 3375 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3376 NULL, udp_extra_priv_ports_del, NULL)) { 3377 nd_free(ndp); 3378 return (B_FALSE); 3379 } 3380 return (B_TRUE); 3381 } 3382 3383 /* This routine sets an ND variable in a udpparam_t structure. */ 3384 /* ARGSUSED */ 3385 static int 3386 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3387 { 3388 long new_value; 3389 udpparam_t *udppa = (udpparam_t *)cp; 3390 3391 /* 3392 * Fail the request if the new value does not lie within the 3393 * required bounds. 3394 */ 3395 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3396 new_value < udppa->udp_param_min || 3397 new_value > udppa->udp_param_max) { 3398 return (EINVAL); 3399 } 3400 3401 /* Set the new value */ 3402 udppa->udp_param_value = new_value; 3403 return (0); 3404 } 3405 3406 /* 3407 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3408 * T_opthdr) and return the number of bytes copied. 
'dbuf' may be NULL to 3409 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3410 * then it's assumed to be allocated to be large enough. 3411 * 3412 * Returns zero if trimming of the security option causes all options to go 3413 * away. 3414 */ 3415 static size_t 3416 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3417 { 3418 struct T_opthdr *toh; 3419 size_t hol = ipp->ipp_hopoptslen; 3420 ip6_hbh_t *dstopt = NULL; 3421 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3422 size_t tlen, olen, plen; 3423 boolean_t deleting; 3424 const struct ip6_opt *sopt, *lastpad; 3425 struct ip6_opt *dopt; 3426 3427 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3428 toh->level = IPPROTO_IPV6; 3429 toh->name = IPV6_HOPOPTS; 3430 toh->status = 0; 3431 dstopt = (ip6_hbh_t *)(toh + 1); 3432 } 3433 3434 /* 3435 * If labeling is enabled, then skip the label option 3436 * but get other options if there are any. 3437 */ 3438 if (is_system_labeled()) { 3439 dopt = NULL; 3440 if (dstopt != NULL) { 3441 /* will fill in ip6h_len later */ 3442 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3443 dopt = (struct ip6_opt *)(dstopt + 1); 3444 } 3445 sopt = (const struct ip6_opt *)(srcopt + 1); 3446 hol -= sizeof (*srcopt); 3447 tlen = sizeof (*dstopt); 3448 lastpad = NULL; 3449 deleting = B_FALSE; 3450 /* 3451 * This loop finds the first (lastpad pointer) of any number of 3452 * pads that preceeds the security option, then treats the 3453 * security option as though it were a pad, and then finds the 3454 * next non-pad option (or end of list). 3455 * 3456 * It then treats the entire block as one big pad. To preserve 3457 * alignment of any options that follow, or just the end of the 3458 * list, it computes a minimal new padding size that keeps the 3459 * same alignment for the next option. 3460 * 3461 * If it encounters just a sequence of pads with no security 3462 * option, those are copied as-is rather than collapsed. 
3463 * 3464 * Note that to handle the end of list case, the code makes one 3465 * loop with 'hol' set to zero. 3466 */ 3467 for (;;) { 3468 if (hol > 0) { 3469 if (sopt->ip6o_type == IP6OPT_PAD1) { 3470 if (lastpad == NULL) 3471 lastpad = sopt; 3472 sopt = (const struct ip6_opt *) 3473 &sopt->ip6o_len; 3474 hol--; 3475 continue; 3476 } 3477 olen = sopt->ip6o_len + sizeof (*sopt); 3478 if (olen > hol) 3479 olen = hol; 3480 if (sopt->ip6o_type == IP6OPT_PADN || 3481 sopt->ip6o_type == ip6opt_ls) { 3482 if (sopt->ip6o_type == ip6opt_ls) 3483 deleting = B_TRUE; 3484 if (lastpad == NULL) 3485 lastpad = sopt; 3486 sopt = (const struct ip6_opt *) 3487 ((const char *)sopt + olen); 3488 hol -= olen; 3489 continue; 3490 } 3491 } else { 3492 /* if nothing was copied at all, then delete */ 3493 if (tlen == sizeof (*dstopt)) 3494 return (0); 3495 /* last pass; pick up any trailing padding */ 3496 olen = 0; 3497 } 3498 if (deleting) { 3499 /* 3500 * compute aligning effect of deleted material 3501 * to reproduce with pad. 
3502 */ 3503 plen = ((const char *)sopt - 3504 (const char *)lastpad) & 7; 3505 tlen += plen; 3506 if (dopt != NULL) { 3507 if (plen == 1) { 3508 dopt->ip6o_type = IP6OPT_PAD1; 3509 } else if (plen > 1) { 3510 plen -= sizeof (*dopt); 3511 dopt->ip6o_type = IP6OPT_PADN; 3512 dopt->ip6o_len = plen; 3513 if (plen > 0) 3514 bzero(dopt + 1, plen); 3515 } 3516 dopt = (struct ip6_opt *) 3517 ((char *)dopt + plen); 3518 } 3519 deleting = B_FALSE; 3520 lastpad = NULL; 3521 } 3522 /* if there's uncopied padding, then copy that now */ 3523 if (lastpad != NULL) { 3524 olen += (const char *)sopt - 3525 (const char *)lastpad; 3526 sopt = lastpad; 3527 lastpad = NULL; 3528 } 3529 if (dopt != NULL && olen > 0) { 3530 bcopy(sopt, dopt, olen); 3531 dopt = (struct ip6_opt *)((char *)dopt + olen); 3532 } 3533 if (hol == 0) 3534 break; 3535 tlen += olen; 3536 sopt = (const struct ip6_opt *) 3537 ((const char *)sopt + olen); 3538 hol -= olen; 3539 } 3540 /* go back and patch up the length value, rounded upward */ 3541 if (dstopt != NULL) 3542 dstopt->ip6h_len = (tlen - 1) >> 3; 3543 } else { 3544 tlen = hol; 3545 if (dstopt != NULL) 3546 bcopy(srcopt, dstopt, hol); 3547 } 3548 3549 tlen += sizeof (*toh); 3550 if (toh != NULL) 3551 toh->len = tlen; 3552 3553 return (tlen); 3554 } 3555 3556 /* 3557 * Update udp_rcv_opt_len from the packet. 3558 * Called when options received, and when no options received but 3559 * udp_ip_recv_opt_len has previously recorded options. 
3560 */ 3561 static void 3562 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3563 { 3564 /* Save the options if any */ 3565 if (opt_len > 0) { 3566 if (opt_len > udp->udp_ip_rcv_options_len) { 3567 /* Need to allocate larger buffer */ 3568 if (udp->udp_ip_rcv_options_len != 0) 3569 mi_free((char *)udp->udp_ip_rcv_options); 3570 udp->udp_ip_rcv_options_len = 0; 3571 udp->udp_ip_rcv_options = 3572 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3573 if (udp->udp_ip_rcv_options != NULL) 3574 udp->udp_ip_rcv_options_len = opt_len; 3575 } 3576 if (udp->udp_ip_rcv_options_len != 0) { 3577 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3578 /* Adjust length if we are resusing the space */ 3579 udp->udp_ip_rcv_options_len = opt_len; 3580 } 3581 } else if (udp->udp_ip_rcv_options_len != 0) { 3582 /* Clear out previously recorded options */ 3583 mi_free((char *)udp->udp_ip_rcv_options); 3584 udp->udp_ip_rcv_options = NULL; 3585 udp->udp_ip_rcv_options_len = 0; 3586 } 3587 } 3588 3589 static mblk_t * 3590 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3591 { 3592 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3593 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3594 /* 3595 * fallback has started but messages have not been moved yet 3596 */ 3597 if (udp->udp_fallback_queue_head == NULL) { 3598 ASSERT(udp->udp_fallback_queue_tail == NULL); 3599 udp->udp_fallback_queue_head = mp; 3600 udp->udp_fallback_queue_tail = mp; 3601 } else { 3602 ASSERT(udp->udp_fallback_queue_tail != NULL); 3603 udp->udp_fallback_queue_tail->b_next = mp; 3604 udp->udp_fallback_queue_tail = mp; 3605 } 3606 return (NULL); 3607 } else { 3608 /* 3609 * Fallback completed, let the caller putnext() the mblk. 3610 */ 3611 return (mp); 3612 } 3613 } 3614 3615 /* 3616 * Deliver data to ULP. In case we have a socket, and it's falling back to 3617 * TPI, then we'll queue the mp for later processing. 
3618 */ 3619 static void 3620 udp_ulp_recv(conn_t *connp, mblk_t *mp) 3621 { 3622 if (IPCL_IS_NONSTR(connp)) { 3623 udp_t *udp = connp->conn_udp; 3624 int error; 3625 3626 if ((*connp->conn_upcalls->su_recv) 3627 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3628 NULL) < 0) { 3629 mutex_enter(&udp->udp_recv_lock); 3630 if (error == ENOSPC) { 3631 /* 3632 * let's confirm while holding the lock 3633 */ 3634 if ((*connp->conn_upcalls->su_recv) 3635 (connp->conn_upper_handle, NULL, 0, 0, 3636 &error, NULL) < 0) { 3637 ASSERT(error == ENOSPC); 3638 if (error == ENOSPC) { 3639 connp->conn_flow_cntrld = 3640 B_TRUE; 3641 } 3642 } 3643 mutex_exit(&udp->udp_recv_lock); 3644 } else { 3645 ASSERT(error == EOPNOTSUPP); 3646 mp = udp_queue_fallback(udp, mp); 3647 mutex_exit(&udp->udp_recv_lock); 3648 if (mp != NULL) 3649 putnext(connp->conn_rq, mp); 3650 } 3651 } 3652 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 3653 } else { 3654 putnext(connp->conn_rq, mp); 3655 } 3656 } 3657 3658 /* ARGSUSED2 */ 3659 static void 3660 udp_input(void *arg1, mblk_t *mp, void *arg2) 3661 { 3662 conn_t *connp = (conn_t *)arg1; 3663 struct T_unitdata_ind *tudi; 3664 uchar_t *rptr; /* Pointer to IP header */ 3665 int hdr_length; /* Length of IP+UDP headers */ 3666 int opt_len; 3667 int udi_size; /* Size of T_unitdata_ind */ 3668 int mp_len; 3669 udp_t *udp; 3670 udpha_t *udpha; 3671 int ipversion; 3672 ip6_pkt_t ipp; 3673 ip6_t *ip6h; 3674 ip6i_t *ip6i; 3675 mblk_t *mp1; 3676 mblk_t *options_mp = NULL; 3677 ip_pktinfo_t *pinfo = NULL; 3678 cred_t *cr = NULL; 3679 pid_t cpid; 3680 uint32_t udp_ip_rcv_options_len; 3681 udp_bits_t udp_bits; 3682 cred_t *rcr = connp->conn_cred; 3683 udp_stack_t *us; 3684 3685 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3686 3687 udp = connp->conn_udp; 3688 us = udp->udp_us; 3689 rptr = mp->b_rptr; 3690 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3691 ASSERT(OK_32PTR(rptr)); 3692 3693 /* 3694 * IP should have prepended the options data in an M_CTL 
3695 * Check M_CTL "type" to make sure are not here bcos of 3696 * a valid ICMP message 3697 */ 3698 if (DB_TYPE(mp) == M_CTL) { 3699 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3700 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3701 IN_PKTINFO) { 3702 /* 3703 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3704 * has been prepended to the packet by IP. We need to 3705 * extract the mblk and adjust the rptr 3706 */ 3707 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3708 options_mp = mp; 3709 mp = mp->b_cont; 3710 rptr = mp->b_rptr; 3711 UDP_STAT(us, udp_in_pktinfo); 3712 } else { 3713 /* 3714 * ICMP messages. 3715 */ 3716 udp_icmp_error(connp, mp); 3717 return; 3718 } 3719 } 3720 3721 mp_len = msgdsize(mp); 3722 /* 3723 * This is the inbound data path. 3724 * First, we check to make sure the IP version number is correct, 3725 * and then pull the IP and UDP headers into the first mblk. 3726 */ 3727 3728 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3729 ipp.ipp_fields = 0; 3730 3731 ipversion = IPH_HDR_VERSION(rptr); 3732 3733 rw_enter(&udp->udp_rwlock, RW_READER); 3734 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3735 udp_bits = udp->udp_bits; 3736 rw_exit(&udp->udp_rwlock); 3737 3738 switch (ipversion) { 3739 case IPV4_VERSION: 3740 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3741 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3742 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3743 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3744 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3745 udp->udp_family == AF_INET) { 3746 /* 3747 * Record/update udp_ip_rcv_options with the lock 3748 * held. Not needed for AF_INET6 sockets 3749 * since they don't support a getsockopt of IP_OPTIONS. 3750 */ 3751 rw_enter(&udp->udp_rwlock, RW_WRITER); 3752 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3753 opt_len); 3754 rw_exit(&udp->udp_rwlock); 3755 } 3756 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. 
*/ 3757 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3758 udp->udp_ip_recvpktinfo) { 3759 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3760 ipp.ipp_fields |= IPPF_IFINDEX; 3761 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3762 } 3763 } 3764 break; 3765 case IPV6_VERSION: 3766 /* 3767 * IPv6 packets can only be received by applications 3768 * that are prepared to receive IPv6 addresses. 3769 * The IP fanout must ensure this. 3770 */ 3771 ASSERT(udp->udp_family == AF_INET6); 3772 3773 ip6h = (ip6_t *)rptr; 3774 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3775 3776 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3777 uint8_t nexthdrp; 3778 /* Look for ifindex information */ 3779 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3780 ip6i = (ip6i_t *)ip6h; 3781 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3782 goto tossit; 3783 3784 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3785 ASSERT(ip6i->ip6i_ifindex != 0); 3786 ipp.ipp_fields |= IPPF_IFINDEX; 3787 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3788 } 3789 rptr = (uchar_t *)&ip6i[1]; 3790 mp->b_rptr = rptr; 3791 if (rptr == mp->b_wptr) { 3792 mp1 = mp->b_cont; 3793 freeb(mp); 3794 mp = mp1; 3795 rptr = mp->b_rptr; 3796 } 3797 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3798 goto tossit; 3799 ip6h = (ip6_t *)rptr; 3800 mp_len = msgdsize(mp); 3801 } 3802 /* 3803 * Find any potentially interesting extension headers 3804 * as well as the length of the IPv6 + extension 3805 * headers. 3806 */ 3807 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3808 UDPH_SIZE; 3809 ASSERT(nexthdrp == IPPROTO_UDP); 3810 } else { 3811 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3812 ip6i = NULL; 3813 } 3814 break; 3815 default: 3816 ASSERT(0); 3817 } 3818 3819 /* 3820 * IP inspected the UDP header thus all of it must be in the mblk. 3821 * UDP length check is performed for IPv6 packets and IPv4 packets 3822 * to check if the size of the packet as specified 3823 * by the header is the same as the physical size of the packet. 3824 * FIXME? Didn't IP already check this? 
3825 */ 3826 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3827 if ((MBLKL(mp) < hdr_length) || 3828 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3829 goto tossit; 3830 } 3831 3832 3833 /* Walk past the headers unless UDP_RCVHDR was set. */ 3834 if (!udp_bits.udpb_rcvhdr) { 3835 mp->b_rptr = rptr + hdr_length; 3836 mp_len -= hdr_length; 3837 } 3838 3839 /* 3840 * This is the inbound data path. Packets are passed upstream as 3841 * T_UNITDATA_IND messages with full IP headers still attached. 3842 */ 3843 if (udp->udp_family == AF_INET) { 3844 sin_t *sin; 3845 3846 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3847 3848 /* 3849 * Normally only send up the source address. 3850 * If IP_RECVDSTADDR is set we include the destination IP 3851 * address as an option. With IP_RECVOPTS we include all 3852 * the IP options. 3853 */ 3854 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3855 if (udp_bits.udpb_recvdstaddr) { 3856 udi_size += sizeof (struct T_opthdr) + 3857 sizeof (struct in_addr); 3858 UDP_STAT(us, udp_in_recvdstaddr); 3859 } 3860 3861 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3862 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3863 udi_size += sizeof (struct T_opthdr) + 3864 sizeof (struct in_pktinfo); 3865 UDP_STAT(us, udp_ip_rcvpktinfo); 3866 } 3867 3868 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3869 udi_size += sizeof (struct T_opthdr) + opt_len; 3870 UDP_STAT(us, udp_in_recvopts); 3871 } 3872 3873 /* 3874 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3875 * space accordingly 3876 */ 3877 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3878 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3879 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3880 UDP_STAT(us, udp_in_recvif); 3881 } 3882 3883 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3884 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3885 udi_size += sizeof (struct T_opthdr) + 3886 sizeof (struct sockaddr_dl); 3887 UDP_STAT(us, 
udp_in_recvslla); 3888 } 3889 3890 if ((udp_bits.udpb_recvucred) && 3891 (cr = msg_getcred(mp, &cpid)) != NULL) { 3892 udi_size += sizeof (struct T_opthdr) + ucredsize; 3893 UDP_STAT(us, udp_in_recvucred); 3894 } 3895 3896 /* 3897 * If SO_TIMESTAMP is set allocate the appropriate sized 3898 * buffer. Since gethrestime() expects a pointer aligned 3899 * argument, we allocate space necessary for extra 3900 * alignment (even though it might not be used). 3901 */ 3902 if (udp_bits.udpb_timestamp) { 3903 udi_size += sizeof (struct T_opthdr) + 3904 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3905 UDP_STAT(us, udp_in_timestamp); 3906 } 3907 3908 /* 3909 * If IP_RECVTTL is set allocate the appropriate sized buffer 3910 */ 3911 if (udp_bits.udpb_recvttl) { 3912 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3913 UDP_STAT(us, udp_in_recvttl); 3914 } 3915 3916 /* Allocate a message block for the T_UNITDATA_IND structure. */ 3917 mp1 = allocb(udi_size, BPRI_MED); 3918 if (mp1 == NULL) { 3919 freemsg(mp); 3920 if (options_mp != NULL) 3921 freeb(options_mp); 3922 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3923 return; 3924 } 3925 mp1->b_cont = mp; 3926 mp = mp1; 3927 mp->b_datap->db_type = M_PROTO; 3928 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3929 mp->b_wptr = (uchar_t *)tudi + udi_size; 3930 tudi->PRIM_type = T_UNITDATA_IND; 3931 tudi->SRC_length = sizeof (sin_t); 3932 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3933 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3934 sizeof (sin_t); 3935 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3936 tudi->OPT_length = udi_size; 3937 sin = (sin_t *)&tudi[1]; 3938 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3939 sin->sin_port = udpha->uha_src_port; 3940 sin->sin_family = udp->udp_family; 3941 *(uint32_t *)&sin->sin_zero[0] = 0; 3942 *(uint32_t *)&sin->sin_zero[4] = 0; 3943 3944 /* 3945 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 3946 * IP_RECVTTL has been set. 
3947 */ 3948 if (udi_size != 0) { 3949 /* 3950 * Copy in destination address before options to avoid 3951 * any padding issues. 3952 */ 3953 char *dstopt; 3954 3955 dstopt = (char *)&sin[1]; 3956 if (udp_bits.udpb_recvdstaddr) { 3957 struct T_opthdr *toh; 3958 ipaddr_t *dstptr; 3959 3960 toh = (struct T_opthdr *)dstopt; 3961 toh->level = IPPROTO_IP; 3962 toh->name = IP_RECVDSTADDR; 3963 toh->len = sizeof (struct T_opthdr) + 3964 sizeof (ipaddr_t); 3965 toh->status = 0; 3966 dstopt += sizeof (struct T_opthdr); 3967 dstptr = (ipaddr_t *)dstopt; 3968 *dstptr = ((ipha_t *)rptr)->ipha_dst; 3969 dstopt += sizeof (ipaddr_t); 3970 udi_size -= toh->len; 3971 } 3972 3973 if (udp_bits.udpb_recvopts && opt_len > 0) { 3974 struct T_opthdr *toh; 3975 3976 toh = (struct T_opthdr *)dstopt; 3977 toh->level = IPPROTO_IP; 3978 toh->name = IP_RECVOPTS; 3979 toh->len = sizeof (struct T_opthdr) + opt_len; 3980 toh->status = 0; 3981 dstopt += sizeof (struct T_opthdr); 3982 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 3983 opt_len); 3984 dstopt += opt_len; 3985 udi_size -= toh->len; 3986 } 3987 3988 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 3989 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3990 struct T_opthdr *toh; 3991 struct in_pktinfo *pktinfop; 3992 3993 toh = (struct T_opthdr *)dstopt; 3994 toh->level = IPPROTO_IP; 3995 toh->name = IP_PKTINFO; 3996 toh->len = sizeof (struct T_opthdr) + 3997 sizeof (*pktinfop); 3998 toh->status = 0; 3999 dstopt += sizeof (struct T_opthdr); 4000 pktinfop = (struct in_pktinfo *)dstopt; 4001 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4002 pktinfop->ipi_spec_dst = 4003 pinfo->ip_pkt_match_addr; 4004 pktinfop->ipi_addr.s_addr = 4005 ((ipha_t *)rptr)->ipha_dst; 4006 4007 dstopt += sizeof (struct in_pktinfo); 4008 udi_size -= toh->len; 4009 } 4010 4011 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4012 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4013 4014 struct T_opthdr *toh; 4015 struct sockaddr_dl *dstptr; 4016 4017 toh = (struct T_opthdr 
*)dstopt; 4018 toh->level = IPPROTO_IP; 4019 toh->name = IP_RECVSLLA; 4020 toh->len = sizeof (struct T_opthdr) + 4021 sizeof (struct sockaddr_dl); 4022 toh->status = 0; 4023 dstopt += sizeof (struct T_opthdr); 4024 dstptr = (struct sockaddr_dl *)dstopt; 4025 bcopy(&pinfo->ip_pkt_slla, dstptr, 4026 sizeof (struct sockaddr_dl)); 4027 dstopt += sizeof (struct sockaddr_dl); 4028 udi_size -= toh->len; 4029 } 4030 4031 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4032 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4033 4034 struct T_opthdr *toh; 4035 uint_t *dstptr; 4036 4037 toh = (struct T_opthdr *)dstopt; 4038 toh->level = IPPROTO_IP; 4039 toh->name = IP_RECVIF; 4040 toh->len = sizeof (struct T_opthdr) + 4041 sizeof (uint_t); 4042 toh->status = 0; 4043 dstopt += sizeof (struct T_opthdr); 4044 dstptr = (uint_t *)dstopt; 4045 *dstptr = pinfo->ip_pkt_ifindex; 4046 dstopt += sizeof (uint_t); 4047 udi_size -= toh->len; 4048 } 4049 4050 if (cr != NULL) { 4051 struct T_opthdr *toh; 4052 4053 toh = (struct T_opthdr *)dstopt; 4054 toh->level = SOL_SOCKET; 4055 toh->name = SCM_UCRED; 4056 toh->len = sizeof (struct T_opthdr) + ucredsize; 4057 toh->status = 0; 4058 dstopt += sizeof (struct T_opthdr); 4059 (void) cred2ucred(cr, cpid, dstopt, rcr); 4060 dstopt += ucredsize; 4061 udi_size -= toh->len; 4062 } 4063 4064 if (udp_bits.udpb_timestamp) { 4065 struct T_opthdr *toh; 4066 4067 toh = (struct T_opthdr *)dstopt; 4068 toh->level = SOL_SOCKET; 4069 toh->name = SCM_TIMESTAMP; 4070 toh->len = sizeof (struct T_opthdr) + 4071 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4072 toh->status = 0; 4073 dstopt += sizeof (struct T_opthdr); 4074 /* Align for gethrestime() */ 4075 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4076 sizeof (intptr_t)); 4077 gethrestime((timestruc_t *)dstopt); 4078 dstopt = (char *)toh + toh->len; 4079 udi_size -= toh->len; 4080 } 4081 4082 /* 4083 * CAUTION: 4084 * Due to aligment issues 4085 * Processing of IP_RECVTTL option 4086 * should always be the last. 
Adding 4087 * any option processing after this will 4088 * cause alignment panic. 4089 */ 4090 if (udp_bits.udpb_recvttl) { 4091 struct T_opthdr *toh; 4092 uint8_t *dstptr; 4093 4094 toh = (struct T_opthdr *)dstopt; 4095 toh->level = IPPROTO_IP; 4096 toh->name = IP_RECVTTL; 4097 toh->len = sizeof (struct T_opthdr) + 4098 sizeof (uint8_t); 4099 toh->status = 0; 4100 dstopt += sizeof (struct T_opthdr); 4101 dstptr = (uint8_t *)dstopt; 4102 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4103 dstopt += sizeof (uint8_t); 4104 udi_size -= toh->len; 4105 } 4106 4107 /* Consumed all of allocated space */ 4108 ASSERT(udi_size == 0); 4109 } 4110 } else { 4111 sin6_t *sin6; 4112 4113 /* 4114 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4115 * 4116 * Normally we only send up the address. If receiving of any 4117 * optional receive side information is enabled, we also send 4118 * that up as options. 4119 */ 4120 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4121 4122 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4123 IPPF_RTHDR|IPPF_IFINDEX)) { 4124 if ((udp_bits.udpb_ipv6_recvhopopts) && 4125 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4126 size_t hlen; 4127 4128 UDP_STAT(us, udp_in_recvhopopts); 4129 hlen = copy_hop_opts(&ipp, NULL); 4130 if (hlen == 0) 4131 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4132 udi_size += hlen; 4133 } 4134 if (((udp_bits.udpb_ipv6_recvdstopts) || 4135 udp_bits.udpb_old_ipv6_recvdstopts) && 4136 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4137 udi_size += sizeof (struct T_opthdr) + 4138 ipp.ipp_dstoptslen; 4139 UDP_STAT(us, udp_in_recvdstopts); 4140 } 4141 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4142 udp_bits.udpb_ipv6_recvrthdr && 4143 (ipp.ipp_fields & IPPF_RTHDR)) || 4144 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4145 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4146 udi_size += sizeof (struct T_opthdr) + 4147 ipp.ipp_rtdstoptslen; 4148 UDP_STAT(us, udp_in_recvrtdstopts); 4149 } 4150 if ((udp_bits.udpb_ipv6_recvrthdr) && 4151 (ipp.ipp_fields & 
IPPF_RTHDR)) { 4152 udi_size += sizeof (struct T_opthdr) + 4153 ipp.ipp_rthdrlen; 4154 UDP_STAT(us, udp_in_recvrthdr); 4155 } 4156 if ((udp_bits.udpb_ip_recvpktinfo) && 4157 (ipp.ipp_fields & IPPF_IFINDEX)) { 4158 udi_size += sizeof (struct T_opthdr) + 4159 sizeof (struct in6_pktinfo); 4160 UDP_STAT(us, udp_in_recvpktinfo); 4161 } 4162 4163 } 4164 if ((udp_bits.udpb_recvucred) && 4165 (cr = msg_getcred(mp, &cpid)) != NULL) { 4166 udi_size += sizeof (struct T_opthdr) + ucredsize; 4167 UDP_STAT(us, udp_in_recvucred); 4168 } 4169 4170 /* 4171 * If SO_TIMESTAMP is set allocate the appropriate sized 4172 * buffer. Since gethrestime() expects a pointer aligned 4173 * argument, we allocate space necessary for extra 4174 * alignment (even though it might not be used). 4175 */ 4176 if (udp_bits.udpb_timestamp) { 4177 udi_size += sizeof (struct T_opthdr) + 4178 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4179 UDP_STAT(us, udp_in_timestamp); 4180 } 4181 4182 if (udp_bits.udpb_ipv6_recvhoplimit) { 4183 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4184 UDP_STAT(us, udp_in_recvhoplimit); 4185 } 4186 4187 if (udp_bits.udpb_ipv6_recvtclass) { 4188 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4189 UDP_STAT(us, udp_in_recvtclass); 4190 } 4191 4192 mp1 = allocb(udi_size, BPRI_MED); 4193 if (mp1 == NULL) { 4194 freemsg(mp); 4195 if (options_mp != NULL) 4196 freeb(options_mp); 4197 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4198 return; 4199 } 4200 mp1->b_cont = mp; 4201 mp = mp1; 4202 mp->b_datap->db_type = M_PROTO; 4203 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4204 mp->b_wptr = (uchar_t *)tudi + udi_size; 4205 tudi->PRIM_type = T_UNITDATA_IND; 4206 tudi->SRC_length = sizeof (sin6_t); 4207 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4208 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4209 sizeof (sin6_t); 4210 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4211 tudi->OPT_length = udi_size; 4212 sin6 = (sin6_t *)&tudi[1]; 4213 if (ipversion 
== IPV4_VERSION) { 4214 in6_addr_t v6dst; 4215 4216 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4217 &sin6->sin6_addr); 4218 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4219 &v6dst); 4220 sin6->sin6_flowinfo = 0; 4221 sin6->sin6_scope_id = 0; 4222 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4223 connp->conn_zoneid, us->us_netstack); 4224 } else { 4225 sin6->sin6_addr = ip6h->ip6_src; 4226 /* No sin6_flowinfo per API */ 4227 sin6->sin6_flowinfo = 0; 4228 /* For link-scope source pass up scope id */ 4229 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4230 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4231 sin6->sin6_scope_id = ipp.ipp_ifindex; 4232 else 4233 sin6->sin6_scope_id = 0; 4234 sin6->__sin6_src_id = ip_srcid_find_addr( 4235 &ip6h->ip6_dst, connp->conn_zoneid, 4236 us->us_netstack); 4237 } 4238 sin6->sin6_port = udpha->uha_src_port; 4239 sin6->sin6_family = udp->udp_family; 4240 4241 if (udi_size != 0) { 4242 uchar_t *dstopt; 4243 4244 dstopt = (uchar_t *)&sin6[1]; 4245 if ((udp_bits.udpb_ip_recvpktinfo) && 4246 (ipp.ipp_fields & IPPF_IFINDEX)) { 4247 struct T_opthdr *toh; 4248 struct in6_pktinfo *pkti; 4249 4250 toh = (struct T_opthdr *)dstopt; 4251 toh->level = IPPROTO_IPV6; 4252 toh->name = IPV6_PKTINFO; 4253 toh->len = sizeof (struct T_opthdr) + 4254 sizeof (*pkti); 4255 toh->status = 0; 4256 dstopt += sizeof (struct T_opthdr); 4257 pkti = (struct in6_pktinfo *)dstopt; 4258 if (ipversion == IPV6_VERSION) 4259 pkti->ipi6_addr = ip6h->ip6_dst; 4260 else 4261 IN6_IPADDR_TO_V4MAPPED( 4262 ((ipha_t *)rptr)->ipha_dst, 4263 &pkti->ipi6_addr); 4264 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4265 dstopt += sizeof (*pkti); 4266 udi_size -= toh->len; 4267 } 4268 if (udp_bits.udpb_ipv6_recvhoplimit) { 4269 struct T_opthdr *toh; 4270 4271 toh = (struct T_opthdr *)dstopt; 4272 toh->level = IPPROTO_IPV6; 4273 toh->name = IPV6_HOPLIMIT; 4274 toh->len = sizeof (struct T_opthdr) + 4275 sizeof (uint_t); 4276 toh->status = 0; 4277 dstopt += sizeof (struct T_opthdr); 4278 if 
(ipversion == IPV6_VERSION) 4279 *(uint_t *)dstopt = ip6h->ip6_hops; 4280 else 4281 *(uint_t *)dstopt = 4282 ((ipha_t *)rptr)->ipha_ttl; 4283 dstopt += sizeof (uint_t); 4284 udi_size -= toh->len; 4285 } 4286 if (udp_bits.udpb_ipv6_recvtclass) { 4287 struct T_opthdr *toh; 4288 4289 toh = (struct T_opthdr *)dstopt; 4290 toh->level = IPPROTO_IPV6; 4291 toh->name = IPV6_TCLASS; 4292 toh->len = sizeof (struct T_opthdr) + 4293 sizeof (uint_t); 4294 toh->status = 0; 4295 dstopt += sizeof (struct T_opthdr); 4296 if (ipversion == IPV6_VERSION) { 4297 *(uint_t *)dstopt = 4298 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4299 } else { 4300 ipha_t *ipha = (ipha_t *)rptr; 4301 *(uint_t *)dstopt = 4302 ipha->ipha_type_of_service; 4303 } 4304 dstopt += sizeof (uint_t); 4305 udi_size -= toh->len; 4306 } 4307 if ((udp_bits.udpb_ipv6_recvhopopts) && 4308 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4309 size_t hlen; 4310 4311 hlen = copy_hop_opts(&ipp, dstopt); 4312 dstopt += hlen; 4313 udi_size -= hlen; 4314 } 4315 if ((udp_bits.udpb_ipv6_recvdstopts) && 4316 (udp_bits.udpb_ipv6_recvrthdr) && 4317 (ipp.ipp_fields & IPPF_RTHDR) && 4318 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4319 struct T_opthdr *toh; 4320 4321 toh = (struct T_opthdr *)dstopt; 4322 toh->level = IPPROTO_IPV6; 4323 toh->name = IPV6_DSTOPTS; 4324 toh->len = sizeof (struct T_opthdr) + 4325 ipp.ipp_rtdstoptslen; 4326 toh->status = 0; 4327 dstopt += sizeof (struct T_opthdr); 4328 bcopy(ipp.ipp_rtdstopts, dstopt, 4329 ipp.ipp_rtdstoptslen); 4330 dstopt += ipp.ipp_rtdstoptslen; 4331 udi_size -= toh->len; 4332 } 4333 if ((udp_bits.udpb_ipv6_recvrthdr) && 4334 (ipp.ipp_fields & IPPF_RTHDR)) { 4335 struct T_opthdr *toh; 4336 4337 toh = (struct T_opthdr *)dstopt; 4338 toh->level = IPPROTO_IPV6; 4339 toh->name = IPV6_RTHDR; 4340 toh->len = sizeof (struct T_opthdr) + 4341 ipp.ipp_rthdrlen; 4342 toh->status = 0; 4343 dstopt += sizeof (struct T_opthdr); 4344 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4345 dstopt += ipp.ipp_rthdrlen; 4346 udi_size -= 
toh->len; 4347 } 4348 if ((udp_bits.udpb_ipv6_recvdstopts) && 4349 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4350 struct T_opthdr *toh; 4351 4352 toh = (struct T_opthdr *)dstopt; 4353 toh->level = IPPROTO_IPV6; 4354 toh->name = IPV6_DSTOPTS; 4355 toh->len = sizeof (struct T_opthdr) + 4356 ipp.ipp_dstoptslen; 4357 toh->status = 0; 4358 dstopt += sizeof (struct T_opthdr); 4359 bcopy(ipp.ipp_dstopts, dstopt, 4360 ipp.ipp_dstoptslen); 4361 dstopt += ipp.ipp_dstoptslen; 4362 udi_size -= toh->len; 4363 } 4364 if (cr != NULL) { 4365 struct T_opthdr *toh; 4366 4367 toh = (struct T_opthdr *)dstopt; 4368 toh->level = SOL_SOCKET; 4369 toh->name = SCM_UCRED; 4370 toh->len = sizeof (struct T_opthdr) + ucredsize; 4371 toh->status = 0; 4372 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4373 dstopt += toh->len; 4374 udi_size -= toh->len; 4375 } 4376 if (udp_bits.udpb_timestamp) { 4377 struct T_opthdr *toh; 4378 4379 toh = (struct T_opthdr *)dstopt; 4380 toh->level = SOL_SOCKET; 4381 toh->name = SCM_TIMESTAMP; 4382 toh->len = sizeof (struct T_opthdr) + 4383 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4384 toh->status = 0; 4385 dstopt += sizeof (struct T_opthdr); 4386 /* Align for gethrestime() */ 4387 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4388 sizeof (intptr_t)); 4389 gethrestime((timestruc_t *)dstopt); 4390 dstopt = (uchar_t *)toh + toh->len; 4391 udi_size -= toh->len; 4392 } 4393 4394 /* Consumed all of allocated space */ 4395 ASSERT(udi_size == 0); 4396 } 4397 #undef sin6 4398 /* No IP_RECVDSTADDR for IPv6. */ 4399 } 4400 4401 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4402 if (options_mp != NULL) 4403 freeb(options_mp); 4404 4405 udp_ulp_recv(connp, mp); 4406 4407 return; 4408 4409 tossit: 4410 freemsg(mp); 4411 if (options_mp != NULL) 4412 freeb(options_mp); 4413 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4414 } 4415 4416 /* 4417 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4418 * information that can be changing beneath us. 
 *
 * The control mblk of mpctl is indexed as a struct T_optmgmt_ack followed
 * by a struct opthdr; the opthdr is filled in with level/name/len before
 * each reply.  The replies sent on q are, in order:
 *   - the original mpctl carrying the fixed-size UDP MIB (us_udp_mib),
 *   - the IPv4 endpoint table (MIB2_UDP / MIB2_UDP_ENTRY),
 *   - the IPv4 MLP attribute table (EXPER_XPORT_MLP), only if non-empty,
 *   - the IPv6 endpoint table (MIB2_UDP6 / MIB2_UDP6_ENTRY),
 *   - the IPv6 MLP attribute table (EXPER_XPORT_MLP), only if non-empty.
 *
 * Returns a copy of the original request (taken before anything is
 * appended to it), or 0 after freeing every message if mpctl is NULL, has
 * no b_cont, or any of the four working copies cannot be allocated.  The
 * result of the first copymsg() is not checked, so the returned pointer
 * can be NULL on the success path as well.
 */
mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	mblk_t			*mp_conn_ctl;
	mblk_t			*mp_attr_ctl;
	mblk_t			*mp6_conn_ctl;
	mblk_t			*mp6_attr_ctl;
	mblk_t			*mp_conn_tail;
	mblk_t			*mp_attr_tail;
	mblk_t			*mp6_conn_tail;
	mblk_t			*mp6_attr_tail;
	struct opthdr		*optp;
	mib2_udpEntry_t		ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int			state;
	zoneid_t		zoneid;
	int			i;
	connf_t			*connfp;
	conn_t			*connp = Q_TO_CONN(q);
	int			v4_conn_idx;
	int			v6_conn_idx;
	boolean_t		needattr;
	udp_t			*udp;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
	mblk_t			*mp2ctl;

	/*
	 * make a copy of the original message
	 */
	mp2ctl = copymsg(mpctl);

	/*
	 * One working copy per table.  The three pointers cleared here are
	 * the ones the failure path frees; mp6_attr_ctl is the last
	 * allocation attempted, so it is never live on that path.
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		freemsg(mpctl);
		freemsg(mp2ctl);
		return (0);
	}

	/* Only endpoints in the requesting conn's zone are reported. */
	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	/* First reply: the scalar UDP MIB, sent using the original mpctl. */
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
	    sizeof (us->us_udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every bucket of the global conn hash looking for UDP conns. */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDPCONN))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/*
			 * An MLP attribute entry is only emitted for conns
			 * whose conn_mlp_type is not mlptSingle.
			 */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything an IPv4 peer could connect/send to).
			 */
			if (udp->udp_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress =
				    V4_PART_OF_V6(udp->udp_v6src);
				ude.udpLocalPort = ntohs(udp->udp_port);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    V4_PART_OF_V6(udp->udp_v6dst);
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude.udpCreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				/*
				 * tme_connidx records this entry's position
				 * in the IPv4 endpoint table.
				 */
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (udp->udp_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state = state;
				ude6.udp6LocalAddress = udp->udp_v6src;
				ude6.udp6LocalPort = ntohs(udp->udp_port);
				ude6.udp6IfIndex = udp->udp_bound_if;
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    udp->udp_v6dst;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude6.udp6CreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				/*
				 * tme_connidx records this entry's position
				 * in the IPv6 endpoint table.
				 */
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes; dropped rather than sent when empty */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes; dropped rather than sent when empty */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (mp2ctl);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to
 * do the appropriate locking.
 */
/* ARGSUSED */
int
udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
    uchar_t *ptr, int len)
{
	switch (level) {
	case MIB2_UDP:
		/* Every set request at the UDP level is rejected. */
		return (0);
	default:
		return (1);
	}
}

/*
 * This routine creates a T_UDERROR_IND message and passes it upstream.
 * The address and options are copied from the T_UNITDATA_REQ message
 * passed in mp. This message is freed.
 *
 * When mp is an M_DATA block there is no T_UNITDATA_REQ to read, so the
 * caller must supply destaddr/destlen and no options are reported.
 * Otherwise destaddr/destlen are ignored on input and taken from the
 * request header, after checking that the address and option ranges lie
 * inside the mblk.  If those checks fail, or the indication cannot be
 * allocated, nothing is sent upstream; mp is freed in every case.
 */
static void
udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
    t_scalar_t err)
{
	struct T_unitdata_req *tudr;
	mblk_t	*mp1;
	uchar_t	*optaddr;
	t_scalar_t optlen;

	if (DB_TYPE(mp) == M_DATA) {
		ASSERT(destaddr != NULL && destlen != 0);
		optaddr = NULL;
		optlen = 0;
	} else {
		/* The block must at least hold the fixed request header. */
		if ((mp->b_wptr < mp->b_rptr) ||
		    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
			goto done;
		}
		tudr = (struct T_unitdata_req *)mp->b_rptr;
		/*
		 * Both ends of the destination address must fall within
		 * [b_rptr, b_wptr].
		 */
		destaddr = mp->b_rptr + tudr->DEST_offset;
		if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
		    destaddr + tudr->DEST_length < mp->b_rptr ||
		    destaddr + tudr->DEST_length > mp->b_wptr) {
			goto done;
		}
		/* Same range check for the option buffer. */
		optaddr = mp->b_rptr + tudr->OPT_offset;
		if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
		    optaddr + tudr->OPT_length < mp->b_rptr ||
		    optaddr + tudr->OPT_length > mp->b_wptr) {
			goto done;
		}
		destlen = tudr->DEST_length;
		optlen = tudr->OPT_length;
	}

	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
	    (char *)optaddr, optlen, err);
	if (mp1 != NULL)
		qreply(q, mp1);

done:
	freemsg(mp);
}

/*
 * This routine removes a port number association from a stream. It
 * is called by udp_wput to handle T_UNBIND_REQ messages.
 *
 * A negative return from udp_do_unbind() is negated and reported as the
 * TPI error; a positive return is reported as TSYSERR with that value as
 * the UNIX error.  On success mp is converted to a T_OK_ACK and sent
 * upstream.
 */
static void
udp_tpi_unbind(queue_t *q, mblk_t *mp)
{
	conn_t	*connp = Q_TO_CONN(q);
	int	error;

	error = udp_do_unbind(connp);
	if (error) {
		if (error < 0)
			udp_err_ack(q, mp, -error, 0);
		else
			udp_err_ack(q, mp, TSYSERR, error);
		return;
	}

	/*
	 * NOTE(review): the NULL result is only guarded by ASSERT, so this
	 * relies on mi_tpi_ok_ack_alloc() not failing here - confirm that mp
	 * is always large enough to be reused for the ack.
	 */
	mp = mi_tpi_ok_ack_alloc(mp);
	ASSERT(mp != NULL);
	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
	qreply(q, mp);
}

/*
 * Don't let port fall into the privileged range.
4756 * Since the extra privileged ports can be arbitrary we also 4757 * ensure that we exclude those from consideration. 4758 * us->us_epriv_ports is not sorted thus we loop over it until 4759 * there are no changes. 4760 */ 4761 static in_port_t 4762 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4763 { 4764 int i; 4765 in_port_t nextport; 4766 boolean_t restart = B_FALSE; 4767 udp_stack_t *us = udp->udp_us; 4768 4769 if (random && udp_random_anon_port != 0) { 4770 (void) random_get_pseudo_bytes((uint8_t *)&port, 4771 sizeof (in_port_t)); 4772 /* 4773 * Unless changed by a sys admin, the smallest anon port 4774 * is 32768 and the largest anon port is 65535. It is 4775 * very likely (50%) for the random port to be smaller 4776 * than the smallest anon port. When that happens, 4777 * add port % (anon port range) to the smallest anon 4778 * port to get the random port. It should fall into the 4779 * valid anon port range. 4780 */ 4781 if (port < us->us_smallest_anon_port) { 4782 port = us->us_smallest_anon_port + 4783 port % (us->us_largest_anon_port - 4784 us->us_smallest_anon_port); 4785 } 4786 } 4787 4788 retry: 4789 if (port < us->us_smallest_anon_port) 4790 port = us->us_smallest_anon_port; 4791 4792 if (port > us->us_largest_anon_port) { 4793 port = us->us_smallest_anon_port; 4794 if (restart) 4795 return (0); 4796 restart = B_TRUE; 4797 } 4798 4799 if (port < us->us_smallest_nonpriv_port) 4800 port = us->us_smallest_nonpriv_port; 4801 4802 for (i = 0; i < us->us_num_epriv_ports; i++) { 4803 if (port == us->us_epriv_ports[i]) { 4804 port++; 4805 /* 4806 * Make sure that the port is in the 4807 * valid range. 
4808 */ 4809 goto retry; 4810 } 4811 } 4812 4813 if (is_system_labeled() && 4814 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4815 port, IPPROTO_UDP, B_TRUE)) != 0) { 4816 port = nextport; 4817 goto retry; 4818 } 4819 4820 return (port); 4821 } 4822 4823 static int 4824 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst, 4825 boolean_t *update_lastdst) 4826 { 4827 int err; 4828 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4829 udp_t *udp = Q_TO_UDP(wq); 4830 udp_stack_t *us = udp->udp_us; 4831 cred_t *cr; 4832 4833 /* 4834 * All Solaris components should pass a db_credp 4835 * for this message, hence we ASSERT. 4836 * On production kernels we return an error to be robust against 4837 * random streams modules sitting on top of us. 4838 */ 4839 cr = msg_getcred(mp, NULL); 4840 ASSERT(cr != NULL); 4841 if (cr == NULL) 4842 return (EINVAL); 4843 4844 /* Note that we use the cred/label from the message to handle MLP */ 4845 err = tsol_compute_label(cr, dst, 4846 opt_storage, udp->udp_connp->conn_mac_exempt, 4847 us->us_netstack->netstack_ip); 4848 if (err == 0) { 4849 err = tsol_update_options(&udp->udp_ip_snd_options, 4850 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 4851 opt_storage); 4852 } 4853 if (err != 0) { 4854 DTRACE_PROBE4( 4855 tx__ip__log__info__updatelabel__udp, 4856 char *, "queue(1) failed to update options(2) on mp(3)", 4857 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4858 } else { 4859 *update_lastdst = B_TRUE; 4860 } 4861 return (err); 4862 } 4863 4864 static mblk_t * 4865 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 4866 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 4867 cred_t *cr, pid_t pid) 4868 { 4869 udp_t *udp = connp->conn_udp; 4870 mblk_t *mp1 = mp; 4871 mblk_t *mp2; 4872 ipha_t *ipha; 4873 int ip_hdr_length; 4874 uint32_t ip_len; 4875 udpha_t *udpha; 4876 boolean_t lock_held = B_FALSE; 4877 in_port_t uha_src_port; 4878 udpattrs_t attrs; 4879 uchar_t 
ip_snd_opt[IP_MAX_OPT_LENGTH]; 4880 uint32_t ip_snd_opt_len = 0; 4881 ip4_pkt_t pktinfo; 4882 ip4_pkt_t *pktinfop = &pktinfo; 4883 ip_opt_info_t optinfo; 4884 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4885 udp_stack_t *us = udp->udp_us; 4886 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 4887 queue_t *q = connp->conn_wq; 4888 ire_t *ire; 4889 in6_addr_t v6dst; 4890 boolean_t update_lastdst = B_FALSE; 4891 4892 *error = 0; 4893 pktinfop->ip4_ill_index = 0; 4894 pktinfop->ip4_addr = INADDR_ANY; 4895 optinfo.ip_opt_flags = 0; 4896 optinfo.ip_opt_ill_index = 0; 4897 4898 if (v4dst == INADDR_ANY) 4899 v4dst = htonl(INADDR_LOOPBACK); 4900 4901 /* 4902 * If options passed in, feed it for verification and handling 4903 */ 4904 attrs.udpattr_credset = B_FALSE; 4905 if (IPCL_IS_NONSTR(connp)) { 4906 if (msg->msg_controllen != 0) { 4907 attrs.udpattr_ipp4 = pktinfop; 4908 attrs.udpattr_mb = mp; 4909 4910 rw_enter(&udp->udp_rwlock, RW_WRITER); 4911 *error = process_auxiliary_options(connp, 4912 msg->msg_control, msg->msg_controllen, 4913 &attrs, &udp_opt_obj, udp_opt_set, cr); 4914 rw_exit(&udp->udp_rwlock); 4915 if (*error) 4916 goto done; 4917 } 4918 } else { 4919 if (DB_TYPE(mp) != M_DATA) { 4920 mp1 = mp->b_cont; 4921 if (((struct T_unitdata_req *) 4922 mp->b_rptr)->OPT_length != 0) { 4923 attrs.udpattr_ipp4 = pktinfop; 4924 attrs.udpattr_mb = mp; 4925 if (udp_unitdata_opt_process(q, mp, error, 4926 &attrs) < 0) 4927 goto done; 4928 /* 4929 * Note: success in processing options. 4930 * mp option buffer represented by 4931 * OPT_length/offset now potentially modified 4932 * and contain option setting results 4933 */ 4934 ASSERT(*error == 0); 4935 } 4936 } 4937 } 4938 4939 /* mp1 points to the M_DATA mblk carrying the packet */ 4940 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 4941 4942 /* 4943 * Determine whether we need to mark the mblk with the user's 4944 * credentials. 4945 * If labeled then sockfs would have already done this. 
4946 */ 4947 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 4948 4949 ire = connp->conn_ire_cache; 4950 if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) || 4951 (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { 4952 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 4953 mblk_setcred(mp, cr, pid); 4954 } 4955 4956 rw_enter(&udp->udp_rwlock, RW_READER); 4957 lock_held = B_TRUE; 4958 4959 /* 4960 * Cluster and TSOL note: 4961 * udp.udp_v6lastdst is shared by Cluster and TSOL 4962 * udp.udp_lastdstport is used by Cluster 4963 * 4964 * Both Cluster and TSOL need to update the dest addr and/or port. 4965 * Updating is done after both Cluster and TSOL checks, protected 4966 * by conn_lock. 4967 */ 4968 mutex_enter(&connp->conn_lock); 4969 4970 if (cl_inet_connect2 != NULL && 4971 (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 4972 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 4973 udp->udp_lastdstport != port)) { 4974 mutex_exit(&connp->conn_lock); 4975 *error = 0; 4976 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 4977 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); 4978 if (*error != 0) { 4979 *error = EHOSTUNREACH; 4980 goto done; 4981 } 4982 update_lastdst = B_TRUE; 4983 mutex_enter(&connp->conn_lock); 4984 } 4985 4986 /* 4987 * Check if our saved options are valid; update if not. 4988 * TSOL Note: Since we are not in WRITER mode, UDP packets 4989 * to different destination may require different labels, 4990 * or worse, UDP packets to same IP address may require 4991 * different labels due to use of shared all-zones address. 4992 * We use conn_lock to ensure that lastdst, ip_snd_options, 4993 * and ip_snd_options_len are consistent for the current 4994 * destination and are updated atomically. 
4995 */ 4996 if (is_system_labeled()) { 4997 /* Using UDP MLP requires SCM_UCRED from user */ 4998 if (connp->conn_mlp_type != mlptSingle && 4999 !attrs.udpattr_credset) { 5000 mutex_exit(&connp->conn_lock); 5001 DTRACE_PROBE4( 5002 tx__ip__log__info__output__udp, 5003 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5004 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5005 *error = ECONNREFUSED; 5006 goto done; 5007 } 5008 /* 5009 * update label option for this UDP socket if 5010 * - the destination has changed, or 5011 * - the UDP socket is MLP 5012 */ 5013 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5014 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5015 connp->conn_mlp_type != mlptSingle) && 5016 (*error = udp_update_label(q, mp, v4dst, &update_lastdst)) 5017 != 0) { 5018 mutex_exit(&connp->conn_lock); 5019 goto done; 5020 } 5021 } 5022 if (update_lastdst) { 5023 IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); 5024 udp->udp_lastdstport = port; 5025 } 5026 if (udp->udp_ip_snd_options_len > 0) { 5027 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5028 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5029 } 5030 mutex_exit(&connp->conn_lock); 5031 5032 /* Add an IP header */ 5033 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5034 (insert_spi ? sizeof (uint32_t) : 0); 5035 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5036 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5037 !OK_32PTR(ipha)) { 5038 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5039 if (mp2 == NULL) { 5040 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5041 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5042 *error = ENOMEM; 5043 goto done; 5044 } 5045 mp2->b_wptr = DB_LIM(mp2); 5046 mp2->b_cont = mp1; 5047 mp1 = mp2; 5048 if (DB_TYPE(mp) != M_DATA) 5049 mp->b_cont = mp1; 5050 else 5051 mp = mp1; 5052 5053 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5054 } 5055 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? 
sizeof (uint32_t) : 0)); 5056 #ifdef _BIG_ENDIAN 5057 /* Set version, header length, and tos */ 5058 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5059 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5060 udp->udp_type_of_service); 5061 /* Set ttl and protocol */ 5062 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5063 #else 5064 /* Set version, header length, and tos */ 5065 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5066 ((udp->udp_type_of_service << 8) | 5067 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5068 /* Set ttl and protocol */ 5069 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5070 #endif 5071 if (pktinfop->ip4_addr != INADDR_ANY) { 5072 ipha->ipha_src = pktinfop->ip4_addr; 5073 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5074 } else { 5075 /* 5076 * Copy our address into the packet. If this is zero, 5077 * first look at __sin6_src_id for a hint. If we leave the 5078 * source as INADDR_ANY then ip will fill in the real source 5079 * address. 
5080 */ 5081 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5082 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5083 in6_addr_t v6src; 5084 5085 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5086 us->us_netstack); 5087 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5088 } 5089 } 5090 uha_src_port = udp->udp_port; 5091 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5092 rw_exit(&udp->udp_rwlock); 5093 lock_held = B_FALSE; 5094 } 5095 5096 if (pktinfop->ip4_ill_index != 0) { 5097 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5098 } 5099 5100 ipha->ipha_fragment_offset_and_flags = 0; 5101 ipha->ipha_ident = 0; 5102 5103 mp1->b_rptr = (uchar_t *)ipha; 5104 5105 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5106 (uintptr_t)UINT_MAX); 5107 5108 /* Determine length of packet */ 5109 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5110 if ((mp2 = mp1->b_cont) != NULL) { 5111 do { 5112 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5113 ip_len += (uint32_t)MBLKL(mp2); 5114 } while ((mp2 = mp2->b_cont) != NULL); 5115 } 5116 /* 5117 * If the size of the packet is greater than the maximum allowed by 5118 * ip, return an error. Passing this down could cause panics because 5119 * the size will have wrapped and be inconsistent with the msg size. 5120 */ 5121 if (ip_len > IP_MAXPACKET) { 5122 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5123 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5124 *error = EMSGSIZE; 5125 goto done; 5126 } 5127 ipha->ipha_length = htons((uint16_t)ip_len); 5128 ip_len -= ip_hdr_length; 5129 ip_len = htons((uint16_t)ip_len); 5130 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5131 5132 /* Insert all-0s SPI now. */ 5133 if (insert_spi) 5134 *((uint32_t *)(udpha + 1)) = 0; 5135 5136 /* 5137 * Copy in the destination address 5138 */ 5139 ipha->ipha_dst = v4dst; 5140 5141 /* 5142 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 
5143 */ 5144 if (CLASSD(v4dst)) 5145 ipha->ipha_ttl = udp->udp_multicast_ttl; 5146 5147 udpha->uha_dst_port = port; 5148 udpha->uha_src_port = uha_src_port; 5149 5150 if (ip_snd_opt_len > 0) { 5151 uint32_t cksum; 5152 5153 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5154 lock_held = B_FALSE; 5155 rw_exit(&udp->udp_rwlock); 5156 /* 5157 * Massage source route putting first source route in ipha_dst. 5158 * Ignore the destination in T_unitdata_req. 5159 * Create a checksum adjustment for a source route, if any. 5160 */ 5161 cksum = ip_massage_options(ipha, us->us_netstack); 5162 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5163 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5164 (ipha->ipha_dst & 0xFFFF); 5165 if ((int)cksum < 0) 5166 cksum--; 5167 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5168 /* 5169 * IP does the checksum if uha_checksum is non-zero, 5170 * We make it easy for IP to include our pseudo header 5171 * by putting our length in uha_checksum. 5172 */ 5173 cksum += ip_len; 5174 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5175 /* There might be a carry. */ 5176 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5177 #ifdef _LITTLE_ENDIAN 5178 if (us->us_do_checksum) 5179 ip_len = (cksum << 16) | ip_len; 5180 #else 5181 if (us->us_do_checksum) 5182 ip_len = (ip_len << 16) | cksum; 5183 else 5184 ip_len <<= 16; 5185 #endif 5186 } else { 5187 /* 5188 * IP does the checksum if uha_checksum is non-zero, 5189 * We make it easy for IP to include our pseudo header 5190 * by putting our length in uha_checksum. 
5191 */ 5192 if (us->us_do_checksum) 5193 ip_len |= (ip_len << 16); 5194 #ifndef _LITTLE_ENDIAN 5195 else 5196 ip_len <<= 16; 5197 #endif 5198 } 5199 ASSERT(!lock_held); 5200 /* Set UDP length and checksum */ 5201 *((uint32_t *)&udpha->uha_length) = ip_len; 5202 5203 if (DB_TYPE(mp) != M_DATA) { 5204 cred_t *cr; 5205 pid_t cpid; 5206 5207 /* Move any cred from the T_UNITDATA_REQ to the packet */ 5208 cr = msg_extractcred(mp, &cpid); 5209 if (cr != NULL) { 5210 if (mp1->b_datap->db_credp != NULL) 5211 crfree(mp1->b_datap->db_credp); 5212 mp1->b_datap->db_credp = cr; 5213 mp1->b_datap->db_cpid = cpid; 5214 } 5215 ASSERT(mp != mp1); 5216 freeb(mp); 5217 } 5218 5219 /* mp has been consumed and we'll return success */ 5220 ASSERT(*error == 0); 5221 mp = NULL; 5222 5223 /* We're done. Pass the packet to ip. */ 5224 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5225 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5226 "udp_wput_end: q %p (%S)", q, "end"); 5227 5228 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5229 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5230 connp->conn_dontroute || 5231 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5232 optinfo.ip_opt_ill_index != 0 || 5233 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5234 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5235 ipst->ips_ip_g_mrouter != NULL) { 5236 UDP_STAT(us, udp_ip_send); 5237 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5238 &optinfo); 5239 } else { 5240 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5241 } 5242 5243 done: 5244 if (lock_held) 5245 rw_exit(&udp->udp_rwlock); 5246 if (*error != 0) { 5247 ASSERT(mp != NULL); 5248 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5249 } 5250 return (mp); 5251 } 5252 5253 static void 5254 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5255 { 5256 conn_t *connp = udp->udp_connp; 5257 ipaddr_t src, dst; 5258 ire_t *ire; 5259 ipif_t *ipif = NULL; 5260 mblk_t *ire_fp_mp; 5261 boolean_t retry_caching; 5262 udp_stack_t 
*us = udp->udp_us; 5263 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5264 5265 dst = ipha->ipha_dst; 5266 src = ipha->ipha_src; 5267 ASSERT(ipha->ipha_ident == 0); 5268 5269 if (CLASSD(dst)) { 5270 int err; 5271 5272 ipif = conn_get_held_ipif(connp, 5273 &connp->conn_multicast_ipif, &err); 5274 5275 if (ipif == NULL || ipif->ipif_isv6 || 5276 (ipif->ipif_ill->ill_phyint->phyint_flags & 5277 PHYI_LOOPBACK)) { 5278 if (ipif != NULL) 5279 ipif_refrele(ipif); 5280 UDP_STAT(us, udp_ip_send); 5281 ip_output(connp, mp, q, IP_WPUT); 5282 return; 5283 } 5284 } 5285 5286 retry_caching = B_FALSE; 5287 mutex_enter(&connp->conn_lock); 5288 ire = connp->conn_ire_cache; 5289 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5290 5291 if (ire == NULL || ire->ire_addr != dst || 5292 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5293 retry_caching = B_TRUE; 5294 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5295 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5296 5297 ASSERT(ipif != NULL); 5298 if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) 5299 retry_caching = B_TRUE; 5300 } 5301 5302 if (!retry_caching) { 5303 ASSERT(ire != NULL); 5304 IRE_REFHOLD(ire); 5305 mutex_exit(&connp->conn_lock); 5306 } else { 5307 boolean_t cached = B_FALSE; 5308 5309 connp->conn_ire_cache = NULL; 5310 mutex_exit(&connp->conn_lock); 5311 5312 /* Release the old ire */ 5313 if (ire != NULL) { 5314 IRE_REFRELE_NOTR(ire); 5315 ire = NULL; 5316 } 5317 5318 if (CLASSD(dst)) { 5319 ASSERT(ipif != NULL); 5320 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5321 connp->conn_zoneid, msg_getlabel(mp), 5322 MATCH_IRE_ILL, ipst); 5323 } else { 5324 ASSERT(ipif == NULL); 5325 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5326 msg_getlabel(mp), ipst); 5327 } 5328 5329 if (ire == NULL) { 5330 if (ipif != NULL) 5331 ipif_refrele(ipif); 5332 UDP_STAT(us, udp_ire_null); 5333 ip_output(connp, mp, q, IP_WPUT); 5334 return; 5335 } 5336 IRE_REFHOLD_NOTR(ire); 5337 5338 mutex_enter(&connp->conn_lock); 5339 if 
(CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5340 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5341 irb_t *irb = ire->ire_bucket; 5342 5343 /* 5344 * IRE's created for non-connection oriented transports 5345 * are normally initialized with IRE_MARK_TEMPORARY set 5346 * in the ire_marks. These IRE's are preferentially 5347 * reaped when the hash chain length in the cache 5348 * bucket exceeds the maximum value specified in 5349 * ip[6]_ire_max_bucket_cnt. This can severely affect 5350 * UDP performance if IRE cache entries that we need 5351 * to reuse are continually removed. To remedy this, 5352 * when we cache the IRE in the conn_t, we remove the 5353 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5354 * set. 5355 */ 5356 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5357 rw_enter(&irb->irb_lock, RW_WRITER); 5358 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5359 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5360 irb->irb_tmp_ire_cnt--; 5361 } 5362 rw_exit(&irb->irb_lock); 5363 } 5364 connp->conn_ire_cache = ire; 5365 cached = B_TRUE; 5366 } 5367 mutex_exit(&connp->conn_lock); 5368 5369 /* 5370 * We can continue to use the ire but since it was not 5371 * cached, we should drop the extra reference. 5372 */ 5373 if (!cached) 5374 IRE_REFRELE_NOTR(ire); 5375 } 5376 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5377 ASSERT(!CLASSD(dst) || ipif != NULL); 5378 5379 /* 5380 * Check if we can take the fast-path. 
5381 * Note that "incomplete" ire's (where the link-layer for next hop 5382 * is not resolved, or where the fast-path header in nce_fp_mp is not 5383 * available yet) are sent down the legacy (slow) path 5384 */ 5385 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 5386 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 5387 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 5388 ((ire->ire_nce == NULL) || 5389 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 5390 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 5391 if (ipif != NULL) 5392 ipif_refrele(ipif); 5393 UDP_STAT(us, udp_ip_ire_send); 5394 IRE_REFRELE(ire); 5395 ip_output(connp, mp, q, IP_WPUT); 5396 return; 5397 } 5398 5399 if (src == INADDR_ANY && !connp->conn_unspec_src) { 5400 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 5401 ipha->ipha_src = ipif->ipif_src_addr; 5402 else 5403 ipha->ipha_src = ire->ire_src_addr; 5404 } 5405 5406 if (ipif != NULL) 5407 ipif_refrele(ipif); 5408 5409 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 5410 } 5411 5412 static void 5413 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 5414 { 5415 ipaddr_t src, dst; 5416 ill_t *ill; 5417 mblk_t *ire_fp_mp; 5418 uint_t ire_fp_mp_len; 5419 uint16_t *up; 5420 uint32_t cksum, hcksum_txflags; 5421 queue_t *dev_q; 5422 udp_t *udp = connp->conn_udp; 5423 ipha_t *ipha = (ipha_t *)mp->b_rptr; 5424 udp_stack_t *us = udp->udp_us; 5425 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5426 boolean_t ll_multicast = B_FALSE; 5427 boolean_t direct_send; 5428 5429 dev_q = ire->ire_stq->q_next; 5430 ASSERT(dev_q != NULL); 5431 5432 ill = ire_to_ill(ire); 5433 ASSERT(ill != NULL); 5434 5435 /* 5436 * For the direct send case, if resetting of conn_direct_blocked 5437 * was missed, it is still ok because the putq() would enable 5438 * the queue and write service will drain it out. 
5439 */ 5440 direct_send = ILL_DIRECT_CAPABLE(ill); 5441 5442 /* is queue flow controlled? */ 5443 if ((!direct_send) && (q->q_first != NULL || connp->conn_draining || 5444 DEV_Q_FLOW_BLOCKED(dev_q))) { 5445 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 5446 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5447 if (ipst->ips_ip_output_queue) { 5448 DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp); 5449 (void) putq(connp->conn_wq, mp); 5450 } else { 5451 freemsg(mp); 5452 } 5453 ire_refrele(ire); 5454 return; 5455 } 5456 5457 ire_fp_mp = ire->ire_nce->nce_fp_mp; 5458 ire_fp_mp_len = MBLKL(ire_fp_mp); 5459 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 5460 5461 dst = ipha->ipha_dst; 5462 src = ipha->ipha_src; 5463 5464 5465 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 5466 5467 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 5468 #ifndef _BIG_ENDIAN 5469 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 5470 #endif 5471 5472 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 5473 ASSERT(ill->ill_hcksum_capab != NULL); 5474 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 5475 } else { 5476 hcksum_txflags = 0; 5477 } 5478 5479 /* pseudo-header checksum (do it in parts for IP header checksum) */ 5480 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 5481 5482 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 5483 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 5484 if (*up != 0) { 5485 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 5486 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 5487 ntohs(ipha->ipha_length), cksum); 5488 5489 /* Software checksum? 
*/ 5490 if (DB_CKSUMFLAGS(mp) == 0) { 5491 UDP_STAT(us, udp_out_sw_cksum); 5492 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 5493 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 5494 } 5495 } 5496 5497 if (!CLASSD(dst)) { 5498 ipha->ipha_fragment_offset_and_flags |= 5499 (uint32_t)htons(ire->ire_frag_flag); 5500 } 5501 5502 /* Calculate IP header checksum if hardware isn't capable */ 5503 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 5504 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 5505 ((uint16_t *)ipha)[4]); 5506 } 5507 5508 if (CLASSD(dst)) { 5509 if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) { 5510 ip_multicast_loopback(q, ill, mp, 5511 connp->conn_multicast_loop ? 0 : 5512 IP_FF_NO_MCAST_LOOP, zoneid); 5513 } 5514 5515 /* If multicast TTL is 0 then we are done */ 5516 if (ipha->ipha_ttl == 0) { 5517 freemsg(mp); 5518 ire_refrele(ire); 5519 return; 5520 } 5521 ll_multicast = B_TRUE; 5522 } 5523 5524 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 5525 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 5526 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 5527 5528 UPDATE_OB_PKT_COUNT(ire); 5529 ire->ire_last_used_time = lbolt; 5530 5531 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 5532 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 5533 ntohs(ipha->ipha_length)); 5534 5535 DTRACE_PROBE4(ip4__physical__out__start, 5536 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 5537 FW_HOOKS(ipst->ips_ip4_physical_out_event, 5538 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 5539 ll_multicast, ipst); 5540 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 5541 if (ipst->ips_ipobs_enabled && mp != NULL) { 5542 zoneid_t szone; 5543 5544 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 5545 ipst, ALL_ZONES); 5546 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 5547 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst); 5548 } 5549 5550 if (mp == NULL) 5551 goto bail; 5552 5553 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 5554 void_ip_t *, ipha, 
__dtrace_ipsr_ill_t *, ill, 5555 ipha_t *, ipha, ip6_t *, NULL, int, 0); 5556 5557 if (direct_send) { 5558 uintptr_t cookie; 5559 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 5560 5561 cookie = idd->idd_tx_df(idd->idd_tx_dh, mp, 5562 (uintptr_t)connp, 0); 5563 if (cookie != NULL) { 5564 idl_tx_list_t *idl_txl; 5565 5566 /* 5567 * Flow controlled. 5568 */ 5569 DTRACE_PROBE2(non__null__cookie, uintptr_t, 5570 cookie, conn_t *, connp); 5571 idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)]; 5572 mutex_enter(&idl_txl->txl_lock); 5573 /* 5574 * Check again after holding txl_lock to see if Tx 5575 * ring is still blocked and only then insert the 5576 * connp into the drain list. 5577 */ 5578 if (connp->conn_direct_blocked || 5579 (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, 5580 cookie) == 0)) { 5581 mutex_exit(&idl_txl->txl_lock); 5582 goto bail; 5583 } 5584 if (idl_txl->txl_cookie != NULL && 5585 idl_txl->txl_cookie != cookie) { 5586 DTRACE_PROBE2(udp__xmit__collision, 5587 uintptr_t, cookie, 5588 uintptr_t, idl_txl->txl_cookie); 5589 UDP_STAT(us, udp_cookie_coll); 5590 } else { 5591 connp->conn_direct_blocked = B_TRUE; 5592 idl_txl->txl_cookie = cookie; 5593 conn_drain_insert(connp, idl_txl); 5594 DTRACE_PROBE1(udp__xmit__insert, 5595 conn_t *, connp); 5596 } 5597 mutex_exit(&idl_txl->txl_lock); 5598 } 5599 } else { 5600 DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp); 5601 putnext(ire->ire_stq, mp); 5602 } 5603 bail: 5604 IRE_REFRELE(ire); 5605 } 5606 5607 static boolean_t 5608 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst, 5609 boolean_t *update_lastdst) 5610 { 5611 udp_t *udp = Q_TO_UDP(wq); 5612 int err; 5613 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 5614 udp_stack_t *us = udp->udp_us; 5615 cred_t *cr; 5616 5617 /* 5618 * All Solaris components should pass a db_credp 5619 * for this message, hence we ASSERT. 5620 * On production kernels we return an error to be robust against 5621 * random streams modules sitting on top of us. 
5622 */ 5623 cr = msg_getcred(mp, NULL); 5624 ASSERT(cr != NULL); 5625 if (cr == NULL) 5626 return (EINVAL); 5627 5628 /* Note that we use the cred/label from the message to handle MLP */ 5629 err = tsol_compute_label_v6(cr, 5630 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 5631 us->us_netstack->netstack_ip); 5632 if (err == 0) { 5633 err = tsol_update_sticky(&udp->udp_sticky_ipp, 5634 &udp->udp_label_len_v6, opt_storage); 5635 } 5636 if (err != 0) { 5637 DTRACE_PROBE4( 5638 tx__ip__log__drop__updatelabel__udp6, 5639 char *, "queue(1) failed to update options(2) on mp(3)", 5640 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5641 } else { 5642 *update_lastdst = B_TRUE; 5643 } 5644 return (err); 5645 } 5646 5647 static int 5648 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5649 pid_t pid) 5650 { 5651 udp_t *udp = connp->conn_udp; 5652 udp_stack_t *us = udp->udp_us; 5653 ipaddr_t v4dst; 5654 in_port_t dstport; 5655 boolean_t mapped_addr; 5656 struct sockaddr_storage ss; 5657 sin_t *sin; 5658 sin6_t *sin6; 5659 struct sockaddr *addr; 5660 socklen_t addrlen; 5661 int error; 5662 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5663 5664 /* M_DATA for connected socket */ 5665 5666 ASSERT(udp->udp_issocket); 5667 UDP_DBGSTAT(us, udp_data_conn); 5668 5669 mutex_enter(&connp->conn_lock); 5670 if (udp->udp_state != TS_DATA_XFER) { 5671 mutex_exit(&connp->conn_lock); 5672 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5673 UDP_STAT(us, udp_out_err_notconn); 5674 freemsg(mp); 5675 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5676 "udp_wput_end: connp %p (%S)", connp, 5677 "not-connected; address required"); 5678 return (EDESTADDRREQ); 5679 } 5680 5681 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5682 if (mapped_addr) 5683 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5684 5685 /* Initialize addr and addrlen as if they're passed in */ 5686 if (udp->udp_family == AF_INET) { 5687 sin = (sin_t *)&ss; 5688 sin->sin_family = AF_INET; 5689 
dstport = sin->sin_port = udp->udp_dstport; 5690 ASSERT(mapped_addr); 5691 sin->sin_addr.s_addr = v4dst; 5692 addr = (struct sockaddr *)sin; 5693 addrlen = sizeof (*sin); 5694 } else { 5695 sin6 = (sin6_t *)&ss; 5696 sin6->sin6_family = AF_INET6; 5697 dstport = sin6->sin6_port = udp->udp_dstport; 5698 sin6->sin6_flowinfo = udp->udp_flowinfo; 5699 sin6->sin6_addr = udp->udp_v6dst; 5700 sin6->sin6_scope_id = 0; 5701 sin6->__sin6_src_id = 0; 5702 addr = (struct sockaddr *)sin6; 5703 addrlen = sizeof (*sin6); 5704 } 5705 mutex_exit(&connp->conn_lock); 5706 5707 if (mapped_addr) { 5708 /* 5709 * Handle both AF_INET and AF_INET6; the latter 5710 * for IPV4 mapped destination addresses. Note 5711 * here that both addr and addrlen point to the 5712 * corresponding struct depending on the address 5713 * family of the socket. 5714 */ 5715 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5716 insert_spi, msg, cr, pid); 5717 } else { 5718 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5719 } 5720 if (error == 0) { 5721 ASSERT(mp == NULL); 5722 return (0); 5723 } 5724 5725 UDP_STAT(us, udp_out_err_output); 5726 ASSERT(mp != NULL); 5727 if (IPCL_IS_NONSTR(connp)) { 5728 freemsg(mp); 5729 return (error); 5730 } else { 5731 /* mp is freed by the following routine */ 5732 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5733 (t_scalar_t)addrlen, (t_scalar_t)error); 5734 return (0); 5735 } 5736 } 5737 5738 /* ARGSUSED */ 5739 static int 5740 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5741 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5742 { 5743 5744 udp_t *udp = connp->conn_udp; 5745 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5746 int error = 0; 5747 sin6_t *sin6; 5748 sin_t *sin; 5749 uint_t srcid; 5750 uint16_t port; 5751 ipaddr_t v4dst; 5752 5753 5754 ASSERT(addr != NULL); 5755 5756 switch (udp->udp_family) { 5757 case AF_INET6: 5758 sin6 = (sin6_t *)addr; 5759 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 
5760 /* 5761 * Destination is a non-IPv4-compatible IPv6 address. 5762 * Send out an IPv6 format packet. 5763 */ 5764 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5765 pid); 5766 if (error != 0) 5767 goto ud_error; 5768 5769 return (0); 5770 } 5771 /* 5772 * If the local address is not zero or a mapped address 5773 * return an error. It would be possible to send an IPv4 5774 * packet but the response would never make it back to the 5775 * application since it is bound to a non-mapped address. 5776 */ 5777 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5778 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5779 error = EADDRNOTAVAIL; 5780 goto ud_error; 5781 } 5782 /* Send IPv4 packet without modifying udp_ipversion */ 5783 /* Extract port and ipaddr */ 5784 port = sin6->sin6_port; 5785 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5786 srcid = sin6->__sin6_src_id; 5787 break; 5788 5789 case AF_INET: 5790 sin = (sin_t *)addr; 5791 /* Extract port and ipaddr */ 5792 port = sin->sin_port; 5793 v4dst = sin->sin_addr.s_addr; 5794 srcid = 0; 5795 break; 5796 } 5797 5798 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5799 msg, cr, pid); 5800 5801 if (error == 0) { 5802 ASSERT(mp == NULL); 5803 return (0); 5804 } 5805 5806 ud_error: 5807 ASSERT(mp != NULL); 5808 5809 return (error); 5810 } 5811 5812 /* 5813 * This routine handles all messages passed downstream. It either 5814 * consumes the message or passes it downstream; it never queues a 5815 * a message. 5816 * 5817 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 5818 * is valid when we are directly beneath the stream head, and thus sockfs 5819 * is able to bypass STREAMS and directly call us, passing along the sockaddr 5820 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 5821 * connected endpoints. 
5822 */ 5823 void 5824 udp_wput(queue_t *q, mblk_t *mp) 5825 { 5826 conn_t *connp = Q_TO_CONN(q); 5827 udp_t *udp = connp->conn_udp; 5828 int error = 0; 5829 struct sockaddr *addr; 5830 socklen_t addrlen; 5831 udp_stack_t *us = udp->udp_us; 5832 5833 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 5834 "udp_wput_start: queue %p mp %p", q, mp); 5835 5836 /* 5837 * We directly handle several cases here: T_UNITDATA_REQ message 5838 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 5839 * socket. 5840 */ 5841 switch (DB_TYPE(mp)) { 5842 case M_DATA: 5843 /* 5844 * Quick check for error cases. Checks will be done again 5845 * under the lock later on 5846 */ 5847 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 5848 /* Not connected; address is required */ 5849 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5850 UDP_STAT(us, udp_out_err_notconn); 5851 freemsg(mp); 5852 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5853 "udp_wput_end: connp %p (%S)", connp, 5854 "not-connected; address required"); 5855 return; 5856 } 5857 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 5858 return; 5859 5860 case M_PROTO: 5861 case M_PCPROTO: { 5862 struct T_unitdata_req *tudr; 5863 5864 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 5865 tudr = (struct T_unitdata_req *)mp->b_rptr; 5866 5867 /* Handle valid T_UNITDATA_REQ here */ 5868 if (MBLKL(mp) >= sizeof (*tudr) && 5869 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 5870 if (mp->b_cont == NULL) { 5871 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5872 "udp_wput_end: q %p (%S)", q, "badaddr"); 5873 error = EPROTO; 5874 goto ud_error; 5875 } 5876 5877 if (!MBLKIN(mp, 0, tudr->DEST_offset + 5878 tudr->DEST_length)) { 5879 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5880 "udp_wput_end: q %p (%S)", q, "badaddr"); 5881 error = EADDRNOTAVAIL; 5882 goto ud_error; 5883 } 5884 /* 5885 * If a port has not been bound to the stream, fail. 
5886 * This is not a problem when sockfs is directly 5887 * above us, because it will ensure that the socket 5888 * is first bound before allowing data to be sent. 5889 */ 5890 if (udp->udp_state == TS_UNBND) { 5891 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5892 "udp_wput_end: q %p (%S)", q, "outstate"); 5893 error = EPROTO; 5894 goto ud_error; 5895 } 5896 addr = (struct sockaddr *) 5897 &mp->b_rptr[tudr->DEST_offset]; 5898 addrlen = tudr->DEST_length; 5899 if (tudr->OPT_length != 0) 5900 UDP_STAT(us, udp_out_opt); 5901 break; 5902 } 5903 /* FALLTHRU */ 5904 } 5905 default: 5906 udp_wput_other(q, mp); 5907 return; 5908 } 5909 ASSERT(addr != NULL); 5910 5911 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 5912 -1); 5913 if (error != 0) { 5914 ud_error: 5915 UDP_STAT(us, udp_out_err_output); 5916 ASSERT(mp != NULL); 5917 /* mp is freed by the following routine */ 5918 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 5919 (t_scalar_t)error); 5920 } 5921 } 5922 5923 /* ARGSUSED */ 5924 static void 5925 udp_wput_fallback(queue_t *wq, mblk_t *mp) 5926 { 5927 #ifdef DEBUG 5928 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 5929 #endif 5930 freemsg(mp); 5931 } 5932 5933 5934 /* 5935 * udp_output_v6(): 5936 * Assumes that udp_wput did some sanity checking on the destination 5937 * address. 
5938 */ 5939 static mblk_t * 5940 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 5941 struct nmsghdr *msg, cred_t *cr, pid_t pid) 5942 { 5943 ip6_t *ip6h; 5944 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 5945 mblk_t *mp1 = mp; 5946 mblk_t *mp2; 5947 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 5948 size_t ip_len; 5949 udpha_t *udph; 5950 udp_t *udp = connp->conn_udp; 5951 udp_stack_t *us = udp->udp_us; 5952 queue_t *q = connp->conn_wq; 5953 ip6_pkt_t ipp_s; /* For ancillary data options */ 5954 ip6_pkt_t *ipp = &ipp_s; 5955 ip6_pkt_t *tipp; /* temporary ipp */ 5956 uint32_t csum = 0; 5957 uint_t ignore = 0; 5958 uint_t option_exists = 0, is_sticky = 0; 5959 uint8_t *cp; 5960 uint8_t *nxthdr_ptr; 5961 in6_addr_t ip6_dst; 5962 in_port_t port; 5963 udpattrs_t attrs; 5964 boolean_t opt_present; 5965 ip6_hbh_t *hopoptsptr = NULL; 5966 uint_t hopoptslen = 0; 5967 boolean_t is_ancillary = B_FALSE; 5968 size_t sth_wroff = 0; 5969 ire_t *ire; 5970 boolean_t update_lastdst = B_FALSE; 5971 5972 *error = 0; 5973 5974 /* 5975 * If the local address is a mapped address return 5976 * an error. 5977 * It would be possible to send an IPv6 packet but the 5978 * response would never make it back to the application 5979 * since it is bound to a mapped address. 
5980 */ 5981 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 5982 *error = EADDRNOTAVAIL; 5983 goto done; 5984 } 5985 5986 ipp->ipp_fields = 0; 5987 ipp->ipp_sticky_ignored = 0; 5988 5989 /* 5990 * If TPI options passed in, feed it for verification and handling 5991 */ 5992 attrs.udpattr_credset = B_FALSE; 5993 opt_present = B_FALSE; 5994 if (IPCL_IS_NONSTR(connp)) { 5995 if (msg->msg_controllen != 0) { 5996 attrs.udpattr_ipp6 = ipp; 5997 attrs.udpattr_mb = mp; 5998 5999 rw_enter(&udp->udp_rwlock, RW_WRITER); 6000 *error = process_auxiliary_options(connp, 6001 msg->msg_control, msg->msg_controllen, 6002 &attrs, &udp_opt_obj, udp_opt_set, cr); 6003 rw_exit(&udp->udp_rwlock); 6004 if (*error) 6005 goto done; 6006 ASSERT(*error == 0); 6007 opt_present = B_TRUE; 6008 } 6009 } else { 6010 if (DB_TYPE(mp) != M_DATA) { 6011 mp1 = mp->b_cont; 6012 if (((struct T_unitdata_req *) 6013 mp->b_rptr)->OPT_length != 0) { 6014 attrs.udpattr_ipp6 = ipp; 6015 attrs.udpattr_mb = mp; 6016 if (udp_unitdata_opt_process(q, mp, error, 6017 &attrs) < 0) { 6018 goto done; 6019 } 6020 ASSERT(*error == 0); 6021 opt_present = B_TRUE; 6022 } 6023 } 6024 } 6025 6026 /* 6027 * Determine whether we need to mark the mblk with the user's 6028 * credentials. 6029 * If labeled then sockfs would have already done this. 
6030 */ 6031 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6032 ire = connp->conn_ire_cache; 6033 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) || 6034 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6035 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6036 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 6037 mblk_setcred(mp, cr, pid); 6038 } 6039 6040 rw_enter(&udp->udp_rwlock, RW_READER); 6041 ignore = ipp->ipp_sticky_ignored; 6042 6043 /* mp1 points to the M_DATA mblk carrying the packet */ 6044 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6045 6046 if (sin6->sin6_scope_id != 0 && 6047 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6048 /* 6049 * IPPF_SCOPE_ID is special. It's neither a sticky 6050 * option nor ancillary data. It needs to be 6051 * explicitly set in options_exists. 6052 */ 6053 option_exists |= IPPF_SCOPE_ID; 6054 } 6055 6056 /* 6057 * Compute the destination address 6058 */ 6059 ip6_dst = sin6->sin6_addr; 6060 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6061 ip6_dst = ipv6_loopback; 6062 6063 port = sin6->sin6_port; 6064 6065 /* 6066 * Cluster and TSOL notes, Cluster check: 6067 * see comments in udp_output_v4(). 6068 */ 6069 mutex_enter(&connp->conn_lock); 6070 6071 if (cl_inet_connect2 != NULL && 6072 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6073 port != udp->udp_lastdstport)) { 6074 mutex_exit(&connp->conn_lock); 6075 *error = 0; 6076 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6077 if (*error != 0) { 6078 *error = EHOSTUNREACH; 6079 rw_exit(&udp->udp_rwlock); 6080 goto done; 6081 } 6082 update_lastdst = B_TRUE; 6083 mutex_enter(&connp->conn_lock); 6084 } 6085 6086 /* 6087 * If we're not going to the same destination as last time, then 6088 * recompute the label required. This is done in a separate routine to 6089 * avoid blowing up our stack here. 
6090 * 6091 * TSOL Note: Since we are not in WRITER mode, UDP packets 6092 * to different destination may require different labels, 6093 * or worse, UDP packets to same IP address may require 6094 * different labels due to use of shared all-zones address. 6095 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6096 * and sticky ipp_hopoptslen are consistent for the current 6097 * destination and are updated atomically. 6098 */ 6099 if (is_system_labeled()) { 6100 /* Using UDP MLP requires SCM_UCRED from user */ 6101 if (connp->conn_mlp_type != mlptSingle && 6102 !attrs.udpattr_credset) { 6103 DTRACE_PROBE4( 6104 tx__ip__log__info__output__udp6, 6105 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6106 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6107 *error = ECONNREFUSED; 6108 rw_exit(&udp->udp_rwlock); 6109 mutex_exit(&connp->conn_lock); 6110 goto done; 6111 } 6112 /* 6113 * update label option for this UDP socket if 6114 * - the destination has changed, or 6115 * - the UDP socket is MLP 6116 */ 6117 if ((opt_present || 6118 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6119 connp->conn_mlp_type != mlptSingle) && 6120 (*error = udp_update_label_v6(q, mp, &ip6_dst, 6121 &update_lastdst)) != 0) { 6122 rw_exit(&udp->udp_rwlock); 6123 mutex_exit(&connp->conn_lock); 6124 goto done; 6125 } 6126 } 6127 6128 if (update_lastdst) { 6129 udp->udp_v6lastdst = ip6_dst; 6130 udp->udp_lastdstport = port; 6131 } 6132 6133 /* 6134 * If there's a security label here, then we ignore any options the 6135 * user may try to set. We keep the peer's label as a hidden sticky 6136 * option. We make a private copy of this label before releasing the 6137 * lock so that label is kept consistent with the destination addr. 
6138 */ 6139 if (udp->udp_label_len_v6 > 0) { 6140 ignore &= ~IPPF_HOPOPTS; 6141 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6142 } 6143 6144 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6145 /* No sticky options nor ancillary data. */ 6146 mutex_exit(&connp->conn_lock); 6147 goto no_options; 6148 } 6149 6150 /* 6151 * Go through the options figuring out where each is going to 6152 * come from and build two masks. The first mask indicates if 6153 * the option exists at all. The second mask indicates if the 6154 * option is sticky or ancillary. 6155 */ 6156 if (!(ignore & IPPF_HOPOPTS)) { 6157 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6158 option_exists |= IPPF_HOPOPTS; 6159 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6160 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6161 option_exists |= IPPF_HOPOPTS; 6162 is_sticky |= IPPF_HOPOPTS; 6163 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6164 hopoptsptr = kmem_alloc( 6165 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6166 if (hopoptsptr == NULL) { 6167 *error = ENOMEM; 6168 mutex_exit(&connp->conn_lock); 6169 goto done; 6170 } 6171 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6172 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6173 hopoptslen); 6174 udp_ip_hdr_len += hopoptslen; 6175 } 6176 } 6177 mutex_exit(&connp->conn_lock); 6178 6179 if (!(ignore & IPPF_RTHDR)) { 6180 if (ipp->ipp_fields & IPPF_RTHDR) { 6181 option_exists |= IPPF_RTHDR; 6182 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6183 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6184 option_exists |= IPPF_RTHDR; 6185 is_sticky |= IPPF_RTHDR; 6186 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6187 } 6188 } 6189 6190 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6191 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6192 option_exists |= IPPF_RTDSTOPTS; 6193 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6194 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6195 option_exists |= IPPF_RTDSTOPTS; 6196 
is_sticky |= IPPF_RTDSTOPTS; 6197 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6198 } 6199 } 6200 6201 if (!(ignore & IPPF_DSTOPTS)) { 6202 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6203 option_exists |= IPPF_DSTOPTS; 6204 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6205 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6206 option_exists |= IPPF_DSTOPTS; 6207 is_sticky |= IPPF_DSTOPTS; 6208 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6209 } 6210 } 6211 6212 if (!(ignore & IPPF_IFINDEX)) { 6213 if (ipp->ipp_fields & IPPF_IFINDEX) { 6214 option_exists |= IPPF_IFINDEX; 6215 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6216 option_exists |= IPPF_IFINDEX; 6217 is_sticky |= IPPF_IFINDEX; 6218 } 6219 } 6220 6221 if (!(ignore & IPPF_ADDR)) { 6222 if (ipp->ipp_fields & IPPF_ADDR) { 6223 option_exists |= IPPF_ADDR; 6224 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6225 option_exists |= IPPF_ADDR; 6226 is_sticky |= IPPF_ADDR; 6227 } 6228 } 6229 6230 if (!(ignore & IPPF_DONTFRAG)) { 6231 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6232 option_exists |= IPPF_DONTFRAG; 6233 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6234 option_exists |= IPPF_DONTFRAG; 6235 is_sticky |= IPPF_DONTFRAG; 6236 } 6237 } 6238 6239 if (!(ignore & IPPF_USE_MIN_MTU)) { 6240 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6241 option_exists |= IPPF_USE_MIN_MTU; 6242 } else if (udp->udp_sticky_ipp.ipp_fields & 6243 IPPF_USE_MIN_MTU) { 6244 option_exists |= IPPF_USE_MIN_MTU; 6245 is_sticky |= IPPF_USE_MIN_MTU; 6246 } 6247 } 6248 6249 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6250 option_exists |= IPPF_HOPLIMIT; 6251 /* IPV6_HOPLIMIT can never be sticky */ 6252 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6253 6254 if (!(ignore & IPPF_UNICAST_HOPS) && 6255 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6256 option_exists |= IPPF_UNICAST_HOPS; 6257 is_sticky |= IPPF_UNICAST_HOPS; 6258 } 6259 6260 if 
(!(ignore & IPPF_MULTICAST_HOPS) && 6261 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6262 option_exists |= IPPF_MULTICAST_HOPS; 6263 is_sticky |= IPPF_MULTICAST_HOPS; 6264 } 6265 6266 if (!(ignore & IPPF_TCLASS)) { 6267 if (ipp->ipp_fields & IPPF_TCLASS) { 6268 option_exists |= IPPF_TCLASS; 6269 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6270 option_exists |= IPPF_TCLASS; 6271 is_sticky |= IPPF_TCLASS; 6272 } 6273 } 6274 6275 if (!(ignore & IPPF_NEXTHOP) && 6276 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6277 option_exists |= IPPF_NEXTHOP; 6278 is_sticky |= IPPF_NEXTHOP; 6279 } 6280 6281 no_options: 6282 6283 /* 6284 * If any options carried in the ip6i_t were specified, we 6285 * need to account for the ip6i_t in the data we'll be sending 6286 * down. 6287 */ 6288 if (option_exists & IPPF_HAS_IP6I) 6289 udp_ip_hdr_len += sizeof (ip6i_t); 6290 6291 /* check/fix buffer config, setup pointers into it */ 6292 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6293 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6294 !OK_32PTR(ip6h)) { 6295 6296 /* Try to get everything in a single mblk next time */ 6297 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6298 udp->udp_max_hdr_len = udp_ip_hdr_len; 6299 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6300 } 6301 6302 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6303 if (mp2 == NULL) { 6304 *error = ENOMEM; 6305 rw_exit(&udp->udp_rwlock); 6306 goto done; 6307 } 6308 mp2->b_wptr = DB_LIM(mp2); 6309 mp2->b_cont = mp1; 6310 mp1 = mp2; 6311 if (DB_TYPE(mp) != M_DATA) 6312 mp->b_cont = mp1; 6313 else 6314 mp = mp1; 6315 6316 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6317 } 6318 mp1->b_rptr = (unsigned char *)ip6h; 6319 ip6i = (ip6i_t *)ip6h; 6320 6321 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 6322 if (option_exists & IPPF_HAS_IP6I) { 6323 ip6h = (ip6_t *)&ip6i[1]; 6324 ip6i->ip6i_flags = 0; 6325 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6326 6327 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6328 if (option_exists & IPPF_SCOPE_ID) { 6329 ip6i->ip6i_flags |= IP6I_IFINDEX; 6330 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6331 } else if (option_exists & IPPF_IFINDEX) { 6332 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6333 ASSERT(tipp->ipp_ifindex != 0); 6334 ip6i->ip6i_flags |= IP6I_IFINDEX; 6335 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6336 } 6337 6338 if (option_exists & IPPF_ADDR) { 6339 /* 6340 * Enable per-packet source address verification if 6341 * IPV6_PKTINFO specified the source address. 6342 * ip6_src is set in the transport's _wput function. 6343 */ 6344 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6345 } 6346 6347 if (option_exists & IPPF_DONTFRAG) { 6348 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6349 } 6350 6351 if (option_exists & IPPF_USE_MIN_MTU) { 6352 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6353 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6354 } 6355 6356 if (option_exists & IPPF_NEXTHOP) { 6357 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6358 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6359 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6360 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6361 } 6362 6363 /* 6364 * tell IP this is an ip6i_t private header 6365 */ 6366 ip6i->ip6i_nxt = IPPROTO_RAW; 6367 } 6368 6369 /* Initialize IPv6 header */ 6370 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6371 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6372 6373 /* Set the hoplimit of the outgoing packet. */ 6374 if (option_exists & IPPF_HOPLIMIT) { 6375 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 6376 ip6h->ip6_hops = ipp->ipp_hoplimit; 6377 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6378 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6379 ip6h->ip6_hops = udp->udp_multicast_ttl; 6380 if (option_exists & IPPF_MULTICAST_HOPS) 6381 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6382 } else { 6383 ip6h->ip6_hops = udp->udp_ttl; 6384 if (option_exists & IPPF_UNICAST_HOPS) 6385 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6386 } 6387 6388 if (option_exists & IPPF_ADDR) { 6389 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6390 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6391 ip6h->ip6_src = tipp->ipp_addr; 6392 } else { 6393 /* 6394 * The source address was not set using IPV6_PKTINFO. 6395 * First look at the bound source. 6396 * If unspecified fallback to __sin6_src_id. 6397 */ 6398 ip6h->ip6_src = udp->udp_v6src; 6399 if (sin6->__sin6_src_id != 0 && 6400 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6401 ip_srcid_find_id(sin6->__sin6_src_id, 6402 &ip6h->ip6_src, connp->conn_zoneid, 6403 us->us_netstack); 6404 } 6405 } 6406 6407 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6408 cp = (uint8_t *)&ip6h[1]; 6409 6410 /* 6411 * Here's where we have to start stringing together 6412 * any extension headers in the right order: 6413 * Hop-by-hop, destination, routing, and final destination opts. 
6414 */ 6415 if (option_exists & IPPF_HOPOPTS) { 6416 /* Hop-by-hop options */ 6417 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6418 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6419 if (hopoptslen == 0) { 6420 hopoptsptr = tipp->ipp_hopopts; 6421 hopoptslen = tipp->ipp_hopoptslen; 6422 is_ancillary = B_TRUE; 6423 } 6424 6425 *nxthdr_ptr = IPPROTO_HOPOPTS; 6426 nxthdr_ptr = &hbh->ip6h_nxt; 6427 6428 bcopy(hopoptsptr, cp, hopoptslen); 6429 cp += hopoptslen; 6430 6431 if (hopoptsptr != NULL && !is_ancillary) { 6432 kmem_free(hopoptsptr, hopoptslen); 6433 hopoptsptr = NULL; 6434 hopoptslen = 0; 6435 } 6436 } 6437 /* 6438 * En-route destination options 6439 * Only do them if there's a routing header as well 6440 */ 6441 if (option_exists & IPPF_RTDSTOPTS) { 6442 ip6_dest_t *dst = (ip6_dest_t *)cp; 6443 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6444 6445 *nxthdr_ptr = IPPROTO_DSTOPTS; 6446 nxthdr_ptr = &dst->ip6d_nxt; 6447 6448 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6449 cp += tipp->ipp_rtdstoptslen; 6450 } 6451 /* 6452 * Routing header next 6453 */ 6454 if (option_exists & IPPF_RTHDR) { 6455 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6456 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6457 6458 *nxthdr_ptr = IPPROTO_ROUTING; 6459 nxthdr_ptr = &rt->ip6r_nxt; 6460 6461 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6462 cp += tipp->ipp_rthdrlen; 6463 } 6464 /* 6465 * Do ultimate destination options 6466 */ 6467 if (option_exists & IPPF_DSTOPTS) { 6468 ip6_dest_t *dest = (ip6_dest_t *)cp; 6469 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6470 6471 *nxthdr_ptr = IPPROTO_DSTOPTS; 6472 nxthdr_ptr = &dest->ip6d_nxt; 6473 6474 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6475 cp += tipp->ipp_dstoptslen; 6476 } 6477 /* 6478 * Now set the last header pointer to the proto passed in 6479 */ 6480 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6481 *nxthdr_ptr = IPPROTO_UDP; 6482 6483 /* Update UDP header */ 6484 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 6485 udph->uha_dst_port = sin6->sin6_port; 6486 udph->uha_src_port = udp->udp_port; 6487 6488 /* 6489 * Copy in the destination address 6490 */ 6491 ip6h->ip6_dst = ip6_dst; 6492 6493 ip6h->ip6_vcf = 6494 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6495 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6496 6497 if (option_exists & IPPF_TCLASS) { 6498 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6499 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6500 tipp->ipp_tclass); 6501 } 6502 rw_exit(&udp->udp_rwlock); 6503 6504 if (option_exists & IPPF_RTHDR) { 6505 ip6_rthdr_t *rth; 6506 6507 /* 6508 * Perform any processing needed for source routing. 6509 * We know that all extension headers will be in the same mblk 6510 * as the IPv6 header. 6511 */ 6512 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6513 if (rth != NULL && rth->ip6r_segleft != 0) { 6514 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6515 /* 6516 * Drop packet - only support Type 0 routing. 6517 * Notify the application as well. 6518 */ 6519 *error = EPROTO; 6520 goto done; 6521 } 6522 6523 /* 6524 * rth->ip6r_len is twice the number of 6525 * addresses in the header. Thus it must be even. 6526 */ 6527 if (rth->ip6r_len & 0x1) { 6528 *error = EPROTO; 6529 goto done; 6530 } 6531 /* 6532 * Shuffle the routing header and ip6_dst 6533 * addresses, and get the checksum difference 6534 * between the first hop (in ip6_dst) and 6535 * the destination (in the last routing hdr entry). 6536 */ 6537 csum = ip_massage_options_v6(ip6h, rth, 6538 us->us_netstack); 6539 /* 6540 * Verify that the first hop isn't a mapped address. 6541 * Routers along the path need to do this verification 6542 * for subsequent hops. 
6543 */ 6544 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6545 *error = EADDRNOTAVAIL; 6546 goto done; 6547 } 6548 6549 cp += (rth->ip6r_len + 1)*8; 6550 } 6551 } 6552 6553 /* count up length of UDP packet */ 6554 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6555 if ((mp2 = mp1->b_cont) != NULL) { 6556 do { 6557 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6558 ip_len += (uint32_t)MBLKL(mp2); 6559 } while ((mp2 = mp2->b_cont) != NULL); 6560 } 6561 6562 /* 6563 * If the size of the packet is greater than the maximum allowed by 6564 * ip, return an error. Passing this down could cause panics because 6565 * the size will have wrapped and be inconsistent with the msg size. 6566 */ 6567 if (ip_len > IP_MAXPACKET) { 6568 *error = EMSGSIZE; 6569 goto done; 6570 } 6571 6572 /* Store the UDP length. Subtract length of extension hdrs */ 6573 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6574 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6575 6576 /* 6577 * We make it easy for IP to include our pseudo header 6578 * by putting our length in uh_checksum, modified (if 6579 * we have a routing header) by the checksum difference 6580 * between the ultimate destination and first hop addresses. 6581 * Note: UDP over IPv6 must always checksum the packet. 
6582 */ 6583 csum += udph->uha_length; 6584 csum = (csum & 0xFFFF) + (csum >> 16); 6585 udph->uha_checksum = (uint16_t)csum; 6586 6587 #ifdef _LITTLE_ENDIAN 6588 ip_len = htons(ip_len); 6589 #endif 6590 ip6h->ip6_plen = ip_len; 6591 6592 if (DB_TYPE(mp) != M_DATA) { 6593 cred_t *cr; 6594 pid_t cpid; 6595 6596 /* Move any cred from the T_UNITDATA_REQ to the packet */ 6597 cr = msg_extractcred(mp, &cpid); 6598 if (cr != NULL) { 6599 if (mp1->b_datap->db_credp != NULL) 6600 crfree(mp1->b_datap->db_credp); 6601 mp1->b_datap->db_credp = cr; 6602 mp1->b_datap->db_cpid = cpid; 6603 } 6604 6605 ASSERT(mp != mp1); 6606 freeb(mp); 6607 } 6608 6609 /* mp has been consumed and we'll return success */ 6610 ASSERT(*error == 0); 6611 mp = NULL; 6612 6613 /* We're done. Pass the packet to IP */ 6614 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6615 ip_output_v6(connp, mp1, q, IP_WPUT); 6616 6617 done: 6618 if (sth_wroff != 0) { 6619 (void) proto_set_tx_wroff(RD(q), connp, 6620 udp->udp_max_hdr_len + us->us_wroff_extra); 6621 } 6622 if (hopoptsptr != NULL && !is_ancillary) { 6623 kmem_free(hopoptsptr, hopoptslen); 6624 hopoptsptr = NULL; 6625 } 6626 if (*error != 0) { 6627 ASSERT(mp != NULL); 6628 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6629 } 6630 return (mp); 6631 } 6632 6633 6634 static int 6635 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6636 { 6637 sin_t *sin = (sin_t *)sa; 6638 sin6_t *sin6 = (sin6_t *)sa; 6639 6640 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6641 6642 if (udp->udp_state != TS_DATA_XFER) 6643 return (ENOTCONN); 6644 6645 switch (udp->udp_family) { 6646 case AF_INET: 6647 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6648 6649 if (*salenp < sizeof (sin_t)) 6650 return (EINVAL); 6651 6652 *salenp = sizeof (sin_t); 6653 *sin = sin_null; 6654 sin->sin_family = AF_INET; 6655 sin->sin_port = udp->udp_dstport; 6656 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6657 break; 6658 6659 case AF_INET6: 6660 if (*salenp < sizeof (sin6_t)) 6661 
return (EINVAL); 6662 6663 *salenp = sizeof (sin6_t); 6664 *sin6 = sin6_null; 6665 sin6->sin6_family = AF_INET6; 6666 sin6->sin6_port = udp->udp_dstport; 6667 sin6->sin6_addr = udp->udp_v6dst; 6668 sin6->sin6_flowinfo = udp->udp_flowinfo; 6669 break; 6670 } 6671 6672 return (0); 6673 } 6674 6675 static int 6676 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6677 { 6678 sin_t *sin = (sin_t *)sa; 6679 sin6_t *sin6 = (sin6_t *)sa; 6680 6681 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6682 6683 switch (udp->udp_family) { 6684 case AF_INET: 6685 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6686 6687 if (*salenp < sizeof (sin_t)) 6688 return (EINVAL); 6689 6690 *salenp = sizeof (sin_t); 6691 *sin = sin_null; 6692 sin->sin_family = AF_INET; 6693 sin->sin_port = udp->udp_port; 6694 6695 /* 6696 * If udp_v6src is unspecified, we might be bound to broadcast 6697 * / multicast. Use udp_bound_v6src as local address instead 6698 * (that could also still be unspecified). 6699 */ 6700 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6701 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6702 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6703 } else { 6704 sin->sin_addr.s_addr = 6705 V4_PART_OF_V6(udp->udp_bound_v6src); 6706 } 6707 break; 6708 6709 case AF_INET6: 6710 if (*salenp < sizeof (sin6_t)) 6711 return (EINVAL); 6712 6713 *salenp = sizeof (sin6_t); 6714 *sin6 = sin6_null; 6715 sin6->sin6_family = AF_INET6; 6716 sin6->sin6_port = udp->udp_port; 6717 sin6->sin6_flowinfo = udp->udp_flowinfo; 6718 6719 /* 6720 * If udp_v6src is unspecified, we might be bound to broadcast 6721 * / multicast. Use udp_bound_v6src as local address instead 6722 * (that could also still be unspecified). 6723 */ 6724 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6725 sin6->sin6_addr = udp->udp_v6src; 6726 else 6727 sin6->sin6_addr = udp->udp_bound_v6src; 6728 break; 6729 } 6730 6731 return (0); 6732 } 6733 6734 /* 6735 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
6736 */ 6737 static void 6738 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6739 { 6740 void *data; 6741 mblk_t *datamp = mp->b_cont; 6742 udp_t *udp = Q_TO_UDP(q); 6743 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6744 6745 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6746 cmdp->cb_error = EPROTO; 6747 qreply(q, mp); 6748 return; 6749 } 6750 data = datamp->b_rptr; 6751 6752 rw_enter(&udp->udp_rwlock, RW_READER); 6753 switch (cmdp->cb_cmd) { 6754 case TI_GETPEERNAME: 6755 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6756 break; 6757 case TI_GETMYNAME: 6758 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6759 break; 6760 default: 6761 cmdp->cb_error = EINVAL; 6762 break; 6763 } 6764 rw_exit(&udp->udp_rwlock); 6765 6766 qreply(q, mp); 6767 } 6768 6769 static void 6770 udp_disable_direct_sockfs(udp_t *udp) 6771 { 6772 udp->udp_issocket = B_FALSE; 6773 if (udp->udp_direct_sockfs) { 6774 /* 6775 * Disable read-side synchronous stream interface and 6776 * drain any queued data. 
6777 */ 6778 udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE); 6779 ASSERT(!udp->udp_direct_sockfs); 6780 UDP_STAT(udp->udp_us, udp_sock_fallback); 6781 } 6782 } 6783 6784 static void 6785 udp_wput_other(queue_t *q, mblk_t *mp) 6786 { 6787 uchar_t *rptr = mp->b_rptr; 6788 struct datab *db; 6789 struct iocblk *iocp; 6790 cred_t *cr; 6791 conn_t *connp = Q_TO_CONN(q); 6792 udp_t *udp = connp->conn_udp; 6793 udp_stack_t *us; 6794 6795 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6796 "udp_wput_other_start: q %p", q); 6797 6798 us = udp->udp_us; 6799 db = mp->b_datap; 6800 6801 switch (db->db_type) { 6802 case M_CMD: 6803 udp_wput_cmdblk(q, mp); 6804 return; 6805 6806 case M_PROTO: 6807 case M_PCPROTO: 6808 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6809 freemsg(mp); 6810 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6811 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6812 return; 6813 } 6814 switch (((t_primp_t)rptr)->type) { 6815 case T_ADDR_REQ: 6816 udp_addr_req(q, mp); 6817 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6818 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6819 return; 6820 case O_T_BIND_REQ: 6821 case T_BIND_REQ: 6822 udp_tpi_bind(q, mp); 6823 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6824 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 6825 return; 6826 case T_CONN_REQ: 6827 udp_tpi_connect(q, mp); 6828 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6829 "udp_wput_other_end: q %p (%S)", q, "connreq"); 6830 return; 6831 case T_CAPABILITY_REQ: 6832 udp_capability_req(q, mp); 6833 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6834 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 6835 return; 6836 case T_INFO_REQ: 6837 udp_info_req(q, mp); 6838 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6839 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 6840 return; 6841 case T_UNITDATA_REQ: 6842 /* 6843 * If a T_UNITDATA_REQ gets here, the address must 6844 * be bad. Valid T_UNITDATA_REQs are handled 6845 * in udp_wput. 
6846 */ 6847 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 6848 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6849 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 6850 return; 6851 case T_UNBIND_REQ: 6852 udp_tpi_unbind(q, mp); 6853 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6854 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 6855 return; 6856 case T_SVR4_OPTMGMT_REQ: 6857 /* 6858 * All Solaris components should pass a db_credp 6859 * for this TPI message, hence we ASSERT. 6860 * But in case there is some other M_PROTO that looks 6861 * like a TPI message sent by some other kernel 6862 * component, we check and return an error. 6863 */ 6864 cr = msg_getcred(mp, NULL); 6865 ASSERT(cr != NULL); 6866 if (cr == NULL) { 6867 udp_err_ack(q, mp, TSYSERR, EINVAL); 6868 return; 6869 } 6870 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 6871 cr)) { 6872 (void) svr4_optcom_req(q, 6873 mp, cr, &udp_opt_obj, B_TRUE); 6874 } 6875 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6876 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6877 return; 6878 6879 case T_OPTMGMT_REQ: 6880 /* 6881 * All Solaris components should pass a db_credp 6882 * for this TPI message, hence we ASSERT. 6883 * But in case there is some other M_PROTO that looks 6884 * like a TPI message sent by some other kernel 6885 * component, we check and return an error. 6886 */ 6887 cr = msg_getcred(mp, NULL); 6888 ASSERT(cr != NULL); 6889 if (cr == NULL) { 6890 udp_err_ack(q, mp, TSYSERR, EINVAL); 6891 return; 6892 } 6893 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 6894 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6895 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6896 return; 6897 6898 case T_DISCON_REQ: 6899 udp_tpi_disconnect(q, mp); 6900 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6901 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 6902 return; 6903 6904 /* The following TPI message is not supported by udp. 
*/ 6905 case O_T_CONN_RES: 6906 case T_CONN_RES: 6907 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6908 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6909 "udp_wput_other_end: q %p (%S)", q, 6910 "connres/disconreq"); 6911 return; 6912 6913 /* The following 3 TPI messages are illegal for udp. */ 6914 case T_DATA_REQ: 6915 case T_EXDATA_REQ: 6916 case T_ORDREL_REQ: 6917 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6918 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6919 "udp_wput_other_end: q %p (%S)", q, 6920 "data/exdata/ordrel"); 6921 return; 6922 default: 6923 break; 6924 } 6925 break; 6926 case M_FLUSH: 6927 if (*rptr & FLUSHW) 6928 flushq(q, FLUSHDATA); 6929 break; 6930 case M_IOCTL: 6931 iocp = (struct iocblk *)mp->b_rptr; 6932 switch (iocp->ioc_cmd) { 6933 case TI_GETPEERNAME: 6934 if (udp->udp_state != TS_DATA_XFER) { 6935 /* 6936 * If a default destination address has not 6937 * been associated with the stream, then we 6938 * don't know the peer's name. 6939 */ 6940 iocp->ioc_error = ENOTCONN; 6941 iocp->ioc_count = 0; 6942 mp->b_datap->db_type = M_IOCACK; 6943 qreply(q, mp); 6944 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6945 "udp_wput_other_end: q %p (%S)", q, 6946 "getpeername"); 6947 return; 6948 } 6949 /* FALLTHRU */ 6950 case TI_GETMYNAME: { 6951 /* 6952 * For TI_GETPEERNAME and TI_GETMYNAME, we first 6953 * need to copyin the user's strbuf structure. 6954 * Processing will continue in the M_IOCDATA case 6955 * below. 
6956 */ 6957 mi_copyin(q, mp, NULL, 6958 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 6959 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6960 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 6961 return; 6962 } 6963 case ND_SET: 6964 /* nd_getset performs the necessary checking */ 6965 case ND_GET: 6966 if (nd_getset(q, us->us_nd, mp)) { 6967 qreply(q, mp); 6968 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6969 "udp_wput_other_end: q %p (%S)", q, "get"); 6970 return; 6971 } 6972 break; 6973 case _SIOCSOCKFALLBACK: 6974 /* 6975 * Either sockmod is about to be popped and the 6976 * socket would now be treated as a plain stream, 6977 * or a module is about to be pushed so we could 6978 * no longer use read-side synchronous stream. 6979 * Drain any queued data and disable direct sockfs 6980 * interface from now on. 6981 */ 6982 if (!udp->udp_issocket) { 6983 DB_TYPE(mp) = M_IOCNAK; 6984 iocp->ioc_error = EINVAL; 6985 } else { 6986 udp_disable_direct_sockfs(udp); 6987 6988 DB_TYPE(mp) = M_IOCACK; 6989 iocp->ioc_error = 0; 6990 } 6991 iocp->ioc_count = 0; 6992 iocp->ioc_rval = 0; 6993 qreply(q, mp); 6994 return; 6995 default: 6996 break; 6997 } 6998 break; 6999 case M_IOCDATA: 7000 udp_wput_iocdata(q, mp); 7001 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7002 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7003 return; 7004 default: 7005 /* Unrecognized messages are passed through without change. */ 7006 break; 7007 } 7008 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7009 "udp_wput_other_end: q %p (%S)", q, "end"); 7010 ip_output(connp, mp, q, IP_WPUT); 7011 } 7012 7013 /* 7014 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7015 * messages. 7016 */ 7017 static void 7018 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7019 { 7020 mblk_t *mp1; 7021 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7022 STRUCT_HANDLE(strbuf, sb); 7023 udp_t *udp = Q_TO_UDP(q); 7024 int error; 7025 uint_t addrlen; 7026 7027 /* Make sure it is one of ours. 
*/ 7028 switch (iocp->ioc_cmd) { 7029 case TI_GETMYNAME: 7030 case TI_GETPEERNAME: 7031 break; 7032 default: 7033 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7034 return; 7035 } 7036 7037 switch (mi_copy_state(q, mp, &mp1)) { 7038 case -1: 7039 return; 7040 case MI_COPY_CASE(MI_COPY_IN, 1): 7041 break; 7042 case MI_COPY_CASE(MI_COPY_OUT, 1): 7043 /* 7044 * The address has been copied out, so now 7045 * copyout the strbuf. 7046 */ 7047 mi_copyout(q, mp); 7048 return; 7049 case MI_COPY_CASE(MI_COPY_OUT, 2): 7050 /* 7051 * The address and strbuf have been copied out. 7052 * We're done, so just acknowledge the original 7053 * M_IOCTL. 7054 */ 7055 mi_copy_done(q, mp, 0); 7056 return; 7057 default: 7058 /* 7059 * Something strange has happened, so acknowledge 7060 * the original M_IOCTL with an EPROTO error. 7061 */ 7062 mi_copy_done(q, mp, EPROTO); 7063 return; 7064 } 7065 7066 /* 7067 * Now we have the strbuf structure for TI_GETMYNAME 7068 * and TI_GETPEERNAME. Next we copyout the requested 7069 * address and then we'll copyout the strbuf. 7070 */ 7071 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7072 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); 7073 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7074 mi_copy_done(q, mp, EINVAL); 7075 return; 7076 } 7077 7078 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7079 7080 if (mp1 == NULL) 7081 return; 7082 7083 rw_enter(&udp->udp_rwlock, RW_READER); 7084 switch (iocp->ioc_cmd) { 7085 case TI_GETMYNAME: 7086 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); 7087 break; 7088 case TI_GETPEERNAME: 7089 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7090 break; 7091 } 7092 rw_exit(&udp->udp_rwlock); 7093 7094 if (error != 0) { 7095 mi_copy_done(q, mp, error); 7096 } else { 7097 mp1->b_wptr += addrlen; 7098 STRUCT_FSET(sb, len, addrlen); 7099 7100 /* Copy out the address */ 7101 mi_copyout(q, mp); 7102 } 7103 } 7104 7105 static int 7106 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7107 udpattrs_t *udpattrs) 7108 { 7109 struct T_unitdata_req *udreqp; 7110 int is_absreq_failure; 7111 cred_t *cr; 7112 7113 ASSERT(((t_primp_t)mp->b_rptr)->type); 7114 7115 /* 7116 * All Solaris components should pass a db_credp 7117 * for this TPI message, hence we should ASSERT. 7118 * However, RPC (svc_clts_ksend) does this odd thing where it 7119 * passes the options from a T_UNITDATA_IND unchanged in a 7120 * T_UNITDATA_REQ. While that is the right thing to do for 7121 * some options, SCM_UCRED being the key one, this also makes it 7122 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 
7123 */ 7124 cr = msg_getcred(mp, NULL); 7125 if (cr == NULL) { 7126 cr = Q_TO_CONN(q)->conn_cred; 7127 } 7128 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7129 7130 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7131 udreqp->OPT_offset, cr, &udp_opt_obj, 7132 udpattrs, &is_absreq_failure); 7133 7134 if (*errorp != 0) { 7135 /* 7136 * Note: No special action needed in this 7137 * module for "is_absreq_failure" 7138 */ 7139 return (-1); /* failure */ 7140 } 7141 ASSERT(is_absreq_failure == 0); 7142 return (0); /* success */ 7143 } 7144 7145 void 7146 udp_ddi_g_init(void) 7147 { 7148 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7149 udp_opt_obj.odb_opt_arr_cnt); 7150 7151 /* 7152 * We want to be informed each time a stack is created or 7153 * destroyed in the kernel, so we can maintain the 7154 * set of udp_stack_t's. 7155 */ 7156 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7157 } 7158 7159 void 7160 udp_ddi_g_destroy(void) 7161 { 7162 netstack_unregister(NS_UDP); 7163 } 7164 7165 #define INET_NAME "ip" 7166 7167 /* 7168 * Initialize the UDP stack instance. 7169 */ 7170 static void * 7171 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7172 { 7173 udp_stack_t *us; 7174 udpparam_t *pa; 7175 int i; 7176 int error = 0; 7177 major_t major; 7178 7179 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7180 us->us_netstack = ns; 7181 7182 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7183 us->us_epriv_ports[0] = 2049; 7184 us->us_epriv_ports[1] = 4045; 7185 7186 /* 7187 * The smallest anonymous port in the priviledged port range which UDP 7188 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7189 */ 7190 us->us_min_anonpriv_port = 512; 7191 7192 us->us_bind_fanout_size = udp_bind_fanout_size; 7193 7194 /* Roundup variable that might have been modified in /etc/system */ 7195 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7196 /* Not a power of two. 
Round up to nearest power of two */ 7197 for (i = 0; i < 31; i++) { 7198 if (us->us_bind_fanout_size < (1 << i)) 7199 break; 7200 } 7201 us->us_bind_fanout_size = 1 << i; 7202 } 7203 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7204 sizeof (udp_fanout_t), KM_SLEEP); 7205 for (i = 0; i < us->us_bind_fanout_size; i++) { 7206 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7207 NULL); 7208 } 7209 7210 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7211 7212 us->us_param_arr = pa; 7213 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7214 7215 (void) udp_param_register(&us->us_nd, 7216 us->us_param_arr, A_CNT(udp_param_arr)); 7217 7218 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7219 us->us_mibkp = udp_kstat_init(stackid); 7220 7221 major = mod_name_to_major(INET_NAME); 7222 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7223 ASSERT(error == 0); 7224 return (us); 7225 } 7226 7227 /* 7228 * Free the UDP stack instance. 
7229 */ 7230 static void 7231 udp_stack_fini(netstackid_t stackid, void *arg) 7232 { 7233 udp_stack_t *us = (udp_stack_t *)arg; 7234 int i; 7235 7236 for (i = 0; i < us->us_bind_fanout_size; i++) { 7237 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7238 } 7239 7240 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7241 sizeof (udp_fanout_t)); 7242 7243 us->us_bind_fanout = NULL; 7244 7245 nd_free(&us->us_nd); 7246 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7247 us->us_param_arr = NULL; 7248 7249 udp_kstat_fini(stackid, us->us_mibkp); 7250 us->us_mibkp = NULL; 7251 7252 udp_kstat2_fini(stackid, us->us_kstat); 7253 us->us_kstat = NULL; 7254 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7255 7256 ldi_ident_release(us->us_ldi_ident); 7257 kmem_free(us, sizeof (*us)); 7258 } 7259 7260 static void * 7261 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7262 { 7263 kstat_t *ksp; 7264 7265 udp_stat_t template = { 7266 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7267 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7268 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7269 { "udp_drain", KSTAT_DATA_UINT64 }, 7270 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7271 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7272 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7273 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7274 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7275 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7276 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7277 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7278 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7279 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7280 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7281 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7282 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7283 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7284 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7285 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7286 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7287 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 
7288 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7289 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7290 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7291 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7292 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7293 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7294 #ifdef DEBUG 7295 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7296 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7297 #endif 7298 }; 7299 7300 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7301 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7302 KSTAT_FLAG_VIRTUAL, stackid); 7303 7304 if (ksp == NULL) 7305 return (NULL); 7306 7307 bcopy(&template, us_statisticsp, sizeof (template)); 7308 ksp->ks_data = (void *)us_statisticsp; 7309 ksp->ks_private = (void *)(uintptr_t)stackid; 7310 7311 kstat_install(ksp); 7312 return (ksp); 7313 } 7314 7315 static void 7316 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7317 { 7318 if (ksp != NULL) { 7319 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7320 kstat_delete_netstack(ksp, stackid); 7321 } 7322 } 7323 7324 static void * 7325 udp_kstat_init(netstackid_t stackid) 7326 { 7327 kstat_t *ksp; 7328 7329 udp_named_kstat_t template = { 7330 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7331 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7332 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7333 { "entrySize", KSTAT_DATA_INT32, 0 }, 7334 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7335 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7336 }; 7337 7338 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7339 KSTAT_TYPE_NAMED, 7340 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7341 7342 if (ksp == NULL || ksp->ks_data == NULL) 7343 return (NULL); 7344 7345 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7346 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7347 7348 bcopy(&template, ksp->ks_data, sizeof (template)); 7349 ksp->ks_update = udp_kstat_update; 7350 ksp->ks_private = (void 
*)(uintptr_t)stackid; 7351 7352 kstat_install(ksp); 7353 return (ksp); 7354 } 7355 7356 static void 7357 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7358 { 7359 if (ksp != NULL) { 7360 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7361 kstat_delete_netstack(ksp, stackid); 7362 } 7363 } 7364 7365 static int 7366 udp_kstat_update(kstat_t *kp, int rw) 7367 { 7368 udp_named_kstat_t *udpkp; 7369 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7370 netstack_t *ns; 7371 udp_stack_t *us; 7372 7373 if ((kp == NULL) || (kp->ks_data == NULL)) 7374 return (EIO); 7375 7376 if (rw == KSTAT_WRITE) 7377 return (EACCES); 7378 7379 ns = netstack_find_by_stackid(stackid); 7380 if (ns == NULL) 7381 return (-1); 7382 us = ns->netstack_udp; 7383 if (us == NULL) { 7384 netstack_rele(ns); 7385 return (-1); 7386 } 7387 udpkp = (udp_named_kstat_t *)kp->ks_data; 7388 7389 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7390 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7391 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7392 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7393 netstack_rele(ns); 7394 return (0); 7395 } 7396 7397 /* 7398 * Read-side synchronous stream info entry point, called as a 7399 * result of handling certain STREAMS ioctl operations. 
7400 */ 7401 static int 7402 udp_rinfop(queue_t *q, infod_t *dp) 7403 { 7404 mblk_t *mp; 7405 uint_t cmd = dp->d_cmd; 7406 int res = 0; 7407 int error = 0; 7408 udp_t *udp = Q_TO_UDP(q); 7409 struct stdata *stp = STREAM(q); 7410 7411 mutex_enter(&udp->udp_drain_lock); 7412 /* If shutdown on read has happened, return nothing */ 7413 mutex_enter(&stp->sd_lock); 7414 if (stp->sd_flag & STREOF) { 7415 mutex_exit(&stp->sd_lock); 7416 goto done; 7417 } 7418 mutex_exit(&stp->sd_lock); 7419 7420 if ((mp = udp->udp_rcv_list_head) == NULL) 7421 goto done; 7422 7423 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7424 7425 if (cmd & INFOD_COUNT) { 7426 /* 7427 * Return the number of messages. 7428 */ 7429 dp->d_count += udp->udp_rcv_msgcnt; 7430 res |= INFOD_COUNT; 7431 } 7432 if (cmd & INFOD_BYTES) { 7433 /* 7434 * Return size of all data messages. 7435 */ 7436 dp->d_bytes += udp->udp_rcv_cnt; 7437 res |= INFOD_BYTES; 7438 } 7439 if (cmd & INFOD_FIRSTBYTES) { 7440 /* 7441 * Return size of first data message. 7442 */ 7443 dp->d_bytes = msgdsize(mp); 7444 res |= INFOD_FIRSTBYTES; 7445 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7446 } 7447 if (cmd & INFOD_COPYOUT) { 7448 mblk_t *mp1 = mp->b_cont; 7449 int n; 7450 /* 7451 * Return data contents of first message. 7452 */ 7453 ASSERT(DB_TYPE(mp1) == M_DATA); 7454 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7455 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7456 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7457 UIO_READ, dp->d_uiop)) != 0) { 7458 goto done; 7459 } 7460 mp1 = mp1->b_cont; 7461 } 7462 res |= INFOD_COPYOUT; 7463 dp->d_cmd &= ~INFOD_COPYOUT; 7464 } 7465 done: 7466 mutex_exit(&udp->udp_drain_lock); 7467 7468 dp->d_res |= res; 7469 7470 return (error); 7471 } 7472 7473 /* 7474 * Read-side synchronous stream entry point. This is called as a result 7475 * of recv/read operation done at sockfs, and is guaranteed to execute 7476 * outside of the interrupt thread context. 
It returns a single datagram 7477 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7478 */ 7479 static int 7480 udp_rrw(queue_t *q, struiod_t *dp) 7481 { 7482 mblk_t *mp; 7483 udp_t *udp = Q_TO_UDP(q); 7484 udp_stack_t *us = udp->udp_us; 7485 7486 /* 7487 * Dequeue datagram from the head of the list and return 7488 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7489 * set/cleared depending on whether or not there's data 7490 * remaining in the list. 7491 */ 7492 mutex_enter(&udp->udp_drain_lock); 7493 if (!udp->udp_direct_sockfs) { 7494 mutex_exit(&udp->udp_drain_lock); 7495 UDP_STAT(us, udp_rrw_busy); 7496 return (EBUSY); 7497 } 7498 if ((mp = udp->udp_rcv_list_head) != NULL) { 7499 uint_t size = msgdsize(mp); 7500 7501 /* Last datagram in the list? */ 7502 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7503 udp->udp_rcv_list_tail = NULL; 7504 mp->b_next = NULL; 7505 7506 udp->udp_rcv_cnt -= size; 7507 udp->udp_rcv_msgcnt--; 7508 UDP_STAT(us, udp_rrw_msgcnt); 7509 7510 /* No longer flow-controlling? */ 7511 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7512 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7513 udp->udp_drain_qfull = B_FALSE; 7514 } 7515 if (udp->udp_rcv_list_head == NULL) { 7516 /* 7517 * Either we just dequeued the last datagram or 7518 * we get here from sockfs and have nothing to 7519 * return; in this case clear RSLEEP. 7520 */ 7521 ASSERT(udp->udp_rcv_cnt == 0); 7522 ASSERT(udp->udp_rcv_msgcnt == 0); 7523 ASSERT(udp->udp_rcv_list_tail == NULL); 7524 STR_WAKEUP_CLEAR(STREAM(q)); 7525 } else { 7526 /* 7527 * More data follows; we need udp_rrw() to be 7528 * called in future to pick up the rest. 
7529 */ 7530 STR_WAKEUP_SET(STREAM(q)); 7531 } 7532 mutex_exit(&udp->udp_drain_lock); 7533 dp->d_mp = mp; 7534 return (0); 7535 } 7536 7537 /* 7538 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7539 * list; this is typically executed within the interrupt thread context 7540 * and so we do things as quickly as possible. 7541 */ 7542 static void 7543 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7544 { 7545 ASSERT(q == RD(q)); 7546 ASSERT(pkt_len == msgdsize(mp)); 7547 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7548 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7549 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7550 7551 mutex_enter(&udp->udp_drain_lock); 7552 /* 7553 * Wake up and signal the receiving app; it is okay to do this 7554 * before enqueueing the mp because we are holding the drain lock. 7555 * One of the advantages of synchronous stream is the ability for 7556 * us to find out when the application performs a read on the 7557 * socket by way of udp_rrw() entry point being called. We need 7558 * to generate SIGPOLL/SIGIO for each received data in the case 7559 * of asynchronous socket just as in the strrput() case. However, 7560 * we only wake the application up when necessary, i.e. during the 7561 * first enqueue. When udp_rrw() is called, we send up a single 7562 * datagram upstream and call STR_WAKEUP_SET() again when there 7563 * are still data remaining in our receive queue. 7564 */ 7565 STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head); 7566 if (udp->udp_rcv_list_head == NULL) 7567 udp->udp_rcv_list_head = mp; 7568 else 7569 udp->udp_rcv_list_tail->b_next = mp; 7570 udp->udp_rcv_list_tail = mp; 7571 udp->udp_rcv_cnt += pkt_len; 7572 udp->udp_rcv_msgcnt++; 7573 7574 /* Need to flow-control? 
*/ 7575 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7576 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7577 udp->udp_drain_qfull = B_TRUE; 7578 7579 mutex_exit(&udp->udp_drain_lock); 7580 } 7581 7582 /* 7583 * Drain the contents of receive list to the module upstream; we do 7584 * this during close or when we fallback to the slow mode due to 7585 * sockmod being popped or a module being pushed on top of us. 7586 */ 7587 static void 7588 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7589 { 7590 mblk_t *mp; 7591 udp_stack_t *us = udp->udp_us; 7592 7593 mutex_enter(&udp->udp_drain_lock); 7594 /* 7595 * There is no race with a concurrent udp_input() sending 7596 * up packets using putnext() after we have cleared the 7597 * udp_direct_sockfs flag but before we have completed 7598 * sending up the packets in udp_rcv_list, since we are 7599 * either a writer or we have quiesced the conn. 7600 */ 7601 udp->udp_direct_sockfs = B_FALSE; 7602 mutex_exit(&udp->udp_drain_lock); 7603 7604 if (udp->udp_rcv_list_head != NULL) 7605 UDP_STAT(us, udp_drain); 7606 7607 /* 7608 * Send up everything via putnext(); note here that we 7609 * don't need the udp_drain_lock to protect us since 7610 * nothing can enter udp_rrw() and that we currently 7611 * have exclusive access to this udp. 
7612 */ 7613 while ((mp = udp->udp_rcv_list_head) != NULL) { 7614 udp->udp_rcv_list_head = mp->b_next; 7615 mp->b_next = NULL; 7616 udp->udp_rcv_cnt -= msgdsize(mp); 7617 udp->udp_rcv_msgcnt--; 7618 if (closing) { 7619 freemsg(mp); 7620 } else { 7621 ASSERT(q == RD(q)); 7622 putnext(q, mp); 7623 } 7624 } 7625 ASSERT(udp->udp_rcv_cnt == 0); 7626 ASSERT(udp->udp_rcv_msgcnt == 0); 7627 ASSERT(udp->udp_rcv_list_head == NULL); 7628 udp->udp_rcv_list_tail = NULL; 7629 udp->udp_drain_qfull = B_FALSE; 7630 } 7631 7632 static size_t 7633 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7634 { 7635 udp_stack_t *us = udp->udp_us; 7636 7637 /* We add a bit of extra buffering */ 7638 size += size >> 1; 7639 if (size > us->us_max_buf) 7640 size = us->us_max_buf; 7641 7642 udp->udp_rcv_hiwat = size; 7643 return (size); 7644 } 7645 7646 /* 7647 * For the lower queue so that UDP can be a dummy mux. 7648 * Nobody should be sending 7649 * packets up this stream 7650 */ 7651 static void 7652 udp_lrput(queue_t *q, mblk_t *mp) 7653 { 7654 mblk_t *mp1; 7655 7656 switch (mp->b_datap->db_type) { 7657 case M_FLUSH: 7658 /* Turn around */ 7659 if (*mp->b_rptr & FLUSHW) { 7660 *mp->b_rptr &= ~FLUSHR; 7661 qreply(q, mp); 7662 return; 7663 } 7664 break; 7665 } 7666 /* Could receive messages that passed through ar_rput */ 7667 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7668 mp1->b_prev = mp1->b_next = NULL; 7669 freemsg(mp); 7670 } 7671 7672 /* 7673 * For the lower queue so that UDP can be a dummy mux. 7674 * Nobody should be sending packets down this stream. 7675 */ 7676 /* ARGSUSED */ 7677 void 7678 udp_lwput(queue_t *q, mblk_t *mp) 7679 { 7680 freemsg(mp); 7681 } 7682 7683 /* 7684 * Below routines for UDP socket module. 
7685 */ 7686 7687 static conn_t * 7688 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7689 { 7690 udp_t *udp; 7691 conn_t *connp; 7692 zoneid_t zoneid; 7693 netstack_t *ns; 7694 udp_stack_t *us; 7695 7696 ns = netstack_find_by_cred(credp); 7697 ASSERT(ns != NULL); 7698 us = ns->netstack_udp; 7699 ASSERT(us != NULL); 7700 7701 /* 7702 * For exclusive stacks we set the zoneid to zero 7703 * to make UDP operate as if in the global zone. 7704 */ 7705 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7706 zoneid = GLOBAL_ZONEID; 7707 else 7708 zoneid = crgetzoneid(credp); 7709 7710 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7711 7712 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7713 if (connp == NULL) { 7714 netstack_rele(ns); 7715 return (NULL); 7716 } 7717 udp = connp->conn_udp; 7718 7719 /* 7720 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7721 * done by netstack_find_by_cred() 7722 */ 7723 netstack_rele(ns); 7724 7725 rw_enter(&udp->udp_rwlock, RW_WRITER); 7726 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7727 ASSERT(connp->conn_udp == udp); 7728 ASSERT(udp->udp_connp == connp); 7729 7730 /* Set the initial state of the stream and the privilege status. 
*/ 7731 udp->udp_state = TS_UNBND; 7732 if (isv6) { 7733 udp->udp_family = AF_INET6; 7734 udp->udp_ipversion = IPV6_VERSION; 7735 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7736 udp->udp_ttl = us->us_ipv6_hoplimit; 7737 connp->conn_af_isv6 = B_TRUE; 7738 connp->conn_flags |= IPCL_ISV6; 7739 } else { 7740 udp->udp_family = AF_INET; 7741 udp->udp_ipversion = IPV4_VERSION; 7742 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7743 udp->udp_ttl = us->us_ipv4_ttl; 7744 connp->conn_af_isv6 = B_FALSE; 7745 connp->conn_flags &= ~IPCL_ISV6; 7746 } 7747 7748 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7749 udp->udp_pending_op = -1; 7750 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7751 connp->conn_zoneid = zoneid; 7752 7753 udp->udp_open_time = lbolt64; 7754 udp->udp_open_pid = curproc->p_pid; 7755 7756 /* 7757 * If the caller has the process-wide flag set, then default to MAC 7758 * exempt mode. This allows read-down to unlabeled hosts. 7759 */ 7760 if (getpflags(NET_MAC_AWARE, credp) != 0) 7761 connp->conn_mac_exempt = B_TRUE; 7762 7763 connp->conn_ulp_labeled = is_system_labeled(); 7764 7765 udp->udp_us = us; 7766 7767 connp->conn_recv = udp_input; 7768 crhold(credp); 7769 connp->conn_cred = credp; 7770 7771 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7772 7773 rw_exit(&udp->udp_rwlock); 7774 7775 return (connp); 7776 } 7777 7778 /* ARGSUSED */ 7779 sock_lower_handle_t 7780 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7781 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7782 { 7783 udp_t *udp = NULL; 7784 udp_stack_t *us; 7785 conn_t *connp; 7786 boolean_t isv6; 7787 7788 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7789 (proto != 0 && proto != IPPROTO_UDP)) { 7790 *errorp = EPROTONOSUPPORT; 7791 return (NULL); 7792 } 7793 7794 if (family == AF_INET6) 7795 isv6 = B_TRUE; 7796 else 7797 isv6 = B_FALSE; 7798 7799 connp = udp_do_open(credp, isv6, flags); 7800 if (connp == 
NULL) { 7801 *errorp = ENOMEM; 7802 return (NULL); 7803 } 7804 7805 udp = connp->conn_udp; 7806 ASSERT(udp != NULL); 7807 us = udp->udp_us; 7808 ASSERT(us != NULL); 7809 7810 udp->udp_issocket = B_TRUE; 7811 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7812 7813 /* Set flow control */ 7814 rw_enter(&udp->udp_rwlock, RW_WRITER); 7815 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7816 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7817 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 7818 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 7819 udp->udp_xmit_lowat = us->us_xmit_lowat; 7820 7821 if (udp->udp_family == AF_INET6) { 7822 /* Build initial header template for transmit */ 7823 if ((*errorp = udp_build_hdrs(udp)) != 0) { 7824 rw_exit(&udp->udp_rwlock); 7825 ipcl_conn_destroy(connp); 7826 return (NULL); 7827 } 7828 } 7829 rw_exit(&udp->udp_rwlock); 7830 7831 connp->conn_flow_cntrld = B_FALSE; 7832 7833 ASSERT(us->us_ldi_ident != NULL); 7834 7835 if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 7836 ip1dbg(("udp_create: create of IP helper stream failed\n")); 7837 udp_do_close(connp); 7838 return (NULL); 7839 } 7840 7841 /* Set the send flow control */ 7842 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 7843 connp->conn_wq->q_lowat = us->us_xmit_lowat; 7844 7845 mutex_enter(&connp->conn_lock); 7846 connp->conn_state_flags &= ~CONN_INCIPIENT; 7847 mutex_exit(&connp->conn_lock); 7848 7849 *errorp = 0; 7850 *smodep = SM_ATOMIC; 7851 *sock_downcalls = &sock_udp_downcalls; 7852 return ((sock_lower_handle_t)connp); 7853 } 7854 7855 /* ARGSUSED */ 7856 void 7857 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 7858 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 7859 { 7860 conn_t *connp = (conn_t *)proto_handle; 7861 udp_t *udp = connp->conn_udp; 7862 udp_stack_t *us = udp->udp_us; 7863 struct sock_proto_props sopp; 7864 7865 /* All Solaris components should pass a cred for this operation. 
*/ 7866 ASSERT(cr != NULL); 7867 7868 connp->conn_upcalls = sock_upcalls; 7869 connp->conn_upper_handle = sock_handle; 7870 7871 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 7872 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 7873 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 7874 sopp.sopp_maxblk = INFPSZ; 7875 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 7876 sopp.sopp_maxaddrlen = sizeof (sin6_t); 7877 sopp.sopp_maxpsz = 7878 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 7879 UDP_MAXPACKET_IPV6; 7880 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 7881 udp_mod_info.mi_minpsz; 7882 7883 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 7884 &sopp); 7885 } 7886 7887 static void 7888 udp_do_close(conn_t *connp) 7889 { 7890 udp_t *udp; 7891 7892 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 7893 udp = connp->conn_udp; 7894 7895 udp_quiesce_conn(connp); 7896 ip_quiesce_conn(connp); 7897 7898 if (!IPCL_IS_NONSTR(connp)) { 7899 /* 7900 * Disable read-side synchronous stream 7901 * interface and drain any queued data. 7902 */ 7903 ASSERT(connp->conn_wq != NULL); 7904 udp_rcv_drain(connp->conn_wq, udp, B_TRUE); 7905 ASSERT(!udp->udp_direct_sockfs); 7906 7907 ASSERT(connp->conn_rq != NULL); 7908 qprocsoff(connp->conn_rq); 7909 } 7910 7911 ASSERT(udp->udp_rcv_cnt == 0); 7912 ASSERT(udp->udp_rcv_msgcnt == 0); 7913 ASSERT(udp->udp_rcv_list_head == NULL); 7914 ASSERT(udp->udp_rcv_list_tail == NULL); 7915 7916 udp_close_free(connp); 7917 7918 /* 7919 * Now we are truly single threaded on this stream, and can 7920 * delete the things hanging off the connp, and finally the connp. 7921 * We removed this connp from the fanout list, it cannot be 7922 * accessed thru the fanouts, and we already waited for the 7923 * conn_ref to drop to 0. We are already in close, so 7924 * there cannot be any other thread from the top. qprocsoff 7925 * has completed, and service has completed or won't run in 7926 * future. 
7927 */ 7928 ASSERT(connp->conn_ref == 1); 7929 if (!IPCL_IS_NONSTR(connp)) { 7930 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 7931 } else { 7932 ip_free_helper_stream(connp); 7933 } 7934 7935 connp->conn_ref--; 7936 ipcl_conn_destroy(connp); 7937 } 7938 7939 /* ARGSUSED */ 7940 int 7941 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 7942 { 7943 conn_t *connp = (conn_t *)proto_handle; 7944 7945 /* All Solaris components should pass a cred for this operation. */ 7946 ASSERT(cr != NULL); 7947 7948 udp_do_close(connp); 7949 return (0); 7950 } 7951 7952 static int 7953 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 7954 boolean_t bind_to_req_port_only) 7955 { 7956 sin_t *sin; 7957 sin6_t *sin6; 7958 sin6_t sin6addr; 7959 in_port_t port; /* Host byte order */ 7960 in_port_t requested_port; /* Host byte order */ 7961 int count; 7962 in6_addr_t v6src; 7963 int loopmax; 7964 udp_fanout_t *udpf; 7965 in_port_t lport; /* Network byte order */ 7966 zoneid_t zoneid; 7967 udp_t *udp; 7968 boolean_t is_inaddr_any; 7969 mlp_type_t addrtype, mlptype; 7970 udp_stack_t *us; 7971 int error = 0; 7972 mblk_t *mp = NULL; 7973 7974 udp = connp->conn_udp; 7975 us = udp->udp_us; 7976 7977 if (udp->udp_state != TS_UNBND) { 7978 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7979 "udp_bind: bad state, %u", udp->udp_state); 7980 return (-TOUTSTATE); 7981 } 7982 7983 switch (len) { 7984 case 0: 7985 if (udp->udp_family == AF_INET) { 7986 sin = (sin_t *)&sin6addr; 7987 *sin = sin_null; 7988 sin->sin_family = AF_INET; 7989 sin->sin_addr.s_addr = INADDR_ANY; 7990 udp->udp_ipversion = IPV4_VERSION; 7991 } else { 7992 ASSERT(udp->udp_family == AF_INET6); 7993 sin6 = (sin6_t *)&sin6addr; 7994 *sin6 = sin6_null; 7995 sin6->sin6_family = AF_INET6; 7996 V6_SET_ZERO(sin6->sin6_addr); 7997 udp->udp_ipversion = IPV6_VERSION; 7998 } 7999 port = 0; 8000 break; 8001 8002 case sizeof (sin_t): /* Complete IPv4 address */ 8003 sin = (sin_t *)sa; 
8004 8005 if (sin == NULL || !OK_32PTR((char *)sin)) 8006 return (EINVAL); 8007 8008 if (udp->udp_family != AF_INET || 8009 sin->sin_family != AF_INET) { 8010 return (EAFNOSUPPORT); 8011 } 8012 port = ntohs(sin->sin_port); 8013 break; 8014 8015 case sizeof (sin6_t): /* complete IPv6 address */ 8016 sin6 = (sin6_t *)sa; 8017 8018 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 8019 return (EINVAL); 8020 8021 if (udp->udp_family != AF_INET6 || 8022 sin6->sin6_family != AF_INET6) { 8023 return (EAFNOSUPPORT); 8024 } 8025 port = ntohs(sin6->sin6_port); 8026 break; 8027 8028 default: /* Invalid request */ 8029 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8030 "udp_bind: bad ADDR_length length %u", len); 8031 return (-TBADADDR); 8032 } 8033 8034 requested_port = port; 8035 8036 if (requested_port == 0 || !bind_to_req_port_only) 8037 bind_to_req_port_only = B_FALSE; 8038 else /* T_BIND_REQ and requested_port != 0 */ 8039 bind_to_req_port_only = B_TRUE; 8040 8041 if (requested_port == 0) { 8042 /* 8043 * If the application passed in zero for the port number, it 8044 * doesn't care which port number we bind to. Get one in the 8045 * valid range. 8046 */ 8047 if (udp->udp_anon_priv_bind) { 8048 port = udp_get_next_priv_port(udp); 8049 } else { 8050 port = udp_update_next_port(udp, 8051 us->us_next_port_to_try, B_TRUE); 8052 } 8053 } else { 8054 /* 8055 * If the port is in the well-known privileged range, 8056 * make sure the caller was privileged. 8057 */ 8058 int i; 8059 boolean_t priv = B_FALSE; 8060 8061 if (port < us->us_smallest_nonpriv_port) { 8062 priv = B_TRUE; 8063 } else { 8064 for (i = 0; i < us->us_num_epriv_ports; i++) { 8065 if (port == us->us_epriv_ports[i]) { 8066 priv = B_TRUE; 8067 break; 8068 } 8069 } 8070 } 8071 8072 if (priv) { 8073 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 8074 return (-TACCES); 8075 } 8076 } 8077 8078 if (port == 0) 8079 return (-TNOADDR); 8080 8081 /* 8082 * The state must be TS_UNBND. 
TPI mandates that users must send 8083 * TPI primitives only 1 at a time and wait for the response before 8084 * sending the next primitive. 8085 */ 8086 rw_enter(&udp->udp_rwlock, RW_WRITER); 8087 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 8088 rw_exit(&udp->udp_rwlock); 8089 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8090 "udp_bind: bad state, %u", udp->udp_state); 8091 return (-TOUTSTATE); 8092 } 8093 /* XXX how to remove the T_BIND_REQ? Should set it before calling */ 8094 udp->udp_pending_op = T_BIND_REQ; 8095 /* 8096 * Copy the source address into our udp structure. This address 8097 * may still be zero; if so, IP will fill in the correct address 8098 * each time an outbound packet is passed to it. Since the udp is 8099 * not yet in the bind hash list, we don't grab the uf_lock to 8100 * change udp_ipversion 8101 */ 8102 if (udp->udp_family == AF_INET) { 8103 ASSERT(sin != NULL); 8104 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8105 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8106 udp->udp_ip_snd_options_len; 8107 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 8108 } else { 8109 ASSERT(sin6 != NULL); 8110 v6src = sin6->sin6_addr; 8111 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 8112 /* 8113 * no need to hold the uf_lock to set the udp_ipversion 8114 * since we are not yet in the fanout list 8115 */ 8116 udp->udp_ipversion = IPV4_VERSION; 8117 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 8118 UDPH_SIZE + udp->udp_ip_snd_options_len; 8119 } else { 8120 udp->udp_ipversion = IPV6_VERSION; 8121 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8122 } 8123 } 8124 8125 /* 8126 * If udp_reuseaddr is not set, then we have to make sure that 8127 * the IP address and port number the application requested 8128 * (or we selected for the application) is not being used by 8129 * another stream. If another stream is already using the 8130 * requested IP address and port, the behavior depends on 8131 * "bind_to_req_port_only". 
If set the bind fails; otherwise we 8132 * search for any an unused port to bind to the the stream. 8133 * 8134 * As per the BSD semantics, as modified by the Deering multicast 8135 * changes, if udp_reuseaddr is set, then we allow multiple binds 8136 * to the same port independent of the local IP address. 8137 * 8138 * This is slightly different than in SunOS 4.X which did not 8139 * support IP multicast. Note that the change implemented by the 8140 * Deering multicast code effects all binds - not only binding 8141 * to IP multicast addresses. 8142 * 8143 * Note that when binding to port zero we ignore SO_REUSEADDR in 8144 * order to guarantee a unique port. 8145 */ 8146 8147 count = 0; 8148 if (udp->udp_anon_priv_bind) { 8149 /* 8150 * loopmax = (IPPORT_RESERVED-1) - 8151 * us->us_min_anonpriv_port + 1 8152 */ 8153 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 8154 } else { 8155 loopmax = us->us_largest_anon_port - 8156 us->us_smallest_anon_port + 1; 8157 } 8158 8159 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 8160 zoneid = connp->conn_zoneid; 8161 8162 for (;;) { 8163 udp_t *udp1; 8164 boolean_t found_exclbind = B_FALSE; 8165 8166 /* 8167 * Walk through the list of udp streams bound to 8168 * requested port with the same IP address. 8169 */ 8170 lport = htons(port); 8171 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 8172 us->us_bind_fanout_size)]; 8173 mutex_enter(&udpf->uf_lock); 8174 for (udp1 = udpf->uf_udp; udp1 != NULL; 8175 udp1 = udp1->udp_bind_hash) { 8176 if (lport != udp1->udp_port) 8177 continue; 8178 8179 /* 8180 * On a labeled system, we must treat bindings to ports 8181 * on shared IP addresses by sockets with MAC exemption 8182 * privilege as being in all zones, as there's 8183 * otherwise no way to identify the right receiver. 
8184 */ 8185 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 8186 IPCL_ZONE_MATCH(connp, 8187 udp1->udp_connp->conn_zoneid)) && 8188 !connp->conn_mac_exempt && \ 8189 !udp1->udp_connp->conn_mac_exempt) 8190 continue; 8191 8192 /* 8193 * If UDP_EXCLBIND is set for either the bound or 8194 * binding endpoint, the semantics of bind 8195 * is changed according to the following chart. 8196 * 8197 * spec = specified address (v4 or v6) 8198 * unspec = unspecified address (v4 or v6) 8199 * A = specified addresses are different for endpoints 8200 * 8201 * bound bind to allowed? 8202 * ------------------------------------- 8203 * unspec unspec no 8204 * unspec spec no 8205 * spec unspec no 8206 * spec spec yes if A 8207 * 8208 * For labeled systems, SO_MAC_EXEMPT behaves the same 8209 * as UDP_EXCLBIND, except that zoneid is ignored. 8210 */ 8211 if (udp1->udp_exclbind || udp->udp_exclbind || 8212 udp1->udp_connp->conn_mac_exempt || 8213 connp->conn_mac_exempt) { 8214 if (V6_OR_V4_INADDR_ANY( 8215 udp1->udp_bound_v6src) || 8216 is_inaddr_any || 8217 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8218 &v6src)) { 8219 found_exclbind = B_TRUE; 8220 break; 8221 } 8222 continue; 8223 } 8224 8225 /* 8226 * Check ipversion to allow IPv4 and IPv6 sockets to 8227 * have disjoint port number spaces. 8228 */ 8229 if (udp->udp_ipversion != udp1->udp_ipversion) { 8230 8231 /* 8232 * On the first time through the loop, if the 8233 * the user intentionally specified a 8234 * particular port number, then ignore any 8235 * bindings of the other protocol that may 8236 * conflict. This allows the user to bind IPv6 8237 * alone and get both v4 and v6, or bind both 8238 * both and get each seperately. On subsequent 8239 * times through the loop, we're checking a 8240 * port that we chose (not the user) and thus 8241 * we do not allow casual duplicate bindings. 8242 */ 8243 if (count == 0 && requested_port != 0) 8244 continue; 8245 } 8246 8247 /* 8248 * No difference depending on SO_REUSEADDR. 
8249 * 8250 * If existing port is bound to a 8251 * non-wildcard IP address and 8252 * the requesting stream is bound to 8253 * a distinct different IP addresses 8254 * (non-wildcard, also), keep going. 8255 */ 8256 if (!is_inaddr_any && 8257 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 8258 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8259 &v6src)) { 8260 continue; 8261 } 8262 break; 8263 } 8264 8265 if (!found_exclbind && 8266 (udp->udp_reuseaddr && requested_port != 0)) { 8267 break; 8268 } 8269 8270 if (udp1 == NULL) { 8271 /* 8272 * No other stream has this IP address 8273 * and port number. We can use it. 8274 */ 8275 break; 8276 } 8277 mutex_exit(&udpf->uf_lock); 8278 if (bind_to_req_port_only) { 8279 /* 8280 * We get here only when requested port 8281 * is bound (and only first of the for() 8282 * loop iteration). 8283 * 8284 * The semantics of this bind request 8285 * require it to fail so we return from 8286 * the routine (and exit the loop). 8287 * 8288 */ 8289 udp->udp_pending_op = -1; 8290 rw_exit(&udp->udp_rwlock); 8291 return (-TADDRBUSY); 8292 } 8293 8294 if (udp->udp_anon_priv_bind) { 8295 port = udp_get_next_priv_port(udp); 8296 } else { 8297 if ((count == 0) && (requested_port != 0)) { 8298 /* 8299 * If the application wants us to find 8300 * a port, get one to start with. Set 8301 * requested_port to 0, so that we will 8302 * update us->us_next_port_to_try below. 8303 */ 8304 port = udp_update_next_port(udp, 8305 us->us_next_port_to_try, B_TRUE); 8306 requested_port = 0; 8307 } else { 8308 port = udp_update_next_port(udp, port + 1, 8309 B_FALSE); 8310 } 8311 } 8312 8313 if (port == 0 || ++count >= loopmax) { 8314 /* 8315 * We've tried every possible port number and 8316 * there are none available, so send an error 8317 * to the user. 8318 */ 8319 udp->udp_pending_op = -1; 8320 rw_exit(&udp->udp_rwlock); 8321 return (-TNOADDR); 8322 } 8323 } 8324 8325 /* 8326 * Copy the source address into our udp structure. 
This address 8327 * may still be zero; if so, ip will fill in the correct address 8328 * each time an outbound packet is passed to it. 8329 * If we are binding to a broadcast or multicast address then 8330 * udp_post_ip_bind_connect will clear the source address 8331 * when udp_do_bind success. 8332 */ 8333 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8334 udp->udp_port = lport; 8335 /* 8336 * Now reset the the next anonymous port if the application requested 8337 * an anonymous port, or we handed out the next anonymous port. 8338 */ 8339 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8340 us->us_next_port_to_try = port + 1; 8341 } 8342 8343 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 8344 if (udp->udp_family == AF_INET) { 8345 sin->sin_port = udp->udp_port; 8346 } else { 8347 sin6->sin6_port = udp->udp_port; 8348 /* Rebuild the header template */ 8349 error = udp_build_hdrs(udp); 8350 if (error != 0) { 8351 udp->udp_pending_op = -1; 8352 rw_exit(&udp->udp_rwlock); 8353 mutex_exit(&udpf->uf_lock); 8354 return (error); 8355 } 8356 } 8357 udp->udp_state = TS_IDLE; 8358 udp_bind_hash_insert(udpf, udp); 8359 mutex_exit(&udpf->uf_lock); 8360 rw_exit(&udp->udp_rwlock); 8361 8362 if (cl_inet_bind) { 8363 /* 8364 * Running in cluster mode - register bind information 8365 */ 8366 if (udp->udp_ipversion == IPV4_VERSION) { 8367 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8368 IPPROTO_UDP, AF_INET, 8369 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8370 (in_port_t)udp->udp_port, NULL); 8371 } else { 8372 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8373 IPPROTO_UDP, AF_INET6, 8374 (uint8_t *)&(udp->udp_v6src), 8375 (in_port_t)udp->udp_port, NULL); 8376 } 8377 } 8378 8379 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8380 if (is_system_labeled() && (!connp->conn_anon_port || 8381 connp->conn_anon_mlp)) { 8382 uint16_t mlpport; 8383 zone_t *zone; 8384 8385 zone = crgetzone(cr); 8386 connp->conn_mlp_type = 
udp->udp_recvucred ? mlptBoth : 8387 mlptSingle; 8388 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 8389 &v6src, us->us_netstack->netstack_ip); 8390 if (addrtype == mlptSingle) { 8391 rw_enter(&udp->udp_rwlock, RW_WRITER); 8392 udp->udp_pending_op = -1; 8393 rw_exit(&udp->udp_rwlock); 8394 connp->conn_anon_port = B_FALSE; 8395 connp->conn_mlp_type = mlptSingle; 8396 return (-TNOADDR); 8397 } 8398 mlpport = connp->conn_anon_port ? PMAPPORT : port; 8399 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8400 addrtype); 8401 if (mlptype != mlptSingle && 8402 (connp->conn_mlp_type == mlptSingle || 8403 secpolicy_net_bindmlp(cr) != 0)) { 8404 if (udp->udp_debug) { 8405 (void) strlog(UDP_MOD_ID, 0, 1, 8406 SL_ERROR|SL_TRACE, 8407 "udp_bind: no priv for multilevel port %d", 8408 mlpport); 8409 } 8410 rw_enter(&udp->udp_rwlock, RW_WRITER); 8411 udp->udp_pending_op = -1; 8412 rw_exit(&udp->udp_rwlock); 8413 connp->conn_anon_port = B_FALSE; 8414 connp->conn_mlp_type = mlptSingle; 8415 return (-TACCES); 8416 } 8417 8418 /* 8419 * If we're specifically binding a shared IP address and the 8420 * port is MLP on shared addresses, then check to see if this 8421 * zone actually owns the MLP. Reject if not. 8422 */ 8423 if (mlptype == mlptShared && addrtype == mlptShared) { 8424 /* 8425 * No need to handle exclusive-stack zones since 8426 * ALL_ZONES only applies to the shared stack. 
8427 */ 8428 zoneid_t mlpzone; 8429 8430 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8431 htons(mlpport)); 8432 if (connp->conn_zoneid != mlpzone) { 8433 if (udp->udp_debug) { 8434 (void) strlog(UDP_MOD_ID, 0, 1, 8435 SL_ERROR|SL_TRACE, 8436 "udp_bind: attempt to bind port " 8437 "%d on shared addr in zone %d " 8438 "(should be %d)", 8439 mlpport, connp->conn_zoneid, 8440 mlpzone); 8441 } 8442 rw_enter(&udp->udp_rwlock, RW_WRITER); 8443 udp->udp_pending_op = -1; 8444 rw_exit(&udp->udp_rwlock); 8445 connp->conn_anon_port = B_FALSE; 8446 connp->conn_mlp_type = mlptSingle; 8447 return (-TACCES); 8448 } 8449 } 8450 if (connp->conn_anon_port) { 8451 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8452 port, B_TRUE); 8453 if (error != 0) { 8454 if (udp->udp_debug) { 8455 (void) strlog(UDP_MOD_ID, 0, 1, 8456 SL_ERROR|SL_TRACE, 8457 "udp_bind: cannot establish anon " 8458 "MLP for port %d", port); 8459 } 8460 rw_enter(&udp->udp_rwlock, RW_WRITER); 8461 udp->udp_pending_op = -1; 8462 rw_exit(&udp->udp_rwlock); 8463 connp->conn_anon_port = B_FALSE; 8464 connp->conn_mlp_type = mlptSingle; 8465 return (-TACCES); 8466 } 8467 } 8468 connp->conn_mlp_type = mlptype; 8469 } 8470 8471 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8472 /* 8473 * Append a request for an IRE if udp_v6src not 8474 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 
8475 */ 8476 mp = allocb(sizeof (ire_t), BPRI_HI); 8477 if (!mp) { 8478 rw_enter(&udp->udp_rwlock, RW_WRITER); 8479 udp->udp_pending_op = -1; 8480 rw_exit(&udp->udp_rwlock); 8481 return (ENOMEM); 8482 } 8483 mp->b_wptr += sizeof (ire_t); 8484 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8485 } 8486 if (udp->udp_family == AF_INET6) { 8487 ASSERT(udp->udp_connp->conn_af_isv6); 8488 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8489 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8490 } else { 8491 ASSERT(!udp->udp_connp->conn_af_isv6); 8492 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8493 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8494 B_TRUE); 8495 } 8496 8497 (void) udp_post_ip_bind_connect(udp, mp, error); 8498 return (error); 8499 } 8500 8501 int 8502 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8503 socklen_t len, cred_t *cr) 8504 { 8505 int error; 8506 conn_t *connp; 8507 8508 /* All Solaris components should pass a cred for this operation. */ 8509 ASSERT(cr != NULL); 8510 8511 connp = (conn_t *)proto_handle; 8512 8513 if (sa == NULL) 8514 error = udp_do_unbind(connp); 8515 else 8516 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8517 8518 if (error < 0) { 8519 if (error == -TOUTSTATE) 8520 error = EINVAL; 8521 else 8522 error = proto_tlitosyserr(-error); 8523 } 8524 8525 return (error); 8526 } 8527 8528 static int 8529 udp_implicit_bind(conn_t *connp, cred_t *cr) 8530 { 8531 int error; 8532 8533 /* All Solaris components should pass a cred for this operation. */ 8534 ASSERT(cr != NULL); 8535 8536 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8537 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8538 } 8539 8540 /* 8541 * This routine removes a port number association from a stream. It 8542 * is called by udp_unbind and udp_tpi_unbind. 
8543 */ 8544 static int 8545 udp_do_unbind(conn_t *connp) 8546 { 8547 udp_t *udp = connp->conn_udp; 8548 udp_fanout_t *udpf; 8549 udp_stack_t *us = udp->udp_us; 8550 8551 if (cl_inet_unbind != NULL) { 8552 /* 8553 * Running in cluster mode - register unbind information 8554 */ 8555 if (udp->udp_ipversion == IPV4_VERSION) { 8556 (*cl_inet_unbind)( 8557 connp->conn_netstack->netstack_stackid, 8558 IPPROTO_UDP, AF_INET, 8559 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8560 (in_port_t)udp->udp_port, NULL); 8561 } else { 8562 (*cl_inet_unbind)( 8563 connp->conn_netstack->netstack_stackid, 8564 IPPROTO_UDP, AF_INET6, 8565 (uint8_t *)&(udp->udp_v6src), 8566 (in_port_t)udp->udp_port, NULL); 8567 } 8568 } 8569 8570 rw_enter(&udp->udp_rwlock, RW_WRITER); 8571 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8572 rw_exit(&udp->udp_rwlock); 8573 return (-TOUTSTATE); 8574 } 8575 udp->udp_pending_op = T_UNBIND_REQ; 8576 rw_exit(&udp->udp_rwlock); 8577 8578 /* 8579 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8580 * and therefore ip_unbind must never return NULL. 8581 */ 8582 ip_unbind(connp); 8583 8584 /* 8585 * Once we're unbound from IP, the pending operation may be cleared 8586 * here. 
8587 */ 8588 rw_enter(&udp->udp_rwlock, RW_WRITER); 8589 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8590 us->us_bind_fanout_size)]; 8591 8592 mutex_enter(&udpf->uf_lock); 8593 udp_bind_hash_remove(udp, B_TRUE); 8594 V6_SET_ZERO(udp->udp_v6src); 8595 V6_SET_ZERO(udp->udp_bound_v6src); 8596 udp->udp_port = 0; 8597 mutex_exit(&udpf->uf_lock); 8598 8599 udp->udp_pending_op = -1; 8600 udp->udp_state = TS_UNBND; 8601 if (udp->udp_family == AF_INET6) 8602 (void) udp_build_hdrs(udp); 8603 rw_exit(&udp->udp_rwlock); 8604 8605 return (0); 8606 } 8607 8608 static int 8609 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8610 { 8611 ire_t *ire; 8612 udp_fanout_t *udpf; 8613 udp_stack_t *us = udp->udp_us; 8614 8615 ASSERT(udp->udp_pending_op != -1); 8616 rw_enter(&udp->udp_rwlock, RW_WRITER); 8617 if (error == 0) { 8618 /* For udp_do_connect() success */ 8619 /* udp_do_bind() success will do nothing in here */ 8620 /* 8621 * If a broadcast/multicast address was bound, set 8622 * the source address to 0. 8623 * This ensures no datagrams with broadcast address 8624 * as source address are emitted (which would violate 8625 * RFC1122 - Hosts requirements) 8626 * 8627 * Note that when connecting the returned IRE is 8628 * for the destination address and we only perform 8629 * the broadcast check for the source address (it 8630 * is OK to connect to a broadcast/multicast address.) 8631 */ 8632 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8633 ire = (ire_t *)ire_mp->b_rptr; 8634 8635 /* 8636 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8637 * multicast local address. 8638 */ 8639 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8640 us->us_bind_fanout_size)]; 8641 if (ire->ire_type == IRE_BROADCAST && 8642 udp->udp_state != TS_DATA_XFER) { 8643 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8644 udp->udp_pending_op == O_T_BIND_REQ); 8645 /* 8646 * This was just a local bind to a broadcast 8647 * addr. 
8648 */ 8649 mutex_enter(&udpf->uf_lock); 8650 V6_SET_ZERO(udp->udp_v6src); 8651 mutex_exit(&udpf->uf_lock); 8652 if (udp->udp_family == AF_INET6) 8653 (void) udp_build_hdrs(udp); 8654 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8655 if (udp->udp_family == AF_INET6) 8656 (void) udp_build_hdrs(udp); 8657 } 8658 } 8659 } else { 8660 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8661 us->us_bind_fanout_size)]; 8662 mutex_enter(&udpf->uf_lock); 8663 8664 if (udp->udp_state == TS_DATA_XFER) { 8665 /* Connect failed */ 8666 /* Revert back to the bound source */ 8667 udp->udp_v6src = udp->udp_bound_v6src; 8668 udp->udp_state = TS_IDLE; 8669 } else { 8670 /* For udp_do_bind() failed */ 8671 V6_SET_ZERO(udp->udp_v6src); 8672 V6_SET_ZERO(udp->udp_bound_v6src); 8673 udp->udp_state = TS_UNBND; 8674 udp_bind_hash_remove(udp, B_TRUE); 8675 udp->udp_port = 0; 8676 } 8677 mutex_exit(&udpf->uf_lock); 8678 if (udp->udp_family == AF_INET6) 8679 (void) udp_build_hdrs(udp); 8680 } 8681 udp->udp_pending_op = -1; 8682 rw_exit(&udp->udp_rwlock); 8683 if (ire_mp != NULL) 8684 freeb(ire_mp); 8685 return (error); 8686 } 8687 8688 /* 8689 * It associates a default destination address with the stream. 
8690 */ 8691 static int 8692 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 8693 cred_t *cr) 8694 { 8695 sin6_t *sin6; 8696 sin_t *sin; 8697 in6_addr_t v6dst; 8698 ipaddr_t v4dst; 8699 uint16_t dstport; 8700 uint32_t flowinfo; 8701 mblk_t *ire_mp; 8702 udp_fanout_t *udpf; 8703 udp_t *udp, *udp1; 8704 ushort_t ipversion; 8705 udp_stack_t *us; 8706 int error; 8707 8708 udp = connp->conn_udp; 8709 us = udp->udp_us; 8710 8711 /* 8712 * Address has been verified by the caller 8713 */ 8714 switch (len) { 8715 default: 8716 /* 8717 * Should never happen 8718 */ 8719 return (EINVAL); 8720 8721 case sizeof (sin_t): 8722 sin = (sin_t *)sa; 8723 v4dst = sin->sin_addr.s_addr; 8724 dstport = sin->sin_port; 8725 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8726 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8727 ipversion = IPV4_VERSION; 8728 break; 8729 8730 case sizeof (sin6_t): 8731 sin6 = (sin6_t *)sa; 8732 v6dst = sin6->sin6_addr; 8733 dstport = sin6->sin6_port; 8734 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8735 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8736 ipversion = IPV4_VERSION; 8737 flowinfo = 0; 8738 } else { 8739 ipversion = IPV6_VERSION; 8740 flowinfo = sin6->sin6_flowinfo; 8741 } 8742 break; 8743 } 8744 8745 if (dstport == 0) 8746 return (-TBADADDR); 8747 8748 rw_enter(&udp->udp_rwlock, RW_WRITER); 8749 8750 /* 8751 * This UDP must have bound to a port already before doing a connect. 8752 * TPI mandates that users must send TPI primitives only 1 at a time 8753 * and wait for the response before sending the next primitive. 
8754 */ 8755 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8756 rw_exit(&udp->udp_rwlock); 8757 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8758 "udp_connect: bad state, %u", udp->udp_state); 8759 return (-TOUTSTATE); 8760 } 8761 udp->udp_pending_op = T_CONN_REQ; 8762 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8763 8764 if (ipversion == IPV4_VERSION) { 8765 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8766 udp->udp_ip_snd_options_len; 8767 } else { 8768 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8769 } 8770 8771 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8772 us->us_bind_fanout_size)]; 8773 8774 mutex_enter(&udpf->uf_lock); 8775 if (udp->udp_state == TS_DATA_XFER) { 8776 /* Already connected - clear out state */ 8777 udp->udp_v6src = udp->udp_bound_v6src; 8778 udp->udp_state = TS_IDLE; 8779 } 8780 8781 /* 8782 * Create a default IP header with no IP options. 8783 */ 8784 udp->udp_dstport = dstport; 8785 udp->udp_ipversion = ipversion; 8786 if (ipversion == IPV4_VERSION) { 8787 /* 8788 * Interpret a zero destination to mean loopback. 8789 * Update the T_CONN_REQ (sin/sin6) since it is used to 8790 * generate the T_CONN_CON. 8791 */ 8792 if (v4dst == INADDR_ANY) { 8793 v4dst = htonl(INADDR_LOOPBACK); 8794 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8795 if (udp->udp_family == AF_INET) { 8796 sin->sin_addr.s_addr = v4dst; 8797 } else { 8798 sin6->sin6_addr = v6dst; 8799 } 8800 } 8801 udp->udp_v6dst = v6dst; 8802 udp->udp_flowinfo = 0; 8803 8804 /* 8805 * If the destination address is multicast and 8806 * an outgoing multicast interface has been set, 8807 * use the address of that interface as our 8808 * source address if no source address has been set. 
8809 */ 8810 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8811 CLASSD(v4dst) && 8812 udp->udp_multicast_if_addr != INADDR_ANY) { 8813 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8814 &udp->udp_v6src); 8815 } 8816 } else { 8817 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8818 /* 8819 * Interpret a zero destination to mean loopback. 8820 * Update the T_CONN_REQ (sin/sin6) since it is used to 8821 * generate the T_CONN_CON. 8822 */ 8823 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 8824 v6dst = ipv6_loopback; 8825 sin6->sin6_addr = v6dst; 8826 } 8827 udp->udp_v6dst = v6dst; 8828 udp->udp_flowinfo = flowinfo; 8829 /* 8830 * If the destination address is multicast and 8831 * an outgoing multicast interface has been set, 8832 * then the ip bind logic will pick the correct source 8833 * address (i.e. matching the outgoing multicast interface). 8834 */ 8835 } 8836 8837 /* 8838 * Verify that the src/port/dst/port is unique for all 8839 * connections in TS_DATA_XFER 8840 */ 8841 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 8842 if (udp1->udp_state != TS_DATA_XFER) 8843 continue; 8844 if (udp->udp_port != udp1->udp_port || 8845 udp->udp_ipversion != udp1->udp_ipversion || 8846 dstport != udp1->udp_dstport || 8847 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 8848 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 8849 !(IPCL_ZONE_MATCH(udp->udp_connp, 8850 udp1->udp_connp->conn_zoneid) || 8851 IPCL_ZONE_MATCH(udp1->udp_connp, 8852 udp->udp_connp->conn_zoneid))) 8853 continue; 8854 mutex_exit(&udpf->uf_lock); 8855 udp->udp_pending_op = -1; 8856 rw_exit(&udp->udp_rwlock); 8857 return (-TBADADDR); 8858 } 8859 8860 if (cl_inet_connect2 != NULL) { 8861 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 8862 if (error != 0) { 8863 mutex_exit(&udpf->uf_lock); 8864 udp->udp_pending_op = -1; 8865 rw_exit(&udp->udp_rwlock); 8866 return (-TBADADDR); 8867 } 8868 } 8869 8870 udp->udp_state = TS_DATA_XFER; 8871 
mutex_exit(&udpf->uf_lock); 8872 8873 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 8874 if (ire_mp == NULL) { 8875 mutex_enter(&udpf->uf_lock); 8876 udp->udp_state = TS_IDLE; 8877 udp->udp_pending_op = -1; 8878 mutex_exit(&udpf->uf_lock); 8879 rw_exit(&udp->udp_rwlock); 8880 return (ENOMEM); 8881 } 8882 8883 rw_exit(&udp->udp_rwlock); 8884 8885 ire_mp->b_wptr += sizeof (ire_t); 8886 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8887 8888 if (udp->udp_family == AF_INET) { 8889 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 8890 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 8891 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 8892 B_TRUE, B_TRUE, cr); 8893 } else { 8894 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 8895 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 8896 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr); 8897 } 8898 8899 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 8900 } 8901 8902 /* ARGSUSED */ 8903 static int 8904 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 8905 socklen_t len, sock_connid_t *id, cred_t *cr) 8906 { 8907 conn_t *connp = (conn_t *)proto_handle; 8908 udp_t *udp = connp->conn_udp; 8909 int error; 8910 boolean_t did_bind = B_FALSE; 8911 8912 /* All Solaris components should pass a cred for this operation. */ 8913 ASSERT(cr != NULL); 8914 8915 if (sa == NULL) { 8916 /* 8917 * Disconnect 8918 * Make sure we are connected 8919 */ 8920 if (udp->udp_state != TS_DATA_XFER) 8921 return (EINVAL); 8922 8923 error = udp_disconnect(connp); 8924 return (error); 8925 } 8926 8927 error = proto_verify_ip_addr(udp->udp_family, sa, len); 8928 if (error != 0) 8929 goto done; 8930 8931 /* do an implicit bind if necessary */ 8932 if (udp->udp_state == TS_UNBND) { 8933 error = udp_implicit_bind(connp, cr); 8934 /* 8935 * We could be racing with an actual bind, in which case 8936 * we would see EPROTO. We cross our fingers and try 8937 * to connect. 
8938 */ 8939 if (!(error == 0 || error == EPROTO)) 8940 goto done; 8941 did_bind = B_TRUE; 8942 } 8943 /* 8944 * set SO_DGRAM_ERRIND 8945 */ 8946 udp->udp_dgram_errind = B_TRUE; 8947 8948 error = udp_do_connect(connp, sa, len, cr); 8949 8950 if (error != 0 && did_bind) { 8951 int unbind_err; 8952 8953 unbind_err = udp_do_unbind(connp); 8954 ASSERT(unbind_err == 0); 8955 } 8956 8957 if (error == 0) { 8958 *id = 0; 8959 (*connp->conn_upcalls->su_connected) 8960 (connp->conn_upper_handle, 0, NULL, -1); 8961 } else if (error < 0) { 8962 error = proto_tlitosyserr(-error); 8963 } 8964 8965 done: 8966 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 8967 /* 8968 * No need to hold locks to set state 8969 * after connect failure socket state is undefined 8970 * We set the state only to imitate old sockfs behavior 8971 */ 8972 udp->udp_state = TS_IDLE; 8973 } 8974 return (error); 8975 } 8976 8977 /* ARGSUSED */ 8978 int 8979 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 8980 cred_t *cr) 8981 { 8982 conn_t *connp = (conn_t *)proto_handle; 8983 udp_t *udp = connp->conn_udp; 8984 udp_stack_t *us = udp->udp_us; 8985 int error = 0; 8986 8987 ASSERT(DB_TYPE(mp) == M_DATA); 8988 8989 /* All Solaris components should pass a cred for this operation. */ 8990 ASSERT(cr != NULL); 8991 8992 /* If labeled then sockfs should have already set db_credp */ 8993 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 8994 8995 /* 8996 * If the socket is connected and no change in destination 8997 */ 8998 if (msg->msg_namelen == 0) { 8999 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 9000 if (error == EDESTADDRREQ) 9001 return (error); 9002 else 9003 return (udp->udp_dgram_errind ? error : 0); 9004 } 9005 9006 /* 9007 * Do an implicit bind if necessary. 9008 */ 9009 if (udp->udp_state == TS_UNBND) { 9010 error = udp_implicit_bind(connp, cr); 9011 /* 9012 * We could be racing with an actual bind, in which case 9013 * we would see EPROTO. 
We cross our fingers and try 9014 * to send. 9015 */ 9016 if (!(error == 0 || error == EPROTO)) { 9017 freemsg(mp); 9018 return (error); 9019 } 9020 } 9021 9022 rw_enter(&udp->udp_rwlock, RW_WRITER); 9023 9024 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 9025 rw_exit(&udp->udp_rwlock); 9026 freemsg(mp); 9027 return (EISCONN); 9028 } 9029 9030 9031 if (udp->udp_delayed_error != 0) { 9032 boolean_t match; 9033 9034 error = udp->udp_delayed_error; 9035 match = B_FALSE; 9036 udp->udp_delayed_error = 0; 9037 switch (udp->udp_family) { 9038 case AF_INET: { 9039 /* Compare just IP address and port */ 9040 sin_t *sin1 = (sin_t *)msg->msg_name; 9041 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 9042 9043 if (msg->msg_namelen == sizeof (sin_t) && 9044 sin1->sin_port == sin2->sin_port && 9045 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 9046 match = B_TRUE; 9047 9048 break; 9049 } 9050 case AF_INET6: { 9051 sin6_t *sin1 = (sin6_t *)msg->msg_name; 9052 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 9053 9054 if (msg->msg_namelen == sizeof (sin6_t) && 9055 sin1->sin6_port == sin2->sin6_port && 9056 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 9057 &sin2->sin6_addr)) 9058 match = B_TRUE; 9059 break; 9060 } 9061 default: 9062 ASSERT(0); 9063 } 9064 9065 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 9066 9067 if (match) { 9068 rw_exit(&udp->udp_rwlock); 9069 freemsg(mp); 9070 return (error); 9071 } 9072 } 9073 9074 error = proto_verify_ip_addr(udp->udp_family, 9075 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 9076 rw_exit(&udp->udp_rwlock); 9077 9078 if (error != 0) { 9079 freemsg(mp); 9080 return (error); 9081 } 9082 9083 error = udp_send_not_connected(connp, mp, 9084 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 9085 curproc->p_pid); 9086 if (error != 0) { 9087 UDP_STAT(us, udp_out_err_output); 9088 freemsg(mp); 9089 } 9090 return (udp->udp_dgram_errind ? 
error : 0); 9091 } 9092 9093 int 9094 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 9095 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 9096 { 9097 conn_t *connp = (conn_t *)proto_handle; 9098 udp_t *udp; 9099 struct T_capability_ack tca; 9100 struct sockaddr_in6 laddr, faddr; 9101 socklen_t laddrlen, faddrlen; 9102 short opts; 9103 struct stroptions *stropt; 9104 mblk_t *stropt_mp; 9105 int error; 9106 9107 udp = connp->conn_udp; 9108 9109 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 9110 9111 /* 9112 * setup the fallback stream that was allocated 9113 */ 9114 connp->conn_dev = (dev_t)RD(q)->q_ptr; 9115 connp->conn_minor_arena = WR(q)->q_ptr; 9116 9117 RD(q)->q_ptr = WR(q)->q_ptr = connp; 9118 9119 WR(q)->q_qinfo = &udp_winit; 9120 9121 connp->conn_rq = RD(q); 9122 connp->conn_wq = WR(q); 9123 9124 /* Notify stream head about options before sending up data */ 9125 stropt_mp->b_datap->db_type = M_SETOPTS; 9126 stropt_mp->b_wptr += sizeof (*stropt); 9127 stropt = (struct stroptions *)stropt_mp->b_rptr; 9128 stropt->so_flags = SO_WROFF | SO_HIWAT; 9129 stropt->so_wroff = 9130 (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); 9131 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 9132 putnext(RD(q), stropt_mp); 9133 9134 /* 9135 * Free the helper stream 9136 */ 9137 ip_free_helper_stream(connp); 9138 9139 if (!direct_sockfs) 9140 udp_disable_direct_sockfs(udp); 9141 9142 /* 9143 * Collect the information needed to sync with the sonode 9144 */ 9145 udp_do_capability_ack(udp, &tca, TC1_INFO); 9146 9147 laddrlen = faddrlen = sizeof (sin6_t); 9148 (void) udp_getsockname((sock_lower_handle_t)connp, 9149 (struct sockaddr *)&laddr, &laddrlen, CRED()); 9150 error = udp_getpeername((sock_lower_handle_t)connp, 9151 (struct sockaddr *)&faddr, &faddrlen, CRED()); 9152 if (error != 0) 9153 faddrlen = 0; 9154 9155 opts = 0; 9156 if (udp->udp_dgram_errind) 9157 opts |= SO_DGRAM_ERRIND; 9158 if (udp->udp_dontroute) 9159 
opts |= SO_DONTROUTE; 9160 9161 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 9162 (struct sockaddr *)&laddr, laddrlen, 9163 (struct sockaddr *)&faddr, faddrlen, opts); 9164 9165 mutex_enter(&udp->udp_recv_lock); 9166 /* 9167 * Attempts to send data up during fallback will result in it being 9168 * queued in udp_t. Now we push up any queued packets. 9169 */ 9170 while (udp->udp_fallback_queue_head != NULL) { 9171 mblk_t *mp; 9172 mp = udp->udp_fallback_queue_head; 9173 udp->udp_fallback_queue_head = mp->b_next; 9174 mutex_exit(&udp->udp_recv_lock); 9175 mp->b_next = NULL; 9176 putnext(RD(q), mp); 9177 mutex_enter(&udp->udp_recv_lock); 9178 } 9179 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 9180 /* 9181 * No longer a streams less socket 9182 */ 9183 rw_enter(&udp->udp_rwlock, RW_WRITER); 9184 connp->conn_flags &= ~IPCL_NONSTR; 9185 rw_exit(&udp->udp_rwlock); 9186 9187 mutex_exit(&udp->udp_recv_lock); 9188 9189 ASSERT(connp->conn_ref >= 1); 9190 9191 return (0); 9192 } 9193 9194 static int 9195 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9196 { 9197 sin_t *sin = (sin_t *)sa; 9198 sin6_t *sin6 = (sin6_t *)sa; 9199 9200 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9201 ASSERT(udp != NULL); 9202 9203 if (udp->udp_state != TS_DATA_XFER) 9204 return (ENOTCONN); 9205 9206 switch (udp->udp_family) { 9207 case AF_INET: 9208 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9209 9210 if (*salenp < sizeof (sin_t)) 9211 return (EINVAL); 9212 9213 *salenp = sizeof (sin_t); 9214 *sin = sin_null; 9215 sin->sin_family = AF_INET; 9216 sin->sin_port = udp->udp_dstport; 9217 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 9218 break; 9219 case AF_INET6: 9220 if (*salenp < sizeof (sin6_t)) 9221 return (EINVAL); 9222 9223 *salenp = sizeof (sin6_t); 9224 *sin6 = sin6_null; 9225 sin6->sin6_family = AF_INET6; 9226 sin6->sin6_port = udp->udp_dstport; 9227 sin6->sin6_addr = udp->udp_v6dst; 9228 sin6->sin6_flowinfo = udp->udp_flowinfo; 9229 break; 9230 
} 9231 9232 return (0); 9233 } 9234 9235 /* ARGSUSED */ 9236 int 9237 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9238 socklen_t *salenp, cred_t *cr) 9239 { 9240 conn_t *connp = (conn_t *)proto_handle; 9241 udp_t *udp = connp->conn_udp; 9242 int error; 9243 9244 /* All Solaris components should pass a cred for this operation. */ 9245 ASSERT(cr != NULL); 9246 9247 ASSERT(udp != NULL); 9248 9249 rw_enter(&udp->udp_rwlock, RW_READER); 9250 9251 error = udp_do_getpeername(udp, sa, salenp); 9252 9253 rw_exit(&udp->udp_rwlock); 9254 9255 return (error); 9256 } 9257 9258 static int 9259 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9260 { 9261 sin_t *sin = (sin_t *)sa; 9262 sin6_t *sin6 = (sin6_t *)sa; 9263 9264 ASSERT(udp != NULL); 9265 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9266 9267 switch (udp->udp_family) { 9268 case AF_INET: 9269 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9270 9271 if (*salenp < sizeof (sin_t)) 9272 return (EINVAL); 9273 9274 *salenp = sizeof (sin_t); 9275 *sin = sin_null; 9276 sin->sin_family = AF_INET; 9277 if (udp->udp_state == TS_UNBND) { 9278 break; 9279 } 9280 sin->sin_port = udp->udp_port; 9281 9282 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9283 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9284 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9285 } else { 9286 /* 9287 * INADDR_ANY 9288 * udp_v6src is not set, we might be bound to 9289 * broadcast/multicast. 
Use udp_bound_v6src as 9290 * local address instead (that could 9291 * also still be INADDR_ANY) 9292 */ 9293 sin->sin_addr.s_addr = 9294 V4_PART_OF_V6(udp->udp_bound_v6src); 9295 } 9296 break; 9297 9298 case AF_INET6: 9299 if (*salenp < sizeof (sin6_t)) 9300 return (EINVAL); 9301 9302 *salenp = sizeof (sin6_t); 9303 *sin6 = sin6_null; 9304 sin6->sin6_family = AF_INET6; 9305 if (udp->udp_state == TS_UNBND) { 9306 break; 9307 } 9308 sin6->sin6_port = udp->udp_port; 9309 9310 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9311 sin6->sin6_addr = udp->udp_v6src; 9312 } else { 9313 /* 9314 * UNSPECIFIED 9315 * udp_v6src is not set, we might be bound to 9316 * broadcast/multicast. Use udp_bound_v6src as 9317 * local address instead (that could 9318 * also still be UNSPECIFIED) 9319 */ 9320 sin6->sin6_addr = udp->udp_bound_v6src; 9321 } 9322 } 9323 return (0); 9324 } 9325 9326 /* ARGSUSED */ 9327 int 9328 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9329 socklen_t *salenp, cred_t *cr) 9330 { 9331 conn_t *connp = (conn_t *)proto_handle; 9332 udp_t *udp = connp->conn_udp; 9333 int error; 9334 9335 /* All Solaris components should pass a cred for this operation. */ 9336 ASSERT(cr != NULL); 9337 9338 ASSERT(udp != NULL); 9339 rw_enter(&udp->udp_rwlock, RW_READER); 9340 9341 error = udp_do_getsockname(udp, sa, salenp); 9342 9343 rw_exit(&udp->udp_rwlock); 9344 9345 return (error); 9346 } 9347 9348 int 9349 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9350 void *optvalp, socklen_t *optlen, cred_t *cr) 9351 { 9352 conn_t *connp = (conn_t *)proto_handle; 9353 udp_t *udp = connp->conn_udp; 9354 int error; 9355 t_uscalar_t max_optbuf_len; 9356 void *optvalp_buf; 9357 int len; 9358 9359 /* All Solaris components should pass a cred for this operation. 
*/ 9360 ASSERT(cr != NULL); 9361 9362 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9363 udp_opt_obj.odb_opt_des_arr, 9364 udp_opt_obj.odb_opt_arr_cnt, 9365 udp_opt_obj.odb_topmost_tpiprovider, 9366 B_FALSE, B_TRUE, cr); 9367 if (error != 0) { 9368 if (error < 0) 9369 error = proto_tlitosyserr(-error); 9370 return (error); 9371 } 9372 9373 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9374 rw_enter(&udp->udp_rwlock, RW_READER); 9375 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9376 rw_exit(&udp->udp_rwlock); 9377 9378 if (len < 0) { 9379 /* 9380 * Pass on to IP 9381 */ 9382 kmem_free(optvalp_buf, max_optbuf_len); 9383 return (ip_get_options(connp, level, option_name, 9384 optvalp, optlen, cr)); 9385 } else { 9386 /* 9387 * update optlen and copy option value 9388 */ 9389 t_uscalar_t size = MIN(len, *optlen); 9390 bcopy(optvalp_buf, optvalp, size); 9391 bcopy(&size, optlen, sizeof (size)); 9392 9393 kmem_free(optvalp_buf, max_optbuf_len); 9394 return (0); 9395 } 9396 } 9397 9398 int 9399 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9400 const void *optvalp, socklen_t optlen, cred_t *cr) 9401 { 9402 conn_t *connp = (conn_t *)proto_handle; 9403 udp_t *udp = connp->conn_udp; 9404 int error; 9405 9406 /* All Solaris components should pass a cred for this operation. 
*/ 9407 ASSERT(cr != NULL); 9408 9409 error = proto_opt_check(level, option_name, optlen, NULL, 9410 udp_opt_obj.odb_opt_des_arr, 9411 udp_opt_obj.odb_opt_arr_cnt, 9412 udp_opt_obj.odb_topmost_tpiprovider, 9413 B_TRUE, B_FALSE, cr); 9414 9415 if (error != 0) { 9416 if (error < 0) 9417 error = proto_tlitosyserr(-error); 9418 return (error); 9419 } 9420 9421 rw_enter(&udp->udp_rwlock, RW_WRITER); 9422 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9423 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9424 NULL, cr); 9425 rw_exit(&udp->udp_rwlock); 9426 9427 if (error < 0) { 9428 /* 9429 * Pass on to ip 9430 */ 9431 error = ip_set_options(connp, level, option_name, optvalp, 9432 optlen, cr); 9433 } 9434 9435 return (error); 9436 } 9437 9438 void 9439 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9440 { 9441 conn_t *connp = (conn_t *)proto_handle; 9442 udp_t *udp = connp->conn_udp; 9443 9444 mutex_enter(&udp->udp_recv_lock); 9445 connp->conn_flow_cntrld = B_FALSE; 9446 mutex_exit(&udp->udp_recv_lock); 9447 } 9448 9449 /* ARGSUSED */ 9450 int 9451 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9452 { 9453 conn_t *connp = (conn_t *)proto_handle; 9454 9455 /* All Solaris components should pass a cred for this operation. */ 9456 ASSERT(cr != NULL); 9457 9458 /* shut down the send side */ 9459 if (how != SHUT_RD) 9460 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9461 SOCK_OPCTL_SHUT_SEND, 0); 9462 /* shut down the recv side */ 9463 if (how != SHUT_WR) 9464 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9465 SOCK_OPCTL_SHUT_RECV, 0); 9466 return (0); 9467 } 9468 9469 int 9470 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9471 int mode, int32_t *rvalp, cred_t *cr) 9472 { 9473 conn_t *connp = (conn_t *)proto_handle; 9474 int error; 9475 9476 /* All Solaris components should pass a cred for this operation. 
*/ 9477 ASSERT(cr != NULL); 9478 9479 switch (cmd) { 9480 case ND_SET: 9481 case ND_GET: 9482 case _SIOCSOCKFALLBACK: 9483 case TI_GETPEERNAME: 9484 case TI_GETMYNAME: 9485 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9486 cmd)); 9487 error = EINVAL; 9488 break; 9489 default: 9490 /* 9491 * Pass on to IP using helper stream 9492 */ 9493 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9494 cmd, arg, mode, cr, rvalp); 9495 break; 9496 } 9497 return (error); 9498 } 9499 9500 /* ARGSUSED */ 9501 int 9502 udp_accept(sock_lower_handle_t lproto_handle, 9503 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9504 cred_t *cr) 9505 { 9506 return (EOPNOTSUPP); 9507 } 9508 9509 /* ARGSUSED */ 9510 int 9511 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9512 { 9513 return (EOPNOTSUPP); 9514 } 9515 9516 sock_downcalls_t sock_udp_downcalls = { 9517 udp_activate, /* sd_activate */ 9518 udp_accept, /* sd_accept */ 9519 udp_bind, /* sd_bind */ 9520 udp_listen, /* sd_listen */ 9521 udp_connect, /* sd_connect */ 9522 udp_getpeername, /* sd_getpeername */ 9523 udp_getsockname, /* sd_getsockname */ 9524 udp_getsockopt, /* sd_getsockopt */ 9525 udp_setsockopt, /* sd_setsockopt */ 9526 udp_send, /* sd_send */ 9527 NULL, /* sd_send_uio */ 9528 NULL, /* sd_recv_uio */ 9529 NULL, /* sd_poll */ 9530 udp_shutdown, /* sd_shutdown */ 9531 udp_clr_flowctrl, /* sd_setflowctrl */ 9532 udp_ioctl, /* sd_ioctl */ 9533 udp_close /* sd_close */ 9534 }; 9535