1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 24 * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. 25 * Copyright 2018, Joyent, Inc. 26 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 27 * Copyright 2024 Oxide Computer Company 28 */ 29 /* Copyright (c) 1990 Mentat Inc. */ 30 31 #include <sys/sysmacros.h> 32 #include <sys/types.h> 33 #include <sys/stream.h> 34 #include <sys/stropts.h> 35 #include <sys/strlog.h> 36 #include <sys/strsun.h> 37 #define _SUN_TPI_VERSION 2 38 #include <sys/tihdr.h> 39 #include <sys/timod.h> 40 #include <sys/ddi.h> 41 #include <sys/sunddi.h> 42 #include <sys/strsubr.h> 43 #include <sys/suntpi.h> 44 #include <sys/xti_inet.h> 45 #include <sys/kmem.h> 46 #include <sys/cred_impl.h> 47 #include <sys/policy.h> 48 #include <sys/priv.h> 49 #include <sys/ucred.h> 50 #include <sys/zone.h> 51 52 #include <sys/socket.h> 53 #include <sys/socketvar.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/sdt.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 65 #include <inet/common.h> 66 #include <inet/ip.h> 67 #include <inet/ip_impl.h> 68 #include <inet/ipsec_impl.h> 69 #include <inet/ip6.h> 70 #include <inet/ip_ire.h> 71 #include <inet/ip_if.h> 72 #include <inet/ip_multi.h> 73 #include <inet/ip_ndp.h> 74 #include <inet/proto_set.h> 75 #include <inet/mib2.h> 76 #include <inet/optcom.h> 77 #include <inet/snmpcom.h> 78 #include <inet/kstatcom.h> 79 #include <inet/ipclassifier.h> 80 #include <sys/squeue_impl.h> 81 #include <inet/ipnet.h> 82 #include <sys/vxlan.h> 83 #include <inet/inet_hash.h> 84 85 #include <sys/tsol/label.h> 86 #include <sys/tsol/tnet.h> 87 #include <rpc/pmap_prot.h> 88 89 #include <inet/udp_impl.h> 90 91 /* 92 * Synchronization notes: 93 * 94 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 95 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 96 * protects the contents of the udp_t. uf_lock protects the address and the 97 * fanout information. 98 * The lock order is conn_lock -> uf_lock. 99 * 100 * The fanout lock uf_lock: 101 * When a UDP endpoint is bound to a local port, it is inserted into 102 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 103 * The size of the array is controlled by the udp_bind_fanout_size variable. 104 * This variable can be changed in /etc/system if the default value is 105 * not large enough. Each bind hash bucket is protected by a per bucket 106 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 107 * structure and a few other fields in the udp_t. A UDP endpoint is removed 108 * from the bind hash list only when it is being unbound or being closed. 109 * The per bucket lock also protects a UDP endpoint's state changes. 110 * 111 * Plumbing notes: 112 * UDP is always a device driver. For compatibility with mibopen() code 113 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 114 * dummy module. 115 * 116 * The above implies that we don't support any intermediate module to 117 * reside in between /dev/ip and udp -- in fact, we never supported such 118 * scenario in the past as the inter-layer communication semantics have 119 * always been private. 120 */ 121 122 /* For /etc/system control */ 123 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 124 125 static void udp_addr_req(queue_t *q, mblk_t *mp); 126 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 127 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 128 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 129 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 130 const in6_addr_t *, in_port_t, uint32_t); 131 static void udp_capability_req(queue_t *q, mblk_t *mp); 132 static int udp_tpi_close(queue_t *q, int flags, cred_t *); 133 static void udp_close_free(conn_t *); 134 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 135 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 136 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 137 int sys_error); 138 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 139 t_scalar_t tlierr, int sys_error); 140 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 141 cred_t *cr); 142 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 143 char *value, caddr_t cp, cred_t *cr); 144 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 145 char *value, caddr_t cp, cred_t *cr); 146 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 147 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 148 ip_recv_attr_t *ira); 149 static void udp_info_req(queue_t *q, mblk_t *mp); 150 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 151 static int udp_lrput(queue_t *, mblk_t *); 152 static int udp_lwput(queue_t *, mblk_t *); 153 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 154 cred_t *credp, boolean_t isv6); 155 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 156 cred_t *credp); 157 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 158 cred_t *credp); 159 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 160 int udp_opt_set(conn_t *connp, uint_t optset_context, 161 int level, int name, uint_t inlen, 162 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 163 void *thisdg_attrs, cred_t *cr); 164 int udp_opt_get(conn_t *connp, int level, int name, 165 uchar_t *ptr); 166 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 167 pid_t pid); 168 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 169 pid_t pid, ip_xmit_attr_t *ixa); 170 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 171 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 172 ip_xmit_attr_t *ixa); 173 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 174 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 175 int *); 176 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 177 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 178 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 179 static void udp_ud_err_connected(conn_t *, t_scalar_t); 180 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 181 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 182 boolean_t random); 183 static void udp_wput_other(queue_t *q, mblk_t *mp); 184 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 185 static int udp_wput_fallback(queue_t *q, mblk_t *mp); 186 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 187 188 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 189 static void udp_stack_fini(netstackid_t stackid, void *arg); 190 191 /* Common routines for TPI and socket module */ 192 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 193 194 /* Common routine for TPI and socket module */ 195 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); 196 static void udp_do_close(conn_t *); 197 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 198 boolean_t); 199 static int udp_do_unbind(conn_t *); 200 201 int udp_getsockname(sock_lower_handle_t, 202 struct sockaddr *, socklen_t *, cred_t *); 203 int udp_getpeername(sock_lower_handle_t, 204 struct sockaddr *, socklen_t *, cred_t *); 205 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 206 cred_t *, pid_t); 207 208 /* 209 * Checks if the given destination addr/port is allowed out. 210 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 211 * Called for each connect() and for sendto()/sendmsg() to a different 212 * destination. 213 * For connect(), called in udp_connect(). 214 * For sendto()/sendmsg(), called in udp_output_newdst(). 215 * 216 * This macro assumes that the cl_inet_connect2 hook is not NULL. 217 * Please check this before calling this macro. 218 * 219 * void 220 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 221 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 222 */ 223 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 224 (err) = 0; \ 225 /* \ 226 * Running in cluster mode - check and register active \ 227 * "connection" information \ 228 */ \ 229 if ((cp)->conn_ipversion == IPV4_VERSION) \ 230 (err) = (*cl_inet_connect2)( \ 231 (cp)->conn_netstack->netstack_stackid, \ 232 IPPROTO_UDP, is_outgoing, AF_INET, \ 233 (uint8_t *)&((cp)->conn_laddr_v4), \ 234 (cp)->conn_lport, \ 235 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 236 (in_port_t)(fport), NULL); \ 237 else \ 238 (err) = (*cl_inet_connect2)( \ 239 (cp)->conn_netstack->netstack_stackid, \ 240 IPPROTO_UDP, is_outgoing, AF_INET6, \ 241 (uint8_t *)&((cp)->conn_laddr_v6), \ 242 (cp)->conn_lport, \ 243 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 244 } 245 246 static struct module_info udp_mod_info = { 247 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 248 }; 249 250 /* 251 * Entry points for UDP as a device. 252 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 253 */ 254 static struct qinit udp_rinitv4 = { 255 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 256 }; 257 258 static struct qinit udp_rinitv6 = { 259 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 260 }; 261 262 static struct qinit udp_winit = { 263 udp_wput, ip_wsrv, NULL, NULL, NULL, &udp_mod_info 264 }; 265 266 /* UDP entry point during fallback */ 267 struct qinit udp_fallback_sock_winit = { 268 udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 269 }; 270 271 /* 272 * UDP needs to handle I_LINK and I_PLINK since ifconfig 273 * likes to use it as a place to hang the various streams. 274 */ 275 static struct qinit udp_lrinit = { 276 udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 277 }; 278 279 static struct qinit udp_lwinit = { 280 udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 281 }; 282 283 /* For AF_INET aka /dev/udp */ 284 struct streamtab udpinfov4 = { 285 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 286 }; 287 288 /* For AF_INET6 aka /dev/udp6 */ 289 struct streamtab udpinfov6 = { 290 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 291 }; 292 293 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 294 295 /* Default structure copied into T_INFO_ACK messages */ 296 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 297 T_INFO_ACK, 298 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 299 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 300 T_INVALID, /* CDATA_size. udp does not support connect data. */ 301 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 302 sizeof (sin_t), /* ADDR_size. */ 303 0, /* OPT_size - not initialized here */ 304 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 305 T_CLTS, /* SERV_type. udp supports connection-less. */ 306 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 307 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 308 }; 309 310 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 311 312 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 313 T_INFO_ACK, 314 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 315 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 316 T_INVALID, /* CDATA_size. udp does not support connect data. */ 317 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 318 sizeof (sin6_t), /* ADDR_size. */ 319 0, /* OPT_size - not initialized here */ 320 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 321 T_CLTS, /* SERV_type. udp supports connection-less. */ 322 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 323 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 324 }; 325 326 /* 327 * UDP tunables related declarations. Definitions are in udp_tunables.c 328 */ 329 extern mod_prop_info_t udp_propinfo_tbl[]; 330 extern int udp_propinfo_count; 331 332 /* Setable in /etc/system */ 333 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 334 uint32_t udp_random_anon_port = 1; 335 336 /* 337 * Hook functions to enable cluster networking. 338 * On non-clustered systems these vectors must always be NULL 339 */ 340 341 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 342 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 343 void *args) = NULL; 344 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 345 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 346 void *args) = NULL; 347 348 typedef union T_primitives *t_primp_t; 349 350 /* 351 * Various protocols that encapsulate UDP have no real use for the source port. 352 * Instead, they want to vary the source port to provide better equal-cost 353 * multipathing and other systems that use fanout. Consider something like 354 * VXLAN. If you're actually sending multiple different streams to a single 355 * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP, 356 * SRC Port, DST Port) will always be the same. 357 * 358 * Here, we return a port to hash this to, if we know how to hash it. If for 359 * some reason we can't perform an L4 hash, then we just return the default 360 * value, usually the default port. After we determine the hash we transform it 361 * so that it's in the range of [ min, max ]. 362 * 363 * We'd like to avoid a pull up for the sake of performing the hash. If the 364 * first mblk_t doesn't have the full protocol header, then we just send it to 365 * the default. If for some reason we have an encapsulated packet that has its 366 * protocol header in different parts of an mblk_t, then we'll go with the 367 * default port. This means that that if a driver isn't consistent about how it 368 * generates the frames for a given flow, it will not always be consistently 369 * hashed. That should be an uncommon event. 370 */ 371 uint16_t 372 udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max, 373 uint16_t def) 374 { 375 size_t szused = 0; 376 ip6_t *ip6h; 377 ipha_t *ipha; 378 uint16_t sap; 379 uint64_t hash; 380 uint32_t mod; 381 382 ASSERT(min <= max); 383 384 if (type != UDP_HASH_VXLAN) 385 return (def); 386 387 if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))) 388 return (def); 389 390 /* 391 * The following logic is VXLAN specific to get at the header, if we 392 * have formats, eg. GENEVE, then we should ignore this. 393 * 394 * The kernel overlay device often puts a first mblk_t for the data 395 * which is just the encap. If so, then we're going to use that and try 396 * to avoid a pull up. 397 */ 398 if (MBLKL(mp) == VXLAN_HDR_LEN) { 399 if (mp->b_cont == NULL) 400 return (def); 401 mp = mp->b_cont; 402 } else if (MBLKL(mp) < VXLAN_HDR_LEN) { 403 return (def); 404 } else { 405 szused = VXLAN_HDR_LEN; 406 } 407 408 /* Can we hold a MAC header? */ 409 if (MBLKL(mp) + szused < sizeof (struct ether_header)) 410 return (def); 411 412 /* 413 * We need to lie about the starting offset into the message block for 414 * convenience. Undo it at the end. We know that inet_pkt_hash() won't 415 * modify the mblk_t. 416 */ 417 mp->b_rptr += szused; 418 hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 | 419 INET_PKT_HASH_L3 | INET_PKT_HASH_L4); 420 mp->b_rptr -= szused; 421 422 if (hash == 0) 423 return (def); 424 425 mod = max - min + 1; 426 return ((hash % mod) + min); 427 } 428 429 /* 430 * Return the next anonymous port in the privileged port range for 431 * bind checking. 432 * 433 * Trusted Extension (TX) notes: TX allows administrator to mark or 434 * reserve ports as Multilevel ports (MLP). MLP has special function 435 * on TX systems. Once a port is made MLP, it's not available as 436 * ordinary port. This creates "holes" in the port name space. It 437 * may be necessary to skip the "holes" find a suitable anon port. 438 */ 439 static in_port_t 440 udp_get_next_priv_port(udp_t *udp) 441 { 442 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 443 in_port_t nextport; 444 boolean_t restart = B_FALSE; 445 udp_stack_t *us = udp->udp_us; 446 447 retry: 448 if (next_priv_port < us->us_min_anonpriv_port || 449 next_priv_port >= IPPORT_RESERVED) { 450 next_priv_port = IPPORT_RESERVED - 1; 451 if (restart) 452 return (0); 453 restart = B_TRUE; 454 } 455 456 if (is_system_labeled() && 457 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 458 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 459 next_priv_port = nextport; 460 goto retry; 461 } 462 463 return (next_priv_port--); 464 } 465 466 /* 467 * Hash list removal routine for udp_t structures. 468 */ 469 static void 470 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 471 { 472 udp_t *udpnext; 473 kmutex_t *lockp; 474 udp_stack_t *us = udp->udp_us; 475 conn_t *connp = udp->udp_connp; 476 477 if (udp->udp_ptpbhn == NULL) 478 return; 479 480 /* 481 * Extract the lock pointer in case there are concurrent 482 * hash_remove's for this instance. 483 */ 484 ASSERT(connp->conn_lport != 0); 485 if (!caller_holds_lock) { 486 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 487 us->us_bind_fanout_size)].uf_lock; 488 ASSERT(lockp != NULL); 489 mutex_enter(lockp); 490 } 491 if (udp->udp_ptpbhn != NULL) { 492 udpnext = udp->udp_bind_hash; 493 if (udpnext != NULL) { 494 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 495 udp->udp_bind_hash = NULL; 496 } 497 *udp->udp_ptpbhn = udpnext; 498 udp->udp_ptpbhn = NULL; 499 } 500 if (!caller_holds_lock) { 501 mutex_exit(lockp); 502 } 503 } 504 505 static void 506 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 507 { 508 conn_t *connp = udp->udp_connp; 509 udp_t **udpp; 510 udp_t *udpnext; 511 conn_t *connext; 512 513 ASSERT(MUTEX_HELD(&uf->uf_lock)); 514 ASSERT(udp->udp_ptpbhn == NULL); 515 udpp = &uf->uf_udp; 516 udpnext = udpp[0]; 517 if (udpnext != NULL) { 518 /* 519 * If the new udp bound to the INADDR_ANY address 520 * and the first one in the list is not bound to 521 * INADDR_ANY we skip all entries until we find the 522 * first one bound to INADDR_ANY. 523 * This makes sure that applications binding to a 524 * specific address get preference over those binding to 525 * INADDR_ANY. 526 */ 527 connext = udpnext->udp_connp; 528 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 529 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 530 while ((udpnext = udpp[0]) != NULL && 531 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 532 udpp = &(udpnext->udp_bind_hash); 533 } 534 if (udpnext != NULL) 535 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 536 } else { 537 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 538 } 539 } 540 udp->udp_bind_hash = udpnext; 541 udp->udp_ptpbhn = udpp; 542 udpp[0] = udp; 543 } 544 545 /* 546 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 547 * passed to udp_wput. 548 * It associates a port number and local address with the stream. 549 * It calls IP to verify the local IP address, and calls IP to insert 550 * the conn_t in the fanout table. 551 * If everything is ok it then sends the T_BIND_ACK back up. 552 * 553 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 554 * without setting SO_REUSEADDR. This is needed so that they 555 * can be viewed as two independent transport protocols. 556 * However, anonymouns ports are allocated from the same range to avoid 557 * duplicating the us->us_next_port_to_try. 558 */ 559 static void 560 udp_tpi_bind(queue_t *q, mblk_t *mp) 561 { 562 sin_t *sin; 563 sin6_t *sin6; 564 mblk_t *mp1; 565 struct T_bind_req *tbr; 566 conn_t *connp; 567 udp_t *udp; 568 int error; 569 struct sockaddr *sa; 570 cred_t *cr; 571 572 /* 573 * All Solaris components should pass a db_credp 574 * for this TPI message, hence we ASSERT. 575 * But in case there is some other M_PROTO that looks 576 * like a TPI message sent by some other kernel 577 * component, we check and return an error. 578 */ 579 cr = msg_getcred(mp, NULL); 580 ASSERT(cr != NULL); 581 if (cr == NULL) { 582 udp_err_ack(q, mp, TSYSERR, EINVAL); 583 return; 584 } 585 586 connp = Q_TO_CONN(q); 587 udp = connp->conn_udp; 588 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 589 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 590 "udp_bind: bad req, len %u", 591 (uint_t)(mp->b_wptr - mp->b_rptr)); 592 udp_err_ack(q, mp, TPROTO, 0); 593 return; 594 } 595 if (udp->udp_state != TS_UNBND) { 596 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 597 "udp_bind: bad state, %u", udp->udp_state); 598 udp_err_ack(q, mp, TOUTSTATE, 0); 599 return; 600 } 601 /* 602 * Reallocate the message to make sure we have enough room for an 603 * address. 604 */ 605 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 606 if (mp1 == NULL) { 607 udp_err_ack(q, mp, TSYSERR, ENOMEM); 608 return; 609 } 610 611 mp = mp1; 612 613 /* Reset the message type in preparation for shipping it back. */ 614 DB_TYPE(mp) = M_PCPROTO; 615 616 tbr = (struct T_bind_req *)mp->b_rptr; 617 switch (tbr->ADDR_length) { 618 case 0: /* Request for a generic port */ 619 tbr->ADDR_offset = sizeof (struct T_bind_req); 620 if (connp->conn_family == AF_INET) { 621 tbr->ADDR_length = sizeof (sin_t); 622 sin = (sin_t *)&tbr[1]; 623 *sin = sin_null; 624 sin->sin_family = AF_INET; 625 mp->b_wptr = (uchar_t *)&sin[1]; 626 sa = (struct sockaddr *)sin; 627 } else { 628 ASSERT(connp->conn_family == AF_INET6); 629 tbr->ADDR_length = sizeof (sin6_t); 630 sin6 = (sin6_t *)&tbr[1]; 631 *sin6 = sin6_null; 632 sin6->sin6_family = AF_INET6; 633 mp->b_wptr = (uchar_t *)&sin6[1]; 634 sa = (struct sockaddr *)sin6; 635 } 636 break; 637 638 case sizeof (sin_t): /* Complete IPv4 address */ 639 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 640 sizeof (sin_t)); 641 if (sa == NULL || !OK_32PTR((char *)sa)) { 642 udp_err_ack(q, mp, TSYSERR, EINVAL); 643 return; 644 } 645 if (connp->conn_family != AF_INET || 646 sa->sa_family != AF_INET) { 647 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 648 return; 649 } 650 break; 651 652 case sizeof (sin6_t): /* complete IPv6 address */ 653 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 654 sizeof (sin6_t)); 655 if (sa == NULL || !OK_32PTR((char *)sa)) { 656 udp_err_ack(q, mp, TSYSERR, EINVAL); 657 return; 658 } 659 if (connp->conn_family != AF_INET6 || 660 sa->sa_family != AF_INET6) { 661 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 662 return; 663 } 664 break; 665 666 default: /* Invalid request */ 667 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 668 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 669 udp_err_ack(q, mp, TBADADDR, 0); 670 return; 671 } 672 673 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 674 tbr->PRIM_type != O_T_BIND_REQ); 675 676 if (error != 0) { 677 if (error > 0) { 678 udp_err_ack(q, mp, TSYSERR, error); 679 } else { 680 udp_err_ack(q, mp, -error, 0); 681 } 682 } else { 683 tbr->PRIM_type = T_BIND_ACK; 684 qreply(q, mp); 685 } 686 } 687 688 /* 689 * This routine handles each T_CONN_REQ message passed to udp. It 690 * associates a default destination address with the stream. 691 * 692 * After various error checks are completed, udp_connect() lays 693 * the target address and port into the composite header template. 694 * Then we ask IP for information, including a source address if we didn't 695 * already have one. Finally we send up the T_OK_ACK reply message. 696 */ 697 static void 698 udp_tpi_connect(queue_t *q, mblk_t *mp) 699 { 700 conn_t *connp = Q_TO_CONN(q); 701 int error; 702 socklen_t len; 703 struct sockaddr *sa; 704 struct T_conn_req *tcr; 705 cred_t *cr; 706 pid_t pid; 707 /* 708 * All Solaris components should pass a db_credp 709 * for this TPI message, hence we ASSERT. 710 * But in case there is some other M_PROTO that looks 711 * like a TPI message sent by some other kernel 712 * component, we check and return an error. 713 */ 714 cr = msg_getcred(mp, &pid); 715 ASSERT(cr != NULL); 716 if (cr == NULL) { 717 udp_err_ack(q, mp, TSYSERR, EINVAL); 718 return; 719 } 720 721 tcr = (struct T_conn_req *)mp->b_rptr; 722 723 /* A bit of sanity checking */ 724 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 725 udp_err_ack(q, mp, TPROTO, 0); 726 return; 727 } 728 729 if (tcr->OPT_length != 0) { 730 udp_err_ack(q, mp, TBADOPT, 0); 731 return; 732 } 733 734 /* 735 * Determine packet type based on type of address passed in 736 * the request should contain an IPv4 or IPv6 address. 737 * Make sure that address family matches the type of 738 * family of the address passed down. 739 */ 740 len = tcr->DEST_length; 741 switch (tcr->DEST_length) { 742 default: 743 udp_err_ack(q, mp, TBADADDR, 0); 744 return; 745 746 case sizeof (sin_t): 747 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 748 sizeof (sin_t)); 749 break; 750 751 case sizeof (sin6_t): 752 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 753 sizeof (sin6_t)); 754 break; 755 } 756 757 error = proto_verify_ip_addr(connp->conn_family, sa, len); 758 if (error != 0) { 759 udp_err_ack(q, mp, TSYSERR, error); 760 return; 761 } 762 763 error = udp_do_connect(connp, sa, len, cr, pid); 764 if (error != 0) { 765 if (error < 0) 766 udp_err_ack(q, mp, -error, 0); 767 else 768 udp_err_ack(q, mp, TSYSERR, error); 769 } else { 770 mblk_t *mp1; 771 /* 772 * We have to send a connection confirmation to 773 * keep TLI happy. 774 */ 775 if (connp->conn_family == AF_INET) { 776 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 777 sizeof (sin_t), NULL, 0); 778 } else { 779 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 780 sizeof (sin6_t), NULL, 0); 781 } 782 if (mp1 == NULL) { 783 udp_err_ack(q, mp, TSYSERR, ENOMEM); 784 return; 785 } 786 787 /* 788 * Send ok_ack for T_CONN_REQ 789 */ 790 mp = mi_tpi_ok_ack_alloc(mp); 791 if (mp == NULL) { 792 /* Unable to reuse the T_CONN_REQ for the ack. */ 793 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 794 return; 795 } 796 797 putnext(connp->conn_rq, mp); 798 putnext(connp->conn_rq, mp1); 799 } 800 } 801 802 /* ARGSUSED */ 803 static int 804 udp_tpi_close(queue_t *q, int flags, cred_t *credp __unused) 805 { 806 conn_t *connp; 807 808 if (flags & SO_FALLBACK) { 809 /* 810 * stream is being closed while in fallback 811 * simply free the resources that were allocated 812 */ 813 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 814 qprocsoff(q); 815 goto done; 816 } 817 818 connp = Q_TO_CONN(q); 819 udp_do_close(connp); 820 done: 821 q->q_ptr = WR(q)->q_ptr = NULL; 822 return (0); 823 } 824 825 static void 826 udp_close_free(conn_t *connp) 827 { 828 udp_t *udp = connp->conn_udp; 829 830 /* If there are any options associated with the stream, free them. */ 831 if (udp->udp_recv_ipp.ipp_fields != 0) 832 ip_pkt_free(&udp->udp_recv_ipp); 833 834 /* 835 * Clear any fields which the kmem_cache constructor clears. 836 * Only udp_connp needs to be preserved. 837 * TBD: We should make this more efficient to avoid clearing 838 * everything. 839 */ 840 ASSERT(udp->udp_connp == connp); 841 bzero(udp, sizeof (udp_t)); 842 udp->udp_connp = connp; 843 } 844 845 static int 846 udp_do_disconnect(conn_t *connp) 847 { 848 udp_t *udp; 849 udp_fanout_t *udpf; 850 udp_stack_t *us; 851 int error; 852 853 udp = connp->conn_udp; 854 us = udp->udp_us; 855 mutex_enter(&connp->conn_lock); 856 if (udp->udp_state != TS_DATA_XFER) { 857 mutex_exit(&connp->conn_lock); 858 return (-TOUTSTATE); 859 } 860 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 861 us->us_bind_fanout_size)]; 862 mutex_enter(&udpf->uf_lock); 863 if (connp->conn_mcbc_bind) 864 connp->conn_saddr_v6 = ipv6_all_zeros; 865 else 866 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 867 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 868 connp->conn_faddr_v6 = ipv6_all_zeros; 869 connp->conn_fport = 0; 870 udp->udp_state = TS_IDLE; 871 mutex_exit(&udpf->uf_lock); 872 873 /* Remove any remnants of mapped address binding */ 874 if (connp->conn_family == AF_INET6) 875 connp->conn_ipversion = IPV6_VERSION; 876 877 connp->conn_v6lastdst = ipv6_all_zeros; 878 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 879 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 880 mutex_exit(&connp->conn_lock); 881 if (error != 0) 882 return (error); 883 884 /* 885 * Tell IP to remove the full binding and revert 886 * to the local address binding. 887 */ 888 return (ip_laddr_fanout_insert(connp)); 889 } 890 891 static void 892 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 893 { 894 conn_t *connp = Q_TO_CONN(q); 895 int error; 896 897 /* 898 * Allocate the largest primitive we need to send back 899 * T_error_ack is > than T_ok_ack 900 */ 901 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 902 if (mp == NULL) { 903 /* Unable to reuse the T_DISCON_REQ for the ack. */ 904 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 905 return; 906 } 907 908 error = udp_do_disconnect(connp); 909 910 if (error != 0) { 911 if (error < 0) { 912 udp_err_ack(q, mp, -error, 0); 913 } else { 914 udp_err_ack(q, mp, TSYSERR, error); 915 } 916 } else { 917 mp = mi_tpi_ok_ack_alloc(mp); 918 ASSERT(mp != NULL); 919 qreply(q, mp); 920 } 921 } 922 923 int 924 udp_disconnect(conn_t *connp) 925 { 926 int error; 927 928 connp->conn_dgram_errind = B_FALSE; 929 error = udp_do_disconnect(connp); 930 if (error < 0) 931 error = proto_tlitosyserr(-error); 932 933 return (error); 934 } 935 936 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 937 static void 938 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 939 { 940 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 941 qreply(q, mp); 942 } 943 944 /* Shorthand to generate and send TPI error acks to our client */ 945 static void 946 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 947 t_scalar_t t_error, int sys_error) 948 { 949 struct T_error_ack *teackp; 950 951 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 952 M_PCPROTO, T_ERROR_ACK)) != NULL) { 953 teackp = (struct T_error_ack *)mp->b_rptr; 954 teackp->ERROR_prim = primitive; 955 teackp->TLI_error = t_error; 956 teackp->UNIX_error = sys_error; 957 qreply(q, mp); 958 } 959 } 960 961 /* At minimum we need 4 bytes of UDP header */ 962 #define ICMP_MIN_UDP_HDR 4 963 964 /* 965 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 966 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 967 * Assumes that IP has pulled up everything up to and including the ICMP header. 968 */ 969 /* ARGSUSED2 */ 970 static void 971 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 972 { 973 conn_t *connp = (conn_t *)arg1; 974 icmph_t *icmph; 975 ipha_t *ipha; 976 int iph_hdr_length; 977 udpha_t *udpha; 978 sin_t sin; 979 sin6_t sin6; 980 mblk_t *mp1; 981 int error = 0; 982 udp_t *udp = connp->conn_udp; 983 984 ipha = (ipha_t *)mp->b_rptr; 985 986 ASSERT(OK_32PTR(mp->b_rptr)); 987 988 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 989 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 990 udp_icmp_error_ipv6(connp, mp, ira); 991 return; 992 } 993 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 994 995 /* Skip past the outer IP and ICMP headers */ 996 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 997 iph_hdr_length = ira->ira_ip_hdr_length; 998 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 999 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 1000 1001 /* Skip past the inner IP and find the ULP header */ 1002 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1003 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1004 1005 switch (icmph->icmph_type) { 1006 case ICMP_DEST_UNREACHABLE: 1007 switch (icmph->icmph_code) { 1008 case ICMP_FRAGMENTATION_NEEDED: { 1009 ipha_t *ipha; 1010 ip_xmit_attr_t *ixa; 1011 /* 1012 * IP has already adjusted the path MTU. 1013 * But we need to adjust DF for IPv4. 1014 */ 1015 if (connp->conn_ipversion != IPV4_VERSION) 1016 break; 1017 1018 ixa = conn_get_ixa(connp, B_FALSE); 1019 if (ixa == NULL || ixa->ixa_ire == NULL) { 1020 /* 1021 * Some other thread holds conn_ixa. We will 1022 * redo this on the next ICMP too big. 1023 */ 1024 if (ixa != NULL) 1025 ixa_refrele(ixa); 1026 break; 1027 } 1028 (void) ip_get_pmtu(ixa); 1029 1030 mutex_enter(&connp->conn_lock); 1031 ipha = (ipha_t *)connp->conn_ht_iphc; 1032 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 1033 ipha->ipha_fragment_offset_and_flags |= 1034 IPH_DF_HTONS; 1035 } else { 1036 ipha->ipha_fragment_offset_and_flags &= 1037 ~IPH_DF_HTONS; 1038 } 1039 mutex_exit(&connp->conn_lock); 1040 ixa_refrele(ixa); 1041 break; 1042 } 1043 case ICMP_PORT_UNREACHABLE: 1044 case ICMP_PROTOCOL_UNREACHABLE: 1045 error = ECONNREFUSED; 1046 break; 1047 default: 1048 /* Transient errors */ 1049 break; 1050 } 1051 break; 1052 default: 1053 /* Transient errors */ 1054 break; 1055 } 1056 if (error == 0) { 1057 freemsg(mp); 1058 return; 1059 } 1060 1061 /* 1062 * Deliver T_UDERROR_IND when the application has asked for it. 1063 * The socket layer enables this automatically when connected. 1064 */ 1065 if (!connp->conn_dgram_errind) { 1066 freemsg(mp); 1067 return; 1068 } 1069 1070 switch (connp->conn_family) { 1071 case AF_INET: 1072 sin = sin_null; 1073 sin.sin_family = AF_INET; 1074 sin.sin_addr.s_addr = ipha->ipha_dst; 1075 sin.sin_port = udpha->uha_dst_port; 1076 if (IPCL_IS_NONSTR(connp)) { 1077 mutex_enter(&connp->conn_lock); 1078 if (udp->udp_state == TS_DATA_XFER) { 1079 if (sin.sin_port == connp->conn_fport && 1080 sin.sin_addr.s_addr == 1081 connp->conn_faddr_v4) { 1082 mutex_exit(&connp->conn_lock); 1083 (*connp->conn_upcalls->su_set_error) 1084 (connp->conn_upper_handle, error); 1085 goto done; 1086 } 1087 } else { 1088 udp->udp_delayed_error = error; 1089 *((sin_t *)&udp->udp_delayed_addr) = sin; 1090 } 1091 mutex_exit(&connp->conn_lock); 1092 } else { 1093 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1094 NULL, 0, error); 1095 if (mp1 != NULL) 1096 putnext(connp->conn_rq, mp1); 1097 } 1098 break; 1099 case AF_INET6: 1100 sin6 = sin6_null; 1101 sin6.sin6_family = AF_INET6; 1102 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1103 sin6.sin6_port = udpha->uha_dst_port; 1104 if (IPCL_IS_NONSTR(connp)) { 1105 mutex_enter(&connp->conn_lock); 1106 if (udp->udp_state == TS_DATA_XFER) { 1107 if (sin6.sin6_port == connp->conn_fport && 1108 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1109 &connp->conn_faddr_v6)) { 1110 mutex_exit(&connp->conn_lock); 1111 (*connp->conn_upcalls->su_set_error) 1112 (connp->conn_upper_handle, error); 1113 goto done; 1114 } 1115 } else { 1116 udp->udp_delayed_error = error; 1117 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1118 } 1119 mutex_exit(&connp->conn_lock); 1120 } else { 1121 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1122 NULL, 0, error); 1123 if (mp1 != NULL) 1124 putnext(connp->conn_rq, mp1); 1125 } 1126 break; 1127 } 1128 done: 1129 freemsg(mp); 1130 } 1131 1132 /* 1133 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1134 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1135 * Assumes that IP has pulled up all the extension headers as well as the 1136 * ICMPv6 header. 1137 */ 1138 static void 1139 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1140 { 1141 icmp6_t *icmp6; 1142 ip6_t *ip6h, *outer_ip6h; 1143 uint16_t iph_hdr_length; 1144 uint8_t *nexthdrp; 1145 udpha_t *udpha; 1146 sin6_t sin6; 1147 mblk_t *mp1; 1148 int error = 0; 1149 udp_t *udp = connp->conn_udp; 1150 udp_stack_t *us = udp->udp_us; 1151 1152 outer_ip6h = (ip6_t *)mp->b_rptr; 1153 #ifdef DEBUG 1154 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1155 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1156 else 1157 iph_hdr_length = IPV6_HDR_LEN; 1158 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1159 #endif 1160 /* Skip past the outer IP and ICMP headers */ 1161 iph_hdr_length = ira->ira_ip_hdr_length; 1162 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1163 1164 /* Skip past the inner IP and find the ULP header */ 1165 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1166 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1167 freemsg(mp); 1168 return; 1169 } 1170 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1171 1172 switch (icmp6->icmp6_type) { 1173 case ICMP6_DST_UNREACH: 1174 switch (icmp6->icmp6_code) { 1175 case ICMP6_DST_UNREACH_NOPORT: 1176 error = ECONNREFUSED; 1177 break; 1178 case ICMP6_DST_UNREACH_ADMIN: 1179 case ICMP6_DST_UNREACH_NOROUTE: 1180 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1181 case ICMP6_DST_UNREACH_ADDR: 1182 /* Transient errors */ 1183 break; 1184 default: 1185 break; 1186 } 1187 break; 1188 case ICMP6_PACKET_TOO_BIG: { 1189 struct T_unitdata_ind *tudi; 1190 struct T_opthdr *toh; 1191 size_t udi_size; 1192 mblk_t *newmp; 1193 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1194 sizeof (struct ip6_mtuinfo); 1195 sin6_t *sin6; 1196 struct ip6_mtuinfo *mtuinfo; 1197 1198 /* 1199 * If the application has requested to receive path mtu 1200 * information, send up an empty message containing an 1201 * IPV6_PATHMTU ancillary data item. 1202 */ 1203 if (!connp->conn_ipv6_recvpathmtu) 1204 break; 1205 1206 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1207 opt_length; 1208 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1209 UDPS_BUMP_MIB(us, udpInErrors); 1210 break; 1211 } 1212 1213 /* 1214 * newmp->b_cont is left to NULL on purpose. This is an 1215 * empty message containing only ancillary data. 1216 */ 1217 newmp->b_datap->db_type = M_PROTO; 1218 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1219 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1220 tudi->PRIM_type = T_UNITDATA_IND; 1221 tudi->SRC_length = sizeof (sin6_t); 1222 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1223 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1224 tudi->OPT_length = opt_length; 1225 1226 sin6 = (sin6_t *)&tudi[1]; 1227 bzero(sin6, sizeof (sin6_t)); 1228 sin6->sin6_family = AF_INET6; 1229 sin6->sin6_addr = connp->conn_faddr_v6; 1230 1231 toh = (struct T_opthdr *)&sin6[1]; 1232 toh->level = IPPROTO_IPV6; 1233 toh->name = IPV6_PATHMTU; 1234 toh->len = opt_length; 1235 toh->status = 0; 1236 1237 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1238 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1239 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1240 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1241 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1242 /* 1243 * We've consumed everything we need from the original 1244 * message. Free it, then send our empty message. 1245 */ 1246 freemsg(mp); 1247 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); 1248 return; 1249 } 1250 case ICMP6_TIME_EXCEEDED: 1251 /* Transient errors */ 1252 break; 1253 case ICMP6_PARAM_PROB: 1254 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1255 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1256 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1257 (uchar_t *)nexthdrp) { 1258 error = ECONNREFUSED; 1259 break; 1260 } 1261 break; 1262 } 1263 if (error == 0) { 1264 freemsg(mp); 1265 return; 1266 } 1267 1268 /* 1269 * Deliver T_UDERROR_IND when the application has asked for it. 1270 * The socket layer enables this automatically when connected. 1271 */ 1272 if (!connp->conn_dgram_errind) { 1273 freemsg(mp); 1274 return; 1275 } 1276 1277 sin6 = sin6_null; 1278 sin6.sin6_family = AF_INET6; 1279 sin6.sin6_addr = ip6h->ip6_dst; 1280 sin6.sin6_port = udpha->uha_dst_port; 1281 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1282 1283 if (IPCL_IS_NONSTR(connp)) { 1284 mutex_enter(&connp->conn_lock); 1285 if (udp->udp_state == TS_DATA_XFER) { 1286 if (sin6.sin6_port == connp->conn_fport && 1287 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1288 &connp->conn_faddr_v6)) { 1289 mutex_exit(&connp->conn_lock); 1290 (*connp->conn_upcalls->su_set_error) 1291 (connp->conn_upper_handle, error); 1292 goto done; 1293 } 1294 } else { 1295 udp->udp_delayed_error = error; 1296 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1297 } 1298 mutex_exit(&connp->conn_lock); 1299 } else { 1300 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1301 NULL, 0, error); 1302 if (mp1 != NULL) 1303 putnext(connp->conn_rq, mp1); 1304 } 1305 done: 1306 freemsg(mp); 1307 } 1308 1309 /* 1310 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1311 * The local address is filled in if endpoint is bound. The remote address 1312 * is filled in if remote address has been precified ("connected endpoint") 1313 * (The concept of connected CLTS sockets is alien to published TPI 1314 * but we support it anyway). 1315 */ 1316 static void 1317 udp_addr_req(queue_t *q, mblk_t *mp) 1318 { 1319 struct sockaddr *sa; 1320 mblk_t *ackmp; 1321 struct T_addr_ack *taa; 1322 udp_t *udp = Q_TO_UDP(q); 1323 conn_t *connp = udp->udp_connp; 1324 uint_t addrlen; 1325 1326 /* Make it large enough for worst case */ 1327 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1328 2 * sizeof (sin6_t), 1); 1329 if (ackmp == NULL) { 1330 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1331 return; 1332 } 1333 taa = (struct T_addr_ack *)ackmp->b_rptr; 1334 1335 bzero(taa, sizeof (struct T_addr_ack)); 1336 ackmp->b_wptr = (uchar_t *)&taa[1]; 1337 1338 taa->PRIM_type = T_ADDR_ACK; 1339 ackmp->b_datap->db_type = M_PCPROTO; 1340 1341 if (connp->conn_family == AF_INET) 1342 addrlen = sizeof (sin_t); 1343 else 1344 addrlen = sizeof (sin6_t); 1345 1346 mutex_enter(&connp->conn_lock); 1347 /* 1348 * Note: Following code assumes 32 bit alignment of basic 1349 * data structures like sin_t and struct T_addr_ack. 1350 */ 1351 if (udp->udp_state != TS_UNBND) { 1352 /* 1353 * Fill in local address first 1354 */ 1355 taa->LOCADDR_offset = sizeof (*taa); 1356 taa->LOCADDR_length = addrlen; 1357 sa = (struct sockaddr *)&taa[1]; 1358 (void) conn_getsockname(connp, sa, &addrlen); 1359 ackmp->b_wptr += addrlen; 1360 } 1361 if (udp->udp_state == TS_DATA_XFER) { 1362 /* 1363 * connected, fill remote address too 1364 */ 1365 taa->REMADDR_length = addrlen; 1366 /* assumed 32-bit alignment */ 1367 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1368 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1369 (void) conn_getpeername(connp, sa, &addrlen); 1370 ackmp->b_wptr += addrlen; 1371 } 1372 mutex_exit(&connp->conn_lock); 1373 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1374 qreply(q, ackmp); 1375 } 1376 1377 static void 1378 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1379 { 1380 conn_t *connp = udp->udp_connp; 1381 1382 if (connp->conn_family == AF_INET) { 1383 *tap = udp_g_t_info_ack_ipv4; 1384 } else { 1385 *tap = udp_g_t_info_ack_ipv6; 1386 } 1387 tap->CURRENT_state = udp->udp_state; 1388 tap->OPT_size = udp_max_optsize; 1389 } 1390 1391 static void 1392 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1393 t_uscalar_t cap_bits1) 1394 { 1395 tcap->CAP_bits1 = 0; 1396 1397 if (cap_bits1 & TC1_INFO) { 1398 udp_copy_info(&tcap->INFO_ack, udp); 1399 tcap->CAP_bits1 |= TC1_INFO; 1400 } 1401 } 1402 1403 /* 1404 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1405 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1406 * udp_g_t_info_ack. The current state of the stream is copied from 1407 * udp_state. 1408 */ 1409 static void 1410 udp_capability_req(queue_t *q, mblk_t *mp) 1411 { 1412 t_uscalar_t cap_bits1; 1413 struct T_capability_ack *tcap; 1414 udp_t *udp = Q_TO_UDP(q); 1415 1416 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1417 1418 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1419 mp->b_datap->db_type, T_CAPABILITY_ACK); 1420 if (!mp) 1421 return; 1422 1423 tcap = (struct T_capability_ack *)mp->b_rptr; 1424 udp_do_capability_ack(udp, tcap, cap_bits1); 1425 1426 qreply(q, mp); 1427 } 1428 1429 /* 1430 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1431 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1432 * The current state of the stream is copied from udp_state. 1433 */ 1434 static void 1435 udp_info_req(queue_t *q, mblk_t *mp) 1436 { 1437 udp_t *udp = Q_TO_UDP(q); 1438 1439 /* Create a T_INFO_ACK message. */ 1440 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1441 T_INFO_ACK); 1442 if (!mp) 1443 return; 1444 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1445 qreply(q, mp); 1446 } 1447 1448 /* For /dev/udp aka AF_INET open */ 1449 static int 1450 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1451 { 1452 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1453 } 1454 1455 /* For /dev/udp6 aka AF_INET6 open */ 1456 static int 1457 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1458 { 1459 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1460 } 1461 1462 /* 1463 * This is the open routine for udp. It allocates a udp_t structure for 1464 * the stream and, on the first open of the module, creates an ND table. 1465 */ 1466 static int 1467 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1468 boolean_t isv6) 1469 { 1470 udp_t *udp; 1471 conn_t *connp; 1472 dev_t conn_dev; 1473 vmem_t *minor_arena; 1474 int err; 1475 1476 /* If the stream is already open, return immediately. */ 1477 if (q->q_ptr != NULL) 1478 return (0); 1479 1480 if (sflag == MODOPEN) 1481 return (EINVAL); 1482 1483 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1484 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1485 minor_arena = ip_minor_arena_la; 1486 } else { 1487 /* 1488 * Either minor numbers in the large arena were exhausted 1489 * or a non socket application is doing the open. 1490 * Try to allocate from the small arena. 1491 */ 1492 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1493 return (EBUSY); 1494 1495 minor_arena = ip_minor_arena_sa; 1496 } 1497 1498 if (flag & SO_FALLBACK) { 1499 /* 1500 * Non streams socket needs a stream to fallback to 1501 */ 1502 RD(q)->q_ptr = (void *)conn_dev; 1503 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1504 WR(q)->q_ptr = (void *)minor_arena; 1505 qprocson(q); 1506 return (0); 1507 } 1508 1509 connp = udp_do_open(credp, isv6, KM_SLEEP, &err); 1510 if (connp == NULL) { 1511 inet_minor_free(minor_arena, conn_dev); 1512 return (err); 1513 } 1514 udp = connp->conn_udp; 1515 1516 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1517 connp->conn_dev = conn_dev; 1518 connp->conn_minor_arena = minor_arena; 1519 1520 /* 1521 * Initialize the udp_t structure for this stream. 1522 */ 1523 q->q_ptr = connp; 1524 WR(q)->q_ptr = connp; 1525 connp->conn_rq = q; 1526 connp->conn_wq = WR(q); 1527 1528 /* 1529 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1530 * need to lock anything. 1531 */ 1532 ASSERT(connp->conn_proto == IPPROTO_UDP); 1533 ASSERT(connp->conn_udp == udp); 1534 ASSERT(udp->udp_connp == connp); 1535 1536 if (flag & SO_SOCKSTR) { 1537 udp->udp_issocket = B_TRUE; 1538 } 1539 1540 WR(q)->q_hiwat = connp->conn_sndbuf; 1541 WR(q)->q_lowat = connp->conn_sndlowat; 1542 1543 qprocson(q); 1544 1545 /* Set the Stream head write offset and high watermark. */ 1546 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1547 (void) proto_set_rx_hiwat(q, connp, 1548 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1549 1550 mutex_enter(&connp->conn_lock); 1551 connp->conn_state_flags &= ~CONN_INCIPIENT; 1552 mutex_exit(&connp->conn_lock); 1553 return (0); 1554 } 1555 1556 /* 1557 * Which UDP options OK to set through T_UNITDATA_REQ... 1558 */ 1559 /* ARGSUSED */ 1560 static boolean_t 1561 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1562 { 1563 return (B_TRUE); 1564 } 1565 1566 /* 1567 * This routine gets default values of certain options whose default 1568 * values are maintained by protcol specific code 1569 */ 1570 int 1571 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1572 { 1573 udp_t *udp = Q_TO_UDP(q); 1574 udp_stack_t *us = udp->udp_us; 1575 int *i1 = (int *)ptr; 1576 1577 switch (level) { 1578 case IPPROTO_IP: 1579 switch (name) { 1580 case IP_MULTICAST_TTL: 1581 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1582 return (sizeof (uchar_t)); 1583 case IP_MULTICAST_LOOP: 1584 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1585 return (sizeof (uchar_t)); 1586 } 1587 break; 1588 case IPPROTO_IPV6: 1589 switch (name) { 1590 case IPV6_MULTICAST_HOPS: 1591 *i1 = IP_DEFAULT_MULTICAST_TTL; 1592 return (sizeof (int)); 1593 case IPV6_MULTICAST_LOOP: 1594 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1595 return (sizeof (int)); 1596 case IPV6_UNICAST_HOPS: 1597 *i1 = us->us_ipv6_hoplimit; 1598 return (sizeof (int)); 1599 } 1600 break; 1601 } 1602 return (-1); 1603 } 1604 1605 /* 1606 * This routine retrieves the current status of socket options. 1607 * It returns the size of the option retrieved, or -1. 1608 */ 1609 int 1610 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1611 uchar_t *ptr) 1612 { 1613 int *i1 = (int *)ptr; 1614 udp_t *udp = connp->conn_udp; 1615 int len; 1616 conn_opt_arg_t coas; 1617 int retval; 1618 1619 coas.coa_connp = connp; 1620 coas.coa_ixa = connp->conn_ixa; 1621 coas.coa_ipp = &connp->conn_xmit_ipp; 1622 coas.coa_ancillary = B_FALSE; 1623 coas.coa_changed = 0; 1624 1625 /* 1626 * We assume that the optcom framework has checked for the set 1627 * of levels and names that are supported, hence we don't worry 1628 * about rejecting based on that. 1629 * First check for UDP specific handling, then pass to common routine. 1630 */ 1631 switch (level) { 1632 case IPPROTO_IP: 1633 /* 1634 * Only allow IPv4 option processing on IPv4 sockets. 1635 */ 1636 if (connp->conn_family != AF_INET) 1637 return (-1); 1638 1639 switch (name) { 1640 case IP_OPTIONS: 1641 case T_IP_OPTIONS: 1642 mutex_enter(&connp->conn_lock); 1643 if (!(udp->udp_recv_ipp.ipp_fields & 1644 IPPF_IPV4_OPTIONS)) { 1645 mutex_exit(&connp->conn_lock); 1646 return (0); 1647 } 1648 1649 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1650 ASSERT(len != 0); 1651 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1652 mutex_exit(&connp->conn_lock); 1653 return (len); 1654 } 1655 break; 1656 case IPPROTO_UDP: 1657 switch (name) { 1658 case UDP_NAT_T_ENDPOINT: 1659 mutex_enter(&connp->conn_lock); 1660 *i1 = udp->udp_nat_t_endpoint; 1661 mutex_exit(&connp->conn_lock); 1662 return (sizeof (int)); 1663 case UDP_RCVHDR: 1664 mutex_enter(&connp->conn_lock); 1665 *i1 = udp->udp_rcvhdr ? 1 : 0; 1666 mutex_exit(&connp->conn_lock); 1667 return (sizeof (int)); 1668 case UDP_SRCPORT_HASH: 1669 mutex_enter(&connp->conn_lock); 1670 *i1 = udp->udp_vxlanhash; 1671 mutex_exit(&connp->conn_lock); 1672 return (sizeof (int)); 1673 } 1674 } 1675 mutex_enter(&connp->conn_lock); 1676 retval = conn_opt_get(&coas, level, name, ptr); 1677 mutex_exit(&connp->conn_lock); 1678 return (retval); 1679 } 1680 1681 /* 1682 * This routine retrieves the current status of socket options. 1683 * It returns the size of the option retrieved, or -1. 1684 */ 1685 int 1686 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1687 { 1688 conn_t *connp = Q_TO_CONN(q); 1689 int err; 1690 1691 err = udp_opt_get(connp, level, name, ptr); 1692 return (err); 1693 } 1694 1695 /* 1696 * This routine sets socket options. 1697 */ 1698 int 1699 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1700 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1701 { 1702 conn_t *connp = coa->coa_connp; 1703 ip_xmit_attr_t *ixa = coa->coa_ixa; 1704 udp_t *udp = connp->conn_udp; 1705 udp_stack_t *us = udp->udp_us; 1706 int *i1 = (int *)invalp; 1707 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1708 int error; 1709 1710 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1711 /* 1712 * First do UDP specific sanity checks and handle UDP specific 1713 * options. Note that some IPPROTO_UDP options are handled 1714 * by conn_opt_set. 1715 */ 1716 switch (level) { 1717 case SOL_SOCKET: 1718 switch (name) { 1719 case SO_SNDBUF: 1720 if (*i1 > us->us_max_buf) { 1721 return (ENOBUFS); 1722 } 1723 break; 1724 case SO_RCVBUF: 1725 if (*i1 > us->us_max_buf) { 1726 return (ENOBUFS); 1727 } 1728 break; 1729 1730 case SCM_UCRED: { 1731 struct ucred_s *ucr; 1732 cred_t *newcr; 1733 ts_label_t *tsl; 1734 1735 /* 1736 * Only sockets that have proper privileges and are 1737 * bound to MLPs will have any other value here, so 1738 * this implicitly tests for privilege to set label. 1739 */ 1740 if (connp->conn_mlp_type == mlptSingle) 1741 break; 1742 1743 ucr = (struct ucred_s *)invalp; 1744 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || 1745 ucr->uc_labeloff < sizeof (*ucr) || 1746 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1747 return (EINVAL); 1748 if (!checkonly) { 1749 /* 1750 * Set ixa_tsl to the new label. 1751 * We assume that crgetzoneid doesn't change 1752 * as part of the SCM_UCRED. 1753 */ 1754 ASSERT(cr != NULL); 1755 if ((tsl = crgetlabel(cr)) == NULL) 1756 return (EINVAL); 1757 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1758 tsl->tsl_doi, KM_NOSLEEP); 1759 if (newcr == NULL) 1760 return (ENOSR); 1761 ASSERT(newcr->cr_label != NULL); 1762 /* 1763 * Move the hold on the cr_label to ixa_tsl by 1764 * setting cr_label to NULL. Then release newcr. 1765 */ 1766 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1767 ixa->ixa_flags |= IXAF_UCRED_TSL; 1768 newcr->cr_label = NULL; 1769 crfree(newcr); 1770 coa->coa_changed |= COA_HEADER_CHANGED; 1771 coa->coa_changed |= COA_WROFF_CHANGED; 1772 } 1773 /* Fully handled this option. */ 1774 return (0); 1775 } 1776 } 1777 break; 1778 case IPPROTO_UDP: 1779 switch (name) { 1780 case UDP_NAT_T_ENDPOINT: 1781 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1782 return (error); 1783 } 1784 1785 /* 1786 * Use conn_family instead so we can avoid ambiguitites 1787 * with AF_INET6 sockets that may switch from IPv4 1788 * to IPv6. 1789 */ 1790 if (connp->conn_family != AF_INET) { 1791 return (EAFNOSUPPORT); 1792 } 1793 1794 if (!checkonly) { 1795 mutex_enter(&connp->conn_lock); 1796 udp->udp_nat_t_endpoint = onoff; 1797 mutex_exit(&connp->conn_lock); 1798 coa->coa_changed |= COA_HEADER_CHANGED; 1799 coa->coa_changed |= COA_WROFF_CHANGED; 1800 } 1801 /* Fully handled this option. */ 1802 return (0); 1803 case UDP_RCVHDR: 1804 mutex_enter(&connp->conn_lock); 1805 udp->udp_rcvhdr = onoff; 1806 mutex_exit(&connp->conn_lock); 1807 return (0); 1808 case UDP_SRCPORT_HASH: 1809 /* 1810 * This should have already been verified, but double 1811 * check. 1812 */ 1813 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1814 return (error); 1815 } 1816 1817 /* First see if the val is something we understand */ 1818 if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN) 1819 return (EINVAL); 1820 1821 if (!checkonly) { 1822 mutex_enter(&connp->conn_lock); 1823 udp->udp_vxlanhash = *i1; 1824 mutex_exit(&connp->conn_lock); 1825 } 1826 /* Fully handled this option. */ 1827 return (0); 1828 } 1829 break; 1830 } 1831 error = conn_opt_set(coa, level, name, inlen, invalp, 1832 checkonly, cr); 1833 return (error); 1834 } 1835 1836 /* 1837 * This routine sets socket options. 1838 */ 1839 int 1840 udp_opt_set(conn_t *connp, uint_t optset_context, int level, 1841 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 1842 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) 1843 { 1844 udp_t *udp = connp->conn_udp; 1845 int err; 1846 conn_opt_arg_t coas, *coa; 1847 boolean_t checkonly; 1848 udp_stack_t *us = udp->udp_us; 1849 1850 switch (optset_context) { 1851 case SETFN_OPTCOM_CHECKONLY: 1852 checkonly = B_TRUE; 1853 /* 1854 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1855 * inlen != 0 implies value supplied and 1856 * we have to "pretend" to set it. 1857 * inlen == 0 implies that there is no 1858 * value part in T_CHECK request and just validation 1859 * done elsewhere should be enough, we just return here. 1860 */ 1861 if (inlen == 0) { 1862 *outlenp = 0; 1863 return (0); 1864 } 1865 break; 1866 case SETFN_OPTCOM_NEGOTIATE: 1867 checkonly = B_FALSE; 1868 break; 1869 case SETFN_UD_NEGOTIATE: 1870 case SETFN_CONN_NEGOTIATE: 1871 checkonly = B_FALSE; 1872 /* 1873 * Negotiating local and "association-related" options 1874 * through T_UNITDATA_REQ. 1875 * 1876 * Following routine can filter out ones we do not 1877 * want to be "set" this way. 1878 */ 1879 if (!udp_opt_allow_udr_set(level, name)) { 1880 *outlenp = 0; 1881 return (EINVAL); 1882 } 1883 break; 1884 default: 1885 /* 1886 * We should never get here 1887 */ 1888 *outlenp = 0; 1889 return (EINVAL); 1890 } 1891 1892 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1893 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1894 1895 if (thisdg_attrs != NULL) { 1896 /* Options from T_UNITDATA_REQ */ 1897 coa = (conn_opt_arg_t *)thisdg_attrs; 1898 ASSERT(coa->coa_connp == connp); 1899 ASSERT(coa->coa_ixa != NULL); 1900 ASSERT(coa->coa_ipp != NULL); 1901 ASSERT(coa->coa_ancillary); 1902 } else { 1903 coa = &coas; 1904 coas.coa_connp = connp; 1905 /* Get a reference on conn_ixa to prevent concurrent mods */ 1906 coas.coa_ixa = conn_get_ixa(connp, B_TRUE); 1907 if (coas.coa_ixa == NULL) { 1908 *outlenp = 0; 1909 return (ENOMEM); 1910 } 1911 coas.coa_ipp = &connp->conn_xmit_ipp; 1912 coas.coa_ancillary = B_FALSE; 1913 coas.coa_changed = 0; 1914 } 1915 1916 err = udp_do_opt_set(coa, level, name, inlen, invalp, 1917 cr, checkonly); 1918 if (err != 0) { 1919 errout: 1920 if (!coa->coa_ancillary) 1921 ixa_refrele(coa->coa_ixa); 1922 *outlenp = 0; 1923 return (err); 1924 } 1925 /* Handle DHCPINIT here outside of lock */ 1926 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) { 1927 uint_t ifindex; 1928 ill_t *ill; 1929 1930 ifindex = *(uint_t *)invalp; 1931 if (ifindex == 0) { 1932 ill = NULL; 1933 } else { 1934 ill = ill_lookup_on_ifindex(ifindex, B_FALSE, 1935 coa->coa_ixa->ixa_ipst); 1936 if (ill == NULL) { 1937 err = ENXIO; 1938 goto errout; 1939 } 1940 1941 mutex_enter(&ill->ill_lock); 1942 if (ill->ill_state_flags & ILL_CONDEMNED) { 1943 mutex_exit(&ill->ill_lock); 1944 ill_refrele(ill); 1945 err = ENXIO; 1946 goto errout; 1947 } 1948 if (IS_VNI(ill)) { 1949 mutex_exit(&ill->ill_lock); 1950 ill_refrele(ill); 1951 err = EINVAL; 1952 goto errout; 1953 } 1954 } 1955 mutex_enter(&connp->conn_lock); 1956 1957 if (connp->conn_dhcpinit_ill != NULL) { 1958 /* 1959 * We've locked the conn so conn_cleanup_ill() 1960 * cannot clear conn_dhcpinit_ill -- so it's 1961 * safe to access the ill. 1962 */ 1963 ill_t *oill = connp->conn_dhcpinit_ill; 1964 1965 ASSERT(oill->ill_dhcpinit != 0); 1966 atomic_dec_32(&oill->ill_dhcpinit); 1967 ill_set_inputfn(connp->conn_dhcpinit_ill); 1968 connp->conn_dhcpinit_ill = NULL; 1969 } 1970 1971 if (ill != NULL) { 1972 connp->conn_dhcpinit_ill = ill; 1973 atomic_inc_32(&ill->ill_dhcpinit); 1974 ill_set_inputfn(ill); 1975 mutex_exit(&connp->conn_lock); 1976 mutex_exit(&ill->ill_lock); 1977 ill_refrele(ill); 1978 } else { 1979 mutex_exit(&connp->conn_lock); 1980 } 1981 } 1982 1983 /* 1984 * Common case of OK return with outval same as inval. 1985 */ 1986 if (invalp != outvalp) { 1987 /* don't trust bcopy for identical src/dst */ 1988 (void) bcopy(invalp, outvalp, inlen); 1989 } 1990 *outlenp = inlen; 1991 1992 /* 1993 * If this was not ancillary data, then we rebuild the headers, 1994 * update the IRE/NCE, and IPsec as needed. 1995 * Since the label depends on the destination we go through 1996 * ip_set_destination first. 1997 */ 1998 if (coa->coa_ancillary) { 1999 return (0); 2000 } 2001 2002 if (coa->coa_changed & COA_ROUTE_CHANGED) { 2003 in6_addr_t saddr, faddr, nexthop; 2004 in_port_t fport; 2005 2006 /* 2007 * We clear lastdst to make sure we pick up the change 2008 * next time sending. 2009 * If we are connected we re-cache the information. 2010 * We ignore errors to preserve BSD behavior. 2011 * Note that we don't redo IPsec policy lookup here 2012 * since the final destination (or source) didn't change. 2013 */ 2014 mutex_enter(&connp->conn_lock); 2015 connp->conn_v6lastdst = ipv6_all_zeros; 2016 2017 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa, 2018 &connp->conn_faddr_v6, &nexthop); 2019 saddr = connp->conn_saddr_v6; 2020 faddr = connp->conn_faddr_v6; 2021 fport = connp->conn_fport; 2022 mutex_exit(&connp->conn_lock); 2023 2024 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) && 2025 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) { 2026 (void) ip_attr_connect(connp, coa->coa_ixa, 2027 &saddr, &faddr, &nexthop, fport, NULL, NULL, 2028 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST); 2029 } 2030 } 2031 2032 ixa_refrele(coa->coa_ixa); 2033 2034 if (coa->coa_changed & COA_HEADER_CHANGED) { 2035 /* 2036 * Rebuild the header template if we are connected. 2037 * Otherwise clear conn_v6lastdst so we rebuild the header 2038 * in the data path. 2039 */ 2040 mutex_enter(&connp->conn_lock); 2041 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 2042 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 2043 err = udp_build_hdr_template(connp, 2044 &connp->conn_saddr_v6, &connp->conn_faddr_v6, 2045 connp->conn_fport, connp->conn_flowinfo); 2046 if (err != 0) { 2047 mutex_exit(&connp->conn_lock); 2048 return (err); 2049 } 2050 } else { 2051 connp->conn_v6lastdst = ipv6_all_zeros; 2052 } 2053 mutex_exit(&connp->conn_lock); 2054 } 2055 if (coa->coa_changed & COA_RCVBUF_CHANGED) { 2056 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2057 connp->conn_rcvbuf); 2058 } 2059 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { 2060 connp->conn_wq->q_hiwat = connp->conn_sndbuf; 2061 } 2062 if (coa->coa_changed & COA_WROFF_CHANGED) { 2063 /* Increase wroff if needed */ 2064 uint_t wroff; 2065 2066 mutex_enter(&connp->conn_lock); 2067 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra; 2068 if (udp->udp_nat_t_endpoint) 2069 wroff += sizeof (uint32_t); 2070 if (wroff > connp->conn_wroff) { 2071 connp->conn_wroff = wroff; 2072 mutex_exit(&connp->conn_lock); 2073 (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff); 2074 } else { 2075 mutex_exit(&connp->conn_lock); 2076 } 2077 } 2078 return (err); 2079 } 2080 2081 /* This routine sets socket options. */ 2082 int 2083 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2084 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2085 void *thisdg_attrs, cred_t *cr) 2086 { 2087 conn_t *connp = Q_TO_CONN(q); 2088 int error; 2089 2090 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 2091 outlenp, outvalp, thisdg_attrs, cr); 2092 return (error); 2093 } 2094 2095 /* 2096 * Setup IP and UDP headers. 2097 * Returns NULL on allocation failure, in which case data_mp is freed. 2098 */ 2099 mblk_t * 2100 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, 2101 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport, 2102 uint32_t flowinfo, mblk_t *data_mp, int *errorp) 2103 { 2104 mblk_t *mp; 2105 udpha_t *udpha; 2106 udp_stack_t *us = connp->conn_netstack->netstack_udp; 2107 uint_t data_len; 2108 uint32_t cksum; 2109 udp_t *udp = connp->conn_udp; 2110 boolean_t insert_spi = udp->udp_nat_t_endpoint; 2111 boolean_t hash_srcport = udp->udp_vxlanhash; 2112 uint_t ulp_hdr_len; 2113 uint16_t srcport; 2114 2115 data_len = msgdsize(data_mp); 2116 ulp_hdr_len = UDPH_SIZE; 2117 if (insert_spi) 2118 ulp_hdr_len += sizeof (uint32_t); 2119 2120 /* 2121 * If we have source port hashing going on, determine the hash before 2122 * we modify the mblk_t. 2123 */ 2124 if (hash_srcport == B_TRUE) { 2125 srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN, 2126 IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX, 2127 ntohs(connp->conn_lport)); 2128 } 2129 2130 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, 2131 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); 2132 if (mp == NULL) { 2133 ASSERT(*errorp != 0); 2134 return (NULL); 2135 } 2136 2137 data_len += ulp_hdr_len; 2138 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; 2139 2140 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); 2141 if (hash_srcport == B_TRUE) { 2142 udpha->uha_src_port = htons(srcport); 2143 } else { 2144 udpha->uha_src_port = connp->conn_lport; 2145 } 2146 udpha->uha_dst_port = dstport; 2147 udpha->uha_checksum = 0; 2148 udpha->uha_length = htons(data_len); 2149 2150 /* 2151 * If there was a routing option/header then conn_prepend_hdr 2152 * has massaged it and placed the pseudo-header checksum difference 2153 * in the cksum argument. 2154 * 2155 * Setup header length and prepare for ULP checksum done in IP. 2156 * 2157 * We make it easy for IP to include our pseudo header 2158 * by putting our length in uha_checksum. 2159 * The IP source, destination, and length have already been set by 2160 * conn_prepend_hdr. 2161 */ 2162 cksum += data_len; 2163 cksum = (cksum >> 16) + (cksum & 0xFFFF); 2164 ASSERT(cksum < 0x10000); 2165 2166 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2167 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2168 2169 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); 2170 2171 /* IP does the checksum if uha_checksum is non-zero */ 2172 if (us->us_do_checksum) { 2173 if (cksum == 0) 2174 udpha->uha_checksum = 0xffff; 2175 else 2176 udpha->uha_checksum = htons(cksum); 2177 } else { 2178 udpha->uha_checksum = 0; 2179 } 2180 } else { 2181 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2182 2183 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); 2184 if (cksum == 0) 2185 udpha->uha_checksum = 0xffff; 2186 else 2187 udpha->uha_checksum = htons(cksum); 2188 } 2189 2190 /* Insert all-0s SPI now. */ 2191 if (insert_spi) 2192 *((uint32_t *)(udpha + 1)) = 0; 2193 2194 return (mp); 2195 } 2196 2197 static int 2198 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, 2199 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo) 2200 { 2201 udpha_t *udpha; 2202 int error; 2203 2204 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2205 /* 2206 * We clear lastdst to make sure we don't use the lastdst path 2207 * next time sending since we might not have set v6dst yet. 2208 */ 2209 connp->conn_v6lastdst = ipv6_all_zeros; 2210 2211 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst, 2212 flowinfo); 2213 if (error != 0) 2214 return (error); 2215 2216 /* 2217 * Any routing header/option has been massaged. The checksum difference 2218 * is stored in conn_sum. 2219 */ 2220 udpha = (udpha_t *)connp->conn_ht_ulp; 2221 udpha->uha_src_port = connp->conn_lport; 2222 udpha->uha_dst_port = dstport; 2223 udpha->uha_checksum = 0; 2224 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */ 2225 return (0); 2226 } 2227 2228 static mblk_t * 2229 udp_queue_fallback(udp_t *udp, mblk_t *mp) 2230 { 2231 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 2232 if (IPCL_IS_NONSTR(udp->udp_connp)) { 2233 /* 2234 * fallback has started but messages have not been moved yet 2235 */ 2236 if (udp->udp_fallback_queue_head == NULL) { 2237 ASSERT(udp->udp_fallback_queue_tail == NULL); 2238 udp->udp_fallback_queue_head = mp; 2239 udp->udp_fallback_queue_tail = mp; 2240 } else { 2241 ASSERT(udp->udp_fallback_queue_tail != NULL); 2242 udp->udp_fallback_queue_tail->b_next = mp; 2243 udp->udp_fallback_queue_tail = mp; 2244 } 2245 return (NULL); 2246 } else { 2247 /* 2248 * Fallback completed, let the caller putnext() the mblk. 2249 */ 2250 return (mp); 2251 } 2252 } 2253 2254 /* 2255 * Deliver data to ULP. In case we have a socket, and it's falling back to 2256 * TPI, then we'll queue the mp for later processing. 2257 */ 2258 static void 2259 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2260 { 2261 if (IPCL_IS_NONSTR(connp)) { 2262 udp_t *udp = connp->conn_udp; 2263 int error; 2264 2265 ASSERT(len == msgdsize(mp)); 2266 if ((*connp->conn_upcalls->su_recv) 2267 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2268 mutex_enter(&udp->udp_recv_lock); 2269 if (error == ENOSPC) { 2270 /* 2271 * let's confirm while holding the lock 2272 */ 2273 if ((*connp->conn_upcalls->su_recv) 2274 (connp->conn_upper_handle, NULL, 0, 0, 2275 &error, NULL) < 0) { 2276 ASSERT(error == ENOSPC); 2277 if (error == ENOSPC) { 2278 connp->conn_flow_cntrld = 2279 B_TRUE; 2280 } 2281 } 2282 mutex_exit(&udp->udp_recv_lock); 2283 } else { 2284 ASSERT(error == EOPNOTSUPP); 2285 mp = udp_queue_fallback(udp, mp); 2286 mutex_exit(&udp->udp_recv_lock); 2287 if (mp != NULL) 2288 putnext(connp->conn_rq, mp); 2289 } 2290 } 2291 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2292 } else { 2293 if (is_system_labeled()) { 2294 ASSERT(ira->ira_cred != NULL); 2295 /* 2296 * Provide for protocols above UDP such as RPC 2297 * NOPID leaves db_cpid unchanged. 2298 */ 2299 mblk_setcred(mp, ira->ira_cred, NOPID); 2300 } 2301 2302 putnext(connp->conn_rq, mp); 2303 } 2304 } 2305 2306 /* 2307 * This is the inbound data path. 2308 * IP has already pulled up the IP plus UDP headers and verified alignment 2309 * etc. 2310 */ 2311 /* ARGSUSED2 */ 2312 static void 2313 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2314 { 2315 conn_t *connp = (conn_t *)arg1; 2316 struct T_unitdata_ind *tudi; 2317 uchar_t *rptr; /* Pointer to IP header */ 2318 int hdr_length; /* Length of IP+UDP headers */ 2319 int udi_size; /* Size of T_unitdata_ind */ 2320 int pkt_len; 2321 udp_t *udp; 2322 udpha_t *udpha; 2323 ip_pkt_t ipps; 2324 ip6_t *ip6h; 2325 mblk_t *mp1; 2326 uint32_t udp_ipv4_options_len; 2327 crb_t recv_ancillary; 2328 udp_stack_t *us; 2329 2330 ASSERT(connp->conn_flags & IPCL_UDPCONN); 2331 2332 udp = connp->conn_udp; 2333 us = udp->udp_us; 2334 rptr = mp->b_rptr; 2335 2336 ASSERT(DB_TYPE(mp) == M_DATA); 2337 ASSERT(OK_32PTR(rptr)); 2338 ASSERT(ira->ira_pktlen == msgdsize(mp)); 2339 pkt_len = ira->ira_pktlen; 2340 2341 /* 2342 * Get a snapshot of these and allow other threads to change 2343 * them after that. We need the same recv_ancillary when determining 2344 * the size as when adding the ancillary data items. 2345 */ 2346 mutex_enter(&connp->conn_lock); 2347 udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len; 2348 recv_ancillary = connp->conn_recv_ancillary; 2349 mutex_exit(&connp->conn_lock); 2350 2351 hdr_length = ira->ira_ip_hdr_length; 2352 2353 /* 2354 * IP inspected the UDP header thus all of it must be in the mblk. 2355 * UDP length check is performed for IPv6 packets and IPv4 packets 2356 * to check if the size of the packet as specified 2357 * by the UDP header is the same as the length derived from the IP 2358 * header. 2359 */ 2360 udpha = (udpha_t *)(rptr + hdr_length); 2361 if (pkt_len != ntohs(udpha->uha_length) + hdr_length) 2362 goto tossit; 2363 2364 hdr_length += UDPH_SIZE; 2365 ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */ 2366 2367 /* Initialize regardless of IP version */ 2368 ipps.ipp_fields = 0; 2369 2370 if (((ira->ira_flags & IRAF_IPV4_OPTIONS) || 2371 udp_ipv4_options_len > 0) && 2372 connp->conn_family == AF_INET) { 2373 int err; 2374 2375 /* 2376 * Record/update udp_recv_ipp with the lock 2377 * held. Not needed for AF_INET6 sockets 2378 * since they don't support a getsockopt of IP_OPTIONS. 2379 */ 2380 mutex_enter(&connp->conn_lock); 2381 err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp, 2382 B_TRUE); 2383 if (err != 0) { 2384 /* Allocation failed. Drop packet */ 2385 mutex_exit(&connp->conn_lock); 2386 freemsg(mp); 2387 UDPS_BUMP_MIB(us, udpInErrors); 2388 return; 2389 } 2390 mutex_exit(&connp->conn_lock); 2391 } 2392 2393 if (recv_ancillary.crb_all != 0) { 2394 /* 2395 * Record packet information in the ip_pkt_t 2396 */ 2397 if (ira->ira_flags & IRAF_IS_IPV4) { 2398 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION); 2399 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 2400 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 2401 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr)); 2402 2403 (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE); 2404 } else { 2405 uint8_t nexthdrp; 2406 2407 ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION); 2408 /* 2409 * IPv6 packets can only be received by applications 2410 * that are prepared to receive IPv6 addresses. 2411 * The IP fanout must ensure this. 2412 */ 2413 ASSERT(connp->conn_family == AF_INET6); 2414 2415 ip6h = (ip6_t *)rptr; 2416 2417 /* We don't care about the length, but need the ipp */ 2418 hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps, 2419 &nexthdrp); 2420 ASSERT(hdr_length == ira->ira_ip_hdr_length); 2421 /* Restore */ 2422 hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE; 2423 ASSERT(nexthdrp == IPPROTO_UDP); 2424 } 2425 } 2426 2427 /* 2428 * This is the inbound data path. Packets are passed upstream as 2429 * T_UNITDATA_IND messages. 2430 */ 2431 if (connp->conn_family == AF_INET) { 2432 sin_t *sin; 2433 2434 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 2435 2436 /* 2437 * Normally only send up the source address. 2438 * If any ancillary data items are wanted we add those. 2439 */ 2440 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 2441 if (recv_ancillary.crb_all != 0) { 2442 udi_size += conn_recvancillary_size(connp, 2443 recv_ancillary, ira, mp, &ipps); 2444 } 2445 2446 /* Allocate a message block for the T_UNITDATA_IND structure. */ 2447 mp1 = allocb(udi_size, BPRI_MED); 2448 if (mp1 == NULL) { 2449 freemsg(mp); 2450 UDPS_BUMP_MIB(us, udpInErrors); 2451 return; 2452 } 2453 mp1->b_cont = mp; 2454 mp1->b_datap->db_type = M_PROTO; 2455 tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2456 mp1->b_wptr = (uchar_t *)tudi + udi_size; 2457 tudi->PRIM_type = T_UNITDATA_IND; 2458 tudi->SRC_length = sizeof (sin_t); 2459 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2460 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2461 sizeof (sin_t); 2462 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 2463 tudi->OPT_length = udi_size; 2464 sin = (sin_t *)&tudi[1]; 2465 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 2466 sin->sin_port = udpha->uha_src_port; 2467 sin->sin_family = connp->conn_family; 2468 *(uint32_t *)&sin->sin_zero[0] = 0; 2469 *(uint32_t *)&sin->sin_zero[4] = 0; 2470 2471 /* 2472 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA, 2473 * IP_RECVTTL or IP_RECVTOS has been set. 2474 */ 2475 if (udi_size != 0) { 2476 conn_recvancillary_add(connp, recv_ancillary, ira, 2477 &ipps, (uchar_t *)&sin[1], udi_size); 2478 } 2479 } else { 2480 sin6_t *sin6; 2481 2482 /* 2483 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 2484 * 2485 * Normally we only send up the address. If receiving of any 2486 * optional receive side information is enabled, we also send 2487 * that up as options. 2488 */ 2489 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 2490 2491 if (recv_ancillary.crb_all != 0) { 2492 udi_size += conn_recvancillary_size(connp, 2493 recv_ancillary, ira, mp, &ipps); 2494 } 2495 2496 mp1 = allocb(udi_size, BPRI_MED); 2497 if (mp1 == NULL) { 2498 freemsg(mp); 2499 UDPS_BUMP_MIB(us, udpInErrors); 2500 return; 2501 } 2502 mp1->b_cont = mp; 2503 mp1->b_datap->db_type = M_PROTO; 2504 tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2505 mp1->b_wptr = (uchar_t *)tudi + udi_size; 2506 tudi->PRIM_type = T_UNITDATA_IND; 2507 tudi->SRC_length = sizeof (sin6_t); 2508 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2509 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2510 sizeof (sin6_t); 2511 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 2512 tudi->OPT_length = udi_size; 2513 sin6 = (sin6_t *)&tudi[1]; 2514 if (ira->ira_flags & IRAF_IS_IPV4) { 2515 in6_addr_t v6dst; 2516 2517 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 2518 &sin6->sin6_addr); 2519 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 2520 &v6dst); 2521 sin6->sin6_flowinfo = 0; 2522 sin6->sin6_scope_id = 0; 2523 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 2524 IPCL_ZONEID(connp), us->us_netstack); 2525 } else { 2526 ip6h = (ip6_t *)rptr; 2527 2528 sin6->sin6_addr = ip6h->ip6_src; 2529 /* No sin6_flowinfo per API */ 2530 sin6->sin6_flowinfo = 0; 2531 /* For link-scope pass up scope id */ 2532 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 2533 sin6->sin6_scope_id = ira->ira_ruifindex; 2534 else 2535 sin6->sin6_scope_id = 0; 2536 sin6->__sin6_src_id = ip_srcid_find_addr( 2537 &ip6h->ip6_dst, IPCL_ZONEID(connp), 2538 us->us_netstack); 2539 } 2540 sin6->sin6_port = udpha->uha_src_port; 2541 sin6->sin6_family = connp->conn_family; 2542 2543 if (udi_size != 0) { 2544 conn_recvancillary_add(connp, recv_ancillary, ira, 2545 &ipps, (uchar_t *)&sin6[1], udi_size); 2546 } 2547 } 2548 2549 /* 2550 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and 2551 * loopback traffic). 2552 */ 2553 DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa, 2554 void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha); 2555 2556 /* Walk past the headers unless IP_RECVHDR was set. */ 2557 if (!udp->udp_rcvhdr) { 2558 mp->b_rptr = rptr + hdr_length; 2559 pkt_len -= hdr_length; 2560 } 2561 2562 UDPS_BUMP_MIB(us, udpHCInDatagrams); 2563 udp_ulp_recv(connp, mp1, pkt_len, ira); 2564 return; 2565 2566 tossit: 2567 freemsg(mp); 2568 UDPS_BUMP_MIB(us, udpInErrors); 2569 } 2570 2571 /* 2572 * This routine creates a T_UDERROR_IND message and passes it upstream. 2573 * The address and options are copied from the T_UNITDATA_REQ message 2574 * passed in mp. This message is freed. 2575 */ 2576 static void 2577 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2578 { 2579 struct T_unitdata_req *tudr; 2580 mblk_t *mp1; 2581 uchar_t *destaddr; 2582 t_scalar_t destlen; 2583 uchar_t *optaddr; 2584 t_scalar_t optlen; 2585 2586 if ((mp->b_wptr < mp->b_rptr) || 2587 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2588 goto done; 2589 } 2590 tudr = (struct T_unitdata_req *)mp->b_rptr; 2591 destaddr = mp->b_rptr + tudr->DEST_offset; 2592 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2593 destaddr + tudr->DEST_length < mp->b_rptr || 2594 destaddr + tudr->DEST_length > mp->b_wptr) { 2595 goto done; 2596 } 2597 optaddr = mp->b_rptr + tudr->OPT_offset; 2598 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2599 optaddr + tudr->OPT_length < mp->b_rptr || 2600 optaddr + tudr->OPT_length > mp->b_wptr) { 2601 goto done; 2602 } 2603 destlen = tudr->DEST_length; 2604 optlen = tudr->OPT_length; 2605 2606 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2607 (char *)optaddr, optlen, err); 2608 if (mp1 != NULL) 2609 qreply(q, mp1); 2610 2611 done: 2612 freemsg(mp); 2613 } 2614 2615 /* 2616 * This routine removes a port number association from a stream. It 2617 * is called by udp_wput to handle T_UNBIND_REQ messages. 2618 */ 2619 static void 2620 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2621 { 2622 conn_t *connp = Q_TO_CONN(q); 2623 int error; 2624 2625 error = udp_do_unbind(connp); 2626 if (error) { 2627 if (error < 0) 2628 udp_err_ack(q, mp, -error, 0); 2629 else 2630 udp_err_ack(q, mp, TSYSERR, error); 2631 return; 2632 } 2633 2634 mp = mi_tpi_ok_ack_alloc(mp); 2635 ASSERT(mp != NULL); 2636 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 2637 qreply(q, mp); 2638 } 2639 2640 /* 2641 * Don't let port fall into the privileged range. 2642 * Since the extra privileged ports can be arbitrary we also 2643 * ensure that we exclude those from consideration. 2644 * us->us_epriv_ports is not sorted thus we loop over it until 2645 * there are no changes. 2646 */ 2647 static in_port_t 2648 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 2649 { 2650 int i, bump; 2651 in_port_t nextport; 2652 boolean_t restart = B_FALSE; 2653 udp_stack_t *us = udp->udp_us; 2654 2655 if (random && udp_random_anon_port != 0) { 2656 (void) random_get_pseudo_bytes((uint8_t *)&port, 2657 sizeof (in_port_t)); 2658 /* 2659 * Unless changed by a sys admin, the smallest anon port 2660 * is 32768 and the largest anon port is 65535. It is 2661 * very likely (50%) for the random port to be smaller 2662 * than the smallest anon port. When that happens, 2663 * add port % (anon port range) to the smallest anon 2664 * port to get the random port. It should fall into the 2665 * valid anon port range. 2666 */ 2667 if ((port < us->us_smallest_anon_port) || 2668 (port > us->us_largest_anon_port)) { 2669 if (us->us_smallest_anon_port == 2670 us->us_largest_anon_port) { 2671 bump = 0; 2672 } else { 2673 bump = port % (us->us_largest_anon_port - 2674 us->us_smallest_anon_port); 2675 } 2676 2677 port = us->us_smallest_anon_port + bump; 2678 } 2679 } 2680 2681 retry: 2682 if (port < us->us_smallest_anon_port) 2683 port = us->us_smallest_anon_port; 2684 2685 if (port > us->us_largest_anon_port) { 2686 port = us->us_smallest_anon_port; 2687 if (restart) 2688 return (0); 2689 restart = B_TRUE; 2690 } 2691 2692 if (port < us->us_smallest_nonpriv_port) 2693 port = us->us_smallest_nonpriv_port; 2694 2695 for (i = 0; i < us->us_num_epriv_ports; i++) { 2696 if (port == us->us_epriv_ports[i]) { 2697 port++; 2698 /* 2699 * Make sure that the port is in the 2700 * valid range. 2701 */ 2702 goto retry; 2703 } 2704 } 2705 2706 if (is_system_labeled() && 2707 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 2708 port, IPPROTO_UDP, B_TRUE)) != 0) { 2709 port = nextport; 2710 goto retry; 2711 } 2712 2713 return (port); 2714 } 2715 2716 /* 2717 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 2718 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 2719 * the TPI options, otherwise we take them from msg_control. 2720 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 2721 * Always consumes mp; never consumes tudr_mp. 2722 */ 2723 static int 2724 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, 2725 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) 2726 { 2727 udp_t *udp = connp->conn_udp; 2728 udp_stack_t *us = udp->udp_us; 2729 int error; 2730 ip_xmit_attr_t *ixa; 2731 ip_pkt_t *ipp; 2732 in6_addr_t v6src; 2733 in6_addr_t v6dst; 2734 in6_addr_t v6nexthop; 2735 in_port_t dstport; 2736 uint32_t flowinfo; 2737 uint_t srcid; 2738 int is_absreq_failure = 0; 2739 conn_opt_arg_t coas, *coa; 2740 2741 ASSERT(tudr_mp != NULL || msg != NULL); 2742 2743 /* 2744 * Get ixa before checking state to handle a disconnect race. 2745 * 2746 * We need an exclusive copy of conn_ixa since the ancillary data 2747 * options might modify it. That copy has no pointers hence we 2748 * need to set them up once we've parsed the ancillary data. 2749 */ 2750 ixa = conn_get_ixa_exclusive(connp); 2751 if (ixa == NULL) { 2752 UDPS_BUMP_MIB(us, udpOutErrors); 2753 freemsg(mp); 2754 return (ENOMEM); 2755 } 2756 ASSERT(cr != NULL); 2757 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2758 ixa->ixa_cred = cr; 2759 ixa->ixa_cpid = pid; 2760 if (is_system_labeled()) { 2761 /* We need to restart with a label based on the cred */ 2762 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 2763 } 2764 2765 /* In case previous destination was multicast or multirt */ 2766 ip_attr_newdst(ixa); 2767 2768 /* Get a copy of conn_xmit_ipp since the options might change it */ 2769 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); 2770 if (ipp == NULL) { 2771 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2772 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2773 ixa->ixa_cpid = connp->conn_cpid; 2774 ixa_refrele(ixa); 2775 UDPS_BUMP_MIB(us, udpOutErrors); 2776 freemsg(mp); 2777 return (ENOMEM); 2778 } 2779 mutex_enter(&connp->conn_lock); 2780 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); 2781 mutex_exit(&connp->conn_lock); 2782 if (error != 0) { 2783 UDPS_BUMP_MIB(us, udpOutErrors); 2784 freemsg(mp); 2785 goto done; 2786 } 2787 2788 /* 2789 * Parse the options and update ixa and ipp as a result. 2790 * Note that ixa_tsl can be updated if SCM_UCRED. 2791 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. 2792 */ 2793 2794 coa = &coas; 2795 coa->coa_connp = connp; 2796 coa->coa_ixa = ixa; 2797 coa->coa_ipp = ipp; 2798 coa->coa_ancillary = B_TRUE; 2799 coa->coa_changed = 0; 2800 2801 if (msg != NULL) { 2802 error = process_auxiliary_options(connp, msg->msg_control, 2803 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); 2804 } else { 2805 struct T_unitdata_req *tudr; 2806 2807 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; 2808 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 2809 error = tpi_optcom_buf(connp->conn_wq, tudr_mp, 2810 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, 2811 coa, &is_absreq_failure); 2812 } 2813 if (error != 0) { 2814 /* 2815 * Note: No special action needed in this 2816 * module for "is_absreq_failure" 2817 */ 2818 freemsg(mp); 2819 UDPS_BUMP_MIB(us, udpOutErrors); 2820 goto done; 2821 } 2822 ASSERT(is_absreq_failure == 0); 2823 2824 mutex_enter(&connp->conn_lock); 2825 /* 2826 * If laddr is unspecified then we look at sin6_src_id. 2827 * We will give precedence to a source address set with IPV6_PKTINFO 2828 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 2829 * want ip_attr_connect to select a source (since it can fail) when 2830 * IPV6_PKTINFO is specified. 2831 * If this doesn't result in a source address then we get a source 2832 * from ip_attr_connect() below. 2833 */ 2834 v6src = connp->conn_saddr_v6; 2835 if (sin != NULL) { 2836 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 2837 dstport = sin->sin_port; 2838 flowinfo = 0; 2839 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 2840 ixa->ixa_flags |= IXAF_IS_IPV4; 2841 } else if (sin6 != NULL) { 2842 boolean_t v4mapped; 2843 2844 v6dst = sin6->sin6_addr; 2845 dstport = sin6->sin6_port; 2846 flowinfo = sin6->sin6_flowinfo; 2847 srcid = sin6->__sin6_src_id; 2848 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 2849 ixa->ixa_scopeid = sin6->sin6_scope_id; 2850 ixa->ixa_flags |= IXAF_SCOPEID_SET; 2851 } else { 2852 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 2853 } 2854 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst); 2855 if (v4mapped) 2856 ixa->ixa_flags |= IXAF_IS_IPV4; 2857 else 2858 ixa->ixa_flags &= ~IXAF_IS_IPV4; 2859 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 2860 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 2861 v4mapped, connp->conn_netstack)) { 2862 /* Mismatch - v4mapped/v6 specified by srcid. */ 2863 mutex_exit(&connp->conn_lock); 2864 error = EADDRNOTAVAIL; 2865 goto failed; /* Does freemsg() and mib. */ 2866 } 2867 } 2868 } else { 2869 /* Connected case */ 2870 v6dst = connp->conn_faddr_v6; 2871 dstport = connp->conn_fport; 2872 flowinfo = connp->conn_flowinfo; 2873 } 2874 mutex_exit(&connp->conn_lock); 2875 2876 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ 2877 if (ipp->ipp_fields & IPPF_ADDR) { 2878 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2879 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 2880 v6src = ipp->ipp_addr; 2881 } else { 2882 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 2883 v6src = ipp->ipp_addr; 2884 } 2885 } 2886 2887 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop); 2888 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 2889 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 2890 2891 switch (error) { 2892 case 0: 2893 break; 2894 case EADDRNOTAVAIL: 2895 /* 2896 * IXAF_VERIFY_SOURCE tells us to pick a better source. 2897 * Don't have the application see that errno 2898 */ 2899 error = ENETUNREACH; 2900 goto failed; 2901 case ENETDOWN: 2902 /* 2903 * Have !ipif_addr_ready address; drop packet silently 2904 * until we can get applications to not send until we 2905 * are ready. 2906 */ 2907 error = 0; 2908 goto failed; 2909 case EHOSTUNREACH: 2910 case ENETUNREACH: 2911 if (ixa->ixa_ire != NULL) { 2912 /* 2913 * Let conn_ip_output/ire_send_noroute return 2914 * the error and send any local ICMP error. 2915 */ 2916 error = 0; 2917 break; 2918 } 2919 /* FALLTHRU */ 2920 default: 2921 failed: 2922 freemsg(mp); 2923 UDPS_BUMP_MIB(us, udpOutErrors); 2924 goto done; 2925 } 2926 2927 /* 2928 * We might be going to a different destination than last time, 2929 * thus check that TX allows the communication and compute any 2930 * needed label. 2931 * 2932 * TSOL Note: We have an exclusive ipp and ixa for this thread so we 2933 * don't have to worry about concurrent threads. 2934 */ 2935 if (is_system_labeled()) { 2936 /* Using UDP MLP requires SCM_UCRED from user */ 2937 if (connp->conn_mlp_type != mlptSingle && 2938 !((ixa->ixa_flags & IXAF_UCRED_TSL))) { 2939 UDPS_BUMP_MIB(us, udpOutErrors); 2940 error = ECONNREFUSED; 2941 freemsg(mp); 2942 goto done; 2943 } 2944 /* 2945 * Check whether Trusted Solaris policy allows communication 2946 * with this host, and pretend that the destination is 2947 * unreachable if not. 2948 * Compute any needed label and place it in ipp_label_v4/v6. 2949 * 2950 * Later conn_build_hdr_template/conn_prepend_hdr takes 2951 * ipp_label_v4/v6 to form the packet. 2952 * 2953 * Tsol note: We have ipp structure local to this thread so 2954 * no locking is needed. 2955 */ 2956 error = conn_update_label(connp, ixa, &v6dst, ipp); 2957 if (error != 0) { 2958 freemsg(mp); 2959 UDPS_BUMP_MIB(us, udpOutErrors); 2960 goto done; 2961 } 2962 } 2963 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport, 2964 flowinfo, mp, &error); 2965 if (mp == NULL) { 2966 ASSERT(error != 0); 2967 UDPS_BUMP_MIB(us, udpOutErrors); 2968 goto done; 2969 } 2970 if (ixa->ixa_pktlen > IP_MAXPACKET) { 2971 error = EMSGSIZE; 2972 UDPS_BUMP_MIB(us, udpOutErrors); 2973 freemsg(mp); 2974 goto done; 2975 } 2976 /* We're done. Pass the packet to ip. */ 2977 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 2978 2979 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 2980 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 2981 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 2982 2983 error = conn_ip_output(mp, ixa); 2984 /* No udpOutErrors if an error since IP increases its error counter */ 2985 switch (error) { 2986 case 0: 2987 break; 2988 case EWOULDBLOCK: 2989 (void) ixa_check_drain_insert(connp, ixa); 2990 error = 0; 2991 break; 2992 case EADDRNOTAVAIL: 2993 /* 2994 * IXAF_VERIFY_SOURCE tells us to pick a better source. 2995 * Don't have the application see that errno 2996 */ 2997 error = ENETUNREACH; 2998 /* FALLTHRU */ 2999 default: 3000 mutex_enter(&connp->conn_lock); 3001 /* 3002 * Clear the source and v6lastdst so we call ip_attr_connect 3003 * for the next packet and try to pick a better source. 3004 */ 3005 if (connp->conn_mcbc_bind) 3006 connp->conn_saddr_v6 = ipv6_all_zeros; 3007 else 3008 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3009 connp->conn_v6lastdst = ipv6_all_zeros; 3010 mutex_exit(&connp->conn_lock); 3011 break; 3012 } 3013 done: 3014 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3015 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3016 ixa->ixa_cpid = connp->conn_cpid; 3017 ixa_refrele(ixa); 3018 ip_pkt_free(ipp); 3019 kmem_free(ipp, sizeof (*ipp)); 3020 return (error); 3021 } 3022 3023 /* 3024 * Handle sending an M_DATA for a connected socket. 3025 * Handles both IPv4 and IPv6. 3026 */ 3027 static int 3028 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) 3029 { 3030 udp_t *udp = connp->conn_udp; 3031 udp_stack_t *us = udp->udp_us; 3032 int error; 3033 ip_xmit_attr_t *ixa; 3034 3035 /* 3036 * If no other thread is using conn_ixa this just gets a reference to 3037 * conn_ixa. Otherwise we get a safe copy of conn_ixa. 3038 */ 3039 ixa = conn_get_ixa(connp, B_FALSE); 3040 if (ixa == NULL) { 3041 UDPS_BUMP_MIB(us, udpOutErrors); 3042 freemsg(mp); 3043 return (ENOMEM); 3044 } 3045 3046 ASSERT(cr != NULL); 3047 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3048 ixa->ixa_cred = cr; 3049 ixa->ixa_cpid = pid; 3050 3051 mutex_enter(&connp->conn_lock); 3052 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6, 3053 connp->conn_fport, connp->conn_flowinfo, &error); 3054 3055 if (mp == NULL) { 3056 ASSERT(error != 0); 3057 mutex_exit(&connp->conn_lock); 3058 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3059 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3060 ixa->ixa_cpid = connp->conn_cpid; 3061 ixa_refrele(ixa); 3062 UDPS_BUMP_MIB(us, udpOutErrors); 3063 freemsg(mp); 3064 return (error); 3065 } 3066 3067 /* 3068 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3069 * safe copy, then we need to fill in any pointers in it. 3070 */ 3071 if (ixa->ixa_ire == NULL) { 3072 in6_addr_t faddr, saddr; 3073 in6_addr_t nexthop; 3074 in_port_t fport; 3075 3076 saddr = connp->conn_saddr_v6; 3077 faddr = connp->conn_faddr_v6; 3078 fport = connp->conn_fport; 3079 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop); 3080 mutex_exit(&connp->conn_lock); 3081 3082 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, 3083 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | 3084 IPDF_IPSEC); 3085 switch (error) { 3086 case 0: 3087 break; 3088 case EADDRNOTAVAIL: 3089 /* 3090 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3091 * Don't have the application see that errno 3092 */ 3093 error = ENETUNREACH; 3094 goto failed; 3095 case ENETDOWN: 3096 /* 3097 * Have !ipif_addr_ready address; drop packet silently 3098 * until we can get applications to not send until we 3099 * are ready. 3100 */ 3101 error = 0; 3102 goto failed; 3103 case EHOSTUNREACH: 3104 case ENETUNREACH: 3105 if (ixa->ixa_ire != NULL) { 3106 /* 3107 * Let conn_ip_output/ire_send_noroute return 3108 * the error and send any local ICMP error. 3109 */ 3110 error = 0; 3111 break; 3112 } 3113 /* FALLTHRU */ 3114 default: 3115 failed: 3116 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3117 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3118 ixa->ixa_cpid = connp->conn_cpid; 3119 ixa_refrele(ixa); 3120 freemsg(mp); 3121 UDPS_BUMP_MIB(us, udpOutErrors); 3122 return (error); 3123 } 3124 } else { 3125 /* Done with conn_t */ 3126 mutex_exit(&connp->conn_lock); 3127 } 3128 ASSERT(ixa->ixa_ire != NULL); 3129 3130 /* We're done. Pass the packet to ip. */ 3131 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 3132 3133 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3134 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3135 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3136 3137 error = conn_ip_output(mp, ixa); 3138 /* No udpOutErrors if an error since IP increases its error counter */ 3139 switch (error) { 3140 case 0: 3141 break; 3142 case EWOULDBLOCK: 3143 (void) ixa_check_drain_insert(connp, ixa); 3144 error = 0; 3145 break; 3146 case EADDRNOTAVAIL: 3147 /* 3148 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3149 * Don't have the application see that errno 3150 */ 3151 error = ENETUNREACH; 3152 break; 3153 } 3154 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3155 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3156 ixa->ixa_cpid = connp->conn_cpid; 3157 ixa_refrele(ixa); 3158 return (error); 3159 } 3160 3161 /* 3162 * Handle sending an M_DATA to the last destination. 3163 * Handles both IPv4 and IPv6. 3164 * 3165 * NOTE: The caller must hold conn_lock and we drop it here. 3166 */ 3167 static int 3168 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, 3169 ip_xmit_attr_t *ixa) 3170 { 3171 udp_t *udp = connp->conn_udp; 3172 udp_stack_t *us = udp->udp_us; 3173 int error; 3174 3175 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3176 ASSERT(ixa != NULL); 3177 3178 ASSERT(cr != NULL); 3179 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3180 ixa->ixa_cred = cr; 3181 ixa->ixa_cpid = pid; 3182 3183 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, 3184 connp->conn_lastdstport, connp->conn_lastflowinfo, &error); 3185 3186 if (mp == NULL) { 3187 ASSERT(error != 0); 3188 mutex_exit(&connp->conn_lock); 3189 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3190 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3191 ixa->ixa_cpid = connp->conn_cpid; 3192 ixa_refrele(ixa); 3193 UDPS_BUMP_MIB(us, udpOutErrors); 3194 freemsg(mp); 3195 return (error); 3196 } 3197 3198 /* 3199 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3200 * safe copy, then we need to fill in any pointers in it. 3201 */ 3202 if (ixa->ixa_ire == NULL) { 3203 in6_addr_t lastdst, lastsrc; 3204 in6_addr_t nexthop; 3205 in_port_t lastport; 3206 3207 lastsrc = connp->conn_v6lastsrc; 3208 lastdst = connp->conn_v6lastdst; 3209 lastport = connp->conn_lastdstport; 3210 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); 3211 mutex_exit(&connp->conn_lock); 3212 3213 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, 3214 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | 3215 IPDF_VERIFY_DST | IPDF_IPSEC); 3216 switch (error) { 3217 case 0: 3218 break; 3219 case EADDRNOTAVAIL: 3220 /* 3221 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3222 * Don't have the application see that errno 3223 */ 3224 error = ENETUNREACH; 3225 goto failed; 3226 case ENETDOWN: 3227 /* 3228 * Have !ipif_addr_ready address; drop packet silently 3229 * until we can get applications to not send until we 3230 * are ready. 3231 */ 3232 error = 0; 3233 goto failed; 3234 case EHOSTUNREACH: 3235 case ENETUNREACH: 3236 if (ixa->ixa_ire != NULL) { 3237 /* 3238 * Let conn_ip_output/ire_send_noroute return 3239 * the error and send any local ICMP error. 3240 */ 3241 error = 0; 3242 break; 3243 } 3244 /* FALLTHRU */ 3245 default: 3246 failed: 3247 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3248 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3249 ixa->ixa_cpid = connp->conn_cpid; 3250 ixa_refrele(ixa); 3251 freemsg(mp); 3252 UDPS_BUMP_MIB(us, udpOutErrors); 3253 return (error); 3254 } 3255 } else { 3256 /* Done with conn_t */ 3257 mutex_exit(&connp->conn_lock); 3258 } 3259 3260 /* We're done. Pass the packet to ip. */ 3261 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 3262 3263 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3264 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3265 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3266 3267 error = conn_ip_output(mp, ixa); 3268 /* No udpOutErrors if an error since IP increases its error counter */ 3269 switch (error) { 3270 case 0: 3271 break; 3272 case EWOULDBLOCK: 3273 (void) ixa_check_drain_insert(connp, ixa); 3274 error = 0; 3275 break; 3276 case EADDRNOTAVAIL: 3277 /* 3278 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3279 * Don't have the application see that errno 3280 */ 3281 error = ENETUNREACH; 3282 /* FALLTHRU */ 3283 default: 3284 mutex_enter(&connp->conn_lock); 3285 /* 3286 * Clear the source and v6lastdst so we call ip_attr_connect 3287 * for the next packet and try to pick a better source. 3288 */ 3289 if (connp->conn_mcbc_bind) 3290 connp->conn_saddr_v6 = ipv6_all_zeros; 3291 else 3292 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3293 connp->conn_v6lastdst = ipv6_all_zeros; 3294 mutex_exit(&connp->conn_lock); 3295 break; 3296 } 3297 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3298 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3299 ixa->ixa_cpid = connp->conn_cpid; 3300 ixa_refrele(ixa); 3301 return (error); 3302 } 3303 3304 3305 /* 3306 * Prepend the header template and then fill in the source and 3307 * flowinfo. The caller needs to handle the destination address since 3308 * it's setting is different if rthdr or source route. 3309 * 3310 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET. 3311 * When it returns NULL it sets errorp. 3312 */ 3313 static mblk_t * 3314 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, 3315 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) 3316 { 3317 udp_t *udp = connp->conn_udp; 3318 udp_stack_t *us = udp->udp_us; 3319 boolean_t insert_spi = udp->udp_nat_t_endpoint; 3320 boolean_t hash_srcport = udp->udp_vxlanhash; 3321 uint_t pktlen; 3322 uint_t alloclen; 3323 uint_t copylen; 3324 uint8_t *iph; 3325 uint_t ip_hdr_length; 3326 udpha_t *udpha; 3327 uint32_t cksum; 3328 ip_pkt_t *ipp; 3329 uint16_t srcport; 3330 3331 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3332 3333 /* 3334 * If we have source port hashing going on, determine the hash before 3335 * we modify the mblk_t. 3336 */ 3337 if (hash_srcport == B_TRUE) { 3338 srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN, 3339 IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX, 3340 ntohs(connp->conn_lport)); 3341 } 3342 3343 /* 3344 * Copy the header template and leave space for an SPI 3345 */ 3346 copylen = connp->conn_ht_iphc_len; 3347 alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0); 3348 pktlen = alloclen + msgdsize(mp); 3349 if (pktlen > IP_MAXPACKET) { 3350 freemsg(mp); 3351 *errorp = EMSGSIZE; 3352 return (NULL); 3353 } 3354 ixa->ixa_pktlen = pktlen; 3355 3356 /* check/fix buffer config, setup pointers into it */ 3357 iph = mp->b_rptr - alloclen; 3358 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { 3359 mblk_t *mp1; 3360 3361 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); 3362 if (mp1 == NULL) { 3363 freemsg(mp); 3364 *errorp = ENOMEM; 3365 return (NULL); 3366 } 3367 mp1->b_wptr = DB_LIM(mp1); 3368 mp1->b_cont = mp; 3369 mp = mp1; 3370 iph = (mp->b_wptr - alloclen); 3371 } 3372 mp->b_rptr = iph; 3373 bcopy(connp->conn_ht_iphc, iph, copylen); 3374 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); 3375 3376 ixa->ixa_ip_hdr_length = ip_hdr_length; 3377 udpha = (udpha_t *)(iph + ip_hdr_length); 3378 3379 /* 3380 * Setup header length and prepare for ULP checksum done in IP. 3381 * udp_build_hdr_template has already massaged any routing header 3382 * and placed the result in conn_sum. 3383 * 3384 * We make it easy for IP to include our pseudo header 3385 * by putting our length in uha_checksum. 3386 */ 3387 cksum = pktlen - ip_hdr_length; 3388 udpha->uha_length = htons(cksum); 3389 3390 cksum += connp->conn_sum; 3391 cksum = (cksum >> 16) + (cksum & 0xFFFF); 3392 ASSERT(cksum < 0x10000); 3393 3394 ipp = &connp->conn_xmit_ipp; 3395 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3396 ipha_t *ipha = (ipha_t *)iph; 3397 3398 ipha->ipha_length = htons((uint16_t)pktlen); 3399 3400 /* IP does the checksum if uha_checksum is non-zero */ 3401 if (us->us_do_checksum) 3402 udpha->uha_checksum = htons(cksum); 3403 3404 /* if IP_PKTINFO specified an addres it wins over bind() */ 3405 if ((ipp->ipp_fields & IPPF_ADDR) && 3406 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3407 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); 3408 ipha->ipha_src = ipp->ipp_addr_v4; 3409 } else { 3410 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 3411 } 3412 } else { 3413 ip6_t *ip6h = (ip6_t *)iph; 3414 3415 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); 3416 udpha->uha_checksum = htons(cksum); 3417 3418 /* if IP_PKTINFO specified an addres it wins over bind() */ 3419 if ((ipp->ipp_fields & IPPF_ADDR) && 3420 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3421 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); 3422 ip6h->ip6_src = ipp->ipp_addr; 3423 } else { 3424 ip6h->ip6_src = *v6src; 3425 } 3426 ip6h->ip6_vcf = 3427 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 3428 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 3429 if (ipp->ipp_fields & IPPF_TCLASS) { 3430 /* Overrides the class part of flowinfo */ 3431 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 3432 ipp->ipp_tclass); 3433 } 3434 } 3435 3436 /* Insert all-0s SPI now. */ 3437 if (insert_spi) 3438 *((uint32_t *)(udpha + 1)) = 0; 3439 3440 udpha->uha_dst_port = dstport; 3441 if (hash_srcport == B_TRUE) 3442 udpha->uha_src_port = htons(srcport); 3443 3444 return (mp); 3445 } 3446 3447 /* 3448 * Send a T_UDERR_IND in response to an M_DATA 3449 */ 3450 static void 3451 udp_ud_err_connected(conn_t *connp, t_scalar_t error) 3452 { 3453 struct sockaddr_storage ss; 3454 sin_t *sin; 3455 sin6_t *sin6; 3456 struct sockaddr *addr; 3457 socklen_t addrlen; 3458 mblk_t *mp1; 3459 3460 mutex_enter(&connp->conn_lock); 3461 /* Initialize addr and addrlen as if they're passed in */ 3462 if (connp->conn_family == AF_INET) { 3463 sin = (sin_t *)&ss; 3464 *sin = sin_null; 3465 sin->sin_family = AF_INET; 3466 sin->sin_port = connp->conn_fport; 3467 sin->sin_addr.s_addr = connp->conn_faddr_v4; 3468 addr = (struct sockaddr *)sin; 3469 addrlen = sizeof (*sin); 3470 } else { 3471 sin6 = (sin6_t *)&ss; 3472 *sin6 = sin6_null; 3473 sin6->sin6_family = AF_INET6; 3474 sin6->sin6_port = connp->conn_fport; 3475 sin6->sin6_flowinfo = connp->conn_flowinfo; 3476 sin6->sin6_addr = connp->conn_faddr_v6; 3477 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && 3478 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 3479 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; 3480 } else { 3481 sin6->sin6_scope_id = 0; 3482 } 3483 sin6->__sin6_src_id = 0; 3484 addr = (struct sockaddr *)sin6; 3485 addrlen = sizeof (*sin6); 3486 } 3487 mutex_exit(&connp->conn_lock); 3488 3489 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); 3490 if (mp1 != NULL) 3491 putnext(connp->conn_rq, mp1); 3492 } 3493 3494 /* 3495 * This routine handles all messages passed downstream. It either 3496 * consumes the message or passes it downstream; it never queues a 3497 * a message. 3498 * 3499 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 3500 * is valid when we are directly beneath the stream head, and thus sockfs 3501 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3502 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3503 * connected endpoints. 3504 */ 3505 int 3506 udp_wput(queue_t *q, mblk_t *mp) 3507 { 3508 sin6_t *sin6; 3509 sin_t *sin = NULL; 3510 uint_t srcid; 3511 conn_t *connp = Q_TO_CONN(q); 3512 udp_t *udp = connp->conn_udp; 3513 int error = 0; 3514 struct sockaddr *addr = NULL; 3515 socklen_t addrlen; 3516 udp_stack_t *us = udp->udp_us; 3517 struct T_unitdata_req *tudr; 3518 mblk_t *data_mp; 3519 ushort_t ipversion; 3520 cred_t *cr; 3521 pid_t pid; 3522 3523 /* 3524 * We directly handle several cases here: T_UNITDATA_REQ message 3525 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3526 * socket. 3527 */ 3528 switch (DB_TYPE(mp)) { 3529 case M_DATA: 3530 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3531 /* Not connected; address is required */ 3532 UDPS_BUMP_MIB(us, udpOutErrors); 3533 UDP_DBGSTAT(us, udp_data_notconn); 3534 UDP_STAT(us, udp_out_err_notconn); 3535 freemsg(mp); 3536 return (0); 3537 } 3538 /* 3539 * All Solaris components should pass a db_credp 3540 * for this message, hence we ASSERT. 3541 * On production kernels we return an error to be robust against 3542 * random streams modules sitting on top of us. 3543 */ 3544 cr = msg_getcred(mp, &pid); 3545 ASSERT(cr != NULL); 3546 if (cr == NULL) { 3547 UDPS_BUMP_MIB(us, udpOutErrors); 3548 freemsg(mp); 3549 return (0); 3550 } 3551 ASSERT(udp->udp_issocket); 3552 UDP_DBGSTAT(us, udp_data_conn); 3553 error = udp_output_connected(connp, mp, cr, pid); 3554 if (error != 0) { 3555 UDP_STAT(us, udp_out_err_output); 3556 if (connp->conn_rq != NULL) 3557 udp_ud_err_connected(connp, (t_scalar_t)error); 3558 #ifdef DEBUG 3559 printf("udp_output_connected returned %d\n", error); 3560 #endif 3561 } 3562 return (0); 3563 3564 case M_PROTO: 3565 case M_PCPROTO: 3566 tudr = (struct T_unitdata_req *)mp->b_rptr; 3567 if (MBLKL(mp) < sizeof (*tudr) || 3568 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3569 udp_wput_other(q, mp); 3570 return (0); 3571 } 3572 break; 3573 3574 default: 3575 udp_wput_other(q, mp); 3576 return (0); 3577 } 3578 3579 /* Handle valid T_UNITDATA_REQ here */ 3580 data_mp = mp->b_cont; 3581 if (data_mp == NULL) { 3582 error = EPROTO; 3583 goto ud_error2; 3584 } 3585 mp->b_cont = NULL; 3586 3587 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3588 error = EADDRNOTAVAIL; 3589 goto ud_error2; 3590 } 3591 3592 /* 3593 * All Solaris components should pass a db_credp 3594 * for this TPI message, hence we should ASSERT. 3595 * However, RPC (svc_clts_ksend) does this odd thing where it 3596 * passes the options from a T_UNITDATA_IND unchanged in a 3597 * T_UNITDATA_REQ. While that is the right thing to do for 3598 * some options, SCM_UCRED being the key one, this also makes it 3599 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3600 */ 3601 cr = msg_getcred(mp, &pid); 3602 if (cr == NULL) { 3603 cr = connp->conn_cred; 3604 pid = connp->conn_cpid; 3605 } 3606 3607 /* 3608 * If a port has not been bound to the stream, fail. 3609 * This is not a problem when sockfs is directly 3610 * above us, because it will ensure that the socket 3611 * is first bound before allowing data to be sent. 3612 */ 3613 if (udp->udp_state == TS_UNBND) { 3614 error = EPROTO; 3615 goto ud_error2; 3616 } 3617 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3618 addrlen = tudr->DEST_length; 3619 3620 switch (connp->conn_family) { 3621 case AF_INET6: 3622 sin6 = (sin6_t *)addr; 3623 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3624 (sin6->sin6_family != AF_INET6)) { 3625 error = EADDRNOTAVAIL; 3626 goto ud_error2; 3627 } 3628 3629 srcid = sin6->__sin6_src_id; 3630 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3631 /* 3632 * Destination is a non-IPv4-compatible IPv6 address. 3633 * Send out an IPv6 format packet. 3634 */ 3635 3636 /* 3637 * If the local address is a mapped address return 3638 * an error. 3639 * It would be possible to send an IPv6 packet but the 3640 * response would never make it back to the application 3641 * since it is bound to a mapped address. 3642 */ 3643 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3644 error = EADDRNOTAVAIL; 3645 goto ud_error2; 3646 } 3647 3648 UDP_DBGSTAT(us, udp_out_ipv6); 3649 3650 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3651 sin6->sin6_addr = ipv6_loopback; 3652 ipversion = IPV6_VERSION; 3653 } else { 3654 if (connp->conn_ipv6_v6only) { 3655 error = EADDRNOTAVAIL; 3656 goto ud_error2; 3657 } 3658 3659 /* 3660 * If the local address is not zero or a mapped address 3661 * return an error. It would be possible to send an 3662 * IPv4 packet but the response would never make it 3663 * back to the application since it is bound to a 3664 * non-mapped address. 3665 */ 3666 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3667 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3668 error = EADDRNOTAVAIL; 3669 goto ud_error2; 3670 } 3671 UDP_DBGSTAT(us, udp_out_mapped); 3672 3673 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3674 V4_PART_OF_V6(sin6->sin6_addr) = 3675 htonl(INADDR_LOOPBACK); 3676 } 3677 ipversion = IPV4_VERSION; 3678 } 3679 3680 if (tudr->OPT_length != 0) { 3681 /* 3682 * If we are connected then the destination needs to be 3683 * the same as the connected one. 3684 */ 3685 if (udp->udp_state == TS_DATA_XFER && 3686 !conn_same_as_last_v6(connp, sin6)) { 3687 error = EISCONN; 3688 goto ud_error2; 3689 } 3690 UDP_STAT(us, udp_out_opt); 3691 error = udp_output_ancillary(connp, NULL, sin6, 3692 data_mp, mp, NULL, cr, pid); 3693 } else { 3694 ip_xmit_attr_t *ixa; 3695 3696 /* 3697 * We have to allocate an ip_xmit_attr_t before we grab 3698 * conn_lock and we need to hold conn_lock once we've 3699 * checked conn_same_as_last_v6 to handle concurrent 3700 * send* calls on a socket. 3701 */ 3702 ixa = conn_get_ixa(connp, B_FALSE); 3703 if (ixa == NULL) { 3704 error = ENOMEM; 3705 goto ud_error2; 3706 } 3707 mutex_enter(&connp->conn_lock); 3708 3709 if (conn_same_as_last_v6(connp, sin6) && 3710 connp->conn_lastsrcid == srcid && 3711 ipsec_outbound_policy_current(ixa)) { 3712 UDP_DBGSTAT(us, udp_out_lastdst); 3713 /* udp_output_lastdst drops conn_lock */ 3714 error = udp_output_lastdst(connp, data_mp, cr, 3715 pid, ixa); 3716 } else { 3717 UDP_DBGSTAT(us, udp_out_diffdst); 3718 /* udp_output_newdst drops conn_lock */ 3719 error = udp_output_newdst(connp, data_mp, NULL, 3720 sin6, ipversion, cr, pid, ixa); 3721 } 3722 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3723 } 3724 if (error == 0) { 3725 freeb(mp); 3726 return (0); 3727 } 3728 break; 3729 3730 case AF_INET: 3731 sin = (sin_t *)addr; 3732 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 3733 (sin->sin_family != AF_INET)) { 3734 error = EADDRNOTAVAIL; 3735 goto ud_error2; 3736 } 3737 UDP_DBGSTAT(us, udp_out_ipv4); 3738 if (sin->sin_addr.s_addr == INADDR_ANY) 3739 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 3740 ipversion = IPV4_VERSION; 3741 3742 srcid = 0; 3743 if (tudr->OPT_length != 0) { 3744 /* 3745 * If we are connected then the destination needs to be 3746 * the same as the connected one. 3747 */ 3748 if (udp->udp_state == TS_DATA_XFER && 3749 !conn_same_as_last_v4(connp, sin)) { 3750 error = EISCONN; 3751 goto ud_error2; 3752 } 3753 UDP_STAT(us, udp_out_opt); 3754 error = udp_output_ancillary(connp, sin, NULL, 3755 data_mp, mp, NULL, cr, pid); 3756 } else { 3757 ip_xmit_attr_t *ixa; 3758 3759 /* 3760 * We have to allocate an ip_xmit_attr_t before we grab 3761 * conn_lock and we need to hold conn_lock once we've 3762 * checked conn_same_as_last_v4 to handle concurrent 3763 * send* calls on a socket. 3764 */ 3765 ixa = conn_get_ixa(connp, B_FALSE); 3766 if (ixa == NULL) { 3767 error = ENOMEM; 3768 goto ud_error2; 3769 } 3770 mutex_enter(&connp->conn_lock); 3771 3772 if (conn_same_as_last_v4(connp, sin) && 3773 ipsec_outbound_policy_current(ixa)) { 3774 UDP_DBGSTAT(us, udp_out_lastdst); 3775 /* udp_output_lastdst drops conn_lock */ 3776 error = udp_output_lastdst(connp, data_mp, cr, 3777 pid, ixa); 3778 } else { 3779 UDP_DBGSTAT(us, udp_out_diffdst); 3780 /* udp_output_newdst drops conn_lock */ 3781 error = udp_output_newdst(connp, data_mp, sin, 3782 NULL, ipversion, cr, pid, ixa); 3783 } 3784 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3785 } 3786 if (error == 0) { 3787 freeb(mp); 3788 return (0); 3789 } 3790 break; 3791 } 3792 UDP_STAT(us, udp_out_err_output); 3793 ASSERT(mp != NULL); 3794 /* mp is freed by the following routine */ 3795 udp_ud_err(q, mp, (t_scalar_t)error); 3796 return (0); 3797 3798 ud_error2: 3799 UDPS_BUMP_MIB(us, udpOutErrors); 3800 freemsg(data_mp); 3801 UDP_STAT(us, udp_out_err_output); 3802 ASSERT(mp != NULL); 3803 /* mp is freed by the following routine */ 3804 udp_ud_err(q, mp, (t_scalar_t)error); 3805 return (0); 3806 } 3807 3808 /* 3809 * Handle the case of the IP address, port, flow label being different 3810 * for both IPv4 and IPv6. 3811 * 3812 * NOTE: The caller must hold conn_lock and we drop it here. 3813 */ 3814 static int 3815 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 3816 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 3817 { 3818 uint_t srcid; 3819 uint32_t flowinfo; 3820 udp_t *udp = connp->conn_udp; 3821 int error = 0; 3822 ip_xmit_attr_t *oldixa; 3823 udp_stack_t *us = udp->udp_us; 3824 in6_addr_t v6src; 3825 in6_addr_t v6dst; 3826 in6_addr_t v6nexthop; 3827 in_port_t dstport; 3828 3829 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3830 ASSERT(ixa != NULL); 3831 /* 3832 * We hold conn_lock across all the use and modifications of 3833 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 3834 * stay consistent. 3835 */ 3836 3837 ASSERT(cr != NULL); 3838 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3839 ixa->ixa_cred = cr; 3840 ixa->ixa_cpid = pid; 3841 if (is_system_labeled()) { 3842 /* We need to restart with a label based on the cred */ 3843 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3844 } 3845 3846 /* 3847 * If we are connected then the destination needs to be the 3848 * same as the connected one, which is not the case here since we 3849 * checked for that above. 3850 */ 3851 if (udp->udp_state == TS_DATA_XFER) { 3852 mutex_exit(&connp->conn_lock); 3853 error = EISCONN; 3854 goto ud_error; 3855 } 3856 3857 /* 3858 * Before we modify the ixa at all, invalidate our most recent address 3859 * to assure that any subsequent call to conn_same_as_last_v6() will 3860 * not indicate a match: any thread that picks up conn_lock after we 3861 * drop it (but before we pick it up again and properly set the most 3862 * recent address) must not associate the ixa with the (now old) last 3863 * address. 3864 */ 3865 connp->conn_v6lastdst = ipv6_all_zeros; 3866 3867 /* In case previous destination was multicast or multirt */ 3868 ip_attr_newdst(ixa); 3869 3870 /* 3871 * If laddr is unspecified then we look at sin6_src_id. 3872 * We will give precedence to a source address set with IPV6_PKTINFO 3873 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 3874 * want ip_attr_connect to select a source (since it can fail) when 3875 * IPV6_PKTINFO is specified. 3876 * If this doesn't result in a source address then we get a source 3877 * from ip_attr_connect() below. 3878 */ 3879 v6src = connp->conn_saddr_v6; 3880 if (sin != NULL) { 3881 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 3882 dstport = sin->sin_port; 3883 flowinfo = 0; 3884 /* Don't bother with ip_srcid_find_id(), but indicate anyway. */ 3885 srcid = 0; 3886 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3887 ixa->ixa_flags |= IXAF_IS_IPV4; 3888 } else { 3889 boolean_t v4mapped; 3890 3891 v6dst = sin6->sin6_addr; 3892 dstport = sin6->sin6_port; 3893 flowinfo = sin6->sin6_flowinfo; 3894 srcid = sin6->__sin6_src_id; 3895 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 3896 ixa->ixa_scopeid = sin6->sin6_scope_id; 3897 ixa->ixa_flags |= IXAF_SCOPEID_SET; 3898 } else { 3899 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3900 } 3901 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst); 3902 if (v4mapped) 3903 ixa->ixa_flags |= IXAF_IS_IPV4; 3904 else 3905 ixa->ixa_flags &= ~IXAF_IS_IPV4; 3906 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 3907 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3908 v4mapped, connp->conn_netstack)) { 3909 /* Mismatched v4mapped/v6 specified by srcid. */ 3910 mutex_exit(&connp->conn_lock); 3911 error = EADDRNOTAVAIL; 3912 goto ud_error; 3913 } 3914 } 3915 } 3916 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ 3917 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) { 3918 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 3919 3920 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3921 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3922 v6src = ipp->ipp_addr; 3923 } else { 3924 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3925 v6src = ipp->ipp_addr; 3926 } 3927 } 3928 3929 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 3930 mutex_exit(&connp->conn_lock); 3931 3932 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 3933 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 3934 switch (error) { 3935 case 0: 3936 break; 3937 case EADDRNOTAVAIL: 3938 /* 3939 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3940 * Don't have the application see that errno 3941 */ 3942 error = ENETUNREACH; 3943 goto failed; 3944 case ENETDOWN: 3945 /* 3946 * Have !ipif_addr_ready address; drop packet silently 3947 * until we can get applications to not send until we 3948 * are ready. 3949 */ 3950 error = 0; 3951 goto failed; 3952 case EHOSTUNREACH: 3953 case ENETUNREACH: 3954 if (ixa->ixa_ire != NULL) { 3955 /* 3956 * Let conn_ip_output/ire_send_noroute return 3957 * the error and send any local ICMP error. 3958 */ 3959 error = 0; 3960 break; 3961 } 3962 /* FALLTHRU */ 3963 failed: 3964 default: 3965 goto ud_error; 3966 } 3967 3968 3969 /* 3970 * Cluster note: we let the cluster hook know that we are sending to a 3971 * new address and/or port. 3972 */ 3973 if (cl_inet_connect2 != NULL) { 3974 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 3975 if (error != 0) { 3976 error = EHOSTUNREACH; 3977 goto ud_error; 3978 } 3979 } 3980 3981 mutex_enter(&connp->conn_lock); 3982 /* 3983 * While we dropped the lock some other thread might have connected 3984 * this socket. If so we bail out with EISCONN to ensure that the 3985 * connecting thread is the one that updates conn_ixa, conn_ht_* 3986 * and conn_*last*. 3987 */ 3988 if (udp->udp_state == TS_DATA_XFER) { 3989 mutex_exit(&connp->conn_lock); 3990 error = EISCONN; 3991 goto ud_error; 3992 } 3993 3994 /* 3995 * We need to rebuild the headers if 3996 * - we are labeling packets (could be different for different 3997 * destinations) 3998 * - we have a source route (or routing header) since we need to 3999 * massage that to get the pseudo-header checksum 4000 * - the IP version is different than the last time 4001 * - a socket option with COA_HEADER_CHANGED has been set which 4002 * set conn_v6lastdst to zero. 4003 * 4004 * Otherwise the prepend function will just update the src, dst, 4005 * dstport, and flow label. 4006 */ 4007 if (is_system_labeled()) { 4008 /* TX MLP requires SCM_UCRED and don't have that here */ 4009 if (connp->conn_mlp_type != mlptSingle) { 4010 mutex_exit(&connp->conn_lock); 4011 error = ECONNREFUSED; 4012 goto ud_error; 4013 } 4014 /* 4015 * Check whether Trusted Solaris policy allows communication 4016 * with this host, and pretend that the destination is 4017 * unreachable if not. 4018 * Compute any needed label and place it in ipp_label_v4/v6. 4019 * 4020 * Later conn_build_hdr_template/conn_prepend_hdr takes 4021 * ipp_label_v4/v6 to form the packet. 4022 * 4023 * Tsol note: Since we hold conn_lock we know no other 4024 * thread manipulates conn_xmit_ipp. 4025 */ 4026 error = conn_update_label(connp, ixa, &v6dst, 4027 &connp->conn_xmit_ipp); 4028 if (error != 0) { 4029 mutex_exit(&connp->conn_lock); 4030 goto ud_error; 4031 } 4032 /* Rebuild the header template */ 4033 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4034 flowinfo); 4035 if (error != 0) { 4036 mutex_exit(&connp->conn_lock); 4037 goto ud_error; 4038 } 4039 } else if ((connp->conn_xmit_ipp.ipp_fields & 4040 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 4041 ipversion != connp->conn_lastipversion || 4042 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 4043 /* Rebuild the header template */ 4044 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4045 flowinfo); 4046 if (error != 0) { 4047 mutex_exit(&connp->conn_lock); 4048 goto ud_error; 4049 } 4050 } else { 4051 /* Simply update the destination address if no source route */ 4052 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4053 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 4054 4055 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 4056 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 4057 ipha->ipha_fragment_offset_and_flags |= 4058 IPH_DF_HTONS; 4059 } else { 4060 ipha->ipha_fragment_offset_and_flags &= 4061 ~IPH_DF_HTONS; 4062 } 4063 } else { 4064 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 4065 ip6h->ip6_dst = v6dst; 4066 } 4067 } 4068 4069 /* 4070 * Remember the dst/dstport etc which corresponds to the built header 4071 * template and conn_ixa. 4072 */ 4073 oldixa = conn_replace_ixa(connp, ixa); 4074 connp->conn_v6lastdst = v6dst; 4075 connp->conn_lastipversion = ipversion; 4076 connp->conn_lastdstport = dstport; 4077 connp->conn_lastflowinfo = flowinfo; 4078 connp->conn_lastscopeid = ixa->ixa_scopeid; 4079 connp->conn_lastsrcid = srcid; 4080 /* Also remember a source to use together with lastdst */ 4081 connp->conn_v6lastsrc = v6src; 4082 4083 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 4084 dstport, flowinfo, &error); 4085 4086 /* Done with conn_t */ 4087 mutex_exit(&connp->conn_lock); 4088 ixa_refrele(oldixa); 4089 4090 if (data_mp == NULL) { 4091 ASSERT(error != 0); 4092 goto ud_error; 4093 } 4094 4095 /* We're done. Pass the packet to ip. */ 4096 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 4097 4098 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 4099 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *, 4100 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]); 4101 4102 error = conn_ip_output(data_mp, ixa); 4103 /* No udpOutErrors if an error since IP increases its error counter */ 4104 switch (error) { 4105 case 0: 4106 break; 4107 case EWOULDBLOCK: 4108 (void) ixa_check_drain_insert(connp, ixa); 4109 error = 0; 4110 break; 4111 case EADDRNOTAVAIL: 4112 /* 4113 * IXAF_VERIFY_SOURCE tells us to pick a better source. 4114 * Don't have the application see that errno 4115 */ 4116 error = ENETUNREACH; 4117 /* FALLTHRU */ 4118 default: 4119 mutex_enter(&connp->conn_lock); 4120 /* 4121 * Clear the source and v6lastdst so we call ip_attr_connect 4122 * for the next packet and try to pick a better source. 4123 */ 4124 if (connp->conn_mcbc_bind) 4125 connp->conn_saddr_v6 = ipv6_all_zeros; 4126 else 4127 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 4128 connp->conn_v6lastdst = ipv6_all_zeros; 4129 mutex_exit(&connp->conn_lock); 4130 break; 4131 } 4132 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4133 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4134 ixa->ixa_cpid = connp->conn_cpid; 4135 ixa_refrele(ixa); 4136 return (error); 4137 4138 ud_error: 4139 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4140 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4141 ixa->ixa_cpid = connp->conn_cpid; 4142 ixa_refrele(ixa); 4143 4144 freemsg(data_mp); 4145 UDPS_BUMP_MIB(us, udpOutErrors); 4146 UDP_STAT(us, udp_out_err_output); 4147 return (error); 4148 } 4149 4150 /* ARGSUSED */ 4151 static int 4152 udp_wput_fallback(queue_t *wq, mblk_t *mp) 4153 { 4154 #ifdef DEBUG 4155 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 4156 #endif 4157 freemsg(mp); 4158 return (0); 4159 } 4160 4161 4162 /* 4163 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 4164 */ 4165 static void 4166 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 4167 { 4168 void *data; 4169 mblk_t *datamp = mp->b_cont; 4170 conn_t *connp = Q_TO_CONN(q); 4171 udp_t *udp = connp->conn_udp; 4172 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 4173 4174 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 4175 cmdp->cb_error = EPROTO; 4176 qreply(q, mp); 4177 return; 4178 } 4179 data = datamp->b_rptr; 4180 4181 mutex_enter(&connp->conn_lock); 4182 switch (cmdp->cb_cmd) { 4183 case TI_GETPEERNAME: 4184 if (udp->udp_state != TS_DATA_XFER) 4185 cmdp->cb_error = ENOTCONN; 4186 else 4187 cmdp->cb_error = conn_getpeername(connp, data, 4188 &cmdp->cb_len); 4189 break; 4190 case TI_GETMYNAME: 4191 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); 4192 break; 4193 default: 4194 cmdp->cb_error = EINVAL; 4195 break; 4196 } 4197 mutex_exit(&connp->conn_lock); 4198 4199 qreply(q, mp); 4200 } 4201 4202 static void 4203 udp_use_pure_tpi(udp_t *udp) 4204 { 4205 conn_t *connp = udp->udp_connp; 4206 4207 mutex_enter(&connp->conn_lock); 4208 udp->udp_issocket = B_FALSE; 4209 mutex_exit(&connp->conn_lock); 4210 UDP_STAT(udp->udp_us, udp_sock_fallback); 4211 } 4212 4213 static void 4214 udp_wput_other(queue_t *q, mblk_t *mp) 4215 { 4216 uchar_t *rptr = mp->b_rptr; 4217 struct iocblk *iocp; 4218 conn_t *connp = Q_TO_CONN(q); 4219 udp_t *udp = connp->conn_udp; 4220 cred_t *cr; 4221 4222 switch (mp->b_datap->db_type) { 4223 case M_CMD: 4224 udp_wput_cmdblk(q, mp); 4225 return; 4226 4227 case M_PROTO: 4228 case M_PCPROTO: 4229 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4230 /* 4231 * If the message does not contain a PRIM_type, 4232 * throw it away. 4233 */ 4234 freemsg(mp); 4235 return; 4236 } 4237 switch (((t_primp_t)rptr)->type) { 4238 case T_ADDR_REQ: 4239 udp_addr_req(q, mp); 4240 return; 4241 case O_T_BIND_REQ: 4242 case T_BIND_REQ: 4243 udp_tpi_bind(q, mp); 4244 return; 4245 case T_CONN_REQ: 4246 udp_tpi_connect(q, mp); 4247 return; 4248 case T_CAPABILITY_REQ: 4249 udp_capability_req(q, mp); 4250 return; 4251 case T_INFO_REQ: 4252 udp_info_req(q, mp); 4253 return; 4254 case T_UNITDATA_REQ: 4255 /* 4256 * If a T_UNITDATA_REQ gets here, the address must 4257 * be bad. Valid T_UNITDATA_REQs are handled 4258 * in udp_wput. 4259 */ 4260 udp_ud_err(q, mp, EADDRNOTAVAIL); 4261 return; 4262 case T_UNBIND_REQ: 4263 udp_tpi_unbind(q, mp); 4264 return; 4265 case T_SVR4_OPTMGMT_REQ: 4266 /* 4267 * All Solaris components should pass a db_credp 4268 * for this TPI message, hence we ASSERT. 4269 * But in case there is some other M_PROTO that looks 4270 * like a TPI message sent by some other kernel 4271 * component, we check and return an error. 4272 */ 4273 cr = msg_getcred(mp, NULL); 4274 ASSERT(cr != NULL); 4275 if (cr == NULL) { 4276 udp_err_ack(q, mp, TSYSERR, EINVAL); 4277 return; 4278 } 4279 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 4280 cr)) { 4281 svr4_optcom_req(q, mp, cr, &udp_opt_obj); 4282 } 4283 return; 4284 4285 case T_OPTMGMT_REQ: 4286 /* 4287 * All Solaris components should pass a db_credp 4288 * for this TPI message, hence we ASSERT. 4289 * But in case there is some other M_PROTO that looks 4290 * like a TPI message sent by some other kernel 4291 * component, we check and return an error. 4292 */ 4293 cr = msg_getcred(mp, NULL); 4294 ASSERT(cr != NULL); 4295 if (cr == NULL) { 4296 udp_err_ack(q, mp, TSYSERR, EINVAL); 4297 return; 4298 } 4299 tpi_optcom_req(q, mp, cr, &udp_opt_obj); 4300 return; 4301 4302 case T_DISCON_REQ: 4303 udp_tpi_disconnect(q, mp); 4304 return; 4305 4306 /* The following TPI message is not supported by udp. */ 4307 case O_T_CONN_RES: 4308 case T_CONN_RES: 4309 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4310 return; 4311 4312 /* The following 3 TPI requests are illegal for udp. */ 4313 case T_DATA_REQ: 4314 case T_EXDATA_REQ: 4315 case T_ORDREL_REQ: 4316 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4317 return; 4318 default: 4319 break; 4320 } 4321 break; 4322 case M_FLUSH: 4323 if (*rptr & FLUSHW) 4324 flushq(q, FLUSHDATA); 4325 break; 4326 case M_IOCTL: 4327 iocp = (struct iocblk *)mp->b_rptr; 4328 switch (iocp->ioc_cmd) { 4329 case TI_GETPEERNAME: 4330 if (udp->udp_state != TS_DATA_XFER) { 4331 /* 4332 * If a default destination address has not 4333 * been associated with the stream, then we 4334 * don't know the peer's name. 4335 */ 4336 iocp->ioc_error = ENOTCONN; 4337 iocp->ioc_count = 0; 4338 mp->b_datap->db_type = M_IOCACK; 4339 qreply(q, mp); 4340 return; 4341 } 4342 /* FALLTHRU */ 4343 case TI_GETMYNAME: 4344 /* 4345 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4346 * need to copyin the user's strbuf structure. 4347 * Processing will continue in the M_IOCDATA case 4348 * below. 4349 */ 4350 mi_copyin(q, mp, NULL, 4351 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4352 return; 4353 case _SIOCSOCKFALLBACK: 4354 /* 4355 * Either sockmod is about to be popped and the 4356 * socket would now be treated as a plain stream, 4357 * or a module is about to be pushed so we have 4358 * to follow pure TPI semantics. 4359 */ 4360 if (!udp->udp_issocket) { 4361 DB_TYPE(mp) = M_IOCNAK; 4362 iocp->ioc_error = EINVAL; 4363 } else { 4364 udp_use_pure_tpi(udp); 4365 4366 DB_TYPE(mp) = M_IOCACK; 4367 iocp->ioc_error = 0; 4368 } 4369 iocp->ioc_count = 0; 4370 iocp->ioc_rval = 0; 4371 qreply(q, mp); 4372 return; 4373 default: 4374 break; 4375 } 4376 break; 4377 case M_IOCDATA: 4378 udp_wput_iocdata(q, mp); 4379 return; 4380 default: 4381 /* Unrecognized messages are passed through without change. */ 4382 break; 4383 } 4384 ip_wput_nondata(q, mp); 4385 } 4386 4387 /* 4388 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 4389 * messages. 4390 */ 4391 static void 4392 udp_wput_iocdata(queue_t *q, mblk_t *mp) 4393 { 4394 mblk_t *mp1; 4395 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4396 STRUCT_HANDLE(strbuf, sb); 4397 uint_t addrlen; 4398 conn_t *connp = Q_TO_CONN(q); 4399 udp_t *udp = connp->conn_udp; 4400 4401 /* Make sure it is one of ours. */ 4402 switch (iocp->ioc_cmd) { 4403 case TI_GETMYNAME: 4404 case TI_GETPEERNAME: 4405 break; 4406 default: 4407 ip_wput_nondata(q, mp); 4408 return; 4409 } 4410 4411 switch (mi_copy_state(q, mp, &mp1)) { 4412 case -1: 4413 return; 4414 case MI_COPY_CASE(MI_COPY_IN, 1): 4415 break; 4416 case MI_COPY_CASE(MI_COPY_OUT, 1): 4417 /* 4418 * The address has been copied out, so now 4419 * copyout the strbuf. 4420 */ 4421 mi_copyout(q, mp); 4422 return; 4423 case MI_COPY_CASE(MI_COPY_OUT, 2): 4424 /* 4425 * The address and strbuf have been copied out. 4426 * We're done, so just acknowledge the original 4427 * M_IOCTL. 4428 */ 4429 mi_copy_done(q, mp, 0); 4430 return; 4431 default: 4432 /* 4433 * Something strange has happened, so acknowledge 4434 * the original M_IOCTL with an EPROTO error. 4435 */ 4436 mi_copy_done(q, mp, EPROTO); 4437 return; 4438 } 4439 4440 /* 4441 * Now we have the strbuf structure for TI_GETMYNAME 4442 * and TI_GETPEERNAME. Next we copyout the requested 4443 * address and then we'll copyout the strbuf. 4444 */ 4445 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 4446 4447 if (connp->conn_family == AF_INET) 4448 addrlen = sizeof (sin_t); 4449 else 4450 addrlen = sizeof (sin6_t); 4451 4452 if (STRUCT_FGET(sb, maxlen) < addrlen) { 4453 mi_copy_done(q, mp, EINVAL); 4454 return; 4455 } 4456 4457 switch (iocp->ioc_cmd) { 4458 case TI_GETMYNAME: 4459 break; 4460 case TI_GETPEERNAME: 4461 if (udp->udp_state != TS_DATA_XFER) { 4462 mi_copy_done(q, mp, ENOTCONN); 4463 return; 4464 } 4465 break; 4466 } 4467 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 4468 if (!mp1) 4469 return; 4470 4471 STRUCT_FSET(sb, len, addrlen); 4472 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4473 case TI_GETMYNAME: 4474 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, 4475 &addrlen); 4476 break; 4477 case TI_GETPEERNAME: 4478 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, 4479 &addrlen); 4480 break; 4481 } 4482 mp1->b_wptr += addrlen; 4483 /* Copy out the address */ 4484 mi_copyout(q, mp); 4485 } 4486 4487 void 4488 udp_ddi_g_init(void) 4489 { 4490 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 4491 udp_opt_obj.odb_opt_arr_cnt); 4492 4493 /* 4494 * We want to be informed each time a stack is created or 4495 * destroyed in the kernel, so we can maintain the 4496 * set of udp_stack_t's. 4497 */ 4498 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 4499 } 4500 4501 void 4502 udp_ddi_g_destroy(void) 4503 { 4504 netstack_unregister(NS_UDP); 4505 } 4506 4507 #define INET_NAME "ip" 4508 4509 /* 4510 * Initialize the UDP stack instance. 4511 */ 4512 static void * 4513 udp_stack_init(netstackid_t stackid, netstack_t *ns) 4514 { 4515 udp_stack_t *us; 4516 int i; 4517 int error = 0; 4518 major_t major; 4519 size_t arrsz; 4520 4521 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 4522 us->us_netstack = ns; 4523 4524 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 4525 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 4526 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 4527 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 4528 4529 /* 4530 * The smallest anonymous port in the priviledged port range which UDP 4531 * looks for free port. Use in the option UDP_ANONPRIVBIND. 4532 */ 4533 us->us_min_anonpriv_port = 512; 4534 4535 us->us_bind_fanout_size = udp_bind_fanout_size; 4536 4537 /* Roundup variable that might have been modified in /etc/system */ 4538 if (!ISP2(us->us_bind_fanout_size)) { 4539 /* Not a power of two. Round up to nearest power of two */ 4540 for (i = 0; i < 31; i++) { 4541 if (us->us_bind_fanout_size < (1 << i)) 4542 break; 4543 } 4544 us->us_bind_fanout_size = 1 << i; 4545 } 4546 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 4547 sizeof (udp_fanout_t), KM_SLEEP); 4548 for (i = 0; i < us->us_bind_fanout_size; i++) { 4549 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 4550 NULL); 4551 } 4552 4553 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t); 4554 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, 4555 KM_SLEEP); 4556 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz); 4557 4558 /* Allocate the per netstack stats */ 4559 mutex_enter(&cpu_lock); 4560 us->us_sc_cnt = MAX(ncpus, boot_ncpus); 4561 mutex_exit(&cpu_lock); 4562 us->us_sc = kmem_zalloc(max_ncpus * sizeof (udp_stats_cpu_t *), 4563 KM_SLEEP); 4564 for (i = 0; i < us->us_sc_cnt; i++) { 4565 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), 4566 KM_SLEEP); 4567 } 4568 4569 us->us_kstat = udp_kstat2_init(stackid); 4570 us->us_mibkp = udp_kstat_init(stackid); 4571 4572 major = mod_name_to_major(INET_NAME); 4573 error = ldi_ident_from_major(major, &us->us_ldi_ident); 4574 ASSERT(error == 0); 4575 return (us); 4576 } 4577 4578 /* 4579 * Free the UDP stack instance. 4580 */ 4581 static void 4582 udp_stack_fini(netstackid_t stackid, void *arg) 4583 { 4584 udp_stack_t *us = (udp_stack_t *)arg; 4585 int i; 4586 4587 for (i = 0; i < us->us_bind_fanout_size; i++) { 4588 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 4589 } 4590 4591 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 4592 sizeof (udp_fanout_t)); 4593 4594 us->us_bind_fanout = NULL; 4595 4596 for (i = 0; i < us->us_sc_cnt; i++) 4597 kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t)); 4598 kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *)); 4599 4600 kmem_free(us->us_propinfo_tbl, 4601 udp_propinfo_count * sizeof (mod_prop_info_t)); 4602 us->us_propinfo_tbl = NULL; 4603 4604 udp_kstat_fini(stackid, us->us_mibkp); 4605 us->us_mibkp = NULL; 4606 4607 udp_kstat2_fini(stackid, us->us_kstat); 4608 us->us_kstat = NULL; 4609 4610 mutex_destroy(&us->us_epriv_port_lock); 4611 ldi_ident_release(us->us_ldi_ident); 4612 kmem_free(us, sizeof (*us)); 4613 } 4614 4615 static size_t 4616 udp_set_rcv_hiwat(udp_t *udp, size_t size) 4617 { 4618 udp_stack_t *us = udp->udp_us; 4619 4620 /* We add a bit of extra buffering */ 4621 size += size >> 1; 4622 if (size > us->us_max_buf) 4623 size = us->us_max_buf; 4624 4625 udp->udp_rcv_hiwat = size; 4626 return (size); 4627 } 4628 4629 /* 4630 * For the lower queue so that UDP can be a dummy mux. 4631 * Nobody should be sending 4632 * packets up this stream 4633 */ 4634 static int 4635 udp_lrput(queue_t *q, mblk_t *mp) 4636 { 4637 switch (mp->b_datap->db_type) { 4638 case M_FLUSH: 4639 /* Turn around */ 4640 if (*mp->b_rptr & FLUSHW) { 4641 *mp->b_rptr &= ~FLUSHR; 4642 qreply(q, mp); 4643 return (0); 4644 } 4645 break; 4646 } 4647 freemsg(mp); 4648 return (0); 4649 } 4650 4651 /* 4652 * For the lower queue so that UDP can be a dummy mux. 4653 * Nobody should be sending packets down this stream. 4654 */ 4655 /* ARGSUSED */ 4656 int 4657 udp_lwput(queue_t *q, mblk_t *mp) 4658 { 4659 freemsg(mp); 4660 return (0); 4661 } 4662 4663 /* 4664 * When a CPU is added, we need to allocate the per CPU stats struct. 4665 */ 4666 void 4667 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid) 4668 { 4669 int i; 4670 4671 if (cpu_seqid < us->us_sc_cnt) 4672 return; 4673 for (i = us->us_sc_cnt; i <= cpu_seqid; i++) { 4674 ASSERT(us->us_sc[i] == NULL); 4675 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), 4676 KM_SLEEP); 4677 } 4678 membar_producer(); 4679 us->us_sc_cnt = cpu_seqid + 1; 4680 } 4681 4682 /* 4683 * Below routines for UDP socket module. 4684 */ 4685 4686 static conn_t * 4687 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp) 4688 { 4689 udp_t *udp; 4690 conn_t *connp; 4691 zoneid_t zoneid; 4692 netstack_t *ns; 4693 udp_stack_t *us; 4694 int len; 4695 4696 ASSERT(errorp != NULL); 4697 4698 if ((*errorp = secpolicy_basic_net_access(credp)) != 0) 4699 return (NULL); 4700 4701 ns = netstack_find_by_cred(credp); 4702 ASSERT(ns != NULL); 4703 us = ns->netstack_udp; 4704 ASSERT(us != NULL); 4705 4706 /* 4707 * For exclusive stacks we set the zoneid to zero 4708 * to make UDP operate as if in the global zone. 4709 */ 4710 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 4711 zoneid = GLOBAL_ZONEID; 4712 else 4713 zoneid = crgetzoneid(credp); 4714 4715 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 4716 4717 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 4718 if (connp == NULL) { 4719 netstack_rele(ns); 4720 *errorp = ENOMEM; 4721 return (NULL); 4722 } 4723 udp = connp->conn_udp; 4724 4725 /* 4726 * ipcl_conn_create did a netstack_hold. Undo the hold that was 4727 * done by netstack_find_by_cred() 4728 */ 4729 netstack_rele(ns); 4730 4731 /* 4732 * Since this conn_t/udp_t is not yet visible to anybody else we don't 4733 * need to lock anything. 4734 */ 4735 ASSERT(connp->conn_proto == IPPROTO_UDP); 4736 ASSERT(connp->conn_udp == udp); 4737 ASSERT(udp->udp_connp == connp); 4738 4739 /* Set the initial state of the stream and the privilege status. */ 4740 udp->udp_state = TS_UNBND; 4741 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 4742 if (isv6) { 4743 connp->conn_family = AF_INET6; 4744 connp->conn_ipversion = IPV6_VERSION; 4745 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 4746 connp->conn_default_ttl = us->us_ipv6_hoplimit; 4747 len = sizeof (ip6_t) + UDPH_SIZE; 4748 } else { 4749 connp->conn_family = AF_INET; 4750 connp->conn_ipversion = IPV4_VERSION; 4751 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 4752 connp->conn_default_ttl = us->us_ipv4_ttl; 4753 len = sizeof (ipha_t) + UDPH_SIZE; 4754 } 4755 4756 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 4757 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 4758 4759 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 4760 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 4761 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 4762 connp->conn_ixa->ixa_zoneid = zoneid; 4763 4764 connp->conn_zoneid = zoneid; 4765 4766 /* 4767 * If the caller has the process-wide flag set, then default to MAC 4768 * exempt mode. This allows read-down to unlabeled hosts. 4769 */ 4770 if (getpflags(NET_MAC_AWARE, credp) != 0) 4771 connp->conn_mac_mode = CONN_MAC_AWARE; 4772 4773 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 4774 4775 udp->udp_us = us; 4776 4777 connp->conn_rcvbuf = us->us_recv_hiwat; 4778 connp->conn_sndbuf = us->us_xmit_hiwat; 4779 connp->conn_sndlowat = us->us_xmit_lowat; 4780 connp->conn_rcvlowat = udp_mod_info.mi_lowat; 4781 4782 connp->conn_wroff = len + us->us_wroff_extra; 4783 connp->conn_so_type = SOCK_DGRAM; 4784 4785 connp->conn_recv = udp_input; 4786 connp->conn_recvicmp = udp_icmp_input; 4787 crhold(credp); 4788 connp->conn_cred = credp; 4789 connp->conn_cpid = curproc->p_pid; 4790 connp->conn_open_time = ddi_get_lbolt64(); 4791 /* Cache things in ixa without an extra refhold */ 4792 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 4793 connp->conn_ixa->ixa_cred = connp->conn_cred; 4794 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 4795 if (is_system_labeled()) 4796 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 4797 4798 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 4799 4800 if (us->us_pmtu_discovery) 4801 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 4802 4803 return (connp); 4804 } 4805 4806 sock_lower_handle_t 4807 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 4808 uint_t *smodep, int *errorp, int flags, cred_t *credp) 4809 { 4810 udp_t *udp = NULL; 4811 udp_stack_t *us; 4812 conn_t *connp; 4813 boolean_t isv6; 4814 4815 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 4816 (proto != 0 && proto != IPPROTO_UDP)) { 4817 *errorp = EPROTONOSUPPORT; 4818 return (NULL); 4819 } 4820 4821 if (family == AF_INET6) 4822 isv6 = B_TRUE; 4823 else 4824 isv6 = B_FALSE; 4825 4826 connp = udp_do_open(credp, isv6, flags, errorp); 4827 if (connp == NULL) 4828 return (NULL); 4829 4830 udp = connp->conn_udp; 4831 ASSERT(udp != NULL); 4832 us = udp->udp_us; 4833 ASSERT(us != NULL); 4834 4835 udp->udp_issocket = B_TRUE; 4836 connp->conn_flags |= IPCL_NONSTR; 4837 4838 /* 4839 * Set flow control 4840 * Since this conn_t/udp_t is not yet visible to anybody else we don't 4841 * need to lock anything. 4842 */ 4843 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); 4844 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; 4845 4846 connp->conn_flow_cntrld = B_FALSE; 4847 4848 mutex_enter(&connp->conn_lock); 4849 connp->conn_state_flags &= ~CONN_INCIPIENT; 4850 mutex_exit(&connp->conn_lock); 4851 4852 *errorp = 0; 4853 *smodep = SM_ATOMIC; 4854 *sock_downcalls = &sock_udp_downcalls; 4855 return ((sock_lower_handle_t)connp); 4856 } 4857 4858 /* ARGSUSED3 */ 4859 void 4860 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 4861 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 4862 { 4863 conn_t *connp = (conn_t *)proto_handle; 4864 struct sock_proto_props sopp; 4865 4866 /* All Solaris components should pass a cred for this operation. */ 4867 ASSERT(cr != NULL); 4868 4869 connp->conn_upcalls = sock_upcalls; 4870 connp->conn_upper_handle = sock_handle; 4871 4872 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 4873 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 4874 sopp.sopp_wroff = connp->conn_wroff; 4875 sopp.sopp_maxblk = INFPSZ; 4876 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 4877 sopp.sopp_rxlowat = connp->conn_rcvlowat; 4878 sopp.sopp_maxaddrlen = sizeof (sin6_t); 4879 sopp.sopp_maxpsz = 4880 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 4881 UDP_MAXPACKET_IPV6; 4882 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 4883 udp_mod_info.mi_minpsz; 4884 4885 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 4886 &sopp); 4887 } 4888 4889 static void 4890 udp_do_close(conn_t *connp) 4891 { 4892 udp_t *udp; 4893 4894 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 4895 udp = connp->conn_udp; 4896 4897 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 4898 /* 4899 * Running in cluster mode - register unbind information 4900 */ 4901 if (connp->conn_ipversion == IPV4_VERSION) { 4902 (*cl_inet_unbind)( 4903 connp->conn_netstack->netstack_stackid, 4904 IPPROTO_UDP, AF_INET, 4905 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 4906 (in_port_t)connp->conn_lport, NULL); 4907 } else { 4908 (*cl_inet_unbind)( 4909 connp->conn_netstack->netstack_stackid, 4910 IPPROTO_UDP, AF_INET6, 4911 (uint8_t *)&(connp->conn_laddr_v6), 4912 (in_port_t)connp->conn_lport, NULL); 4913 } 4914 } 4915 4916 udp_bind_hash_remove(udp, B_FALSE); 4917 4918 ip_quiesce_conn(connp); 4919 4920 if (!IPCL_IS_NONSTR(connp)) { 4921 ASSERT(connp->conn_wq != NULL); 4922 ASSERT(connp->conn_rq != NULL); 4923 qprocsoff(connp->conn_rq); 4924 } 4925 4926 udp_close_free(connp); 4927 4928 /* 4929 * Now we are truly single threaded on this stream, and can 4930 * delete the things hanging off the connp, and finally the connp. 4931 * We removed this connp from the fanout list, it cannot be 4932 * accessed thru the fanouts, and we already waited for the 4933 * conn_ref to drop to 0. We are already in close, so 4934 * there cannot be any other thread from the top. qprocsoff 4935 * has completed, and service has completed or won't run in 4936 * future. 4937 */ 4938 ASSERT(connp->conn_ref == 1); 4939 4940 if (!IPCL_IS_NONSTR(connp)) { 4941 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 4942 } else { 4943 ip_free_helper_stream(connp); 4944 } 4945 4946 connp->conn_ref--; 4947 ipcl_conn_destroy(connp); 4948 } 4949 4950 /* ARGSUSED1 */ 4951 int 4952 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 4953 { 4954 conn_t *connp = (conn_t *)proto_handle; 4955 4956 /* All Solaris components should pass a cred for this operation. */ 4957 ASSERT(cr != NULL); 4958 4959 udp_do_close(connp); 4960 return (0); 4961 } 4962 4963 static int 4964 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 4965 boolean_t bind_to_req_port_only) 4966 { 4967 sin_t *sin; 4968 sin6_t *sin6; 4969 udp_t *udp = connp->conn_udp; 4970 int error = 0; 4971 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 4972 in_port_t port; /* Host byte order */ 4973 in_port_t requested_port; /* Host byte order */ 4974 int count; 4975 ipaddr_t v4src; /* Set if AF_INET */ 4976 in6_addr_t v6src; 4977 int loopmax; 4978 udp_fanout_t *udpf; 4979 in_port_t lport; /* Network byte order */ 4980 uint_t scopeid = 0; 4981 zoneid_t zoneid = IPCL_ZONEID(connp); 4982 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4983 boolean_t is_inaddr_any; 4984 mlp_type_t addrtype, mlptype; 4985 udp_stack_t *us = udp->udp_us; 4986 4987 sin = NULL; 4988 sin6 = NULL; 4989 switch (len) { 4990 case sizeof (sin_t): /* Complete IPv4 address */ 4991 sin = (sin_t *)sa; 4992 4993 if (sin == NULL || !OK_32PTR((char *)sin)) 4994 return (EINVAL); 4995 4996 if (connp->conn_family != AF_INET || 4997 sin->sin_family != AF_INET) { 4998 return (EAFNOSUPPORT); 4999 } 5000 v4src = sin->sin_addr.s_addr; 5001 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 5002 if (v4src != INADDR_ANY) { 5003 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 5004 B_TRUE); 5005 } 5006 port = ntohs(sin->sin_port); 5007 break; 5008 5009 case sizeof (sin6_t): /* complete IPv6 address */ 5010 sin6 = (sin6_t *)sa; 5011 5012 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 5013 return (EINVAL); 5014 5015 if (connp->conn_family != AF_INET6 || 5016 sin6->sin6_family != AF_INET6) { 5017 return (EAFNOSUPPORT); 5018 } 5019 v6src = sin6->sin6_addr; 5020 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5021 if (connp->conn_ipv6_v6only) 5022 return (EADDRNOTAVAIL); 5023 5024 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); 5025 if (v4src != INADDR_ANY) { 5026 laddr_type = ip_laddr_verify_v4(v4src, 5027 zoneid, ipst, B_FALSE); 5028 } 5029 } else { 5030 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5031 if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 5032 scopeid = sin6->sin6_scope_id; 5033 laddr_type = ip_laddr_verify_v6(&v6src, 5034 zoneid, ipst, B_TRUE, scopeid); 5035 } 5036 } 5037 port = ntohs(sin6->sin6_port); 5038 break; 5039 5040 default: /* Invalid request */ 5041 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5042 "udp_bind: bad ADDR_length length %u", len); 5043 return (-TBADADDR); 5044 } 5045 5046 /* Is the local address a valid unicast, multicast, or broadcast? */ 5047 if (laddr_type == IPVL_BAD) 5048 return (EADDRNOTAVAIL); 5049 5050 requested_port = port; 5051 5052 if (requested_port == 0 || !bind_to_req_port_only) 5053 bind_to_req_port_only = B_FALSE; 5054 else /* T_BIND_REQ and requested_port != 0 */ 5055 bind_to_req_port_only = B_TRUE; 5056 5057 if (requested_port == 0) { 5058 /* 5059 * If the application passed in zero for the port number, it 5060 * doesn't care which port number we bind to. Get one in the 5061 * valid range. 5062 */ 5063 if (connp->conn_anon_priv_bind) { 5064 port = udp_get_next_priv_port(udp); 5065 } else { 5066 port = udp_update_next_port(udp, 5067 us->us_next_port_to_try, B_TRUE); 5068 } 5069 } else { 5070 /* 5071 * If the port is in the well-known privileged range, 5072 * make sure the caller was privileged. 5073 */ 5074 int i; 5075 boolean_t priv = B_FALSE; 5076 5077 if (port < us->us_smallest_nonpriv_port) { 5078 priv = B_TRUE; 5079 } else { 5080 for (i = 0; i < us->us_num_epriv_ports; i++) { 5081 if (port == us->us_epriv_ports[i]) { 5082 priv = B_TRUE; 5083 break; 5084 } 5085 } 5086 } 5087 5088 if (priv) { 5089 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 5090 return (-TACCES); 5091 } 5092 } 5093 5094 if (port == 0) 5095 return (-TNOADDR); 5096 5097 /* 5098 * The state must be TS_UNBND. TPI mandates that users must send 5099 * TPI primitives only 1 at a time and wait for the response before 5100 * sending the next primitive. 5101 */ 5102 mutex_enter(&connp->conn_lock); 5103 if (udp->udp_state != TS_UNBND) { 5104 mutex_exit(&connp->conn_lock); 5105 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5106 "udp_bind: bad state, %u", udp->udp_state); 5107 return (-TOUTSTATE); 5108 } 5109 /* 5110 * Copy the source address into our udp structure. This address 5111 * may still be zero; if so, IP will fill in the correct address 5112 * each time an outbound packet is passed to it. Since the udp is 5113 * not yet in the bind hash list, we don't grab the uf_lock to 5114 * change conn_ipversion 5115 */ 5116 if (connp->conn_family == AF_INET) { 5117 ASSERT(sin != NULL); 5118 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); 5119 } else { 5120 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5121 /* 5122 * no need to hold the uf_lock to set the conn_ipversion 5123 * since we are not yet in the fanout list 5124 */ 5125 connp->conn_ipversion = IPV4_VERSION; 5126 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5127 } else { 5128 connp->conn_ipversion = IPV6_VERSION; 5129 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5130 } 5131 } 5132 5133 /* 5134 * If conn_reuseaddr is not set, then we have to make sure that 5135 * the IP address and port number the application requested 5136 * (or we selected for the application) is not being used by 5137 * another stream. If another stream is already using the 5138 * requested IP address and port, the behavior depends on 5139 * "bind_to_req_port_only". If set the bind fails; otherwise we 5140 * search for any unused port to bind to the stream. 5141 * 5142 * As per the BSD semantics, as modified by the Deering multicast 5143 * changes, if conn_reuseaddr is set, then we allow multiple binds 5144 * to the same port independent of the local IP address. 5145 * 5146 * This is slightly different than in SunOS 4.X which did not 5147 * support IP multicast. Note that the change implemented by the 5148 * Deering multicast code effects all binds - not only binding 5149 * to IP multicast addresses. 5150 * 5151 * Note that when binding to port zero we ignore SO_REUSEADDR in 5152 * order to guarantee a unique port. 5153 */ 5154 5155 count = 0; 5156 if (connp->conn_anon_priv_bind) { 5157 /* 5158 * loopmax = (IPPORT_RESERVED-1) - 5159 * us->us_min_anonpriv_port + 1 5160 */ 5161 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 5162 } else { 5163 loopmax = us->us_largest_anon_port - 5164 us->us_smallest_anon_port + 1; 5165 } 5166 5167 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 5168 5169 for (;;) { 5170 udp_t *udp1; 5171 boolean_t found_exclbind = B_FALSE; 5172 conn_t *connp1; 5173 5174 /* 5175 * Walk through the list of udp streams bound to 5176 * requested port with the same IP address. 5177 */ 5178 lport = htons(port); 5179 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 5180 us->us_bind_fanout_size)]; 5181 mutex_enter(&udpf->uf_lock); 5182 for (udp1 = udpf->uf_udp; udp1 != NULL; 5183 udp1 = udp1->udp_bind_hash) { 5184 connp1 = udp1->udp_connp; 5185 5186 if (lport != connp1->conn_lport) 5187 continue; 5188 5189 /* 5190 * On a labeled system, we must treat bindings to ports 5191 * on shared IP addresses by sockets with MAC exemption 5192 * privilege as being in all zones, as there's 5193 * otherwise no way to identify the right receiver. 5194 */ 5195 if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) 5196 continue; 5197 5198 /* 5199 * If UDP_EXCLBIND is set for either the bound or 5200 * binding endpoint, the semantics of bind 5201 * is changed according to the following chart. 5202 * 5203 * spec = specified address (v4 or v6) 5204 * unspec = unspecified address (v4 or v6) 5205 * A = specified addresses are different for endpoints 5206 * 5207 * bound bind to allowed? 5208 * ------------------------------------- 5209 * unspec unspec no 5210 * unspec spec no 5211 * spec unspec no 5212 * spec spec yes if A 5213 * 5214 * For labeled systems, SO_MAC_EXEMPT behaves the same 5215 * as UDP_EXCLBIND, except that zoneid is ignored. 5216 */ 5217 if (connp1->conn_exclbind || connp->conn_exclbind || 5218 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 5219 if (V6_OR_V4_INADDR_ANY( 5220 connp1->conn_bound_addr_v6) || 5221 is_inaddr_any || 5222 IN6_ARE_ADDR_EQUAL( 5223 &connp1->conn_bound_addr_v6, 5224 &v6src)) { 5225 found_exclbind = B_TRUE; 5226 break; 5227 } 5228 continue; 5229 } 5230 5231 /* 5232 * Check ipversion to allow IPv4 and IPv6 sockets to 5233 * have disjoint port number spaces. 5234 */ 5235 if (connp->conn_ipversion != connp1->conn_ipversion) { 5236 5237 /* 5238 * On the first time through the loop, if the 5239 * the user intentionally specified a 5240 * particular port number, then ignore any 5241 * bindings of the other protocol that may 5242 * conflict. This allows the user to bind IPv6 5243 * alone and get both v4 and v6, or bind both 5244 * both and get each seperately. On subsequent 5245 * times through the loop, we're checking a 5246 * port that we chose (not the user) and thus 5247 * we do not allow casual duplicate bindings. 5248 */ 5249 if (count == 0 && requested_port != 0) 5250 continue; 5251 } 5252 5253 /* 5254 * No difference depending on SO_REUSEADDR. 5255 * 5256 * If existing port is bound to a 5257 * non-wildcard IP address and 5258 * the requesting stream is bound to 5259 * a distinct different IP addresses 5260 * (non-wildcard, also), keep going. 5261 */ 5262 if (!is_inaddr_any && 5263 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && 5264 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, 5265 &v6src)) { 5266 continue; 5267 } 5268 break; 5269 } 5270 5271 if (!found_exclbind && 5272 (connp->conn_reuseaddr && requested_port != 0)) { 5273 break; 5274 } 5275 5276 if (udp1 == NULL) { 5277 /* 5278 * No other stream has this IP address 5279 * and port number. We can use it. 5280 */ 5281 break; 5282 } 5283 mutex_exit(&udpf->uf_lock); 5284 if (bind_to_req_port_only) { 5285 /* 5286 * We get here only when requested port 5287 * is bound (and only first of the for() 5288 * loop iteration). 5289 * 5290 * The semantics of this bind request 5291 * require it to fail so we return from 5292 * the routine (and exit the loop). 5293 * 5294 */ 5295 mutex_exit(&connp->conn_lock); 5296 return (-TADDRBUSY); 5297 } 5298 5299 if (connp->conn_anon_priv_bind) { 5300 port = udp_get_next_priv_port(udp); 5301 } else { 5302 if ((count == 0) && (requested_port != 0)) { 5303 /* 5304 * If the application wants us to find 5305 * a port, get one to start with. Set 5306 * requested_port to 0, so that we will 5307 * update us->us_next_port_to_try below. 5308 */ 5309 port = udp_update_next_port(udp, 5310 us->us_next_port_to_try, B_TRUE); 5311 requested_port = 0; 5312 } else { 5313 port = udp_update_next_port(udp, port + 1, 5314 B_FALSE); 5315 } 5316 } 5317 5318 if (port == 0 || ++count >= loopmax) { 5319 /* 5320 * We've tried every possible port number and 5321 * there are none available, so send an error 5322 * to the user. 5323 */ 5324 mutex_exit(&connp->conn_lock); 5325 return (-TNOADDR); 5326 } 5327 } 5328 5329 /* 5330 * Copy the source address into our udp structure. This address 5331 * may still be zero; if so, ip_attr_connect will fill in the correct 5332 * address when a packet is about to be sent. 5333 * If we are binding to a broadcast or multicast address then 5334 * we just set the conn_bound_addr since we don't want to use 5335 * that as the source address when sending. 5336 */ 5337 connp->conn_bound_addr_v6 = v6src; 5338 connp->conn_laddr_v6 = v6src; 5339 if (scopeid != 0) { 5340 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 5341 connp->conn_ixa->ixa_scopeid = scopeid; 5342 connp->conn_incoming_ifindex = scopeid; 5343 } else { 5344 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5345 connp->conn_incoming_ifindex = connp->conn_bound_if; 5346 } 5347 5348 switch (laddr_type) { 5349 case IPVL_UNICAST_UP: 5350 case IPVL_UNICAST_DOWN: 5351 connp->conn_saddr_v6 = v6src; 5352 connp->conn_mcbc_bind = B_FALSE; 5353 break; 5354 case IPVL_MCAST: 5355 case IPVL_BCAST: 5356 /* ip_set_destination will pick a source address later */ 5357 connp->conn_saddr_v6 = ipv6_all_zeros; 5358 connp->conn_mcbc_bind = B_TRUE; 5359 break; 5360 } 5361 5362 /* Any errors after this point should use late_error */ 5363 connp->conn_lport = lport; 5364 5365 /* 5366 * Now reset the next anonymous port if the application requested 5367 * an anonymous port, or we handed out the next anonymous port. 5368 */ 5369 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { 5370 us->us_next_port_to_try = port + 1; 5371 } 5372 5373 /* Initialize the T_BIND_ACK. */ 5374 if (connp->conn_family == AF_INET) { 5375 sin->sin_port = connp->conn_lport; 5376 } else { 5377 sin6->sin6_port = connp->conn_lport; 5378 } 5379 udp->udp_state = TS_IDLE; 5380 udp_bind_hash_insert(udpf, udp); 5381 mutex_exit(&udpf->uf_lock); 5382 mutex_exit(&connp->conn_lock); 5383 5384 if (cl_inet_bind) { 5385 /* 5386 * Running in cluster mode - register bind information 5387 */ 5388 if (connp->conn_ipversion == IPV4_VERSION) { 5389 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5390 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, 5391 (in_port_t)connp->conn_lport, NULL); 5392 } else { 5393 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5394 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, 5395 (in_port_t)connp->conn_lport, NULL); 5396 } 5397 } 5398 5399 mutex_enter(&connp->conn_lock); 5400 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 5401 if (is_system_labeled() && (!connp->conn_anon_port || 5402 connp->conn_anon_mlp)) { 5403 uint16_t mlpport; 5404 zone_t *zone; 5405 5406 zone = crgetzone(cr); 5407 connp->conn_mlp_type = 5408 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : 5409 mlptSingle; 5410 addrtype = tsol_mlp_addr_type( 5411 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 5412 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 5413 if (addrtype == mlptSingle) { 5414 error = -TNOADDR; 5415 mutex_exit(&connp->conn_lock); 5416 goto late_error; 5417 } 5418 mlpport = connp->conn_anon_port ? PMAPPORT : port; 5419 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 5420 addrtype); 5421 5422 /* 5423 * It is a coding error to attempt to bind an MLP port 5424 * without first setting SOL_SOCKET/SCM_UCRED. 5425 */ 5426 if (mlptype != mlptSingle && 5427 connp->conn_mlp_type == mlptSingle) { 5428 error = EINVAL; 5429 mutex_exit(&connp->conn_lock); 5430 goto late_error; 5431 } 5432 5433 /* 5434 * It is an access violation to attempt to bind an MLP port 5435 * without NET_BINDMLP privilege. 5436 */ 5437 if (mlptype != mlptSingle && 5438 secpolicy_net_bindmlp(cr) != 0) { 5439 if (connp->conn_debug) { 5440 (void) strlog(UDP_MOD_ID, 0, 1, 5441 SL_ERROR|SL_TRACE, 5442 "udp_bind: no priv for multilevel port %d", 5443 mlpport); 5444 } 5445 error = -TACCES; 5446 mutex_exit(&connp->conn_lock); 5447 goto late_error; 5448 } 5449 5450 /* 5451 * If we're specifically binding a shared IP address and the 5452 * port is MLP on shared addresses, then check to see if this 5453 * zone actually owns the MLP. Reject if not. 5454 */ 5455 if (mlptype == mlptShared && addrtype == mlptShared) { 5456 /* 5457 * No need to handle exclusive-stack zones since 5458 * ALL_ZONES only applies to the shared stack. 5459 */ 5460 zoneid_t mlpzone; 5461 5462 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 5463 htons(mlpport)); 5464 if (connp->conn_zoneid != mlpzone) { 5465 if (connp->conn_debug) { 5466 (void) strlog(UDP_MOD_ID, 0, 1, 5467 SL_ERROR|SL_TRACE, 5468 "udp_bind: attempt to bind port " 5469 "%d on shared addr in zone %d " 5470 "(should be %d)", 5471 mlpport, connp->conn_zoneid, 5472 mlpzone); 5473 } 5474 error = -TACCES; 5475 mutex_exit(&connp->conn_lock); 5476 goto late_error; 5477 } 5478 } 5479 if (connp->conn_anon_port) { 5480 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, 5481 port, B_TRUE); 5482 if (error != 0) { 5483 if (connp->conn_debug) { 5484 (void) strlog(UDP_MOD_ID, 0, 1, 5485 SL_ERROR|SL_TRACE, 5486 "udp_bind: cannot establish anon " 5487 "MLP for port %d", port); 5488 } 5489 error = -TACCES; 5490 mutex_exit(&connp->conn_lock); 5491 goto late_error; 5492 } 5493 } 5494 connp->conn_mlp_type = mlptype; 5495 } 5496 5497 /* 5498 * We create an initial header template here to make a subsequent 5499 * sendto have a starting point. Since conn_last_dst is zero the 5500 * first sendto will always follow the 'dst changed' code path. 5501 * Note that we defer massaging options and the related checksum 5502 * adjustment until we have a destination address. 5503 */ 5504 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5505 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5506 if (error != 0) { 5507 mutex_exit(&connp->conn_lock); 5508 goto late_error; 5509 } 5510 /* Just in case */ 5511 connp->conn_faddr_v6 = ipv6_all_zeros; 5512 connp->conn_fport = 0; 5513 connp->conn_v6lastdst = ipv6_all_zeros; 5514 mutex_exit(&connp->conn_lock); 5515 5516 error = ip_laddr_fanout_insert(connp); 5517 if (error != 0) 5518 goto late_error; 5519 5520 /* Bind succeeded */ 5521 return (0); 5522 5523 late_error: 5524 /* We had already picked the port number, and then the bind failed */ 5525 mutex_enter(&connp->conn_lock); 5526 udpf = &us->us_bind_fanout[ 5527 UDP_BIND_HASH(connp->conn_lport, 5528 us->us_bind_fanout_size)]; 5529 mutex_enter(&udpf->uf_lock); 5530 connp->conn_saddr_v6 = ipv6_all_zeros; 5531 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5532 connp->conn_laddr_v6 = ipv6_all_zeros; 5533 if (scopeid != 0) { 5534 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5535 connp->conn_incoming_ifindex = connp->conn_bound_if; 5536 } 5537 udp->udp_state = TS_UNBND; 5538 udp_bind_hash_remove(udp, B_TRUE); 5539 connp->conn_lport = 0; 5540 mutex_exit(&udpf->uf_lock); 5541 connp->conn_anon_port = B_FALSE; 5542 connp->conn_mlp_type = mlptSingle; 5543 5544 connp->conn_v6lastdst = ipv6_all_zeros; 5545 5546 /* Restore the header that was built above - different source address */ 5547 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5548 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5549 mutex_exit(&connp->conn_lock); 5550 return (error); 5551 } 5552 5553 int 5554 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5555 socklen_t len, cred_t *cr) 5556 { 5557 int error; 5558 conn_t *connp; 5559 5560 /* All Solaris components should pass a cred for this operation. */ 5561 ASSERT(cr != NULL); 5562 5563 connp = (conn_t *)proto_handle; 5564 5565 if (sa == NULL) 5566 error = udp_do_unbind(connp); 5567 else 5568 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 5569 5570 if (error < 0) { 5571 if (error == -TOUTSTATE) 5572 error = EINVAL; 5573 else 5574 error = proto_tlitosyserr(-error); 5575 } 5576 5577 return (error); 5578 } 5579 5580 static int 5581 udp_implicit_bind(conn_t *connp, cred_t *cr) 5582 { 5583 sin6_t sin6addr; 5584 sin_t *sin; 5585 sin6_t *sin6; 5586 socklen_t len; 5587 int error; 5588 5589 /* All Solaris components should pass a cred for this operation. */ 5590 ASSERT(cr != NULL); 5591 5592 if (connp->conn_family == AF_INET) { 5593 len = sizeof (struct sockaddr_in); 5594 sin = (sin_t *)&sin6addr; 5595 *sin = sin_null; 5596 sin->sin_family = AF_INET; 5597 sin->sin_addr.s_addr = INADDR_ANY; 5598 } else { 5599 ASSERT(connp->conn_family == AF_INET6); 5600 len = sizeof (sin6_t); 5601 sin6 = (sin6_t *)&sin6addr; 5602 *sin6 = sin6_null; 5603 sin6->sin6_family = AF_INET6; 5604 V6_SET_ZERO(sin6->sin6_addr); 5605 } 5606 5607 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len, 5608 cr, B_FALSE); 5609 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5610 } 5611 5612 /* 5613 * This routine removes a port number association from a stream. It 5614 * is called by udp_unbind and udp_tpi_unbind. 5615 */ 5616 static int 5617 udp_do_unbind(conn_t *connp) 5618 { 5619 udp_t *udp = connp->conn_udp; 5620 udp_fanout_t *udpf; 5621 udp_stack_t *us = udp->udp_us; 5622 5623 if (cl_inet_unbind != NULL) { 5624 /* 5625 * Running in cluster mode - register unbind information 5626 */ 5627 if (connp->conn_ipversion == IPV4_VERSION) { 5628 (*cl_inet_unbind)( 5629 connp->conn_netstack->netstack_stackid, 5630 IPPROTO_UDP, AF_INET, 5631 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5632 (in_port_t)connp->conn_lport, NULL); 5633 } else { 5634 (*cl_inet_unbind)( 5635 connp->conn_netstack->netstack_stackid, 5636 IPPROTO_UDP, AF_INET6, 5637 (uint8_t *)&(connp->conn_laddr_v6), 5638 (in_port_t)connp->conn_lport, NULL); 5639 } 5640 } 5641 5642 mutex_enter(&connp->conn_lock); 5643 /* If a bind has not been done, we can't unbind. */ 5644 if (udp->udp_state == TS_UNBND) { 5645 mutex_exit(&connp->conn_lock); 5646 return (-TOUTSTATE); 5647 } 5648 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5649 us->us_bind_fanout_size)]; 5650 mutex_enter(&udpf->uf_lock); 5651 udp_bind_hash_remove(udp, B_TRUE); 5652 connp->conn_saddr_v6 = ipv6_all_zeros; 5653 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5654 connp->conn_laddr_v6 = ipv6_all_zeros; 5655 connp->conn_mcbc_bind = B_FALSE; 5656 connp->conn_lport = 0; 5657 /* In case we were also connected */ 5658 connp->conn_faddr_v6 = ipv6_all_zeros; 5659 connp->conn_fport = 0; 5660 mutex_exit(&udpf->uf_lock); 5661 5662 connp->conn_v6lastdst = ipv6_all_zeros; 5663 udp->udp_state = TS_UNBND; 5664 5665 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5666 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5667 mutex_exit(&connp->conn_lock); 5668 5669 ip_unbind(connp); 5670 5671 return (0); 5672 } 5673 5674 /* 5675 * It associates a default destination address with the stream. 5676 */ 5677 static int 5678 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 5679 cred_t *cr, pid_t pid) 5680 { 5681 sin6_t *sin6; 5682 sin_t *sin; 5683 in6_addr_t v6dst; 5684 ipaddr_t v4dst; 5685 uint16_t dstport; 5686 uint32_t flowinfo; 5687 udp_fanout_t *udpf; 5688 udp_t *udp, *udp1; 5689 ushort_t ipversion; 5690 udp_stack_t *us; 5691 int error; 5692 conn_t *connp1; 5693 ip_xmit_attr_t *ixa; 5694 ip_xmit_attr_t *oldixa; 5695 uint_t scopeid = 0; 5696 uint_t srcid = 0; 5697 in6_addr_t v6src = connp->conn_saddr_v6; 5698 boolean_t v4mapped; 5699 5700 udp = connp->conn_udp; 5701 us = udp->udp_us; 5702 sin = NULL; 5703 sin6 = NULL; 5704 v4dst = INADDR_ANY; 5705 flowinfo = 0; 5706 5707 /* 5708 * Address has been verified by the caller 5709 */ 5710 switch (len) { 5711 default: 5712 /* 5713 * Should never happen 5714 */ 5715 return (EINVAL); 5716 5717 case sizeof (sin_t): 5718 sin = (sin_t *)sa; 5719 v4dst = sin->sin_addr.s_addr; 5720 dstport = sin->sin_port; 5721 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5722 ASSERT(connp->conn_ipversion == IPV4_VERSION); 5723 ipversion = IPV4_VERSION; 5724 break; 5725 5726 case sizeof (sin6_t): 5727 sin6 = (sin6_t *)sa; 5728 v6dst = sin6->sin6_addr; 5729 dstport = sin6->sin6_port; 5730 srcid = sin6->__sin6_src_id; 5731 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst); 5732 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5733 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 5734 v4mapped, connp->conn_netstack)) { 5735 /* Mismatch v4mapped/v6 specified by srcid. */ 5736 return (EADDRNOTAVAIL); 5737 } 5738 } 5739 if (v4mapped) { 5740 if (connp->conn_ipv6_v6only) 5741 return (EADDRNOTAVAIL); 5742 5743 /* 5744 * Destination adress is mapped IPv6 address. 5745 * Source bound address should be unspecified or 5746 * IPv6 mapped address as well. 5747 */ 5748 if (!IN6_IS_ADDR_UNSPECIFIED( 5749 &connp->conn_bound_addr_v6) && 5750 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { 5751 return (EADDRNOTAVAIL); 5752 } 5753 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 5754 ipversion = IPV4_VERSION; 5755 flowinfo = 0; 5756 } else { 5757 ipversion = IPV6_VERSION; 5758 flowinfo = sin6->sin6_flowinfo; 5759 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 5760 scopeid = sin6->sin6_scope_id; 5761 } 5762 break; 5763 } 5764 5765 if (dstport == 0) 5766 return (-TBADADDR); 5767 5768 /* 5769 * If there is a different thread using conn_ixa then we get a new 5770 * copy and cut the old one loose from conn_ixa. Otherwise we use 5771 * conn_ixa and prevent any other thread from using/changing it. 5772 * Once connect() is done other threads can use conn_ixa since the 5773 * refcnt will be back at one. 5774 * We defer updating conn_ixa until later to handle any concurrent 5775 * conn_ixa_cleanup thread. 5776 */ 5777 ixa = conn_get_ixa(connp, B_FALSE); 5778 if (ixa == NULL) 5779 return (ENOMEM); 5780 5781 mutex_enter(&connp->conn_lock); 5782 /* 5783 * This udp_t must have bound to a port already before doing a connect. 5784 * Reject if a connect is in progress (we drop conn_lock during 5785 * udp_do_connect). 5786 */ 5787 if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) { 5788 mutex_exit(&connp->conn_lock); 5789 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5790 "udp_connect: bad state, %u", udp->udp_state); 5791 ixa_refrele(ixa); 5792 return (-TOUTSTATE); 5793 } 5794 ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL); 5795 5796 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5797 us->us_bind_fanout_size)]; 5798 5799 mutex_enter(&udpf->uf_lock); 5800 if (udp->udp_state == TS_DATA_XFER) { 5801 /* Already connected - clear out state */ 5802 if (connp->conn_mcbc_bind) 5803 connp->conn_saddr_v6 = ipv6_all_zeros; 5804 else 5805 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 5806 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 5807 connp->conn_faddr_v6 = ipv6_all_zeros; 5808 connp->conn_fport = 0; 5809 udp->udp_state = TS_IDLE; 5810 } 5811 5812 connp->conn_fport = dstport; 5813 connp->conn_ipversion = ipversion; 5814 if (ipversion == IPV4_VERSION) { 5815 /* 5816 * Interpret a zero destination to mean loopback. 5817 * Update the T_CONN_REQ (sin/sin6) since it is used to 5818 * generate the T_CONN_CON. 5819 */ 5820 if (v4dst == INADDR_ANY) { 5821 v4dst = htonl(INADDR_LOOPBACK); 5822 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5823 if (connp->conn_family == AF_INET) { 5824 sin->sin_addr.s_addr = v4dst; 5825 } else { 5826 sin6->sin6_addr = v6dst; 5827 } 5828 } 5829 connp->conn_faddr_v6 = v6dst; 5830 connp->conn_flowinfo = 0; 5831 } else { 5832 ASSERT(connp->conn_ipversion == IPV6_VERSION); 5833 /* 5834 * Interpret a zero destination to mean loopback. 5835 * Update the T_CONN_REQ (sin/sin6) since it is used to 5836 * generate the T_CONN_CON. 5837 */ 5838 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 5839 v6dst = ipv6_loopback; 5840 sin6->sin6_addr = v6dst; 5841 } 5842 connp->conn_faddr_v6 = v6dst; 5843 connp->conn_flowinfo = flowinfo; 5844 } 5845 mutex_exit(&udpf->uf_lock); 5846 5847 /* 5848 * We update our cred/cpid based on the caller of connect 5849 */ 5850 if (connp->conn_cred != cr) { 5851 crhold(cr); 5852 crfree(connp->conn_cred); 5853 connp->conn_cred = cr; 5854 } 5855 connp->conn_cpid = pid; 5856 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 5857 ixa->ixa_cred = cr; 5858 ixa->ixa_cpid = pid; 5859 if (is_system_labeled()) { 5860 /* We need to restart with a label based on the cred */ 5861 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 5862 } 5863 5864 if (scopeid != 0) { 5865 ixa->ixa_flags |= IXAF_SCOPEID_SET; 5866 ixa->ixa_scopeid = scopeid; 5867 connp->conn_incoming_ifindex = scopeid; 5868 } else { 5869 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5870 connp->conn_incoming_ifindex = connp->conn_bound_if; 5871 } 5872 /* 5873 * conn_connect will drop conn_lock and reacquire it. 5874 * To prevent a send* from messing with this udp_t while the lock 5875 * is dropped we set udp_state and clear conn_v6lastdst. 5876 * That will make all send* fail with EISCONN. 5877 */ 5878 connp->conn_v6lastdst = ipv6_all_zeros; 5879 udp->udp_state = TS_WCON_CREQ; 5880 5881 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC); 5882 mutex_exit(&connp->conn_lock); 5883 if (error != 0) 5884 goto connect_failed; 5885 5886 /* 5887 * The addresses have been verified. Time to insert in 5888 * the correct fanout list. 5889 */ 5890 error = ipcl_conn_insert(connp); 5891 if (error != 0) 5892 goto connect_failed; 5893 5894 mutex_enter(&connp->conn_lock); 5895 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5896 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5897 if (error != 0) { 5898 mutex_exit(&connp->conn_lock); 5899 goto connect_failed; 5900 } 5901 5902 udp->udp_state = TS_DATA_XFER; 5903 /* Record this as the "last" send even though we haven't sent any */ 5904 connp->conn_v6lastdst = connp->conn_faddr_v6; 5905 connp->conn_lastipversion = connp->conn_ipversion; 5906 connp->conn_lastdstport = connp->conn_fport; 5907 connp->conn_lastflowinfo = connp->conn_flowinfo; 5908 connp->conn_lastscopeid = scopeid; 5909 connp->conn_lastsrcid = srcid; 5910 /* Also remember a source to use together with lastdst */ 5911 connp->conn_v6lastsrc = v6src; 5912 5913 oldixa = conn_replace_ixa(connp, ixa); 5914 mutex_exit(&connp->conn_lock); 5915 ixa_refrele(oldixa); 5916 5917 /* 5918 * We've picked a source address above. Now we can 5919 * verify that the src/port/dst/port is unique for all 5920 * connections in TS_DATA_XFER, skipping ourselves. 5921 */ 5922 mutex_enter(&udpf->uf_lock); 5923 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 5924 if (udp1->udp_state != TS_DATA_XFER) 5925 continue; 5926 5927 if (udp1 == udp) 5928 continue; 5929 5930 connp1 = udp1->udp_connp; 5931 if (connp->conn_lport != connp1->conn_lport || 5932 connp->conn_ipversion != connp1->conn_ipversion || 5933 dstport != connp1->conn_fport || 5934 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 5935 &connp1->conn_laddr_v6) || 5936 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) || 5937 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) || 5938 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid))) 5939 continue; 5940 mutex_exit(&udpf->uf_lock); 5941 error = -TBADADDR; 5942 goto connect_failed; 5943 } 5944 if (cl_inet_connect2 != NULL) { 5945 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 5946 if (error != 0) { 5947 mutex_exit(&udpf->uf_lock); 5948 error = -TBADADDR; 5949 goto connect_failed; 5950 } 5951 } 5952 mutex_exit(&udpf->uf_lock); 5953 5954 ixa_refrele(ixa); 5955 return (0); 5956 5957 connect_failed: 5958 if (ixa != NULL) 5959 ixa_refrele(ixa); 5960 mutex_enter(&connp->conn_lock); 5961 mutex_enter(&udpf->uf_lock); 5962 udp->udp_state = TS_IDLE; 5963 connp->conn_faddr_v6 = ipv6_all_zeros; 5964 connp->conn_fport = 0; 5965 /* In case the source address was set above */ 5966 if (connp->conn_mcbc_bind) 5967 connp->conn_saddr_v6 = ipv6_all_zeros; 5968 else 5969 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 5970 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 5971 mutex_exit(&udpf->uf_lock); 5972 5973 connp->conn_v6lastdst = ipv6_all_zeros; 5974 connp->conn_flowinfo = 0; 5975 5976 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5977 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5978 mutex_exit(&connp->conn_lock); 5979 return (error); 5980 } 5981 5982 static int 5983 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5984 socklen_t len, sock_connid_t *id, cred_t *cr) 5985 { 5986 conn_t *connp = (conn_t *)proto_handle; 5987 udp_t *udp = connp->conn_udp; 5988 int error; 5989 boolean_t did_bind = B_FALSE; 5990 pid_t pid = curproc->p_pid; 5991 5992 /* All Solaris components should pass a cred for this operation. */ 5993 ASSERT(cr != NULL); 5994 5995 if (sa == NULL) { 5996 /* 5997 * Disconnect 5998 * Make sure we are connected 5999 */ 6000 if (udp->udp_state != TS_DATA_XFER) 6001 return (EINVAL); 6002 6003 error = udp_disconnect(connp); 6004 return (error); 6005 } 6006 6007 error = proto_verify_ip_addr(connp->conn_family, sa, len); 6008 if (error != 0) 6009 goto done; 6010 6011 /* do an implicit bind if necessary */ 6012 if (udp->udp_state == TS_UNBND) { 6013 error = udp_implicit_bind(connp, cr); 6014 /* 6015 * We could be racing with an actual bind, in which case 6016 * we would see EPROTO. We cross our fingers and try 6017 * to connect. 6018 */ 6019 if (!(error == 0 || error == EPROTO)) 6020 goto done; 6021 did_bind = B_TRUE; 6022 } 6023 /* 6024 * set SO_DGRAM_ERRIND 6025 */ 6026 connp->conn_dgram_errind = B_TRUE; 6027 6028 error = udp_do_connect(connp, sa, len, cr, pid); 6029 6030 if (error != 0 && did_bind) { 6031 int unbind_err; 6032 6033 unbind_err = udp_do_unbind(connp); 6034 ASSERT(unbind_err == 0); 6035 } 6036 6037 if (error == 0) { 6038 *id = 0; 6039 (*connp->conn_upcalls->su_connected) 6040 (connp->conn_upper_handle, 0, NULL, -1); 6041 } else if (error < 0) { 6042 error = proto_tlitosyserr(-error); 6043 } 6044 6045 done: 6046 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 6047 /* 6048 * No need to hold locks to set state 6049 * after connect failure socket state is undefined 6050 * We set the state only to imitate old sockfs behavior 6051 */ 6052 udp->udp_state = TS_IDLE; 6053 } 6054 return (error); 6055 } 6056 6057 int 6058 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6059 cred_t *cr) 6060 { 6061 sin6_t *sin6; 6062 sin_t *sin = NULL; 6063 uint_t srcid; 6064 conn_t *connp = (conn_t *)proto_handle; 6065 udp_t *udp = connp->conn_udp; 6066 int error = 0; 6067 udp_stack_t *us = udp->udp_us; 6068 ushort_t ipversion; 6069 pid_t pid = curproc->p_pid; 6070 ip_xmit_attr_t *ixa; 6071 6072 ASSERT(DB_TYPE(mp) == M_DATA); 6073 6074 /* All Solaris components should pass a cred for this operation. */ 6075 ASSERT(cr != NULL); 6076 6077 /* do an implicit bind if necessary */ 6078 if (udp->udp_state == TS_UNBND) { 6079 error = udp_implicit_bind(connp, cr); 6080 /* 6081 * We could be racing with an actual bind, in which case 6082 * we would see EPROTO. We cross our fingers and try 6083 * to connect. 6084 */ 6085 if (!(error == 0 || error == EPROTO)) { 6086 freemsg(mp); 6087 return (error); 6088 } 6089 } 6090 6091 /* Connected? */ 6092 if (msg->msg_name == NULL) { 6093 if (udp->udp_state != TS_DATA_XFER) { 6094 UDPS_BUMP_MIB(us, udpOutErrors); 6095 return (EDESTADDRREQ); 6096 } 6097 if (msg->msg_controllen != 0) { 6098 error = udp_output_ancillary(connp, NULL, NULL, mp, 6099 NULL, msg, cr, pid); 6100 } else { 6101 error = udp_output_connected(connp, mp, cr, pid); 6102 } 6103 if (us->us_sendto_ignerr) 6104 return (0); 6105 else 6106 return (error); 6107 } 6108 if (udp->udp_state == TS_DATA_XFER) { 6109 UDPS_BUMP_MIB(us, udpOutErrors); 6110 return (EISCONN); 6111 } 6112 error = proto_verify_ip_addr(connp->conn_family, 6113 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6114 if (error != 0) { 6115 UDPS_BUMP_MIB(us, udpOutErrors); 6116 return (error); 6117 } 6118 switch (connp->conn_family) { 6119 case AF_INET6: 6120 sin6 = (sin6_t *)msg->msg_name; 6121 6122 srcid = sin6->__sin6_src_id; 6123 6124 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6125 /* 6126 * Destination is a non-IPv4-compatible IPv6 address. 6127 * Send out an IPv6 format packet. 6128 */ 6129 6130 /* 6131 * If the local address is a mapped address return 6132 * an error. 6133 * It would be possible to send an IPv6 packet but the 6134 * response would never make it back to the application 6135 * since it is bound to a mapped address. 6136 */ 6137 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 6138 UDPS_BUMP_MIB(us, udpOutErrors); 6139 return (EADDRNOTAVAIL); 6140 } 6141 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6142 sin6->sin6_addr = ipv6_loopback; 6143 ipversion = IPV6_VERSION; 6144 } else { 6145 if (connp->conn_ipv6_v6only) { 6146 UDPS_BUMP_MIB(us, udpOutErrors); 6147 return (EADDRNOTAVAIL); 6148 } 6149 6150 /* 6151 * If the local address is not zero or a mapped address 6152 * return an error. It would be possible to send an 6153 * IPv4 packet but the response would never make it 6154 * back to the application since it is bound to a 6155 * non-mapped address. 6156 */ 6157 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 6158 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 6159 UDPS_BUMP_MIB(us, udpOutErrors); 6160 return (EADDRNOTAVAIL); 6161 } 6162 6163 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 6164 V4_PART_OF_V6(sin6->sin6_addr) = 6165 htonl(INADDR_LOOPBACK); 6166 } 6167 ipversion = IPV4_VERSION; 6168 } 6169 6170 /* 6171 * We have to allocate an ip_xmit_attr_t before we grab 6172 * conn_lock and we need to hold conn_lock once we've check 6173 * conn_same_as_last_v6 to handle concurrent send* calls on a 6174 * socket. 6175 */ 6176 if (msg->msg_controllen == 0) { 6177 ixa = conn_get_ixa(connp, B_FALSE); 6178 if (ixa == NULL) { 6179 UDPS_BUMP_MIB(us, udpOutErrors); 6180 return (ENOMEM); 6181 } 6182 } else { 6183 ixa = NULL; 6184 } 6185 mutex_enter(&connp->conn_lock); 6186 if (udp->udp_delayed_error != 0) { 6187 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 6188 6189 error = udp->udp_delayed_error; 6190 udp->udp_delayed_error = 0; 6191 6192 /* Compare IP address, port, and family */ 6193 6194 if (sin6->sin6_port == sin2->sin6_port && 6195 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 6196 &sin2->sin6_addr) && 6197 sin6->sin6_family == sin2->sin6_family) { 6198 mutex_exit(&connp->conn_lock); 6199 UDPS_BUMP_MIB(us, udpOutErrors); 6200 if (ixa != NULL) 6201 ixa_refrele(ixa); 6202 return (error); 6203 } 6204 } 6205 6206 if (msg->msg_controllen != 0) { 6207 mutex_exit(&connp->conn_lock); 6208 ASSERT(ixa == NULL); 6209 error = udp_output_ancillary(connp, NULL, sin6, mp, 6210 NULL, msg, cr, pid); 6211 } else if (conn_same_as_last_v6(connp, sin6) && 6212 connp->conn_lastsrcid == srcid && 6213 ipsec_outbound_policy_current(ixa)) { 6214 /* udp_output_lastdst drops conn_lock */ 6215 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6216 } else { 6217 /* udp_output_newdst drops conn_lock */ 6218 error = udp_output_newdst(connp, mp, NULL, sin6, 6219 ipversion, cr, pid, ixa); 6220 } 6221 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6222 if (us->us_sendto_ignerr) 6223 return (0); 6224 else 6225 return (error); 6226 case AF_INET: 6227 sin = (sin_t *)msg->msg_name; 6228 6229 ipversion = IPV4_VERSION; 6230 6231 if (sin->sin_addr.s_addr == INADDR_ANY) 6232 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 6233 6234 /* 6235 * We have to allocate an ip_xmit_attr_t before we grab 6236 * conn_lock and we need to hold conn_lock once we've check 6237 * conn_same_as_last_v6 to handle concurrent send* on a socket. 6238 */ 6239 if (msg->msg_controllen == 0) { 6240 ixa = conn_get_ixa(connp, B_FALSE); 6241 if (ixa == NULL) { 6242 UDPS_BUMP_MIB(us, udpOutErrors); 6243 return (ENOMEM); 6244 } 6245 } else { 6246 ixa = NULL; 6247 } 6248 mutex_enter(&connp->conn_lock); 6249 if (udp->udp_delayed_error != 0) { 6250 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 6251 6252 error = udp->udp_delayed_error; 6253 udp->udp_delayed_error = 0; 6254 6255 /* Compare IP address and port */ 6256 6257 if (sin->sin_port == sin2->sin_port && 6258 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) { 6259 mutex_exit(&connp->conn_lock); 6260 UDPS_BUMP_MIB(us, udpOutErrors); 6261 if (ixa != NULL) 6262 ixa_refrele(ixa); 6263 return (error); 6264 } 6265 } 6266 if (msg->msg_controllen != 0) { 6267 mutex_exit(&connp->conn_lock); 6268 ASSERT(ixa == NULL); 6269 error = udp_output_ancillary(connp, sin, NULL, mp, 6270 NULL, msg, cr, pid); 6271 } else if (conn_same_as_last_v4(connp, sin) && 6272 ipsec_outbound_policy_current(ixa)) { 6273 /* udp_output_lastdst drops conn_lock */ 6274 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6275 } else { 6276 /* udp_output_newdst drops conn_lock */ 6277 error = udp_output_newdst(connp, mp, sin, NULL, 6278 ipversion, cr, pid, ixa); 6279 } 6280 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6281 if (us->us_sendto_ignerr) 6282 return (0); 6283 else 6284 return (error); 6285 default: 6286 return (EINVAL); 6287 } 6288 } 6289 6290 int 6291 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 6292 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb, 6293 sock_quiesce_arg_t *arg) 6294 { 6295 conn_t *connp = (conn_t *)proto_handle; 6296 udp_t *udp; 6297 struct T_capability_ack tca; 6298 struct sockaddr_in6 laddr, faddr; 6299 socklen_t laddrlen, faddrlen; 6300 short opts; 6301 struct stroptions *stropt; 6302 mblk_t *mp, *stropt_mp; 6303 int error; 6304 6305 udp = connp->conn_udp; 6306 6307 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 6308 6309 /* 6310 * setup the fallback stream that was allocated 6311 */ 6312 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6313 connp->conn_minor_arena = WR(q)->q_ptr; 6314 6315 RD(q)->q_ptr = WR(q)->q_ptr = connp; 6316 6317 WR(q)->q_qinfo = &udp_winit; 6318 6319 connp->conn_rq = RD(q); 6320 connp->conn_wq = WR(q); 6321 6322 /* Notify stream head about options before sending up data */ 6323 stropt_mp->b_datap->db_type = M_SETOPTS; 6324 stropt_mp->b_wptr += sizeof (*stropt); 6325 stropt = (struct stroptions *)stropt_mp->b_rptr; 6326 stropt->so_flags = SO_WROFF | SO_HIWAT; 6327 stropt->so_wroff = connp->conn_wroff; 6328 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 6329 putnext(RD(q), stropt_mp); 6330 6331 /* 6332 * Free the helper stream 6333 */ 6334 ip_free_helper_stream(connp); 6335 6336 if (!issocket) 6337 udp_use_pure_tpi(udp); 6338 6339 /* 6340 * Collect the information needed to sync with the sonode 6341 */ 6342 udp_do_capability_ack(udp, &tca, TC1_INFO); 6343 6344 laddrlen = faddrlen = sizeof (sin6_t); 6345 (void) udp_getsockname((sock_lower_handle_t)connp, 6346 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6347 error = udp_getpeername((sock_lower_handle_t)connp, 6348 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6349 if (error != 0) 6350 faddrlen = 0; 6351 6352 opts = 0; 6353 if (connp->conn_dgram_errind) 6354 opts |= SO_DGRAM_ERRIND; 6355 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 6356 opts |= SO_DONTROUTE; 6357 6358 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca, 6359 (struct sockaddr *)&laddr, laddrlen, 6360 (struct sockaddr *)&faddr, faddrlen, opts); 6361 6362 mutex_enter(&udp->udp_recv_lock); 6363 /* 6364 * Attempts to send data up during fallback will result in it being 6365 * queued in udp_t. First push up the datagrams obtained from the 6366 * socket, then any packets queued in udp_t. 6367 */ 6368 if (mp != NULL) { 6369 mp->b_next = udp->udp_fallback_queue_head; 6370 udp->udp_fallback_queue_head = mp; 6371 } 6372 while (udp->udp_fallback_queue_head != NULL) { 6373 mp = udp->udp_fallback_queue_head; 6374 udp->udp_fallback_queue_head = mp->b_next; 6375 mutex_exit(&udp->udp_recv_lock); 6376 mp->b_next = NULL; 6377 putnext(RD(q), mp); 6378 mutex_enter(&udp->udp_recv_lock); 6379 } 6380 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6381 /* 6382 * No longer a streams less socket 6383 */ 6384 mutex_enter(&connp->conn_lock); 6385 connp->conn_flags &= ~IPCL_NONSTR; 6386 mutex_exit(&connp->conn_lock); 6387 6388 mutex_exit(&udp->udp_recv_lock); 6389 6390 ASSERT(connp->conn_ref >= 1); 6391 6392 return (0); 6393 } 6394 6395 /* ARGSUSED3 */ 6396 int 6397 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6398 socklen_t *salenp, cred_t *cr) 6399 { 6400 conn_t *connp = (conn_t *)proto_handle; 6401 udp_t *udp = connp->conn_udp; 6402 int error; 6403 6404 /* All Solaris components should pass a cred for this operation. */ 6405 ASSERT(cr != NULL); 6406 6407 mutex_enter(&connp->conn_lock); 6408 if (udp->udp_state != TS_DATA_XFER) 6409 error = ENOTCONN; 6410 else 6411 error = conn_getpeername(connp, sa, salenp); 6412 mutex_exit(&connp->conn_lock); 6413 return (error); 6414 } 6415 6416 /* ARGSUSED3 */ 6417 int 6418 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6419 socklen_t *salenp, cred_t *cr) 6420 { 6421 conn_t *connp = (conn_t *)proto_handle; 6422 int error; 6423 6424 /* All Solaris components should pass a cred for this operation. */ 6425 ASSERT(cr != NULL); 6426 6427 mutex_enter(&connp->conn_lock); 6428 error = conn_getsockname(connp, sa, salenp); 6429 mutex_exit(&connp->conn_lock); 6430 return (error); 6431 } 6432 6433 int 6434 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6435 void *optvalp, socklen_t *optlen, cred_t *cr) 6436 { 6437 conn_t *connp = (conn_t *)proto_handle; 6438 int error; 6439 t_uscalar_t max_optbuf_len; 6440 void *optvalp_buf; 6441 int len; 6442 6443 /* All Solaris components should pass a cred for this operation. */ 6444 ASSERT(cr != NULL); 6445 6446 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6447 udp_opt_obj.odb_opt_des_arr, 6448 udp_opt_obj.odb_opt_arr_cnt, 6449 B_FALSE, B_TRUE, cr); 6450 if (error != 0) { 6451 if (error < 0) 6452 error = proto_tlitosyserr(-error); 6453 return (error); 6454 } 6455 6456 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6457 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6458 if (len == -1) { 6459 kmem_free(optvalp_buf, max_optbuf_len); 6460 return (EINVAL); 6461 } 6462 6463 /* 6464 * update optlen and copy option value 6465 */ 6466 t_uscalar_t size = MIN(len, *optlen); 6467 6468 bcopy(optvalp_buf, optvalp, size); 6469 bcopy(&size, optlen, sizeof (size)); 6470 6471 kmem_free(optvalp_buf, max_optbuf_len); 6472 return (0); 6473 } 6474 6475 int 6476 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6477 const void *optvalp, socklen_t optlen, cred_t *cr) 6478 { 6479 conn_t *connp = (conn_t *)proto_handle; 6480 int error; 6481 6482 /* All Solaris components should pass a cred for this operation. */ 6483 ASSERT(cr != NULL); 6484 6485 error = proto_opt_check(level, option_name, optlen, NULL, 6486 udp_opt_obj.odb_opt_des_arr, 6487 udp_opt_obj.odb_opt_arr_cnt, 6488 B_TRUE, B_FALSE, cr); 6489 6490 if (error != 0) { 6491 if (error < 0) 6492 error = proto_tlitosyserr(-error); 6493 return (error); 6494 } 6495 6496 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6497 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6498 NULL, cr); 6499 6500 ASSERT(error >= 0); 6501 6502 return (error); 6503 } 6504 6505 void 6506 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6507 { 6508 conn_t *connp = (conn_t *)proto_handle; 6509 udp_t *udp = connp->conn_udp; 6510 6511 mutex_enter(&udp->udp_recv_lock); 6512 connp->conn_flow_cntrld = B_FALSE; 6513 mutex_exit(&udp->udp_recv_lock); 6514 } 6515 6516 /* ARGSUSED2 */ 6517 int 6518 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6519 { 6520 conn_t *connp = (conn_t *)proto_handle; 6521 6522 /* All Solaris components should pass a cred for this operation. */ 6523 ASSERT(cr != NULL); 6524 6525 /* shut down the send side */ 6526 if (how != SHUT_RD) 6527 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6528 SOCK_OPCTL_SHUT_SEND, 0); 6529 /* shut down the recv side */ 6530 if (how != SHUT_WR) 6531 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6532 SOCK_OPCTL_SHUT_RECV, 0); 6533 return (0); 6534 } 6535 6536 int 6537 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6538 int mode, int32_t *rvalp, cred_t *cr) 6539 { 6540 conn_t *connp = (conn_t *)proto_handle; 6541 int error; 6542 6543 /* All Solaris components should pass a cred for this operation. */ 6544 ASSERT(cr != NULL); 6545 6546 /* 6547 * If we don't have a helper stream then create one. 6548 * ip_create_helper_stream takes care of locking the conn_t, 6549 * so this check for NULL is just a performance optimization. 6550 */ 6551 if (connp->conn_helper_info == NULL) { 6552 udp_stack_t *us = connp->conn_udp->udp_us; 6553 6554 ASSERT(us->us_ldi_ident != NULL); 6555 6556 /* 6557 * Create a helper stream for non-STREAMS socket. 6558 */ 6559 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6560 if (error != 0) { 6561 ip0dbg(("udp_ioctl: create of IP helper stream " 6562 "failed %d\n", error)); 6563 return (error); 6564 } 6565 } 6566 6567 switch (cmd) { 6568 case _SIOCSOCKFALLBACK: 6569 case TI_GETPEERNAME: 6570 case TI_GETMYNAME: 6571 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6572 cmd)); 6573 error = EINVAL; 6574 break; 6575 default: 6576 /* 6577 * Pass on to IP using helper stream 6578 */ 6579 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6580 cmd, arg, mode, cr, rvalp); 6581 break; 6582 } 6583 return (error); 6584 } 6585 6586 /* ARGSUSED */ 6587 int 6588 udp_accept(sock_lower_handle_t lproto_handle, 6589 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6590 cred_t *cr) 6591 { 6592 return (EOPNOTSUPP); 6593 } 6594 6595 /* ARGSUSED */ 6596 int 6597 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6598 { 6599 return (EOPNOTSUPP); 6600 } 6601 6602 sock_downcalls_t sock_udp_downcalls = { 6603 udp_activate, /* sd_activate */ 6604 udp_accept, /* sd_accept */ 6605 udp_bind, /* sd_bind */ 6606 udp_listen, /* sd_listen */ 6607 udp_connect, /* sd_connect */ 6608 udp_getpeername, /* sd_getpeername */ 6609 udp_getsockname, /* sd_getsockname */ 6610 udp_getsockopt, /* sd_getsockopt */ 6611 udp_setsockopt, /* sd_setsockopt */ 6612 udp_send, /* sd_send */ 6613 NULL, /* sd_send_uio */ 6614 NULL, /* sd_recv_uio */ 6615 NULL, /* sd_poll */ 6616 udp_shutdown, /* sd_shutdown */ 6617 udp_clr_flowctrl, /* sd_setflowctrl */ 6618 udp_ioctl, /* sd_ioctl */ 6619 udp_close /* sd_close */ 6620 }; 6621