1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 24 * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. 25 * Copyright 2018, Joyent, Inc. 26 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 27 */ 28 /* Copyright (c) 1990 Mentat Inc. 
*/ 29 30 #include <sys/sysmacros.h> 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/stropts.h> 34 #include <sys/strlog.h> 35 #include <sys/strsun.h> 36 #define _SUN_TPI_VERSION 2 37 #include <sys/tihdr.h> 38 #include <sys/timod.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/strsubr.h> 42 #include <sys/suntpi.h> 43 #include <sys/xti_inet.h> 44 #include <sys/kmem.h> 45 #include <sys/cred_impl.h> 46 #include <sys/policy.h> 47 #include <sys/priv.h> 48 #include <sys/ucred.h> 49 #include <sys/zone.h> 50 51 #include <sys/socket.h> 52 #include <sys/socketvar.h> 53 #include <sys/sockio.h> 54 #include <sys/vtrace.h> 55 #include <sys/sdt.h> 56 #include <sys/debug.h> 57 #include <sys/isa_defs.h> 58 #include <sys/random.h> 59 #include <netinet/in.h> 60 #include <netinet/ip6.h> 61 #include <netinet/icmp6.h> 62 #include <netinet/udp.h> 63 64 #include <inet/common.h> 65 #include <inet/ip.h> 66 #include <inet/ip_impl.h> 67 #include <inet/ipsec_impl.h> 68 #include <inet/ip6.h> 69 #include <inet/ip_ire.h> 70 #include <inet/ip_if.h> 71 #include <inet/ip_multi.h> 72 #include <inet/ip_ndp.h> 73 #include <inet/proto_set.h> 74 #include <inet/mib2.h> 75 #include <inet/optcom.h> 76 #include <inet/snmpcom.h> 77 #include <inet/kstatcom.h> 78 #include <inet/ipclassifier.h> 79 #include <sys/squeue_impl.h> 80 #include <inet/ipnet.h> 81 #include <sys/vxlan.h> 82 #include <inet/inet_hash.h> 83 84 #include <sys/tsol/label.h> 85 #include <sys/tsol/tnet.h> 86 #include <rpc/pmap_prot.h> 87 88 #include <inet/udp_impl.h> 89 90 /* 91 * Synchronization notes: 92 * 93 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 94 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 95 * protects the contents of the udp_t. uf_lock protects the address and the 96 * fanout information. 97 * The lock order is conn_lock -> uf_lock. 
98 * 99 * The fanout lock uf_lock: 100 * When a UDP endpoint is bound to a local port, it is inserted into 101 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 102 * The size of the array is controlled by the udp_bind_fanout_size variable. 103 * This variable can be changed in /etc/system if the default value is 104 * not large enough. Each bind hash bucket is protected by a per bucket 105 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 106 * structure and a few other fields in the udp_t. A UDP endpoint is removed 107 * from the bind hash list only when it is being unbound or being closed. 108 * The per bucket lock also protects a UDP endpoint's state changes. 109 * 110 * Plumbing notes: 111 * UDP is always a device driver. For compatibility with mibopen() code 112 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 113 * dummy module. 114 * 115 * The above implies that we don't support any intermediate module to 116 * reside in between /dev/ip and udp -- in fact, we never supported such 117 * scenario in the past as the inter-layer communication semantics have 118 * always been private. 
119 */ 120 121 /* For /etc/system control */ 122 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 123 124 static void udp_addr_req(queue_t *q, mblk_t *mp); 125 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 126 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 127 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 128 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 129 const in6_addr_t *, in_port_t, uint32_t); 130 static void udp_capability_req(queue_t *q, mblk_t *mp); 131 static int udp_tpi_close(queue_t *q, int flags, cred_t *); 132 static void udp_close_free(conn_t *); 133 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 134 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 135 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 136 int sys_error); 137 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 138 t_scalar_t tlierr, int sys_error); 139 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 140 cred_t *cr); 141 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 142 char *value, caddr_t cp, cred_t *cr); 143 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 144 char *value, caddr_t cp, cred_t *cr); 145 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 146 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 147 ip_recv_attr_t *ira); 148 static void udp_info_req(queue_t *q, mblk_t *mp); 149 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 150 static int udp_lrput(queue_t *, mblk_t *); 151 static int udp_lwput(queue_t *, mblk_t *); 152 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 153 cred_t *credp, boolean_t isv6); 154 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 155 cred_t *credp); 156 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 157 cred_t *credp); 158 static boolean_t udp_opt_allow_udr_set(t_scalar_t 
level, t_scalar_t name); 159 int udp_opt_set(conn_t *connp, uint_t optset_context, 160 int level, int name, uint_t inlen, 161 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 162 void *thisdg_attrs, cred_t *cr); 163 int udp_opt_get(conn_t *connp, int level, int name, 164 uchar_t *ptr); 165 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 166 pid_t pid); 167 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 168 pid_t pid, ip_xmit_attr_t *ixa); 169 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 170 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 171 ip_xmit_attr_t *ixa); 172 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 173 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 174 int *); 175 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 176 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 177 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 178 static void udp_ud_err_connected(conn_t *, t_scalar_t); 179 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 180 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 181 boolean_t random); 182 static void udp_wput_other(queue_t *q, mblk_t *mp); 183 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 184 static int udp_wput_fallback(queue_t *q, mblk_t *mp); 185 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 186 187 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 188 static void udp_stack_fini(netstackid_t stackid, void *arg); 189 190 /* Common routines for TPI and socket module */ 191 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 192 193 /* Common routine for TPI and socket module */ 194 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); 195 static void udp_do_close(conn_t *); 196 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 197 boolean_t); 198 
static int udp_do_unbind(conn_t *); 199 200 int udp_getsockname(sock_lower_handle_t, 201 struct sockaddr *, socklen_t *, cred_t *); 202 int udp_getpeername(sock_lower_handle_t, 203 struct sockaddr *, socklen_t *, cred_t *); 204 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 205 cred_t *, pid_t); 206 207 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) 208 209 /* 210 * Checks if the given destination addr/port is allowed out. 211 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 212 * Called for each connect() and for sendto()/sendmsg() to a different 213 * destination. 214 * For connect(), called in udp_connect(). 215 * For sendto()/sendmsg(), called in udp_output_newdst(). 216 * 217 * This macro assumes that the cl_inet_connect2 hook is not NULL. 218 * Please check this before calling this macro. 219 * 220 * void 221 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 222 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 223 */ 224 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 225 (err) = 0; \ 226 /* \ 227 * Running in cluster mode - check and register active \ 228 * "connection" information \ 229 */ \ 230 if ((cp)->conn_ipversion == IPV4_VERSION) \ 231 (err) = (*cl_inet_connect2)( \ 232 (cp)->conn_netstack->netstack_stackid, \ 233 IPPROTO_UDP, is_outgoing, AF_INET, \ 234 (uint8_t *)&((cp)->conn_laddr_v4), \ 235 (cp)->conn_lport, \ 236 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 237 (in_port_t)(fport), NULL); \ 238 else \ 239 (err) = (*cl_inet_connect2)( \ 240 (cp)->conn_netstack->netstack_stackid, \ 241 IPPROTO_UDP, is_outgoing, AF_INET6, \ 242 (uint8_t *)&((cp)->conn_laddr_v6), \ 243 (cp)->conn_lport, \ 244 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 245 } 246 247 static struct module_info udp_mod_info = { 248 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 249 }; 250 251 /* 252 * Entry points for UDP as a 
 * device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 * The two read-side tables below differ only in their open routine; both
 * share udp_tpi_close and udp_mod_info.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
};

/* Write side; shared by both udpinfov4 and udpinfov6 below. */
static struct qinit udp_winit = {
	udp_wput, ip_wsrv, NULL, NULL, NULL, &udp_mod_info
};

/* UDP entry point during fallback */
struct qinit udp_fallback_sock_winit = {
	udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 * Note that both tables below use udp_openv4, and both are shared by the
 * IPv4 and IPv6 streamtabs.
 */
static struct qinit udp_lrinit = {
	udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

static struct qinit udp_lwinit = {
	udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* Largest UDP payload once the UDP header and a simple IPv4 header are removed */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest UDP payload once the UDP header and the fixed IPv6 header are removed */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of udp_g_t_info_ack_ipv4; differs in sizes and address type */
static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/*
 * UDP tunables related declarations. Definitions are in udp_tunables.c
 */
extern mod_prop_info_t udp_propinfo_tbl[];
extern int udp_propinfo_count;

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Various protocols that encapsulate UDP have no real use for the source port.
 * Instead, they want to vary the source port to provide better equal-cost
 * multipathing and other systems that use fanout. Consider something like
 * VXLAN.
If you're actually sending multiple different streams to a single 356 * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP, 357 * SRC Port, DST Port) will always be the same. 358 * 359 * Here, we return a port to hash this to, if we know how to hash it. If for 360 * some reason we can't perform an L4 hash, then we just return the default 361 * value, usually the default port. After we determine the hash we transform it 362 * so that it's in the range of [ min, max ]. 363 * 364 * We'd like to avoid a pull up for the sake of performing the hash. If the 365 * first mblk_t doesn't have the full protocol header, then we just send it to 366 * the default. If for some reason we have an encapsulated packet that has its 367 * protocol header in different parts of an mblk_t, then we'll go with the 368 * default port. This means that that if a driver isn't consistent about how it 369 * generates the frames for a given flow, it will not always be consistently 370 * hashed. That should be an uncommon event. 371 */ 372 uint16_t 373 udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max, 374 uint16_t def) 375 { 376 size_t szused = 0; 377 ip6_t *ip6h; 378 ipha_t *ipha; 379 uint16_t sap; 380 uint64_t hash; 381 uint32_t mod; 382 383 ASSERT(min <= max); 384 385 if (type != UDP_HASH_VXLAN) 386 return (def); 387 388 if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))) 389 return (def); 390 391 /* 392 * The following logic is VXLAN specific to get at the header, if we 393 * have formats, eg. GENEVE, then we should ignore this. 394 * 395 * The kernel overlay device often puts a first mblk_t for the data 396 * which is just the encap. If so, then we're going to use that and try 397 * to avoid a pull up. 398 */ 399 if (MBLKL(mp) == VXLAN_HDR_LEN) { 400 if (mp->b_cont == NULL) 401 return (def); 402 mp = mp->b_cont; 403 } else if (MBLKL(mp) < VXLAN_HDR_LEN) { 404 return (def); 405 } else { 406 szused = VXLAN_HDR_LEN; 407 } 408 409 /* Can we hold a MAC header? 
*/ 410 if (MBLKL(mp) + szused < sizeof (struct ether_header)) 411 return (def); 412 413 /* 414 * We need to lie about the starting offset into the message block for 415 * convenience. Undo it at the end. We know that inet_pkt_hash() won't 416 * modify the mblk_t. 417 */ 418 mp->b_rptr += szused; 419 hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 | 420 INET_PKT_HASH_L3 | INET_PKT_HASH_L4); 421 mp->b_rptr -= szused; 422 423 if (hash == 0) 424 return (def); 425 426 mod = max - min + 1; 427 return ((hash % mod) + min); 428 } 429 430 /* 431 * Return the next anonymous port in the privileged port range for 432 * bind checking. 433 * 434 * Trusted Extension (TX) notes: TX allows administrator to mark or 435 * reserve ports as Multilevel ports (MLP). MLP has special function 436 * on TX systems. Once a port is made MLP, it's not available as 437 * ordinary port. This creates "holes" in the port name space. It 438 * may be necessary to skip the "holes" find a suitable anon port. 439 */ 440 static in_port_t 441 udp_get_next_priv_port(udp_t *udp) 442 { 443 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 444 in_port_t nextport; 445 boolean_t restart = B_FALSE; 446 udp_stack_t *us = udp->udp_us; 447 448 retry: 449 if (next_priv_port < us->us_min_anonpriv_port || 450 next_priv_port >= IPPORT_RESERVED) { 451 next_priv_port = IPPORT_RESERVED - 1; 452 if (restart) 453 return (0); 454 restart = B_TRUE; 455 } 456 457 if (is_system_labeled() && 458 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 459 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 460 next_priv_port = nextport; 461 goto retry; 462 } 463 464 return (next_priv_port--); 465 } 466 467 /* 468 * Hash list removal routine for udp_t structures. 
469 */ 470 static void 471 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 472 { 473 udp_t *udpnext; 474 kmutex_t *lockp; 475 udp_stack_t *us = udp->udp_us; 476 conn_t *connp = udp->udp_connp; 477 478 if (udp->udp_ptpbhn == NULL) 479 return; 480 481 /* 482 * Extract the lock pointer in case there are concurrent 483 * hash_remove's for this instance. 484 */ 485 ASSERT(connp->conn_lport != 0); 486 if (!caller_holds_lock) { 487 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 488 us->us_bind_fanout_size)].uf_lock; 489 ASSERT(lockp != NULL); 490 mutex_enter(lockp); 491 } 492 if (udp->udp_ptpbhn != NULL) { 493 udpnext = udp->udp_bind_hash; 494 if (udpnext != NULL) { 495 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 496 udp->udp_bind_hash = NULL; 497 } 498 *udp->udp_ptpbhn = udpnext; 499 udp->udp_ptpbhn = NULL; 500 } 501 if (!caller_holds_lock) { 502 mutex_exit(lockp); 503 } 504 } 505 506 static void 507 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 508 { 509 conn_t *connp = udp->udp_connp; 510 udp_t **udpp; 511 udp_t *udpnext; 512 conn_t *connext; 513 514 ASSERT(MUTEX_HELD(&uf->uf_lock)); 515 ASSERT(udp->udp_ptpbhn == NULL); 516 udpp = &uf->uf_udp; 517 udpnext = udpp[0]; 518 if (udpnext != NULL) { 519 /* 520 * If the new udp bound to the INADDR_ANY address 521 * and the first one in the list is not bound to 522 * INADDR_ANY we skip all entries until we find the 523 * first one bound to INADDR_ANY. 524 * This makes sure that applications binding to a 525 * specific address get preference over those binding to 526 * INADDR_ANY. 
527 */ 528 connext = udpnext->udp_connp; 529 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 530 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 531 while ((udpnext = udpp[0]) != NULL && 532 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 533 udpp = &(udpnext->udp_bind_hash); 534 } 535 if (udpnext != NULL) 536 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 537 } else { 538 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 539 } 540 } 541 udp->udp_bind_hash = udpnext; 542 udp->udp_ptpbhn = udpp; 543 udpp[0] = udp; 544 } 545 546 /* 547 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 548 * passed to udp_wput. 549 * It associates a port number and local address with the stream. 550 * It calls IP to verify the local IP address, and calls IP to insert 551 * the conn_t in the fanout table. 552 * If everything is ok it then sends the T_BIND_ACK back up. 553 * 554 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 555 * without setting SO_REUSEADDR. This is needed so that they 556 * can be viewed as two independent transport protocols. 557 * However, anonymouns ports are allocated from the same range to avoid 558 * duplicating the us->us_next_port_to_try. 559 */ 560 static void 561 udp_tpi_bind(queue_t *q, mblk_t *mp) 562 { 563 sin_t *sin; 564 sin6_t *sin6; 565 mblk_t *mp1; 566 struct T_bind_req *tbr; 567 conn_t *connp; 568 udp_t *udp; 569 int error; 570 struct sockaddr *sa; 571 cred_t *cr; 572 573 /* 574 * All Solaris components should pass a db_credp 575 * for this TPI message, hence we ASSERT. 576 * But in case there is some other M_PROTO that looks 577 * like a TPI message sent by some other kernel 578 * component, we check and return an error. 
579 */ 580 cr = msg_getcred(mp, NULL); 581 ASSERT(cr != NULL); 582 if (cr == NULL) { 583 udp_err_ack(q, mp, TSYSERR, EINVAL); 584 return; 585 } 586 587 connp = Q_TO_CONN(q); 588 udp = connp->conn_udp; 589 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 590 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 591 "udp_bind: bad req, len %u", 592 (uint_t)(mp->b_wptr - mp->b_rptr)); 593 udp_err_ack(q, mp, TPROTO, 0); 594 return; 595 } 596 if (udp->udp_state != TS_UNBND) { 597 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 598 "udp_bind: bad state, %u", udp->udp_state); 599 udp_err_ack(q, mp, TOUTSTATE, 0); 600 return; 601 } 602 /* 603 * Reallocate the message to make sure we have enough room for an 604 * address. 605 */ 606 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 607 if (mp1 == NULL) { 608 udp_err_ack(q, mp, TSYSERR, ENOMEM); 609 return; 610 } 611 612 mp = mp1; 613 614 /* Reset the message type in preparation for shipping it back. */ 615 DB_TYPE(mp) = M_PCPROTO; 616 617 tbr = (struct T_bind_req *)mp->b_rptr; 618 switch (tbr->ADDR_length) { 619 case 0: /* Request for a generic port */ 620 tbr->ADDR_offset = sizeof (struct T_bind_req); 621 if (connp->conn_family == AF_INET) { 622 tbr->ADDR_length = sizeof (sin_t); 623 sin = (sin_t *)&tbr[1]; 624 *sin = sin_null; 625 sin->sin_family = AF_INET; 626 mp->b_wptr = (uchar_t *)&sin[1]; 627 sa = (struct sockaddr *)sin; 628 } else { 629 ASSERT(connp->conn_family == AF_INET6); 630 tbr->ADDR_length = sizeof (sin6_t); 631 sin6 = (sin6_t *)&tbr[1]; 632 *sin6 = sin6_null; 633 sin6->sin6_family = AF_INET6; 634 mp->b_wptr = (uchar_t *)&sin6[1]; 635 sa = (struct sockaddr *)sin6; 636 } 637 break; 638 639 case sizeof (sin_t): /* Complete IPv4 address */ 640 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 641 sizeof (sin_t)); 642 if (sa == NULL || !OK_32PTR((char *)sa)) { 643 udp_err_ack(q, mp, TSYSERR, EINVAL); 644 return; 645 } 646 if (connp->conn_family != AF_INET || 647 sa->sa_family != AF_INET) { 648 
udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 649 return; 650 } 651 break; 652 653 case sizeof (sin6_t): /* complete IPv6 address */ 654 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 655 sizeof (sin6_t)); 656 if (sa == NULL || !OK_32PTR((char *)sa)) { 657 udp_err_ack(q, mp, TSYSERR, EINVAL); 658 return; 659 } 660 if (connp->conn_family != AF_INET6 || 661 sa->sa_family != AF_INET6) { 662 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 663 return; 664 } 665 break; 666 667 default: /* Invalid request */ 668 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 669 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 670 udp_err_ack(q, mp, TBADADDR, 0); 671 return; 672 } 673 674 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 675 tbr->PRIM_type != O_T_BIND_REQ); 676 677 if (error != 0) { 678 if (error > 0) { 679 udp_err_ack(q, mp, TSYSERR, error); 680 } else { 681 udp_err_ack(q, mp, -error, 0); 682 } 683 } else { 684 tbr->PRIM_type = T_BIND_ACK; 685 qreply(q, mp); 686 } 687 } 688 689 /* 690 * This routine handles each T_CONN_REQ message passed to udp. It 691 * associates a default destination address with the stream. 692 * 693 * After various error checks are completed, udp_connect() lays 694 * the target address and port into the composite header template. 695 * Then we ask IP for information, including a source address if we didn't 696 * already have one. Finally we send up the T_OK_ACK reply message. 697 */ 698 static void 699 udp_tpi_connect(queue_t *q, mblk_t *mp) 700 { 701 conn_t *connp = Q_TO_CONN(q); 702 int error; 703 socklen_t len; 704 struct sockaddr *sa; 705 struct T_conn_req *tcr; 706 cred_t *cr; 707 pid_t pid; 708 /* 709 * All Solaris components should pass a db_credp 710 * for this TPI message, hence we ASSERT. 711 * But in case there is some other M_PROTO that looks 712 * like a TPI message sent by some other kernel 713 * component, we check and return an error. 
714 */ 715 cr = msg_getcred(mp, &pid); 716 ASSERT(cr != NULL); 717 if (cr == NULL) { 718 udp_err_ack(q, mp, TSYSERR, EINVAL); 719 return; 720 } 721 722 tcr = (struct T_conn_req *)mp->b_rptr; 723 724 /* A bit of sanity checking */ 725 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 726 udp_err_ack(q, mp, TPROTO, 0); 727 return; 728 } 729 730 if (tcr->OPT_length != 0) { 731 udp_err_ack(q, mp, TBADOPT, 0); 732 return; 733 } 734 735 /* 736 * Determine packet type based on type of address passed in 737 * the request should contain an IPv4 or IPv6 address. 738 * Make sure that address family matches the type of 739 * family of the address passed down. 740 */ 741 len = tcr->DEST_length; 742 switch (tcr->DEST_length) { 743 default: 744 udp_err_ack(q, mp, TBADADDR, 0); 745 return; 746 747 case sizeof (sin_t): 748 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 749 sizeof (sin_t)); 750 break; 751 752 case sizeof (sin6_t): 753 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 754 sizeof (sin6_t)); 755 break; 756 } 757 758 error = proto_verify_ip_addr(connp->conn_family, sa, len); 759 if (error != 0) { 760 udp_err_ack(q, mp, TSYSERR, error); 761 return; 762 } 763 764 error = udp_do_connect(connp, sa, len, cr, pid); 765 if (error != 0) { 766 if (error < 0) 767 udp_err_ack(q, mp, -error, 0); 768 else 769 udp_err_ack(q, mp, TSYSERR, error); 770 } else { 771 mblk_t *mp1; 772 /* 773 * We have to send a connection confirmation to 774 * keep TLI happy. 775 */ 776 if (connp->conn_family == AF_INET) { 777 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 778 sizeof (sin_t), NULL, 0); 779 } else { 780 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 781 sizeof (sin6_t), NULL, 0); 782 } 783 if (mp1 == NULL) { 784 udp_err_ack(q, mp, TSYSERR, ENOMEM); 785 return; 786 } 787 788 /* 789 * Send ok_ack for T_CONN_REQ 790 */ 791 mp = mi_tpi_ok_ack_alloc(mp); 792 if (mp == NULL) { 793 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 794 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 795 return; 796 } 797 798 putnext(connp->conn_rq, mp); 799 putnext(connp->conn_rq, mp1); 800 } 801 } 802 803 /* ARGSUSED */ 804 static int 805 udp_tpi_close(queue_t *q, int flags, cred_t *credp __unused) 806 { 807 conn_t *connp; 808 809 if (flags & SO_FALLBACK) { 810 /* 811 * stream is being closed while in fallback 812 * simply free the resources that were allocated 813 */ 814 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 815 qprocsoff(q); 816 goto done; 817 } 818 819 connp = Q_TO_CONN(q); 820 udp_do_close(connp); 821 done: 822 q->q_ptr = WR(q)->q_ptr = NULL; 823 return (0); 824 } 825 826 static void 827 udp_close_free(conn_t *connp) 828 { 829 udp_t *udp = connp->conn_udp; 830 831 /* If there are any options associated with the stream, free them. */ 832 if (udp->udp_recv_ipp.ipp_fields != 0) 833 ip_pkt_free(&udp->udp_recv_ipp); 834 835 /* 836 * Clear any fields which the kmem_cache constructor clears. 837 * Only udp_connp needs to be preserved. 838 * TBD: We should make this more efficient to avoid clearing 839 * everything. 
840 */ 841 ASSERT(udp->udp_connp == connp); 842 bzero(udp, sizeof (udp_t)); 843 udp->udp_connp = connp; 844 } 845 846 static int 847 udp_do_disconnect(conn_t *connp) 848 { 849 udp_t *udp; 850 udp_fanout_t *udpf; 851 udp_stack_t *us; 852 int error; 853 854 udp = connp->conn_udp; 855 us = udp->udp_us; 856 mutex_enter(&connp->conn_lock); 857 if (udp->udp_state != TS_DATA_XFER) { 858 mutex_exit(&connp->conn_lock); 859 return (-TOUTSTATE); 860 } 861 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 862 us->us_bind_fanout_size)]; 863 mutex_enter(&udpf->uf_lock); 864 if (connp->conn_mcbc_bind) 865 connp->conn_saddr_v6 = ipv6_all_zeros; 866 else 867 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 868 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 869 connp->conn_faddr_v6 = ipv6_all_zeros; 870 connp->conn_fport = 0; 871 udp->udp_state = TS_IDLE; 872 mutex_exit(&udpf->uf_lock); 873 874 /* Remove any remnants of mapped address binding */ 875 if (connp->conn_family == AF_INET6) 876 connp->conn_ipversion = IPV6_VERSION; 877 878 connp->conn_v6lastdst = ipv6_all_zeros; 879 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 880 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 881 mutex_exit(&connp->conn_lock); 882 if (error != 0) 883 return (error); 884 885 /* 886 * Tell IP to remove the full binding and revert 887 * to the local address binding. 888 */ 889 return (ip_laddr_fanout_insert(connp)); 890 } 891 892 static void 893 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 894 { 895 conn_t *connp = Q_TO_CONN(q); 896 int error; 897 898 /* 899 * Allocate the largest primitive we need to send back 900 * T_error_ack is > than T_ok_ack 901 */ 902 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 903 if (mp == NULL) { 904 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 905 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 906 return; 907 } 908 909 error = udp_do_disconnect(connp); 910 911 if (error != 0) { 912 if (error < 0) { 913 udp_err_ack(q, mp, -error, 0); 914 } else { 915 udp_err_ack(q, mp, TSYSERR, error); 916 } 917 } else { 918 mp = mi_tpi_ok_ack_alloc(mp); 919 ASSERT(mp != NULL); 920 qreply(q, mp); 921 } 922 } 923 924 int 925 udp_disconnect(conn_t *connp) 926 { 927 int error; 928 929 connp->conn_dgram_errind = B_FALSE; 930 error = udp_do_disconnect(connp); 931 if (error < 0) 932 error = proto_tlitosyserr(-error); 933 934 return (error); 935 } 936 937 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 938 static void 939 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 940 { 941 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 942 qreply(q, mp); 943 } 944 945 /* Shorthand to generate and send TPI error acks to our client */ 946 static void 947 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 948 t_scalar_t t_error, int sys_error) 949 { 950 struct T_error_ack *teackp; 951 952 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 953 M_PCPROTO, T_ERROR_ACK)) != NULL) { 954 teackp = (struct T_error_ack *)mp->b_rptr; 955 teackp->ERROR_prim = primitive; 956 teackp->TLI_error = t_error; 957 teackp->UNIX_error = sys_error; 958 qreply(q, mp); 959 } 960 } 961 962 /* At minimum we need 4 bytes of UDP header */ 963 #define ICMP_MIN_UDP_HDR 4 964 965 /* 966 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 967 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 968 * Assumes that IP has pulled up everything up to and including the ICMP header. 
969 */ 970 /* ARGSUSED2 */ 971 static void 972 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 973 { 974 conn_t *connp = (conn_t *)arg1; 975 icmph_t *icmph; 976 ipha_t *ipha; 977 int iph_hdr_length; 978 udpha_t *udpha; 979 sin_t sin; 980 sin6_t sin6; 981 mblk_t *mp1; 982 int error = 0; 983 udp_t *udp = connp->conn_udp; 984 985 ipha = (ipha_t *)mp->b_rptr; 986 987 ASSERT(OK_32PTR(mp->b_rptr)); 988 989 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 990 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 991 udp_icmp_error_ipv6(connp, mp, ira); 992 return; 993 } 994 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 995 996 /* Skip past the outer IP and ICMP headers */ 997 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 998 iph_hdr_length = ira->ira_ip_hdr_length; 999 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1000 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 1001 1002 /* Skip past the inner IP and find the ULP header */ 1003 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1004 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1005 1006 switch (icmph->icmph_type) { 1007 case ICMP_DEST_UNREACHABLE: 1008 switch (icmph->icmph_code) { 1009 case ICMP_FRAGMENTATION_NEEDED: { 1010 ipha_t *ipha; 1011 ip_xmit_attr_t *ixa; 1012 /* 1013 * IP has already adjusted the path MTU. 1014 * But we need to adjust DF for IPv4. 1015 */ 1016 if (connp->conn_ipversion != IPV4_VERSION) 1017 break; 1018 1019 ixa = conn_get_ixa(connp, B_FALSE); 1020 if (ixa == NULL || ixa->ixa_ire == NULL) { 1021 /* 1022 * Some other thread holds conn_ixa. We will 1023 * redo this on the next ICMP too big. 
1024 */ 1025 if (ixa != NULL) 1026 ixa_refrele(ixa); 1027 break; 1028 } 1029 (void) ip_get_pmtu(ixa); 1030 1031 mutex_enter(&connp->conn_lock); 1032 ipha = (ipha_t *)connp->conn_ht_iphc; 1033 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 1034 ipha->ipha_fragment_offset_and_flags |= 1035 IPH_DF_HTONS; 1036 } else { 1037 ipha->ipha_fragment_offset_and_flags &= 1038 ~IPH_DF_HTONS; 1039 } 1040 mutex_exit(&connp->conn_lock); 1041 ixa_refrele(ixa); 1042 break; 1043 } 1044 case ICMP_PORT_UNREACHABLE: 1045 case ICMP_PROTOCOL_UNREACHABLE: 1046 error = ECONNREFUSED; 1047 break; 1048 default: 1049 /* Transient errors */ 1050 break; 1051 } 1052 break; 1053 default: 1054 /* Transient errors */ 1055 break; 1056 } 1057 if (error == 0) { 1058 freemsg(mp); 1059 return; 1060 } 1061 1062 /* 1063 * Deliver T_UDERROR_IND when the application has asked for it. 1064 * The socket layer enables this automatically when connected. 1065 */ 1066 if (!connp->conn_dgram_errind) { 1067 freemsg(mp); 1068 return; 1069 } 1070 1071 switch (connp->conn_family) { 1072 case AF_INET: 1073 sin = sin_null; 1074 sin.sin_family = AF_INET; 1075 sin.sin_addr.s_addr = ipha->ipha_dst; 1076 sin.sin_port = udpha->uha_dst_port; 1077 if (IPCL_IS_NONSTR(connp)) { 1078 mutex_enter(&connp->conn_lock); 1079 if (udp->udp_state == TS_DATA_XFER) { 1080 if (sin.sin_port == connp->conn_fport && 1081 sin.sin_addr.s_addr == 1082 connp->conn_faddr_v4) { 1083 mutex_exit(&connp->conn_lock); 1084 (*connp->conn_upcalls->su_set_error) 1085 (connp->conn_upper_handle, error); 1086 goto done; 1087 } 1088 } else { 1089 udp->udp_delayed_error = error; 1090 *((sin_t *)&udp->udp_delayed_addr) = sin; 1091 } 1092 mutex_exit(&connp->conn_lock); 1093 } else { 1094 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1095 NULL, 0, error); 1096 if (mp1 != NULL) 1097 putnext(connp->conn_rq, mp1); 1098 } 1099 break; 1100 case AF_INET6: 1101 sin6 = sin6_null; 1102 sin6.sin6_family = AF_INET6; 1103 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, 
&sin6.sin6_addr); 1104 sin6.sin6_port = udpha->uha_dst_port; 1105 if (IPCL_IS_NONSTR(connp)) { 1106 mutex_enter(&connp->conn_lock); 1107 if (udp->udp_state == TS_DATA_XFER) { 1108 if (sin6.sin6_port == connp->conn_fport && 1109 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1110 &connp->conn_faddr_v6)) { 1111 mutex_exit(&connp->conn_lock); 1112 (*connp->conn_upcalls->su_set_error) 1113 (connp->conn_upper_handle, error); 1114 goto done; 1115 } 1116 } else { 1117 udp->udp_delayed_error = error; 1118 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1119 } 1120 mutex_exit(&connp->conn_lock); 1121 } else { 1122 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1123 NULL, 0, error); 1124 if (mp1 != NULL) 1125 putnext(connp->conn_rq, mp1); 1126 } 1127 break; 1128 } 1129 done: 1130 freemsg(mp); 1131 } 1132 1133 /* 1134 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1135 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1136 * Assumes that IP has pulled up all the extension headers as well as the 1137 * ICMPv6 header. 
1138 */ 1139 static void 1140 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1141 { 1142 icmp6_t *icmp6; 1143 ip6_t *ip6h, *outer_ip6h; 1144 uint16_t iph_hdr_length; 1145 uint8_t *nexthdrp; 1146 udpha_t *udpha; 1147 sin6_t sin6; 1148 mblk_t *mp1; 1149 int error = 0; 1150 udp_t *udp = connp->conn_udp; 1151 udp_stack_t *us = udp->udp_us; 1152 1153 outer_ip6h = (ip6_t *)mp->b_rptr; 1154 #ifdef DEBUG 1155 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1156 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1157 else 1158 iph_hdr_length = IPV6_HDR_LEN; 1159 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1160 #endif 1161 /* Skip past the outer IP and ICMP headers */ 1162 iph_hdr_length = ira->ira_ip_hdr_length; 1163 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1164 1165 /* Skip past the inner IP and find the ULP header */ 1166 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1167 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1168 freemsg(mp); 1169 return; 1170 } 1171 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1172 1173 switch (icmp6->icmp6_type) { 1174 case ICMP6_DST_UNREACH: 1175 switch (icmp6->icmp6_code) { 1176 case ICMP6_DST_UNREACH_NOPORT: 1177 error = ECONNREFUSED; 1178 break; 1179 case ICMP6_DST_UNREACH_ADMIN: 1180 case ICMP6_DST_UNREACH_NOROUTE: 1181 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1182 case ICMP6_DST_UNREACH_ADDR: 1183 /* Transient errors */ 1184 break; 1185 default: 1186 break; 1187 } 1188 break; 1189 case ICMP6_PACKET_TOO_BIG: { 1190 struct T_unitdata_ind *tudi; 1191 struct T_opthdr *toh; 1192 size_t udi_size; 1193 mblk_t *newmp; 1194 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1195 sizeof (struct ip6_mtuinfo); 1196 sin6_t *sin6; 1197 struct ip6_mtuinfo *mtuinfo; 1198 1199 /* 1200 * If the application has requested to receive path mtu 1201 * information, send up an empty message containing an 1202 * IPV6_PATHMTU ancillary data item. 
1203 */ 1204 if (!connp->conn_ipv6_recvpathmtu) 1205 break; 1206 1207 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1208 opt_length; 1209 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1210 UDPS_BUMP_MIB(us, udpInErrors); 1211 break; 1212 } 1213 1214 /* 1215 * newmp->b_cont is left to NULL on purpose. This is an 1216 * empty message containing only ancillary data. 1217 */ 1218 newmp->b_datap->db_type = M_PROTO; 1219 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1220 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1221 tudi->PRIM_type = T_UNITDATA_IND; 1222 tudi->SRC_length = sizeof (sin6_t); 1223 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1224 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1225 tudi->OPT_length = opt_length; 1226 1227 sin6 = (sin6_t *)&tudi[1]; 1228 bzero(sin6, sizeof (sin6_t)); 1229 sin6->sin6_family = AF_INET6; 1230 sin6->sin6_addr = connp->conn_faddr_v6; 1231 1232 toh = (struct T_opthdr *)&sin6[1]; 1233 toh->level = IPPROTO_IPV6; 1234 toh->name = IPV6_PATHMTU; 1235 toh->len = opt_length; 1236 toh->status = 0; 1237 1238 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1239 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1240 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1241 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1242 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1243 /* 1244 * We've consumed everything we need from the original 1245 * message. Free it, then send our empty message. 
1246 */ 1247 freemsg(mp); 1248 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); 1249 return; 1250 } 1251 case ICMP6_TIME_EXCEEDED: 1252 /* Transient errors */ 1253 break; 1254 case ICMP6_PARAM_PROB: 1255 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1256 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1257 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1258 (uchar_t *)nexthdrp) { 1259 error = ECONNREFUSED; 1260 break; 1261 } 1262 break; 1263 } 1264 if (error == 0) { 1265 freemsg(mp); 1266 return; 1267 } 1268 1269 /* 1270 * Deliver T_UDERROR_IND when the application has asked for it. 1271 * The socket layer enables this automatically when connected. 1272 */ 1273 if (!connp->conn_dgram_errind) { 1274 freemsg(mp); 1275 return; 1276 } 1277 1278 sin6 = sin6_null; 1279 sin6.sin6_family = AF_INET6; 1280 sin6.sin6_addr = ip6h->ip6_dst; 1281 sin6.sin6_port = udpha->uha_dst_port; 1282 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1283 1284 if (IPCL_IS_NONSTR(connp)) { 1285 mutex_enter(&connp->conn_lock); 1286 if (udp->udp_state == TS_DATA_XFER) { 1287 if (sin6.sin6_port == connp->conn_fport && 1288 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1289 &connp->conn_faddr_v6)) { 1290 mutex_exit(&connp->conn_lock); 1291 (*connp->conn_upcalls->su_set_error) 1292 (connp->conn_upper_handle, error); 1293 goto done; 1294 } 1295 } else { 1296 udp->udp_delayed_error = error; 1297 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1298 } 1299 mutex_exit(&connp->conn_lock); 1300 } else { 1301 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1302 NULL, 0, error); 1303 if (mp1 != NULL) 1304 putnext(connp->conn_rq, mp1); 1305 } 1306 done: 1307 freemsg(mp); 1308 } 1309 1310 /* 1311 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1312 * The local address is filled in if endpoint is bound. 
The remote address 1313 * is filled in if remote address has been precified ("connected endpoint") 1314 * (The concept of connected CLTS sockets is alien to published TPI 1315 * but we support it anyway). 1316 */ 1317 static void 1318 udp_addr_req(queue_t *q, mblk_t *mp) 1319 { 1320 struct sockaddr *sa; 1321 mblk_t *ackmp; 1322 struct T_addr_ack *taa; 1323 udp_t *udp = Q_TO_UDP(q); 1324 conn_t *connp = udp->udp_connp; 1325 uint_t addrlen; 1326 1327 /* Make it large enough for worst case */ 1328 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1329 2 * sizeof (sin6_t), 1); 1330 if (ackmp == NULL) { 1331 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1332 return; 1333 } 1334 taa = (struct T_addr_ack *)ackmp->b_rptr; 1335 1336 bzero(taa, sizeof (struct T_addr_ack)); 1337 ackmp->b_wptr = (uchar_t *)&taa[1]; 1338 1339 taa->PRIM_type = T_ADDR_ACK; 1340 ackmp->b_datap->db_type = M_PCPROTO; 1341 1342 if (connp->conn_family == AF_INET) 1343 addrlen = sizeof (sin_t); 1344 else 1345 addrlen = sizeof (sin6_t); 1346 1347 mutex_enter(&connp->conn_lock); 1348 /* 1349 * Note: Following code assumes 32 bit alignment of basic 1350 * data structures like sin_t and struct T_addr_ack. 
1351 */ 1352 if (udp->udp_state != TS_UNBND) { 1353 /* 1354 * Fill in local address first 1355 */ 1356 taa->LOCADDR_offset = sizeof (*taa); 1357 taa->LOCADDR_length = addrlen; 1358 sa = (struct sockaddr *)&taa[1]; 1359 (void) conn_getsockname(connp, sa, &addrlen); 1360 ackmp->b_wptr += addrlen; 1361 } 1362 if (udp->udp_state == TS_DATA_XFER) { 1363 /* 1364 * connected, fill remote address too 1365 */ 1366 taa->REMADDR_length = addrlen; 1367 /* assumed 32-bit alignment */ 1368 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1369 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1370 (void) conn_getpeername(connp, sa, &addrlen); 1371 ackmp->b_wptr += addrlen; 1372 } 1373 mutex_exit(&connp->conn_lock); 1374 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1375 qreply(q, ackmp); 1376 } 1377 1378 static void 1379 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1380 { 1381 conn_t *connp = udp->udp_connp; 1382 1383 if (connp->conn_family == AF_INET) { 1384 *tap = udp_g_t_info_ack_ipv4; 1385 } else { 1386 *tap = udp_g_t_info_ack_ipv6; 1387 } 1388 tap->CURRENT_state = udp->udp_state; 1389 tap->OPT_size = udp_max_optsize; 1390 } 1391 1392 static void 1393 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1394 t_uscalar_t cap_bits1) 1395 { 1396 tcap->CAP_bits1 = 0; 1397 1398 if (cap_bits1 & TC1_INFO) { 1399 udp_copy_info(&tcap->INFO_ack, udp); 1400 tcap->CAP_bits1 |= TC1_INFO; 1401 } 1402 } 1403 1404 /* 1405 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1406 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1407 * udp_g_t_info_ack. The current state of the stream is copied from 1408 * udp_state. 
1409 */ 1410 static void 1411 udp_capability_req(queue_t *q, mblk_t *mp) 1412 { 1413 t_uscalar_t cap_bits1; 1414 struct T_capability_ack *tcap; 1415 udp_t *udp = Q_TO_UDP(q); 1416 1417 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1418 1419 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1420 mp->b_datap->db_type, T_CAPABILITY_ACK); 1421 if (!mp) 1422 return; 1423 1424 tcap = (struct T_capability_ack *)mp->b_rptr; 1425 udp_do_capability_ack(udp, tcap, cap_bits1); 1426 1427 qreply(q, mp); 1428 } 1429 1430 /* 1431 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1432 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1433 * The current state of the stream is copied from udp_state. 1434 */ 1435 static void 1436 udp_info_req(queue_t *q, mblk_t *mp) 1437 { 1438 udp_t *udp = Q_TO_UDP(q); 1439 1440 /* Create a T_INFO_ACK message. */ 1441 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1442 T_INFO_ACK); 1443 if (!mp) 1444 return; 1445 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1446 qreply(q, mp); 1447 } 1448 1449 /* For /dev/udp aka AF_INET open */ 1450 static int 1451 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1452 { 1453 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1454 } 1455 1456 /* For /dev/udp6 aka AF_INET6 open */ 1457 static int 1458 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1459 { 1460 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1461 } 1462 1463 /* 1464 * This is the open routine for udp. It allocates a udp_t structure for 1465 * the stream and, on the first open of the module, creates an ND table. 1466 */ 1467 static int 1468 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1469 boolean_t isv6) 1470 { 1471 udp_t *udp; 1472 conn_t *connp; 1473 dev_t conn_dev; 1474 vmem_t *minor_arena; 1475 int err; 1476 1477 /* If the stream is already open, return immediately. 
*/ 1478 if (q->q_ptr != NULL) 1479 return (0); 1480 1481 if (sflag == MODOPEN) 1482 return (EINVAL); 1483 1484 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1485 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1486 minor_arena = ip_minor_arena_la; 1487 } else { 1488 /* 1489 * Either minor numbers in the large arena were exhausted 1490 * or a non socket application is doing the open. 1491 * Try to allocate from the small arena. 1492 */ 1493 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1494 return (EBUSY); 1495 1496 minor_arena = ip_minor_arena_sa; 1497 } 1498 1499 if (flag & SO_FALLBACK) { 1500 /* 1501 * Non streams socket needs a stream to fallback to 1502 */ 1503 RD(q)->q_ptr = (void *)conn_dev; 1504 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1505 WR(q)->q_ptr = (void *)minor_arena; 1506 qprocson(q); 1507 return (0); 1508 } 1509 1510 connp = udp_do_open(credp, isv6, KM_SLEEP, &err); 1511 if (connp == NULL) { 1512 inet_minor_free(minor_arena, conn_dev); 1513 return (err); 1514 } 1515 udp = connp->conn_udp; 1516 1517 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1518 connp->conn_dev = conn_dev; 1519 connp->conn_minor_arena = minor_arena; 1520 1521 /* 1522 * Initialize the udp_t structure for this stream. 1523 */ 1524 q->q_ptr = connp; 1525 WR(q)->q_ptr = connp; 1526 connp->conn_rq = q; 1527 connp->conn_wq = WR(q); 1528 1529 /* 1530 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1531 * need to lock anything. 1532 */ 1533 ASSERT(connp->conn_proto == IPPROTO_UDP); 1534 ASSERT(connp->conn_udp == udp); 1535 ASSERT(udp->udp_connp == connp); 1536 1537 if (flag & SO_SOCKSTR) { 1538 udp->udp_issocket = B_TRUE; 1539 } 1540 1541 WR(q)->q_hiwat = connp->conn_sndbuf; 1542 WR(q)->q_lowat = connp->conn_sndlowat; 1543 1544 qprocson(q); 1545 1546 /* Set the Stream head write offset and high watermark. 
*/ 1547 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1548 (void) proto_set_rx_hiwat(q, connp, 1549 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1550 1551 mutex_enter(&connp->conn_lock); 1552 connp->conn_state_flags &= ~CONN_INCIPIENT; 1553 mutex_exit(&connp->conn_lock); 1554 return (0); 1555 } 1556 1557 /* 1558 * Which UDP options OK to set through T_UNITDATA_REQ... 1559 */ 1560 /* ARGSUSED */ 1561 static boolean_t 1562 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1563 { 1564 return (B_TRUE); 1565 } 1566 1567 /* 1568 * This routine gets default values of certain options whose default 1569 * values are maintained by protcol specific code 1570 */ 1571 int 1572 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1573 { 1574 udp_t *udp = Q_TO_UDP(q); 1575 udp_stack_t *us = udp->udp_us; 1576 int *i1 = (int *)ptr; 1577 1578 switch (level) { 1579 case IPPROTO_IP: 1580 switch (name) { 1581 case IP_MULTICAST_TTL: 1582 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1583 return (sizeof (uchar_t)); 1584 case IP_MULTICAST_LOOP: 1585 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1586 return (sizeof (uchar_t)); 1587 } 1588 break; 1589 case IPPROTO_IPV6: 1590 switch (name) { 1591 case IPV6_MULTICAST_HOPS: 1592 *i1 = IP_DEFAULT_MULTICAST_TTL; 1593 return (sizeof (int)); 1594 case IPV6_MULTICAST_LOOP: 1595 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1596 return (sizeof (int)); 1597 case IPV6_UNICAST_HOPS: 1598 *i1 = us->us_ipv6_hoplimit; 1599 return (sizeof (int)); 1600 } 1601 break; 1602 } 1603 return (-1); 1604 } 1605 1606 /* 1607 * This routine retrieves the current status of socket options. 1608 * It returns the size of the option retrieved, or -1. 
1609 */ 1610 int 1611 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1612 uchar_t *ptr) 1613 { 1614 int *i1 = (int *)ptr; 1615 udp_t *udp = connp->conn_udp; 1616 int len; 1617 conn_opt_arg_t coas; 1618 int retval; 1619 1620 coas.coa_connp = connp; 1621 coas.coa_ixa = connp->conn_ixa; 1622 coas.coa_ipp = &connp->conn_xmit_ipp; 1623 coas.coa_ancillary = B_FALSE; 1624 coas.coa_changed = 0; 1625 1626 /* 1627 * We assume that the optcom framework has checked for the set 1628 * of levels and names that are supported, hence we don't worry 1629 * about rejecting based on that. 1630 * First check for UDP specific handling, then pass to common routine. 1631 */ 1632 switch (level) { 1633 case IPPROTO_IP: 1634 /* 1635 * Only allow IPv4 option processing on IPv4 sockets. 1636 */ 1637 if (connp->conn_family != AF_INET) 1638 return (-1); 1639 1640 switch (name) { 1641 case IP_OPTIONS: 1642 case T_IP_OPTIONS: 1643 mutex_enter(&connp->conn_lock); 1644 if (!(udp->udp_recv_ipp.ipp_fields & 1645 IPPF_IPV4_OPTIONS)) { 1646 mutex_exit(&connp->conn_lock); 1647 return (0); 1648 } 1649 1650 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1651 ASSERT(len != 0); 1652 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1653 mutex_exit(&connp->conn_lock); 1654 return (len); 1655 } 1656 break; 1657 case IPPROTO_UDP: 1658 switch (name) { 1659 case UDP_NAT_T_ENDPOINT: 1660 mutex_enter(&connp->conn_lock); 1661 *i1 = udp->udp_nat_t_endpoint; 1662 mutex_exit(&connp->conn_lock); 1663 return (sizeof (int)); 1664 case UDP_RCVHDR: 1665 mutex_enter(&connp->conn_lock); 1666 *i1 = udp->udp_rcvhdr ? 
1 : 0; 1667 mutex_exit(&connp->conn_lock); 1668 return (sizeof (int)); 1669 case UDP_SRCPORT_HASH: 1670 mutex_enter(&connp->conn_lock); 1671 *i1 = udp->udp_vxlanhash; 1672 mutex_exit(&connp->conn_lock); 1673 return (sizeof (int)); 1674 } 1675 } 1676 mutex_enter(&connp->conn_lock); 1677 retval = conn_opt_get(&coas, level, name, ptr); 1678 mutex_exit(&connp->conn_lock); 1679 return (retval); 1680 } 1681 1682 /* 1683 * This routine retrieves the current status of socket options. 1684 * It returns the size of the option retrieved, or -1. 1685 */ 1686 int 1687 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1688 { 1689 conn_t *connp = Q_TO_CONN(q); 1690 int err; 1691 1692 err = udp_opt_get(connp, level, name, ptr); 1693 return (err); 1694 } 1695 1696 /* 1697 * This routine sets socket options. 1698 */ 1699 int 1700 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1701 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1702 { 1703 conn_t *connp = coa->coa_connp; 1704 ip_xmit_attr_t *ixa = coa->coa_ixa; 1705 udp_t *udp = connp->conn_udp; 1706 udp_stack_t *us = udp->udp_us; 1707 int *i1 = (int *)invalp; 1708 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1709 int error; 1710 1711 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1712 /* 1713 * First do UDP specific sanity checks and handle UDP specific 1714 * options. Note that some IPPROTO_UDP options are handled 1715 * by conn_opt_set. 1716 */ 1717 switch (level) { 1718 case SOL_SOCKET: 1719 switch (name) { 1720 case SO_SNDBUF: 1721 if (*i1 > us->us_max_buf) { 1722 return (ENOBUFS); 1723 } 1724 break; 1725 case SO_RCVBUF: 1726 if (*i1 > us->us_max_buf) { 1727 return (ENOBUFS); 1728 } 1729 break; 1730 1731 case SCM_UCRED: { 1732 struct ucred_s *ucr; 1733 cred_t *newcr; 1734 ts_label_t *tsl; 1735 1736 /* 1737 * Only sockets that have proper privileges and are 1738 * bound to MLPs will have any other value here, so 1739 * this implicitly tests for privilege to set label. 
1740 */ 1741 if (connp->conn_mlp_type == mlptSingle) 1742 break; 1743 1744 ucr = (struct ucred_s *)invalp; 1745 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || 1746 ucr->uc_labeloff < sizeof (*ucr) || 1747 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1748 return (EINVAL); 1749 if (!checkonly) { 1750 /* 1751 * Set ixa_tsl to the new label. 1752 * We assume that crgetzoneid doesn't change 1753 * as part of the SCM_UCRED. 1754 */ 1755 ASSERT(cr != NULL); 1756 if ((tsl = crgetlabel(cr)) == NULL) 1757 return (EINVAL); 1758 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1759 tsl->tsl_doi, KM_NOSLEEP); 1760 if (newcr == NULL) 1761 return (ENOSR); 1762 ASSERT(newcr->cr_label != NULL); 1763 /* 1764 * Move the hold on the cr_label to ixa_tsl by 1765 * setting cr_label to NULL. Then release newcr. 1766 */ 1767 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1768 ixa->ixa_flags |= IXAF_UCRED_TSL; 1769 newcr->cr_label = NULL; 1770 crfree(newcr); 1771 coa->coa_changed |= COA_HEADER_CHANGED; 1772 coa->coa_changed |= COA_WROFF_CHANGED; 1773 } 1774 /* Fully handled this option. */ 1775 return (0); 1776 } 1777 } 1778 break; 1779 case IPPROTO_UDP: 1780 switch (name) { 1781 case UDP_NAT_T_ENDPOINT: 1782 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1783 return (error); 1784 } 1785 1786 /* 1787 * Use conn_family instead so we can avoid ambiguitites 1788 * with AF_INET6 sockets that may switch from IPv4 1789 * to IPv6. 1790 */ 1791 if (connp->conn_family != AF_INET) { 1792 return (EAFNOSUPPORT); 1793 } 1794 1795 if (!checkonly) { 1796 mutex_enter(&connp->conn_lock); 1797 udp->udp_nat_t_endpoint = onoff; 1798 mutex_exit(&connp->conn_lock); 1799 coa->coa_changed |= COA_HEADER_CHANGED; 1800 coa->coa_changed |= COA_WROFF_CHANGED; 1801 } 1802 /* Fully handled this option. 
*/ 1803 return (0); 1804 case UDP_RCVHDR: 1805 mutex_enter(&connp->conn_lock); 1806 udp->udp_rcvhdr = onoff; 1807 mutex_exit(&connp->conn_lock); 1808 return (0); 1809 case UDP_SRCPORT_HASH: 1810 /* 1811 * This should have already been verified, but double 1812 * check. 1813 */ 1814 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1815 return (error); 1816 } 1817 1818 /* First see if the val is something we understand */ 1819 if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN) 1820 return (EINVAL); 1821 1822 if (!checkonly) { 1823 mutex_enter(&connp->conn_lock); 1824 udp->udp_vxlanhash = *i1; 1825 mutex_exit(&connp->conn_lock); 1826 } 1827 /* Fully handled this option. */ 1828 return (0); 1829 } 1830 break; 1831 } 1832 error = conn_opt_set(coa, level, name, inlen, invalp, 1833 checkonly, cr); 1834 return (error); 1835 } 1836 1837 /* 1838 * This routine sets socket options. 1839 */ 1840 int 1841 udp_opt_set(conn_t *connp, uint_t optset_context, int level, 1842 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 1843 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) 1844 { 1845 udp_t *udp = connp->conn_udp; 1846 int err; 1847 conn_opt_arg_t coas, *coa; 1848 boolean_t checkonly; 1849 udp_stack_t *us = udp->udp_us; 1850 1851 switch (optset_context) { 1852 case SETFN_OPTCOM_CHECKONLY: 1853 checkonly = B_TRUE; 1854 /* 1855 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1856 * inlen != 0 implies value supplied and 1857 * we have to "pretend" to set it. 1858 * inlen == 0 implies that there is no 1859 * value part in T_CHECK request and just validation 1860 * done elsewhere should be enough, we just return here. 1861 */ 1862 if (inlen == 0) { 1863 *outlenp = 0; 1864 return (0); 1865 } 1866 break; 1867 case SETFN_OPTCOM_NEGOTIATE: 1868 checkonly = B_FALSE; 1869 break; 1870 case SETFN_UD_NEGOTIATE: 1871 case SETFN_CONN_NEGOTIATE: 1872 checkonly = B_FALSE; 1873 /* 1874 * Negotiating local and "association-related" options 1875 * through T_UNITDATA_REQ. 
1876 * 1877 * Following routine can filter out ones we do not 1878 * want to be "set" this way. 1879 */ 1880 if (!udp_opt_allow_udr_set(level, name)) { 1881 *outlenp = 0; 1882 return (EINVAL); 1883 } 1884 break; 1885 default: 1886 /* 1887 * We should never get here 1888 */ 1889 *outlenp = 0; 1890 return (EINVAL); 1891 } 1892 1893 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1894 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1895 1896 if (thisdg_attrs != NULL) { 1897 /* Options from T_UNITDATA_REQ */ 1898 coa = (conn_opt_arg_t *)thisdg_attrs; 1899 ASSERT(coa->coa_connp == connp); 1900 ASSERT(coa->coa_ixa != NULL); 1901 ASSERT(coa->coa_ipp != NULL); 1902 ASSERT(coa->coa_ancillary); 1903 } else { 1904 coa = &coas; 1905 coas.coa_connp = connp; 1906 /* Get a reference on conn_ixa to prevent concurrent mods */ 1907 coas.coa_ixa = conn_get_ixa(connp, B_TRUE); 1908 if (coas.coa_ixa == NULL) { 1909 *outlenp = 0; 1910 return (ENOMEM); 1911 } 1912 coas.coa_ipp = &connp->conn_xmit_ipp; 1913 coas.coa_ancillary = B_FALSE; 1914 coas.coa_changed = 0; 1915 } 1916 1917 err = udp_do_opt_set(coa, level, name, inlen, invalp, 1918 cr, checkonly); 1919 if (err != 0) { 1920 errout: 1921 if (!coa->coa_ancillary) 1922 ixa_refrele(coa->coa_ixa); 1923 *outlenp = 0; 1924 return (err); 1925 } 1926 /* Handle DHCPINIT here outside of lock */ 1927 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) { 1928 uint_t ifindex; 1929 ill_t *ill; 1930 1931 ifindex = *(uint_t *)invalp; 1932 if (ifindex == 0) { 1933 ill = NULL; 1934 } else { 1935 ill = ill_lookup_on_ifindex(ifindex, B_FALSE, 1936 coa->coa_ixa->ixa_ipst); 1937 if (ill == NULL) { 1938 err = ENXIO; 1939 goto errout; 1940 } 1941 1942 mutex_enter(&ill->ill_lock); 1943 if (ill->ill_state_flags & ILL_CONDEMNED) { 1944 mutex_exit(&ill->ill_lock); 1945 ill_refrele(ill); 1946 err = ENXIO; 1947 goto errout; 1948 } 1949 if (IS_VNI(ill)) { 1950 mutex_exit(&ill->ill_lock); 1951 ill_refrele(ill); 1952 err = EINVAL; 1953 goto errout; 
1954 } 1955 } 1956 mutex_enter(&connp->conn_lock); 1957 1958 if (connp->conn_dhcpinit_ill != NULL) { 1959 /* 1960 * We've locked the conn so conn_cleanup_ill() 1961 * cannot clear conn_dhcpinit_ill -- so it's 1962 * safe to access the ill. 1963 */ 1964 ill_t *oill = connp->conn_dhcpinit_ill; 1965 1966 ASSERT(oill->ill_dhcpinit != 0); 1967 atomic_dec_32(&oill->ill_dhcpinit); 1968 ill_set_inputfn(connp->conn_dhcpinit_ill); 1969 connp->conn_dhcpinit_ill = NULL; 1970 } 1971 1972 if (ill != NULL) { 1973 connp->conn_dhcpinit_ill = ill; 1974 atomic_inc_32(&ill->ill_dhcpinit); 1975 ill_set_inputfn(ill); 1976 mutex_exit(&connp->conn_lock); 1977 mutex_exit(&ill->ill_lock); 1978 ill_refrele(ill); 1979 } else { 1980 mutex_exit(&connp->conn_lock); 1981 } 1982 } 1983 1984 /* 1985 * Common case of OK return with outval same as inval. 1986 */ 1987 if (invalp != outvalp) { 1988 /* don't trust bcopy for identical src/dst */ 1989 (void) bcopy(invalp, outvalp, inlen); 1990 } 1991 *outlenp = inlen; 1992 1993 /* 1994 * If this was not ancillary data, then we rebuild the headers, 1995 * update the IRE/NCE, and IPsec as needed. 1996 * Since the label depends on the destination we go through 1997 * ip_set_destination first. 1998 */ 1999 if (coa->coa_ancillary) { 2000 return (0); 2001 } 2002 2003 if (coa->coa_changed & COA_ROUTE_CHANGED) { 2004 in6_addr_t saddr, faddr, nexthop; 2005 in_port_t fport; 2006 2007 /* 2008 * We clear lastdst to make sure we pick up the change 2009 * next time sending. 2010 * If we are connected we re-cache the information. 2011 * We ignore errors to preserve BSD behavior. 2012 * Note that we don't redo IPsec policy lookup here 2013 * since the final destination (or source) didn't change. 
2014 */ 2015 mutex_enter(&connp->conn_lock); 2016 connp->conn_v6lastdst = ipv6_all_zeros; 2017 2018 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa, 2019 &connp->conn_faddr_v6, &nexthop); 2020 saddr = connp->conn_saddr_v6; 2021 faddr = connp->conn_faddr_v6; 2022 fport = connp->conn_fport; 2023 mutex_exit(&connp->conn_lock); 2024 2025 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) && 2026 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) { 2027 (void) ip_attr_connect(connp, coa->coa_ixa, 2028 &saddr, &faddr, &nexthop, fport, NULL, NULL, 2029 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST); 2030 } 2031 } 2032 2033 ixa_refrele(coa->coa_ixa); 2034 2035 if (coa->coa_changed & COA_HEADER_CHANGED) { 2036 /* 2037 * Rebuild the header template if we are connected. 2038 * Otherwise clear conn_v6lastdst so we rebuild the header 2039 * in the data path. 2040 */ 2041 mutex_enter(&connp->conn_lock); 2042 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 2043 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 2044 err = udp_build_hdr_template(connp, 2045 &connp->conn_saddr_v6, &connp->conn_faddr_v6, 2046 connp->conn_fport, connp->conn_flowinfo); 2047 if (err != 0) { 2048 mutex_exit(&connp->conn_lock); 2049 return (err); 2050 } 2051 } else { 2052 connp->conn_v6lastdst = ipv6_all_zeros; 2053 } 2054 mutex_exit(&connp->conn_lock); 2055 } 2056 if (coa->coa_changed & COA_RCVBUF_CHANGED) { 2057 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2058 connp->conn_rcvbuf); 2059 } 2060 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { 2061 connp->conn_wq->q_hiwat = connp->conn_sndbuf; 2062 } 2063 if (coa->coa_changed & COA_WROFF_CHANGED) { 2064 /* Increase wroff if needed */ 2065 uint_t wroff; 2066 2067 mutex_enter(&connp->conn_lock); 2068 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra; 2069 if (udp->udp_nat_t_endpoint) 2070 wroff += sizeof (uint32_t); 2071 if (wroff > connp->conn_wroff) { 2072 connp->conn_wroff = wroff; 2073 mutex_exit(&connp->conn_lock); 2074 (void) 
proto_set_tx_wroff(connp->conn_rq, connp, wroff); 2075 } else { 2076 mutex_exit(&connp->conn_lock); 2077 } 2078 } 2079 return (err); 2080 } 2081 2082 /* This routine sets socket options. */ 2083 int 2084 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2085 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2086 void *thisdg_attrs, cred_t *cr) 2087 { 2088 conn_t *connp = Q_TO_CONN(q); 2089 int error; 2090 2091 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 2092 outlenp, outvalp, thisdg_attrs, cr); 2093 return (error); 2094 } 2095 2096 /* 2097 * Setup IP and UDP headers. 2098 * Returns NULL on allocation failure, in which case data_mp is freed. 2099 */ 2100 mblk_t * 2101 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, 2102 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport, 2103 uint32_t flowinfo, mblk_t *data_mp, int *errorp) 2104 { 2105 mblk_t *mp; 2106 udpha_t *udpha; 2107 udp_stack_t *us = connp->conn_netstack->netstack_udp; 2108 uint_t data_len; 2109 uint32_t cksum; 2110 udp_t *udp = connp->conn_udp; 2111 boolean_t insert_spi = udp->udp_nat_t_endpoint; 2112 boolean_t hash_srcport = udp->udp_vxlanhash; 2113 uint_t ulp_hdr_len; 2114 uint16_t srcport; 2115 2116 data_len = msgdsize(data_mp); 2117 ulp_hdr_len = UDPH_SIZE; 2118 if (insert_spi) 2119 ulp_hdr_len += sizeof (uint32_t); 2120 2121 /* 2122 * If we have source port hashing going on, determine the hash before 2123 * we modify the mblk_t. 
2124 */ 2125 if (hash_srcport == B_TRUE) { 2126 srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN, 2127 IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX, 2128 ntohs(connp->conn_lport)); 2129 } 2130 2131 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, 2132 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); 2133 if (mp == NULL) { 2134 ASSERT(*errorp != 0); 2135 return (NULL); 2136 } 2137 2138 data_len += ulp_hdr_len; 2139 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; 2140 2141 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); 2142 if (hash_srcport == B_TRUE) { 2143 udpha->uha_src_port = htons(srcport); 2144 } else { 2145 udpha->uha_src_port = connp->conn_lport; 2146 } 2147 udpha->uha_dst_port = dstport; 2148 udpha->uha_checksum = 0; 2149 udpha->uha_length = htons(data_len); 2150 2151 /* 2152 * If there was a routing option/header then conn_prepend_hdr 2153 * has massaged it and placed the pseudo-header checksum difference 2154 * in the cksum argument. 2155 * 2156 * Setup header length and prepare for ULP checksum done in IP. 2157 * 2158 * We make it easy for IP to include our pseudo header 2159 * by putting our length in uha_checksum. 2160 * The IP source, destination, and length have already been set by 2161 * conn_prepend_hdr. 
2162 */ 2163 cksum += data_len; 2164 cksum = (cksum >> 16) + (cksum & 0xFFFF); 2165 ASSERT(cksum < 0x10000); 2166 2167 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2168 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2169 2170 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); 2171 2172 /* IP does the checksum if uha_checksum is non-zero */ 2173 if (us->us_do_checksum) { 2174 if (cksum == 0) 2175 udpha->uha_checksum = 0xffff; 2176 else 2177 udpha->uha_checksum = htons(cksum); 2178 } else { 2179 udpha->uha_checksum = 0; 2180 } 2181 } else { 2182 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2183 2184 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); 2185 if (cksum == 0) 2186 udpha->uha_checksum = 0xffff; 2187 else 2188 udpha->uha_checksum = htons(cksum); 2189 } 2190 2191 /* Insert all-0s SPI now. */ 2192 if (insert_spi) 2193 *((uint32_t *)(udpha + 1)) = 0; 2194 2195 return (mp); 2196 } 2197 2198 static int 2199 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, 2200 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo) 2201 { 2202 udpha_t *udpha; 2203 int error; 2204 2205 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2206 /* 2207 * We clear lastdst to make sure we don't use the lastdst path 2208 * next time sending since we might not have set v6dst yet. 2209 */ 2210 connp->conn_v6lastdst = ipv6_all_zeros; 2211 2212 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst, 2213 flowinfo); 2214 if (error != 0) 2215 return (error); 2216 2217 /* 2218 * Any routing header/option has been massaged. The checksum difference 2219 * is stored in conn_sum. 
2220 */ 2221 udpha = (udpha_t *)connp->conn_ht_ulp; 2222 udpha->uha_src_port = connp->conn_lport; 2223 udpha->uha_dst_port = dstport; 2224 udpha->uha_checksum = 0; 2225 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */ 2226 return (0); 2227 } 2228 2229 static mblk_t * 2230 udp_queue_fallback(udp_t *udp, mblk_t *mp) 2231 { 2232 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 2233 if (IPCL_IS_NONSTR(udp->udp_connp)) { 2234 /* 2235 * fallback has started but messages have not been moved yet 2236 */ 2237 if (udp->udp_fallback_queue_head == NULL) { 2238 ASSERT(udp->udp_fallback_queue_tail == NULL); 2239 udp->udp_fallback_queue_head = mp; 2240 udp->udp_fallback_queue_tail = mp; 2241 } else { 2242 ASSERT(udp->udp_fallback_queue_tail != NULL); 2243 udp->udp_fallback_queue_tail->b_next = mp; 2244 udp->udp_fallback_queue_tail = mp; 2245 } 2246 return (NULL); 2247 } else { 2248 /* 2249 * Fallback completed, let the caller putnext() the mblk. 2250 */ 2251 return (mp); 2252 } 2253 } 2254 2255 /* 2256 * Deliver data to ULP. In case we have a socket, and it's falling back to 2257 * TPI, then we'll queue the mp for later processing. 
2258 */ 2259 static void 2260 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2261 { 2262 if (IPCL_IS_NONSTR(connp)) { 2263 udp_t *udp = connp->conn_udp; 2264 int error; 2265 2266 ASSERT(len == msgdsize(mp)); 2267 if ((*connp->conn_upcalls->su_recv) 2268 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2269 mutex_enter(&udp->udp_recv_lock); 2270 if (error == ENOSPC) { 2271 /* 2272 * let's confirm while holding the lock 2273 */ 2274 if ((*connp->conn_upcalls->su_recv) 2275 (connp->conn_upper_handle, NULL, 0, 0, 2276 &error, NULL) < 0) { 2277 ASSERT(error == ENOSPC); 2278 if (error == ENOSPC) { 2279 connp->conn_flow_cntrld = 2280 B_TRUE; 2281 } 2282 } 2283 mutex_exit(&udp->udp_recv_lock); 2284 } else { 2285 ASSERT(error == EOPNOTSUPP); 2286 mp = udp_queue_fallback(udp, mp); 2287 mutex_exit(&udp->udp_recv_lock); 2288 if (mp != NULL) 2289 putnext(connp->conn_rq, mp); 2290 } 2291 } 2292 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2293 } else { 2294 if (is_system_labeled()) { 2295 ASSERT(ira->ira_cred != NULL); 2296 /* 2297 * Provide for protocols above UDP such as RPC 2298 * NOPID leaves db_cpid unchanged. 2299 */ 2300 mblk_setcred(mp, ira->ira_cred, NOPID); 2301 } 2302 2303 putnext(connp->conn_rq, mp); 2304 } 2305 } 2306 2307 /* 2308 * This is the inbound data path. 2309 * IP has already pulled up the IP plus UDP headers and verified alignment 2310 * etc. 
2311 */ 2312 /* ARGSUSED2 */ 2313 static void 2314 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2315 { 2316 conn_t *connp = (conn_t *)arg1; 2317 struct T_unitdata_ind *tudi; 2318 uchar_t *rptr; /* Pointer to IP header */ 2319 int hdr_length; /* Length of IP+UDP headers */ 2320 int udi_size; /* Size of T_unitdata_ind */ 2321 int pkt_len; 2322 udp_t *udp; 2323 udpha_t *udpha; 2324 ip_pkt_t ipps; 2325 ip6_t *ip6h; 2326 mblk_t *mp1; 2327 uint32_t udp_ipv4_options_len; 2328 crb_t recv_ancillary; 2329 udp_stack_t *us; 2330 2331 ASSERT(connp->conn_flags & IPCL_UDPCONN); 2332 2333 udp = connp->conn_udp; 2334 us = udp->udp_us; 2335 rptr = mp->b_rptr; 2336 2337 ASSERT(DB_TYPE(mp) == M_DATA); 2338 ASSERT(OK_32PTR(rptr)); 2339 ASSERT(ira->ira_pktlen == msgdsize(mp)); 2340 pkt_len = ira->ira_pktlen; 2341 2342 /* 2343 * Get a snapshot of these and allow other threads to change 2344 * them after that. We need the same recv_ancillary when determining 2345 * the size as when adding the ancillary data items. 2346 */ 2347 mutex_enter(&connp->conn_lock); 2348 udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len; 2349 recv_ancillary = connp->conn_recv_ancillary; 2350 mutex_exit(&connp->conn_lock); 2351 2352 hdr_length = ira->ira_ip_hdr_length; 2353 2354 /* 2355 * IP inspected the UDP header thus all of it must be in the mblk. 2356 * UDP length check is performed for IPv6 packets and IPv4 packets 2357 * to check if the size of the packet as specified 2358 * by the UDP header is the same as the length derived from the IP 2359 * header. 
2360 */ 2361 udpha = (udpha_t *)(rptr + hdr_length); 2362 if (pkt_len != ntohs(udpha->uha_length) + hdr_length) 2363 goto tossit; 2364 2365 hdr_length += UDPH_SIZE; 2366 ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */ 2367 2368 /* Initialize regardless of IP version */ 2369 ipps.ipp_fields = 0; 2370 2371 if (((ira->ira_flags & IRAF_IPV4_OPTIONS) || 2372 udp_ipv4_options_len > 0) && 2373 connp->conn_family == AF_INET) { 2374 int err; 2375 2376 /* 2377 * Record/update udp_recv_ipp with the lock 2378 * held. Not needed for AF_INET6 sockets 2379 * since they don't support a getsockopt of IP_OPTIONS. 2380 */ 2381 mutex_enter(&connp->conn_lock); 2382 err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp, 2383 B_TRUE); 2384 if (err != 0) { 2385 /* Allocation failed. Drop packet */ 2386 mutex_exit(&connp->conn_lock); 2387 freemsg(mp); 2388 UDPS_BUMP_MIB(us, udpInErrors); 2389 return; 2390 } 2391 mutex_exit(&connp->conn_lock); 2392 } 2393 2394 if (recv_ancillary.crb_all != 0) { 2395 /* 2396 * Record packet information in the ip_pkt_t 2397 */ 2398 if (ira->ira_flags & IRAF_IS_IPV4) { 2399 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION); 2400 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 2401 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 2402 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr)); 2403 2404 (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE); 2405 } else { 2406 uint8_t nexthdrp; 2407 2408 ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION); 2409 /* 2410 * IPv6 packets can only be received by applications 2411 * that are prepared to receive IPv6 addresses. 2412 * The IP fanout must ensure this. 
2413 */ 2414 ASSERT(connp->conn_family == AF_INET6); 2415 2416 ip6h = (ip6_t *)rptr; 2417 2418 /* We don't care about the length, but need the ipp */ 2419 hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps, 2420 &nexthdrp); 2421 ASSERT(hdr_length == ira->ira_ip_hdr_length); 2422 /* Restore */ 2423 hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE; 2424 ASSERT(nexthdrp == IPPROTO_UDP); 2425 } 2426 } 2427 2428 /* 2429 * This is the inbound data path. Packets are passed upstream as 2430 * T_UNITDATA_IND messages. 2431 */ 2432 if (connp->conn_family == AF_INET) { 2433 sin_t *sin; 2434 2435 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 2436 2437 /* 2438 * Normally only send up the source address. 2439 * If any ancillary data items are wanted we add those. 2440 */ 2441 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 2442 if (recv_ancillary.crb_all != 0) { 2443 udi_size += conn_recvancillary_size(connp, 2444 recv_ancillary, ira, mp, &ipps); 2445 } 2446 2447 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 2448 mp1 = allocb(udi_size, BPRI_MED); 2449 if (mp1 == NULL) { 2450 freemsg(mp); 2451 UDPS_BUMP_MIB(us, udpInErrors); 2452 return; 2453 } 2454 mp1->b_cont = mp; 2455 mp1->b_datap->db_type = M_PROTO; 2456 tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2457 mp1->b_wptr = (uchar_t *)tudi + udi_size; 2458 tudi->PRIM_type = T_UNITDATA_IND; 2459 tudi->SRC_length = sizeof (sin_t); 2460 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2461 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2462 sizeof (sin_t); 2463 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 2464 tudi->OPT_length = udi_size; 2465 sin = (sin_t *)&tudi[1]; 2466 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 2467 sin->sin_port = udpha->uha_src_port; 2468 sin->sin_family = connp->conn_family; 2469 *(uint32_t *)&sin->sin_zero[0] = 0; 2470 *(uint32_t *)&sin->sin_zero[4] = 0; 2471 2472 /* 2473 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA, 2474 * IP_RECVTTL or IP_RECVTOS has been set. 2475 */ 2476 if (udi_size != 0) { 2477 conn_recvancillary_add(connp, recv_ancillary, ira, 2478 &ipps, (uchar_t *)&sin[1], udi_size); 2479 } 2480 } else { 2481 sin6_t *sin6; 2482 2483 /* 2484 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 2485 * 2486 * Normally we only send up the address. If receiving of any 2487 * optional receive side information is enabled, we also send 2488 * that up as options. 
2489 */ 2490 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 2491 2492 if (recv_ancillary.crb_all != 0) { 2493 udi_size += conn_recvancillary_size(connp, 2494 recv_ancillary, ira, mp, &ipps); 2495 } 2496 2497 mp1 = allocb(udi_size, BPRI_MED); 2498 if (mp1 == NULL) { 2499 freemsg(mp); 2500 UDPS_BUMP_MIB(us, udpInErrors); 2501 return; 2502 } 2503 mp1->b_cont = mp; 2504 mp1->b_datap->db_type = M_PROTO; 2505 tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2506 mp1->b_wptr = (uchar_t *)tudi + udi_size; 2507 tudi->PRIM_type = T_UNITDATA_IND; 2508 tudi->SRC_length = sizeof (sin6_t); 2509 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2510 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2511 sizeof (sin6_t); 2512 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 2513 tudi->OPT_length = udi_size; 2514 sin6 = (sin6_t *)&tudi[1]; 2515 if (ira->ira_flags & IRAF_IS_IPV4) { 2516 in6_addr_t v6dst; 2517 2518 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 2519 &sin6->sin6_addr); 2520 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 2521 &v6dst); 2522 sin6->sin6_flowinfo = 0; 2523 sin6->sin6_scope_id = 0; 2524 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 2525 IPCL_ZONEID(connp), us->us_netstack); 2526 } else { 2527 ip6h = (ip6_t *)rptr; 2528 2529 sin6->sin6_addr = ip6h->ip6_src; 2530 /* No sin6_flowinfo per API */ 2531 sin6->sin6_flowinfo = 0; 2532 /* For link-scope pass up scope id */ 2533 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 2534 sin6->sin6_scope_id = ira->ira_ruifindex; 2535 else 2536 sin6->sin6_scope_id = 0; 2537 sin6->__sin6_src_id = ip_srcid_find_addr( 2538 &ip6h->ip6_dst, IPCL_ZONEID(connp), 2539 us->us_netstack); 2540 } 2541 sin6->sin6_port = udpha->uha_src_port; 2542 sin6->sin6_family = connp->conn_family; 2543 2544 if (udi_size != 0) { 2545 conn_recvancillary_add(connp, recv_ancillary, ira, 2546 &ipps, (uchar_t *)&sin6[1], udi_size); 2547 } 2548 } 2549 2550 /* 2551 * DTrace this UDP input as udp:::receive (this is for IPv4, 
IPv6 and 2552 * loopback traffic). 2553 */ 2554 DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa, 2555 void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha); 2556 2557 /* Walk past the headers unless IP_RECVHDR was set. */ 2558 if (!udp->udp_rcvhdr) { 2559 mp->b_rptr = rptr + hdr_length; 2560 pkt_len -= hdr_length; 2561 } 2562 2563 UDPS_BUMP_MIB(us, udpHCInDatagrams); 2564 udp_ulp_recv(connp, mp1, pkt_len, ira); 2565 return; 2566 2567 tossit: 2568 freemsg(mp); 2569 UDPS_BUMP_MIB(us, udpInErrors); 2570 } 2571 2572 /* 2573 * This routine creates a T_UDERROR_IND message and passes it upstream. 2574 * The address and options are copied from the T_UNITDATA_REQ message 2575 * passed in mp. This message is freed. 2576 */ 2577 static void 2578 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2579 { 2580 struct T_unitdata_req *tudr; 2581 mblk_t *mp1; 2582 uchar_t *destaddr; 2583 t_scalar_t destlen; 2584 uchar_t *optaddr; 2585 t_scalar_t optlen; 2586 2587 if ((mp->b_wptr < mp->b_rptr) || 2588 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2589 goto done; 2590 } 2591 tudr = (struct T_unitdata_req *)mp->b_rptr; 2592 destaddr = mp->b_rptr + tudr->DEST_offset; 2593 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2594 destaddr + tudr->DEST_length < mp->b_rptr || 2595 destaddr + tudr->DEST_length > mp->b_wptr) { 2596 goto done; 2597 } 2598 optaddr = mp->b_rptr + tudr->OPT_offset; 2599 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2600 optaddr + tudr->OPT_length < mp->b_rptr || 2601 optaddr + tudr->OPT_length > mp->b_wptr) { 2602 goto done; 2603 } 2604 destlen = tudr->DEST_length; 2605 optlen = tudr->OPT_length; 2606 2607 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2608 (char *)optaddr, optlen, err); 2609 if (mp1 != NULL) 2610 qreply(q, mp1); 2611 2612 done: 2613 freemsg(mp); 2614 } 2615 2616 /* 2617 * This routine removes a port number association from a stream. It 2618 * is called by udp_wput to handle T_UNBIND_REQ messages. 
2619 */ 2620 static void 2621 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2622 { 2623 conn_t *connp = Q_TO_CONN(q); 2624 int error; 2625 2626 error = udp_do_unbind(connp); 2627 if (error) { 2628 if (error < 0) 2629 udp_err_ack(q, mp, -error, 0); 2630 else 2631 udp_err_ack(q, mp, TSYSERR, error); 2632 return; 2633 } 2634 2635 mp = mi_tpi_ok_ack_alloc(mp); 2636 ASSERT(mp != NULL); 2637 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 2638 qreply(q, mp); 2639 } 2640 2641 /* 2642 * Don't let port fall into the privileged range. 2643 * Since the extra privileged ports can be arbitrary we also 2644 * ensure that we exclude those from consideration. 2645 * us->us_epriv_ports is not sorted thus we loop over it until 2646 * there are no changes. 2647 */ 2648 static in_port_t 2649 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 2650 { 2651 int i, bump; 2652 in_port_t nextport; 2653 boolean_t restart = B_FALSE; 2654 udp_stack_t *us = udp->udp_us; 2655 2656 if (random && udp_random_anon_port != 0) { 2657 (void) random_get_pseudo_bytes((uint8_t *)&port, 2658 sizeof (in_port_t)); 2659 /* 2660 * Unless changed by a sys admin, the smallest anon port 2661 * is 32768 and the largest anon port is 65535. It is 2662 * very likely (50%) for the random port to be smaller 2663 * than the smallest anon port. When that happens, 2664 * add port % (anon port range) to the smallest anon 2665 * port to get the random port. It should fall into the 2666 * valid anon port range. 
2667 */ 2668 if ((port < us->us_smallest_anon_port) || 2669 (port > us->us_largest_anon_port)) { 2670 if (us->us_smallest_anon_port == 2671 us->us_largest_anon_port) { 2672 bump = 0; 2673 } else { 2674 bump = port % (us->us_largest_anon_port - 2675 us->us_smallest_anon_port); 2676 } 2677 2678 port = us->us_smallest_anon_port + bump; 2679 } 2680 } 2681 2682 retry: 2683 if (port < us->us_smallest_anon_port) 2684 port = us->us_smallest_anon_port; 2685 2686 if (port > us->us_largest_anon_port) { 2687 port = us->us_smallest_anon_port; 2688 if (restart) 2689 return (0); 2690 restart = B_TRUE; 2691 } 2692 2693 if (port < us->us_smallest_nonpriv_port) 2694 port = us->us_smallest_nonpriv_port; 2695 2696 for (i = 0; i < us->us_num_epriv_ports; i++) { 2697 if (port == us->us_epriv_ports[i]) { 2698 port++; 2699 /* 2700 * Make sure that the port is in the 2701 * valid range. 2702 */ 2703 goto retry; 2704 } 2705 } 2706 2707 if (is_system_labeled() && 2708 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 2709 port, IPPROTO_UDP, B_TRUE)) != 0) { 2710 port = nextport; 2711 goto retry; 2712 } 2713 2714 return (port); 2715 } 2716 2717 /* 2718 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 2719 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 2720 * the TPI options, otherwise we take them from msg_control. 2721 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 2722 * Always consumes mp; never consumes tudr_mp. 
2723 */ 2724 static int 2725 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, 2726 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) 2727 { 2728 udp_t *udp = connp->conn_udp; 2729 udp_stack_t *us = udp->udp_us; 2730 int error; 2731 ip_xmit_attr_t *ixa; 2732 ip_pkt_t *ipp; 2733 in6_addr_t v6src; 2734 in6_addr_t v6dst; 2735 in6_addr_t v6nexthop; 2736 in_port_t dstport; 2737 uint32_t flowinfo; 2738 uint_t srcid; 2739 int is_absreq_failure = 0; 2740 conn_opt_arg_t coas, *coa; 2741 2742 ASSERT(tudr_mp != NULL || msg != NULL); 2743 2744 /* 2745 * Get ixa before checking state to handle a disconnect race. 2746 * 2747 * We need an exclusive copy of conn_ixa since the ancillary data 2748 * options might modify it. That copy has no pointers hence we 2749 * need to set them up once we've parsed the ancillary data. 2750 */ 2751 ixa = conn_get_ixa_exclusive(connp); 2752 if (ixa == NULL) { 2753 UDPS_BUMP_MIB(us, udpOutErrors); 2754 freemsg(mp); 2755 return (ENOMEM); 2756 } 2757 ASSERT(cr != NULL); 2758 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2759 ixa->ixa_cred = cr; 2760 ixa->ixa_cpid = pid; 2761 if (is_system_labeled()) { 2762 /* We need to restart with a label based on the cred */ 2763 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 2764 } 2765 2766 /* In case previous destination was multicast or multirt */ 2767 ip_attr_newdst(ixa); 2768 2769 /* Get a copy of conn_xmit_ipp since the options might change it */ 2770 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); 2771 if (ipp == NULL) { 2772 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2773 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2774 ixa->ixa_cpid = connp->conn_cpid; 2775 ixa_refrele(ixa); 2776 UDPS_BUMP_MIB(us, udpOutErrors); 2777 freemsg(mp); 2778 return (ENOMEM); 2779 } 2780 mutex_enter(&connp->conn_lock); 2781 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); 2782 mutex_exit(&connp->conn_lock); 2783 if (error != 0) { 2784 UDPS_BUMP_MIB(us, udpOutErrors); 2785 
freemsg(mp); 2786 goto done; 2787 } 2788 2789 /* 2790 * Parse the options and update ixa and ipp as a result. 2791 * Note that ixa_tsl can be updated if SCM_UCRED. 2792 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. 2793 */ 2794 2795 coa = &coas; 2796 coa->coa_connp = connp; 2797 coa->coa_ixa = ixa; 2798 coa->coa_ipp = ipp; 2799 coa->coa_ancillary = B_TRUE; 2800 coa->coa_changed = 0; 2801 2802 if (msg != NULL) { 2803 error = process_auxiliary_options(connp, msg->msg_control, 2804 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); 2805 } else { 2806 struct T_unitdata_req *tudr; 2807 2808 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; 2809 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 2810 error = tpi_optcom_buf(connp->conn_wq, tudr_mp, 2811 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, 2812 coa, &is_absreq_failure); 2813 } 2814 if (error != 0) { 2815 /* 2816 * Note: No special action needed in this 2817 * module for "is_absreq_failure" 2818 */ 2819 freemsg(mp); 2820 UDPS_BUMP_MIB(us, udpOutErrors); 2821 goto done; 2822 } 2823 ASSERT(is_absreq_failure == 0); 2824 2825 mutex_enter(&connp->conn_lock); 2826 /* 2827 * If laddr is unspecified then we look at sin6_src_id. 2828 * We will give precedence to a source address set with IPV6_PKTINFO 2829 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 2830 * want ip_attr_connect to select a source (since it can fail) when 2831 * IPV6_PKTINFO is specified. 2832 * If this doesn't result in a source address then we get a source 2833 * from ip_attr_connect() below. 
2834 */ 2835 v6src = connp->conn_saddr_v6; 2836 if (sin != NULL) { 2837 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 2838 dstport = sin->sin_port; 2839 flowinfo = 0; 2840 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 2841 ixa->ixa_flags |= IXAF_IS_IPV4; 2842 } else if (sin6 != NULL) { 2843 boolean_t v4mapped; 2844 2845 v6dst = sin6->sin6_addr; 2846 dstport = sin6->sin6_port; 2847 flowinfo = sin6->sin6_flowinfo; 2848 srcid = sin6->__sin6_src_id; 2849 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 2850 ixa->ixa_scopeid = sin6->sin6_scope_id; 2851 ixa->ixa_flags |= IXAF_SCOPEID_SET; 2852 } else { 2853 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 2854 } 2855 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst); 2856 if (v4mapped) 2857 ixa->ixa_flags |= IXAF_IS_IPV4; 2858 else 2859 ixa->ixa_flags &= ~IXAF_IS_IPV4; 2860 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 2861 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 2862 v4mapped, connp->conn_netstack)) { 2863 /* Mismatch - v4mapped/v6 specified by srcid. */ 2864 mutex_exit(&connp->conn_lock); 2865 error = EADDRNOTAVAIL; 2866 goto failed; /* Does freemsg() and mib. */ 2867 } 2868 } 2869 } else { 2870 /* Connected case */ 2871 v6dst = connp->conn_faddr_v6; 2872 dstport = connp->conn_fport; 2873 flowinfo = connp->conn_flowinfo; 2874 } 2875 mutex_exit(&connp->conn_lock); 2876 2877 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. 
*/ 2878 if (ipp->ipp_fields & IPPF_ADDR) { 2879 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2880 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 2881 v6src = ipp->ipp_addr; 2882 } else { 2883 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 2884 v6src = ipp->ipp_addr; 2885 } 2886 } 2887 2888 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop); 2889 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 2890 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 2891 2892 switch (error) { 2893 case 0: 2894 break; 2895 case EADDRNOTAVAIL: 2896 /* 2897 * IXAF_VERIFY_SOURCE tells us to pick a better source. 2898 * Don't have the application see that errno 2899 */ 2900 error = ENETUNREACH; 2901 goto failed; 2902 case ENETDOWN: 2903 /* 2904 * Have !ipif_addr_ready address; drop packet silently 2905 * until we can get applications to not send until we 2906 * are ready. 2907 */ 2908 error = 0; 2909 goto failed; 2910 case EHOSTUNREACH: 2911 case ENETUNREACH: 2912 if (ixa->ixa_ire != NULL) { 2913 /* 2914 * Let conn_ip_output/ire_send_noroute return 2915 * the error and send any local ICMP error. 2916 */ 2917 error = 0; 2918 break; 2919 } 2920 /* FALLTHRU */ 2921 default: 2922 failed: 2923 freemsg(mp); 2924 UDPS_BUMP_MIB(us, udpOutErrors); 2925 goto done; 2926 } 2927 2928 /* 2929 * We might be going to a different destination than last time, 2930 * thus check that TX allows the communication and compute any 2931 * needed label. 2932 * 2933 * TSOL Note: We have an exclusive ipp and ixa for this thread so we 2934 * don't have to worry about concurrent threads. 
2935 */ 2936 if (is_system_labeled()) { 2937 /* Using UDP MLP requires SCM_UCRED from user */ 2938 if (connp->conn_mlp_type != mlptSingle && 2939 !((ixa->ixa_flags & IXAF_UCRED_TSL))) { 2940 UDPS_BUMP_MIB(us, udpOutErrors); 2941 error = ECONNREFUSED; 2942 freemsg(mp); 2943 goto done; 2944 } 2945 /* 2946 * Check whether Trusted Solaris policy allows communication 2947 * with this host, and pretend that the destination is 2948 * unreachable if not. 2949 * Compute any needed label and place it in ipp_label_v4/v6. 2950 * 2951 * Later conn_build_hdr_template/conn_prepend_hdr takes 2952 * ipp_label_v4/v6 to form the packet. 2953 * 2954 * Tsol note: We have ipp structure local to this thread so 2955 * no locking is needed. 2956 */ 2957 error = conn_update_label(connp, ixa, &v6dst, ipp); 2958 if (error != 0) { 2959 freemsg(mp); 2960 UDPS_BUMP_MIB(us, udpOutErrors); 2961 goto done; 2962 } 2963 } 2964 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport, 2965 flowinfo, mp, &error); 2966 if (mp == NULL) { 2967 ASSERT(error != 0); 2968 UDPS_BUMP_MIB(us, udpOutErrors); 2969 goto done; 2970 } 2971 if (ixa->ixa_pktlen > IP_MAXPACKET) { 2972 error = EMSGSIZE; 2973 UDPS_BUMP_MIB(us, udpOutErrors); 2974 freemsg(mp); 2975 goto done; 2976 } 2977 /* We're done. Pass the packet to ip. */ 2978 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 2979 2980 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 2981 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 2982 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 2983 2984 error = conn_ip_output(mp, ixa); 2985 /* No udpOutErrors if an error since IP increases its error counter */ 2986 switch (error) { 2987 case 0: 2988 break; 2989 case EWOULDBLOCK: 2990 (void) ixa_check_drain_insert(connp, ixa); 2991 error = 0; 2992 break; 2993 case EADDRNOTAVAIL: 2994 /* 2995 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
2996 * Don't have the application see that errno 2997 */ 2998 error = ENETUNREACH; 2999 /* FALLTHRU */ 3000 default: 3001 mutex_enter(&connp->conn_lock); 3002 /* 3003 * Clear the source and v6lastdst so we call ip_attr_connect 3004 * for the next packet and try to pick a better source. 3005 */ 3006 if (connp->conn_mcbc_bind) 3007 connp->conn_saddr_v6 = ipv6_all_zeros; 3008 else 3009 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3010 connp->conn_v6lastdst = ipv6_all_zeros; 3011 mutex_exit(&connp->conn_lock); 3012 break; 3013 } 3014 done: 3015 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3016 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3017 ixa->ixa_cpid = connp->conn_cpid; 3018 ixa_refrele(ixa); 3019 ip_pkt_free(ipp); 3020 kmem_free(ipp, sizeof (*ipp)); 3021 return (error); 3022 } 3023 3024 /* 3025 * Handle sending an M_DATA for a connected socket. 3026 * Handles both IPv4 and IPv6. 3027 */ 3028 static int 3029 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) 3030 { 3031 udp_t *udp = connp->conn_udp; 3032 udp_stack_t *us = udp->udp_us; 3033 int error; 3034 ip_xmit_attr_t *ixa; 3035 3036 /* 3037 * If no other thread is using conn_ixa this just gets a reference to 3038 * conn_ixa. Otherwise we get a safe copy of conn_ixa. 
3039 */ 3040 ixa = conn_get_ixa(connp, B_FALSE); 3041 if (ixa == NULL) { 3042 UDPS_BUMP_MIB(us, udpOutErrors); 3043 freemsg(mp); 3044 return (ENOMEM); 3045 } 3046 3047 ASSERT(cr != NULL); 3048 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3049 ixa->ixa_cred = cr; 3050 ixa->ixa_cpid = pid; 3051 3052 mutex_enter(&connp->conn_lock); 3053 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6, 3054 connp->conn_fport, connp->conn_flowinfo, &error); 3055 3056 if (mp == NULL) { 3057 ASSERT(error != 0); 3058 mutex_exit(&connp->conn_lock); 3059 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3060 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3061 ixa->ixa_cpid = connp->conn_cpid; 3062 ixa_refrele(ixa); 3063 UDPS_BUMP_MIB(us, udpOutErrors); 3064 freemsg(mp); 3065 return (error); 3066 } 3067 3068 /* 3069 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3070 * safe copy, then we need to fill in any pointers in it. 3071 */ 3072 if (ixa->ixa_ire == NULL) { 3073 in6_addr_t faddr, saddr; 3074 in6_addr_t nexthop; 3075 in_port_t fport; 3076 3077 saddr = connp->conn_saddr_v6; 3078 faddr = connp->conn_faddr_v6; 3079 fport = connp->conn_fport; 3080 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop); 3081 mutex_exit(&connp->conn_lock); 3082 3083 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, 3084 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | 3085 IPDF_IPSEC); 3086 switch (error) { 3087 case 0: 3088 break; 3089 case EADDRNOTAVAIL: 3090 /* 3091 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3092 * Don't have the application see that errno 3093 */ 3094 error = ENETUNREACH; 3095 goto failed; 3096 case ENETDOWN: 3097 /* 3098 * Have !ipif_addr_ready address; drop packet silently 3099 * until we can get applications to not send until we 3100 * are ready. 
3101 */ 3102 error = 0; 3103 goto failed; 3104 case EHOSTUNREACH: 3105 case ENETUNREACH: 3106 if (ixa->ixa_ire != NULL) { 3107 /* 3108 * Let conn_ip_output/ire_send_noroute return 3109 * the error and send any local ICMP error. 3110 */ 3111 error = 0; 3112 break; 3113 } 3114 /* FALLTHRU */ 3115 default: 3116 failed: 3117 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3118 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3119 ixa->ixa_cpid = connp->conn_cpid; 3120 ixa_refrele(ixa); 3121 freemsg(mp); 3122 UDPS_BUMP_MIB(us, udpOutErrors); 3123 return (error); 3124 } 3125 } else { 3126 /* Done with conn_t */ 3127 mutex_exit(&connp->conn_lock); 3128 } 3129 ASSERT(ixa->ixa_ire != NULL); 3130 3131 /* We're done. Pass the packet to ip. */ 3132 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 3133 3134 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3135 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3136 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3137 3138 error = conn_ip_output(mp, ixa); 3139 /* No udpOutErrors if an error since IP increases its error counter */ 3140 switch (error) { 3141 case 0: 3142 break; 3143 case EWOULDBLOCK: 3144 (void) ixa_check_drain_insert(connp, ixa); 3145 error = 0; 3146 break; 3147 case EADDRNOTAVAIL: 3148 /* 3149 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3150 * Don't have the application see that errno 3151 */ 3152 error = ENETUNREACH; 3153 break; 3154 } 3155 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3156 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3157 ixa->ixa_cpid = connp->conn_cpid; 3158 ixa_refrele(ixa); 3159 return (error); 3160 } 3161 3162 /* 3163 * Handle sending an M_DATA to the last destination. 3164 * Handles both IPv4 and IPv6. 3165 * 3166 * NOTE: The caller must hold conn_lock and we drop it here. 
3167 */ 3168 static int 3169 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, 3170 ip_xmit_attr_t *ixa) 3171 { 3172 udp_t *udp = connp->conn_udp; 3173 udp_stack_t *us = udp->udp_us; 3174 int error; 3175 3176 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3177 ASSERT(ixa != NULL); 3178 3179 ASSERT(cr != NULL); 3180 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3181 ixa->ixa_cred = cr; 3182 ixa->ixa_cpid = pid; 3183 3184 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, 3185 connp->conn_lastdstport, connp->conn_lastflowinfo, &error); 3186 3187 if (mp == NULL) { 3188 ASSERT(error != 0); 3189 mutex_exit(&connp->conn_lock); 3190 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3191 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3192 ixa->ixa_cpid = connp->conn_cpid; 3193 ixa_refrele(ixa); 3194 UDPS_BUMP_MIB(us, udpOutErrors); 3195 freemsg(mp); 3196 return (error); 3197 } 3198 3199 /* 3200 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3201 * safe copy, then we need to fill in any pointers in it. 3202 */ 3203 if (ixa->ixa_ire == NULL) { 3204 in6_addr_t lastdst, lastsrc; 3205 in6_addr_t nexthop; 3206 in_port_t lastport; 3207 3208 lastsrc = connp->conn_v6lastsrc; 3209 lastdst = connp->conn_v6lastdst; 3210 lastport = connp->conn_lastdstport; 3211 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); 3212 mutex_exit(&connp->conn_lock); 3213 3214 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, 3215 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | 3216 IPDF_VERIFY_DST | IPDF_IPSEC); 3217 switch (error) { 3218 case 0: 3219 break; 3220 case EADDRNOTAVAIL: 3221 /* 3222 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3223 * Don't have the application see that errno 3224 */ 3225 error = ENETUNREACH; 3226 goto failed; 3227 case ENETDOWN: 3228 /* 3229 * Have !ipif_addr_ready address; drop packet silently 3230 * until we can get applications to not send until we 3231 * are ready. 
3232 */ 3233 error = 0; 3234 goto failed; 3235 case EHOSTUNREACH: 3236 case ENETUNREACH: 3237 if (ixa->ixa_ire != NULL) { 3238 /* 3239 * Let conn_ip_output/ire_send_noroute return 3240 * the error and send any local ICMP error. 3241 */ 3242 error = 0; 3243 break; 3244 } 3245 /* FALLTHRU */ 3246 default: 3247 failed: 3248 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3249 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3250 ixa->ixa_cpid = connp->conn_cpid; 3251 ixa_refrele(ixa); 3252 freemsg(mp); 3253 UDPS_BUMP_MIB(us, udpOutErrors); 3254 return (error); 3255 } 3256 } else { 3257 /* Done with conn_t */ 3258 mutex_exit(&connp->conn_lock); 3259 } 3260 3261 /* We're done. Pass the packet to ip. */ 3262 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 3263 3264 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3265 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3266 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3267 3268 error = conn_ip_output(mp, ixa); 3269 /* No udpOutErrors if an error since IP increases its error counter */ 3270 switch (error) { 3271 case 0: 3272 break; 3273 case EWOULDBLOCK: 3274 (void) ixa_check_drain_insert(connp, ixa); 3275 error = 0; 3276 break; 3277 case EADDRNOTAVAIL: 3278 /* 3279 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3280 * Don't have the application see that errno 3281 */ 3282 error = ENETUNREACH; 3283 /* FALLTHRU */ 3284 default: 3285 mutex_enter(&connp->conn_lock); 3286 /* 3287 * Clear the source and v6lastdst so we call ip_attr_connect 3288 * for the next packet and try to pick a better source. 
3289 */ 3290 if (connp->conn_mcbc_bind) 3291 connp->conn_saddr_v6 = ipv6_all_zeros; 3292 else 3293 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3294 connp->conn_v6lastdst = ipv6_all_zeros; 3295 mutex_exit(&connp->conn_lock); 3296 break; 3297 } 3298 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3299 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3300 ixa->ixa_cpid = connp->conn_cpid; 3301 ixa_refrele(ixa); 3302 return (error); 3303 } 3304 3305 3306 /* 3307 * Prepend the header template and then fill in the source and 3308 * flowinfo. The caller needs to handle the destination address since 3309 * it's setting is different if rthdr or source route. 3310 * 3311 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET. 3312 * When it returns NULL it sets errorp. 3313 */ 3314 static mblk_t * 3315 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, 3316 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) 3317 { 3318 udp_t *udp = connp->conn_udp; 3319 udp_stack_t *us = udp->udp_us; 3320 boolean_t insert_spi = udp->udp_nat_t_endpoint; 3321 boolean_t hash_srcport = udp->udp_vxlanhash; 3322 uint_t pktlen; 3323 uint_t alloclen; 3324 uint_t copylen; 3325 uint8_t *iph; 3326 uint_t ip_hdr_length; 3327 udpha_t *udpha; 3328 uint32_t cksum; 3329 ip_pkt_t *ipp; 3330 uint16_t srcport; 3331 3332 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3333 3334 /* 3335 * If we have source port hashing going on, determine the hash before 3336 * we modify the mblk_t. 3337 */ 3338 if (hash_srcport == B_TRUE) { 3339 srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN, 3340 IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX, 3341 ntohs(connp->conn_lport)); 3342 } 3343 3344 /* 3345 * Copy the header template and leave space for an SPI 3346 */ 3347 copylen = connp->conn_ht_iphc_len; 3348 alloclen = copylen + (insert_spi ? 
sizeof (uint32_t) : 0); 3349 pktlen = alloclen + msgdsize(mp); 3350 if (pktlen > IP_MAXPACKET) { 3351 freemsg(mp); 3352 *errorp = EMSGSIZE; 3353 return (NULL); 3354 } 3355 ixa->ixa_pktlen = pktlen; 3356 3357 /* check/fix buffer config, setup pointers into it */ 3358 iph = mp->b_rptr - alloclen; 3359 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { 3360 mblk_t *mp1; 3361 3362 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); 3363 if (mp1 == NULL) { 3364 freemsg(mp); 3365 *errorp = ENOMEM; 3366 return (NULL); 3367 } 3368 mp1->b_wptr = DB_LIM(mp1); 3369 mp1->b_cont = mp; 3370 mp = mp1; 3371 iph = (mp->b_wptr - alloclen); 3372 } 3373 mp->b_rptr = iph; 3374 bcopy(connp->conn_ht_iphc, iph, copylen); 3375 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); 3376 3377 ixa->ixa_ip_hdr_length = ip_hdr_length; 3378 udpha = (udpha_t *)(iph + ip_hdr_length); 3379 3380 /* 3381 * Setup header length and prepare for ULP checksum done in IP. 3382 * udp_build_hdr_template has already massaged any routing header 3383 * and placed the result in conn_sum. 3384 * 3385 * We make it easy for IP to include our pseudo header 3386 * by putting our length in uha_checksum. 
3387 */ 3388 cksum = pktlen - ip_hdr_length; 3389 udpha->uha_length = htons(cksum); 3390 3391 cksum += connp->conn_sum; 3392 cksum = (cksum >> 16) + (cksum & 0xFFFF); 3393 ASSERT(cksum < 0x10000); 3394 3395 ipp = &connp->conn_xmit_ipp; 3396 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3397 ipha_t *ipha = (ipha_t *)iph; 3398 3399 ipha->ipha_length = htons((uint16_t)pktlen); 3400 3401 /* IP does the checksum if uha_checksum is non-zero */ 3402 if (us->us_do_checksum) 3403 udpha->uha_checksum = htons(cksum); 3404 3405 /* if IP_PKTINFO specified an addres it wins over bind() */ 3406 if ((ipp->ipp_fields & IPPF_ADDR) && 3407 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3408 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); 3409 ipha->ipha_src = ipp->ipp_addr_v4; 3410 } else { 3411 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 3412 } 3413 } else { 3414 ip6_t *ip6h = (ip6_t *)iph; 3415 3416 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); 3417 udpha->uha_checksum = htons(cksum); 3418 3419 /* if IP_PKTINFO specified an addres it wins over bind() */ 3420 if ((ipp->ipp_fields & IPPF_ADDR) && 3421 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3422 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); 3423 ip6h->ip6_src = ipp->ipp_addr; 3424 } else { 3425 ip6h->ip6_src = *v6src; 3426 } 3427 ip6h->ip6_vcf = 3428 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 3429 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 3430 if (ipp->ipp_fields & IPPF_TCLASS) { 3431 /* Overrides the class part of flowinfo */ 3432 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 3433 ipp->ipp_tclass); 3434 } 3435 } 3436 3437 /* Insert all-0s SPI now. 
*/ 3438 if (insert_spi) 3439 *((uint32_t *)(udpha + 1)) = 0; 3440 3441 udpha->uha_dst_port = dstport; 3442 if (hash_srcport == B_TRUE) 3443 udpha->uha_src_port = htons(srcport); 3444 3445 return (mp); 3446 } 3447 3448 /* 3449 * Send a T_UDERR_IND in response to an M_DATA 3450 */ 3451 static void 3452 udp_ud_err_connected(conn_t *connp, t_scalar_t error) 3453 { 3454 struct sockaddr_storage ss; 3455 sin_t *sin; 3456 sin6_t *sin6; 3457 struct sockaddr *addr; 3458 socklen_t addrlen; 3459 mblk_t *mp1; 3460 3461 mutex_enter(&connp->conn_lock); 3462 /* Initialize addr and addrlen as if they're passed in */ 3463 if (connp->conn_family == AF_INET) { 3464 sin = (sin_t *)&ss; 3465 *sin = sin_null; 3466 sin->sin_family = AF_INET; 3467 sin->sin_port = connp->conn_fport; 3468 sin->sin_addr.s_addr = connp->conn_faddr_v4; 3469 addr = (struct sockaddr *)sin; 3470 addrlen = sizeof (*sin); 3471 } else { 3472 sin6 = (sin6_t *)&ss; 3473 *sin6 = sin6_null; 3474 sin6->sin6_family = AF_INET6; 3475 sin6->sin6_port = connp->conn_fport; 3476 sin6->sin6_flowinfo = connp->conn_flowinfo; 3477 sin6->sin6_addr = connp->conn_faddr_v6; 3478 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && 3479 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 3480 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; 3481 } else { 3482 sin6->sin6_scope_id = 0; 3483 } 3484 sin6->__sin6_src_id = 0; 3485 addr = (struct sockaddr *)sin6; 3486 addrlen = sizeof (*sin6); 3487 } 3488 mutex_exit(&connp->conn_lock); 3489 3490 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); 3491 if (mp1 != NULL) 3492 putnext(connp->conn_rq, mp1); 3493 } 3494 3495 /* 3496 * This routine handles all messages passed downstream. It either 3497 * consumes the message or passes it downstream; it never queues a 3498 * a message. 3499 * 3500 * Also entry point for sockfs when udp is in "direct sockfs" mode. 
This mode 3501 * is valid when we are directly beneath the stream head, and thus sockfs 3502 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3503 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3504 * connected endpoints. 3505 */ 3506 int 3507 udp_wput(queue_t *q, mblk_t *mp) 3508 { 3509 sin6_t *sin6; 3510 sin_t *sin = NULL; 3511 uint_t srcid; 3512 conn_t *connp = Q_TO_CONN(q); 3513 udp_t *udp = connp->conn_udp; 3514 int error = 0; 3515 struct sockaddr *addr = NULL; 3516 socklen_t addrlen; 3517 udp_stack_t *us = udp->udp_us; 3518 struct T_unitdata_req *tudr; 3519 mblk_t *data_mp; 3520 ushort_t ipversion; 3521 cred_t *cr; 3522 pid_t pid; 3523 3524 /* 3525 * We directly handle several cases here: T_UNITDATA_REQ message 3526 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3527 * socket. 3528 */ 3529 switch (DB_TYPE(mp)) { 3530 case M_DATA: 3531 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3532 /* Not connected; address is required */ 3533 UDPS_BUMP_MIB(us, udpOutErrors); 3534 UDP_DBGSTAT(us, udp_data_notconn); 3535 UDP_STAT(us, udp_out_err_notconn); 3536 freemsg(mp); 3537 return (0); 3538 } 3539 /* 3540 * All Solaris components should pass a db_credp 3541 * for this message, hence we ASSERT. 3542 * On production kernels we return an error to be robust against 3543 * random streams modules sitting on top of us. 
3544 */ 3545 cr = msg_getcred(mp, &pid); 3546 ASSERT(cr != NULL); 3547 if (cr == NULL) { 3548 UDPS_BUMP_MIB(us, udpOutErrors); 3549 freemsg(mp); 3550 return (0); 3551 } 3552 ASSERT(udp->udp_issocket); 3553 UDP_DBGSTAT(us, udp_data_conn); 3554 error = udp_output_connected(connp, mp, cr, pid); 3555 if (error != 0) { 3556 UDP_STAT(us, udp_out_err_output); 3557 if (connp->conn_rq != NULL) 3558 udp_ud_err_connected(connp, (t_scalar_t)error); 3559 #ifdef DEBUG 3560 printf("udp_output_connected returned %d\n", error); 3561 #endif 3562 } 3563 return (0); 3564 3565 case M_PROTO: 3566 case M_PCPROTO: 3567 tudr = (struct T_unitdata_req *)mp->b_rptr; 3568 if (MBLKL(mp) < sizeof (*tudr) || 3569 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3570 udp_wput_other(q, mp); 3571 return (0); 3572 } 3573 break; 3574 3575 default: 3576 udp_wput_other(q, mp); 3577 return (0); 3578 } 3579 3580 /* Handle valid T_UNITDATA_REQ here */ 3581 data_mp = mp->b_cont; 3582 if (data_mp == NULL) { 3583 error = EPROTO; 3584 goto ud_error2; 3585 } 3586 mp->b_cont = NULL; 3587 3588 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3589 error = EADDRNOTAVAIL; 3590 goto ud_error2; 3591 } 3592 3593 /* 3594 * All Solaris components should pass a db_credp 3595 * for this TPI message, hence we should ASSERT. 3596 * However, RPC (svc_clts_ksend) does this odd thing where it 3597 * passes the options from a T_UNITDATA_IND unchanged in a 3598 * T_UNITDATA_REQ. While that is the right thing to do for 3599 * some options, SCM_UCRED being the key one, this also makes it 3600 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3601 */ 3602 cr = msg_getcred(mp, &pid); 3603 if (cr == NULL) { 3604 cr = connp->conn_cred; 3605 pid = connp->conn_cpid; 3606 } 3607 3608 /* 3609 * If a port has not been bound to the stream, fail. 3610 * This is not a problem when sockfs is directly 3611 * above us, because it will ensure that the socket 3612 * is first bound before allowing data to be sent. 
3613 */ 3614 if (udp->udp_state == TS_UNBND) { 3615 error = EPROTO; 3616 goto ud_error2; 3617 } 3618 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3619 addrlen = tudr->DEST_length; 3620 3621 switch (connp->conn_family) { 3622 case AF_INET6: 3623 sin6 = (sin6_t *)addr; 3624 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3625 (sin6->sin6_family != AF_INET6)) { 3626 error = EADDRNOTAVAIL; 3627 goto ud_error2; 3628 } 3629 3630 srcid = sin6->__sin6_src_id; 3631 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3632 /* 3633 * Destination is a non-IPv4-compatible IPv6 address. 3634 * Send out an IPv6 format packet. 3635 */ 3636 3637 /* 3638 * If the local address is a mapped address return 3639 * an error. 3640 * It would be possible to send an IPv6 packet but the 3641 * response would never make it back to the application 3642 * since it is bound to a mapped address. 3643 */ 3644 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3645 error = EADDRNOTAVAIL; 3646 goto ud_error2; 3647 } 3648 3649 UDP_DBGSTAT(us, udp_out_ipv6); 3650 3651 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3652 sin6->sin6_addr = ipv6_loopback; 3653 ipversion = IPV6_VERSION; 3654 } else { 3655 if (connp->conn_ipv6_v6only) { 3656 error = EADDRNOTAVAIL; 3657 goto ud_error2; 3658 } 3659 3660 /* 3661 * If the local address is not zero or a mapped address 3662 * return an error. It would be possible to send an 3663 * IPv4 packet but the response would never make it 3664 * back to the application since it is bound to a 3665 * non-mapped address. 
3666 */ 3667 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3668 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3669 error = EADDRNOTAVAIL; 3670 goto ud_error2; 3671 } 3672 UDP_DBGSTAT(us, udp_out_mapped); 3673 3674 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3675 V4_PART_OF_V6(sin6->sin6_addr) = 3676 htonl(INADDR_LOOPBACK); 3677 } 3678 ipversion = IPV4_VERSION; 3679 } 3680 3681 if (tudr->OPT_length != 0) { 3682 /* 3683 * If we are connected then the destination needs to be 3684 * the same as the connected one. 3685 */ 3686 if (udp->udp_state == TS_DATA_XFER && 3687 !conn_same_as_last_v6(connp, sin6)) { 3688 error = EISCONN; 3689 goto ud_error2; 3690 } 3691 UDP_STAT(us, udp_out_opt); 3692 error = udp_output_ancillary(connp, NULL, sin6, 3693 data_mp, mp, NULL, cr, pid); 3694 } else { 3695 ip_xmit_attr_t *ixa; 3696 3697 /* 3698 * We have to allocate an ip_xmit_attr_t before we grab 3699 * conn_lock and we need to hold conn_lock once we've 3700 * checked conn_same_as_last_v6 to handle concurrent 3701 * send* calls on a socket. 
3702 */ 3703 ixa = conn_get_ixa(connp, B_FALSE); 3704 if (ixa == NULL) { 3705 error = ENOMEM; 3706 goto ud_error2; 3707 } 3708 mutex_enter(&connp->conn_lock); 3709 3710 if (conn_same_as_last_v6(connp, sin6) && 3711 connp->conn_lastsrcid == srcid && 3712 ipsec_outbound_policy_current(ixa)) { 3713 UDP_DBGSTAT(us, udp_out_lastdst); 3714 /* udp_output_lastdst drops conn_lock */ 3715 error = udp_output_lastdst(connp, data_mp, cr, 3716 pid, ixa); 3717 } else { 3718 UDP_DBGSTAT(us, udp_out_diffdst); 3719 /* udp_output_newdst drops conn_lock */ 3720 error = udp_output_newdst(connp, data_mp, NULL, 3721 sin6, ipversion, cr, pid, ixa); 3722 } 3723 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3724 } 3725 if (error == 0) { 3726 freeb(mp); 3727 return (0); 3728 } 3729 break; 3730 3731 case AF_INET: 3732 sin = (sin_t *)addr; 3733 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 3734 (sin->sin_family != AF_INET)) { 3735 error = EADDRNOTAVAIL; 3736 goto ud_error2; 3737 } 3738 UDP_DBGSTAT(us, udp_out_ipv4); 3739 if (sin->sin_addr.s_addr == INADDR_ANY) 3740 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 3741 ipversion = IPV4_VERSION; 3742 3743 srcid = 0; 3744 if (tudr->OPT_length != 0) { 3745 /* 3746 * If we are connected then the destination needs to be 3747 * the same as the connected one. 3748 */ 3749 if (udp->udp_state == TS_DATA_XFER && 3750 !conn_same_as_last_v4(connp, sin)) { 3751 error = EISCONN; 3752 goto ud_error2; 3753 } 3754 UDP_STAT(us, udp_out_opt); 3755 error = udp_output_ancillary(connp, sin, NULL, 3756 data_mp, mp, NULL, cr, pid); 3757 } else { 3758 ip_xmit_attr_t *ixa; 3759 3760 /* 3761 * We have to allocate an ip_xmit_attr_t before we grab 3762 * conn_lock and we need to hold conn_lock once we've 3763 * checked conn_same_as_last_v4 to handle concurrent 3764 * send* calls on a socket. 
3765 */ 3766 ixa = conn_get_ixa(connp, B_FALSE); 3767 if (ixa == NULL) { 3768 error = ENOMEM; 3769 goto ud_error2; 3770 } 3771 mutex_enter(&connp->conn_lock); 3772 3773 if (conn_same_as_last_v4(connp, sin) && 3774 ipsec_outbound_policy_current(ixa)) { 3775 UDP_DBGSTAT(us, udp_out_lastdst); 3776 /* udp_output_lastdst drops conn_lock */ 3777 error = udp_output_lastdst(connp, data_mp, cr, 3778 pid, ixa); 3779 } else { 3780 UDP_DBGSTAT(us, udp_out_diffdst); 3781 /* udp_output_newdst drops conn_lock */ 3782 error = udp_output_newdst(connp, data_mp, sin, 3783 NULL, ipversion, cr, pid, ixa); 3784 } 3785 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3786 } 3787 if (error == 0) { 3788 freeb(mp); 3789 return (0); 3790 } 3791 break; 3792 } 3793 UDP_STAT(us, udp_out_err_output); 3794 ASSERT(mp != NULL); 3795 /* mp is freed by the following routine */ 3796 udp_ud_err(q, mp, (t_scalar_t)error); 3797 return (0); 3798 3799 ud_error2: 3800 UDPS_BUMP_MIB(us, udpOutErrors); 3801 freemsg(data_mp); 3802 UDP_STAT(us, udp_out_err_output); 3803 ASSERT(mp != NULL); 3804 /* mp is freed by the following routine */ 3805 udp_ud_err(q, mp, (t_scalar_t)error); 3806 return (0); 3807 } 3808 3809 /* 3810 * Handle the case of the IP address, port, flow label being different 3811 * for both IPv4 and IPv6. 3812 * 3813 * NOTE: The caller must hold conn_lock and we drop it here. 
3814 */ 3815 static int 3816 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 3817 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 3818 { 3819 uint_t srcid; 3820 uint32_t flowinfo; 3821 udp_t *udp = connp->conn_udp; 3822 int error = 0; 3823 ip_xmit_attr_t *oldixa; 3824 udp_stack_t *us = udp->udp_us; 3825 in6_addr_t v6src; 3826 in6_addr_t v6dst; 3827 in6_addr_t v6nexthop; 3828 in_port_t dstport; 3829 3830 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3831 ASSERT(ixa != NULL); 3832 /* 3833 * We hold conn_lock across all the use and modifications of 3834 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 3835 * stay consistent. 3836 */ 3837 3838 ASSERT(cr != NULL); 3839 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3840 ixa->ixa_cred = cr; 3841 ixa->ixa_cpid = pid; 3842 if (is_system_labeled()) { 3843 /* We need to restart with a label based on the cred */ 3844 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3845 } 3846 3847 /* 3848 * If we are connected then the destination needs to be the 3849 * same as the connected one, which is not the case here since we 3850 * checked for that above. 3851 */ 3852 if (udp->udp_state == TS_DATA_XFER) { 3853 mutex_exit(&connp->conn_lock); 3854 error = EISCONN; 3855 goto ud_error; 3856 } 3857 3858 /* In case previous destination was multicast or multirt */ 3859 ip_attr_newdst(ixa); 3860 3861 /* 3862 * If laddr is unspecified then we look at sin6_src_id. 3863 * We will give precedence to a source address set with IPV6_PKTINFO 3864 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 3865 * want ip_attr_connect to select a source (since it can fail) when 3866 * IPV6_PKTINFO is specified. 3867 * If this doesn't result in a source address then we get a source 3868 * from ip_attr_connect() below. 
3869 */ 3870 v6src = connp->conn_saddr_v6; 3871 if (sin != NULL) { 3872 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 3873 dstport = sin->sin_port; 3874 flowinfo = 0; 3875 /* Don't bother with ip_srcid_find_id(), but indicate anyway. */ 3876 srcid = 0; 3877 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3878 ixa->ixa_flags |= IXAF_IS_IPV4; 3879 } else { 3880 boolean_t v4mapped; 3881 3882 v6dst = sin6->sin6_addr; 3883 dstport = sin6->sin6_port; 3884 flowinfo = sin6->sin6_flowinfo; 3885 srcid = sin6->__sin6_src_id; 3886 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 3887 ixa->ixa_scopeid = sin6->sin6_scope_id; 3888 ixa->ixa_flags |= IXAF_SCOPEID_SET; 3889 } else { 3890 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3891 } 3892 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst); 3893 if (v4mapped) 3894 ixa->ixa_flags |= IXAF_IS_IPV4; 3895 else 3896 ixa->ixa_flags &= ~IXAF_IS_IPV4; 3897 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 3898 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3899 v4mapped, connp->conn_netstack)) { 3900 /* Mismatched v4mapped/v6 specified by srcid. */ 3901 mutex_exit(&connp->conn_lock); 3902 error = EADDRNOTAVAIL; 3903 goto ud_error; 3904 } 3905 } 3906 } 3907 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ 3908 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) { 3909 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 3910 3911 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3912 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3913 v6src = ipp->ipp_addr; 3914 } else { 3915 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3916 v6src = ipp->ipp_addr; 3917 } 3918 } 3919 3920 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 3921 mutex_exit(&connp->conn_lock); 3922 3923 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 3924 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 3925 switch (error) { 3926 case 0: 3927 break; 3928 case EADDRNOTAVAIL: 3929 /* 3930 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3931 * Don't have the application see that errno 3932 */ 3933 error = ENETUNREACH; 3934 goto failed; 3935 case ENETDOWN: 3936 /* 3937 * Have !ipif_addr_ready address; drop packet silently 3938 * until we can get applications to not send until we 3939 * are ready. 3940 */ 3941 error = 0; 3942 goto failed; 3943 case EHOSTUNREACH: 3944 case ENETUNREACH: 3945 if (ixa->ixa_ire != NULL) { 3946 /* 3947 * Let conn_ip_output/ire_send_noroute return 3948 * the error and send any local ICMP error. 3949 */ 3950 error = 0; 3951 break; 3952 } 3953 /* FALLTHRU */ 3954 failed: 3955 default: 3956 goto ud_error; 3957 } 3958 3959 3960 /* 3961 * Cluster note: we let the cluster hook know that we are sending to a 3962 * new address and/or port. 3963 */ 3964 if (cl_inet_connect2 != NULL) { 3965 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 3966 if (error != 0) { 3967 error = EHOSTUNREACH; 3968 goto ud_error; 3969 } 3970 } 3971 3972 mutex_enter(&connp->conn_lock); 3973 /* 3974 * While we dropped the lock some other thread might have connected 3975 * this socket. If so we bail out with EISCONN to ensure that the 3976 * connecting thread is the one that updates conn_ixa, conn_ht_* 3977 * and conn_*last*. 3978 */ 3979 if (udp->udp_state == TS_DATA_XFER) { 3980 mutex_exit(&connp->conn_lock); 3981 error = EISCONN; 3982 goto ud_error; 3983 } 3984 3985 /* 3986 * We need to rebuild the headers if 3987 * - we are labeling packets (could be different for different 3988 * destinations) 3989 * - we have a source route (or routing header) since we need to 3990 * massage that to get the pseudo-header checksum 3991 * - the IP version is different than the last time 3992 * - a socket option with COA_HEADER_CHANGED has been set which 3993 * set conn_v6lastdst to zero. 3994 * 3995 * Otherwise the prepend function will just update the src, dst, 3996 * dstport, and flow label. 
3997 */ 3998 if (is_system_labeled()) { 3999 /* TX MLP requires SCM_UCRED and don't have that here */ 4000 if (connp->conn_mlp_type != mlptSingle) { 4001 mutex_exit(&connp->conn_lock); 4002 error = ECONNREFUSED; 4003 goto ud_error; 4004 } 4005 /* 4006 * Check whether Trusted Solaris policy allows communication 4007 * with this host, and pretend that the destination is 4008 * unreachable if not. 4009 * Compute any needed label and place it in ipp_label_v4/v6. 4010 * 4011 * Later conn_build_hdr_template/conn_prepend_hdr takes 4012 * ipp_label_v4/v6 to form the packet. 4013 * 4014 * Tsol note: Since we hold conn_lock we know no other 4015 * thread manipulates conn_xmit_ipp. 4016 */ 4017 error = conn_update_label(connp, ixa, &v6dst, 4018 &connp->conn_xmit_ipp); 4019 if (error != 0) { 4020 mutex_exit(&connp->conn_lock); 4021 goto ud_error; 4022 } 4023 /* Rebuild the header template */ 4024 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4025 flowinfo); 4026 if (error != 0) { 4027 mutex_exit(&connp->conn_lock); 4028 goto ud_error; 4029 } 4030 } else if ((connp->conn_xmit_ipp.ipp_fields & 4031 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 4032 ipversion != connp->conn_lastipversion || 4033 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 4034 /* Rebuild the header template */ 4035 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4036 flowinfo); 4037 if (error != 0) { 4038 mutex_exit(&connp->conn_lock); 4039 goto ud_error; 4040 } 4041 } else { 4042 /* Simply update the destination address if no source route */ 4043 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4044 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 4045 4046 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 4047 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 4048 ipha->ipha_fragment_offset_and_flags |= 4049 IPH_DF_HTONS; 4050 } else { 4051 ipha->ipha_fragment_offset_and_flags &= 4052 ~IPH_DF_HTONS; 4053 } 4054 } else { 4055 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 4056 ip6h->ip6_dst = v6dst; 4057 } 4058 
} 4059 4060 /* 4061 * Remember the dst/dstport etc which corresponds to the built header 4062 * template and conn_ixa. 4063 */ 4064 oldixa = conn_replace_ixa(connp, ixa); 4065 connp->conn_v6lastdst = v6dst; 4066 connp->conn_lastipversion = ipversion; 4067 connp->conn_lastdstport = dstport; 4068 connp->conn_lastflowinfo = flowinfo; 4069 connp->conn_lastscopeid = ixa->ixa_scopeid; 4070 connp->conn_lastsrcid = srcid; 4071 /* Also remember a source to use together with lastdst */ 4072 connp->conn_v6lastsrc = v6src; 4073 4074 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 4075 dstport, flowinfo, &error); 4076 4077 /* Done with conn_t */ 4078 mutex_exit(&connp->conn_lock); 4079 ixa_refrele(oldixa); 4080 4081 if (data_mp == NULL) { 4082 ASSERT(error != 0); 4083 goto ud_error; 4084 } 4085 4086 /* We're done. Pass the packet to ip. */ 4087 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 4088 4089 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 4090 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *, 4091 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]); 4092 4093 error = conn_ip_output(data_mp, ixa); 4094 /* No udpOutErrors if an error since IP increases its error counter */ 4095 switch (error) { 4096 case 0: 4097 break; 4098 case EWOULDBLOCK: 4099 (void) ixa_check_drain_insert(connp, ixa); 4100 error = 0; 4101 break; 4102 case EADDRNOTAVAIL: 4103 /* 4104 * IXAF_VERIFY_SOURCE tells us to pick a better source. 4105 * Don't have the application see that errno 4106 */ 4107 error = ENETUNREACH; 4108 /* FALLTHRU */ 4109 default: 4110 mutex_enter(&connp->conn_lock); 4111 /* 4112 * Clear the source and v6lastdst so we call ip_attr_connect 4113 * for the next packet and try to pick a better source. 
4114 */ 4115 if (connp->conn_mcbc_bind) 4116 connp->conn_saddr_v6 = ipv6_all_zeros; 4117 else 4118 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 4119 connp->conn_v6lastdst = ipv6_all_zeros; 4120 mutex_exit(&connp->conn_lock); 4121 break; 4122 } 4123 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4124 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4125 ixa->ixa_cpid = connp->conn_cpid; 4126 ixa_refrele(ixa); 4127 return (error); 4128 4129 ud_error: 4130 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4131 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4132 ixa->ixa_cpid = connp->conn_cpid; 4133 ixa_refrele(ixa); 4134 4135 freemsg(data_mp); 4136 UDPS_BUMP_MIB(us, udpOutErrors); 4137 UDP_STAT(us, udp_out_err_output); 4138 return (error); 4139 } 4140 4141 /* ARGSUSED */ 4142 static int 4143 udp_wput_fallback(queue_t *wq, mblk_t *mp) 4144 { 4145 #ifdef DEBUG 4146 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 4147 #endif 4148 freemsg(mp); 4149 return (0); 4150 } 4151 4152 4153 /* 4154 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
4155 */ 4156 static void 4157 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 4158 { 4159 void *data; 4160 mblk_t *datamp = mp->b_cont; 4161 conn_t *connp = Q_TO_CONN(q); 4162 udp_t *udp = connp->conn_udp; 4163 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 4164 4165 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 4166 cmdp->cb_error = EPROTO; 4167 qreply(q, mp); 4168 return; 4169 } 4170 data = datamp->b_rptr; 4171 4172 mutex_enter(&connp->conn_lock); 4173 switch (cmdp->cb_cmd) { 4174 case TI_GETPEERNAME: 4175 if (udp->udp_state != TS_DATA_XFER) 4176 cmdp->cb_error = ENOTCONN; 4177 else 4178 cmdp->cb_error = conn_getpeername(connp, data, 4179 &cmdp->cb_len); 4180 break; 4181 case TI_GETMYNAME: 4182 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); 4183 break; 4184 default: 4185 cmdp->cb_error = EINVAL; 4186 break; 4187 } 4188 mutex_exit(&connp->conn_lock); 4189 4190 qreply(q, mp); 4191 } 4192 4193 static void 4194 udp_use_pure_tpi(udp_t *udp) 4195 { 4196 conn_t *connp = udp->udp_connp; 4197 4198 mutex_enter(&connp->conn_lock); 4199 udp->udp_issocket = B_FALSE; 4200 mutex_exit(&connp->conn_lock); 4201 UDP_STAT(udp->udp_us, udp_sock_fallback); 4202 } 4203 4204 static void 4205 udp_wput_other(queue_t *q, mblk_t *mp) 4206 { 4207 uchar_t *rptr = mp->b_rptr; 4208 struct iocblk *iocp; 4209 conn_t *connp = Q_TO_CONN(q); 4210 udp_t *udp = connp->conn_udp; 4211 cred_t *cr; 4212 4213 switch (mp->b_datap->db_type) { 4214 case M_CMD: 4215 udp_wput_cmdblk(q, mp); 4216 return; 4217 4218 case M_PROTO: 4219 case M_PCPROTO: 4220 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4221 /* 4222 * If the message does not contain a PRIM_type, 4223 * throw it away. 
4224 */ 4225 freemsg(mp); 4226 return; 4227 } 4228 switch (((t_primp_t)rptr)->type) { 4229 case T_ADDR_REQ: 4230 udp_addr_req(q, mp); 4231 return; 4232 case O_T_BIND_REQ: 4233 case T_BIND_REQ: 4234 udp_tpi_bind(q, mp); 4235 return; 4236 case T_CONN_REQ: 4237 udp_tpi_connect(q, mp); 4238 return; 4239 case T_CAPABILITY_REQ: 4240 udp_capability_req(q, mp); 4241 return; 4242 case T_INFO_REQ: 4243 udp_info_req(q, mp); 4244 return; 4245 case T_UNITDATA_REQ: 4246 /* 4247 * If a T_UNITDATA_REQ gets here, the address must 4248 * be bad. Valid T_UNITDATA_REQs are handled 4249 * in udp_wput. 4250 */ 4251 udp_ud_err(q, mp, EADDRNOTAVAIL); 4252 return; 4253 case T_UNBIND_REQ: 4254 udp_tpi_unbind(q, mp); 4255 return; 4256 case T_SVR4_OPTMGMT_REQ: 4257 /* 4258 * All Solaris components should pass a db_credp 4259 * for this TPI message, hence we ASSERT. 4260 * But in case there is some other M_PROTO that looks 4261 * like a TPI message sent by some other kernel 4262 * component, we check and return an error. 4263 */ 4264 cr = msg_getcred(mp, NULL); 4265 ASSERT(cr != NULL); 4266 if (cr == NULL) { 4267 udp_err_ack(q, mp, TSYSERR, EINVAL); 4268 return; 4269 } 4270 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 4271 cr)) { 4272 svr4_optcom_req(q, mp, cr, &udp_opt_obj); 4273 } 4274 return; 4275 4276 case T_OPTMGMT_REQ: 4277 /* 4278 * All Solaris components should pass a db_credp 4279 * for this TPI message, hence we ASSERT. 4280 * But in case there is some other M_PROTO that looks 4281 * like a TPI message sent by some other kernel 4282 * component, we check and return an error. 4283 */ 4284 cr = msg_getcred(mp, NULL); 4285 ASSERT(cr != NULL); 4286 if (cr == NULL) { 4287 udp_err_ack(q, mp, TSYSERR, EINVAL); 4288 return; 4289 } 4290 tpi_optcom_req(q, mp, cr, &udp_opt_obj); 4291 return; 4292 4293 case T_DISCON_REQ: 4294 udp_tpi_disconnect(q, mp); 4295 return; 4296 4297 /* The following TPI message is not supported by udp. 
*/ 4298 case O_T_CONN_RES: 4299 case T_CONN_RES: 4300 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4301 return; 4302 4303 /* The following 3 TPI requests are illegal for udp. */ 4304 case T_DATA_REQ: 4305 case T_EXDATA_REQ: 4306 case T_ORDREL_REQ: 4307 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4308 return; 4309 default: 4310 break; 4311 } 4312 break; 4313 case M_FLUSH: 4314 if (*rptr & FLUSHW) 4315 flushq(q, FLUSHDATA); 4316 break; 4317 case M_IOCTL: 4318 iocp = (struct iocblk *)mp->b_rptr; 4319 switch (iocp->ioc_cmd) { 4320 case TI_GETPEERNAME: 4321 if (udp->udp_state != TS_DATA_XFER) { 4322 /* 4323 * If a default destination address has not 4324 * been associated with the stream, then we 4325 * don't know the peer's name. 4326 */ 4327 iocp->ioc_error = ENOTCONN; 4328 iocp->ioc_count = 0; 4329 mp->b_datap->db_type = M_IOCACK; 4330 qreply(q, mp); 4331 return; 4332 } 4333 /* FALLTHRU */ 4334 case TI_GETMYNAME: 4335 /* 4336 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4337 * need to copyin the user's strbuf structure. 4338 * Processing will continue in the M_IOCDATA case 4339 * below. 4340 */ 4341 mi_copyin(q, mp, NULL, 4342 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4343 return; 4344 case _SIOCSOCKFALLBACK: 4345 /* 4346 * Either sockmod is about to be popped and the 4347 * socket would now be treated as a plain stream, 4348 * or a module is about to be pushed so we have 4349 * to follow pure TPI semantics. 4350 */ 4351 if (!udp->udp_issocket) { 4352 DB_TYPE(mp) = M_IOCNAK; 4353 iocp->ioc_error = EINVAL; 4354 } else { 4355 udp_use_pure_tpi(udp); 4356 4357 DB_TYPE(mp) = M_IOCACK; 4358 iocp->ioc_error = 0; 4359 } 4360 iocp->ioc_count = 0; 4361 iocp->ioc_rval = 0; 4362 qreply(q, mp); 4363 return; 4364 default: 4365 break; 4366 } 4367 break; 4368 case M_IOCDATA: 4369 udp_wput_iocdata(q, mp); 4370 return; 4371 default: 4372 /* Unrecognized messages are passed through without change. 
*/ 4373 break; 4374 } 4375 ip_wput_nondata(q, mp); 4376 } 4377 4378 /* 4379 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 4380 * messages. 4381 */ 4382 static void 4383 udp_wput_iocdata(queue_t *q, mblk_t *mp) 4384 { 4385 mblk_t *mp1; 4386 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4387 STRUCT_HANDLE(strbuf, sb); 4388 uint_t addrlen; 4389 conn_t *connp = Q_TO_CONN(q); 4390 udp_t *udp = connp->conn_udp; 4391 4392 /* Make sure it is one of ours. */ 4393 switch (iocp->ioc_cmd) { 4394 case TI_GETMYNAME: 4395 case TI_GETPEERNAME: 4396 break; 4397 default: 4398 ip_wput_nondata(q, mp); 4399 return; 4400 } 4401 4402 switch (mi_copy_state(q, mp, &mp1)) { 4403 case -1: 4404 return; 4405 case MI_COPY_CASE(MI_COPY_IN, 1): 4406 break; 4407 case MI_COPY_CASE(MI_COPY_OUT, 1): 4408 /* 4409 * The address has been copied out, so now 4410 * copyout the strbuf. 4411 */ 4412 mi_copyout(q, mp); 4413 return; 4414 case MI_COPY_CASE(MI_COPY_OUT, 2): 4415 /* 4416 * The address and strbuf have been copied out. 4417 * We're done, so just acknowledge the original 4418 * M_IOCTL. 4419 */ 4420 mi_copy_done(q, mp, 0); 4421 return; 4422 default: 4423 /* 4424 * Something strange has happened, so acknowledge 4425 * the original M_IOCTL with an EPROTO error. 4426 */ 4427 mi_copy_done(q, mp, EPROTO); 4428 return; 4429 } 4430 4431 /* 4432 * Now we have the strbuf structure for TI_GETMYNAME 4433 * and TI_GETPEERNAME. Next we copyout the requested 4434 * address and then we'll copyout the strbuf. 
4435 */ 4436 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 4437 4438 if (connp->conn_family == AF_INET) 4439 addrlen = sizeof (sin_t); 4440 else 4441 addrlen = sizeof (sin6_t); 4442 4443 if (STRUCT_FGET(sb, maxlen) < addrlen) { 4444 mi_copy_done(q, mp, EINVAL); 4445 return; 4446 } 4447 4448 switch (iocp->ioc_cmd) { 4449 case TI_GETMYNAME: 4450 break; 4451 case TI_GETPEERNAME: 4452 if (udp->udp_state != TS_DATA_XFER) { 4453 mi_copy_done(q, mp, ENOTCONN); 4454 return; 4455 } 4456 break; 4457 } 4458 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 4459 if (!mp1) 4460 return; 4461 4462 STRUCT_FSET(sb, len, addrlen); 4463 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4464 case TI_GETMYNAME: 4465 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, 4466 &addrlen); 4467 break; 4468 case TI_GETPEERNAME: 4469 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, 4470 &addrlen); 4471 break; 4472 } 4473 mp1->b_wptr += addrlen; 4474 /* Copy out the address */ 4475 mi_copyout(q, mp); 4476 } 4477 4478 void 4479 udp_ddi_g_init(void) 4480 { 4481 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 4482 udp_opt_obj.odb_opt_arr_cnt); 4483 4484 /* 4485 * We want to be informed each time a stack is created or 4486 * destroyed in the kernel, so we can maintain the 4487 * set of udp_stack_t's. 4488 */ 4489 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 4490 } 4491 4492 void 4493 udp_ddi_g_destroy(void) 4494 { 4495 netstack_unregister(NS_UDP); 4496 } 4497 4498 #define INET_NAME "ip" 4499 4500 /* 4501 * Initialize the UDP stack instance. 
4502 */ 4503 static void * 4504 udp_stack_init(netstackid_t stackid, netstack_t *ns) 4505 { 4506 udp_stack_t *us; 4507 int i; 4508 int error = 0; 4509 major_t major; 4510 size_t arrsz; 4511 4512 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 4513 us->us_netstack = ns; 4514 4515 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 4516 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 4517 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 4518 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 4519 4520 /* 4521 * The smallest anonymous port in the priviledged port range which UDP 4522 * looks for free port. Use in the option UDP_ANONPRIVBIND. 4523 */ 4524 us->us_min_anonpriv_port = 512; 4525 4526 us->us_bind_fanout_size = udp_bind_fanout_size; 4527 4528 /* Roundup variable that might have been modified in /etc/system */ 4529 if (!ISP2(us->us_bind_fanout_size)) { 4530 /* Not a power of two. Round up to nearest power of two */ 4531 for (i = 0; i < 31; i++) { 4532 if (us->us_bind_fanout_size < (1 << i)) 4533 break; 4534 } 4535 us->us_bind_fanout_size = 1 << i; 4536 } 4537 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 4538 sizeof (udp_fanout_t), KM_SLEEP); 4539 for (i = 0; i < us->us_bind_fanout_size; i++) { 4540 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 4541 NULL); 4542 } 4543 4544 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t); 4545 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, 4546 KM_SLEEP); 4547 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz); 4548 4549 /* Allocate the per netstack stats */ 4550 mutex_enter(&cpu_lock); 4551 us->us_sc_cnt = MAX(ncpus, boot_ncpus); 4552 mutex_exit(&cpu_lock); 4553 us->us_sc = kmem_zalloc(max_ncpus * sizeof (udp_stats_cpu_t *), 4554 KM_SLEEP); 4555 for (i = 0; i < us->us_sc_cnt; i++) { 4556 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), 4557 KM_SLEEP); 4558 } 4559 4560 us->us_kstat = udp_kstat2_init(stackid); 4561 us->us_mibkp = udp_kstat_init(stackid); 4562 4563 
major = mod_name_to_major(INET_NAME); 4564 error = ldi_ident_from_major(major, &us->us_ldi_ident); 4565 ASSERT(error == 0); 4566 return (us); 4567 } 4568 4569 /* 4570 * Free the UDP stack instance. 4571 */ 4572 static void 4573 udp_stack_fini(netstackid_t stackid, void *arg) 4574 { 4575 udp_stack_t *us = (udp_stack_t *)arg; 4576 int i; 4577 4578 for (i = 0; i < us->us_bind_fanout_size; i++) { 4579 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 4580 } 4581 4582 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 4583 sizeof (udp_fanout_t)); 4584 4585 us->us_bind_fanout = NULL; 4586 4587 for (i = 0; i < us->us_sc_cnt; i++) 4588 kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t)); 4589 kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *)); 4590 4591 kmem_free(us->us_propinfo_tbl, 4592 udp_propinfo_count * sizeof (mod_prop_info_t)); 4593 us->us_propinfo_tbl = NULL; 4594 4595 udp_kstat_fini(stackid, us->us_mibkp); 4596 us->us_mibkp = NULL; 4597 4598 udp_kstat2_fini(stackid, us->us_kstat); 4599 us->us_kstat = NULL; 4600 4601 mutex_destroy(&us->us_epriv_port_lock); 4602 ldi_ident_release(us->us_ldi_ident); 4603 kmem_free(us, sizeof (*us)); 4604 } 4605 4606 static size_t 4607 udp_set_rcv_hiwat(udp_t *udp, size_t size) 4608 { 4609 udp_stack_t *us = udp->udp_us; 4610 4611 /* We add a bit of extra buffering */ 4612 size += size >> 1; 4613 if (size > us->us_max_buf) 4614 size = us->us_max_buf; 4615 4616 udp->udp_rcv_hiwat = size; 4617 return (size); 4618 } 4619 4620 /* 4621 * For the lower queue so that UDP can be a dummy mux. 4622 * Nobody should be sending 4623 * packets up this stream 4624 */ 4625 static int 4626 udp_lrput(queue_t *q, mblk_t *mp) 4627 { 4628 switch (mp->b_datap->db_type) { 4629 case M_FLUSH: 4630 /* Turn around */ 4631 if (*mp->b_rptr & FLUSHW) { 4632 *mp->b_rptr &= ~FLUSHR; 4633 qreply(q, mp); 4634 return (0); 4635 } 4636 break; 4637 } 4638 freemsg(mp); 4639 return (0); 4640 } 4641 4642 /* 4643 * For the lower queue so that UDP can be a dummy mux. 
* Nobody should be sending packets down this stream.
 */
/* ARGSUSED */
int
udp_lwput(queue_t *q, mblk_t *mp)
{
	/* Every message is simply discarded. */
	freemsg(mp);
	return (0);
}

/*
 * When a CPU is added, we need to allocate the per CPU stats struct.
 *
 * Slots below us_sc_cnt already exist, so this only allocates entries
 * us_sc_cnt through cpu_seqid and then raises us_sc_cnt to
 * cpu_seqid + 1.
 */
void
udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid)
{
	int i;

	if (cpu_seqid < us->us_sc_cnt)
		return;
	for (i = us->us_sc_cnt; i <= cpu_seqid; i++) {
		ASSERT(us->us_sc[i] == NULL);
		us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t),
		    KM_SLEEP);
	}
	/*
	 * The barrier sits between filling in the new slots and
	 * publishing the larger count.
	 */
	membar_producer();
	us->us_sc_cnt = cpu_seqid + 1;
}

/*
 * Below routines for UDP socket module.
 */

/*
 * Create and initialize a conn_t/udp_t pair for a new UDP endpoint.
 *
 * credp   - credentials of the opener; a hold is taken and stored in
 *           conn_cred.
 * isv6    - B_TRUE to create an AF_INET6 endpoint, B_FALSE for AF_INET.
 * flags   - KM_SLEEP or KM_NOSLEEP (asserted); passed to
 *           ipcl_conn_create().
 * errorp  - must be non-NULL; receives the result of
 *           secpolicy_basic_net_access(), or ENOMEM if the conn_t could
 *           not be allocated.
 *
 * Returns the new conn_t, or NULL on failure.  The endpoint is left in
 * TS_UNBND state.
 */
static conn_t *
udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
{
	udp_t		*udp;
	conn_t		*connp;
	zoneid_t	zoneid;
	netstack_t	*ns;
	udp_stack_t	*us;
	int		len;

	ASSERT(errorp != NULL);

	if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
		return (NULL);

	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);
	us = ns->netstack_udp;
	ASSERT(us != NULL);

	/*
	 * For exclusive stacks we set the zoneid to zero
	 * to make UDP operate as if in the global zone.
	 */
	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = crgetzoneid(credp);

	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);

	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
	if (connp == NULL) {
		/* Drop the hold taken by netstack_find_by_cred(). */
		netstack_rele(ns);
		*errorp = ENOMEM;
		return (NULL);
	}
	udp = connp->conn_udp;

	/*
	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
	 * done by netstack_find_by_cred()
	 */
	netstack_rele(ns);

	/*
	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
	 * need to lock anything.
	 */
	ASSERT(connp->conn_proto == IPPROTO_UDP);
	ASSERT(connp->conn_udp == udp);
	ASSERT(udp->udp_connp == connp);

	/* Set the initial state of the stream and the privilege status. */
	udp->udp_state = TS_UNBND;
	connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
	/* 'len' is the IP + UDP header size, used for conn_wroff below. */
	if (isv6) {
		connp->conn_family = AF_INET6;
		connp->conn_ipversion = IPV6_VERSION;
		connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
		connp->conn_default_ttl = us->us_ipv6_hoplimit;
		len = sizeof (ip6_t) + UDPH_SIZE;
	} else {
		connp->conn_family = AF_INET;
		connp->conn_ipversion = IPV4_VERSION;
		connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
		connp->conn_default_ttl = us->us_ipv4_ttl;
		len = sizeof (ipha_t) + UDPH_SIZE;
	}

	ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;

	connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
	/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
	connp->conn_ixa->ixa_zoneid = zoneid;

	connp->conn_zoneid = zoneid;

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode.  This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0)
		connp->conn_mac_mode = CONN_MAC_AWARE;

	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);

	udp->udp_us = us;

	/* Buffer sizes and watermarks start from the per-stack values. */
	connp->conn_rcvbuf = us->us_recv_hiwat;
	connp->conn_sndbuf = us->us_xmit_hiwat;
	connp->conn_sndlowat = us->us_xmit_lowat;
	connp->conn_rcvlowat = udp_mod_info.mi_lowat;

	connp->conn_wroff = len + us->us_wroff_extra;
	connp->conn_so_type = SOCK_DGRAM;

	connp->conn_recv = udp_input;
	connp->conn_recvicmp = udp_icmp_input;
	crhold(credp);
	connp->conn_cred = credp;
	connp->conn_cpid = curproc->p_pid;
	connp->conn_open_time = ddi_get_lbolt64();
	/* Cache things in ixa without an extra refhold */
	ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED));
	connp->conn_ixa->ixa_cred = connp->conn_cred;
	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
	if (is_system_labeled())
		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);

	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;

	if (us->us_pmtu_discovery)
		connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;

	return (connp);
}

/*
 * Socket create entry point for UDP.
 *
 * Only SOCK_DGRAM sockets in AF_INET or AF_INET6 with protocol 0 or
 * IPPROTO_UDP are accepted; anything else fails with EPROTONOSUPPORT.
 * On success the new conn_t is returned as the lower handle, *errorp is
 * 0, *smodep is SM_ATOMIC and *sock_downcalls points at
 * sock_udp_downcalls.  On failure NULL is returned with *errorp set.
 */
sock_lower_handle_t
udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
    uint_t *smodep, int *errorp, int flags, cred_t *credp)
{
	udp_t		*udp = NULL;
	udp_stack_t	*us;
	conn_t		*connp;
	boolean_t	isv6;

	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
	    (proto != 0 && proto != IPPROTO_UDP)) {
		*errorp = EPROTONOSUPPORT;
		return (NULL);
	}

	if (family == AF_INET6)
		isv6 = B_TRUE;
	else
		isv6 = B_FALSE;

	/* udp_do_open() has already set *errorp when it returns NULL. */
	connp = udp_do_open(credp, isv6, flags, errorp);
	if (connp == NULL)
		return (NULL);

	udp = connp->conn_udp;
	ASSERT(udp != NULL);
	us = udp->udp_us;
	ASSERT(us != NULL);

	/* Mark the endpoint as a socket rather than a STREAMS device. */
	udp->udp_issocket = B_TRUE;
	connp->conn_flags |= IPCL_NONSTR;

	/*
	 * Set flow control
	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
	 * need to lock anything.
	 */
	(void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
	udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;

	connp->conn_flow_cntrld = B_FALSE;

	/* Setup is complete: clear the incipient marking under conn_lock. */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_INCIPIENT;
	mutex_exit(&connp->conn_lock);

	*errorp = 0;
	*smodep = SM_ATOMIC;
	*sock_downcalls = &sock_udp_downcalls;
	return ((sock_lower_handle_t)connp);
}

/*
 * Socket activate entry point: record the upper handle and upcall
 * vector in the conn_t, then push UDP's protocol properties (write
 * offset, receive watermarks, packet size limits) to the socket layer
 * through su_set_proto_props.
 */
/* ARGSUSED3 */
void
udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
    sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
{
	conn_t			*connp = (conn_t *)proto_handle;
	struct sock_proto_props	sopp;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	connp->conn_upcalls = sock_upcalls;
	connp->conn_upper_handle = sock_handle;

	/*
	 * sopp has no initializer; only the fields assigned below are
	 * valid.
	 * NOTE(review): sopp_maxaddrlen is assigned below, but none of the
	 * flags set in sopp_flags appears to correspond to it - confirm
	 * whether the upcall consumes it.
	 */
	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
	sopp.sopp_wroff = connp->conn_wroff;
	sopp.sopp_maxblk = INFPSZ;
	sopp.sopp_rxhiwat = connp->conn_rcvbuf;
	sopp.sopp_rxlowat = connp->conn_rcvlowat;
	sopp.sopp_maxaddrlen = sizeof (sin6_t);
	sopp.sopp_maxpsz =
	    (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
	    UDP_MAXPACKET_IPV6;
	/* A module minimum packet size of 1 is reported as 0. */
	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
	    udp_mod_info.mi_minpsz;

	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
	    &sopp);
}

/*
 * Common close path for both STREAMS and socket endpoints.
 *
 * In order: notify the cluster hook (if registered and the endpoint is
 * bound and idle), remove the endpoint from the bind hash, quiesce the
 * conn, turn off queue processing for STREAMS endpoints, free the UDP
 * state, release the minor number or helper stream, and finally drop
 * the last reference and destroy the conn_t.
 */
static void
udp_do_close(conn_t *connp)
{
	udp_t	*udp;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (connp->conn_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
			    (in_port_t)connp->conn_lport, NULL);
		} else {
			(*cl_inet_unbind)(
			    connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(connp->conn_laddr_v6),
			    (in_port_t)connp->conn_lport, NULL);
		}
	}

	udp_bind_hash_remove(udp, B_FALSE);

	ip_quiesce_conn(connp);

	/* Only STREAMS endpoints have queues to shut down. */
	if (!IPCL_IS_NONSTR(connp)) {
		ASSERT(connp->conn_wq != NULL);
		ASSERT(connp->conn_rq != NULL);
		qprocsoff(connp->conn_rq);
	}

	udp_close_free(connp);

	/*
	 * Now we are truly single threaded on this stream, and can
	 * delete the things hanging off the connp, and finally the connp.
	 * We removed this connp from the fanout list, it cannot be
	 * accessed thru the fanouts, and we already waited for the
	 * conn_ref to drop to 0. We are already in close, so
	 * there cannot be any other thread from the top. qprocsoff
	 * has completed, and service has completed or won't run in
	 * future.
	 */
	ASSERT(connp->conn_ref == 1);

	if (!IPCL_IS_NONSTR(connp)) {
		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
	} else {
		ip_free_helper_stream(connp);
	}

	connp->conn_ref--;
	ipcl_conn_destroy(connp);
}

/*
 * Socket close entry point; a thin wrapper around udp_do_close().
 * Always returns 0.
 */
/* ARGSUSED1 */
int
udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
{
	conn_t	*connp = (conn_t *)proto_handle;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	udp_do_close(connp);
	return (0);
}

/*
 * Bind a UDP endpoint to a local address and port.
 *
 * connp   - endpoint to bind; must be in TS_UNBND state.
 * sa/len  - the address; len must be exactly sizeof (sin_t) or
 *           sizeof (sin6_t) and must agree with the endpoint's family.
 *           On success the port actually bound is written back into sa
 *           in network byte order.
 * cr      - caller's credentials, used for the privileged-port and MLP
 *           checks.
 * bind_to_req_port_only - if B_TRUE and a non-zero port was requested,
 *           fail with -TADDRBUSY when that port is taken instead of
 *           searching for another one.
 *
 * Returns 0 on success.  Failures are reported either as a positive
 * errno value or as a negated TLI error (-TBADADDR, -TACCES, -TNOADDR,
 * -TOUTSTATE, -TADDRBUSY); callers are expected to translate the
 * negative values.
 *
 * Locking: conn_lock is taken before the port search and the fanout
 * bucket's uf_lock is taken inside it.  Failures that occur after the
 * endpoint has been inserted in the bind hash go through late_error,
 * which undoes the binding.
 */
static int
udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
    boolean_t bind_to_req_port_only)
{
	sin_t		*sin;
	sin6_t		*sin6;
	udp_t		*udp = connp->conn_udp;
	int		error = 0;
	ip_laddr_t	laddr_type = IPVL_UNICAST_UP;	/* INADDR_ANY */
	in_port_t	port;		/* Host byte order */
	in_port_t	requested_port;	/* Host byte order */
	int		count;
	ipaddr_t	v4src;		/* Set if AF_INET */
	in6_addr_t	v6src;
	int		loopmax;
	udp_fanout_t	*udpf;
	in_port_t	lport;		/* Network byte order */
	uint_t		scopeid = 0;
	zoneid_t	zoneid = IPCL_ZONEID(connp);
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	boolean_t	is_inaddr_any;
	mlp_type_t	addrtype, mlptype;
	udp_stack_t	*us = udp->udp_us;

	sin = NULL;
	sin6 = NULL;
	/* The address family is inferred from the length of the sockaddr. */
	switch (len) {
	case sizeof (sin_t):	/* Complete IPv4 address */
		sin = (sin_t *)sa;

		if (sin == NULL || !OK_32PTR((char *)sin))
			return (EINVAL);

		if (connp->conn_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			return (EAFNOSUPPORT);
		}
		v4src = sin->sin_addr.s_addr;
		IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
		if (v4src != INADDR_ANY) {
			laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
			    B_TRUE);
		}
		port = ntohs(sin->sin_port);
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sin6 = (sin6_t *)sa;

		if (sin6 == NULL || !OK_32PTR((char *)sin6))
			return (EINVAL);

		if (connp->conn_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			return (EAFNOSUPPORT);
		}
		v6src = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			/* V4-mapped binds are refused on v6-only sockets. */
			if (connp->conn_ipv6_v6only)
				return (EADDRNOTAVAIL);

			IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
			if (v4src != INADDR_ANY) {
				laddr_type = ip_laddr_verify_v4(v4src,
				    zoneid, ipst, B_FALSE);
			}
		} else {
			if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
				if (IN6_IS_ADDR_LINKSCOPE(&v6src))
					scopeid = sin6->sin6_scope_id;
				laddr_type = ip_laddr_verify_v6(&v6src,
				    zoneid, ipst, B_TRUE, scopeid);
			}
		}
		port = ntohs(sin6->sin6_port);
		break;

	default:		/* Invalid request */
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", len);
		return (-TBADADDR);
	}

	/* Is the local address a valid unicast, multicast, or broadcast? */
	if (laddr_type == IPVL_BAD)
		return (EADDRNOTAVAIL);

	requested_port = port;

	/*
	 * bind_to_req_port_only stays B_TRUE only when the caller asked
	 * for it and a specific port was requested.
	 */
	if (requested_port == 0 || !bind_to_req_port_only)
		bind_to_req_port_only = B_FALSE;
	else		/* T_BIND_REQ and requested_port != 0 */
		bind_to_req_port_only = B_TRUE;

	if (requested_port == 0) {
		/*
		 * If the application passed in zero for the port number, it
		 * doesn't care which port number we bind to. Get one in the
		 * valid range.
		 */
		if (connp->conn_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			port = udp_update_next_port(udp,
			    us->us_next_port_to_try, B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < us->us_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			/* Also check the extra privileged port list. */
			for (i = 0; i < us->us_num_epriv_ports; i++) {
				if (port == us->us_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
				return (-TACCES);
		}
	}

	/* A zero port here means no anonymous port could be selected. */
	if (port == 0)
		return (-TNOADDR);

	/*
	 * The state must be TS_UNBND. TPI mandates that users must send
	 * TPI primitives only 1 at a time and wait for the response before
	 * sending the next primitive.
	 */
	mutex_enter(&connp->conn_lock);
	if (udp->udp_state != TS_UNBND) {
		mutex_exit(&connp->conn_lock);
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		return (-TOUTSTATE);
	}
	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it. Since the udp is
	 * not yet in the bind hash list, we don't grab the uf_lock to
	 * change conn_ipversion
	 */
	if (connp->conn_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
	} else {
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			/*
			 * no need to hold the uf_lock to set the conn_ipversion
			 * since we are not yet in the fanout list
			 */
			connp->conn_ipversion = IPV4_VERSION;
			connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
		} else {
			connp->conn_ipversion = IPV6_VERSION;
			connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
		}
	}

	/*
	 * If conn_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream.  If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set the bind fails; otherwise we
	 * search for any unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if conn_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different than in SunOS 4.X which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	count = 0;
	if (connp->conn_anon_priv_bind) {
		/*
		 * loopmax = (IPPORT_RESERVED-1) -
		 *    us->us_min_anonpriv_port + 1
		 */
		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
	} else {
		loopmax = us->us_largest_anon_port -
		    us->us_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);

	/*
	 * Port search loop.  Every 'break' out of this outer loop leaves
	 * with udpf->uf_lock held for the chosen bucket; the lock is
	 * dropped after the endpoint has been inserted in the bind hash.
	 */
	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;
		conn_t		*connp1;

		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
		    us->us_bind_fanout_size)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			connp1 = udp1->udp_connp;

			if (lport != connp1->conn_lport)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * is changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (connp1->conn_exclbind || connp->conn_exclbind ||
			    IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
				if (V6_OR_V4_INADDR_ANY(
				    connp1->conn_bound_addr_v6) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(
				    &connp1->conn_bound_addr_v6,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (connp->conn_ipversion != connp1->conn_ipversion) {

				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a
				 * particular port number, then ignore any
				 * bindings of the other protocol that may
				 * conflict. This allows the user to bind IPv6
				 * alone and get both v4 and v6, or bind
				 * both and get each separately. On subsequent
				 * times through the loop, we're checking a
				 * port that we chose (not the user) and thus
				 * we do not allow casual duplicate bindings.
				 */
				if (count == 0 && requested_port != 0)
					continue;
			}

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct different IP addresses
			 * (non-wildcard, also), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
			    !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
			    &v6src)) {
				continue;
			}
			/* A conflicting binding was found; udp1 is non-NULL. */
			break;
		}

		/*
		 * With SO_REUSEADDR and an explicitly requested port, any
		 * conflict other than an exclusive-bind one is tolerated.
		 */
		if (!found_exclbind &&
		    (connp->conn_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when requested port
			 * is bound (and only first of the for()
			 * loop iteration).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 *
			 */
			mutex_exit(&connp->conn_lock);
			return (-TADDRBUSY);
		}

		if (connp->conn_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update us->us_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    us->us_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			mutex_exit(&connp->conn_lock);
			return (-TNOADDR);
		}
	}

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, ip_attr_connect will fill in the correct
	 * address when a packet is about to be sent.
	 * If we are binding to a broadcast or multicast address then
	 * we just set the conn_bound_addr since we don't want to use
	 * that as the source address when sending.
	 */
	connp->conn_bound_addr_v6 = v6src;
	connp->conn_laddr_v6 = v6src;
	if (scopeid != 0) {
		connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
		connp->conn_ixa->ixa_scopeid = scopeid;
		connp->conn_incoming_ifindex = scopeid;
	} else {
		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		connp->conn_incoming_ifindex = connp->conn_bound_if;
	}

	switch (laddr_type) {
	case IPVL_UNICAST_UP:
	case IPVL_UNICAST_DOWN:
		connp->conn_saddr_v6 = v6src;
		connp->conn_mcbc_bind = B_FALSE;
		break;
	case IPVL_MCAST:
	case IPVL_BCAST:
		/* ip_set_destination will pick a source address later */
		connp->conn_saddr_v6 = ipv6_all_zeros;
		connp->conn_mcbc_bind = B_TRUE;
		break;
	}

	/* Any errors after this point should use late_error */
	connp->conn_lport = lport;

	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
		us->us_next_port_to_try = port + 1;
	}

	/* Initialize the T_BIND_ACK. */
	if (connp->conn_family == AF_INET) {
		sin->sin_port = connp->conn_lport;
	} else {
		sin6->sin6_port = connp->conn_lport;
	}
	udp->udp_state = TS_IDLE;
	udp_bind_hash_insert(udpf, udp);
	/* Both locks taken for the port search are released here. */
	mutex_exit(&udpf->uf_lock);
	mutex_exit(&connp->conn_lock);

	if (cl_inet_bind) {
		/*
		 * Running in cluster mode - register bind information
		 */
		if (connp->conn_ipversion == IPV4_VERSION) {
			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
			    (in_port_t)connp->conn_lport, NULL);
		} else {
			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
			    IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
			    (in_port_t)connp->conn_lport, NULL);
		}
	}

	/*
	 * conn_lock is re-taken for the labeled-system checks and the
	 * header template below; each failure path drops it before
	 * jumping to late_error, which acquires it again.
	 */
	mutex_enter(&connp->conn_lock);
	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
	if (is_system_labeled() && (!connp->conn_anon_port ||
	    connp->conn_anon_mlp)) {
		uint16_t mlpport;
		zone_t *zone;

		zone = crgetzone(cr);
		connp->conn_mlp_type =
		    connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
		    mlptSingle;
		addrtype = tsol_mlp_addr_type(
		    connp->conn_allzones ? ALL_ZONES : zone->zone_id,
		    IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
		if (addrtype == mlptSingle) {
			error = -TNOADDR;
			mutex_exit(&connp->conn_lock);
			goto late_error;
		}
		mlpport = connp->conn_anon_port ? PMAPPORT : port;
		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
		    addrtype);

		/*
		 * It is a coding error to attempt to bind an MLP port
		 * without first setting SOL_SOCKET/SCM_UCRED.
		 */
		if (mlptype != mlptSingle &&
		    connp->conn_mlp_type == mlptSingle) {
			error = EINVAL;
			mutex_exit(&connp->conn_lock);
			goto late_error;
		}

		/*
		 * It is an access violation to attempt to bind an MLP port
		 * without NET_BINDMLP privilege.
		 */
		if (mlptype != mlptSingle &&
		    secpolicy_net_bindmlp(cr) != 0) {
			if (connp->conn_debug) {
				(void) strlog(UDP_MOD_ID, 0, 1,
				    SL_ERROR|SL_TRACE,
				    "udp_bind: no priv for multilevel port %d",
				    mlpport);
			}
			error = -TACCES;
			mutex_exit(&connp->conn_lock);
			goto late_error;
		}

		/*
		 * If we're specifically binding a shared IP address and the
		 * port is MLP on shared addresses, then check to see if this
		 * zone actually owns the MLP.  Reject if not.
		 */
		if (mlptype == mlptShared && addrtype == mlptShared) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid_t mlpzone;

			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
			    htons(mlpport));
			if (connp->conn_zoneid != mlpzone) {
				if (connp->conn_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: attempt to bind port "
					    "%d on shared addr in zone %d "
					    "(should be %d)",
					    mlpport, connp->conn_zoneid,
					    mlpzone);
				}
				error = -TACCES;
				mutex_exit(&connp->conn_lock);
				goto late_error;
			}
		}
		if (connp->conn_anon_port) {
			error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
			    port, B_TRUE);
			if (error != 0) {
				if (connp->conn_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: cannot establish anon "
					    "MLP for port %d", port);
				}
				error = -TACCES;
				mutex_exit(&connp->conn_lock);
				goto late_error;
			}
		}
		connp->conn_mlp_type = mlptype;
	}

	/*
	 * We create an initial header template here to make a subsequent
	 * sendto have a starting point. Since conn_last_dst is zero the
	 * first sendto will always follow the 'dst changed' code path.
	 * Note that we defer massaging options and the related checksum
	 * adjustment until we have a destination address.
	 */
	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		goto late_error;
	}
	/* Just in case */
	connp->conn_faddr_v6 = ipv6_all_zeros;
	connp->conn_fport = 0;
	connp->conn_v6lastdst = ipv6_all_zeros;
	mutex_exit(&connp->conn_lock);

	error = ip_laddr_fanout_insert(connp);
	if (error != 0)
		goto late_error;

	/* Bind succeeded */
	return (0);

late_error:
	/*
	 * We had already picked the port number, and then the bind failed.
	 * Entered without conn_lock held; 'error' carries the failure
	 * code to return.
	 */
	mutex_enter(&connp->conn_lock);
	udpf = &us->us_bind_fanout[
	    UDP_BIND_HASH(connp->conn_lport,
	    us->us_bind_fanout_size)];
	mutex_enter(&udpf->uf_lock);
	connp->conn_saddr_v6 = ipv6_all_zeros;
	connp->conn_bound_addr_v6 = ipv6_all_zeros;
	connp->conn_laddr_v6 = ipv6_all_zeros;
	if (scopeid != 0) {
		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		connp->conn_incoming_ifindex = connp->conn_bound_if;
	}
	udp->udp_state = TS_UNBND;
	udp_bind_hash_remove(udp, B_TRUE);
	connp->conn_lport = 0;
	mutex_exit(&udpf->uf_lock);
	connp->conn_anon_port = B_FALSE;
	connp->conn_mlp_type = mlptSingle;

	connp->conn_v6lastdst = ipv6_all_zeros;

	/* Restore the header that was built above - different source address */
	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	return (error);
}

/*
 * Socket bind entry point.
 *
 * A NULL 'sa' requests an unbind; otherwise the endpoint is bound to
 * the given address, failing if the requested port is unavailable.
 * Returns 0 or a positive errno value: negative TLI errors from the
 * worker are translated here, with -TOUTSTATE mapped to EINVAL.
 */
int
udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
    socklen_t len, cred_t *cr)
{
	int		error;
	conn_t		*connp;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	connp = (conn_t *)proto_handle;

	if (sa == NULL)
		error = udp_do_unbind(connp);
	else
		error = udp_do_bind(connp, sa, len, cr, B_TRUE);

	if (error < 0) {
		if (error == -TOUTSTATE)
			error = EINVAL;
		else
			error = proto_tlitosyserr(-error);
	}

	return (error);
}

/*
 * Bind an endpoint that has not been explicitly bound, using the
 * unspecified address for the endpoint's family.  The sockaddr starts
 * from sin_null/sin6_null (presumably all-zero, which would leave the
 * port at 0 so that udp_do_bind() picks one - confirm), and
 * bind_to_req_port_only is B_FALSE.
 *
 * Returns 0 or a positive errno value; negative TLI errors are
 * translated with proto_tlitosyserr().
 */
static int
udp_implicit_bind(conn_t *connp, cred_t *cr)
{
	sin6_t sin6addr;
	sin_t *sin;
	sin6_t *sin6;
	socklen_t len;
	int error;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	/* sin6addr provides the storage for either address family. */
	if (connp->conn_family == AF_INET) {
		len = sizeof (struct sockaddr_in);
		sin = (sin_t *)&sin6addr;
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
	} else {
		ASSERT(connp->conn_family == AF_INET6);
		len = sizeof (sin6_t);
		sin6 = (sin6_t *)&sin6addr;
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		V6_SET_ZERO(sin6->sin6_addr);
	}

	error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
	    cr, B_FALSE);
	return ((error < 0) ? proto_tlitosyserr(-error) : error);
}

/*
 * This routine removes a port number association from a stream. It
 * is called by udp_unbind and udp_tpi_unbind.
5606 */ 5607 static int 5608 udp_do_unbind(conn_t *connp) 5609 { 5610 udp_t *udp = connp->conn_udp; 5611 udp_fanout_t *udpf; 5612 udp_stack_t *us = udp->udp_us; 5613 5614 if (cl_inet_unbind != NULL) { 5615 /* 5616 * Running in cluster mode - register unbind information 5617 */ 5618 if (connp->conn_ipversion == IPV4_VERSION) { 5619 (*cl_inet_unbind)( 5620 connp->conn_netstack->netstack_stackid, 5621 IPPROTO_UDP, AF_INET, 5622 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5623 (in_port_t)connp->conn_lport, NULL); 5624 } else { 5625 (*cl_inet_unbind)( 5626 connp->conn_netstack->netstack_stackid, 5627 IPPROTO_UDP, AF_INET6, 5628 (uint8_t *)&(connp->conn_laddr_v6), 5629 (in_port_t)connp->conn_lport, NULL); 5630 } 5631 } 5632 5633 mutex_enter(&connp->conn_lock); 5634 /* If a bind has not been done, we can't unbind. */ 5635 if (udp->udp_state == TS_UNBND) { 5636 mutex_exit(&connp->conn_lock); 5637 return (-TOUTSTATE); 5638 } 5639 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5640 us->us_bind_fanout_size)]; 5641 mutex_enter(&udpf->uf_lock); 5642 udp_bind_hash_remove(udp, B_TRUE); 5643 connp->conn_saddr_v6 = ipv6_all_zeros; 5644 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5645 connp->conn_laddr_v6 = ipv6_all_zeros; 5646 connp->conn_mcbc_bind = B_FALSE; 5647 connp->conn_lport = 0; 5648 /* In case we were also connected */ 5649 connp->conn_faddr_v6 = ipv6_all_zeros; 5650 connp->conn_fport = 0; 5651 mutex_exit(&udpf->uf_lock); 5652 5653 connp->conn_v6lastdst = ipv6_all_zeros; 5654 udp->udp_state = TS_UNBND; 5655 5656 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5657 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5658 mutex_exit(&connp->conn_lock); 5659 5660 ip_unbind(connp); 5661 5662 return (0); 5663 } 5664 5665 /* 5666 * It associates a default destination address with the stream. 
5667 */ 5668 static int 5669 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 5670 cred_t *cr, pid_t pid) 5671 { 5672 sin6_t *sin6; 5673 sin_t *sin; 5674 in6_addr_t v6dst; 5675 ipaddr_t v4dst; 5676 uint16_t dstport; 5677 uint32_t flowinfo; 5678 udp_fanout_t *udpf; 5679 udp_t *udp, *udp1; 5680 ushort_t ipversion; 5681 udp_stack_t *us; 5682 int error; 5683 conn_t *connp1; 5684 ip_xmit_attr_t *ixa; 5685 ip_xmit_attr_t *oldixa; 5686 uint_t scopeid = 0; 5687 uint_t srcid = 0; 5688 in6_addr_t v6src = connp->conn_saddr_v6; 5689 boolean_t v4mapped; 5690 5691 udp = connp->conn_udp; 5692 us = udp->udp_us; 5693 sin = NULL; 5694 sin6 = NULL; 5695 v4dst = INADDR_ANY; 5696 flowinfo = 0; 5697 5698 /* 5699 * Address has been verified by the caller 5700 */ 5701 switch (len) { 5702 default: 5703 /* 5704 * Should never happen 5705 */ 5706 return (EINVAL); 5707 5708 case sizeof (sin_t): 5709 sin = (sin_t *)sa; 5710 v4dst = sin->sin_addr.s_addr; 5711 dstport = sin->sin_port; 5712 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5713 ASSERT(connp->conn_ipversion == IPV4_VERSION); 5714 ipversion = IPV4_VERSION; 5715 break; 5716 5717 case sizeof (sin6_t): 5718 sin6 = (sin6_t *)sa; 5719 v6dst = sin6->sin6_addr; 5720 dstport = sin6->sin6_port; 5721 srcid = sin6->__sin6_src_id; 5722 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst); 5723 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5724 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 5725 v4mapped, connp->conn_netstack)) { 5726 /* Mismatch v4mapped/v6 specified by srcid. */ 5727 return (EADDRNOTAVAIL); 5728 } 5729 } 5730 if (v4mapped) { 5731 if (connp->conn_ipv6_v6only) 5732 return (EADDRNOTAVAIL); 5733 5734 /* 5735 * Destination adress is mapped IPv6 address. 5736 * Source bound address should be unspecified or 5737 * IPv6 mapped address as well. 
5738 */ 5739 if (!IN6_IS_ADDR_UNSPECIFIED( 5740 &connp->conn_bound_addr_v6) && 5741 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { 5742 return (EADDRNOTAVAIL); 5743 } 5744 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 5745 ipversion = IPV4_VERSION; 5746 flowinfo = 0; 5747 } else { 5748 ipversion = IPV6_VERSION; 5749 flowinfo = sin6->sin6_flowinfo; 5750 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 5751 scopeid = sin6->sin6_scope_id; 5752 } 5753 break; 5754 } 5755 5756 if (dstport == 0) 5757 return (-TBADADDR); 5758 5759 /* 5760 * If there is a different thread using conn_ixa then we get a new 5761 * copy and cut the old one loose from conn_ixa. Otherwise we use 5762 * conn_ixa and prevent any other thread from using/changing it. 5763 * Once connect() is done other threads can use conn_ixa since the 5764 * refcnt will be back at one. 5765 * We defer updating conn_ixa until later to handle any concurrent 5766 * conn_ixa_cleanup thread. 5767 */ 5768 ixa = conn_get_ixa(connp, B_FALSE); 5769 if (ixa == NULL) 5770 return (ENOMEM); 5771 5772 mutex_enter(&connp->conn_lock); 5773 /* 5774 * This udp_t must have bound to a port already before doing a connect. 5775 * Reject if a connect is in progress (we drop conn_lock during 5776 * udp_do_connect). 
5777 */ 5778 if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) { 5779 mutex_exit(&connp->conn_lock); 5780 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5781 "udp_connect: bad state, %u", udp->udp_state); 5782 ixa_refrele(ixa); 5783 return (-TOUTSTATE); 5784 } 5785 ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL); 5786 5787 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5788 us->us_bind_fanout_size)]; 5789 5790 mutex_enter(&udpf->uf_lock); 5791 if (udp->udp_state == TS_DATA_XFER) { 5792 /* Already connected - clear out state */ 5793 if (connp->conn_mcbc_bind) 5794 connp->conn_saddr_v6 = ipv6_all_zeros; 5795 else 5796 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 5797 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 5798 connp->conn_faddr_v6 = ipv6_all_zeros; 5799 connp->conn_fport = 0; 5800 udp->udp_state = TS_IDLE; 5801 } 5802 5803 connp->conn_fport = dstport; 5804 connp->conn_ipversion = ipversion; 5805 if (ipversion == IPV4_VERSION) { 5806 /* 5807 * Interpret a zero destination to mean loopback. 5808 * Update the T_CONN_REQ (sin/sin6) since it is used to 5809 * generate the T_CONN_CON. 5810 */ 5811 if (v4dst == INADDR_ANY) { 5812 v4dst = htonl(INADDR_LOOPBACK); 5813 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5814 if (connp->conn_family == AF_INET) { 5815 sin->sin_addr.s_addr = v4dst; 5816 } else { 5817 sin6->sin6_addr = v6dst; 5818 } 5819 } 5820 connp->conn_faddr_v6 = v6dst; 5821 connp->conn_flowinfo = 0; 5822 } else { 5823 ASSERT(connp->conn_ipversion == IPV6_VERSION); 5824 /* 5825 * Interpret a zero destination to mean loopback. 5826 * Update the T_CONN_REQ (sin/sin6) since it is used to 5827 * generate the T_CONN_CON. 
5828 */ 5829 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 5830 v6dst = ipv6_loopback; 5831 sin6->sin6_addr = v6dst; 5832 } 5833 connp->conn_faddr_v6 = v6dst; 5834 connp->conn_flowinfo = flowinfo; 5835 } 5836 mutex_exit(&udpf->uf_lock); 5837 5838 /* 5839 * We update our cred/cpid based on the caller of connect 5840 */ 5841 if (connp->conn_cred != cr) { 5842 crhold(cr); 5843 crfree(connp->conn_cred); 5844 connp->conn_cred = cr; 5845 } 5846 connp->conn_cpid = pid; 5847 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 5848 ixa->ixa_cred = cr; 5849 ixa->ixa_cpid = pid; 5850 if (is_system_labeled()) { 5851 /* We need to restart with a label based on the cred */ 5852 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 5853 } 5854 5855 if (scopeid != 0) { 5856 ixa->ixa_flags |= IXAF_SCOPEID_SET; 5857 ixa->ixa_scopeid = scopeid; 5858 connp->conn_incoming_ifindex = scopeid; 5859 } else { 5860 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5861 connp->conn_incoming_ifindex = connp->conn_bound_if; 5862 } 5863 /* 5864 * conn_connect will drop conn_lock and reacquire it. 5865 * To prevent a send* from messing with this udp_t while the lock 5866 * is dropped we set udp_state and clear conn_v6lastdst. 5867 * That will make all send* fail with EISCONN. 5868 */ 5869 connp->conn_v6lastdst = ipv6_all_zeros; 5870 udp->udp_state = TS_WCON_CREQ; 5871 5872 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC); 5873 mutex_exit(&connp->conn_lock); 5874 if (error != 0) 5875 goto connect_failed; 5876 5877 /* 5878 * The addresses have been verified. Time to insert in 5879 * the correct fanout list. 
5880 */ 5881 error = ipcl_conn_insert(connp); 5882 if (error != 0) 5883 goto connect_failed; 5884 5885 mutex_enter(&connp->conn_lock); 5886 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5887 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5888 if (error != 0) { 5889 mutex_exit(&connp->conn_lock); 5890 goto connect_failed; 5891 } 5892 5893 udp->udp_state = TS_DATA_XFER; 5894 /* Record this as the "last" send even though we haven't sent any */ 5895 connp->conn_v6lastdst = connp->conn_faddr_v6; 5896 connp->conn_lastipversion = connp->conn_ipversion; 5897 connp->conn_lastdstport = connp->conn_fport; 5898 connp->conn_lastflowinfo = connp->conn_flowinfo; 5899 connp->conn_lastscopeid = scopeid; 5900 connp->conn_lastsrcid = srcid; 5901 /* Also remember a source to use together with lastdst */ 5902 connp->conn_v6lastsrc = v6src; 5903 5904 oldixa = conn_replace_ixa(connp, ixa); 5905 mutex_exit(&connp->conn_lock); 5906 ixa_refrele(oldixa); 5907 5908 /* 5909 * We've picked a source address above. Now we can 5910 * verify that the src/port/dst/port is unique for all 5911 * connections in TS_DATA_XFER, skipping ourselves. 
5912 */ 5913 mutex_enter(&udpf->uf_lock); 5914 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 5915 if (udp1->udp_state != TS_DATA_XFER) 5916 continue; 5917 5918 if (udp1 == udp) 5919 continue; 5920 5921 connp1 = udp1->udp_connp; 5922 if (connp->conn_lport != connp1->conn_lport || 5923 connp->conn_ipversion != connp1->conn_ipversion || 5924 dstport != connp1->conn_fport || 5925 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 5926 &connp1->conn_laddr_v6) || 5927 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) || 5928 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) || 5929 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid))) 5930 continue; 5931 mutex_exit(&udpf->uf_lock); 5932 error = -TBADADDR; 5933 goto connect_failed; 5934 } 5935 if (cl_inet_connect2 != NULL) { 5936 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 5937 if (error != 0) { 5938 mutex_exit(&udpf->uf_lock); 5939 error = -TBADADDR; 5940 goto connect_failed; 5941 } 5942 } 5943 mutex_exit(&udpf->uf_lock); 5944 5945 ixa_refrele(ixa); 5946 return (0); 5947 5948 connect_failed: 5949 if (ixa != NULL) 5950 ixa_refrele(ixa); 5951 mutex_enter(&connp->conn_lock); 5952 mutex_enter(&udpf->uf_lock); 5953 udp->udp_state = TS_IDLE; 5954 connp->conn_faddr_v6 = ipv6_all_zeros; 5955 connp->conn_fport = 0; 5956 /* In case the source address was set above */ 5957 if (connp->conn_mcbc_bind) 5958 connp->conn_saddr_v6 = ipv6_all_zeros; 5959 else 5960 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 5961 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 5962 mutex_exit(&udpf->uf_lock); 5963 5964 connp->conn_v6lastdst = ipv6_all_zeros; 5965 connp->conn_flowinfo = 0; 5966 5967 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5968 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5969 mutex_exit(&connp->conn_lock); 5970 return (error); 5971 } 5972 5973 static int 5974 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5975 socklen_t len, sock_connid_t *id, 
cred_t *cr) 5976 { 5977 conn_t *connp = (conn_t *)proto_handle; 5978 udp_t *udp = connp->conn_udp; 5979 int error; 5980 boolean_t did_bind = B_FALSE; 5981 pid_t pid = curproc->p_pid; 5982 5983 /* All Solaris components should pass a cred for this operation. */ 5984 ASSERT(cr != NULL); 5985 5986 if (sa == NULL) { 5987 /* 5988 * Disconnect 5989 * Make sure we are connected 5990 */ 5991 if (udp->udp_state != TS_DATA_XFER) 5992 return (EINVAL); 5993 5994 error = udp_disconnect(connp); 5995 return (error); 5996 } 5997 5998 error = proto_verify_ip_addr(connp->conn_family, sa, len); 5999 if (error != 0) 6000 goto done; 6001 6002 /* do an implicit bind if necessary */ 6003 if (udp->udp_state == TS_UNBND) { 6004 error = udp_implicit_bind(connp, cr); 6005 /* 6006 * We could be racing with an actual bind, in which case 6007 * we would see EPROTO. We cross our fingers and try 6008 * to connect. 6009 */ 6010 if (!(error == 0 || error == EPROTO)) 6011 goto done; 6012 did_bind = B_TRUE; 6013 } 6014 /* 6015 * set SO_DGRAM_ERRIND 6016 */ 6017 connp->conn_dgram_errind = B_TRUE; 6018 6019 error = udp_do_connect(connp, sa, len, cr, pid); 6020 6021 if (error != 0 && did_bind) { 6022 int unbind_err; 6023 6024 unbind_err = udp_do_unbind(connp); 6025 ASSERT(unbind_err == 0); 6026 } 6027 6028 if (error == 0) { 6029 *id = 0; 6030 (*connp->conn_upcalls->su_connected) 6031 (connp->conn_upper_handle, 0, NULL, -1); 6032 } else if (error < 0) { 6033 error = proto_tlitosyserr(-error); 6034 } 6035 6036 done: 6037 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 6038 /* 6039 * No need to hold locks to set state 6040 * after connect failure socket state is undefined 6041 * We set the state only to imitate old sockfs behavior 6042 */ 6043 udp->udp_state = TS_IDLE; 6044 } 6045 return (error); 6046 } 6047 6048 int 6049 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6050 cred_t *cr) 6051 { 6052 sin6_t *sin6; 6053 sin_t *sin = NULL; 6054 uint_t srcid; 6055 conn_t *connp = 
(conn_t *)proto_handle; 6056 udp_t *udp = connp->conn_udp; 6057 int error = 0; 6058 udp_stack_t *us = udp->udp_us; 6059 ushort_t ipversion; 6060 pid_t pid = curproc->p_pid; 6061 ip_xmit_attr_t *ixa; 6062 6063 ASSERT(DB_TYPE(mp) == M_DATA); 6064 6065 /* All Solaris components should pass a cred for this operation. */ 6066 ASSERT(cr != NULL); 6067 6068 /* do an implicit bind if necessary */ 6069 if (udp->udp_state == TS_UNBND) { 6070 error = udp_implicit_bind(connp, cr); 6071 /* 6072 * We could be racing with an actual bind, in which case 6073 * we would see EPROTO. We cross our fingers and try 6074 * to connect. 6075 */ 6076 if (!(error == 0 || error == EPROTO)) { 6077 freemsg(mp); 6078 return (error); 6079 } 6080 } 6081 6082 /* Connected? */ 6083 if (msg->msg_name == NULL) { 6084 if (udp->udp_state != TS_DATA_XFER) { 6085 UDPS_BUMP_MIB(us, udpOutErrors); 6086 return (EDESTADDRREQ); 6087 } 6088 if (msg->msg_controllen != 0) { 6089 error = udp_output_ancillary(connp, NULL, NULL, mp, 6090 NULL, msg, cr, pid); 6091 } else { 6092 error = udp_output_connected(connp, mp, cr, pid); 6093 } 6094 if (us->us_sendto_ignerr) 6095 return (0); 6096 else 6097 return (error); 6098 } 6099 if (udp->udp_state == TS_DATA_XFER) { 6100 UDPS_BUMP_MIB(us, udpOutErrors); 6101 return (EISCONN); 6102 } 6103 error = proto_verify_ip_addr(connp->conn_family, 6104 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6105 if (error != 0) { 6106 UDPS_BUMP_MIB(us, udpOutErrors); 6107 return (error); 6108 } 6109 switch (connp->conn_family) { 6110 case AF_INET6: 6111 sin6 = (sin6_t *)msg->msg_name; 6112 6113 srcid = sin6->__sin6_src_id; 6114 6115 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6116 /* 6117 * Destination is a non-IPv4-compatible IPv6 address. 6118 * Send out an IPv6 format packet. 6119 */ 6120 6121 /* 6122 * If the local address is a mapped address return 6123 * an error. 
6124 * It would be possible to send an IPv6 packet but the 6125 * response would never make it back to the application 6126 * since it is bound to a mapped address. 6127 */ 6128 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 6129 UDPS_BUMP_MIB(us, udpOutErrors); 6130 return (EADDRNOTAVAIL); 6131 } 6132 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6133 sin6->sin6_addr = ipv6_loopback; 6134 ipversion = IPV6_VERSION; 6135 } else { 6136 if (connp->conn_ipv6_v6only) { 6137 UDPS_BUMP_MIB(us, udpOutErrors); 6138 return (EADDRNOTAVAIL); 6139 } 6140 6141 /* 6142 * If the local address is not zero or a mapped address 6143 * return an error. It would be possible to send an 6144 * IPv4 packet but the response would never make it 6145 * back to the application since it is bound to a 6146 * non-mapped address. 6147 */ 6148 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 6149 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 6150 UDPS_BUMP_MIB(us, udpOutErrors); 6151 return (EADDRNOTAVAIL); 6152 } 6153 6154 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 6155 V4_PART_OF_V6(sin6->sin6_addr) = 6156 htonl(INADDR_LOOPBACK); 6157 } 6158 ipversion = IPV4_VERSION; 6159 } 6160 6161 /* 6162 * We have to allocate an ip_xmit_attr_t before we grab 6163 * conn_lock and we need to hold conn_lock once we've check 6164 * conn_same_as_last_v6 to handle concurrent send* calls on a 6165 * socket. 
6166 */ 6167 if (msg->msg_controllen == 0) { 6168 ixa = conn_get_ixa(connp, B_FALSE); 6169 if (ixa == NULL) { 6170 UDPS_BUMP_MIB(us, udpOutErrors); 6171 return (ENOMEM); 6172 } 6173 } else { 6174 ixa = NULL; 6175 } 6176 mutex_enter(&connp->conn_lock); 6177 if (udp->udp_delayed_error != 0) { 6178 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 6179 6180 error = udp->udp_delayed_error; 6181 udp->udp_delayed_error = 0; 6182 6183 /* Compare IP address, port, and family */ 6184 6185 if (sin6->sin6_port == sin2->sin6_port && 6186 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 6187 &sin2->sin6_addr) && 6188 sin6->sin6_family == sin2->sin6_family) { 6189 mutex_exit(&connp->conn_lock); 6190 UDPS_BUMP_MIB(us, udpOutErrors); 6191 if (ixa != NULL) 6192 ixa_refrele(ixa); 6193 return (error); 6194 } 6195 } 6196 6197 if (msg->msg_controllen != 0) { 6198 mutex_exit(&connp->conn_lock); 6199 ASSERT(ixa == NULL); 6200 error = udp_output_ancillary(connp, NULL, sin6, mp, 6201 NULL, msg, cr, pid); 6202 } else if (conn_same_as_last_v6(connp, sin6) && 6203 connp->conn_lastsrcid == srcid && 6204 ipsec_outbound_policy_current(ixa)) { 6205 /* udp_output_lastdst drops conn_lock */ 6206 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6207 } else { 6208 /* udp_output_newdst drops conn_lock */ 6209 error = udp_output_newdst(connp, mp, NULL, sin6, 6210 ipversion, cr, pid, ixa); 6211 } 6212 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6213 if (us->us_sendto_ignerr) 6214 return (0); 6215 else 6216 return (error); 6217 case AF_INET: 6218 sin = (sin_t *)msg->msg_name; 6219 6220 ipversion = IPV4_VERSION; 6221 6222 if (sin->sin_addr.s_addr == INADDR_ANY) 6223 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 6224 6225 /* 6226 * We have to allocate an ip_xmit_attr_t before we grab 6227 * conn_lock and we need to hold conn_lock once we've check 6228 * conn_same_as_last_v6 to handle concurrent send* on a socket. 
6229 */ 6230 if (msg->msg_controllen == 0) { 6231 ixa = conn_get_ixa(connp, B_FALSE); 6232 if (ixa == NULL) { 6233 UDPS_BUMP_MIB(us, udpOutErrors); 6234 return (ENOMEM); 6235 } 6236 } else { 6237 ixa = NULL; 6238 } 6239 mutex_enter(&connp->conn_lock); 6240 if (udp->udp_delayed_error != 0) { 6241 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 6242 6243 error = udp->udp_delayed_error; 6244 udp->udp_delayed_error = 0; 6245 6246 /* Compare IP address and port */ 6247 6248 if (sin->sin_port == sin2->sin_port && 6249 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) { 6250 mutex_exit(&connp->conn_lock); 6251 UDPS_BUMP_MIB(us, udpOutErrors); 6252 if (ixa != NULL) 6253 ixa_refrele(ixa); 6254 return (error); 6255 } 6256 } 6257 if (msg->msg_controllen != 0) { 6258 mutex_exit(&connp->conn_lock); 6259 ASSERT(ixa == NULL); 6260 error = udp_output_ancillary(connp, sin, NULL, mp, 6261 NULL, msg, cr, pid); 6262 } else if (conn_same_as_last_v4(connp, sin) && 6263 ipsec_outbound_policy_current(ixa)) { 6264 /* udp_output_lastdst drops conn_lock */ 6265 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6266 } else { 6267 /* udp_output_newdst drops conn_lock */ 6268 error = udp_output_newdst(connp, mp, sin, NULL, 6269 ipversion, cr, pid, ixa); 6270 } 6271 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6272 if (us->us_sendto_ignerr) 6273 return (0); 6274 else 6275 return (error); 6276 default: 6277 return (EINVAL); 6278 } 6279 } 6280 6281 int 6282 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 6283 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb, 6284 sock_quiesce_arg_t *arg) 6285 { 6286 conn_t *connp = (conn_t *)proto_handle; 6287 udp_t *udp; 6288 struct T_capability_ack tca; 6289 struct sockaddr_in6 laddr, faddr; 6290 socklen_t laddrlen, faddrlen; 6291 short opts; 6292 struct stroptions *stropt; 6293 mblk_t *mp, *stropt_mp; 6294 int error; 6295 6296 udp = connp->conn_udp; 6297 6298 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 6299 6300 /* 6301 * 
setup the fallback stream that was allocated 6302 */ 6303 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6304 connp->conn_minor_arena = WR(q)->q_ptr; 6305 6306 RD(q)->q_ptr = WR(q)->q_ptr = connp; 6307 6308 WR(q)->q_qinfo = &udp_winit; 6309 6310 connp->conn_rq = RD(q); 6311 connp->conn_wq = WR(q); 6312 6313 /* Notify stream head about options before sending up data */ 6314 stropt_mp->b_datap->db_type = M_SETOPTS; 6315 stropt_mp->b_wptr += sizeof (*stropt); 6316 stropt = (struct stroptions *)stropt_mp->b_rptr; 6317 stropt->so_flags = SO_WROFF | SO_HIWAT; 6318 stropt->so_wroff = connp->conn_wroff; 6319 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 6320 putnext(RD(q), stropt_mp); 6321 6322 /* 6323 * Free the helper stream 6324 */ 6325 ip_free_helper_stream(connp); 6326 6327 if (!issocket) 6328 udp_use_pure_tpi(udp); 6329 6330 /* 6331 * Collect the information needed to sync with the sonode 6332 */ 6333 udp_do_capability_ack(udp, &tca, TC1_INFO); 6334 6335 laddrlen = faddrlen = sizeof (sin6_t); 6336 (void) udp_getsockname((sock_lower_handle_t)connp, 6337 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6338 error = udp_getpeername((sock_lower_handle_t)connp, 6339 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6340 if (error != 0) 6341 faddrlen = 0; 6342 6343 opts = 0; 6344 if (connp->conn_dgram_errind) 6345 opts |= SO_DGRAM_ERRIND; 6346 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 6347 opts |= SO_DONTROUTE; 6348 6349 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca, 6350 (struct sockaddr *)&laddr, laddrlen, 6351 (struct sockaddr *)&faddr, faddrlen, opts); 6352 6353 mutex_enter(&udp->udp_recv_lock); 6354 /* 6355 * Attempts to send data up during fallback will result in it being 6356 * queued in udp_t. First push up the datagrams obtained from the 6357 * socket, then any packets queued in udp_t. 
6358 */ 6359 if (mp != NULL) { 6360 mp->b_next = udp->udp_fallback_queue_head; 6361 udp->udp_fallback_queue_head = mp; 6362 } 6363 while (udp->udp_fallback_queue_head != NULL) { 6364 mp = udp->udp_fallback_queue_head; 6365 udp->udp_fallback_queue_head = mp->b_next; 6366 mutex_exit(&udp->udp_recv_lock); 6367 mp->b_next = NULL; 6368 putnext(RD(q), mp); 6369 mutex_enter(&udp->udp_recv_lock); 6370 } 6371 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6372 /* 6373 * No longer a streams less socket 6374 */ 6375 mutex_enter(&connp->conn_lock); 6376 connp->conn_flags &= ~IPCL_NONSTR; 6377 mutex_exit(&connp->conn_lock); 6378 6379 mutex_exit(&udp->udp_recv_lock); 6380 6381 ASSERT(connp->conn_ref >= 1); 6382 6383 return (0); 6384 } 6385 6386 /* ARGSUSED3 */ 6387 int 6388 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6389 socklen_t *salenp, cred_t *cr) 6390 { 6391 conn_t *connp = (conn_t *)proto_handle; 6392 udp_t *udp = connp->conn_udp; 6393 int error; 6394 6395 /* All Solaris components should pass a cred for this operation. */ 6396 ASSERT(cr != NULL); 6397 6398 mutex_enter(&connp->conn_lock); 6399 if (udp->udp_state != TS_DATA_XFER) 6400 error = ENOTCONN; 6401 else 6402 error = conn_getpeername(connp, sa, salenp); 6403 mutex_exit(&connp->conn_lock); 6404 return (error); 6405 } 6406 6407 /* ARGSUSED3 */ 6408 int 6409 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6410 socklen_t *salenp, cred_t *cr) 6411 { 6412 conn_t *connp = (conn_t *)proto_handle; 6413 int error; 6414 6415 /* All Solaris components should pass a cred for this operation. 
*/ 6416 ASSERT(cr != NULL); 6417 6418 mutex_enter(&connp->conn_lock); 6419 error = conn_getsockname(connp, sa, salenp); 6420 mutex_exit(&connp->conn_lock); 6421 return (error); 6422 } 6423 6424 int 6425 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6426 void *optvalp, socklen_t *optlen, cred_t *cr) 6427 { 6428 conn_t *connp = (conn_t *)proto_handle; 6429 int error; 6430 t_uscalar_t max_optbuf_len; 6431 void *optvalp_buf; 6432 int len; 6433 6434 /* All Solaris components should pass a cred for this operation. */ 6435 ASSERT(cr != NULL); 6436 6437 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6438 udp_opt_obj.odb_opt_des_arr, 6439 udp_opt_obj.odb_opt_arr_cnt, 6440 B_FALSE, B_TRUE, cr); 6441 if (error != 0) { 6442 if (error < 0) 6443 error = proto_tlitosyserr(-error); 6444 return (error); 6445 } 6446 6447 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6448 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6449 if (len == -1) { 6450 kmem_free(optvalp_buf, max_optbuf_len); 6451 return (EINVAL); 6452 } 6453 6454 /* 6455 * update optlen and copy option value 6456 */ 6457 t_uscalar_t size = MIN(len, *optlen); 6458 6459 bcopy(optvalp_buf, optvalp, size); 6460 bcopy(&size, optlen, sizeof (size)); 6461 6462 kmem_free(optvalp_buf, max_optbuf_len); 6463 return (0); 6464 } 6465 6466 int 6467 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6468 const void *optvalp, socklen_t optlen, cred_t *cr) 6469 { 6470 conn_t *connp = (conn_t *)proto_handle; 6471 int error; 6472 6473 /* All Solaris components should pass a cred for this operation. 
*/ 6474 ASSERT(cr != NULL); 6475 6476 error = proto_opt_check(level, option_name, optlen, NULL, 6477 udp_opt_obj.odb_opt_des_arr, 6478 udp_opt_obj.odb_opt_arr_cnt, 6479 B_TRUE, B_FALSE, cr); 6480 6481 if (error != 0) { 6482 if (error < 0) 6483 error = proto_tlitosyserr(-error); 6484 return (error); 6485 } 6486 6487 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6488 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6489 NULL, cr); 6490 6491 ASSERT(error >= 0); 6492 6493 return (error); 6494 } 6495 6496 void 6497 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6498 { 6499 conn_t *connp = (conn_t *)proto_handle; 6500 udp_t *udp = connp->conn_udp; 6501 6502 mutex_enter(&udp->udp_recv_lock); 6503 connp->conn_flow_cntrld = B_FALSE; 6504 mutex_exit(&udp->udp_recv_lock); 6505 } 6506 6507 /* ARGSUSED2 */ 6508 int 6509 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6510 { 6511 conn_t *connp = (conn_t *)proto_handle; 6512 6513 /* All Solaris components should pass a cred for this operation. */ 6514 ASSERT(cr != NULL); 6515 6516 /* shut down the send side */ 6517 if (how != SHUT_RD) 6518 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6519 SOCK_OPCTL_SHUT_SEND, 0); 6520 /* shut down the recv side */ 6521 if (how != SHUT_WR) 6522 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6523 SOCK_OPCTL_SHUT_RECV, 0); 6524 return (0); 6525 } 6526 6527 int 6528 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6529 int mode, int32_t *rvalp, cred_t *cr) 6530 { 6531 conn_t *connp = (conn_t *)proto_handle; 6532 int error; 6533 6534 /* All Solaris components should pass a cred for this operation. */ 6535 ASSERT(cr != NULL); 6536 6537 /* 6538 * If we don't have a helper stream then create one. 6539 * ip_create_helper_stream takes care of locking the conn_t, 6540 * so this check for NULL is just a performance optimization. 
6541 */ 6542 if (connp->conn_helper_info == NULL) { 6543 udp_stack_t *us = connp->conn_udp->udp_us; 6544 6545 ASSERT(us->us_ldi_ident != NULL); 6546 6547 /* 6548 * Create a helper stream for non-STREAMS socket. 6549 */ 6550 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6551 if (error != 0) { 6552 ip0dbg(("udp_ioctl: create of IP helper stream " 6553 "failed %d\n", error)); 6554 return (error); 6555 } 6556 } 6557 6558 switch (cmd) { 6559 case _SIOCSOCKFALLBACK: 6560 case TI_GETPEERNAME: 6561 case TI_GETMYNAME: 6562 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6563 cmd)); 6564 error = EINVAL; 6565 break; 6566 default: 6567 /* 6568 * Pass on to IP using helper stream 6569 */ 6570 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6571 cmd, arg, mode, cr, rvalp); 6572 break; 6573 } 6574 return (error); 6575 } 6576 6577 /* ARGSUSED */ 6578 int 6579 udp_accept(sock_lower_handle_t lproto_handle, 6580 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6581 cred_t *cr) 6582 { 6583 return (EOPNOTSUPP); 6584 } 6585 6586 /* ARGSUSED */ 6587 int 6588 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6589 { 6590 return (EOPNOTSUPP); 6591 } 6592 6593 sock_downcalls_t sock_udp_downcalls = { 6594 udp_activate, /* sd_activate */ 6595 udp_accept, /* sd_accept */ 6596 udp_bind, /* sd_bind */ 6597 udp_listen, /* sd_listen */ 6598 udp_connect, /* sd_connect */ 6599 udp_getpeername, /* sd_getpeername */ 6600 udp_getsockname, /* sd_getsockname */ 6601 udp_getsockopt, /* sd_getsockopt */ 6602 udp_setsockopt, /* sd_setsockopt */ 6603 udp_send, /* sd_send */ 6604 NULL, /* sd_send_uio */ 6605 NULL, /* sd_recv_uio */ 6606 NULL, /* sd_poll */ 6607 udp_shutdown, /* sd_shutdown */ 6608 udp_clr_flowctrl, /* sd_setflowctrl */ 6609 udp_ioctl, /* sd_ioctl */ 6610 udp_close /* sd_close */ 6611 }; 6612