1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 24 * Copyright 2014, OmniTI Computer Consulting, Inc. All rights reserved. 25 * Copyright 2015, Joyent, Inc. 26 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. 27 */ 28 /* Copyright (c) 1990 Mentat Inc. 
*/ 29 30 #include <sys/sysmacros.h> 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/stropts.h> 34 #include <sys/strlog.h> 35 #include <sys/strsun.h> 36 #define _SUN_TPI_VERSION 2 37 #include <sys/tihdr.h> 38 #include <sys/timod.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/strsubr.h> 42 #include <sys/suntpi.h> 43 #include <sys/xti_inet.h> 44 #include <sys/kmem.h> 45 #include <sys/cred_impl.h> 46 #include <sys/policy.h> 47 #include <sys/priv.h> 48 #include <sys/ucred.h> 49 #include <sys/zone.h> 50 51 #include <sys/socket.h> 52 #include <sys/socketvar.h> 53 #include <sys/sockio.h> 54 #include <sys/vtrace.h> 55 #include <sys/sdt.h> 56 #include <sys/debug.h> 57 #include <sys/isa_defs.h> 58 #include <sys/random.h> 59 #include <netinet/in.h> 60 #include <netinet/ip6.h> 61 #include <netinet/icmp6.h> 62 #include <netinet/udp.h> 63 64 #include <inet/common.h> 65 #include <inet/ip.h> 66 #include <inet/ip_impl.h> 67 #include <inet/ipsec_impl.h> 68 #include <inet/ip6.h> 69 #include <inet/ip_ire.h> 70 #include <inet/ip_if.h> 71 #include <inet/ip_multi.h> 72 #include <inet/ip_ndp.h> 73 #include <inet/proto_set.h> 74 #include <inet/mib2.h> 75 #include <inet/optcom.h> 76 #include <inet/snmpcom.h> 77 #include <inet/kstatcom.h> 78 #include <inet/ipclassifier.h> 79 #include <sys/squeue_impl.h> 80 #include <inet/ipnet.h> 81 #include <sys/vxlan.h> 82 #include <inet/inet_hash.h> 83 84 #include <sys/tsol/label.h> 85 #include <sys/tsol/tnet.h> 86 #include <rpc/pmap_prot.h> 87 88 #include <inet/udp_impl.h> 89 90 /* 91 * Synchronization notes: 92 * 93 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 94 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 95 * protects the contents of the udp_t. uf_lock protects the address and the 96 * fanout information. 97 * The lock order is conn_lock -> uf_lock. 
98 * 99 * The fanout lock uf_lock: 100 * When a UDP endpoint is bound to a local port, it is inserted into 101 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 102 * The size of the array is controlled by the udp_bind_fanout_size variable. 103 * This variable can be changed in /etc/system if the default value is 104 * not large enough. Each bind hash bucket is protected by a per bucket 105 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 106 * structure and a few other fields in the udp_t. A UDP endpoint is removed 107 * from the bind hash list only when it is being unbound or being closed. 108 * The per bucket lock also protects a UDP endpoint's state changes. 109 * 110 * Plumbing notes: 111 * UDP is always a device driver. For compatibility with mibopen() code 112 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 113 * dummy module. 114 * 115 * The above implies that we don't support any intermediate module to 116 * reside in between /dev/ip and udp -- in fact, we never supported such 117 * scenario in the past as the inter-layer communication semantics have 118 * always been private. 
119 */ 120 121 /* For /etc/system control */ 122 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 123 124 static void udp_addr_req(queue_t *q, mblk_t *mp); 125 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 126 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 127 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 128 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 129 const in6_addr_t *, in_port_t, uint32_t); 130 static void udp_capability_req(queue_t *q, mblk_t *mp); 131 static int udp_tpi_close(queue_t *q, int flags, cred_t *); 132 static void udp_close_free(conn_t *); 133 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 134 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 135 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 136 int sys_error); 137 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 138 t_scalar_t tlierr, int sys_error); 139 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 140 cred_t *cr); 141 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 142 char *value, caddr_t cp, cred_t *cr); 143 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 144 char *value, caddr_t cp, cred_t *cr); 145 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 146 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 147 ip_recv_attr_t *ira); 148 static void udp_info_req(queue_t *q, mblk_t *mp); 149 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 150 static int udp_lrput(queue_t *, mblk_t *); 151 static int udp_lwput(queue_t *, mblk_t *); 152 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 153 cred_t *credp, boolean_t isv6); 154 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 155 cred_t *credp); 156 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 157 cred_t *credp); 158 static boolean_t udp_opt_allow_udr_set(t_scalar_t 
level, t_scalar_t name); 159 int udp_opt_set(conn_t *connp, uint_t optset_context, 160 int level, int name, uint_t inlen, 161 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 162 void *thisdg_attrs, cred_t *cr); 163 int udp_opt_get(conn_t *connp, int level, int name, 164 uchar_t *ptr); 165 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 166 pid_t pid); 167 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 168 pid_t pid, ip_xmit_attr_t *ixa); 169 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 170 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 171 ip_xmit_attr_t *ixa); 172 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 173 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 174 int *); 175 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 176 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 177 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 178 static void udp_ud_err_connected(conn_t *, t_scalar_t); 179 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 180 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 181 boolean_t random); 182 static void udp_wput_other(queue_t *q, mblk_t *mp); 183 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 184 static int udp_wput_fallback(queue_t *q, mblk_t *mp); 185 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 186 187 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 188 static void udp_stack_fini(netstackid_t stackid, void *arg); 189 190 /* Common routines for TPI and socket module */ 191 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 192 193 /* Common routine for TPI and socket module */ 194 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); 195 static void udp_do_close(conn_t *); 196 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 197 boolean_t); 198 
static int udp_do_unbind(conn_t *); 199 200 int udp_getsockname(sock_lower_handle_t, 201 struct sockaddr *, socklen_t *, cred_t *); 202 int udp_getpeername(sock_lower_handle_t, 203 struct sockaddr *, socklen_t *, cred_t *); 204 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 205 cred_t *, pid_t); 206 207 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) 208 209 /* 210 * Checks if the given destination addr/port is allowed out. 211 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 212 * Called for each connect() and for sendto()/sendmsg() to a different 213 * destination. 214 * For connect(), called in udp_connect(). 215 * For sendto()/sendmsg(), called in udp_output_newdst(). 216 * 217 * This macro assumes that the cl_inet_connect2 hook is not NULL. 218 * Please check this before calling this macro. 219 * 220 * void 221 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 222 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 223 */ 224 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 225 (err) = 0; \ 226 /* \ 227 * Running in cluster mode - check and register active \ 228 * "connection" information \ 229 */ \ 230 if ((cp)->conn_ipversion == IPV4_VERSION) \ 231 (err) = (*cl_inet_connect2)( \ 232 (cp)->conn_netstack->netstack_stackid, \ 233 IPPROTO_UDP, is_outgoing, AF_INET, \ 234 (uint8_t *)&((cp)->conn_laddr_v4), \ 235 (cp)->conn_lport, \ 236 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 237 (in_port_t)(fport), NULL); \ 238 else \ 239 (err) = (*cl_inet_connect2)( \ 240 (cp)->conn_netstack->netstack_stackid, \ 241 IPPROTO_UDP, is_outgoing, AF_INET6, \ 242 (uint8_t *)&((cp)->conn_laddr_v6), \ 243 (cp)->conn_lport, \ 244 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 245 } 246 247 static struct module_info udp_mod_info = { 248 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 249 }; 250 251 /* 252 * Entry points for UDP as a 
device. 253 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 254 */ 255 static struct qinit udp_rinitv4 = { 256 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 257 }; 258 259 static struct qinit udp_rinitv6 = { 260 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 261 }; 262 263 static struct qinit udp_winit = { 264 udp_wput, ip_wsrv, NULL, NULL, NULL, &udp_mod_info 265 }; 266 267 /* UDP entry point during fallback */ 268 struct qinit udp_fallback_sock_winit = { 269 udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 270 }; 271 272 /* 273 * UDP needs to handle I_LINK and I_PLINK since ifconfig 274 * likes to use it as a place to hang the various streams. 275 */ 276 static struct qinit udp_lrinit = { 277 udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 278 }; 279 280 static struct qinit udp_lwinit = { 281 udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 282 }; 283 284 /* For AF_INET aka /dev/udp */ 285 struct streamtab udpinfov4 = { 286 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 287 }; 288 289 /* For AF_INET6 aka /dev/udp6 */ 290 struct streamtab udpinfov6 = { 291 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 292 }; 293 294 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 295 296 /* Default structure copied into T_INFO_ACK messages */ 297 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 298 T_INFO_ACK, 299 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 300 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 301 T_INVALID, /* CDATA_size. udp does not support connect data. */ 302 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 303 sizeof (sin_t), /* ADDR_size. */ 304 0, /* OPT_size - not initialized here */ 305 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 306 T_CLTS, /* SERV_type. udp supports connection-less. */ 307 TS_UNBND, /* CURRENT_state. This is set from udp_state. 
*/ 308 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 309 }; 310 311 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 312 313 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 314 T_INFO_ACK, 315 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 316 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 317 T_INVALID, /* CDATA_size. udp does not support connect data. */ 318 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 319 sizeof (sin6_t), /* ADDR_size. */ 320 0, /* OPT_size - not initialized here */ 321 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 322 T_CLTS, /* SERV_type. udp supports connection-less. */ 323 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 324 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 325 }; 326 327 /* 328 * UDP tunables related declarations. Definitions are in udp_tunables.c 329 */ 330 extern mod_prop_info_t udp_propinfo_tbl[]; 331 extern int udp_propinfo_count; 332 333 /* Setable in /etc/system */ 334 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 335 uint32_t udp_random_anon_port = 1; 336 337 /* 338 * Hook functions to enable cluster networking. 339 * On non-clustered systems these vectors must always be NULL 340 */ 341 342 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 343 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 344 void *args) = NULL; 345 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 346 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 347 void *args) = NULL; 348 349 typedef union T_primitives *t_primp_t; 350 351 /* 352 * Various protocols that encapsulate UDP have no real use for the source port. 353 * Instead, they want to vary the source port to provide better equal-cost 354 * multipathing and other systems that use fanout. Consider something like 355 * VXLAN. 
If you're actually sending multiple different streams to a single 356 * host, if you don't vary the source port, then the tuple of ( SRC IP, DST IP, 357 * SRC Port, DST Port) will always be the same. 358 * 359 * Here, we return a port to hash this to, if we know how to hash it. If for 360 * some reason we can't perform an L4 hash, then we just return the default 361 * value, usually the default port. After we determine the hash we transform it 362 * so that it's in the range of [ min, max ]. 363 * 364 * We'd like to avoid a pull up for the sake of performing the hash. If the 365 * first mblk_t doesn't have the full protocol header, then we just send it to 366 * the default. If for some reason we have an encapsulated packet that has its 367 * protocol header in different parts of an mblk_t, then we'll go with the 368 * default port. This means that that if a driver isn't consistent about how it 369 * generates the frames for a given flow, it will not always be consistently 370 * hashed. That should be an uncommon event. 371 */ 372 uint16_t 373 udp_srcport_hash(mblk_t *mp, int type, uint16_t min, uint16_t max, 374 uint16_t def) 375 { 376 size_t szused = 0; 377 ip6_t *ip6h; 378 ipha_t *ipha; 379 uint16_t sap; 380 uint64_t hash; 381 uint32_t mod; 382 383 ASSERT(min <= max); 384 385 if (type != UDP_HASH_VXLAN) 386 return (def); 387 388 if (!IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t))) 389 return (def); 390 391 if (MBLKL(mp) < VXLAN_HDR_LEN) { 392 return (def); 393 } else { 394 szused = VXLAN_HDR_LEN; 395 } 396 397 /* Can we hold a MAC header? */ 398 if (MBLKL(mp) + szused < sizeof (struct ether_header)) 399 return (def); 400 401 /* 402 * We need to lie about the starting offset into the message block for 403 * convenience. Undo it at the end. We know that inet_pkt_hash() won't 404 * modify the mblk_t. 
405 */ 406 mp->b_rptr += szused; 407 hash = inet_pkt_hash(DL_ETHER, mp, INET_PKT_HASH_L2 | 408 INET_PKT_HASH_L3 | INET_PKT_HASH_L4); 409 mp->b_rptr -= szused; 410 411 if (hash == 0) 412 return (def); 413 414 mod = max - min + 1; 415 return ((hash % mod) + min); 416 } 417 418 /* 419 * Return the next anonymous port in the privileged port range for 420 * bind checking. 421 * 422 * Trusted Extension (TX) notes: TX allows administrator to mark or 423 * reserve ports as Multilevel ports (MLP). MLP has special function 424 * on TX systems. Once a port is made MLP, it's not available as 425 * ordinary port. This creates "holes" in the port name space. It 426 * may be necessary to skip the "holes" find a suitable anon port. 427 */ 428 static in_port_t 429 udp_get_next_priv_port(udp_t *udp) 430 { 431 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 432 in_port_t nextport; 433 boolean_t restart = B_FALSE; 434 udp_stack_t *us = udp->udp_us; 435 436 retry: 437 if (next_priv_port < us->us_min_anonpriv_port || 438 next_priv_port >= IPPORT_RESERVED) { 439 next_priv_port = IPPORT_RESERVED - 1; 440 if (restart) 441 return (0); 442 restart = B_TRUE; 443 } 444 445 if (is_system_labeled() && 446 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 447 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 448 next_priv_port = nextport; 449 goto retry; 450 } 451 452 return (next_priv_port--); 453 } 454 455 /* 456 * Hash list removal routine for udp_t structures. 457 */ 458 static void 459 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 460 { 461 udp_t *udpnext; 462 kmutex_t *lockp; 463 udp_stack_t *us = udp->udp_us; 464 conn_t *connp = udp->udp_connp; 465 466 if (udp->udp_ptpbhn == NULL) 467 return; 468 469 /* 470 * Extract the lock pointer in case there are concurrent 471 * hash_remove's for this instance. 
472 */ 473 ASSERT(connp->conn_lport != 0); 474 if (!caller_holds_lock) { 475 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 476 us->us_bind_fanout_size)].uf_lock; 477 ASSERT(lockp != NULL); 478 mutex_enter(lockp); 479 } 480 if (udp->udp_ptpbhn != NULL) { 481 udpnext = udp->udp_bind_hash; 482 if (udpnext != NULL) { 483 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 484 udp->udp_bind_hash = NULL; 485 } 486 *udp->udp_ptpbhn = udpnext; 487 udp->udp_ptpbhn = NULL; 488 } 489 if (!caller_holds_lock) { 490 mutex_exit(lockp); 491 } 492 } 493 494 static void 495 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 496 { 497 conn_t *connp = udp->udp_connp; 498 udp_t **udpp; 499 udp_t *udpnext; 500 conn_t *connext; 501 502 ASSERT(MUTEX_HELD(&uf->uf_lock)); 503 ASSERT(udp->udp_ptpbhn == NULL); 504 udpp = &uf->uf_udp; 505 udpnext = udpp[0]; 506 if (udpnext != NULL) { 507 /* 508 * If the new udp bound to the INADDR_ANY address 509 * and the first one in the list is not bound to 510 * INADDR_ANY we skip all entries until we find the 511 * first one bound to INADDR_ANY. 512 * This makes sure that applications binding to a 513 * specific address get preference over those binding to 514 * INADDR_ANY. 515 */ 516 connext = udpnext->udp_connp; 517 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 518 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 519 while ((udpnext = udpp[0]) != NULL && 520 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 521 udpp = &(udpnext->udp_bind_hash); 522 } 523 if (udpnext != NULL) 524 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 525 } else { 526 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 527 } 528 } 529 udp->udp_bind_hash = udpnext; 530 udp->udp_ptpbhn = udpp; 531 udpp[0] = udp; 532 } 533 534 /* 535 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 536 * passed to udp_wput. 537 * It associates a port number and local address with the stream. 
538 * It calls IP to verify the local IP address, and calls IP to insert 539 * the conn_t in the fanout table. 540 * If everything is ok it then sends the T_BIND_ACK back up. 541 * 542 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 543 * without setting SO_REUSEADDR. This is needed so that they 544 * can be viewed as two independent transport protocols. 545 * However, anonymouns ports are allocated from the same range to avoid 546 * duplicating the us->us_next_port_to_try. 547 */ 548 static void 549 udp_tpi_bind(queue_t *q, mblk_t *mp) 550 { 551 sin_t *sin; 552 sin6_t *sin6; 553 mblk_t *mp1; 554 struct T_bind_req *tbr; 555 conn_t *connp; 556 udp_t *udp; 557 int error; 558 struct sockaddr *sa; 559 cred_t *cr; 560 561 /* 562 * All Solaris components should pass a db_credp 563 * for this TPI message, hence we ASSERT. 564 * But in case there is some other M_PROTO that looks 565 * like a TPI message sent by some other kernel 566 * component, we check and return an error. 567 */ 568 cr = msg_getcred(mp, NULL); 569 ASSERT(cr != NULL); 570 if (cr == NULL) { 571 udp_err_ack(q, mp, TSYSERR, EINVAL); 572 return; 573 } 574 575 connp = Q_TO_CONN(q); 576 udp = connp->conn_udp; 577 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 578 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 579 "udp_bind: bad req, len %u", 580 (uint_t)(mp->b_wptr - mp->b_rptr)); 581 udp_err_ack(q, mp, TPROTO, 0); 582 return; 583 } 584 if (udp->udp_state != TS_UNBND) { 585 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 586 "udp_bind: bad state, %u", udp->udp_state); 587 udp_err_ack(q, mp, TOUTSTATE, 0); 588 return; 589 } 590 /* 591 * Reallocate the message to make sure we have enough room for an 592 * address. 593 */ 594 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 595 if (mp1 == NULL) { 596 udp_err_ack(q, mp, TSYSERR, ENOMEM); 597 return; 598 } 599 600 mp = mp1; 601 602 /* Reset the message type in preparation for shipping it back. 
*/ 603 DB_TYPE(mp) = M_PCPROTO; 604 605 tbr = (struct T_bind_req *)mp->b_rptr; 606 switch (tbr->ADDR_length) { 607 case 0: /* Request for a generic port */ 608 tbr->ADDR_offset = sizeof (struct T_bind_req); 609 if (connp->conn_family == AF_INET) { 610 tbr->ADDR_length = sizeof (sin_t); 611 sin = (sin_t *)&tbr[1]; 612 *sin = sin_null; 613 sin->sin_family = AF_INET; 614 mp->b_wptr = (uchar_t *)&sin[1]; 615 sa = (struct sockaddr *)sin; 616 } else { 617 ASSERT(connp->conn_family == AF_INET6); 618 tbr->ADDR_length = sizeof (sin6_t); 619 sin6 = (sin6_t *)&tbr[1]; 620 *sin6 = sin6_null; 621 sin6->sin6_family = AF_INET6; 622 mp->b_wptr = (uchar_t *)&sin6[1]; 623 sa = (struct sockaddr *)sin6; 624 } 625 break; 626 627 case sizeof (sin_t): /* Complete IPv4 address */ 628 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 629 sizeof (sin_t)); 630 if (sa == NULL || !OK_32PTR((char *)sa)) { 631 udp_err_ack(q, mp, TSYSERR, EINVAL); 632 return; 633 } 634 if (connp->conn_family != AF_INET || 635 sa->sa_family != AF_INET) { 636 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 637 return; 638 } 639 break; 640 641 case sizeof (sin6_t): /* complete IPv6 address */ 642 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 643 sizeof (sin6_t)); 644 if (sa == NULL || !OK_32PTR((char *)sa)) { 645 udp_err_ack(q, mp, TSYSERR, EINVAL); 646 return; 647 } 648 if (connp->conn_family != AF_INET6 || 649 sa->sa_family != AF_INET6) { 650 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 651 return; 652 } 653 break; 654 655 default: /* Invalid request */ 656 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 657 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 658 udp_err_ack(q, mp, TBADADDR, 0); 659 return; 660 } 661 662 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 663 tbr->PRIM_type != O_T_BIND_REQ); 664 665 if (error != 0) { 666 if (error > 0) { 667 udp_err_ack(q, mp, TSYSERR, error); 668 } else { 669 udp_err_ack(q, mp, -error, 0); 670 } 671 } else { 672 tbr->PRIM_type = T_BIND_ACK; 
673 qreply(q, mp); 674 } 675 } 676 677 /* 678 * This routine handles each T_CONN_REQ message passed to udp. It 679 * associates a default destination address with the stream. 680 * 681 * After various error checks are completed, udp_connect() lays 682 * the target address and port into the composite header template. 683 * Then we ask IP for information, including a source address if we didn't 684 * already have one. Finally we send up the T_OK_ACK reply message. 685 */ 686 static void 687 udp_tpi_connect(queue_t *q, mblk_t *mp) 688 { 689 conn_t *connp = Q_TO_CONN(q); 690 int error; 691 socklen_t len; 692 struct sockaddr *sa; 693 struct T_conn_req *tcr; 694 cred_t *cr; 695 pid_t pid; 696 /* 697 * All Solaris components should pass a db_credp 698 * for this TPI message, hence we ASSERT. 699 * But in case there is some other M_PROTO that looks 700 * like a TPI message sent by some other kernel 701 * component, we check and return an error. 702 */ 703 cr = msg_getcred(mp, &pid); 704 ASSERT(cr != NULL); 705 if (cr == NULL) { 706 udp_err_ack(q, mp, TSYSERR, EINVAL); 707 return; 708 } 709 710 tcr = (struct T_conn_req *)mp->b_rptr; 711 712 /* A bit of sanity checking */ 713 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 714 udp_err_ack(q, mp, TPROTO, 0); 715 return; 716 } 717 718 if (tcr->OPT_length != 0) { 719 udp_err_ack(q, mp, TBADOPT, 0); 720 return; 721 } 722 723 /* 724 * Determine packet type based on type of address passed in 725 * the request should contain an IPv4 or IPv6 address. 726 * Make sure that address family matches the type of 727 * family of the address passed down. 
728 */ 729 len = tcr->DEST_length; 730 switch (tcr->DEST_length) { 731 default: 732 udp_err_ack(q, mp, TBADADDR, 0); 733 return; 734 735 case sizeof (sin_t): 736 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 737 sizeof (sin_t)); 738 break; 739 740 case sizeof (sin6_t): 741 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 742 sizeof (sin6_t)); 743 break; 744 } 745 746 error = proto_verify_ip_addr(connp->conn_family, sa, len); 747 if (error != 0) { 748 udp_err_ack(q, mp, TSYSERR, error); 749 return; 750 } 751 752 error = udp_do_connect(connp, sa, len, cr, pid); 753 if (error != 0) { 754 if (error < 0) 755 udp_err_ack(q, mp, -error, 0); 756 else 757 udp_err_ack(q, mp, TSYSERR, error); 758 } else { 759 mblk_t *mp1; 760 /* 761 * We have to send a connection confirmation to 762 * keep TLI happy. 763 */ 764 if (connp->conn_family == AF_INET) { 765 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 766 sizeof (sin_t), NULL, 0); 767 } else { 768 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 769 sizeof (sin6_t), NULL, 0); 770 } 771 if (mp1 == NULL) { 772 udp_err_ack(q, mp, TSYSERR, ENOMEM); 773 return; 774 } 775 776 /* 777 * Send ok_ack for T_CONN_REQ 778 */ 779 mp = mi_tpi_ok_ack_alloc(mp); 780 if (mp == NULL) { 781 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 782 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 783 return; 784 } 785 786 putnext(connp->conn_rq, mp); 787 putnext(connp->conn_rq, mp1); 788 } 789 } 790 791 /* ARGSUSED */ 792 static int 793 udp_tpi_close(queue_t *q, int flags, cred_t *credp __unused) 794 { 795 conn_t *connp; 796 797 if (flags & SO_FALLBACK) { 798 /* 799 * stream is being closed while in fallback 800 * simply free the resources that were allocated 801 */ 802 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 803 qprocsoff(q); 804 goto done; 805 } 806 807 connp = Q_TO_CONN(q); 808 udp_do_close(connp); 809 done: 810 q->q_ptr = WR(q)->q_ptr = NULL; 811 return (0); 812 } 813 814 static void 815 udp_close_free(conn_t *connp) 816 { 817 udp_t *udp = connp->conn_udp; 818 819 /* If there are any options associated with the stream, free them. */ 820 if (udp->udp_recv_ipp.ipp_fields != 0) 821 ip_pkt_free(&udp->udp_recv_ipp); 822 823 /* 824 * Clear any fields which the kmem_cache constructor clears. 825 * Only udp_connp needs to be preserved. 826 * TBD: We should make this more efficient to avoid clearing 827 * everything. 
828 */ 829 ASSERT(udp->udp_connp == connp); 830 bzero(udp, sizeof (udp_t)); 831 udp->udp_connp = connp; 832 } 833 834 static int 835 udp_do_disconnect(conn_t *connp) 836 { 837 udp_t *udp; 838 udp_fanout_t *udpf; 839 udp_stack_t *us; 840 int error; 841 842 udp = connp->conn_udp; 843 us = udp->udp_us; 844 mutex_enter(&connp->conn_lock); 845 if (udp->udp_state != TS_DATA_XFER) { 846 mutex_exit(&connp->conn_lock); 847 return (-TOUTSTATE); 848 } 849 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 850 us->us_bind_fanout_size)]; 851 mutex_enter(&udpf->uf_lock); 852 if (connp->conn_mcbc_bind) 853 connp->conn_saddr_v6 = ipv6_all_zeros; 854 else 855 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 856 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 857 connp->conn_faddr_v6 = ipv6_all_zeros; 858 connp->conn_fport = 0; 859 udp->udp_state = TS_IDLE; 860 mutex_exit(&udpf->uf_lock); 861 862 /* Remove any remnants of mapped address binding */ 863 if (connp->conn_family == AF_INET6) 864 connp->conn_ipversion = IPV6_VERSION; 865 866 connp->conn_v6lastdst = ipv6_all_zeros; 867 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 868 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 869 mutex_exit(&connp->conn_lock); 870 if (error != 0) 871 return (error); 872 873 /* 874 * Tell IP to remove the full binding and revert 875 * to the local address binding. 876 */ 877 return (ip_laddr_fanout_insert(connp)); 878 } 879 880 static void 881 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 882 { 883 conn_t *connp = Q_TO_CONN(q); 884 int error; 885 886 /* 887 * Allocate the largest primitive we need to send back 888 * T_error_ack is > than T_ok_ack 889 */ 890 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 891 if (mp == NULL) { 892 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 893 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 894 return; 895 } 896 897 error = udp_do_disconnect(connp); 898 899 if (error != 0) { 900 if (error < 0) { 901 udp_err_ack(q, mp, -error, 0); 902 } else { 903 udp_err_ack(q, mp, TSYSERR, error); 904 } 905 } else { 906 mp = mi_tpi_ok_ack_alloc(mp); 907 ASSERT(mp != NULL); 908 qreply(q, mp); 909 } 910 } 911 912 int 913 udp_disconnect(conn_t *connp) 914 { 915 int error; 916 917 connp->conn_dgram_errind = B_FALSE; 918 error = udp_do_disconnect(connp); 919 if (error < 0) 920 error = proto_tlitosyserr(-error); 921 922 return (error); 923 } 924 925 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 926 static void 927 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 928 { 929 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 930 qreply(q, mp); 931 } 932 933 /* Shorthand to generate and send TPI error acks to our client */ 934 static void 935 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 936 t_scalar_t t_error, int sys_error) 937 { 938 struct T_error_ack *teackp; 939 940 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 941 M_PCPROTO, T_ERROR_ACK)) != NULL) { 942 teackp = (struct T_error_ack *)mp->b_rptr; 943 teackp->ERROR_prim = primitive; 944 teackp->TLI_error = t_error; 945 teackp->UNIX_error = sys_error; 946 qreply(q, mp); 947 } 948 } 949 950 /* At minimum we need 4 bytes of UDP header */ 951 #define ICMP_MIN_UDP_HDR 4 952 953 /* 954 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 955 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 956 * Assumes that IP has pulled up everything up to and including the ICMP header. 
957 */ 958 /* ARGSUSED2 */ 959 static void 960 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 961 { 962 conn_t *connp = (conn_t *)arg1; 963 icmph_t *icmph; 964 ipha_t *ipha; 965 int iph_hdr_length; 966 udpha_t *udpha; 967 sin_t sin; 968 sin6_t sin6; 969 mblk_t *mp1; 970 int error = 0; 971 udp_t *udp = connp->conn_udp; 972 973 ipha = (ipha_t *)mp->b_rptr; 974 975 ASSERT(OK_32PTR(mp->b_rptr)); 976 977 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 978 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 979 udp_icmp_error_ipv6(connp, mp, ira); 980 return; 981 } 982 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 983 984 /* Skip past the outer IP and ICMP headers */ 985 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 986 iph_hdr_length = ira->ira_ip_hdr_length; 987 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 988 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 989 990 /* Skip past the inner IP and find the ULP header */ 991 iph_hdr_length = IPH_HDR_LENGTH(ipha); 992 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 993 994 switch (icmph->icmph_type) { 995 case ICMP_DEST_UNREACHABLE: 996 switch (icmph->icmph_code) { 997 case ICMP_FRAGMENTATION_NEEDED: { 998 ipha_t *ipha; 999 ip_xmit_attr_t *ixa; 1000 /* 1001 * IP has already adjusted the path MTU. 1002 * But we need to adjust DF for IPv4. 1003 */ 1004 if (connp->conn_ipversion != IPV4_VERSION) 1005 break; 1006 1007 ixa = conn_get_ixa(connp, B_FALSE); 1008 if (ixa == NULL || ixa->ixa_ire == NULL) { 1009 /* 1010 * Some other thread holds conn_ixa. We will 1011 * redo this on the next ICMP too big. 
1012 */ 1013 if (ixa != NULL) 1014 ixa_refrele(ixa); 1015 break; 1016 } 1017 (void) ip_get_pmtu(ixa); 1018 1019 mutex_enter(&connp->conn_lock); 1020 ipha = (ipha_t *)connp->conn_ht_iphc; 1021 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 1022 ipha->ipha_fragment_offset_and_flags |= 1023 IPH_DF_HTONS; 1024 } else { 1025 ipha->ipha_fragment_offset_and_flags &= 1026 ~IPH_DF_HTONS; 1027 } 1028 mutex_exit(&connp->conn_lock); 1029 ixa_refrele(ixa); 1030 break; 1031 } 1032 case ICMP_PORT_UNREACHABLE: 1033 case ICMP_PROTOCOL_UNREACHABLE: 1034 error = ECONNREFUSED; 1035 break; 1036 default: 1037 /* Transient errors */ 1038 break; 1039 } 1040 break; 1041 default: 1042 /* Transient errors */ 1043 break; 1044 } 1045 if (error == 0) { 1046 freemsg(mp); 1047 return; 1048 } 1049 1050 /* 1051 * Deliver T_UDERROR_IND when the application has asked for it. 1052 * The socket layer enables this automatically when connected. 1053 */ 1054 if (!connp->conn_dgram_errind) { 1055 freemsg(mp); 1056 return; 1057 } 1058 1059 switch (connp->conn_family) { 1060 case AF_INET: 1061 sin = sin_null; 1062 sin.sin_family = AF_INET; 1063 sin.sin_addr.s_addr = ipha->ipha_dst; 1064 sin.sin_port = udpha->uha_dst_port; 1065 if (IPCL_IS_NONSTR(connp)) { 1066 mutex_enter(&connp->conn_lock); 1067 if (udp->udp_state == TS_DATA_XFER) { 1068 if (sin.sin_port == connp->conn_fport && 1069 sin.sin_addr.s_addr == 1070 connp->conn_faddr_v4) { 1071 mutex_exit(&connp->conn_lock); 1072 (*connp->conn_upcalls->su_set_error) 1073 (connp->conn_upper_handle, error); 1074 goto done; 1075 } 1076 } else { 1077 udp->udp_delayed_error = error; 1078 *((sin_t *)&udp->udp_delayed_addr) = sin; 1079 } 1080 mutex_exit(&connp->conn_lock); 1081 } else { 1082 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1083 NULL, 0, error); 1084 if (mp1 != NULL) 1085 putnext(connp->conn_rq, mp1); 1086 } 1087 break; 1088 case AF_INET6: 1089 sin6 = sin6_null; 1090 sin6.sin6_family = AF_INET6; 1091 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, 
&sin6.sin6_addr); 1092 sin6.sin6_port = udpha->uha_dst_port; 1093 if (IPCL_IS_NONSTR(connp)) { 1094 mutex_enter(&connp->conn_lock); 1095 if (udp->udp_state == TS_DATA_XFER) { 1096 if (sin6.sin6_port == connp->conn_fport && 1097 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1098 &connp->conn_faddr_v6)) { 1099 mutex_exit(&connp->conn_lock); 1100 (*connp->conn_upcalls->su_set_error) 1101 (connp->conn_upper_handle, error); 1102 goto done; 1103 } 1104 } else { 1105 udp->udp_delayed_error = error; 1106 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1107 } 1108 mutex_exit(&connp->conn_lock); 1109 } else { 1110 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1111 NULL, 0, error); 1112 if (mp1 != NULL) 1113 putnext(connp->conn_rq, mp1); 1114 } 1115 break; 1116 } 1117 done: 1118 freemsg(mp); 1119 } 1120 1121 /* 1122 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1123 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1124 * Assumes that IP has pulled up all the extension headers as well as the 1125 * ICMPv6 header. 
1126 */ 1127 static void 1128 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1129 { 1130 icmp6_t *icmp6; 1131 ip6_t *ip6h, *outer_ip6h; 1132 uint16_t iph_hdr_length; 1133 uint8_t *nexthdrp; 1134 udpha_t *udpha; 1135 sin6_t sin6; 1136 mblk_t *mp1; 1137 int error = 0; 1138 udp_t *udp = connp->conn_udp; 1139 udp_stack_t *us = udp->udp_us; 1140 1141 outer_ip6h = (ip6_t *)mp->b_rptr; 1142 #ifdef DEBUG 1143 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1144 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1145 else 1146 iph_hdr_length = IPV6_HDR_LEN; 1147 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1148 #endif 1149 /* Skip past the outer IP and ICMP headers */ 1150 iph_hdr_length = ira->ira_ip_hdr_length; 1151 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1152 1153 /* Skip past the inner IP and find the ULP header */ 1154 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1155 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1156 freemsg(mp); 1157 return; 1158 } 1159 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1160 1161 switch (icmp6->icmp6_type) { 1162 case ICMP6_DST_UNREACH: 1163 switch (icmp6->icmp6_code) { 1164 case ICMP6_DST_UNREACH_NOPORT: 1165 error = ECONNREFUSED; 1166 break; 1167 case ICMP6_DST_UNREACH_ADMIN: 1168 case ICMP6_DST_UNREACH_NOROUTE: 1169 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1170 case ICMP6_DST_UNREACH_ADDR: 1171 /* Transient errors */ 1172 break; 1173 default: 1174 break; 1175 } 1176 break; 1177 case ICMP6_PACKET_TOO_BIG: { 1178 struct T_unitdata_ind *tudi; 1179 struct T_opthdr *toh; 1180 size_t udi_size; 1181 mblk_t *newmp; 1182 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1183 sizeof (struct ip6_mtuinfo); 1184 sin6_t *sin6; 1185 struct ip6_mtuinfo *mtuinfo; 1186 1187 /* 1188 * If the application has requested to receive path mtu 1189 * information, send up an empty message containing an 1190 * IPV6_PATHMTU ancillary data item. 
1191 */ 1192 if (!connp->conn_ipv6_recvpathmtu) 1193 break; 1194 1195 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1196 opt_length; 1197 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1198 UDPS_BUMP_MIB(us, udpInErrors); 1199 break; 1200 } 1201 1202 /* 1203 * newmp->b_cont is left to NULL on purpose. This is an 1204 * empty message containing only ancillary data. 1205 */ 1206 newmp->b_datap->db_type = M_PROTO; 1207 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1208 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1209 tudi->PRIM_type = T_UNITDATA_IND; 1210 tudi->SRC_length = sizeof (sin6_t); 1211 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1212 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1213 tudi->OPT_length = opt_length; 1214 1215 sin6 = (sin6_t *)&tudi[1]; 1216 bzero(sin6, sizeof (sin6_t)); 1217 sin6->sin6_family = AF_INET6; 1218 sin6->sin6_addr = connp->conn_faddr_v6; 1219 1220 toh = (struct T_opthdr *)&sin6[1]; 1221 toh->level = IPPROTO_IPV6; 1222 toh->name = IPV6_PATHMTU; 1223 toh->len = opt_length; 1224 toh->status = 0; 1225 1226 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1227 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1228 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1229 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1230 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1231 /* 1232 * We've consumed everything we need from the original 1233 * message. Free it, then send our empty message. 
1234 */ 1235 freemsg(mp); 1236 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); 1237 return; 1238 } 1239 case ICMP6_TIME_EXCEEDED: 1240 /* Transient errors */ 1241 break; 1242 case ICMP6_PARAM_PROB: 1243 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1244 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1245 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1246 (uchar_t *)nexthdrp) { 1247 error = ECONNREFUSED; 1248 break; 1249 } 1250 break; 1251 } 1252 if (error == 0) { 1253 freemsg(mp); 1254 return; 1255 } 1256 1257 /* 1258 * Deliver T_UDERROR_IND when the application has asked for it. 1259 * The socket layer enables this automatically when connected. 1260 */ 1261 if (!connp->conn_dgram_errind) { 1262 freemsg(mp); 1263 return; 1264 } 1265 1266 sin6 = sin6_null; 1267 sin6.sin6_family = AF_INET6; 1268 sin6.sin6_addr = ip6h->ip6_dst; 1269 sin6.sin6_port = udpha->uha_dst_port; 1270 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1271 1272 if (IPCL_IS_NONSTR(connp)) { 1273 mutex_enter(&connp->conn_lock); 1274 if (udp->udp_state == TS_DATA_XFER) { 1275 if (sin6.sin6_port == connp->conn_fport && 1276 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1277 &connp->conn_faddr_v6)) { 1278 mutex_exit(&connp->conn_lock); 1279 (*connp->conn_upcalls->su_set_error) 1280 (connp->conn_upper_handle, error); 1281 goto done; 1282 } 1283 } else { 1284 udp->udp_delayed_error = error; 1285 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1286 } 1287 mutex_exit(&connp->conn_lock); 1288 } else { 1289 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1290 NULL, 0, error); 1291 if (mp1 != NULL) 1292 putnext(connp->conn_rq, mp1); 1293 } 1294 done: 1295 freemsg(mp); 1296 } 1297 1298 /* 1299 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1300 * The local address is filled in if endpoint is bound. 
The remote address 1301 * is filled in if remote address has been precified ("connected endpoint") 1302 * (The concept of connected CLTS sockets is alien to published TPI 1303 * but we support it anyway). 1304 */ 1305 static void 1306 udp_addr_req(queue_t *q, mblk_t *mp) 1307 { 1308 struct sockaddr *sa; 1309 mblk_t *ackmp; 1310 struct T_addr_ack *taa; 1311 udp_t *udp = Q_TO_UDP(q); 1312 conn_t *connp = udp->udp_connp; 1313 uint_t addrlen; 1314 1315 /* Make it large enough for worst case */ 1316 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1317 2 * sizeof (sin6_t), 1); 1318 if (ackmp == NULL) { 1319 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1320 return; 1321 } 1322 taa = (struct T_addr_ack *)ackmp->b_rptr; 1323 1324 bzero(taa, sizeof (struct T_addr_ack)); 1325 ackmp->b_wptr = (uchar_t *)&taa[1]; 1326 1327 taa->PRIM_type = T_ADDR_ACK; 1328 ackmp->b_datap->db_type = M_PCPROTO; 1329 1330 if (connp->conn_family == AF_INET) 1331 addrlen = sizeof (sin_t); 1332 else 1333 addrlen = sizeof (sin6_t); 1334 1335 mutex_enter(&connp->conn_lock); 1336 /* 1337 * Note: Following code assumes 32 bit alignment of basic 1338 * data structures like sin_t and struct T_addr_ack. 
1339 */ 1340 if (udp->udp_state != TS_UNBND) { 1341 /* 1342 * Fill in local address first 1343 */ 1344 taa->LOCADDR_offset = sizeof (*taa); 1345 taa->LOCADDR_length = addrlen; 1346 sa = (struct sockaddr *)&taa[1]; 1347 (void) conn_getsockname(connp, sa, &addrlen); 1348 ackmp->b_wptr += addrlen; 1349 } 1350 if (udp->udp_state == TS_DATA_XFER) { 1351 /* 1352 * connected, fill remote address too 1353 */ 1354 taa->REMADDR_length = addrlen; 1355 /* assumed 32-bit alignment */ 1356 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1357 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1358 (void) conn_getpeername(connp, sa, &addrlen); 1359 ackmp->b_wptr += addrlen; 1360 } 1361 mutex_exit(&connp->conn_lock); 1362 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1363 qreply(q, ackmp); 1364 } 1365 1366 static void 1367 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1368 { 1369 conn_t *connp = udp->udp_connp; 1370 1371 if (connp->conn_family == AF_INET) { 1372 *tap = udp_g_t_info_ack_ipv4; 1373 } else { 1374 *tap = udp_g_t_info_ack_ipv6; 1375 } 1376 tap->CURRENT_state = udp->udp_state; 1377 tap->OPT_size = udp_max_optsize; 1378 } 1379 1380 static void 1381 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1382 t_uscalar_t cap_bits1) 1383 { 1384 tcap->CAP_bits1 = 0; 1385 1386 if (cap_bits1 & TC1_INFO) { 1387 udp_copy_info(&tcap->INFO_ack, udp); 1388 tcap->CAP_bits1 |= TC1_INFO; 1389 } 1390 } 1391 1392 /* 1393 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1394 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1395 * udp_g_t_info_ack. The current state of the stream is copied from 1396 * udp_state. 
1397 */ 1398 static void 1399 udp_capability_req(queue_t *q, mblk_t *mp) 1400 { 1401 t_uscalar_t cap_bits1; 1402 struct T_capability_ack *tcap; 1403 udp_t *udp = Q_TO_UDP(q); 1404 1405 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1406 1407 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1408 mp->b_datap->db_type, T_CAPABILITY_ACK); 1409 if (!mp) 1410 return; 1411 1412 tcap = (struct T_capability_ack *)mp->b_rptr; 1413 udp_do_capability_ack(udp, tcap, cap_bits1); 1414 1415 qreply(q, mp); 1416 } 1417 1418 /* 1419 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1420 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1421 * The current state of the stream is copied from udp_state. 1422 */ 1423 static void 1424 udp_info_req(queue_t *q, mblk_t *mp) 1425 { 1426 udp_t *udp = Q_TO_UDP(q); 1427 1428 /* Create a T_INFO_ACK message. */ 1429 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1430 T_INFO_ACK); 1431 if (!mp) 1432 return; 1433 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1434 qreply(q, mp); 1435 } 1436 1437 /* For /dev/udp aka AF_INET open */ 1438 static int 1439 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1440 { 1441 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1442 } 1443 1444 /* For /dev/udp6 aka AF_INET6 open */ 1445 static int 1446 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1447 { 1448 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1449 } 1450 1451 /* 1452 * This is the open routine for udp. It allocates a udp_t structure for 1453 * the stream and, on the first open of the module, creates an ND table. 1454 */ 1455 static int 1456 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1457 boolean_t isv6) 1458 { 1459 udp_t *udp; 1460 conn_t *connp; 1461 dev_t conn_dev; 1462 vmem_t *minor_arena; 1463 int err; 1464 1465 /* If the stream is already open, return immediately. 
*/ 1466 if (q->q_ptr != NULL) 1467 return (0); 1468 1469 if (sflag == MODOPEN) 1470 return (EINVAL); 1471 1472 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1473 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1474 minor_arena = ip_minor_arena_la; 1475 } else { 1476 /* 1477 * Either minor numbers in the large arena were exhausted 1478 * or a non socket application is doing the open. 1479 * Try to allocate from the small arena. 1480 */ 1481 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1482 return (EBUSY); 1483 1484 minor_arena = ip_minor_arena_sa; 1485 } 1486 1487 if (flag & SO_FALLBACK) { 1488 /* 1489 * Non streams socket needs a stream to fallback to 1490 */ 1491 RD(q)->q_ptr = (void *)conn_dev; 1492 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1493 WR(q)->q_ptr = (void *)minor_arena; 1494 qprocson(q); 1495 return (0); 1496 } 1497 1498 connp = udp_do_open(credp, isv6, KM_SLEEP, &err); 1499 if (connp == NULL) { 1500 inet_minor_free(minor_arena, conn_dev); 1501 return (err); 1502 } 1503 udp = connp->conn_udp; 1504 1505 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1506 connp->conn_dev = conn_dev; 1507 connp->conn_minor_arena = minor_arena; 1508 1509 /* 1510 * Initialize the udp_t structure for this stream. 1511 */ 1512 q->q_ptr = connp; 1513 WR(q)->q_ptr = connp; 1514 connp->conn_rq = q; 1515 connp->conn_wq = WR(q); 1516 1517 /* 1518 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1519 * need to lock anything. 1520 */ 1521 ASSERT(connp->conn_proto == IPPROTO_UDP); 1522 ASSERT(connp->conn_udp == udp); 1523 ASSERT(udp->udp_connp == connp); 1524 1525 if (flag & SO_SOCKSTR) { 1526 udp->udp_issocket = B_TRUE; 1527 } 1528 1529 WR(q)->q_hiwat = connp->conn_sndbuf; 1530 WR(q)->q_lowat = connp->conn_sndlowat; 1531 1532 qprocson(q); 1533 1534 /* Set the Stream head write offset and high watermark. 
*/ 1535 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1536 (void) proto_set_rx_hiwat(q, connp, 1537 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1538 1539 mutex_enter(&connp->conn_lock); 1540 connp->conn_state_flags &= ~CONN_INCIPIENT; 1541 mutex_exit(&connp->conn_lock); 1542 return (0); 1543 } 1544 1545 /* 1546 * Which UDP options OK to set through T_UNITDATA_REQ... 1547 */ 1548 /* ARGSUSED */ 1549 static boolean_t 1550 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1551 { 1552 return (B_TRUE); 1553 } 1554 1555 /* 1556 * This routine gets default values of certain options whose default 1557 * values are maintained by protcol specific code 1558 */ 1559 int 1560 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1561 { 1562 udp_t *udp = Q_TO_UDP(q); 1563 udp_stack_t *us = udp->udp_us; 1564 int *i1 = (int *)ptr; 1565 1566 switch (level) { 1567 case IPPROTO_IP: 1568 switch (name) { 1569 case IP_MULTICAST_TTL: 1570 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1571 return (sizeof (uchar_t)); 1572 case IP_MULTICAST_LOOP: 1573 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1574 return (sizeof (uchar_t)); 1575 } 1576 break; 1577 case IPPROTO_IPV6: 1578 switch (name) { 1579 case IPV6_MULTICAST_HOPS: 1580 *i1 = IP_DEFAULT_MULTICAST_TTL; 1581 return (sizeof (int)); 1582 case IPV6_MULTICAST_LOOP: 1583 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1584 return (sizeof (int)); 1585 case IPV6_UNICAST_HOPS: 1586 *i1 = us->us_ipv6_hoplimit; 1587 return (sizeof (int)); 1588 } 1589 break; 1590 } 1591 return (-1); 1592 } 1593 1594 /* 1595 * This routine retrieves the current status of socket options. 1596 * It returns the size of the option retrieved, or -1. 
1597 */ 1598 int 1599 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1600 uchar_t *ptr) 1601 { 1602 int *i1 = (int *)ptr; 1603 udp_t *udp = connp->conn_udp; 1604 int len; 1605 conn_opt_arg_t coas; 1606 int retval; 1607 1608 coas.coa_connp = connp; 1609 coas.coa_ixa = connp->conn_ixa; 1610 coas.coa_ipp = &connp->conn_xmit_ipp; 1611 coas.coa_ancillary = B_FALSE; 1612 coas.coa_changed = 0; 1613 1614 /* 1615 * We assume that the optcom framework has checked for the set 1616 * of levels and names that are supported, hence we don't worry 1617 * about rejecting based on that. 1618 * First check for UDP specific handling, then pass to common routine. 1619 */ 1620 switch (level) { 1621 case IPPROTO_IP: 1622 /* 1623 * Only allow IPv4 option processing on IPv4 sockets. 1624 */ 1625 if (connp->conn_family != AF_INET) 1626 return (-1); 1627 1628 switch (name) { 1629 case IP_OPTIONS: 1630 case T_IP_OPTIONS: 1631 mutex_enter(&connp->conn_lock); 1632 if (!(udp->udp_recv_ipp.ipp_fields & 1633 IPPF_IPV4_OPTIONS)) { 1634 mutex_exit(&connp->conn_lock); 1635 return (0); 1636 } 1637 1638 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1639 ASSERT(len != 0); 1640 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1641 mutex_exit(&connp->conn_lock); 1642 return (len); 1643 } 1644 break; 1645 case IPPROTO_UDP: 1646 switch (name) { 1647 case UDP_NAT_T_ENDPOINT: 1648 mutex_enter(&connp->conn_lock); 1649 *i1 = udp->udp_nat_t_endpoint; 1650 mutex_exit(&connp->conn_lock); 1651 return (sizeof (int)); 1652 case UDP_RCVHDR: 1653 mutex_enter(&connp->conn_lock); 1654 *i1 = udp->udp_rcvhdr ? 
1 : 0; 1655 mutex_exit(&connp->conn_lock); 1656 return (sizeof (int)); 1657 case UDP_SRCPORT_HASH: 1658 mutex_enter(&connp->conn_lock); 1659 *i1 = udp->udp_vxlanhash; 1660 mutex_exit(&connp->conn_lock); 1661 return (sizeof (int)); 1662 } 1663 } 1664 mutex_enter(&connp->conn_lock); 1665 retval = conn_opt_get(&coas, level, name, ptr); 1666 mutex_exit(&connp->conn_lock); 1667 return (retval); 1668 } 1669 1670 /* 1671 * This routine retrieves the current status of socket options. 1672 * It returns the size of the option retrieved, or -1. 1673 */ 1674 int 1675 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1676 { 1677 conn_t *connp = Q_TO_CONN(q); 1678 int err; 1679 1680 err = udp_opt_get(connp, level, name, ptr); 1681 return (err); 1682 } 1683 1684 /* 1685 * This routine sets socket options. 1686 */ 1687 int 1688 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1689 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1690 { 1691 conn_t *connp = coa->coa_connp; 1692 ip_xmit_attr_t *ixa = coa->coa_ixa; 1693 udp_t *udp = connp->conn_udp; 1694 udp_stack_t *us = udp->udp_us; 1695 int *i1 = (int *)invalp; 1696 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1697 int error; 1698 1699 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1700 /* 1701 * First do UDP specific sanity checks and handle UDP specific 1702 * options. Note that some IPPROTO_UDP options are handled 1703 * by conn_opt_set. 1704 */ 1705 switch (level) { 1706 case SOL_SOCKET: 1707 switch (name) { 1708 case SO_SNDBUF: 1709 if (*i1 > us->us_max_buf) { 1710 return (ENOBUFS); 1711 } 1712 break; 1713 case SO_RCVBUF: 1714 if (*i1 > us->us_max_buf) { 1715 return (ENOBUFS); 1716 } 1717 break; 1718 1719 case SCM_UCRED: { 1720 struct ucred_s *ucr; 1721 cred_t *newcr; 1722 ts_label_t *tsl; 1723 1724 /* 1725 * Only sockets that have proper privileges and are 1726 * bound to MLPs will have any other value here, so 1727 * this implicitly tests for privilege to set label. 
1728 */ 1729 if (connp->conn_mlp_type == mlptSingle) 1730 break; 1731 1732 ucr = (struct ucred_s *)invalp; 1733 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || 1734 ucr->uc_labeloff < sizeof (*ucr) || 1735 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1736 return (EINVAL); 1737 if (!checkonly) { 1738 /* 1739 * Set ixa_tsl to the new label. 1740 * We assume that crgetzoneid doesn't change 1741 * as part of the SCM_UCRED. 1742 */ 1743 ASSERT(cr != NULL); 1744 if ((tsl = crgetlabel(cr)) == NULL) 1745 return (EINVAL); 1746 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1747 tsl->tsl_doi, KM_NOSLEEP); 1748 if (newcr == NULL) 1749 return (ENOSR); 1750 ASSERT(newcr->cr_label != NULL); 1751 /* 1752 * Move the hold on the cr_label to ixa_tsl by 1753 * setting cr_label to NULL. Then release newcr. 1754 */ 1755 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1756 ixa->ixa_flags |= IXAF_UCRED_TSL; 1757 newcr->cr_label = NULL; 1758 crfree(newcr); 1759 coa->coa_changed |= COA_HEADER_CHANGED; 1760 coa->coa_changed |= COA_WROFF_CHANGED; 1761 } 1762 /* Fully handled this option. */ 1763 return (0); 1764 } 1765 } 1766 break; 1767 case IPPROTO_UDP: 1768 switch (name) { 1769 case UDP_NAT_T_ENDPOINT: 1770 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1771 return (error); 1772 } 1773 1774 /* 1775 * Use conn_family instead so we can avoid ambiguitites 1776 * with AF_INET6 sockets that may switch from IPv4 1777 * to IPv6. 1778 */ 1779 if (connp->conn_family != AF_INET) { 1780 return (EAFNOSUPPORT); 1781 } 1782 1783 if (!checkonly) { 1784 mutex_enter(&connp->conn_lock); 1785 udp->udp_nat_t_endpoint = onoff; 1786 mutex_exit(&connp->conn_lock); 1787 coa->coa_changed |= COA_HEADER_CHANGED; 1788 coa->coa_changed |= COA_WROFF_CHANGED; 1789 } 1790 /* Fully handled this option. 
*/ 1791 return (0); 1792 case UDP_RCVHDR: 1793 mutex_enter(&connp->conn_lock); 1794 udp->udp_rcvhdr = onoff; 1795 mutex_exit(&connp->conn_lock); 1796 return (0); 1797 case UDP_SRCPORT_HASH: 1798 /* 1799 * This should have already been verified, but double 1800 * check. 1801 */ 1802 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1803 return (error); 1804 } 1805 1806 /* First see if the val is something we understand */ 1807 if (*i1 != UDP_HASH_DISABLE && *i1 != UDP_HASH_VXLAN) 1808 return (EINVAL); 1809 1810 if (!checkonly) { 1811 mutex_enter(&connp->conn_lock); 1812 udp->udp_vxlanhash = *i1; 1813 mutex_exit(&connp->conn_lock); 1814 } 1815 /* Fully handled this option. */ 1816 return (0); 1817 } 1818 break; 1819 } 1820 error = conn_opt_set(coa, level, name, inlen, invalp, 1821 checkonly, cr); 1822 return (error); 1823 } 1824 1825 /* 1826 * This routine sets socket options. 1827 */ 1828 int 1829 udp_opt_set(conn_t *connp, uint_t optset_context, int level, 1830 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 1831 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) 1832 { 1833 udp_t *udp = connp->conn_udp; 1834 int err; 1835 conn_opt_arg_t coas, *coa; 1836 boolean_t checkonly; 1837 udp_stack_t *us = udp->udp_us; 1838 1839 switch (optset_context) { 1840 case SETFN_OPTCOM_CHECKONLY: 1841 checkonly = B_TRUE; 1842 /* 1843 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1844 * inlen != 0 implies value supplied and 1845 * we have to "pretend" to set it. 1846 * inlen == 0 implies that there is no 1847 * value part in T_CHECK request and just validation 1848 * done elsewhere should be enough, we just return here. 1849 */ 1850 if (inlen == 0) { 1851 *outlenp = 0; 1852 return (0); 1853 } 1854 break; 1855 case SETFN_OPTCOM_NEGOTIATE: 1856 checkonly = B_FALSE; 1857 break; 1858 case SETFN_UD_NEGOTIATE: 1859 case SETFN_CONN_NEGOTIATE: 1860 checkonly = B_FALSE; 1861 /* 1862 * Negotiating local and "association-related" options 1863 * through T_UNITDATA_REQ. 
1864 * 1865 * Following routine can filter out ones we do not 1866 * want to be "set" this way. 1867 */ 1868 if (!udp_opt_allow_udr_set(level, name)) { 1869 *outlenp = 0; 1870 return (EINVAL); 1871 } 1872 break; 1873 default: 1874 /* 1875 * We should never get here 1876 */ 1877 *outlenp = 0; 1878 return (EINVAL); 1879 } 1880 1881 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1882 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1883 1884 if (thisdg_attrs != NULL) { 1885 /* Options from T_UNITDATA_REQ */ 1886 coa = (conn_opt_arg_t *)thisdg_attrs; 1887 ASSERT(coa->coa_connp == connp); 1888 ASSERT(coa->coa_ixa != NULL); 1889 ASSERT(coa->coa_ipp != NULL); 1890 ASSERT(coa->coa_ancillary); 1891 } else { 1892 coa = &coas; 1893 coas.coa_connp = connp; 1894 /* Get a reference on conn_ixa to prevent concurrent mods */ 1895 coas.coa_ixa = conn_get_ixa(connp, B_TRUE); 1896 if (coas.coa_ixa == NULL) { 1897 *outlenp = 0; 1898 return (ENOMEM); 1899 } 1900 coas.coa_ipp = &connp->conn_xmit_ipp; 1901 coas.coa_ancillary = B_FALSE; 1902 coas.coa_changed = 0; 1903 } 1904 1905 err = udp_do_opt_set(coa, level, name, inlen, invalp, 1906 cr, checkonly); 1907 if (err != 0) { 1908 errout: 1909 if (!coa->coa_ancillary) 1910 ixa_refrele(coa->coa_ixa); 1911 *outlenp = 0; 1912 return (err); 1913 } 1914 /* Handle DHCPINIT here outside of lock */ 1915 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) { 1916 uint_t ifindex; 1917 ill_t *ill; 1918 1919 ifindex = *(uint_t *)invalp; 1920 if (ifindex == 0) { 1921 ill = NULL; 1922 } else { 1923 ill = ill_lookup_on_ifindex(ifindex, B_FALSE, 1924 coa->coa_ixa->ixa_ipst); 1925 if (ill == NULL) { 1926 err = ENXIO; 1927 goto errout; 1928 } 1929 1930 mutex_enter(&ill->ill_lock); 1931 if (ill->ill_state_flags & ILL_CONDEMNED) { 1932 mutex_exit(&ill->ill_lock); 1933 ill_refrele(ill); 1934 err = ENXIO; 1935 goto errout; 1936 } 1937 if (IS_VNI(ill)) { 1938 mutex_exit(&ill->ill_lock); 1939 ill_refrele(ill); 1940 err = EINVAL; 1941 goto errout; 
1942 } 1943 } 1944 mutex_enter(&connp->conn_lock); 1945 1946 if (connp->conn_dhcpinit_ill != NULL) { 1947 /* 1948 * We've locked the conn so conn_cleanup_ill() 1949 * cannot clear conn_dhcpinit_ill -- so it's 1950 * safe to access the ill. 1951 */ 1952 ill_t *oill = connp->conn_dhcpinit_ill; 1953 1954 ASSERT(oill->ill_dhcpinit != 0); 1955 atomic_dec_32(&oill->ill_dhcpinit); 1956 ill_set_inputfn(connp->conn_dhcpinit_ill); 1957 connp->conn_dhcpinit_ill = NULL; 1958 } 1959 1960 if (ill != NULL) { 1961 connp->conn_dhcpinit_ill = ill; 1962 atomic_inc_32(&ill->ill_dhcpinit); 1963 ill_set_inputfn(ill); 1964 mutex_exit(&connp->conn_lock); 1965 mutex_exit(&ill->ill_lock); 1966 ill_refrele(ill); 1967 } else { 1968 mutex_exit(&connp->conn_lock); 1969 } 1970 } 1971 1972 /* 1973 * Common case of OK return with outval same as inval. 1974 */ 1975 if (invalp != outvalp) { 1976 /* don't trust bcopy for identical src/dst */ 1977 (void) bcopy(invalp, outvalp, inlen); 1978 } 1979 *outlenp = inlen; 1980 1981 /* 1982 * If this was not ancillary data, then we rebuild the headers, 1983 * update the IRE/NCE, and IPsec as needed. 1984 * Since the label depends on the destination we go through 1985 * ip_set_destination first. 1986 */ 1987 if (coa->coa_ancillary) { 1988 return (0); 1989 } 1990 1991 if (coa->coa_changed & COA_ROUTE_CHANGED) { 1992 in6_addr_t saddr, faddr, nexthop; 1993 in_port_t fport; 1994 1995 /* 1996 * We clear lastdst to make sure we pick up the change 1997 * next time sending. 1998 * If we are connected we re-cache the information. 1999 * We ignore errors to preserve BSD behavior. 2000 * Note that we don't redo IPsec policy lookup here 2001 * since the final destination (or source) didn't change. 
2002 */ 2003 mutex_enter(&connp->conn_lock); 2004 connp->conn_v6lastdst = ipv6_all_zeros; 2005 2006 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa, 2007 &connp->conn_faddr_v6, &nexthop); 2008 saddr = connp->conn_saddr_v6; 2009 faddr = connp->conn_faddr_v6; 2010 fport = connp->conn_fport; 2011 mutex_exit(&connp->conn_lock); 2012 2013 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) && 2014 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) { 2015 (void) ip_attr_connect(connp, coa->coa_ixa, 2016 &saddr, &faddr, &nexthop, fport, NULL, NULL, 2017 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST); 2018 } 2019 } 2020 2021 ixa_refrele(coa->coa_ixa); 2022 2023 if (coa->coa_changed & COA_HEADER_CHANGED) { 2024 /* 2025 * Rebuild the header template if we are connected. 2026 * Otherwise clear conn_v6lastdst so we rebuild the header 2027 * in the data path. 2028 */ 2029 mutex_enter(&connp->conn_lock); 2030 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 2031 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 2032 err = udp_build_hdr_template(connp, 2033 &connp->conn_saddr_v6, &connp->conn_faddr_v6, 2034 connp->conn_fport, connp->conn_flowinfo); 2035 if (err != 0) { 2036 mutex_exit(&connp->conn_lock); 2037 return (err); 2038 } 2039 } else { 2040 connp->conn_v6lastdst = ipv6_all_zeros; 2041 } 2042 mutex_exit(&connp->conn_lock); 2043 } 2044 if (coa->coa_changed & COA_RCVBUF_CHANGED) { 2045 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2046 connp->conn_rcvbuf); 2047 } 2048 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { 2049 connp->conn_wq->q_hiwat = connp->conn_sndbuf; 2050 } 2051 if (coa->coa_changed & COA_WROFF_CHANGED) { 2052 /* Increase wroff if needed */ 2053 uint_t wroff; 2054 2055 mutex_enter(&connp->conn_lock); 2056 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra; 2057 if (udp->udp_nat_t_endpoint) 2058 wroff += sizeof (uint32_t); 2059 if (wroff > connp->conn_wroff) { 2060 connp->conn_wroff = wroff; 2061 mutex_exit(&connp->conn_lock); 2062 (void) 
proto_set_tx_wroff(connp->conn_rq, connp, wroff); 2063 } else { 2064 mutex_exit(&connp->conn_lock); 2065 } 2066 } 2067 return (err); 2068 } 2069 2070 /* This routine sets socket options. */ 2071 int 2072 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2073 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2074 void *thisdg_attrs, cred_t *cr) 2075 { 2076 conn_t *connp = Q_TO_CONN(q); 2077 int error; 2078 2079 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 2080 outlenp, outvalp, thisdg_attrs, cr); 2081 return (error); 2082 } 2083 2084 /* 2085 * Setup IP and UDP headers. 2086 * Returns NULL on allocation failure, in which case data_mp is freed. 2087 */ 2088 mblk_t * 2089 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, 2090 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport, 2091 uint32_t flowinfo, mblk_t *data_mp, int *errorp) 2092 { 2093 mblk_t *mp; 2094 udpha_t *udpha; 2095 udp_stack_t *us = connp->conn_netstack->netstack_udp; 2096 uint_t data_len; 2097 uint32_t cksum; 2098 udp_t *udp = connp->conn_udp; 2099 boolean_t insert_spi = udp->udp_nat_t_endpoint; 2100 boolean_t hash_srcport = udp->udp_vxlanhash; 2101 uint_t ulp_hdr_len; 2102 uint16_t srcport; 2103 2104 data_len = msgdsize(data_mp); 2105 ulp_hdr_len = UDPH_SIZE; 2106 if (insert_spi) 2107 ulp_hdr_len += sizeof (uint32_t); 2108 2109 /* 2110 * If we have source port hashing going on, determine the hash before 2111 * we modify the mblk_t. 
2112 */ 2113 if (hash_srcport == B_TRUE) { 2114 srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN, 2115 IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX, 2116 ntohs(connp->conn_lport)); 2117 } 2118 2119 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, 2120 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); 2121 if (mp == NULL) { 2122 ASSERT(*errorp != 0); 2123 return (NULL); 2124 } 2125 2126 data_len += ulp_hdr_len; 2127 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; 2128 2129 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); 2130 if (hash_srcport == B_TRUE) { 2131 udpha->uha_src_port = htons(srcport); 2132 } else { 2133 udpha->uha_src_port = connp->conn_lport; 2134 } 2135 udpha->uha_dst_port = dstport; 2136 udpha->uha_checksum = 0; 2137 udpha->uha_length = htons(data_len); 2138 2139 /* 2140 * If there was a routing option/header then conn_prepend_hdr 2141 * has massaged it and placed the pseudo-header checksum difference 2142 * in the cksum argument. 2143 * 2144 * Setup header length and prepare for ULP checksum done in IP. 2145 * 2146 * We make it easy for IP to include our pseudo header 2147 * by putting our length in uha_checksum. 2148 * The IP source, destination, and length have already been set by 2149 * conn_prepend_hdr. 
2150 */ 2151 cksum += data_len; 2152 cksum = (cksum >> 16) + (cksum & 0xFFFF); 2153 ASSERT(cksum < 0x10000); 2154 2155 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2156 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2157 2158 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); 2159 2160 /* IP does the checksum if uha_checksum is non-zero */ 2161 if (us->us_do_checksum) { 2162 if (cksum == 0) 2163 udpha->uha_checksum = 0xffff; 2164 else 2165 udpha->uha_checksum = htons(cksum); 2166 } else { 2167 udpha->uha_checksum = 0; 2168 } 2169 } else { 2170 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2171 2172 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); 2173 if (cksum == 0) 2174 udpha->uha_checksum = 0xffff; 2175 else 2176 udpha->uha_checksum = htons(cksum); 2177 } 2178 2179 /* Insert all-0s SPI now. */ 2180 if (insert_spi) 2181 *((uint32_t *)(udpha + 1)) = 0; 2182 2183 return (mp); 2184 } 2185 2186 static int 2187 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, 2188 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo) 2189 { 2190 udpha_t *udpha; 2191 int error; 2192 2193 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2194 /* 2195 * We clear lastdst to make sure we don't use the lastdst path 2196 * next time sending since we might not have set v6dst yet. 2197 */ 2198 connp->conn_v6lastdst = ipv6_all_zeros; 2199 2200 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst, 2201 flowinfo); 2202 if (error != 0) 2203 return (error); 2204 2205 /* 2206 * Any routing header/option has been massaged. The checksum difference 2207 * is stored in conn_sum. 
2208 */ 2209 udpha = (udpha_t *)connp->conn_ht_ulp; 2210 udpha->uha_src_port = connp->conn_lport; 2211 udpha->uha_dst_port = dstport; 2212 udpha->uha_checksum = 0; 2213 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */ 2214 return (0); 2215 } 2216 2217 static mblk_t * 2218 udp_queue_fallback(udp_t *udp, mblk_t *mp) 2219 { 2220 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 2221 if (IPCL_IS_NONSTR(udp->udp_connp)) { 2222 /* 2223 * fallback has started but messages have not been moved yet 2224 */ 2225 if (udp->udp_fallback_queue_head == NULL) { 2226 ASSERT(udp->udp_fallback_queue_tail == NULL); 2227 udp->udp_fallback_queue_head = mp; 2228 udp->udp_fallback_queue_tail = mp; 2229 } else { 2230 ASSERT(udp->udp_fallback_queue_tail != NULL); 2231 udp->udp_fallback_queue_tail->b_next = mp; 2232 udp->udp_fallback_queue_tail = mp; 2233 } 2234 return (NULL); 2235 } else { 2236 /* 2237 * Fallback completed, let the caller putnext() the mblk. 2238 */ 2239 return (mp); 2240 } 2241 } 2242 2243 /* 2244 * Deliver data to ULP. In case we have a socket, and it's falling back to 2245 * TPI, then we'll queue the mp for later processing. 
2246 */ 2247 static void 2248 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2249 { 2250 if (IPCL_IS_NONSTR(connp)) { 2251 udp_t *udp = connp->conn_udp; 2252 int error; 2253 2254 ASSERT(len == msgdsize(mp)); 2255 if ((*connp->conn_upcalls->su_recv) 2256 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2257 mutex_enter(&udp->udp_recv_lock); 2258 if (error == ENOSPC) { 2259 /* 2260 * let's confirm while holding the lock 2261 */ 2262 if ((*connp->conn_upcalls->su_recv) 2263 (connp->conn_upper_handle, NULL, 0, 0, 2264 &error, NULL) < 0) { 2265 ASSERT(error == ENOSPC); 2266 if (error == ENOSPC) { 2267 connp->conn_flow_cntrld = 2268 B_TRUE; 2269 } 2270 } 2271 mutex_exit(&udp->udp_recv_lock); 2272 } else { 2273 ASSERT(error == EOPNOTSUPP); 2274 mp = udp_queue_fallback(udp, mp); 2275 mutex_exit(&udp->udp_recv_lock); 2276 if (mp != NULL) 2277 putnext(connp->conn_rq, mp); 2278 } 2279 } 2280 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2281 } else { 2282 if (is_system_labeled()) { 2283 ASSERT(ira->ira_cred != NULL); 2284 /* 2285 * Provide for protocols above UDP such as RPC 2286 * NOPID leaves db_cpid unchanged. 2287 */ 2288 mblk_setcred(mp, ira->ira_cred, NOPID); 2289 } 2290 2291 putnext(connp->conn_rq, mp); 2292 } 2293 } 2294 2295 /* 2296 * This is the inbound data path. 2297 * IP has already pulled up the IP plus UDP headers and verified alignment 2298 * etc. 
 *
 * arg1 is the conn_t of the receiving endpoint; arg2 is not used.
 * The datagram is dropped (and udpInErrors bumped) when the length in the
 * UDP header disagrees with the IP packet length or when an allocation
 * fails. Otherwise a T_UNITDATA_IND carrying the source address and any
 * requested ancillary data is put in front of the data and the result is
 * handed to udp_ulp_recv().
 */
/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t			*connp = (conn_t *)arg1;
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;		/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			udi_size;	/* Size of T_unitdata_ind */
	int			pkt_len;
	udp_t			*udp;
	udpha_t			*udpha;
	ip_pkt_t		ipps;
	ip6_t			*ip6h;
	mblk_t			*mp1;
	uint32_t		udp_ipv4_options_len;
	crb_t			recv_ancillary;
	udp_stack_t		*us;

	ASSERT(connp->conn_flags & IPCL_UDPCONN);

	udp = connp->conn_udp;
	us = udp->udp_us;
	rptr = mp->b_rptr;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(OK_32PTR(rptr));
	ASSERT(ira->ira_pktlen == msgdsize(mp));
	pkt_len = ira->ira_pktlen;

	/*
	 * Get a snapshot of these and allow other threads to change
	 * them after that. We need the same recv_ancillary when determining
	 * the size as when adding the ancillary data items.
	 */
	mutex_enter(&connp->conn_lock);
	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
	recv_ancillary = connp->conn_recv_ancillary;
	mutex_exit(&connp->conn_lock);

	hdr_length = ira->ira_ip_hdr_length;

	/*
	 * IP inspected the UDP header thus all of it must be in the mblk.
	 * UDP length check is performed for IPv6 packets and IPv4 packets
	 * to check if the size of the packet as specified
	 * by the UDP header is the same as the length derived from the IP
	 * header.
	 */
	udpha = (udpha_t *)(rptr + hdr_length);
	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
		goto tossit;

	/* hdr_length now spans the IP header plus the UDP header. */
	hdr_length += UDPH_SIZE;
	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */

	/* Initialize regardless of IP version */
	ipps.ipp_fields = 0;

	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
	    udp_ipv4_options_len > 0) &&
	    connp->conn_family == AF_INET) {
		int	err;

		/*
		 * Record/update udp_recv_ipp with the lock
		 * held. Not needed for AF_INET6 sockets
		 * since they don't support a getsockopt of IP_OPTIONS.
		 */
		mutex_enter(&connp->conn_lock);
		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
		    B_TRUE);
		if (err != 0) {
			/* Allocation failed. Drop packet */
			mutex_exit(&connp->conn_lock);
			freemsg(mp);
			UDPS_BUMP_MIB(us, udpInErrors);
			return;
		}
		mutex_exit(&connp->conn_lock);
	}

	if (recv_ancillary.crb_all != 0) {
		/*
		 * Record packet information in the ip_pkt_t
		 */
		if (ira->ira_flags & IRAF_IS_IPV4) {
			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));

			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
		} else {
			uint8_t nexthdrp;

			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
			/*
			 * IPv6 packets can only be received by applications
			 * that are prepared to receive IPv6 addresses.
			 * The IP fanout must ensure this.
			 */
			ASSERT(connp->conn_family == AF_INET6);

			ip6h = (ip6_t *)rptr;

			/* We don't care about the length, but need the ipp */
			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
			    &nexthdrp);
			ASSERT(hdr_length == ira->ira_ip_hdr_length);
			/* Restore */
			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
			ASSERT(nexthdrp == IPPROTO_UDP);
		}
	}

	/*
	 * This is the inbound data path. Packets are passed upstream as
	 * T_UNITDATA_IND messages.
	 */
	if (connp->conn_family == AF_INET) {
		sin_t *sin;

		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);

		/*
		 * Normally only send up the source address.
		 * If any ancillary data items are wanted we add those.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			UDPS_BUMP_MIB(us, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin_t);
		/* What remains in udi_size is the ancillary data length. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
		tudi->OPT_length = udi_size;
		sin = (sin_t *)&tudi[1];
		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
		sin->sin_port = udpha->uha_src_port;
		sin->sin_family = connp->conn_family;
		/* Zero the eight bytes of sin_zero, four at a time. */
		*(uint32_t *)&sin->sin_zero[0] = 0;
		*(uint32_t *)&sin->sin_zero[4] = 0;

		/*
		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA,
		 * IP_RECVTTL or IP_RECVTOS has been set.
		 */
		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin[1], udi_size);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			UDPS_BUMP_MIB(us, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		/* What remains in udi_size is the ancillary data length. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ira->ira_flags & IRAF_IS_IPV4) {
			in6_addr_t v6dst;

			/* IPv4 packet on an IPv6 socket: use mapped addresses */
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    IPCL_ZONEID(connp), us->us_netstack);
		} else {
			ip6h = (ip6_t *)rptr;

			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope pass up scope id */
			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ira->ira_ruifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
			    us->us_netstack);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = connp->conn_family;

		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin6[1], udi_size);
		}
	}

	/*
	 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
	 * loopback traffic).
	 */
	DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
	    void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);

	/* Walk past the headers unless IP_RECVHDR was set. */
	if (!udp->udp_rcvhdr) {
		mp->b_rptr = rptr + hdr_length;
		pkt_len -= hdr_length;
	}

	UDPS_BUMP_MIB(us, udpHCInDatagrams);
	/* mp1 is the T_UNITDATA_IND with the data chained on b_cont. */
	udp_ulp_recv(connp, mp1, pkt_len, ira);
	return;

tossit:
	freemsg(mp);
	UDPS_BUMP_MIB(us, udpInErrors);
}

/*
 * This routine creates a T_UDERROR_IND message and passes it upstream.
 * The address and options are copied from the T_UNITDATA_REQ message
 * passed in mp. This message is freed.
 *
 * If mp is too short to hold a T_unitdata_req, or its destination or option
 * ranges do not lie within the message, no indication is sent and mp is
 * just freed. The same happens silently if the indication cannot be
 * allocated.
 */
static void
udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
{
	struct T_unitdata_req *tudr;
	mblk_t	*mp1;
	uchar_t *destaddr;
	t_scalar_t destlen;
	uchar_t	*optaddr;
	t_scalar_t optlen;

	/* The message must at least contain the fixed request header. */
	if ((mp->b_wptr < mp->b_rptr) ||
	    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
		goto done;
	}
	tudr = (struct T_unitdata_req *)mp->b_rptr;
	/* Both ends of the destination address must fall inside mp. */
	destaddr = mp->b_rptr + tudr->DEST_offset;
	if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
	    destaddr + tudr->DEST_length < mp->b_rptr ||
	    destaddr + tudr->DEST_length > mp->b_wptr) {
		goto done;
	}
	/* Same containment check for the options. */
	optaddr = mp->b_rptr + tudr->OPT_offset;
	if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
	    optaddr + tudr->OPT_length < mp->b_rptr ||
	    optaddr + tudr->OPT_length > mp->b_wptr) {
		goto done;
	}
	destlen = tudr->DEST_length;
	optlen = tudr->OPT_length;

	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
	    (char *)optaddr, optlen, err);
	if (mp1 != NULL)
		qreply(q, mp1);

done:
	freemsg(mp);
}

/*
 * This routine removes a port number association from a stream. It
 * is called by udp_wput to handle T_UNBIND_REQ messages.
2607 */ 2608 static void 2609 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2610 { 2611 conn_t *connp = Q_TO_CONN(q); 2612 int error; 2613 2614 error = udp_do_unbind(connp); 2615 if (error) { 2616 if (error < 0) 2617 udp_err_ack(q, mp, -error, 0); 2618 else 2619 udp_err_ack(q, mp, TSYSERR, error); 2620 return; 2621 } 2622 2623 mp = mi_tpi_ok_ack_alloc(mp); 2624 ASSERT(mp != NULL); 2625 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 2626 qreply(q, mp); 2627 } 2628 2629 /* 2630 * Don't let port fall into the privileged range. 2631 * Since the extra privileged ports can be arbitrary we also 2632 * ensure that we exclude those from consideration. 2633 * us->us_epriv_ports is not sorted thus we loop over it until 2634 * there are no changes. 2635 */ 2636 static in_port_t 2637 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 2638 { 2639 int i, bump; 2640 in_port_t nextport; 2641 boolean_t restart = B_FALSE; 2642 udp_stack_t *us = udp->udp_us; 2643 2644 if (random && udp_random_anon_port != 0) { 2645 (void) random_get_pseudo_bytes((uint8_t *)&port, 2646 sizeof (in_port_t)); 2647 /* 2648 * Unless changed by a sys admin, the smallest anon port 2649 * is 32768 and the largest anon port is 65535. It is 2650 * very likely (50%) for the random port to be smaller 2651 * than the smallest anon port. When that happens, 2652 * add port % (anon port range) to the smallest anon 2653 * port to get the random port. It should fall into the 2654 * valid anon port range. 
2655 */ 2656 if ((port < us->us_smallest_anon_port) || 2657 (port > us->us_largest_anon_port)) { 2658 if (us->us_smallest_anon_port == 2659 us->us_largest_anon_port) { 2660 bump = 0; 2661 } else { 2662 bump = port % (us->us_largest_anon_port - 2663 us->us_smallest_anon_port); 2664 } 2665 2666 port = us->us_smallest_anon_port + bump; 2667 } 2668 } 2669 2670 retry: 2671 if (port < us->us_smallest_anon_port) 2672 port = us->us_smallest_anon_port; 2673 2674 if (port > us->us_largest_anon_port) { 2675 port = us->us_smallest_anon_port; 2676 if (restart) 2677 return (0); 2678 restart = B_TRUE; 2679 } 2680 2681 if (port < us->us_smallest_nonpriv_port) 2682 port = us->us_smallest_nonpriv_port; 2683 2684 for (i = 0; i < us->us_num_epriv_ports; i++) { 2685 if (port == us->us_epriv_ports[i]) { 2686 port++; 2687 /* 2688 * Make sure that the port is in the 2689 * valid range. 2690 */ 2691 goto retry; 2692 } 2693 } 2694 2695 if (is_system_labeled() && 2696 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 2697 port, IPPROTO_UDP, B_TRUE)) != 0) { 2698 port = nextport; 2699 goto retry; 2700 } 2701 2702 return (port); 2703 } 2704 2705 /* 2706 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 2707 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 2708 * the TPI options, otherwise we take them from msg_control. 2709 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 2710 * Always consumes mp; never consumes tudr_mp. 
 *
 * Returns 0 or an errno value. ENOMEM is returned when the exclusive ixa or
 * the private ip_pkt_t cannot be allocated. EADDRNOTAVAIL from the IP layer
 * is reported as ENETUNREACH, ENETDOWN from ip_attr_connect() is turned into
 * a silent drop (0), and EWOULDBLOCK from conn_ip_output() is reported as 0.
 */
static int
udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
    mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
{
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;
	int		error;
	ip_xmit_attr_t	*ixa;
	ip_pkt_t	*ipp;
	in6_addr_t	v6src;
	in6_addr_t	v6dst;
	in6_addr_t	v6nexthop;
	in_port_t	dstport;
	uint32_t	flowinfo;
	uint_t		srcid;
	int		is_absreq_failure = 0;
	conn_opt_arg_t	coas, *coa;

	ASSERT(tudr_mp != NULL || msg != NULL);

	/*
	 * Get ixa before checking state to handle a disconnect race.
	 *
	 * We need an exclusive copy of conn_ixa since the ancillary data
	 * options might modify it. That copy has no pointers hence we
	 * need to set them up once we've parsed the ancillary data.
	 */
	ixa = conn_get_ixa_exclusive(connp);
	if (ixa == NULL) {
		UDPS_BUMP_MIB(us, udpOutErrors);
		freemsg(mp);
		return (ENOMEM);
	}
	ASSERT(cr != NULL);
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	/* Send with the caller's cred/pid; restored before returning. */
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;
	if (is_system_labeled()) {
		/* We need to restart with a label based on the cred */
		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
	}

	/* In case previous destination was multicast or multirt */
	ip_attr_newdst(ixa);

	/* Get a copy of conn_xmit_ipp since the options might change it */
	ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
	if (ipp == NULL) {
		/* Cannot use the common exit at "done": there is no ipp. */
		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
		ixa->ixa_cred = connp->conn_cred;	/* Restore */
		ixa->ixa_cpid = connp->conn_cpid;
		ixa_refrele(ixa);
		UDPS_BUMP_MIB(us, udpOutErrors);
		freemsg(mp);
		return (ENOMEM);
	}
	mutex_enter(&connp->conn_lock);
	error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
	mutex_exit(&connp->conn_lock);
	if (error != 0) {
		UDPS_BUMP_MIB(us, udpOutErrors);
		freemsg(mp);
		goto done;
	}

	/*
	 * Parse the options and update ixa and ipp as a result.
	 * Note that ixa_tsl can be updated if SCM_UCRED.
	 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
	 */

	coa = &coas;
	coa->coa_connp = connp;
	coa->coa_ixa = ixa;
	coa->coa_ipp = ipp;
	coa->coa_ancillary = B_TRUE;
	coa->coa_changed = 0;

	if (msg != NULL) {
		error = process_auxiliary_options(connp, msg->msg_control,
		    msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
	} else {
		struct T_unitdata_req *tudr;

		tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
		ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
		error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
		    &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
		    coa, &is_absreq_failure);
	}
	if (error != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		freemsg(mp);
		UDPS_BUMP_MIB(us, udpOutErrors);
		goto done;
	}
	ASSERT(is_absreq_failure == 0);

	mutex_enter(&connp->conn_lock);
	/*
	 * If laddr is unspecified then we look at sin6_src_id.
	 * We will give precedence to a source address set with IPV6_PKTINFO
	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
	 * want ip_attr_connect to select a source (since it can fail) when
	 * IPV6_PKTINFO is specified.
	 * If this doesn't result in a source address then we get a source
	 * from ip_attr_connect() below.
	 */
	v6src = connp->conn_saddr_v6;
	if (sin != NULL) {
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
		dstport = sin->sin_port;
		flowinfo = 0;
		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		ixa->ixa_flags |= IXAF_IS_IPV4;
	} else if (sin6 != NULL) {
		boolean_t v4mapped;

		v6dst = sin6->sin6_addr;
		dstport = sin6->sin6_port;
		flowinfo = sin6->sin6_flowinfo;
		srcid = sin6->__sin6_src_id;
		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
			ixa->ixa_scopeid = sin6->sin6_scope_id;
			ixa->ixa_flags |= IXAF_SCOPEID_SET;
		} else {
			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		}
		v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst);
		if (v4mapped)
			ixa->ixa_flags |= IXAF_IS_IPV4;
		else
			ixa->ixa_flags &= ~IXAF_IS_IPV4;
		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
			if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
			    v4mapped, connp->conn_netstack)) {
				/* Mismatch - v4mapped/v6 specified by srcid. */
				mutex_exit(&connp->conn_lock);
				error = EADDRNOTAVAIL;
				goto failed;	/* Does freemsg() and mib. */
			}
		}
	} else {
		/* Connected case: neither sin nor sin6 was supplied */
		v6dst = connp->conn_faddr_v6;
		dstport = connp->conn_fport;
		flowinfo = connp->conn_flowinfo;
	}
	mutex_exit(&connp->conn_lock);

	/* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */
	if (ipp->ipp_fields & IPPF_ADDR) {
		/* Only take the address if its family matches the packet. */
		if (ixa->ixa_flags & IXAF_IS_IPV4) {
			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
				v6src = ipp->ipp_addr;
		} else {
			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
				v6src = ipp->ipp_addr;
		}
	}

	ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);

	switch (error) {
	case 0:
		break;
	case EADDRNOTAVAIL:
		/*
		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
		 * Don't have the application see that errno
		 */
		error = ENETUNREACH;
		goto failed;
	case ENETDOWN:
		/*
		 * Have !ipif_addr_ready address; drop packet silently
		 * until we can get applications to not send until we
		 * are ready.
		 */
		error = 0;
		goto failed;
	case EHOSTUNREACH:
	case ENETUNREACH:
		if (ixa->ixa_ire != NULL) {
			/*
			 * Let conn_ip_output/ire_send_noroute return
			 * the error and send any local ICMP error.
			 */
			error = 0;
			break;
		}
		/* FALLTHRU */
	default:
	failed:
		/* Also the target of the srcid mismatch path above. */
		freemsg(mp);
		UDPS_BUMP_MIB(us, udpOutErrors);
		goto done;
	}

	/*
	 * We might be going to a different destination than last time,
	 * thus check that TX allows the communication and compute any
	 * needed label.
	 *
	 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
	 * don't have to worry about concurrent threads.
	 */
	if (is_system_labeled()) {
		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
			UDPS_BUMP_MIB(us, udpOutErrors);
			error = ECONNREFUSED;
			freemsg(mp);
			goto done;
		}
		/*
		 * Check whether Trusted Solaris policy allows communication
		 * with this host, and pretend that the destination is
		 * unreachable if not.
		 * Compute any needed label and place it in ipp_label_v4/v6.
		 *
		 * Later conn_build_hdr_template/conn_prepend_hdr takes
		 * ipp_label_v4/v6 to form the packet.
		 *
		 * Tsol note: We have ipp structure local to this thread so
		 * no locking is needed.
		 */
		error = conn_update_label(connp, ixa, &v6dst, ipp);
		if (error != 0) {
			freemsg(mp);
			UDPS_BUMP_MIB(us, udpOutErrors);
			goto done;
		}
	}
	mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
	    flowinfo, mp, &error);
	if (mp == NULL) {
		/* Nothing to free here; mp is NULL on this path. */
		ASSERT(error != 0);
		UDPS_BUMP_MIB(us, udpOutErrors);
		goto done;
	}
	if (ixa->ixa_pktlen > IP_MAXPACKET) {
		error = EMSGSIZE;
		UDPS_BUMP_MIB(us, udpOutErrors);
		freemsg(mp);
		goto done;
	}
	/* We're done. Pass the packet to ip. */
	UDPS_BUMP_MIB(us, udpHCOutDatagrams);

	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);

	error = conn_ip_output(mp, ixa);
	/* No udpOutErrors if an error since IP increases its error counter */
	switch (error) {
	case 0:
		break;
	case EWOULDBLOCK:
		(void) ixa_check_drain_insert(connp, ixa);
		error = 0;
		break;
	case EADDRNOTAVAIL:
		/*
		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
		 * Don't have the application see that errno
		 */
		error = ENETUNREACH;
		/* FALLTHRU */
	default:
		mutex_enter(&connp->conn_lock);
		/*
		 * Clear the source and v6lastdst so we call ip_attr_connect
		 * for the next packet and try to pick a better source.
		 */
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_v6lastdst = ipv6_all_zeros;
		mutex_exit(&connp->conn_lock);
		break;
	}
done:
	/* Common exit: undo the cred/pid override and drop ixa and ipp. */
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = connp->conn_cred;	/* Restore */
	ixa->ixa_cpid = connp->conn_cpid;
	ixa_refrele(ixa);
	ip_pkt_free(ipp);
	kmem_free(ipp, sizeof (*ipp));
	return (error);
}

/*
 * Handle sending an M_DATA for a connected socket.
 * Handles both IPv4 and IPv6.
 *
 * Takes conn_lock itself and drops it before calling into IP.
 * Returns 0 or an errno value; mp is not handed back to the caller on any
 * path. EADDRNOTAVAIL is reported as ENETUNREACH, ENETDOWN from
 * ip_attr_connect() is a silent drop (0) and EWOULDBLOCK from
 * conn_ip_output() is reported as 0.
 */
static int
udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
{
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;
	int		error;
	ip_xmit_attr_t	*ixa;

	/*
	 * If no other thread is using conn_ixa this just gets a reference to
	 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
	 */
	ixa = conn_get_ixa(connp, B_FALSE);
	if (ixa == NULL) {
		UDPS_BUMP_MIB(us, udpOutErrors);
		freemsg(mp);
		return (ENOMEM);
	}

	ASSERT(cr != NULL);
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	/* Send with the caller's cred/pid; restored before returning. */
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;

	mutex_enter(&connp->conn_lock);
	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
	    connp->conn_fport, connp->conn_flowinfo, &error);

	if (mp == NULL) {
		ASSERT(error != 0);
		mutex_exit(&connp->conn_lock);
		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
		ixa->ixa_cred = connp->conn_cred;	/* Restore */
		ixa->ixa_cpid = connp->conn_cpid;
		ixa_refrele(ixa);
		UDPS_BUMP_MIB(us, udpOutErrors);
		/* mp is NULL on this path. */
		freemsg(mp);
		return (error);
	}

	/*
	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
	 * safe copy, then we need to fill in any pointers in it.
	 */
	if (ixa->ixa_ire == NULL) {
		in6_addr_t	faddr, saddr;
		in6_addr_t	nexthop;
		in_port_t	fport;

		/* Copy what we need so that conn_lock can be dropped. */
		saddr = connp->conn_saddr_v6;
		faddr = connp->conn_faddr_v6;
		fport = connp->conn_fport;
		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
		mutex_exit(&connp->conn_lock);

		error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
		    fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
		    IPDF_IPSEC);
		switch (error) {
		case 0:
			break;
		case EADDRNOTAVAIL:
			/*
			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
			 * Don't have the application see that errno
			 */
			error = ENETUNREACH;
			goto failed;
		case ENETDOWN:
			/*
			 * Have !ipif_addr_ready address; drop packet silently
			 * until we can get applications to not send until we
			 * are ready.
			 */
			error = 0;
			goto failed;
		case EHOSTUNREACH:
		case ENETUNREACH:
			if (ixa->ixa_ire != NULL) {
				/*
				 * Let conn_ip_output/ire_send_noroute return
				 * the error and send any local ICMP error.
				 */
				error = 0;
				break;
			}
			/* FALLTHRU */
		default:
		failed:
			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
			ixa->ixa_cred = connp->conn_cred;	/* Restore */
			ixa->ixa_cpid = connp->conn_cpid;
			ixa_refrele(ixa);
			freemsg(mp);
			UDPS_BUMP_MIB(us, udpOutErrors);
			return (error);
		}
	} else {
		/* Done with conn_t */
		mutex_exit(&connp->conn_lock);
	}
	ASSERT(ixa->ixa_ire != NULL);

	/* We're done. Pass the packet to ip. */
	UDPS_BUMP_MIB(us, udpHCOutDatagrams);

	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);

	error = conn_ip_output(mp, ixa);
	/* No udpOutErrors if an error since IP increases its error counter */
	switch (error) {
	case 0:
		break;
	case EWOULDBLOCK:
		(void) ixa_check_drain_insert(connp, ixa);
		error = 0;
		break;
	case EADDRNOTAVAIL:
		/*
		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
		 * Don't have the application see that errno
		 */
		error = ENETUNREACH;
		break;
	}
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = connp->conn_cred;	/* Restore */
	ixa->ixa_cpid = connp->conn_cpid;
	ixa_refrele(ixa);
	return (error);
}

/*
 * Handle sending an M_DATA to the last destination.
 * Handles both IPv4 and IPv6.
 *
 * NOTE: The caller must hold conn_lock and we drop it here.
 *
 * The caller supplies the ixa; its reference is released here on every
 * path. Returns 0 or an errno value. EADDRNOTAVAIL is reported as
 * ENETUNREACH, ENETDOWN from ip_attr_connect() is a silent drop (0) and
 * EWOULDBLOCK from conn_ip_output() is reported as 0.
 */
static int
udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
    ip_xmit_attr_t *ixa)
{
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;
	int		error;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(ixa != NULL);

	ASSERT(cr != NULL);
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	/* Send with the caller's cred/pid; restored before returning. */
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;

	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
	    connp->conn_lastdstport, connp->conn_lastflowinfo, &error);

	if (mp == NULL) {
		ASSERT(error != 0);
		mutex_exit(&connp->conn_lock);
		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
		ixa->ixa_cred = connp->conn_cred;	/* Restore */
		ixa->ixa_cpid = connp->conn_cpid;
		ixa_refrele(ixa);
		UDPS_BUMP_MIB(us, udpOutErrors);
		/* mp is NULL on this path. */
		freemsg(mp);
		return (error);
	}

	/*
	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
	 * safe copy, then we need to fill in any pointers in it.
	 */
	if (ixa->ixa_ire == NULL) {
		in6_addr_t	lastdst, lastsrc;
		in6_addr_t	nexthop;
		in_port_t	lastport;

		/* Copy what we need so that conn_lock can be dropped. */
		lastsrc = connp->conn_v6lastsrc;
		lastdst = connp->conn_v6lastdst;
		lastport = connp->conn_lastdstport;
		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
		mutex_exit(&connp->conn_lock);

		error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
		    &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
		    IPDF_VERIFY_DST | IPDF_IPSEC);
		switch (error) {
		case 0:
			break;
		case EADDRNOTAVAIL:
			/*
			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
			 * Don't have the application see that errno
			 */
			error = ENETUNREACH;
			goto failed;
		case ENETDOWN:
			/*
			 * Have !ipif_addr_ready address; drop packet silently
			 * until we can get applications to not send until we
			 * are ready.
			 */
			error = 0;
			goto failed;
		case EHOSTUNREACH:
		case ENETUNREACH:
			if (ixa->ixa_ire != NULL) {
				/*
				 * Let conn_ip_output/ire_send_noroute return
				 * the error and send any local ICMP error.
				 */
				error = 0;
				break;
			}
			/* FALLTHRU */
		default:
		failed:
			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
			ixa->ixa_cred = connp->conn_cred;	/* Restore */
			ixa->ixa_cpid = connp->conn_cpid;
			ixa_refrele(ixa);
			freemsg(mp);
			UDPS_BUMP_MIB(us, udpOutErrors);
			return (error);
		}
	} else {
		/* Done with conn_t */
		mutex_exit(&connp->conn_lock);
	}

	/* We're done. Pass the packet to ip. */
	UDPS_BUMP_MIB(us, udpHCOutDatagrams);

	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);

	error = conn_ip_output(mp, ixa);
	/* No udpOutErrors if an error since IP increases its error counter */
	switch (error) {
	case 0:
		break;
	case EWOULDBLOCK:
		(void) ixa_check_drain_insert(connp, ixa);
		error = 0;
		break;
	case EADDRNOTAVAIL:
		/*
		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
		 * Don't have the application see that errno
		 */
		error = ENETUNREACH;
		/* FALLTHRU */
	default:
		mutex_enter(&connp->conn_lock);
		/*
		 * Clear the source and v6lastdst so we call ip_attr_connect
		 * for the next packet and try to pick a better source.
		 */
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_v6lastdst = ipv6_all_zeros;
		mutex_exit(&connp->conn_lock);
		break;
	}
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = connp->conn_cred;	/* Restore */
	ixa->ixa_cpid = connp->conn_cpid;
	ixa_refrele(ixa);
	return (error);
}


/*
 * Prepend the header template and then fill in the source and
 * flowinfo. The caller needs to handle the destination address since
 * its setting is different if rthdr or source route.
 *
 * Returns NULL if allocation failed or if the packet would exceed IP_MAXPACKET.
 * When it returns NULL it sets errorp.
 */
static mblk_t *
udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
    const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
{
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;
	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
	boolean_t	hash_srcport = udp->udp_vxlanhash;
	uint_t		pktlen;
	uint_t		alloclen;
	uint_t		copylen;
	uint8_t		*iph;
	uint_t		ip_hdr_length;
	udpha_t		*udpha;
	uint32_t	cksum;
	ip_pkt_t	*ipp;
	uint16_t	srcport;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/*
	 * If we have source port hashing going on, determine the hash before
	 * we modify the mblk_t.
	 */
	if (hash_srcport == B_TRUE) {
		srcport = udp_srcport_hash(mp, UDP_HASH_VXLAN,
		    IPPORT_DYNAMIC_MIN, IPPORT_DYNAMIC_MAX,
		    ntohs(connp->conn_lport));
	}

	/*
	 * Copy the header template and leave space for an SPI
	 */
	copylen = connp->conn_ht_iphc_len;
	alloclen = copylen + (insert_spi ?
sizeof (uint32_t) : 0); 3337 pktlen = alloclen + msgdsize(mp); 3338 if (pktlen > IP_MAXPACKET) { 3339 freemsg(mp); 3340 *errorp = EMSGSIZE; 3341 return (NULL); 3342 } 3343 ixa->ixa_pktlen = pktlen; 3344 3345 /* check/fix buffer config, setup pointers into it */ 3346 iph = mp->b_rptr - alloclen; 3347 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { 3348 mblk_t *mp1; 3349 3350 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); 3351 if (mp1 == NULL) { 3352 freemsg(mp); 3353 *errorp = ENOMEM; 3354 return (NULL); 3355 } 3356 mp1->b_wptr = DB_LIM(mp1); 3357 mp1->b_cont = mp; 3358 mp = mp1; 3359 iph = (mp->b_wptr - alloclen); 3360 } 3361 mp->b_rptr = iph; 3362 bcopy(connp->conn_ht_iphc, iph, copylen); 3363 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); 3364 3365 ixa->ixa_ip_hdr_length = ip_hdr_length; 3366 udpha = (udpha_t *)(iph + ip_hdr_length); 3367 3368 /* 3369 * Setup header length and prepare for ULP checksum done in IP. 3370 * udp_build_hdr_template has already massaged any routing header 3371 * and placed the result in conn_sum. 3372 * 3373 * We make it easy for IP to include our pseudo header 3374 * by putting our length in uha_checksum. 
3375 */ 3376 cksum = pktlen - ip_hdr_length; 3377 udpha->uha_length = htons(cksum); 3378 3379 cksum += connp->conn_sum; 3380 cksum = (cksum >> 16) + (cksum & 0xFFFF); 3381 ASSERT(cksum < 0x10000); 3382 3383 ipp = &connp->conn_xmit_ipp; 3384 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3385 ipha_t *ipha = (ipha_t *)iph; 3386 3387 ipha->ipha_length = htons((uint16_t)pktlen); 3388 3389 /* IP does the checksum if uha_checksum is non-zero */ 3390 if (us->us_do_checksum) 3391 udpha->uha_checksum = htons(cksum); 3392 3393 /* if IP_PKTINFO specified an addres it wins over bind() */ 3394 if ((ipp->ipp_fields & IPPF_ADDR) && 3395 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3396 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); 3397 ipha->ipha_src = ipp->ipp_addr_v4; 3398 } else { 3399 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 3400 } 3401 } else { 3402 ip6_t *ip6h = (ip6_t *)iph; 3403 3404 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); 3405 udpha->uha_checksum = htons(cksum); 3406 3407 /* if IP_PKTINFO specified an addres it wins over bind() */ 3408 if ((ipp->ipp_fields & IPPF_ADDR) && 3409 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3410 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); 3411 ip6h->ip6_src = ipp->ipp_addr; 3412 } else { 3413 ip6h->ip6_src = *v6src; 3414 } 3415 ip6h->ip6_vcf = 3416 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 3417 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 3418 if (ipp->ipp_fields & IPPF_TCLASS) { 3419 /* Overrides the class part of flowinfo */ 3420 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 3421 ipp->ipp_tclass); 3422 } 3423 } 3424 3425 /* Insert all-0s SPI now. 
*/ 3426 if (insert_spi) 3427 *((uint32_t *)(udpha + 1)) = 0; 3428 3429 udpha->uha_dst_port = dstport; 3430 if (hash_srcport == B_TRUE) 3431 udpha->uha_src_port = htons(srcport); 3432 3433 return (mp); 3434 } 3435 3436 /* 3437 * Send a T_UDERR_IND in response to an M_DATA 3438 */ 3439 static void 3440 udp_ud_err_connected(conn_t *connp, t_scalar_t error) 3441 { 3442 struct sockaddr_storage ss; 3443 sin_t *sin; 3444 sin6_t *sin6; 3445 struct sockaddr *addr; 3446 socklen_t addrlen; 3447 mblk_t *mp1; 3448 3449 mutex_enter(&connp->conn_lock); 3450 /* Initialize addr and addrlen as if they're passed in */ 3451 if (connp->conn_family == AF_INET) { 3452 sin = (sin_t *)&ss; 3453 *sin = sin_null; 3454 sin->sin_family = AF_INET; 3455 sin->sin_port = connp->conn_fport; 3456 sin->sin_addr.s_addr = connp->conn_faddr_v4; 3457 addr = (struct sockaddr *)sin; 3458 addrlen = sizeof (*sin); 3459 } else { 3460 sin6 = (sin6_t *)&ss; 3461 *sin6 = sin6_null; 3462 sin6->sin6_family = AF_INET6; 3463 sin6->sin6_port = connp->conn_fport; 3464 sin6->sin6_flowinfo = connp->conn_flowinfo; 3465 sin6->sin6_addr = connp->conn_faddr_v6; 3466 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && 3467 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 3468 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; 3469 } else { 3470 sin6->sin6_scope_id = 0; 3471 } 3472 sin6->__sin6_src_id = 0; 3473 addr = (struct sockaddr *)sin6; 3474 addrlen = sizeof (*sin6); 3475 } 3476 mutex_exit(&connp->conn_lock); 3477 3478 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); 3479 if (mp1 != NULL) 3480 putnext(connp->conn_rq, mp1); 3481 } 3482 3483 /* 3484 * This routine handles all messages passed downstream. It either 3485 * consumes the message or passes it downstream; it never queues a 3486 * a message. 3487 * 3488 * Also entry point for sockfs when udp is in "direct sockfs" mode. 
This mode 3489 * is valid when we are directly beneath the stream head, and thus sockfs 3490 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3491 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3492 * connected endpoints. 3493 */ 3494 int 3495 udp_wput(queue_t *q, mblk_t *mp) 3496 { 3497 sin6_t *sin6; 3498 sin_t *sin = NULL; 3499 uint_t srcid; 3500 conn_t *connp = Q_TO_CONN(q); 3501 udp_t *udp = connp->conn_udp; 3502 int error = 0; 3503 struct sockaddr *addr = NULL; 3504 socklen_t addrlen; 3505 udp_stack_t *us = udp->udp_us; 3506 struct T_unitdata_req *tudr; 3507 mblk_t *data_mp; 3508 ushort_t ipversion; 3509 cred_t *cr; 3510 pid_t pid; 3511 3512 /* 3513 * We directly handle several cases here: T_UNITDATA_REQ message 3514 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3515 * socket. 3516 */ 3517 switch (DB_TYPE(mp)) { 3518 case M_DATA: 3519 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3520 /* Not connected; address is required */ 3521 UDPS_BUMP_MIB(us, udpOutErrors); 3522 UDP_DBGSTAT(us, udp_data_notconn); 3523 UDP_STAT(us, udp_out_err_notconn); 3524 freemsg(mp); 3525 return (0); 3526 } 3527 /* 3528 * All Solaris components should pass a db_credp 3529 * for this message, hence we ASSERT. 3530 * On production kernels we return an error to be robust against 3531 * random streams modules sitting on top of us. 
3532 */ 3533 cr = msg_getcred(mp, &pid); 3534 ASSERT(cr != NULL); 3535 if (cr == NULL) { 3536 UDPS_BUMP_MIB(us, udpOutErrors); 3537 freemsg(mp); 3538 return (0); 3539 } 3540 ASSERT(udp->udp_issocket); 3541 UDP_DBGSTAT(us, udp_data_conn); 3542 error = udp_output_connected(connp, mp, cr, pid); 3543 if (error != 0) { 3544 UDP_STAT(us, udp_out_err_output); 3545 if (connp->conn_rq != NULL) 3546 udp_ud_err_connected(connp, (t_scalar_t)error); 3547 #ifdef DEBUG 3548 printf("udp_output_connected returned %d\n", error); 3549 #endif 3550 } 3551 return (0); 3552 3553 case M_PROTO: 3554 case M_PCPROTO: 3555 tudr = (struct T_unitdata_req *)mp->b_rptr; 3556 if (MBLKL(mp) < sizeof (*tudr) || 3557 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3558 udp_wput_other(q, mp); 3559 return (0); 3560 } 3561 break; 3562 3563 default: 3564 udp_wput_other(q, mp); 3565 return (0); 3566 } 3567 3568 /* Handle valid T_UNITDATA_REQ here */ 3569 data_mp = mp->b_cont; 3570 if (data_mp == NULL) { 3571 error = EPROTO; 3572 goto ud_error2; 3573 } 3574 mp->b_cont = NULL; 3575 3576 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3577 error = EADDRNOTAVAIL; 3578 goto ud_error2; 3579 } 3580 3581 /* 3582 * All Solaris components should pass a db_credp 3583 * for this TPI message, hence we should ASSERT. 3584 * However, RPC (svc_clts_ksend) does this odd thing where it 3585 * passes the options from a T_UNITDATA_IND unchanged in a 3586 * T_UNITDATA_REQ. While that is the right thing to do for 3587 * some options, SCM_UCRED being the key one, this also makes it 3588 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3589 */ 3590 cr = msg_getcred(mp, &pid); 3591 if (cr == NULL) { 3592 cr = connp->conn_cred; 3593 pid = connp->conn_cpid; 3594 } 3595 3596 /* 3597 * If a port has not been bound to the stream, fail. 3598 * This is not a problem when sockfs is directly 3599 * above us, because it will ensure that the socket 3600 * is first bound before allowing data to be sent. 
3601 */ 3602 if (udp->udp_state == TS_UNBND) { 3603 error = EPROTO; 3604 goto ud_error2; 3605 } 3606 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3607 addrlen = tudr->DEST_length; 3608 3609 switch (connp->conn_family) { 3610 case AF_INET6: 3611 sin6 = (sin6_t *)addr; 3612 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3613 (sin6->sin6_family != AF_INET6)) { 3614 error = EADDRNOTAVAIL; 3615 goto ud_error2; 3616 } 3617 3618 srcid = sin6->__sin6_src_id; 3619 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3620 /* 3621 * Destination is a non-IPv4-compatible IPv6 address. 3622 * Send out an IPv6 format packet. 3623 */ 3624 3625 /* 3626 * If the local address is a mapped address return 3627 * an error. 3628 * It would be possible to send an IPv6 packet but the 3629 * response would never make it back to the application 3630 * since it is bound to a mapped address. 3631 */ 3632 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3633 error = EADDRNOTAVAIL; 3634 goto ud_error2; 3635 } 3636 3637 UDP_DBGSTAT(us, udp_out_ipv6); 3638 3639 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3640 sin6->sin6_addr = ipv6_loopback; 3641 ipversion = IPV6_VERSION; 3642 } else { 3643 if (connp->conn_ipv6_v6only) { 3644 error = EADDRNOTAVAIL; 3645 goto ud_error2; 3646 } 3647 3648 /* 3649 * If the local address is not zero or a mapped address 3650 * return an error. It would be possible to send an 3651 * IPv4 packet but the response would never make it 3652 * back to the application since it is bound to a 3653 * non-mapped address. 
3654 */ 3655 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3656 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3657 error = EADDRNOTAVAIL; 3658 goto ud_error2; 3659 } 3660 UDP_DBGSTAT(us, udp_out_mapped); 3661 3662 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3663 V4_PART_OF_V6(sin6->sin6_addr) = 3664 htonl(INADDR_LOOPBACK); 3665 } 3666 ipversion = IPV4_VERSION; 3667 } 3668 3669 if (tudr->OPT_length != 0) { 3670 /* 3671 * If we are connected then the destination needs to be 3672 * the same as the connected one. 3673 */ 3674 if (udp->udp_state == TS_DATA_XFER && 3675 !conn_same_as_last_v6(connp, sin6)) { 3676 error = EISCONN; 3677 goto ud_error2; 3678 } 3679 UDP_STAT(us, udp_out_opt); 3680 error = udp_output_ancillary(connp, NULL, sin6, 3681 data_mp, mp, NULL, cr, pid); 3682 } else { 3683 ip_xmit_attr_t *ixa; 3684 3685 /* 3686 * We have to allocate an ip_xmit_attr_t before we grab 3687 * conn_lock and we need to hold conn_lock once we've 3688 * checked conn_same_as_last_v6 to handle concurrent 3689 * send* calls on a socket. 
3690 */ 3691 ixa = conn_get_ixa(connp, B_FALSE); 3692 if (ixa == NULL) { 3693 error = ENOMEM; 3694 goto ud_error2; 3695 } 3696 mutex_enter(&connp->conn_lock); 3697 3698 if (conn_same_as_last_v6(connp, sin6) && 3699 connp->conn_lastsrcid == srcid && 3700 ipsec_outbound_policy_current(ixa)) { 3701 UDP_DBGSTAT(us, udp_out_lastdst); 3702 /* udp_output_lastdst drops conn_lock */ 3703 error = udp_output_lastdst(connp, data_mp, cr, 3704 pid, ixa); 3705 } else { 3706 UDP_DBGSTAT(us, udp_out_diffdst); 3707 /* udp_output_newdst drops conn_lock */ 3708 error = udp_output_newdst(connp, data_mp, NULL, 3709 sin6, ipversion, cr, pid, ixa); 3710 } 3711 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3712 } 3713 if (error == 0) { 3714 freeb(mp); 3715 return (0); 3716 } 3717 break; 3718 3719 case AF_INET: 3720 sin = (sin_t *)addr; 3721 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 3722 (sin->sin_family != AF_INET)) { 3723 error = EADDRNOTAVAIL; 3724 goto ud_error2; 3725 } 3726 UDP_DBGSTAT(us, udp_out_ipv4); 3727 if (sin->sin_addr.s_addr == INADDR_ANY) 3728 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 3729 ipversion = IPV4_VERSION; 3730 3731 srcid = 0; 3732 if (tudr->OPT_length != 0) { 3733 /* 3734 * If we are connected then the destination needs to be 3735 * the same as the connected one. 3736 */ 3737 if (udp->udp_state == TS_DATA_XFER && 3738 !conn_same_as_last_v4(connp, sin)) { 3739 error = EISCONN; 3740 goto ud_error2; 3741 } 3742 UDP_STAT(us, udp_out_opt); 3743 error = udp_output_ancillary(connp, sin, NULL, 3744 data_mp, mp, NULL, cr, pid); 3745 } else { 3746 ip_xmit_attr_t *ixa; 3747 3748 /* 3749 * We have to allocate an ip_xmit_attr_t before we grab 3750 * conn_lock and we need to hold conn_lock once we've 3751 * checked conn_same_as_last_v4 to handle concurrent 3752 * send* calls on a socket. 
3753 */ 3754 ixa = conn_get_ixa(connp, B_FALSE); 3755 if (ixa == NULL) { 3756 error = ENOMEM; 3757 goto ud_error2; 3758 } 3759 mutex_enter(&connp->conn_lock); 3760 3761 if (conn_same_as_last_v4(connp, sin) && 3762 ipsec_outbound_policy_current(ixa)) { 3763 UDP_DBGSTAT(us, udp_out_lastdst); 3764 /* udp_output_lastdst drops conn_lock */ 3765 error = udp_output_lastdst(connp, data_mp, cr, 3766 pid, ixa); 3767 } else { 3768 UDP_DBGSTAT(us, udp_out_diffdst); 3769 /* udp_output_newdst drops conn_lock */ 3770 error = udp_output_newdst(connp, data_mp, sin, 3771 NULL, ipversion, cr, pid, ixa); 3772 } 3773 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3774 } 3775 if (error == 0) { 3776 freeb(mp); 3777 return (0); 3778 } 3779 break; 3780 } 3781 UDP_STAT(us, udp_out_err_output); 3782 ASSERT(mp != NULL); 3783 /* mp is freed by the following routine */ 3784 udp_ud_err(q, mp, (t_scalar_t)error); 3785 return (0); 3786 3787 ud_error2: 3788 UDPS_BUMP_MIB(us, udpOutErrors); 3789 freemsg(data_mp); 3790 UDP_STAT(us, udp_out_err_output); 3791 ASSERT(mp != NULL); 3792 /* mp is freed by the following routine */ 3793 udp_ud_err(q, mp, (t_scalar_t)error); 3794 return (0); 3795 } 3796 3797 /* 3798 * Handle the case of the IP address, port, flow label being different 3799 * for both IPv4 and IPv6. 3800 * 3801 * NOTE: The caller must hold conn_lock and we drop it here. 
3802 */ 3803 static int 3804 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 3805 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 3806 { 3807 uint_t srcid; 3808 uint32_t flowinfo; 3809 udp_t *udp = connp->conn_udp; 3810 int error = 0; 3811 ip_xmit_attr_t *oldixa; 3812 udp_stack_t *us = udp->udp_us; 3813 in6_addr_t v6src; 3814 in6_addr_t v6dst; 3815 in6_addr_t v6nexthop; 3816 in_port_t dstport; 3817 3818 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3819 ASSERT(ixa != NULL); 3820 /* 3821 * We hold conn_lock across all the use and modifications of 3822 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 3823 * stay consistent. 3824 */ 3825 3826 ASSERT(cr != NULL); 3827 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3828 ixa->ixa_cred = cr; 3829 ixa->ixa_cpid = pid; 3830 if (is_system_labeled()) { 3831 /* We need to restart with a label based on the cred */ 3832 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3833 } 3834 3835 /* 3836 * If we are connected then the destination needs to be the 3837 * same as the connected one, which is not the case here since we 3838 * checked for that above. 3839 */ 3840 if (udp->udp_state == TS_DATA_XFER) { 3841 mutex_exit(&connp->conn_lock); 3842 error = EISCONN; 3843 goto ud_error; 3844 } 3845 3846 /* In case previous destination was multicast or multirt */ 3847 ip_attr_newdst(ixa); 3848 3849 /* 3850 * If laddr is unspecified then we look at sin6_src_id. 3851 * We will give precedence to a source address set with IPV6_PKTINFO 3852 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 3853 * want ip_attr_connect to select a source (since it can fail) when 3854 * IPV6_PKTINFO is specified. 3855 * If this doesn't result in a source address then we get a source 3856 * from ip_attr_connect() below. 
3857 */ 3858 v6src = connp->conn_saddr_v6; 3859 if (sin != NULL) { 3860 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 3861 dstport = sin->sin_port; 3862 flowinfo = 0; 3863 /* Don't bother with ip_srcid_find_id(), but indicate anyway. */ 3864 srcid = 0; 3865 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3866 ixa->ixa_flags |= IXAF_IS_IPV4; 3867 } else { 3868 boolean_t v4mapped; 3869 3870 v6dst = sin6->sin6_addr; 3871 dstport = sin6->sin6_port; 3872 flowinfo = sin6->sin6_flowinfo; 3873 srcid = sin6->__sin6_src_id; 3874 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 3875 ixa->ixa_scopeid = sin6->sin6_scope_id; 3876 ixa->ixa_flags |= IXAF_SCOPEID_SET; 3877 } else { 3878 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3879 } 3880 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst); 3881 if (v4mapped) 3882 ixa->ixa_flags |= IXAF_IS_IPV4; 3883 else 3884 ixa->ixa_flags &= ~IXAF_IS_IPV4; 3885 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 3886 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3887 v4mapped, connp->conn_netstack)) { 3888 /* Mismatched v4mapped/v6 specified by srcid. */ 3889 mutex_exit(&connp->conn_lock); 3890 error = EADDRNOTAVAIL; 3891 goto ud_error; 3892 } 3893 } 3894 } 3895 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ 3896 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) { 3897 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 3898 3899 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3900 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3901 v6src = ipp->ipp_addr; 3902 } else { 3903 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3904 v6src = ipp->ipp_addr; 3905 } 3906 } 3907 3908 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 3909 mutex_exit(&connp->conn_lock); 3910 3911 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 3912 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 3913 switch (error) { 3914 case 0: 3915 break; 3916 case EADDRNOTAVAIL: 3917 /* 3918 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3919 * Don't have the application see that errno 3920 */ 3921 error = ENETUNREACH; 3922 goto failed; 3923 case ENETDOWN: 3924 /* 3925 * Have !ipif_addr_ready address; drop packet silently 3926 * until we can get applications to not send until we 3927 * are ready. 3928 */ 3929 error = 0; 3930 goto failed; 3931 case EHOSTUNREACH: 3932 case ENETUNREACH: 3933 if (ixa->ixa_ire != NULL) { 3934 /* 3935 * Let conn_ip_output/ire_send_noroute return 3936 * the error and send any local ICMP error. 3937 */ 3938 error = 0; 3939 break; 3940 } 3941 /* FALLTHRU */ 3942 failed: 3943 default: 3944 goto ud_error; 3945 } 3946 3947 3948 /* 3949 * Cluster note: we let the cluster hook know that we are sending to a 3950 * new address and/or port. 3951 */ 3952 if (cl_inet_connect2 != NULL) { 3953 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 3954 if (error != 0) { 3955 error = EHOSTUNREACH; 3956 goto ud_error; 3957 } 3958 } 3959 3960 mutex_enter(&connp->conn_lock); 3961 /* 3962 * While we dropped the lock some other thread might have connected 3963 * this socket. If so we bail out with EISCONN to ensure that the 3964 * connecting thread is the one that updates conn_ixa, conn_ht_* 3965 * and conn_*last*. 3966 */ 3967 if (udp->udp_state == TS_DATA_XFER) { 3968 mutex_exit(&connp->conn_lock); 3969 error = EISCONN; 3970 goto ud_error; 3971 } 3972 3973 /* 3974 * We need to rebuild the headers if 3975 * - we are labeling packets (could be different for different 3976 * destinations) 3977 * - we have a source route (or routing header) since we need to 3978 * massage that to get the pseudo-header checksum 3979 * - the IP version is different than the last time 3980 * - a socket option with COA_HEADER_CHANGED has been set which 3981 * set conn_v6lastdst to zero. 3982 * 3983 * Otherwise the prepend function will just update the src, dst, 3984 * dstport, and flow label. 
3985 */ 3986 if (is_system_labeled()) { 3987 /* TX MLP requires SCM_UCRED and don't have that here */ 3988 if (connp->conn_mlp_type != mlptSingle) { 3989 mutex_exit(&connp->conn_lock); 3990 error = ECONNREFUSED; 3991 goto ud_error; 3992 } 3993 /* 3994 * Check whether Trusted Solaris policy allows communication 3995 * with this host, and pretend that the destination is 3996 * unreachable if not. 3997 * Compute any needed label and place it in ipp_label_v4/v6. 3998 * 3999 * Later conn_build_hdr_template/conn_prepend_hdr takes 4000 * ipp_label_v4/v6 to form the packet. 4001 * 4002 * Tsol note: Since we hold conn_lock we know no other 4003 * thread manipulates conn_xmit_ipp. 4004 */ 4005 error = conn_update_label(connp, ixa, &v6dst, 4006 &connp->conn_xmit_ipp); 4007 if (error != 0) { 4008 mutex_exit(&connp->conn_lock); 4009 goto ud_error; 4010 } 4011 /* Rebuild the header template */ 4012 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4013 flowinfo); 4014 if (error != 0) { 4015 mutex_exit(&connp->conn_lock); 4016 goto ud_error; 4017 } 4018 } else if ((connp->conn_xmit_ipp.ipp_fields & 4019 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 4020 ipversion != connp->conn_lastipversion || 4021 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 4022 /* Rebuild the header template */ 4023 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4024 flowinfo); 4025 if (error != 0) { 4026 mutex_exit(&connp->conn_lock); 4027 goto ud_error; 4028 } 4029 } else { 4030 /* Simply update the destination address if no source route */ 4031 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4032 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 4033 4034 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 4035 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 4036 ipha->ipha_fragment_offset_and_flags |= 4037 IPH_DF_HTONS; 4038 } else { 4039 ipha->ipha_fragment_offset_and_flags &= 4040 ~IPH_DF_HTONS; 4041 } 4042 } else { 4043 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 4044 ip6h->ip6_dst = v6dst; 4045 } 4046 
} 4047 4048 /* 4049 * Remember the dst/dstport etc which corresponds to the built header 4050 * template and conn_ixa. 4051 */ 4052 oldixa = conn_replace_ixa(connp, ixa); 4053 connp->conn_v6lastdst = v6dst; 4054 connp->conn_lastipversion = ipversion; 4055 connp->conn_lastdstport = dstport; 4056 connp->conn_lastflowinfo = flowinfo; 4057 connp->conn_lastscopeid = ixa->ixa_scopeid; 4058 connp->conn_lastsrcid = srcid; 4059 /* Also remember a source to use together with lastdst */ 4060 connp->conn_v6lastsrc = v6src; 4061 4062 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 4063 dstport, flowinfo, &error); 4064 4065 /* Done with conn_t */ 4066 mutex_exit(&connp->conn_lock); 4067 ixa_refrele(oldixa); 4068 4069 if (data_mp == NULL) { 4070 ASSERT(error != 0); 4071 goto ud_error; 4072 } 4073 4074 /* We're done. Pass the packet to ip. */ 4075 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 4076 4077 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 4078 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *, 4079 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]); 4080 4081 error = conn_ip_output(data_mp, ixa); 4082 /* No udpOutErrors if an error since IP increases its error counter */ 4083 switch (error) { 4084 case 0: 4085 break; 4086 case EWOULDBLOCK: 4087 (void) ixa_check_drain_insert(connp, ixa); 4088 error = 0; 4089 break; 4090 case EADDRNOTAVAIL: 4091 /* 4092 * IXAF_VERIFY_SOURCE tells us to pick a better source. 4093 * Don't have the application see that errno 4094 */ 4095 error = ENETUNREACH; 4096 /* FALLTHRU */ 4097 default: 4098 mutex_enter(&connp->conn_lock); 4099 /* 4100 * Clear the source and v6lastdst so we call ip_attr_connect 4101 * for the next packet and try to pick a better source. 
4102 */ 4103 if (connp->conn_mcbc_bind) 4104 connp->conn_saddr_v6 = ipv6_all_zeros; 4105 else 4106 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 4107 connp->conn_v6lastdst = ipv6_all_zeros; 4108 mutex_exit(&connp->conn_lock); 4109 break; 4110 } 4111 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4112 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4113 ixa->ixa_cpid = connp->conn_cpid; 4114 ixa_refrele(ixa); 4115 return (error); 4116 4117 ud_error: 4118 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4119 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4120 ixa->ixa_cpid = connp->conn_cpid; 4121 ixa_refrele(ixa); 4122 4123 freemsg(data_mp); 4124 UDPS_BUMP_MIB(us, udpOutErrors); 4125 UDP_STAT(us, udp_out_err_output); 4126 return (error); 4127 } 4128 4129 /* ARGSUSED */ 4130 static int 4131 udp_wput_fallback(queue_t *wq, mblk_t *mp) 4132 { 4133 #ifdef DEBUG 4134 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 4135 #endif 4136 freemsg(mp); 4137 return (0); 4138 } 4139 4140 4141 /* 4142 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
4143 */ 4144 static void 4145 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 4146 { 4147 void *data; 4148 mblk_t *datamp = mp->b_cont; 4149 conn_t *connp = Q_TO_CONN(q); 4150 udp_t *udp = connp->conn_udp; 4151 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 4152 4153 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 4154 cmdp->cb_error = EPROTO; 4155 qreply(q, mp); 4156 return; 4157 } 4158 data = datamp->b_rptr; 4159 4160 mutex_enter(&connp->conn_lock); 4161 switch (cmdp->cb_cmd) { 4162 case TI_GETPEERNAME: 4163 if (udp->udp_state != TS_DATA_XFER) 4164 cmdp->cb_error = ENOTCONN; 4165 else 4166 cmdp->cb_error = conn_getpeername(connp, data, 4167 &cmdp->cb_len); 4168 break; 4169 case TI_GETMYNAME: 4170 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); 4171 break; 4172 default: 4173 cmdp->cb_error = EINVAL; 4174 break; 4175 } 4176 mutex_exit(&connp->conn_lock); 4177 4178 qreply(q, mp); 4179 } 4180 4181 static void 4182 udp_use_pure_tpi(udp_t *udp) 4183 { 4184 conn_t *connp = udp->udp_connp; 4185 4186 mutex_enter(&connp->conn_lock); 4187 udp->udp_issocket = B_FALSE; 4188 mutex_exit(&connp->conn_lock); 4189 UDP_STAT(udp->udp_us, udp_sock_fallback); 4190 } 4191 4192 static void 4193 udp_wput_other(queue_t *q, mblk_t *mp) 4194 { 4195 uchar_t *rptr = mp->b_rptr; 4196 struct iocblk *iocp; 4197 conn_t *connp = Q_TO_CONN(q); 4198 udp_t *udp = connp->conn_udp; 4199 cred_t *cr; 4200 4201 switch (mp->b_datap->db_type) { 4202 case M_CMD: 4203 udp_wput_cmdblk(q, mp); 4204 return; 4205 4206 case M_PROTO: 4207 case M_PCPROTO: 4208 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4209 /* 4210 * If the message does not contain a PRIM_type, 4211 * throw it away. 
4212 */ 4213 freemsg(mp); 4214 return; 4215 } 4216 switch (((t_primp_t)rptr)->type) { 4217 case T_ADDR_REQ: 4218 udp_addr_req(q, mp); 4219 return; 4220 case O_T_BIND_REQ: 4221 case T_BIND_REQ: 4222 udp_tpi_bind(q, mp); 4223 return; 4224 case T_CONN_REQ: 4225 udp_tpi_connect(q, mp); 4226 return; 4227 case T_CAPABILITY_REQ: 4228 udp_capability_req(q, mp); 4229 return; 4230 case T_INFO_REQ: 4231 udp_info_req(q, mp); 4232 return; 4233 case T_UNITDATA_REQ: 4234 /* 4235 * If a T_UNITDATA_REQ gets here, the address must 4236 * be bad. Valid T_UNITDATA_REQs are handled 4237 * in udp_wput. 4238 */ 4239 udp_ud_err(q, mp, EADDRNOTAVAIL); 4240 return; 4241 case T_UNBIND_REQ: 4242 udp_tpi_unbind(q, mp); 4243 return; 4244 case T_SVR4_OPTMGMT_REQ: 4245 /* 4246 * All Solaris components should pass a db_credp 4247 * for this TPI message, hence we ASSERT. 4248 * But in case there is some other M_PROTO that looks 4249 * like a TPI message sent by some other kernel 4250 * component, we check and return an error. 4251 */ 4252 cr = msg_getcred(mp, NULL); 4253 ASSERT(cr != NULL); 4254 if (cr == NULL) { 4255 udp_err_ack(q, mp, TSYSERR, EINVAL); 4256 return; 4257 } 4258 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 4259 cr)) { 4260 svr4_optcom_req(q, mp, cr, &udp_opt_obj); 4261 } 4262 return; 4263 4264 case T_OPTMGMT_REQ: 4265 /* 4266 * All Solaris components should pass a db_credp 4267 * for this TPI message, hence we ASSERT. 4268 * But in case there is some other M_PROTO that looks 4269 * like a TPI message sent by some other kernel 4270 * component, we check and return an error. 4271 */ 4272 cr = msg_getcred(mp, NULL); 4273 ASSERT(cr != NULL); 4274 if (cr == NULL) { 4275 udp_err_ack(q, mp, TSYSERR, EINVAL); 4276 return; 4277 } 4278 tpi_optcom_req(q, mp, cr, &udp_opt_obj); 4279 return; 4280 4281 case T_DISCON_REQ: 4282 udp_tpi_disconnect(q, mp); 4283 return; 4284 4285 /* The following TPI message is not supported by udp. 
*/ 4286 case O_T_CONN_RES: 4287 case T_CONN_RES: 4288 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4289 return; 4290 4291 /* The following 3 TPI requests are illegal for udp. */ 4292 case T_DATA_REQ: 4293 case T_EXDATA_REQ: 4294 case T_ORDREL_REQ: 4295 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4296 return; 4297 default: 4298 break; 4299 } 4300 break; 4301 case M_FLUSH: 4302 if (*rptr & FLUSHW) 4303 flushq(q, FLUSHDATA); 4304 break; 4305 case M_IOCTL: 4306 iocp = (struct iocblk *)mp->b_rptr; 4307 switch (iocp->ioc_cmd) { 4308 case TI_GETPEERNAME: 4309 if (udp->udp_state != TS_DATA_XFER) { 4310 /* 4311 * If a default destination address has not 4312 * been associated with the stream, then we 4313 * don't know the peer's name. 4314 */ 4315 iocp->ioc_error = ENOTCONN; 4316 iocp->ioc_count = 0; 4317 mp->b_datap->db_type = M_IOCACK; 4318 qreply(q, mp); 4319 return; 4320 } 4321 /* FALLTHRU */ 4322 case TI_GETMYNAME: 4323 /* 4324 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4325 * need to copyin the user's strbuf structure. 4326 * Processing will continue in the M_IOCDATA case 4327 * below. 4328 */ 4329 mi_copyin(q, mp, NULL, 4330 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4331 return; 4332 case _SIOCSOCKFALLBACK: 4333 /* 4334 * Either sockmod is about to be popped and the 4335 * socket would now be treated as a plain stream, 4336 * or a module is about to be pushed so we have 4337 * to follow pure TPI semantics. 4338 */ 4339 if (!udp->udp_issocket) { 4340 DB_TYPE(mp) = M_IOCNAK; 4341 iocp->ioc_error = EINVAL; 4342 } else { 4343 udp_use_pure_tpi(udp); 4344 4345 DB_TYPE(mp) = M_IOCACK; 4346 iocp->ioc_error = 0; 4347 } 4348 iocp->ioc_count = 0; 4349 iocp->ioc_rval = 0; 4350 qreply(q, mp); 4351 return; 4352 default: 4353 break; 4354 } 4355 break; 4356 case M_IOCDATA: 4357 udp_wput_iocdata(q, mp); 4358 return; 4359 default: 4360 /* Unrecognized messages are passed through without change. 
*/ 4361 break; 4362 } 4363 ip_wput_nondata(q, mp); 4364 } 4365 4366 /* 4367 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 4368 * messages. 4369 */ 4370 static void 4371 udp_wput_iocdata(queue_t *q, mblk_t *mp) 4372 { 4373 mblk_t *mp1; 4374 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4375 STRUCT_HANDLE(strbuf, sb); 4376 uint_t addrlen; 4377 conn_t *connp = Q_TO_CONN(q); 4378 udp_t *udp = connp->conn_udp; 4379 4380 /* Make sure it is one of ours. */ 4381 switch (iocp->ioc_cmd) { 4382 case TI_GETMYNAME: 4383 case TI_GETPEERNAME: 4384 break; 4385 default: 4386 ip_wput_nondata(q, mp); 4387 return; 4388 } 4389 4390 switch (mi_copy_state(q, mp, &mp1)) { 4391 case -1: 4392 return; 4393 case MI_COPY_CASE(MI_COPY_IN, 1): 4394 break; 4395 case MI_COPY_CASE(MI_COPY_OUT, 1): 4396 /* 4397 * The address has been copied out, so now 4398 * copyout the strbuf. 4399 */ 4400 mi_copyout(q, mp); 4401 return; 4402 case MI_COPY_CASE(MI_COPY_OUT, 2): 4403 /* 4404 * The address and strbuf have been copied out. 4405 * We're done, so just acknowledge the original 4406 * M_IOCTL. 4407 */ 4408 mi_copy_done(q, mp, 0); 4409 return; 4410 default: 4411 /* 4412 * Something strange has happened, so acknowledge 4413 * the original M_IOCTL with an EPROTO error. 4414 */ 4415 mi_copy_done(q, mp, EPROTO); 4416 return; 4417 } 4418 4419 /* 4420 * Now we have the strbuf structure for TI_GETMYNAME 4421 * and TI_GETPEERNAME. Next we copyout the requested 4422 * address and then we'll copyout the strbuf. 
4423 */ 4424 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 4425 4426 if (connp->conn_family == AF_INET) 4427 addrlen = sizeof (sin_t); 4428 else 4429 addrlen = sizeof (sin6_t); 4430 4431 if (STRUCT_FGET(sb, maxlen) < addrlen) { 4432 mi_copy_done(q, mp, EINVAL); 4433 return; 4434 } 4435 4436 switch (iocp->ioc_cmd) { 4437 case TI_GETMYNAME: 4438 break; 4439 case TI_GETPEERNAME: 4440 if (udp->udp_state != TS_DATA_XFER) { 4441 mi_copy_done(q, mp, ENOTCONN); 4442 return; 4443 } 4444 break; 4445 } 4446 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 4447 if (!mp1) 4448 return; 4449 4450 STRUCT_FSET(sb, len, addrlen); 4451 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4452 case TI_GETMYNAME: 4453 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, 4454 &addrlen); 4455 break; 4456 case TI_GETPEERNAME: 4457 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, 4458 &addrlen); 4459 break; 4460 } 4461 mp1->b_wptr += addrlen; 4462 /* Copy out the address */ 4463 mi_copyout(q, mp); 4464 } 4465 4466 void 4467 udp_ddi_g_init(void) 4468 { 4469 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 4470 udp_opt_obj.odb_opt_arr_cnt); 4471 4472 /* 4473 * We want to be informed each time a stack is created or 4474 * destroyed in the kernel, so we can maintain the 4475 * set of udp_stack_t's. 4476 */ 4477 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 4478 } 4479 4480 void 4481 udp_ddi_g_destroy(void) 4482 { 4483 netstack_unregister(NS_UDP); 4484 } 4485 4486 #define INET_NAME "ip" 4487 4488 /* 4489 * Initialize the UDP stack instance. 
4490 */ 4491 static void * 4492 udp_stack_init(netstackid_t stackid, netstack_t *ns) 4493 { 4494 udp_stack_t *us; 4495 int i; 4496 int error = 0; 4497 major_t major; 4498 size_t arrsz; 4499 4500 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 4501 us->us_netstack = ns; 4502 4503 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 4504 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 4505 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 4506 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 4507 4508 /* 4509 * The smallest anonymous port in the priviledged port range which UDP 4510 * looks for free port. Use in the option UDP_ANONPRIVBIND. 4511 */ 4512 us->us_min_anonpriv_port = 512; 4513 4514 us->us_bind_fanout_size = udp_bind_fanout_size; 4515 4516 /* Roundup variable that might have been modified in /etc/system */ 4517 if (!ISP2(us->us_bind_fanout_size)) { 4518 /* Not a power of two. Round up to nearest power of two */ 4519 for (i = 0; i < 31; i++) { 4520 if (us->us_bind_fanout_size < (1 << i)) 4521 break; 4522 } 4523 us->us_bind_fanout_size = 1 << i; 4524 } 4525 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 4526 sizeof (udp_fanout_t), KM_SLEEP); 4527 for (i = 0; i < us->us_bind_fanout_size; i++) { 4528 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 4529 NULL); 4530 } 4531 4532 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t); 4533 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, 4534 KM_SLEEP); 4535 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz); 4536 4537 /* Allocate the per netstack stats */ 4538 mutex_enter(&cpu_lock); 4539 us->us_sc_cnt = MAX(ncpus, boot_ncpus); 4540 mutex_exit(&cpu_lock); 4541 us->us_sc = kmem_zalloc(max_ncpus * sizeof (udp_stats_cpu_t *), 4542 KM_SLEEP); 4543 for (i = 0; i < us->us_sc_cnt; i++) { 4544 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), 4545 KM_SLEEP); 4546 } 4547 4548 us->us_kstat = udp_kstat2_init(stackid); 4549 us->us_mibkp = udp_kstat_init(stackid); 4550 4551 
major = mod_name_to_major(INET_NAME); 4552 error = ldi_ident_from_major(major, &us->us_ldi_ident); 4553 ASSERT(error == 0); 4554 return (us); 4555 } 4556 4557 /* 4558 * Free the UDP stack instance. 4559 */ 4560 static void 4561 udp_stack_fini(netstackid_t stackid, void *arg) 4562 { 4563 udp_stack_t *us = (udp_stack_t *)arg; 4564 int i; 4565 4566 for (i = 0; i < us->us_bind_fanout_size; i++) { 4567 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 4568 } 4569 4570 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 4571 sizeof (udp_fanout_t)); 4572 4573 us->us_bind_fanout = NULL; 4574 4575 for (i = 0; i < us->us_sc_cnt; i++) 4576 kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t)); 4577 kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *)); 4578 4579 kmem_free(us->us_propinfo_tbl, 4580 udp_propinfo_count * sizeof (mod_prop_info_t)); 4581 us->us_propinfo_tbl = NULL; 4582 4583 udp_kstat_fini(stackid, us->us_mibkp); 4584 us->us_mibkp = NULL; 4585 4586 udp_kstat2_fini(stackid, us->us_kstat); 4587 us->us_kstat = NULL; 4588 4589 mutex_destroy(&us->us_epriv_port_lock); 4590 ldi_ident_release(us->us_ldi_ident); 4591 kmem_free(us, sizeof (*us)); 4592 } 4593 4594 static size_t 4595 udp_set_rcv_hiwat(udp_t *udp, size_t size) 4596 { 4597 udp_stack_t *us = udp->udp_us; 4598 4599 /* We add a bit of extra buffering */ 4600 size += size >> 1; 4601 if (size > us->us_max_buf) 4602 size = us->us_max_buf; 4603 4604 udp->udp_rcv_hiwat = size; 4605 return (size); 4606 } 4607 4608 /* 4609 * For the lower queue so that UDP can be a dummy mux. 4610 * Nobody should be sending 4611 * packets up this stream 4612 */ 4613 static int 4614 udp_lrput(queue_t *q, mblk_t *mp) 4615 { 4616 switch (mp->b_datap->db_type) { 4617 case M_FLUSH: 4618 /* Turn around */ 4619 if (*mp->b_rptr & FLUSHW) { 4620 *mp->b_rptr &= ~FLUSHR; 4621 qreply(q, mp); 4622 return (0); 4623 } 4624 break; 4625 } 4626 freemsg(mp); 4627 return (0); 4628 } 4629 4630 /* 4631 * For the lower queue so that UDP can be a dummy mux. 
4632 * Nobody should be sending packets down this stream. 4633 */ 4634 /* ARGSUSED */ 4635 int 4636 udp_lwput(queue_t *q, mblk_t *mp) 4637 { 4638 freemsg(mp); 4639 return (0); 4640 } 4641 4642 /* 4643 * When a CPU is added, we need to allocate the per CPU stats struct. 4644 */ 4645 void 4646 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid) 4647 { 4648 int i; 4649 4650 if (cpu_seqid < us->us_sc_cnt) 4651 return; 4652 for (i = us->us_sc_cnt; i <= cpu_seqid; i++) { 4653 ASSERT(us->us_sc[i] == NULL); 4654 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), 4655 KM_SLEEP); 4656 } 4657 membar_producer(); 4658 us->us_sc_cnt = cpu_seqid + 1; 4659 } 4660 4661 /* 4662 * Below routines for UDP socket module. 4663 */ 4664 4665 static conn_t * 4666 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp) 4667 { 4668 udp_t *udp; 4669 conn_t *connp; 4670 zoneid_t zoneid; 4671 netstack_t *ns; 4672 udp_stack_t *us; 4673 int len; 4674 4675 ASSERT(errorp != NULL); 4676 4677 if ((*errorp = secpolicy_basic_net_access(credp)) != 0) 4678 return (NULL); 4679 4680 ns = netstack_find_by_cred(credp); 4681 ASSERT(ns != NULL); 4682 us = ns->netstack_udp; 4683 ASSERT(us != NULL); 4684 4685 /* 4686 * For exclusive stacks we set the zoneid to zero 4687 * to make UDP operate as if in the global zone. 4688 */ 4689 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 4690 zoneid = GLOBAL_ZONEID; 4691 else 4692 zoneid = crgetzoneid(credp); 4693 4694 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 4695 4696 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 4697 if (connp == NULL) { 4698 netstack_rele(ns); 4699 *errorp = ENOMEM; 4700 return (NULL); 4701 } 4702 udp = connp->conn_udp; 4703 4704 /* 4705 * ipcl_conn_create did a netstack_hold. Undo the hold that was 4706 * done by netstack_find_by_cred() 4707 */ 4708 netstack_rele(ns); 4709 4710 /* 4711 * Since this conn_t/udp_t is not yet visible to anybody else we don't 4712 * need to lock anything. 
4713 */ 4714 ASSERT(connp->conn_proto == IPPROTO_UDP); 4715 ASSERT(connp->conn_udp == udp); 4716 ASSERT(udp->udp_connp == connp); 4717 4718 /* Set the initial state of the stream and the privilege status. */ 4719 udp->udp_state = TS_UNBND; 4720 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 4721 if (isv6) { 4722 connp->conn_family = AF_INET6; 4723 connp->conn_ipversion = IPV6_VERSION; 4724 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 4725 connp->conn_default_ttl = us->us_ipv6_hoplimit; 4726 len = sizeof (ip6_t) + UDPH_SIZE; 4727 } else { 4728 connp->conn_family = AF_INET; 4729 connp->conn_ipversion = IPV4_VERSION; 4730 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 4731 connp->conn_default_ttl = us->us_ipv4_ttl; 4732 len = sizeof (ipha_t) + UDPH_SIZE; 4733 } 4734 4735 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 4736 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 4737 4738 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 4739 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 4740 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 4741 connp->conn_ixa->ixa_zoneid = zoneid; 4742 4743 connp->conn_zoneid = zoneid; 4744 4745 /* 4746 * If the caller has the process-wide flag set, then default to MAC 4747 * exempt mode. This allows read-down to unlabeled hosts. 
4748 */ 4749 if (getpflags(NET_MAC_AWARE, credp) != 0) 4750 connp->conn_mac_mode = CONN_MAC_AWARE; 4751 4752 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 4753 4754 udp->udp_us = us; 4755 4756 connp->conn_rcvbuf = us->us_recv_hiwat; 4757 connp->conn_sndbuf = us->us_xmit_hiwat; 4758 connp->conn_sndlowat = us->us_xmit_lowat; 4759 connp->conn_rcvlowat = udp_mod_info.mi_lowat; 4760 4761 connp->conn_wroff = len + us->us_wroff_extra; 4762 connp->conn_so_type = SOCK_DGRAM; 4763 4764 connp->conn_recv = udp_input; 4765 connp->conn_recvicmp = udp_icmp_input; 4766 crhold(credp); 4767 connp->conn_cred = credp; 4768 connp->conn_cpid = curproc->p_pid; 4769 connp->conn_open_time = ddi_get_lbolt64(); 4770 /* Cache things in ixa without an extra refhold */ 4771 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 4772 connp->conn_ixa->ixa_cred = connp->conn_cred; 4773 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 4774 if (is_system_labeled()) 4775 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 4776 4777 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 4778 4779 if (us->us_pmtu_discovery) 4780 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 4781 4782 return (connp); 4783 } 4784 4785 sock_lower_handle_t 4786 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 4787 uint_t *smodep, int *errorp, int flags, cred_t *credp) 4788 { 4789 udp_t *udp = NULL; 4790 udp_stack_t *us; 4791 conn_t *connp; 4792 boolean_t isv6; 4793 4794 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 4795 (proto != 0 && proto != IPPROTO_UDP)) { 4796 *errorp = EPROTONOSUPPORT; 4797 return (NULL); 4798 } 4799 4800 if (family == AF_INET6) 4801 isv6 = B_TRUE; 4802 else 4803 isv6 = B_FALSE; 4804 4805 connp = udp_do_open(credp, isv6, flags, errorp); 4806 if (connp == NULL) 4807 return (NULL); 4808 4809 udp = connp->conn_udp; 4810 ASSERT(udp != NULL); 4811 us = udp->udp_us; 4812 ASSERT(us != NULL); 4813 4814 udp->udp_issocket = 
B_TRUE; 4815 connp->conn_flags |= IPCL_NONSTR; 4816 4817 /* 4818 * Set flow control 4819 * Since this conn_t/udp_t is not yet visible to anybody else we don't 4820 * need to lock anything. 4821 */ 4822 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); 4823 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; 4824 4825 connp->conn_flow_cntrld = B_FALSE; 4826 4827 mutex_enter(&connp->conn_lock); 4828 connp->conn_state_flags &= ~CONN_INCIPIENT; 4829 mutex_exit(&connp->conn_lock); 4830 4831 *errorp = 0; 4832 *smodep = SM_ATOMIC; 4833 *sock_downcalls = &sock_udp_downcalls; 4834 return ((sock_lower_handle_t)connp); 4835 } 4836 4837 /* ARGSUSED3 */ 4838 void 4839 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 4840 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 4841 { 4842 conn_t *connp = (conn_t *)proto_handle; 4843 struct sock_proto_props sopp; 4844 4845 /* All Solaris components should pass a cred for this operation. */ 4846 ASSERT(cr != NULL); 4847 4848 connp->conn_upcalls = sock_upcalls; 4849 connp->conn_upper_handle = sock_handle; 4850 4851 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 4852 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 4853 sopp.sopp_wroff = connp->conn_wroff; 4854 sopp.sopp_maxblk = INFPSZ; 4855 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 4856 sopp.sopp_rxlowat = connp->conn_rcvlowat; 4857 sopp.sopp_maxaddrlen = sizeof (sin6_t); 4858 sopp.sopp_maxpsz = 4859 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 4860 UDP_MAXPACKET_IPV6; 4861 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : 4862 udp_mod_info.mi_minpsz; 4863 4864 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 4865 &sopp); 4866 } 4867 4868 static void 4869 udp_do_close(conn_t *connp) 4870 { 4871 udp_t *udp; 4872 4873 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 4874 udp = connp->conn_udp; 4875 4876 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 4877 /* 4878 * Running in cluster mode - register unbind information 4879 */ 4880 if (connp->conn_ipversion == IPV4_VERSION) { 4881 (*cl_inet_unbind)( 4882 connp->conn_netstack->netstack_stackid, 4883 IPPROTO_UDP, AF_INET, 4884 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 4885 (in_port_t)connp->conn_lport, NULL); 4886 } else { 4887 (*cl_inet_unbind)( 4888 connp->conn_netstack->netstack_stackid, 4889 IPPROTO_UDP, AF_INET6, 4890 (uint8_t *)&(connp->conn_laddr_v6), 4891 (in_port_t)connp->conn_lport, NULL); 4892 } 4893 } 4894 4895 udp_bind_hash_remove(udp, B_FALSE); 4896 4897 ip_quiesce_conn(connp); 4898 4899 if (!IPCL_IS_NONSTR(connp)) { 4900 ASSERT(connp->conn_wq != NULL); 4901 ASSERT(connp->conn_rq != NULL); 4902 qprocsoff(connp->conn_rq); 4903 } 4904 4905 udp_close_free(connp); 4906 4907 /* 4908 * Now we are truly single threaded on this stream, and can 4909 * delete the things hanging off the connp, and finally the connp. 4910 * We removed this connp from the fanout list, it cannot be 4911 * accessed thru the fanouts, and we already waited for the 4912 * conn_ref to drop to 0. We are already in close, so 4913 * there cannot be any other thread from the top. qprocsoff 4914 * has completed, and service has completed or won't run in 4915 * future. 
4916 */ 4917 ASSERT(connp->conn_ref == 1); 4918 4919 if (!IPCL_IS_NONSTR(connp)) { 4920 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 4921 } else { 4922 ip_free_helper_stream(connp); 4923 } 4924 4925 connp->conn_ref--; 4926 ipcl_conn_destroy(connp); 4927 } 4928 4929 /* ARGSUSED1 */ 4930 int 4931 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 4932 { 4933 conn_t *connp = (conn_t *)proto_handle; 4934 4935 /* All Solaris components should pass a cred for this operation. */ 4936 ASSERT(cr != NULL); 4937 4938 udp_do_close(connp); 4939 return (0); 4940 } 4941 4942 static int 4943 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 4944 boolean_t bind_to_req_port_only) 4945 { 4946 sin_t *sin; 4947 sin6_t *sin6; 4948 udp_t *udp = connp->conn_udp; 4949 int error = 0; 4950 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 4951 in_port_t port; /* Host byte order */ 4952 in_port_t requested_port; /* Host byte order */ 4953 int count; 4954 ipaddr_t v4src; /* Set if AF_INET */ 4955 in6_addr_t v6src; 4956 int loopmax; 4957 udp_fanout_t *udpf; 4958 in_port_t lport; /* Network byte order */ 4959 uint_t scopeid = 0; 4960 zoneid_t zoneid = IPCL_ZONEID(connp); 4961 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4962 boolean_t is_inaddr_any; 4963 mlp_type_t addrtype, mlptype; 4964 udp_stack_t *us = udp->udp_us; 4965 4966 sin = NULL; 4967 sin6 = NULL; 4968 switch (len) { 4969 case sizeof (sin_t): /* Complete IPv4 address */ 4970 sin = (sin_t *)sa; 4971 4972 if (sin == NULL || !OK_32PTR((char *)sin)) 4973 return (EINVAL); 4974 4975 if (connp->conn_family != AF_INET || 4976 sin->sin_family != AF_INET) { 4977 return (EAFNOSUPPORT); 4978 } 4979 v4src = sin->sin_addr.s_addr; 4980 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 4981 if (v4src != INADDR_ANY) { 4982 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 4983 B_TRUE); 4984 } 4985 port = ntohs(sin->sin_port); 4986 break; 4987 4988 case sizeof (sin6_t): /* complete IPv6 
address */ 4989 sin6 = (sin6_t *)sa; 4990 4991 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 4992 return (EINVAL); 4993 4994 if (connp->conn_family != AF_INET6 || 4995 sin6->sin6_family != AF_INET6) { 4996 return (EAFNOSUPPORT); 4997 } 4998 v6src = sin6->sin6_addr; 4999 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5000 if (connp->conn_ipv6_v6only) 5001 return (EADDRNOTAVAIL); 5002 5003 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); 5004 if (v4src != INADDR_ANY) { 5005 laddr_type = ip_laddr_verify_v4(v4src, 5006 zoneid, ipst, B_FALSE); 5007 } 5008 } else { 5009 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5010 if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 5011 scopeid = sin6->sin6_scope_id; 5012 laddr_type = ip_laddr_verify_v6(&v6src, 5013 zoneid, ipst, B_TRUE, scopeid); 5014 } 5015 } 5016 port = ntohs(sin6->sin6_port); 5017 break; 5018 5019 default: /* Invalid request */ 5020 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5021 "udp_bind: bad ADDR_length length %u", len); 5022 return (-TBADADDR); 5023 } 5024 5025 /* Is the local address a valid unicast, multicast, or broadcast? */ 5026 if (laddr_type == IPVL_BAD) 5027 return (EADDRNOTAVAIL); 5028 5029 requested_port = port; 5030 5031 if (requested_port == 0 || !bind_to_req_port_only) 5032 bind_to_req_port_only = B_FALSE; 5033 else /* T_BIND_REQ and requested_port != 0 */ 5034 bind_to_req_port_only = B_TRUE; 5035 5036 if (requested_port == 0) { 5037 /* 5038 * If the application passed in zero for the port number, it 5039 * doesn't care which port number we bind to. Get one in the 5040 * valid range. 5041 */ 5042 if (connp->conn_anon_priv_bind) { 5043 port = udp_get_next_priv_port(udp); 5044 } else { 5045 port = udp_update_next_port(udp, 5046 us->us_next_port_to_try, B_TRUE); 5047 } 5048 } else { 5049 /* 5050 * If the port is in the well-known privileged range, 5051 * make sure the caller was privileged. 
5052 */ 5053 int i; 5054 boolean_t priv = B_FALSE; 5055 5056 if (port < us->us_smallest_nonpriv_port) { 5057 priv = B_TRUE; 5058 } else { 5059 for (i = 0; i < us->us_num_epriv_ports; i++) { 5060 if (port == us->us_epriv_ports[i]) { 5061 priv = B_TRUE; 5062 break; 5063 } 5064 } 5065 } 5066 5067 if (priv) { 5068 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 5069 return (-TACCES); 5070 } 5071 } 5072 5073 if (port == 0) 5074 return (-TNOADDR); 5075 5076 /* 5077 * The state must be TS_UNBND. TPI mandates that users must send 5078 * TPI primitives only 1 at a time and wait for the response before 5079 * sending the next primitive. 5080 */ 5081 mutex_enter(&connp->conn_lock); 5082 if (udp->udp_state != TS_UNBND) { 5083 mutex_exit(&connp->conn_lock); 5084 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5085 "udp_bind: bad state, %u", udp->udp_state); 5086 return (-TOUTSTATE); 5087 } 5088 /* 5089 * Copy the source address into our udp structure. This address 5090 * may still be zero; if so, IP will fill in the correct address 5091 * each time an outbound packet is passed to it. Since the udp is 5092 * not yet in the bind hash list, we don't grab the uf_lock to 5093 * change conn_ipversion 5094 */ 5095 if (connp->conn_family == AF_INET) { 5096 ASSERT(sin != NULL); 5097 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); 5098 } else { 5099 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5100 /* 5101 * no need to hold the uf_lock to set the conn_ipversion 5102 * since we are not yet in the fanout list 5103 */ 5104 connp->conn_ipversion = IPV4_VERSION; 5105 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5106 } else { 5107 connp->conn_ipversion = IPV6_VERSION; 5108 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5109 } 5110 } 5111 5112 /* 5113 * If conn_reuseaddr is not set, then we have to make sure that 5114 * the IP address and port number the application requested 5115 * (or we selected for the application) is not being used by 5116 * another stream. 
If another stream is already using the 5117 * requested IP address and port, the behavior depends on 5118 * "bind_to_req_port_only". If set the bind fails; otherwise we 5119 * search for any unused port to bind to the stream. 5120 * 5121 * As per the BSD semantics, as modified by the Deering multicast 5122 * changes, if conn_reuseaddr is set, then we allow multiple binds 5123 * to the same port independent of the local IP address. 5124 * 5125 * This is slightly different than in SunOS 4.X which did not 5126 * support IP multicast. Note that the change implemented by the 5127 * Deering multicast code effects all binds - not only binding 5128 * to IP multicast addresses. 5129 * 5130 * Note that when binding to port zero we ignore SO_REUSEADDR in 5131 * order to guarantee a unique port. 5132 */ 5133 5134 count = 0; 5135 if (connp->conn_anon_priv_bind) { 5136 /* 5137 * loopmax = (IPPORT_RESERVED-1) - 5138 * us->us_min_anonpriv_port + 1 5139 */ 5140 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 5141 } else { 5142 loopmax = us->us_largest_anon_port - 5143 us->us_smallest_anon_port + 1; 5144 } 5145 5146 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 5147 5148 for (;;) { 5149 udp_t *udp1; 5150 boolean_t found_exclbind = B_FALSE; 5151 conn_t *connp1; 5152 5153 /* 5154 * Walk through the list of udp streams bound to 5155 * requested port with the same IP address. 5156 */ 5157 lport = htons(port); 5158 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 5159 us->us_bind_fanout_size)]; 5160 mutex_enter(&udpf->uf_lock); 5161 for (udp1 = udpf->uf_udp; udp1 != NULL; 5162 udp1 = udp1->udp_bind_hash) { 5163 connp1 = udp1->udp_connp; 5164 5165 if (lport != connp1->conn_lport) 5166 continue; 5167 5168 /* 5169 * On a labeled system, we must treat bindings to ports 5170 * on shared IP addresses by sockets with MAC exemption 5171 * privilege as being in all zones, as there's 5172 * otherwise no way to identify the right receiver. 
5173 */ 5174 if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) 5175 continue; 5176 5177 /* 5178 * If UDP_EXCLBIND is set for either the bound or 5179 * binding endpoint, the semantics of bind 5180 * is changed according to the following chart. 5181 * 5182 * spec = specified address (v4 or v6) 5183 * unspec = unspecified address (v4 or v6) 5184 * A = specified addresses are different for endpoints 5185 * 5186 * bound bind to allowed? 5187 * ------------------------------------- 5188 * unspec unspec no 5189 * unspec spec no 5190 * spec unspec no 5191 * spec spec yes if A 5192 * 5193 * For labeled systems, SO_MAC_EXEMPT behaves the same 5194 * as UDP_EXCLBIND, except that zoneid is ignored. 5195 */ 5196 if (connp1->conn_exclbind || connp->conn_exclbind || 5197 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 5198 if (V6_OR_V4_INADDR_ANY( 5199 connp1->conn_bound_addr_v6) || 5200 is_inaddr_any || 5201 IN6_ARE_ADDR_EQUAL( 5202 &connp1->conn_bound_addr_v6, 5203 &v6src)) { 5204 found_exclbind = B_TRUE; 5205 break; 5206 } 5207 continue; 5208 } 5209 5210 /* 5211 * Check ipversion to allow IPv4 and IPv6 sockets to 5212 * have disjoint port number spaces. 5213 */ 5214 if (connp->conn_ipversion != connp1->conn_ipversion) { 5215 5216 /* 5217 * On the first time through the loop, if the 5218 * the user intentionally specified a 5219 * particular port number, then ignore any 5220 * bindings of the other protocol that may 5221 * conflict. This allows the user to bind IPv6 5222 * alone and get both v4 and v6, or bind both 5223 * both and get each seperately. On subsequent 5224 * times through the loop, we're checking a 5225 * port that we chose (not the user) and thus 5226 * we do not allow casual duplicate bindings. 5227 */ 5228 if (count == 0 && requested_port != 0) 5229 continue; 5230 } 5231 5232 /* 5233 * No difference depending on SO_REUSEADDR. 
5234 * 5235 * If existing port is bound to a 5236 * non-wildcard IP address and 5237 * the requesting stream is bound to 5238 * a distinct different IP addresses 5239 * (non-wildcard, also), keep going. 5240 */ 5241 if (!is_inaddr_any && 5242 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && 5243 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, 5244 &v6src)) { 5245 continue; 5246 } 5247 break; 5248 } 5249 5250 if (!found_exclbind && 5251 (connp->conn_reuseaddr && requested_port != 0)) { 5252 break; 5253 } 5254 5255 if (udp1 == NULL) { 5256 /* 5257 * No other stream has this IP address 5258 * and port number. We can use it. 5259 */ 5260 break; 5261 } 5262 mutex_exit(&udpf->uf_lock); 5263 if (bind_to_req_port_only) { 5264 /* 5265 * We get here only when requested port 5266 * is bound (and only first of the for() 5267 * loop iteration). 5268 * 5269 * The semantics of this bind request 5270 * require it to fail so we return from 5271 * the routine (and exit the loop). 5272 * 5273 */ 5274 mutex_exit(&connp->conn_lock); 5275 return (-TADDRBUSY); 5276 } 5277 5278 if (connp->conn_anon_priv_bind) { 5279 port = udp_get_next_priv_port(udp); 5280 } else { 5281 if ((count == 0) && (requested_port != 0)) { 5282 /* 5283 * If the application wants us to find 5284 * a port, get one to start with. Set 5285 * requested_port to 0, so that we will 5286 * update us->us_next_port_to_try below. 5287 */ 5288 port = udp_update_next_port(udp, 5289 us->us_next_port_to_try, B_TRUE); 5290 requested_port = 0; 5291 } else { 5292 port = udp_update_next_port(udp, port + 1, 5293 B_FALSE); 5294 } 5295 } 5296 5297 if (port == 0 || ++count >= loopmax) { 5298 /* 5299 * We've tried every possible port number and 5300 * there are none available, so send an error 5301 * to the user. 5302 */ 5303 mutex_exit(&connp->conn_lock); 5304 return (-TNOADDR); 5305 } 5306 } 5307 5308 /* 5309 * Copy the source address into our udp structure. 
This address 5310 * may still be zero; if so, ip_attr_connect will fill in the correct 5311 * address when a packet is about to be sent. 5312 * If we are binding to a broadcast or multicast address then 5313 * we just set the conn_bound_addr since we don't want to use 5314 * that as the source address when sending. 5315 */ 5316 connp->conn_bound_addr_v6 = v6src; 5317 connp->conn_laddr_v6 = v6src; 5318 if (scopeid != 0) { 5319 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 5320 connp->conn_ixa->ixa_scopeid = scopeid; 5321 connp->conn_incoming_ifindex = scopeid; 5322 } else { 5323 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5324 connp->conn_incoming_ifindex = connp->conn_bound_if; 5325 } 5326 5327 switch (laddr_type) { 5328 case IPVL_UNICAST_UP: 5329 case IPVL_UNICAST_DOWN: 5330 connp->conn_saddr_v6 = v6src; 5331 connp->conn_mcbc_bind = B_FALSE; 5332 break; 5333 case IPVL_MCAST: 5334 case IPVL_BCAST: 5335 /* ip_set_destination will pick a source address later */ 5336 connp->conn_saddr_v6 = ipv6_all_zeros; 5337 connp->conn_mcbc_bind = B_TRUE; 5338 break; 5339 } 5340 5341 /* Any errors after this point should use late_error */ 5342 connp->conn_lport = lport; 5343 5344 /* 5345 * Now reset the next anonymous port if the application requested 5346 * an anonymous port, or we handed out the next anonymous port. 5347 */ 5348 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { 5349 us->us_next_port_to_try = port + 1; 5350 } 5351 5352 /* Initialize the T_BIND_ACK. 
*/ 5353 if (connp->conn_family == AF_INET) { 5354 sin->sin_port = connp->conn_lport; 5355 } else { 5356 sin6->sin6_port = connp->conn_lport; 5357 } 5358 udp->udp_state = TS_IDLE; 5359 udp_bind_hash_insert(udpf, udp); 5360 mutex_exit(&udpf->uf_lock); 5361 mutex_exit(&connp->conn_lock); 5362 5363 if (cl_inet_bind) { 5364 /* 5365 * Running in cluster mode - register bind information 5366 */ 5367 if (connp->conn_ipversion == IPV4_VERSION) { 5368 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5369 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, 5370 (in_port_t)connp->conn_lport, NULL); 5371 } else { 5372 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5373 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, 5374 (in_port_t)connp->conn_lport, NULL); 5375 } 5376 } 5377 5378 mutex_enter(&connp->conn_lock); 5379 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 5380 if (is_system_labeled() && (!connp->conn_anon_port || 5381 connp->conn_anon_mlp)) { 5382 uint16_t mlpport; 5383 zone_t *zone; 5384 5385 zone = crgetzone(cr); 5386 connp->conn_mlp_type = 5387 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : 5388 mlptSingle; 5389 addrtype = tsol_mlp_addr_type( 5390 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 5391 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 5392 if (addrtype == mlptSingle) { 5393 error = -TNOADDR; 5394 mutex_exit(&connp->conn_lock); 5395 goto late_error; 5396 } 5397 mlpport = connp->conn_anon_port ? PMAPPORT : port; 5398 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 5399 addrtype); 5400 5401 /* 5402 * It is a coding error to attempt to bind an MLP port 5403 * without first setting SOL_SOCKET/SCM_UCRED. 5404 */ 5405 if (mlptype != mlptSingle && 5406 connp->conn_mlp_type == mlptSingle) { 5407 error = EINVAL; 5408 mutex_exit(&connp->conn_lock); 5409 goto late_error; 5410 } 5411 5412 /* 5413 * It is an access violation to attempt to bind an MLP port 5414 * without NET_BINDMLP privilege. 
5415 */ 5416 if (mlptype != mlptSingle && 5417 secpolicy_net_bindmlp(cr) != 0) { 5418 if (connp->conn_debug) { 5419 (void) strlog(UDP_MOD_ID, 0, 1, 5420 SL_ERROR|SL_TRACE, 5421 "udp_bind: no priv for multilevel port %d", 5422 mlpport); 5423 } 5424 error = -TACCES; 5425 mutex_exit(&connp->conn_lock); 5426 goto late_error; 5427 } 5428 5429 /* 5430 * If we're specifically binding a shared IP address and the 5431 * port is MLP on shared addresses, then check to see if this 5432 * zone actually owns the MLP. Reject if not. 5433 */ 5434 if (mlptype == mlptShared && addrtype == mlptShared) { 5435 /* 5436 * No need to handle exclusive-stack zones since 5437 * ALL_ZONES only applies to the shared stack. 5438 */ 5439 zoneid_t mlpzone; 5440 5441 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 5442 htons(mlpport)); 5443 if (connp->conn_zoneid != mlpzone) { 5444 if (connp->conn_debug) { 5445 (void) strlog(UDP_MOD_ID, 0, 1, 5446 SL_ERROR|SL_TRACE, 5447 "udp_bind: attempt to bind port " 5448 "%d on shared addr in zone %d " 5449 "(should be %d)", 5450 mlpport, connp->conn_zoneid, 5451 mlpzone); 5452 } 5453 error = -TACCES; 5454 mutex_exit(&connp->conn_lock); 5455 goto late_error; 5456 } 5457 } 5458 if (connp->conn_anon_port) { 5459 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, 5460 port, B_TRUE); 5461 if (error != 0) { 5462 if (connp->conn_debug) { 5463 (void) strlog(UDP_MOD_ID, 0, 1, 5464 SL_ERROR|SL_TRACE, 5465 "udp_bind: cannot establish anon " 5466 "MLP for port %d", port); 5467 } 5468 error = -TACCES; 5469 mutex_exit(&connp->conn_lock); 5470 goto late_error; 5471 } 5472 } 5473 connp->conn_mlp_type = mlptype; 5474 } 5475 5476 /* 5477 * We create an initial header template here to make a subsequent 5478 * sendto have a starting point. Since conn_last_dst is zero the 5479 * first sendto will always follow the 'dst changed' code path. 5480 * Note that we defer massaging options and the related checksum 5481 * adjustment until we have a destination address. 
*/
	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		goto late_error;
	}
	/* Just in case */
	connp->conn_faddr_v6 = ipv6_all_zeros;
	connp->conn_fport = 0;
	connp->conn_v6lastdst = ipv6_all_zeros;
	mutex_exit(&connp->conn_lock);

	/* conn_lock is not held across the fanout insert. */
	error = ip_laddr_fanout_insert(connp);
	if (error != 0)
		goto late_error;

	/* Bind succeeded */
	return (0);

late_error:
	/* We had already picked the port number, and then the bind failed */
	/*
	 * Every jump to this label is made with conn_lock released. The
	 * locks are retaken here in the documented order
	 * (conn_lock -> uf_lock) before the binding is rolled back.
	 */
	mutex_enter(&connp->conn_lock);
	udpf = &us->us_bind_fanout[
	    UDP_BIND_HASH(connp->conn_lport,
	    us->us_bind_fanout_size)];
	mutex_enter(&udpf->uf_lock);
	connp->conn_saddr_v6 = ipv6_all_zeros;
	connp->conn_bound_addr_v6 = ipv6_all_zeros;
	connp->conn_laddr_v6 = ipv6_all_zeros;
	if (scopeid != 0) {
		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		connp->conn_incoming_ifindex = connp->conn_bound_if;
	}
	udp->udp_state = TS_UNBND;
	udp_bind_hash_remove(udp, B_TRUE);
	connp->conn_lport = 0;
	mutex_exit(&udpf->uf_lock);
	connp->conn_anon_port = B_FALSE;
	connp->conn_mlp_type = mlptSingle;

	connp->conn_v6lastdst = ipv6_all_zeros;

	/* Restore the header that was built above - different source address */
	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	return (error);
}

/*
 * Socket bind downcall (installed as sd_bind in sock_udp_downcalls).
 *
 * A NULL sa requests an unbind and is routed to udp_do_unbind(); any other
 * address is handed to udp_do_bind().  Both helpers may return a negative
 * TLI error; it is converted to an errno here: -TOUTSTATE becomes EINVAL,
 * everything else goes through proto_tlitosyserr().  Non-negative results
 * are returned unchanged.
 */
int
udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
    socklen_t len, cred_t *cr)
{
	int		error;
	conn_t		*connp;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	connp = (conn_t *)proto_handle;

	if (sa == NULL)
		error = udp_do_unbind(connp);
	else
		error = udp_do_bind(connp, sa, len, cr, B_TRUE);

	/* Negative values are TLI errors; translate them for the socket layer. */
	if (error < 0) {
		if (error == -TOUTSTATE)
			error = EINVAL;
		else
			error = proto_tlitosyserr(-error);
	}

	return (error);
}

/*
 * Bind connp to the wildcard address of its own address family on behalf
 * of a caller that did not bind explicitly (udp_connect and udp_send call
 * this when the endpoint is still TS_UNBND).
 *
 * The sockaddr is built from sin_null/sin6_null with only the family and
 * the wildcard address filled in; the port is whatever those templates
 * carry (presumably zero, leaving port selection to udp_do_bind - confirm).
 * udp_do_bind() is called with B_FALSE as its last argument, whereas
 * udp_bind() passes B_TRUE; the meaning of that flag is defined by
 * udp_do_bind(), outside this section.
 *
 * Returns 0 or a positive errno; negative TLI errors are converted with
 * proto_tlitosyserr().
 */
static int
udp_implicit_bind(conn_t *connp, cred_t *cr)
{
	sin6_t sin6addr;
	sin_t *sin;
	sin6_t *sin6;
	socklen_t len;
	int error;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	/* sin6addr is used as storage for either address family. */
	if (connp->conn_family == AF_INET) {
		len = sizeof (struct sockaddr_in);
		sin = (sin_t *)&sin6addr;
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = INADDR_ANY;
	} else {
		ASSERT(connp->conn_family == AF_INET6);
		len = sizeof (sin6_t);
		sin6 = (sin6_t *)&sin6addr;
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		V6_SET_ZERO(sin6->sin6_addr);
	}

	error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
	    cr, B_FALSE);
	return ((error < 0) ? proto_tlitosyserr(-error) : error);
}

/*
 * This routine removes a port number association from a stream. It
 * is called by udp_unbind and udp_tpi_unbind.
5594 */ 5595 static int 5596 udp_do_unbind(conn_t *connp) 5597 { 5598 udp_t *udp = connp->conn_udp; 5599 udp_fanout_t *udpf; 5600 udp_stack_t *us = udp->udp_us; 5601 5602 if (cl_inet_unbind != NULL) { 5603 /* 5604 * Running in cluster mode - register unbind information 5605 */ 5606 if (connp->conn_ipversion == IPV4_VERSION) { 5607 (*cl_inet_unbind)( 5608 connp->conn_netstack->netstack_stackid, 5609 IPPROTO_UDP, AF_INET, 5610 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5611 (in_port_t)connp->conn_lport, NULL); 5612 } else { 5613 (*cl_inet_unbind)( 5614 connp->conn_netstack->netstack_stackid, 5615 IPPROTO_UDP, AF_INET6, 5616 (uint8_t *)&(connp->conn_laddr_v6), 5617 (in_port_t)connp->conn_lport, NULL); 5618 } 5619 } 5620 5621 mutex_enter(&connp->conn_lock); 5622 /* If a bind has not been done, we can't unbind. */ 5623 if (udp->udp_state == TS_UNBND) { 5624 mutex_exit(&connp->conn_lock); 5625 return (-TOUTSTATE); 5626 } 5627 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5628 us->us_bind_fanout_size)]; 5629 mutex_enter(&udpf->uf_lock); 5630 udp_bind_hash_remove(udp, B_TRUE); 5631 connp->conn_saddr_v6 = ipv6_all_zeros; 5632 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5633 connp->conn_laddr_v6 = ipv6_all_zeros; 5634 connp->conn_mcbc_bind = B_FALSE; 5635 connp->conn_lport = 0; 5636 /* In case we were also connected */ 5637 connp->conn_faddr_v6 = ipv6_all_zeros; 5638 connp->conn_fport = 0; 5639 mutex_exit(&udpf->uf_lock); 5640 5641 connp->conn_v6lastdst = ipv6_all_zeros; 5642 udp->udp_state = TS_UNBND; 5643 5644 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5645 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5646 mutex_exit(&connp->conn_lock); 5647 5648 ip_unbind(connp); 5649 5650 return (0); 5651 } 5652 5653 /* 5654 * It associates a default destination address with the stream. 
5655 */ 5656 static int 5657 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 5658 cred_t *cr, pid_t pid) 5659 { 5660 sin6_t *sin6; 5661 sin_t *sin; 5662 in6_addr_t v6dst; 5663 ipaddr_t v4dst; 5664 uint16_t dstport; 5665 uint32_t flowinfo; 5666 udp_fanout_t *udpf; 5667 udp_t *udp, *udp1; 5668 ushort_t ipversion; 5669 udp_stack_t *us; 5670 int error; 5671 conn_t *connp1; 5672 ip_xmit_attr_t *ixa; 5673 ip_xmit_attr_t *oldixa; 5674 uint_t scopeid = 0; 5675 uint_t srcid = 0; 5676 in6_addr_t v6src = connp->conn_saddr_v6; 5677 boolean_t v4mapped; 5678 5679 udp = connp->conn_udp; 5680 us = udp->udp_us; 5681 sin = NULL; 5682 sin6 = NULL; 5683 v4dst = INADDR_ANY; 5684 flowinfo = 0; 5685 5686 /* 5687 * Address has been verified by the caller 5688 */ 5689 switch (len) { 5690 default: 5691 /* 5692 * Should never happen 5693 */ 5694 return (EINVAL); 5695 5696 case sizeof (sin_t): 5697 sin = (sin_t *)sa; 5698 v4dst = sin->sin_addr.s_addr; 5699 dstport = sin->sin_port; 5700 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5701 ASSERT(connp->conn_ipversion == IPV4_VERSION); 5702 ipversion = IPV4_VERSION; 5703 break; 5704 5705 case sizeof (sin6_t): 5706 sin6 = (sin6_t *)sa; 5707 v6dst = sin6->sin6_addr; 5708 dstport = sin6->sin6_port; 5709 srcid = sin6->__sin6_src_id; 5710 v4mapped = IN6_IS_ADDR_V4MAPPED(&v6dst); 5711 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5712 if (!ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 5713 v4mapped, connp->conn_netstack)) { 5714 /* Mismatch v4mapped/v6 specified by srcid. */ 5715 return (EADDRNOTAVAIL); 5716 } 5717 } 5718 if (v4mapped) { 5719 if (connp->conn_ipv6_v6only) 5720 return (EADDRNOTAVAIL); 5721 5722 /* 5723 * Destination adress is mapped IPv6 address. 5724 * Source bound address should be unspecified or 5725 * IPv6 mapped address as well. 
5726 */ 5727 if (!IN6_IS_ADDR_UNSPECIFIED( 5728 &connp->conn_bound_addr_v6) && 5729 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { 5730 return (EADDRNOTAVAIL); 5731 } 5732 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 5733 ipversion = IPV4_VERSION; 5734 flowinfo = 0; 5735 } else { 5736 ipversion = IPV6_VERSION; 5737 flowinfo = sin6->sin6_flowinfo; 5738 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 5739 scopeid = sin6->sin6_scope_id; 5740 } 5741 break; 5742 } 5743 5744 if (dstport == 0) 5745 return (-TBADADDR); 5746 5747 /* 5748 * If there is a different thread using conn_ixa then we get a new 5749 * copy and cut the old one loose from conn_ixa. Otherwise we use 5750 * conn_ixa and prevent any other thread from using/changing it. 5751 * Once connect() is done other threads can use conn_ixa since the 5752 * refcnt will be back at one. 5753 * We defer updating conn_ixa until later to handle any concurrent 5754 * conn_ixa_cleanup thread. 5755 */ 5756 ixa = conn_get_ixa(connp, B_FALSE); 5757 if (ixa == NULL) 5758 return (ENOMEM); 5759 5760 mutex_enter(&connp->conn_lock); 5761 /* 5762 * This udp_t must have bound to a port already before doing a connect. 5763 * Reject if a connect is in progress (we drop conn_lock during 5764 * udp_do_connect). 
5765 */ 5766 if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) { 5767 mutex_exit(&connp->conn_lock); 5768 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5769 "udp_connect: bad state, %u", udp->udp_state); 5770 ixa_refrele(ixa); 5771 return (-TOUTSTATE); 5772 } 5773 ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL); 5774 5775 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5776 us->us_bind_fanout_size)]; 5777 5778 mutex_enter(&udpf->uf_lock); 5779 if (udp->udp_state == TS_DATA_XFER) { 5780 /* Already connected - clear out state */ 5781 if (connp->conn_mcbc_bind) 5782 connp->conn_saddr_v6 = ipv6_all_zeros; 5783 else 5784 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 5785 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 5786 connp->conn_faddr_v6 = ipv6_all_zeros; 5787 connp->conn_fport = 0; 5788 udp->udp_state = TS_IDLE; 5789 } 5790 5791 connp->conn_fport = dstport; 5792 connp->conn_ipversion = ipversion; 5793 if (ipversion == IPV4_VERSION) { 5794 /* 5795 * Interpret a zero destination to mean loopback. 5796 * Update the T_CONN_REQ (sin/sin6) since it is used to 5797 * generate the T_CONN_CON. 5798 */ 5799 if (v4dst == INADDR_ANY) { 5800 v4dst = htonl(INADDR_LOOPBACK); 5801 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5802 if (connp->conn_family == AF_INET) { 5803 sin->sin_addr.s_addr = v4dst; 5804 } else { 5805 sin6->sin6_addr = v6dst; 5806 } 5807 } 5808 connp->conn_faddr_v6 = v6dst; 5809 connp->conn_flowinfo = 0; 5810 } else { 5811 ASSERT(connp->conn_ipversion == IPV6_VERSION); 5812 /* 5813 * Interpret a zero destination to mean loopback. 5814 * Update the T_CONN_REQ (sin/sin6) since it is used to 5815 * generate the T_CONN_CON. 
5816 */ 5817 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 5818 v6dst = ipv6_loopback; 5819 sin6->sin6_addr = v6dst; 5820 } 5821 connp->conn_faddr_v6 = v6dst; 5822 connp->conn_flowinfo = flowinfo; 5823 } 5824 mutex_exit(&udpf->uf_lock); 5825 5826 /* 5827 * We update our cred/cpid based on the caller of connect 5828 */ 5829 if (connp->conn_cred != cr) { 5830 crhold(cr); 5831 crfree(connp->conn_cred); 5832 connp->conn_cred = cr; 5833 } 5834 connp->conn_cpid = pid; 5835 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 5836 ixa->ixa_cred = cr; 5837 ixa->ixa_cpid = pid; 5838 if (is_system_labeled()) { 5839 /* We need to restart with a label based on the cred */ 5840 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 5841 } 5842 5843 if (scopeid != 0) { 5844 ixa->ixa_flags |= IXAF_SCOPEID_SET; 5845 ixa->ixa_scopeid = scopeid; 5846 connp->conn_incoming_ifindex = scopeid; 5847 } else { 5848 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5849 connp->conn_incoming_ifindex = connp->conn_bound_if; 5850 } 5851 /* 5852 * conn_connect will drop conn_lock and reacquire it. 5853 * To prevent a send* from messing with this udp_t while the lock 5854 * is dropped we set udp_state and clear conn_v6lastdst. 5855 * That will make all send* fail with EISCONN. 5856 */ 5857 connp->conn_v6lastdst = ipv6_all_zeros; 5858 udp->udp_state = TS_WCON_CREQ; 5859 5860 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC); 5861 mutex_exit(&connp->conn_lock); 5862 if (error != 0) 5863 goto connect_failed; 5864 5865 /* 5866 * The addresses have been verified. Time to insert in 5867 * the correct fanout list. 
5868 */ 5869 error = ipcl_conn_insert(connp); 5870 if (error != 0) 5871 goto connect_failed; 5872 5873 mutex_enter(&connp->conn_lock); 5874 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5875 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5876 if (error != 0) { 5877 mutex_exit(&connp->conn_lock); 5878 goto connect_failed; 5879 } 5880 5881 udp->udp_state = TS_DATA_XFER; 5882 /* Record this as the "last" send even though we haven't sent any */ 5883 connp->conn_v6lastdst = connp->conn_faddr_v6; 5884 connp->conn_lastipversion = connp->conn_ipversion; 5885 connp->conn_lastdstport = connp->conn_fport; 5886 connp->conn_lastflowinfo = connp->conn_flowinfo; 5887 connp->conn_lastscopeid = scopeid; 5888 connp->conn_lastsrcid = srcid; 5889 /* Also remember a source to use together with lastdst */ 5890 connp->conn_v6lastsrc = v6src; 5891 5892 oldixa = conn_replace_ixa(connp, ixa); 5893 mutex_exit(&connp->conn_lock); 5894 ixa_refrele(oldixa); 5895 5896 /* 5897 * We've picked a source address above. Now we can 5898 * verify that the src/port/dst/port is unique for all 5899 * connections in TS_DATA_XFER, skipping ourselves. 
5900 */ 5901 mutex_enter(&udpf->uf_lock); 5902 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 5903 if (udp1->udp_state != TS_DATA_XFER) 5904 continue; 5905 5906 if (udp1 == udp) 5907 continue; 5908 5909 connp1 = udp1->udp_connp; 5910 if (connp->conn_lport != connp1->conn_lport || 5911 connp->conn_ipversion != connp1->conn_ipversion || 5912 dstport != connp1->conn_fport || 5913 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 5914 &connp1->conn_laddr_v6) || 5915 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) || 5916 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) || 5917 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid))) 5918 continue; 5919 mutex_exit(&udpf->uf_lock); 5920 error = -TBADADDR; 5921 goto connect_failed; 5922 } 5923 if (cl_inet_connect2 != NULL) { 5924 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 5925 if (error != 0) { 5926 mutex_exit(&udpf->uf_lock); 5927 error = -TBADADDR; 5928 goto connect_failed; 5929 } 5930 } 5931 mutex_exit(&udpf->uf_lock); 5932 5933 ixa_refrele(ixa); 5934 return (0); 5935 5936 connect_failed: 5937 if (ixa != NULL) 5938 ixa_refrele(ixa); 5939 mutex_enter(&connp->conn_lock); 5940 mutex_enter(&udpf->uf_lock); 5941 udp->udp_state = TS_IDLE; 5942 connp->conn_faddr_v6 = ipv6_all_zeros; 5943 connp->conn_fport = 0; 5944 /* In case the source address was set above */ 5945 if (connp->conn_mcbc_bind) 5946 connp->conn_saddr_v6 = ipv6_all_zeros; 5947 else 5948 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 5949 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 5950 mutex_exit(&udpf->uf_lock); 5951 5952 connp->conn_v6lastdst = ipv6_all_zeros; 5953 connp->conn_flowinfo = 0; 5954 5955 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5956 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5957 mutex_exit(&connp->conn_lock); 5958 return (error); 5959 } 5960 5961 static int 5962 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5963 socklen_t len, sock_connid_t *id, 
cred_t *cr) 5964 { 5965 conn_t *connp = (conn_t *)proto_handle; 5966 udp_t *udp = connp->conn_udp; 5967 int error; 5968 boolean_t did_bind = B_FALSE; 5969 pid_t pid = curproc->p_pid; 5970 5971 /* All Solaris components should pass a cred for this operation. */ 5972 ASSERT(cr != NULL); 5973 5974 if (sa == NULL) { 5975 /* 5976 * Disconnect 5977 * Make sure we are connected 5978 */ 5979 if (udp->udp_state != TS_DATA_XFER) 5980 return (EINVAL); 5981 5982 error = udp_disconnect(connp); 5983 return (error); 5984 } 5985 5986 error = proto_verify_ip_addr(connp->conn_family, sa, len); 5987 if (error != 0) 5988 goto done; 5989 5990 /* do an implicit bind if necessary */ 5991 if (udp->udp_state == TS_UNBND) { 5992 error = udp_implicit_bind(connp, cr); 5993 /* 5994 * We could be racing with an actual bind, in which case 5995 * we would see EPROTO. We cross our fingers and try 5996 * to connect. 5997 */ 5998 if (!(error == 0 || error == EPROTO)) 5999 goto done; 6000 did_bind = B_TRUE; 6001 } 6002 /* 6003 * set SO_DGRAM_ERRIND 6004 */ 6005 connp->conn_dgram_errind = B_TRUE; 6006 6007 error = udp_do_connect(connp, sa, len, cr, pid); 6008 6009 if (error != 0 && did_bind) { 6010 int unbind_err; 6011 6012 unbind_err = udp_do_unbind(connp); 6013 ASSERT(unbind_err == 0); 6014 } 6015 6016 if (error == 0) { 6017 *id = 0; 6018 (*connp->conn_upcalls->su_connected) 6019 (connp->conn_upper_handle, 0, NULL, -1); 6020 } else if (error < 0) { 6021 error = proto_tlitosyserr(-error); 6022 } 6023 6024 done: 6025 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 6026 /* 6027 * No need to hold locks to set state 6028 * after connect failure socket state is undefined 6029 * We set the state only to imitate old sockfs behavior 6030 */ 6031 udp->udp_state = TS_IDLE; 6032 } 6033 return (error); 6034 } 6035 6036 int 6037 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6038 cred_t *cr) 6039 { 6040 sin6_t *sin6; 6041 sin_t *sin = NULL; 6042 uint_t srcid; 6043 conn_t *connp = 
(conn_t *)proto_handle; 6044 udp_t *udp = connp->conn_udp; 6045 int error = 0; 6046 udp_stack_t *us = udp->udp_us; 6047 ushort_t ipversion; 6048 pid_t pid = curproc->p_pid; 6049 ip_xmit_attr_t *ixa; 6050 6051 ASSERT(DB_TYPE(mp) == M_DATA); 6052 6053 /* All Solaris components should pass a cred for this operation. */ 6054 ASSERT(cr != NULL); 6055 6056 /* do an implicit bind if necessary */ 6057 if (udp->udp_state == TS_UNBND) { 6058 error = udp_implicit_bind(connp, cr); 6059 /* 6060 * We could be racing with an actual bind, in which case 6061 * we would see EPROTO. We cross our fingers and try 6062 * to connect. 6063 */ 6064 if (!(error == 0 || error == EPROTO)) { 6065 freemsg(mp); 6066 return (error); 6067 } 6068 } 6069 6070 /* Connected? */ 6071 if (msg->msg_name == NULL) { 6072 if (udp->udp_state != TS_DATA_XFER) { 6073 UDPS_BUMP_MIB(us, udpOutErrors); 6074 return (EDESTADDRREQ); 6075 } 6076 if (msg->msg_controllen != 0) { 6077 error = udp_output_ancillary(connp, NULL, NULL, mp, 6078 NULL, msg, cr, pid); 6079 } else { 6080 error = udp_output_connected(connp, mp, cr, pid); 6081 } 6082 if (us->us_sendto_ignerr) 6083 return (0); 6084 else 6085 return (error); 6086 } 6087 if (udp->udp_state == TS_DATA_XFER) { 6088 UDPS_BUMP_MIB(us, udpOutErrors); 6089 return (EISCONN); 6090 } 6091 error = proto_verify_ip_addr(connp->conn_family, 6092 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6093 if (error != 0) { 6094 UDPS_BUMP_MIB(us, udpOutErrors); 6095 return (error); 6096 } 6097 switch (connp->conn_family) { 6098 case AF_INET6: 6099 sin6 = (sin6_t *)msg->msg_name; 6100 6101 srcid = sin6->__sin6_src_id; 6102 6103 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6104 /* 6105 * Destination is a non-IPv4-compatible IPv6 address. 6106 * Send out an IPv6 format packet. 6107 */ 6108 6109 /* 6110 * If the local address is a mapped address return 6111 * an error. 
6112 * It would be possible to send an IPv6 packet but the 6113 * response would never make it back to the application 6114 * since it is bound to a mapped address. 6115 */ 6116 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 6117 UDPS_BUMP_MIB(us, udpOutErrors); 6118 return (EADDRNOTAVAIL); 6119 } 6120 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6121 sin6->sin6_addr = ipv6_loopback; 6122 ipversion = IPV6_VERSION; 6123 } else { 6124 if (connp->conn_ipv6_v6only) { 6125 UDPS_BUMP_MIB(us, udpOutErrors); 6126 return (EADDRNOTAVAIL); 6127 } 6128 6129 /* 6130 * If the local address is not zero or a mapped address 6131 * return an error. It would be possible to send an 6132 * IPv4 packet but the response would never make it 6133 * back to the application since it is bound to a 6134 * non-mapped address. 6135 */ 6136 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 6137 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 6138 UDPS_BUMP_MIB(us, udpOutErrors); 6139 return (EADDRNOTAVAIL); 6140 } 6141 6142 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 6143 V4_PART_OF_V6(sin6->sin6_addr) = 6144 htonl(INADDR_LOOPBACK); 6145 } 6146 ipversion = IPV4_VERSION; 6147 } 6148 6149 /* 6150 * We have to allocate an ip_xmit_attr_t before we grab 6151 * conn_lock and we need to hold conn_lock once we've check 6152 * conn_same_as_last_v6 to handle concurrent send* calls on a 6153 * socket. 
6154 */ 6155 if (msg->msg_controllen == 0) { 6156 ixa = conn_get_ixa(connp, B_FALSE); 6157 if (ixa == NULL) { 6158 UDPS_BUMP_MIB(us, udpOutErrors); 6159 return (ENOMEM); 6160 } 6161 } else { 6162 ixa = NULL; 6163 } 6164 mutex_enter(&connp->conn_lock); 6165 if (udp->udp_delayed_error != 0) { 6166 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 6167 6168 error = udp->udp_delayed_error; 6169 udp->udp_delayed_error = 0; 6170 6171 /* Compare IP address, port, and family */ 6172 6173 if (sin6->sin6_port == sin2->sin6_port && 6174 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 6175 &sin2->sin6_addr) && 6176 sin6->sin6_family == sin2->sin6_family) { 6177 mutex_exit(&connp->conn_lock); 6178 UDPS_BUMP_MIB(us, udpOutErrors); 6179 if (ixa != NULL) 6180 ixa_refrele(ixa); 6181 return (error); 6182 } 6183 } 6184 6185 if (msg->msg_controllen != 0) { 6186 mutex_exit(&connp->conn_lock); 6187 ASSERT(ixa == NULL); 6188 error = udp_output_ancillary(connp, NULL, sin6, mp, 6189 NULL, msg, cr, pid); 6190 } else if (conn_same_as_last_v6(connp, sin6) && 6191 connp->conn_lastsrcid == srcid && 6192 ipsec_outbound_policy_current(ixa)) { 6193 /* udp_output_lastdst drops conn_lock */ 6194 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6195 } else { 6196 /* udp_output_newdst drops conn_lock */ 6197 error = udp_output_newdst(connp, mp, NULL, sin6, 6198 ipversion, cr, pid, ixa); 6199 } 6200 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6201 if (us->us_sendto_ignerr) 6202 return (0); 6203 else 6204 return (error); 6205 case AF_INET: 6206 sin = (sin_t *)msg->msg_name; 6207 6208 ipversion = IPV4_VERSION; 6209 6210 if (sin->sin_addr.s_addr == INADDR_ANY) 6211 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 6212 6213 /* 6214 * We have to allocate an ip_xmit_attr_t before we grab 6215 * conn_lock and we need to hold conn_lock once we've check 6216 * conn_same_as_last_v6 to handle concurrent send* on a socket. 
6217 */ 6218 if (msg->msg_controllen == 0) { 6219 ixa = conn_get_ixa(connp, B_FALSE); 6220 if (ixa == NULL) { 6221 UDPS_BUMP_MIB(us, udpOutErrors); 6222 return (ENOMEM); 6223 } 6224 } else { 6225 ixa = NULL; 6226 } 6227 mutex_enter(&connp->conn_lock); 6228 if (udp->udp_delayed_error != 0) { 6229 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 6230 6231 error = udp->udp_delayed_error; 6232 udp->udp_delayed_error = 0; 6233 6234 /* Compare IP address and port */ 6235 6236 if (sin->sin_port == sin2->sin_port && 6237 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) { 6238 mutex_exit(&connp->conn_lock); 6239 UDPS_BUMP_MIB(us, udpOutErrors); 6240 if (ixa != NULL) 6241 ixa_refrele(ixa); 6242 return (error); 6243 } 6244 } 6245 if (msg->msg_controllen != 0) { 6246 mutex_exit(&connp->conn_lock); 6247 ASSERT(ixa == NULL); 6248 error = udp_output_ancillary(connp, sin, NULL, mp, 6249 NULL, msg, cr, pid); 6250 } else if (conn_same_as_last_v4(connp, sin) && 6251 ipsec_outbound_policy_current(ixa)) { 6252 /* udp_output_lastdst drops conn_lock */ 6253 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6254 } else { 6255 /* udp_output_newdst drops conn_lock */ 6256 error = udp_output_newdst(connp, mp, sin, NULL, 6257 ipversion, cr, pid, ixa); 6258 } 6259 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6260 if (us->us_sendto_ignerr) 6261 return (0); 6262 else 6263 return (error); 6264 default: 6265 return (EINVAL); 6266 } 6267 } 6268 6269 int 6270 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 6271 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb, 6272 sock_quiesce_arg_t *arg) 6273 { 6274 conn_t *connp = (conn_t *)proto_handle; 6275 udp_t *udp; 6276 struct T_capability_ack tca; 6277 struct sockaddr_in6 laddr, faddr; 6278 socklen_t laddrlen, faddrlen; 6279 short opts; 6280 struct stroptions *stropt; 6281 mblk_t *mp, *stropt_mp; 6282 int error; 6283 6284 udp = connp->conn_udp; 6285 6286 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 6287 6288 /* 6289 * 
setup the fallback stream that was allocated 6290 */ 6291 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6292 connp->conn_minor_arena = WR(q)->q_ptr; 6293 6294 RD(q)->q_ptr = WR(q)->q_ptr = connp; 6295 6296 WR(q)->q_qinfo = &udp_winit; 6297 6298 connp->conn_rq = RD(q); 6299 connp->conn_wq = WR(q); 6300 6301 /* Notify stream head about options before sending up data */ 6302 stropt_mp->b_datap->db_type = M_SETOPTS; 6303 stropt_mp->b_wptr += sizeof (*stropt); 6304 stropt = (struct stroptions *)stropt_mp->b_rptr; 6305 stropt->so_flags = SO_WROFF | SO_HIWAT; 6306 stropt->so_wroff = connp->conn_wroff; 6307 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 6308 putnext(RD(q), stropt_mp); 6309 6310 /* 6311 * Free the helper stream 6312 */ 6313 ip_free_helper_stream(connp); 6314 6315 if (!issocket) 6316 udp_use_pure_tpi(udp); 6317 6318 /* 6319 * Collect the information needed to sync with the sonode 6320 */ 6321 udp_do_capability_ack(udp, &tca, TC1_INFO); 6322 6323 laddrlen = faddrlen = sizeof (sin6_t); 6324 (void) udp_getsockname((sock_lower_handle_t)connp, 6325 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6326 error = udp_getpeername((sock_lower_handle_t)connp, 6327 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6328 if (error != 0) 6329 faddrlen = 0; 6330 6331 opts = 0; 6332 if (connp->conn_dgram_errind) 6333 opts |= SO_DGRAM_ERRIND; 6334 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 6335 opts |= SO_DONTROUTE; 6336 6337 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca, 6338 (struct sockaddr *)&laddr, laddrlen, 6339 (struct sockaddr *)&faddr, faddrlen, opts); 6340 6341 mutex_enter(&udp->udp_recv_lock); 6342 /* 6343 * Attempts to send data up during fallback will result in it being 6344 * queued in udp_t. First push up the datagrams obtained from the 6345 * socket, then any packets queued in udp_t. 
6346 */ 6347 if (mp != NULL) { 6348 mp->b_next = udp->udp_fallback_queue_head; 6349 udp->udp_fallback_queue_head = mp; 6350 } 6351 while (udp->udp_fallback_queue_head != NULL) { 6352 mp = udp->udp_fallback_queue_head; 6353 udp->udp_fallback_queue_head = mp->b_next; 6354 mutex_exit(&udp->udp_recv_lock); 6355 mp->b_next = NULL; 6356 putnext(RD(q), mp); 6357 mutex_enter(&udp->udp_recv_lock); 6358 } 6359 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6360 /* 6361 * No longer a streams less socket 6362 */ 6363 mutex_enter(&connp->conn_lock); 6364 connp->conn_flags &= ~IPCL_NONSTR; 6365 mutex_exit(&connp->conn_lock); 6366 6367 mutex_exit(&udp->udp_recv_lock); 6368 6369 ASSERT(connp->conn_ref >= 1); 6370 6371 return (0); 6372 } 6373 6374 /* ARGSUSED3 */ 6375 int 6376 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6377 socklen_t *salenp, cred_t *cr) 6378 { 6379 conn_t *connp = (conn_t *)proto_handle; 6380 udp_t *udp = connp->conn_udp; 6381 int error; 6382 6383 /* All Solaris components should pass a cred for this operation. */ 6384 ASSERT(cr != NULL); 6385 6386 mutex_enter(&connp->conn_lock); 6387 if (udp->udp_state != TS_DATA_XFER) 6388 error = ENOTCONN; 6389 else 6390 error = conn_getpeername(connp, sa, salenp); 6391 mutex_exit(&connp->conn_lock); 6392 return (error); 6393 } 6394 6395 /* ARGSUSED3 */ 6396 int 6397 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6398 socklen_t *salenp, cred_t *cr) 6399 { 6400 conn_t *connp = (conn_t *)proto_handle; 6401 int error; 6402 6403 /* All Solaris components should pass a cred for this operation. 
*/ 6404 ASSERT(cr != NULL); 6405 6406 mutex_enter(&connp->conn_lock); 6407 error = conn_getsockname(connp, sa, salenp); 6408 mutex_exit(&connp->conn_lock); 6409 return (error); 6410 } 6411 6412 int 6413 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6414 void *optvalp, socklen_t *optlen, cred_t *cr) 6415 { 6416 conn_t *connp = (conn_t *)proto_handle; 6417 int error; 6418 t_uscalar_t max_optbuf_len; 6419 void *optvalp_buf; 6420 int len; 6421 6422 /* All Solaris components should pass a cred for this operation. */ 6423 ASSERT(cr != NULL); 6424 6425 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6426 udp_opt_obj.odb_opt_des_arr, 6427 udp_opt_obj.odb_opt_arr_cnt, 6428 B_FALSE, B_TRUE, cr); 6429 if (error != 0) { 6430 if (error < 0) 6431 error = proto_tlitosyserr(-error); 6432 return (error); 6433 } 6434 6435 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6436 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6437 if (len == -1) { 6438 kmem_free(optvalp_buf, max_optbuf_len); 6439 return (EINVAL); 6440 } 6441 6442 /* 6443 * update optlen and copy option value 6444 */ 6445 t_uscalar_t size = MIN(len, *optlen); 6446 6447 bcopy(optvalp_buf, optvalp, size); 6448 bcopy(&size, optlen, sizeof (size)); 6449 6450 kmem_free(optvalp_buf, max_optbuf_len); 6451 return (0); 6452 } 6453 6454 int 6455 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6456 const void *optvalp, socklen_t optlen, cred_t *cr) 6457 { 6458 conn_t *connp = (conn_t *)proto_handle; 6459 int error; 6460 6461 /* All Solaris components should pass a cred for this operation. 
*/ 6462 ASSERT(cr != NULL); 6463 6464 error = proto_opt_check(level, option_name, optlen, NULL, 6465 udp_opt_obj.odb_opt_des_arr, 6466 udp_opt_obj.odb_opt_arr_cnt, 6467 B_TRUE, B_FALSE, cr); 6468 6469 if (error != 0) { 6470 if (error < 0) 6471 error = proto_tlitosyserr(-error); 6472 return (error); 6473 } 6474 6475 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6476 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6477 NULL, cr); 6478 6479 ASSERT(error >= 0); 6480 6481 return (error); 6482 } 6483 6484 void 6485 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6486 { 6487 conn_t *connp = (conn_t *)proto_handle; 6488 udp_t *udp = connp->conn_udp; 6489 6490 mutex_enter(&udp->udp_recv_lock); 6491 connp->conn_flow_cntrld = B_FALSE; 6492 mutex_exit(&udp->udp_recv_lock); 6493 } 6494 6495 /* ARGSUSED2 */ 6496 int 6497 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6498 { 6499 conn_t *connp = (conn_t *)proto_handle; 6500 6501 /* All Solaris components should pass a cred for this operation. */ 6502 ASSERT(cr != NULL); 6503 6504 /* shut down the send side */ 6505 if (how != SHUT_RD) 6506 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6507 SOCK_OPCTL_SHUT_SEND, 0); 6508 /* shut down the recv side */ 6509 if (how != SHUT_WR) 6510 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6511 SOCK_OPCTL_SHUT_RECV, 0); 6512 return (0); 6513 } 6514 6515 int 6516 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6517 int mode, int32_t *rvalp, cred_t *cr) 6518 { 6519 conn_t *connp = (conn_t *)proto_handle; 6520 int error; 6521 6522 /* All Solaris components should pass a cred for this operation. */ 6523 ASSERT(cr != NULL); 6524 6525 /* 6526 * If we don't have a helper stream then create one. 6527 * ip_create_helper_stream takes care of locking the conn_t, 6528 * so this check for NULL is just a performance optimization. 
6529 */ 6530 if (connp->conn_helper_info == NULL) { 6531 udp_stack_t *us = connp->conn_udp->udp_us; 6532 6533 ASSERT(us->us_ldi_ident != NULL); 6534 6535 /* 6536 * Create a helper stream for non-STREAMS socket. 6537 */ 6538 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6539 if (error != 0) { 6540 ip0dbg(("udp_ioctl: create of IP helper stream " 6541 "failed %d\n", error)); 6542 return (error); 6543 } 6544 } 6545 6546 switch (cmd) { 6547 case _SIOCSOCKFALLBACK: 6548 case TI_GETPEERNAME: 6549 case TI_GETMYNAME: 6550 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6551 cmd)); 6552 error = EINVAL; 6553 break; 6554 default: 6555 /* 6556 * Pass on to IP using helper stream 6557 */ 6558 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6559 cmd, arg, mode, cr, rvalp); 6560 break; 6561 } 6562 return (error); 6563 } 6564 6565 /* ARGSUSED */ 6566 int 6567 udp_accept(sock_lower_handle_t lproto_handle, 6568 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6569 cred_t *cr) 6570 { 6571 return (EOPNOTSUPP); 6572 } 6573 6574 /* ARGSUSED */ 6575 int 6576 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6577 { 6578 return (EOPNOTSUPP); 6579 } 6580 6581 sock_downcalls_t sock_udp_downcalls = { 6582 udp_activate, /* sd_activate */ 6583 udp_accept, /* sd_accept */ 6584 udp_bind, /* sd_bind */ 6585 udp_listen, /* sd_listen */ 6586 udp_connect, /* sd_connect */ 6587 udp_getpeername, /* sd_getpeername */ 6588 udp_getsockname, /* sd_getsockname */ 6589 udp_getsockopt, /* sd_getsockopt */ 6590 udp_setsockopt, /* sd_setsockopt */ 6591 udp_send, /* sd_send */ 6592 NULL, /* sd_send_uio */ 6593 NULL, /* sd_recv_uio */ 6594 NULL, /* sd_poll */ 6595 udp_shutdown, /* sd_shutdown */ 6596 udp_clr_flowctrl, /* sd_setflowctrl */ 6597 udp_ioctl, /* sd_ioctl */ 6598 udp_close /* sd_close */ 6599 }; 6600