1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2013, Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/suntpi.h> 39 #include <sys/xti_inet.h> 40 #include <sys/kmem.h> 41 #include <sys/cred_impl.h> 42 #include <sys/policy.h> 43 #include <sys/priv.h> 44 #include <sys/ucred.h> 45 #include <sys/zone.h> 46 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/sockio.h> 50 #include <sys/vtrace.h> 51 #include <sys/sdt.h> 52 #include <sys/debug.h> 53 #include <sys/isa_defs.h> 54 #include <sys/random.h> 55 #include <netinet/in.h> 56 #include <netinet/ip6.h> 57 #include <netinet/icmp6.h> 58 #include <netinet/udp.h> 59 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip_impl.h> 63 #include <inet/ipsec_impl.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ire.h> 66 #include <inet/ip_if.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_ndp.h> 69 #include <inet/proto_set.h> 70 #include <inet/mib2.h> 71 #include <inet/optcom.h> 72 #include <inet/snmpcom.h> 73 #include <inet/kstatcom.h> 74 #include <inet/ipclassifier.h> 75 #include <sys/squeue_impl.h> 76 #include <inet/ipnet.h> 77 #include <sys/ethernet.h> 78 79 #include <sys/tsol/label.h> 80 #include <sys/tsol/tnet.h> 81 #include <rpc/pmap_prot.h> 82 83 #include <inet/udp_impl.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 89 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 90 * protects the contents of the udp_t. uf_lock protects the address and the 91 * fanout information. 92 * The lock order is conn_lock -> uf_lock. 93 * 94 * The fanout lock uf_lock: 95 * When a UDP endpoint is bound to a local port, it is inserted into 96 * a bind hash list. 
The list consists of an array of udp_fanout_t buckets. 97 * The size of the array is controlled by the udp_bind_fanout_size variable. 98 * This variable can be changed in /etc/system if the default value is 99 * not large enough. Each bind hash bucket is protected by a per bucket 100 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 101 * structure and a few other fields in the udp_t. A UDP endpoint is removed 102 * from the bind hash list only when it is being unbound or being closed. 103 * The per bucket lock also protects a UDP endpoint's state changes. 104 * 105 * Plumbing notes: 106 * UDP is always a device driver. For compatibility with mibopen() code 107 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 108 * dummy module. 109 * 110 * The above implies that we don't support any intermediate module to 111 * reside in between /dev/ip and udp -- in fact, we never supported such 112 * scenario in the past as the inter-layer communication semantics have 113 * always been private. 
114 */ 115 116 /* For /etc/system control */ 117 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 118 119 static void udp_addr_req(queue_t *q, mblk_t *mp); 120 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 121 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 122 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 123 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 124 const in6_addr_t *, in_port_t, uint32_t); 125 static void udp_capability_req(queue_t *q, mblk_t *mp); 126 static int udp_tpi_close(queue_t *q, int flags); 127 static void udp_close_free(conn_t *); 128 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 129 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 130 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 131 int sys_error); 132 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 133 t_scalar_t tlierr, int sys_error); 134 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 135 cred_t *cr); 136 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 137 char *value, caddr_t cp, cred_t *cr); 138 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 139 char *value, caddr_t cp, cred_t *cr); 140 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 141 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 142 ip_recv_attr_t *ira); 143 static void udp_info_req(queue_t *q, mblk_t *mp); 144 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 145 static void udp_lrput(queue_t *, mblk_t *); 146 static void udp_lwput(queue_t *, mblk_t *); 147 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 148 cred_t *credp, boolean_t isv6); 149 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 150 cred_t *credp); 151 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 152 cred_t *credp); 153 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, 
t_scalar_t name); 154 int udp_opt_set(conn_t *connp, uint_t optset_context, 155 int level, int name, uint_t inlen, 156 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 157 void *thisdg_attrs, cred_t *cr); 158 int udp_opt_get(conn_t *connp, int level, int name, 159 uchar_t *ptr); 160 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 161 pid_t pid); 162 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 163 pid_t pid, ip_xmit_attr_t *ixa); 164 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 165 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 166 ip_xmit_attr_t *ixa); 167 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 168 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 169 int *); 170 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 171 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 172 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 173 static void udp_ud_err_connected(conn_t *, t_scalar_t); 174 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 175 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 176 boolean_t random); 177 static void udp_wput_other(queue_t *q, mblk_t *mp); 178 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 179 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 180 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 181 182 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 183 static void udp_stack_fini(netstackid_t stackid, void *arg); 184 185 /* Common routines for TPI and socket module */ 186 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 187 188 /* Common routine for TPI and socket module */ 189 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); 190 static void udp_do_close(conn_t *); 191 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 192 boolean_t); 193 static 
int udp_do_unbind(conn_t *); 194 195 int udp_getsockname(sock_lower_handle_t, 196 struct sockaddr *, socklen_t *, cred_t *); 197 int udp_getpeername(sock_lower_handle_t, 198 struct sockaddr *, socklen_t *, cred_t *); 199 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 200 cred_t *, pid_t); 201 202 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) 203 204 /* 205 * Checks if the given destination addr/port is allowed out. 206 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 207 * Called for each connect() and for sendto()/sendmsg() to a different 208 * destination. 209 * For connect(), called in udp_connect(). 210 * For sendto()/sendmsg(), called in udp_output_newdst(). 211 * 212 * This macro assumes that the cl_inet_connect2 hook is not NULL. 213 * Please check this before calling this macro. 214 * 215 * void 216 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 217 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 218 */ 219 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 220 (err) = 0; \ 221 /* \ 222 * Running in cluster mode - check and register active \ 223 * "connection" information \ 224 */ \ 225 if ((cp)->conn_ipversion == IPV4_VERSION) \ 226 (err) = (*cl_inet_connect2)( \ 227 (cp)->conn_netstack->netstack_stackid, \ 228 IPPROTO_UDP, is_outgoing, AF_INET, \ 229 (uint8_t *)&((cp)->conn_laddr_v4), \ 230 (cp)->conn_lport, \ 231 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 232 (in_port_t)(fport), NULL); \ 233 else \ 234 (err) = (*cl_inet_connect2)( \ 235 (cp)->conn_netstack->netstack_stackid, \ 236 IPPROTO_UDP, is_outgoing, AF_INET6, \ 237 (uint8_t *)&((cp)->conn_laddr_v6), \ 238 (cp)->conn_lport, \ 239 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 240 } 241 242 static struct module_info udp_mod_info = { 243 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 244 }; 245 246 /* 247 * Entry points for UDP as a device. 
248 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 249 */ 250 static struct qinit udp_rinitv4 = { 251 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 252 }; 253 254 static struct qinit udp_rinitv6 = { 255 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 256 }; 257 258 static struct qinit udp_winit = { 259 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info 260 }; 261 262 /* UDP entry point during fallback */ 263 struct qinit udp_fallback_sock_winit = { 264 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 265 }; 266 267 /* 268 * UDP needs to handle I_LINK and I_PLINK since ifconfig 269 * likes to use it as a place to hang the various streams. 270 */ 271 static struct qinit udp_lrinit = { 272 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 273 }; 274 275 static struct qinit udp_lwinit = { 276 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 277 }; 278 279 /* For AF_INET aka /dev/udp */ 280 struct streamtab udpinfov4 = { 281 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 282 }; 283 284 /* For AF_INET6 aka /dev/udp6 */ 285 struct streamtab udpinfov6 = { 286 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 287 }; 288 289 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 290 291 /* Default structure copied into T_INFO_ACK messages */ 292 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 293 T_INFO_ACK, 294 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 295 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 296 T_INVALID, /* CDATA_size. udp does not support connect data. */ 297 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 298 sizeof (sin_t), /* ADDR_size. */ 299 0, /* OPT_size - not initialized here */ 300 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 301 T_CLTS, /* SERV_type. udp supports connection-less. */ 302 TS_UNBND, /* CURRENT_state. 
This is set from udp_state. */ 303 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 304 }; 305 306 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 307 308 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 309 T_INFO_ACK, 310 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 311 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 312 T_INVALID, /* CDATA_size. udp does not support connect data. */ 313 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 314 sizeof (sin6_t), /* ADDR_size. */ 315 0, /* OPT_size - not initialized here */ 316 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 317 T_CLTS, /* SERV_type. udp supports connection-less. */ 318 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 319 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 320 }; 321 322 /* 323 * UDP tunables related declarations. Definitions are in udp_tunables.c 324 */ 325 extern mod_prop_info_t udp_propinfo_tbl[]; 326 extern int udp_propinfo_count; 327 328 /* Setable in /etc/system */ 329 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 330 uint32_t udp_random_anon_port = 1; 331 332 /* 333 * Hook functions to enable cluster networking. 334 * On non-clustered systems these vectors must always be NULL 335 */ 336 337 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 338 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 339 void *args) = NULL; 340 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 341 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 342 void *args) = NULL; 343 344 typedef union T_primitives *t_primp_t; 345 346 /* 347 * Return the next anonymous port in the privileged port range for 348 * bind checking. 349 * 350 * Trusted Extension (TX) notes: TX allows administrator to mark or 351 * reserve ports as Multilevel ports (MLP). MLP has special function 352 * on TX systems. Once a port is made MLP, it's not available as 353 * ordinary port. 
This creates "holes" in the port name space. It 354 * may be necessary to skip the "holes" find a suitable anon port. 355 */ 356 static in_port_t 357 udp_get_next_priv_port(udp_t *udp) 358 { 359 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 360 in_port_t nextport; 361 boolean_t restart = B_FALSE; 362 udp_stack_t *us = udp->udp_us; 363 364 retry: 365 if (next_priv_port < us->us_min_anonpriv_port || 366 next_priv_port >= IPPORT_RESERVED) { 367 next_priv_port = IPPORT_RESERVED - 1; 368 if (restart) 369 return (0); 370 restart = B_TRUE; 371 } 372 373 if (is_system_labeled() && 374 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 375 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 376 next_priv_port = nextport; 377 goto retry; 378 } 379 380 return (next_priv_port--); 381 } 382 383 /* 384 * Hash list removal routine for udp_t structures. 385 */ 386 static void 387 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 388 { 389 udp_t *udpnext; 390 kmutex_t *lockp; 391 udp_stack_t *us = udp->udp_us; 392 conn_t *connp = udp->udp_connp; 393 394 if (udp->udp_ptpbhn == NULL) 395 return; 396 397 /* 398 * Extract the lock pointer in case there are concurrent 399 * hash_remove's for this instance. 
400 */ 401 ASSERT(connp->conn_lport != 0); 402 if (!caller_holds_lock) { 403 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 404 us->us_bind_fanout_size)].uf_lock; 405 ASSERT(lockp != NULL); 406 mutex_enter(lockp); 407 } 408 if (udp->udp_ptpbhn != NULL) { 409 udpnext = udp->udp_bind_hash; 410 if (udpnext != NULL) { 411 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 412 udp->udp_bind_hash = NULL; 413 } 414 *udp->udp_ptpbhn = udpnext; 415 udp->udp_ptpbhn = NULL; 416 } 417 if (!caller_holds_lock) { 418 mutex_exit(lockp); 419 } 420 } 421 422 static void 423 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 424 { 425 conn_t *connp = udp->udp_connp; 426 udp_t **udpp; 427 udp_t *udpnext; 428 conn_t *connext; 429 430 ASSERT(MUTEX_HELD(&uf->uf_lock)); 431 ASSERT(udp->udp_ptpbhn == NULL); 432 udpp = &uf->uf_udp; 433 udpnext = udpp[0]; 434 if (udpnext != NULL) { 435 /* 436 * If the new udp bound to the INADDR_ANY address 437 * and the first one in the list is not bound to 438 * INADDR_ANY we skip all entries until we find the 439 * first one bound to INADDR_ANY. 440 * This makes sure that applications binding to a 441 * specific address get preference over those binding to 442 * INADDR_ANY. 443 */ 444 connext = udpnext->udp_connp; 445 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 446 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 447 while ((udpnext = udpp[0]) != NULL && 448 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 449 udpp = &(udpnext->udp_bind_hash); 450 } 451 if (udpnext != NULL) 452 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 453 } else { 454 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 455 } 456 } 457 udp->udp_bind_hash = udpnext; 458 udp->udp_ptpbhn = udpp; 459 udpp[0] = udp; 460 } 461 462 /* 463 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 464 * passed to udp_wput. 465 * It associates a port number and local address with the stream. 
466 * It calls IP to verify the local IP address, and calls IP to insert 467 * the conn_t in the fanout table. 468 * If everything is ok it then sends the T_BIND_ACK back up. 469 * 470 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 471 * without setting SO_REUSEADDR. This is needed so that they 472 * can be viewed as two independent transport protocols. 473 * However, anonymouns ports are allocated from the same range to avoid 474 * duplicating the us->us_next_port_to_try. 475 */ 476 static void 477 udp_tpi_bind(queue_t *q, mblk_t *mp) 478 { 479 sin_t *sin; 480 sin6_t *sin6; 481 mblk_t *mp1; 482 struct T_bind_req *tbr; 483 conn_t *connp; 484 udp_t *udp; 485 int error; 486 struct sockaddr *sa; 487 cred_t *cr; 488 489 /* 490 * All Solaris components should pass a db_credp 491 * for this TPI message, hence we ASSERT. 492 * But in case there is some other M_PROTO that looks 493 * like a TPI message sent by some other kernel 494 * component, we check and return an error. 495 */ 496 cr = msg_getcred(mp, NULL); 497 ASSERT(cr != NULL); 498 if (cr == NULL) { 499 udp_err_ack(q, mp, TSYSERR, EINVAL); 500 return; 501 } 502 503 connp = Q_TO_CONN(q); 504 udp = connp->conn_udp; 505 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 506 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 507 "udp_bind: bad req, len %u", 508 (uint_t)(mp->b_wptr - mp->b_rptr)); 509 udp_err_ack(q, mp, TPROTO, 0); 510 return; 511 } 512 if (udp->udp_state != TS_UNBND) { 513 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 514 "udp_bind: bad state, %u", udp->udp_state); 515 udp_err_ack(q, mp, TOUTSTATE, 0); 516 return; 517 } 518 /* 519 * Reallocate the message to make sure we have enough room for an 520 * address. 521 */ 522 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 523 if (mp1 == NULL) { 524 udp_err_ack(q, mp, TSYSERR, ENOMEM); 525 return; 526 } 527 528 mp = mp1; 529 530 /* Reset the message type in preparation for shipping it back. 
*/ 531 DB_TYPE(mp) = M_PCPROTO; 532 533 tbr = (struct T_bind_req *)mp->b_rptr; 534 switch (tbr->ADDR_length) { 535 case 0: /* Request for a generic port */ 536 tbr->ADDR_offset = sizeof (struct T_bind_req); 537 if (connp->conn_family == AF_INET) { 538 tbr->ADDR_length = sizeof (sin_t); 539 sin = (sin_t *)&tbr[1]; 540 *sin = sin_null; 541 sin->sin_family = AF_INET; 542 mp->b_wptr = (uchar_t *)&sin[1]; 543 sa = (struct sockaddr *)sin; 544 } else { 545 ASSERT(connp->conn_family == AF_INET6); 546 tbr->ADDR_length = sizeof (sin6_t); 547 sin6 = (sin6_t *)&tbr[1]; 548 *sin6 = sin6_null; 549 sin6->sin6_family = AF_INET6; 550 mp->b_wptr = (uchar_t *)&sin6[1]; 551 sa = (struct sockaddr *)sin6; 552 } 553 break; 554 555 case sizeof (sin_t): /* Complete IPv4 address */ 556 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 557 sizeof (sin_t)); 558 if (sa == NULL || !OK_32PTR((char *)sa)) { 559 udp_err_ack(q, mp, TSYSERR, EINVAL); 560 return; 561 } 562 if (connp->conn_family != AF_INET || 563 sa->sa_family != AF_INET) { 564 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 565 return; 566 } 567 break; 568 569 case sizeof (sin6_t): /* complete IPv6 address */ 570 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 571 sizeof (sin6_t)); 572 if (sa == NULL || !OK_32PTR((char *)sa)) { 573 udp_err_ack(q, mp, TSYSERR, EINVAL); 574 return; 575 } 576 if (connp->conn_family != AF_INET6 || 577 sa->sa_family != AF_INET6) { 578 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 579 return; 580 } 581 break; 582 583 default: /* Invalid request */ 584 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 585 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 586 udp_err_ack(q, mp, TBADADDR, 0); 587 return; 588 } 589 590 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 591 tbr->PRIM_type != O_T_BIND_REQ); 592 593 if (error != 0) { 594 if (error > 0) { 595 udp_err_ack(q, mp, TSYSERR, error); 596 } else { 597 udp_err_ack(q, mp, -error, 0); 598 } 599 } else { 600 tbr->PRIM_type = T_BIND_ACK; 
601 qreply(q, mp); 602 } 603 } 604 605 /* 606 * This routine handles each T_CONN_REQ message passed to udp. It 607 * associates a default destination address with the stream. 608 * 609 * After various error checks are completed, udp_connect() lays 610 * the target address and port into the composite header template. 611 * Then we ask IP for information, including a source address if we didn't 612 * already have one. Finally we send up the T_OK_ACK reply message. 613 */ 614 static void 615 udp_tpi_connect(queue_t *q, mblk_t *mp) 616 { 617 conn_t *connp = Q_TO_CONN(q); 618 int error; 619 socklen_t len; 620 struct sockaddr *sa; 621 struct T_conn_req *tcr; 622 cred_t *cr; 623 pid_t pid; 624 /* 625 * All Solaris components should pass a db_credp 626 * for this TPI message, hence we ASSERT. 627 * But in case there is some other M_PROTO that looks 628 * like a TPI message sent by some other kernel 629 * component, we check and return an error. 630 */ 631 cr = msg_getcred(mp, &pid); 632 ASSERT(cr != NULL); 633 if (cr == NULL) { 634 udp_err_ack(q, mp, TSYSERR, EINVAL); 635 return; 636 } 637 638 tcr = (struct T_conn_req *)mp->b_rptr; 639 640 /* A bit of sanity checking */ 641 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 642 udp_err_ack(q, mp, TPROTO, 0); 643 return; 644 } 645 646 if (tcr->OPT_length != 0) { 647 udp_err_ack(q, mp, TBADOPT, 0); 648 return; 649 } 650 651 /* 652 * Determine packet type based on type of address passed in 653 * the request should contain an IPv4 or IPv6 address. 654 * Make sure that address family matches the type of 655 * family of the address passed down. 
656 */ 657 len = tcr->DEST_length; 658 switch (tcr->DEST_length) { 659 default: 660 udp_err_ack(q, mp, TBADADDR, 0); 661 return; 662 663 case sizeof (sin_t): 664 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 665 sizeof (sin_t)); 666 break; 667 668 case sizeof (sin6_t): 669 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 670 sizeof (sin6_t)); 671 break; 672 } 673 674 error = proto_verify_ip_addr(connp->conn_family, sa, len); 675 if (error != 0) { 676 udp_err_ack(q, mp, TSYSERR, error); 677 return; 678 } 679 680 error = udp_do_connect(connp, sa, len, cr, pid); 681 if (error != 0) { 682 if (error < 0) 683 udp_err_ack(q, mp, -error, 0); 684 else 685 udp_err_ack(q, mp, TSYSERR, error); 686 } else { 687 mblk_t *mp1; 688 /* 689 * We have to send a connection confirmation to 690 * keep TLI happy. 691 */ 692 if (connp->conn_family == AF_INET) { 693 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 694 sizeof (sin_t), NULL, 0); 695 } else { 696 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 697 sizeof (sin6_t), NULL, 0); 698 } 699 if (mp1 == NULL) { 700 udp_err_ack(q, mp, TSYSERR, ENOMEM); 701 return; 702 } 703 704 /* 705 * Send ok_ack for T_CONN_REQ 706 */ 707 mp = mi_tpi_ok_ack_alloc(mp); 708 if (mp == NULL) { 709 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 710 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 711 return; 712 } 713 714 putnext(connp->conn_rq, mp); 715 putnext(connp->conn_rq, mp1); 716 } 717 } 718 719 static int 720 udp_tpi_close(queue_t *q, int flags) 721 { 722 conn_t *connp; 723 724 if (flags & SO_FALLBACK) { 725 /* 726 * stream is being closed while in fallback 727 * simply free the resources that were allocated 728 */ 729 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 730 qprocsoff(q); 731 goto done; 732 } 733 734 connp = Q_TO_CONN(q); 735 udp_do_close(connp); 736 done: 737 q->q_ptr = WR(q)->q_ptr = NULL; 738 return (0); 739 } 740 741 static void 742 udp_close_free(conn_t *connp) 743 { 744 udp_t *udp = connp->conn_udp; 745 746 /* If there are any options associated with the stream, free them. */ 747 if (udp->udp_recv_ipp.ipp_fields != 0) 748 ip_pkt_free(&udp->udp_recv_ipp); 749 750 /* 751 * Clear any fields which the kmem_cache constructor clears. 752 * Only udp_connp needs to be preserved. 753 * TBD: We should make this more efficient to avoid clearing 754 * everything. 
755 */ 756 ASSERT(udp->udp_connp == connp); 757 bzero(udp, sizeof (udp_t)); 758 udp->udp_connp = connp; 759 } 760 761 static int 762 udp_do_disconnect(conn_t *connp) 763 { 764 udp_t *udp; 765 udp_fanout_t *udpf; 766 udp_stack_t *us; 767 int error; 768 769 udp = connp->conn_udp; 770 us = udp->udp_us; 771 mutex_enter(&connp->conn_lock); 772 if (udp->udp_state != TS_DATA_XFER) { 773 mutex_exit(&connp->conn_lock); 774 return (-TOUTSTATE); 775 } 776 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 777 us->us_bind_fanout_size)]; 778 mutex_enter(&udpf->uf_lock); 779 if (connp->conn_mcbc_bind) 780 connp->conn_saddr_v6 = ipv6_all_zeros; 781 else 782 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 783 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 784 connp->conn_faddr_v6 = ipv6_all_zeros; 785 connp->conn_fport = 0; 786 udp->udp_state = TS_IDLE; 787 mutex_exit(&udpf->uf_lock); 788 789 /* Remove any remnants of mapped address binding */ 790 if (connp->conn_family == AF_INET6) 791 connp->conn_ipversion = IPV6_VERSION; 792 793 connp->conn_v6lastdst = ipv6_all_zeros; 794 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 795 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 796 mutex_exit(&connp->conn_lock); 797 if (error != 0) 798 return (error); 799 800 /* 801 * Tell IP to remove the full binding and revert 802 * to the local address binding. 803 */ 804 return (ip_laddr_fanout_insert(connp)); 805 } 806 807 static void 808 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 809 { 810 conn_t *connp = Q_TO_CONN(q); 811 int error; 812 813 /* 814 * Allocate the largest primitive we need to send back 815 * T_error_ack is > than T_ok_ack 816 */ 817 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 818 if (mp == NULL) { 819 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 820 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 821 return; 822 } 823 824 error = udp_do_disconnect(connp); 825 826 if (error != 0) { 827 if (error < 0) { 828 udp_err_ack(q, mp, -error, 0); 829 } else { 830 udp_err_ack(q, mp, TSYSERR, error); 831 } 832 } else { 833 mp = mi_tpi_ok_ack_alloc(mp); 834 ASSERT(mp != NULL); 835 qreply(q, mp); 836 } 837 } 838 839 int 840 udp_disconnect(conn_t *connp) 841 { 842 int error; 843 844 connp->conn_dgram_errind = B_FALSE; 845 error = udp_do_disconnect(connp); 846 if (error < 0) 847 error = proto_tlitosyserr(-error); 848 849 return (error); 850 } 851 852 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 853 static void 854 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 855 { 856 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 857 qreply(q, mp); 858 } 859 860 /* Shorthand to generate and send TPI error acks to our client */ 861 static void 862 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 863 t_scalar_t t_error, int sys_error) 864 { 865 struct T_error_ack *teackp; 866 867 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 868 M_PCPROTO, T_ERROR_ACK)) != NULL) { 869 teackp = (struct T_error_ack *)mp->b_rptr; 870 teackp->ERROR_prim = primitive; 871 teackp->TLI_error = t_error; 872 teackp->UNIX_error = sys_error; 873 qreply(q, mp); 874 } 875 } 876 877 /* At minimum we need 4 bytes of UDP header */ 878 #define ICMP_MIN_UDP_HDR 4 879 880 /* 881 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 882 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 883 * Assumes that IP has pulled up everything up to and including the ICMP header. 
884 */ 885 /* ARGSUSED2 */ 886 static void 887 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 888 { 889 conn_t *connp = (conn_t *)arg1; 890 icmph_t *icmph; 891 ipha_t *ipha; 892 int iph_hdr_length; 893 udpha_t *udpha; 894 sin_t sin; 895 sin6_t sin6; 896 mblk_t *mp1; 897 int error = 0; 898 udp_t *udp = connp->conn_udp; 899 900 ipha = (ipha_t *)mp->b_rptr; 901 902 ASSERT(OK_32PTR(mp->b_rptr)); 903 904 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 905 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 906 udp_icmp_error_ipv6(connp, mp, ira); 907 return; 908 } 909 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 910 911 /* Skip past the outer IP and ICMP headers */ 912 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 913 iph_hdr_length = ira->ira_ip_hdr_length; 914 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 915 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 916 917 /* Skip past the inner IP and find the ULP header */ 918 iph_hdr_length = IPH_HDR_LENGTH(ipha); 919 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 920 921 switch (icmph->icmph_type) { 922 case ICMP_DEST_UNREACHABLE: 923 switch (icmph->icmph_code) { 924 case ICMP_FRAGMENTATION_NEEDED: { 925 ipha_t *ipha; 926 ip_xmit_attr_t *ixa; 927 /* 928 * IP has already adjusted the path MTU. 929 * But we need to adjust DF for IPv4. 930 */ 931 if (connp->conn_ipversion != IPV4_VERSION) 932 break; 933 934 ixa = conn_get_ixa(connp, B_FALSE); 935 if (ixa == NULL || ixa->ixa_ire == NULL) { 936 /* 937 * Some other thread holds conn_ixa. We will 938 * redo this on the next ICMP too big. 
939 */ 940 if (ixa != NULL) 941 ixa_refrele(ixa); 942 break; 943 } 944 (void) ip_get_pmtu(ixa); 945 946 mutex_enter(&connp->conn_lock); 947 ipha = (ipha_t *)connp->conn_ht_iphc; 948 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 949 ipha->ipha_fragment_offset_and_flags |= 950 IPH_DF_HTONS; 951 } else { 952 ipha->ipha_fragment_offset_and_flags &= 953 ~IPH_DF_HTONS; 954 } 955 mutex_exit(&connp->conn_lock); 956 ixa_refrele(ixa); 957 break; 958 } 959 case ICMP_PORT_UNREACHABLE: 960 case ICMP_PROTOCOL_UNREACHABLE: 961 error = ECONNREFUSED; 962 break; 963 default: 964 /* Transient errors */ 965 break; 966 } 967 break; 968 default: 969 /* Transient errors */ 970 break; 971 } 972 if (error == 0) { 973 freemsg(mp); 974 return; 975 } 976 977 /* 978 * Deliver T_UDERROR_IND when the application has asked for it. 979 * The socket layer enables this automatically when connected. 980 */ 981 if (!connp->conn_dgram_errind) { 982 freemsg(mp); 983 return; 984 } 985 986 switch (connp->conn_family) { 987 case AF_INET: 988 sin = sin_null; 989 sin.sin_family = AF_INET; 990 sin.sin_addr.s_addr = ipha->ipha_dst; 991 sin.sin_port = udpha->uha_dst_port; 992 if (IPCL_IS_NONSTR(connp)) { 993 mutex_enter(&connp->conn_lock); 994 if (udp->udp_state == TS_DATA_XFER) { 995 if (sin.sin_port == connp->conn_fport && 996 sin.sin_addr.s_addr == 997 connp->conn_faddr_v4) { 998 mutex_exit(&connp->conn_lock); 999 (*connp->conn_upcalls->su_set_error) 1000 (connp->conn_upper_handle, error); 1001 goto done; 1002 } 1003 } else { 1004 udp->udp_delayed_error = error; 1005 *((sin_t *)&udp->udp_delayed_addr) = sin; 1006 } 1007 mutex_exit(&connp->conn_lock); 1008 } else { 1009 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1010 NULL, 0, error); 1011 if (mp1 != NULL) 1012 putnext(connp->conn_rq, mp1); 1013 } 1014 break; 1015 case AF_INET6: 1016 sin6 = sin6_null; 1017 sin6.sin6_family = AF_INET6; 1018 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1019 sin6.sin6_port = udpha->uha_dst_port; 1020 if 
(IPCL_IS_NONSTR(connp)) { 1021 mutex_enter(&connp->conn_lock); 1022 if (udp->udp_state == TS_DATA_XFER) { 1023 if (sin6.sin6_port == connp->conn_fport && 1024 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1025 &connp->conn_faddr_v6)) { 1026 mutex_exit(&connp->conn_lock); 1027 (*connp->conn_upcalls->su_set_error) 1028 (connp->conn_upper_handle, error); 1029 goto done; 1030 } 1031 } else { 1032 udp->udp_delayed_error = error; 1033 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1034 } 1035 mutex_exit(&connp->conn_lock); 1036 } else { 1037 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1038 NULL, 0, error); 1039 if (mp1 != NULL) 1040 putnext(connp->conn_rq, mp1); 1041 } 1042 break; 1043 } 1044 done: 1045 freemsg(mp); 1046 } 1047 1048 /* 1049 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1050 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1051 * Assumes that IP has pulled up all the extension headers as well as the 1052 * ICMPv6 header. 
1053 */ 1054 static void 1055 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1056 { 1057 icmp6_t *icmp6; 1058 ip6_t *ip6h, *outer_ip6h; 1059 uint16_t iph_hdr_length; 1060 uint8_t *nexthdrp; 1061 udpha_t *udpha; 1062 sin6_t sin6; 1063 mblk_t *mp1; 1064 int error = 0; 1065 udp_t *udp = connp->conn_udp; 1066 udp_stack_t *us = udp->udp_us; 1067 1068 outer_ip6h = (ip6_t *)mp->b_rptr; 1069 #ifdef DEBUG 1070 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1071 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1072 else 1073 iph_hdr_length = IPV6_HDR_LEN; 1074 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1075 #endif 1076 /* Skip past the outer IP and ICMP headers */ 1077 iph_hdr_length = ira->ira_ip_hdr_length; 1078 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1079 1080 /* Skip past the inner IP and find the ULP header */ 1081 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1082 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1083 freemsg(mp); 1084 return; 1085 } 1086 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1087 1088 switch (icmp6->icmp6_type) { 1089 case ICMP6_DST_UNREACH: 1090 switch (icmp6->icmp6_code) { 1091 case ICMP6_DST_UNREACH_NOPORT: 1092 error = ECONNREFUSED; 1093 break; 1094 case ICMP6_DST_UNREACH_ADMIN: 1095 case ICMP6_DST_UNREACH_NOROUTE: 1096 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1097 case ICMP6_DST_UNREACH_ADDR: 1098 /* Transient errors */ 1099 break; 1100 default: 1101 break; 1102 } 1103 break; 1104 case ICMP6_PACKET_TOO_BIG: { 1105 struct T_unitdata_ind *tudi; 1106 struct T_opthdr *toh; 1107 size_t udi_size; 1108 mblk_t *newmp; 1109 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1110 sizeof (struct ip6_mtuinfo); 1111 sin6_t *sin6; 1112 struct ip6_mtuinfo *mtuinfo; 1113 1114 /* 1115 * If the application has requested to receive path mtu 1116 * information, send up an empty message containing an 1117 * IPV6_PATHMTU ancillary data item. 
1118 */ 1119 if (!connp->conn_ipv6_recvpathmtu) 1120 break; 1121 1122 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1123 opt_length; 1124 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1125 UDPS_BUMP_MIB(us, udpInErrors); 1126 break; 1127 } 1128 1129 /* 1130 * newmp->b_cont is left to NULL on purpose. This is an 1131 * empty message containing only ancillary data. 1132 */ 1133 newmp->b_datap->db_type = M_PROTO; 1134 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1135 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1136 tudi->PRIM_type = T_UNITDATA_IND; 1137 tudi->SRC_length = sizeof (sin6_t); 1138 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1139 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1140 tudi->OPT_length = opt_length; 1141 1142 sin6 = (sin6_t *)&tudi[1]; 1143 bzero(sin6, sizeof (sin6_t)); 1144 sin6->sin6_family = AF_INET6; 1145 sin6->sin6_addr = connp->conn_faddr_v6; 1146 1147 toh = (struct T_opthdr *)&sin6[1]; 1148 toh->level = IPPROTO_IPV6; 1149 toh->name = IPV6_PATHMTU; 1150 toh->len = opt_length; 1151 toh->status = 0; 1152 1153 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1154 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1155 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1156 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1157 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1158 /* 1159 * We've consumed everything we need from the original 1160 * message. Free it, then send our empty message. 
1161 */ 1162 freemsg(mp); 1163 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); 1164 return; 1165 } 1166 case ICMP6_TIME_EXCEEDED: 1167 /* Transient errors */ 1168 break; 1169 case ICMP6_PARAM_PROB: 1170 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1171 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1172 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1173 (uchar_t *)nexthdrp) { 1174 error = ECONNREFUSED; 1175 break; 1176 } 1177 break; 1178 } 1179 if (error == 0) { 1180 freemsg(mp); 1181 return; 1182 } 1183 1184 /* 1185 * Deliver T_UDERROR_IND when the application has asked for it. 1186 * The socket layer enables this automatically when connected. 1187 */ 1188 if (!connp->conn_dgram_errind) { 1189 freemsg(mp); 1190 return; 1191 } 1192 1193 sin6 = sin6_null; 1194 sin6.sin6_family = AF_INET6; 1195 sin6.sin6_addr = ip6h->ip6_dst; 1196 sin6.sin6_port = udpha->uha_dst_port; 1197 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1198 1199 if (IPCL_IS_NONSTR(connp)) { 1200 mutex_enter(&connp->conn_lock); 1201 if (udp->udp_state == TS_DATA_XFER) { 1202 if (sin6.sin6_port == connp->conn_fport && 1203 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1204 &connp->conn_faddr_v6)) { 1205 mutex_exit(&connp->conn_lock); 1206 (*connp->conn_upcalls->su_set_error) 1207 (connp->conn_upper_handle, error); 1208 goto done; 1209 } 1210 } else { 1211 udp->udp_delayed_error = error; 1212 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1213 } 1214 mutex_exit(&connp->conn_lock); 1215 } else { 1216 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1217 NULL, 0, error); 1218 if (mp1 != NULL) 1219 putnext(connp->conn_rq, mp1); 1220 } 1221 done: 1222 freemsg(mp); 1223 } 1224 1225 /* 1226 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1227 * The local address is filled in if endpoint is bound. 
The remote address 1228 * is filled in if remote address has been precified ("connected endpoint") 1229 * (The concept of connected CLTS sockets is alien to published TPI 1230 * but we support it anyway). 1231 */ 1232 static void 1233 udp_addr_req(queue_t *q, mblk_t *mp) 1234 { 1235 struct sockaddr *sa; 1236 mblk_t *ackmp; 1237 struct T_addr_ack *taa; 1238 udp_t *udp = Q_TO_UDP(q); 1239 conn_t *connp = udp->udp_connp; 1240 uint_t addrlen; 1241 1242 /* Make it large enough for worst case */ 1243 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1244 2 * sizeof (sin6_t), 1); 1245 if (ackmp == NULL) { 1246 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1247 return; 1248 } 1249 taa = (struct T_addr_ack *)ackmp->b_rptr; 1250 1251 bzero(taa, sizeof (struct T_addr_ack)); 1252 ackmp->b_wptr = (uchar_t *)&taa[1]; 1253 1254 taa->PRIM_type = T_ADDR_ACK; 1255 ackmp->b_datap->db_type = M_PCPROTO; 1256 1257 if (connp->conn_family == AF_INET) 1258 addrlen = sizeof (sin_t); 1259 else 1260 addrlen = sizeof (sin6_t); 1261 1262 mutex_enter(&connp->conn_lock); 1263 /* 1264 * Note: Following code assumes 32 bit alignment of basic 1265 * data structures like sin_t and struct T_addr_ack. 
1266 */ 1267 if (udp->udp_state != TS_UNBND) { 1268 /* 1269 * Fill in local address first 1270 */ 1271 taa->LOCADDR_offset = sizeof (*taa); 1272 taa->LOCADDR_length = addrlen; 1273 sa = (struct sockaddr *)&taa[1]; 1274 (void) conn_getsockname(connp, sa, &addrlen); 1275 ackmp->b_wptr += addrlen; 1276 } 1277 if (udp->udp_state == TS_DATA_XFER) { 1278 /* 1279 * connected, fill remote address too 1280 */ 1281 taa->REMADDR_length = addrlen; 1282 /* assumed 32-bit alignment */ 1283 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1284 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1285 (void) conn_getpeername(connp, sa, &addrlen); 1286 ackmp->b_wptr += addrlen; 1287 } 1288 mutex_exit(&connp->conn_lock); 1289 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1290 qreply(q, ackmp); 1291 } 1292 1293 static void 1294 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1295 { 1296 conn_t *connp = udp->udp_connp; 1297 1298 if (connp->conn_family == AF_INET) { 1299 *tap = udp_g_t_info_ack_ipv4; 1300 } else { 1301 *tap = udp_g_t_info_ack_ipv6; 1302 } 1303 tap->CURRENT_state = udp->udp_state; 1304 tap->OPT_size = udp_max_optsize; 1305 } 1306 1307 static void 1308 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1309 t_uscalar_t cap_bits1) 1310 { 1311 tcap->CAP_bits1 = 0; 1312 1313 if (cap_bits1 & TC1_INFO) { 1314 udp_copy_info(&tcap->INFO_ack, udp); 1315 tcap->CAP_bits1 |= TC1_INFO; 1316 } 1317 } 1318 1319 /* 1320 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1321 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1322 * udp_g_t_info_ack. The current state of the stream is copied from 1323 * udp_state. 
1324 */ 1325 static void 1326 udp_capability_req(queue_t *q, mblk_t *mp) 1327 { 1328 t_uscalar_t cap_bits1; 1329 struct T_capability_ack *tcap; 1330 udp_t *udp = Q_TO_UDP(q); 1331 1332 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1333 1334 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1335 mp->b_datap->db_type, T_CAPABILITY_ACK); 1336 if (!mp) 1337 return; 1338 1339 tcap = (struct T_capability_ack *)mp->b_rptr; 1340 udp_do_capability_ack(udp, tcap, cap_bits1); 1341 1342 qreply(q, mp); 1343 } 1344 1345 /* 1346 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1347 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1348 * The current state of the stream is copied from udp_state. 1349 */ 1350 static void 1351 udp_info_req(queue_t *q, mblk_t *mp) 1352 { 1353 udp_t *udp = Q_TO_UDP(q); 1354 1355 /* Create a T_INFO_ACK message. */ 1356 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1357 T_INFO_ACK); 1358 if (!mp) 1359 return; 1360 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1361 qreply(q, mp); 1362 } 1363 1364 /* For /dev/udp aka AF_INET open */ 1365 static int 1366 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1367 { 1368 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1369 } 1370 1371 /* For /dev/udp6 aka AF_INET6 open */ 1372 static int 1373 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1374 { 1375 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1376 } 1377 1378 /* 1379 * This is the open routine for udp. It allocates a udp_t structure for 1380 * the stream and, on the first open of the module, creates an ND table. 1381 */ 1382 static int 1383 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1384 boolean_t isv6) 1385 { 1386 udp_t *udp; 1387 conn_t *connp; 1388 dev_t conn_dev; 1389 vmem_t *minor_arena; 1390 int err; 1391 1392 /* If the stream is already open, return immediately. 
*/ 1393 if (q->q_ptr != NULL) 1394 return (0); 1395 1396 if (sflag == MODOPEN) 1397 return (EINVAL); 1398 1399 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1400 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1401 minor_arena = ip_minor_arena_la; 1402 } else { 1403 /* 1404 * Either minor numbers in the large arena were exhausted 1405 * or a non socket application is doing the open. 1406 * Try to allocate from the small arena. 1407 */ 1408 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1409 return (EBUSY); 1410 1411 minor_arena = ip_minor_arena_sa; 1412 } 1413 1414 if (flag & SO_FALLBACK) { 1415 /* 1416 * Non streams socket needs a stream to fallback to 1417 */ 1418 RD(q)->q_ptr = (void *)conn_dev; 1419 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1420 WR(q)->q_ptr = (void *)minor_arena; 1421 qprocson(q); 1422 return (0); 1423 } 1424 1425 connp = udp_do_open(credp, isv6, KM_SLEEP, &err); 1426 if (connp == NULL) { 1427 inet_minor_free(minor_arena, conn_dev); 1428 return (err); 1429 } 1430 udp = connp->conn_udp; 1431 1432 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1433 connp->conn_dev = conn_dev; 1434 connp->conn_minor_arena = minor_arena; 1435 1436 /* 1437 * Initialize the udp_t structure for this stream. 1438 */ 1439 q->q_ptr = connp; 1440 WR(q)->q_ptr = connp; 1441 connp->conn_rq = q; 1442 connp->conn_wq = WR(q); 1443 1444 /* 1445 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1446 * need to lock anything. 1447 */ 1448 ASSERT(connp->conn_proto == IPPROTO_UDP); 1449 ASSERT(connp->conn_udp == udp); 1450 ASSERT(udp->udp_connp == connp); 1451 1452 if (flag & SO_SOCKSTR) { 1453 udp->udp_issocket = B_TRUE; 1454 } 1455 1456 WR(q)->q_hiwat = connp->conn_sndbuf; 1457 WR(q)->q_lowat = connp->conn_sndlowat; 1458 1459 qprocson(q); 1460 1461 /* Set the Stream head write offset and high watermark. 
*/ 1462 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1463 (void) proto_set_rx_hiwat(q, connp, 1464 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1465 1466 mutex_enter(&connp->conn_lock); 1467 connp->conn_state_flags &= ~CONN_INCIPIENT; 1468 mutex_exit(&connp->conn_lock); 1469 return (0); 1470 } 1471 1472 /* 1473 * Which UDP options OK to set through T_UNITDATA_REQ... 1474 */ 1475 /* ARGSUSED */ 1476 static boolean_t 1477 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1478 { 1479 return (B_TRUE); 1480 } 1481 1482 /* 1483 * This routine gets default values of certain options whose default 1484 * values are maintained by protcol specific code 1485 */ 1486 int 1487 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1488 { 1489 udp_t *udp = Q_TO_UDP(q); 1490 udp_stack_t *us = udp->udp_us; 1491 int *i1 = (int *)ptr; 1492 1493 switch (level) { 1494 case IPPROTO_IP: 1495 switch (name) { 1496 case IP_MULTICAST_TTL: 1497 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1498 return (sizeof (uchar_t)); 1499 case IP_MULTICAST_LOOP: 1500 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1501 return (sizeof (uchar_t)); 1502 } 1503 break; 1504 case IPPROTO_IPV6: 1505 switch (name) { 1506 case IPV6_MULTICAST_HOPS: 1507 *i1 = IP_DEFAULT_MULTICAST_TTL; 1508 return (sizeof (int)); 1509 case IPV6_MULTICAST_LOOP: 1510 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1511 return (sizeof (int)); 1512 case IPV6_UNICAST_HOPS: 1513 *i1 = us->us_ipv6_hoplimit; 1514 return (sizeof (int)); 1515 } 1516 break; 1517 } 1518 return (-1); 1519 } 1520 1521 /* 1522 * This routine retrieves the current status of socket options. 1523 * It returns the size of the option retrieved, or -1. 
1524 */ 1525 int 1526 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1527 uchar_t *ptr) 1528 { 1529 int *i1 = (int *)ptr; 1530 udp_t *udp = connp->conn_udp; 1531 int len; 1532 conn_opt_arg_t coas; 1533 int retval; 1534 1535 coas.coa_connp = connp; 1536 coas.coa_ixa = connp->conn_ixa; 1537 coas.coa_ipp = &connp->conn_xmit_ipp; 1538 coas.coa_ancillary = B_FALSE; 1539 coas.coa_changed = 0; 1540 1541 /* 1542 * We assume that the optcom framework has checked for the set 1543 * of levels and names that are supported, hence we don't worry 1544 * about rejecting based on that. 1545 * First check for UDP specific handling, then pass to common routine. 1546 */ 1547 switch (level) { 1548 case IPPROTO_IP: 1549 /* 1550 * Only allow IPv4 option processing on IPv4 sockets. 1551 */ 1552 if (connp->conn_family != AF_INET) 1553 return (-1); 1554 1555 switch (name) { 1556 case IP_OPTIONS: 1557 case T_IP_OPTIONS: 1558 mutex_enter(&connp->conn_lock); 1559 if (!(udp->udp_recv_ipp.ipp_fields & 1560 IPPF_IPV4_OPTIONS)) { 1561 mutex_exit(&connp->conn_lock); 1562 return (0); 1563 } 1564 1565 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1566 ASSERT(len != 0); 1567 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1568 mutex_exit(&connp->conn_lock); 1569 return (len); 1570 } 1571 break; 1572 case IPPROTO_UDP: 1573 switch (name) { 1574 case UDP_NAT_T_ENDPOINT: 1575 mutex_enter(&connp->conn_lock); 1576 *i1 = udp->udp_nat_t_endpoint; 1577 mutex_exit(&connp->conn_lock); 1578 return (sizeof (int)); 1579 case UDP_RCVHDR: 1580 mutex_enter(&connp->conn_lock); 1581 *i1 = udp->udp_rcvhdr ? 1 : 0; 1582 mutex_exit(&connp->conn_lock); 1583 return (sizeof (int)); 1584 } 1585 } 1586 mutex_enter(&connp->conn_lock); 1587 retval = conn_opt_get(&coas, level, name, ptr); 1588 mutex_exit(&connp->conn_lock); 1589 return (retval); 1590 } 1591 1592 /* 1593 * This routine retrieves the current status of socket options. 1594 * It returns the size of the option retrieved, or -1. 
1595 */ 1596 int 1597 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1598 { 1599 conn_t *connp = Q_TO_CONN(q); 1600 int err; 1601 1602 err = udp_opt_get(connp, level, name, ptr); 1603 return (err); 1604 } 1605 1606 /* 1607 * This routine sets socket options. 1608 */ 1609 int 1610 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1611 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1612 { 1613 conn_t *connp = coa->coa_connp; 1614 ip_xmit_attr_t *ixa = coa->coa_ixa; 1615 udp_t *udp = connp->conn_udp; 1616 udp_stack_t *us = udp->udp_us; 1617 int *i1 = (int *)invalp; 1618 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1619 int error; 1620 1621 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1622 /* 1623 * First do UDP specific sanity checks and handle UDP specific 1624 * options. Note that some IPPROTO_UDP options are handled 1625 * by conn_opt_set. 1626 */ 1627 switch (level) { 1628 case SOL_SOCKET: 1629 switch (name) { 1630 case SO_SNDBUF: 1631 if (*i1 > us->us_max_buf) { 1632 return (ENOBUFS); 1633 } 1634 break; 1635 case SO_RCVBUF: 1636 if (*i1 > us->us_max_buf) { 1637 return (ENOBUFS); 1638 } 1639 break; 1640 1641 case SCM_UCRED: { 1642 struct ucred_s *ucr; 1643 cred_t *newcr; 1644 ts_label_t *tsl; 1645 1646 /* 1647 * Only sockets that have proper privileges and are 1648 * bound to MLPs will have any other value here, so 1649 * this implicitly tests for privilege to set label. 1650 */ 1651 if (connp->conn_mlp_type == mlptSingle) 1652 break; 1653 1654 ucr = (struct ucred_s *)invalp; 1655 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || 1656 ucr->uc_labeloff < sizeof (*ucr) || 1657 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1658 return (EINVAL); 1659 if (!checkonly) { 1660 /* 1661 * Set ixa_tsl to the new label. 1662 * We assume that crgetzoneid doesn't change 1663 * as part of the SCM_UCRED. 
1664 */ 1665 ASSERT(cr != NULL); 1666 if ((tsl = crgetlabel(cr)) == NULL) 1667 return (EINVAL); 1668 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1669 tsl->tsl_doi, KM_NOSLEEP); 1670 if (newcr == NULL) 1671 return (ENOSR); 1672 ASSERT(newcr->cr_label != NULL); 1673 /* 1674 * Move the hold on the cr_label to ixa_tsl by 1675 * setting cr_label to NULL. Then release newcr. 1676 */ 1677 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1678 ixa->ixa_flags |= IXAF_UCRED_TSL; 1679 newcr->cr_label = NULL; 1680 crfree(newcr); 1681 coa->coa_changed |= COA_HEADER_CHANGED; 1682 coa->coa_changed |= COA_WROFF_CHANGED; 1683 } 1684 /* Fully handled this option. */ 1685 return (0); 1686 } 1687 } 1688 break; 1689 case IPPROTO_UDP: 1690 switch (name) { 1691 case UDP_NAT_T_ENDPOINT: 1692 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1693 return (error); 1694 } 1695 1696 /* 1697 * Use conn_family instead so we can avoid ambiguitites 1698 * with AF_INET6 sockets that may switch from IPv4 1699 * to IPv6. 1700 */ 1701 if (connp->conn_family != AF_INET) { 1702 return (EAFNOSUPPORT); 1703 } 1704 1705 if (!checkonly) { 1706 mutex_enter(&connp->conn_lock); 1707 udp->udp_nat_t_endpoint = onoff; 1708 mutex_exit(&connp->conn_lock); 1709 coa->coa_changed |= COA_HEADER_CHANGED; 1710 coa->coa_changed |= COA_WROFF_CHANGED; 1711 } 1712 /* Fully handled this option. */ 1713 return (0); 1714 case UDP_RCVHDR: 1715 mutex_enter(&connp->conn_lock); 1716 udp->udp_rcvhdr = onoff; 1717 mutex_exit(&connp->conn_lock); 1718 return (0); 1719 } 1720 break; 1721 } 1722 error = conn_opt_set(coa, level, name, inlen, invalp, 1723 checkonly, cr); 1724 return (error); 1725 } 1726 1727 /* 1728 * This routine sets socket options. 
1729 */ 1730 int 1731 udp_opt_set(conn_t *connp, uint_t optset_context, int level, 1732 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 1733 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) 1734 { 1735 udp_t *udp = connp->conn_udp; 1736 int err; 1737 conn_opt_arg_t coas, *coa; 1738 boolean_t checkonly; 1739 udp_stack_t *us = udp->udp_us; 1740 1741 switch (optset_context) { 1742 case SETFN_OPTCOM_CHECKONLY: 1743 checkonly = B_TRUE; 1744 /* 1745 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1746 * inlen != 0 implies value supplied and 1747 * we have to "pretend" to set it. 1748 * inlen == 0 implies that there is no 1749 * value part in T_CHECK request and just validation 1750 * done elsewhere should be enough, we just return here. 1751 */ 1752 if (inlen == 0) { 1753 *outlenp = 0; 1754 return (0); 1755 } 1756 break; 1757 case SETFN_OPTCOM_NEGOTIATE: 1758 checkonly = B_FALSE; 1759 break; 1760 case SETFN_UD_NEGOTIATE: 1761 case SETFN_CONN_NEGOTIATE: 1762 checkonly = B_FALSE; 1763 /* 1764 * Negotiating local and "association-related" options 1765 * through T_UNITDATA_REQ. 1766 * 1767 * Following routine can filter out ones we do not 1768 * want to be "set" this way. 
1769 */ 1770 if (!udp_opt_allow_udr_set(level, name)) { 1771 *outlenp = 0; 1772 return (EINVAL); 1773 } 1774 break; 1775 default: 1776 /* 1777 * We should never get here 1778 */ 1779 *outlenp = 0; 1780 return (EINVAL); 1781 } 1782 1783 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1784 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1785 1786 if (thisdg_attrs != NULL) { 1787 /* Options from T_UNITDATA_REQ */ 1788 coa = (conn_opt_arg_t *)thisdg_attrs; 1789 ASSERT(coa->coa_connp == connp); 1790 ASSERT(coa->coa_ixa != NULL); 1791 ASSERT(coa->coa_ipp != NULL); 1792 ASSERT(coa->coa_ancillary); 1793 } else { 1794 coa = &coas; 1795 coas.coa_connp = connp; 1796 /* Get a reference on conn_ixa to prevent concurrent mods */ 1797 coas.coa_ixa = conn_get_ixa(connp, B_TRUE); 1798 if (coas.coa_ixa == NULL) { 1799 *outlenp = 0; 1800 return (ENOMEM); 1801 } 1802 coas.coa_ipp = &connp->conn_xmit_ipp; 1803 coas.coa_ancillary = B_FALSE; 1804 coas.coa_changed = 0; 1805 } 1806 1807 err = udp_do_opt_set(coa, level, name, inlen, invalp, 1808 cr, checkonly); 1809 if (err != 0) { 1810 errout: 1811 if (!coa->coa_ancillary) 1812 ixa_refrele(coa->coa_ixa); 1813 *outlenp = 0; 1814 return (err); 1815 } 1816 /* Handle DHCPINIT here outside of lock */ 1817 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) { 1818 uint_t ifindex; 1819 ill_t *ill; 1820 1821 ifindex = *(uint_t *)invalp; 1822 if (ifindex == 0) { 1823 ill = NULL; 1824 } else { 1825 ill = ill_lookup_on_ifindex(ifindex, B_FALSE, 1826 coa->coa_ixa->ixa_ipst); 1827 if (ill == NULL) { 1828 err = ENXIO; 1829 goto errout; 1830 } 1831 1832 mutex_enter(&ill->ill_lock); 1833 if (ill->ill_state_flags & ILL_CONDEMNED) { 1834 mutex_exit(&ill->ill_lock); 1835 ill_refrele(ill); 1836 err = ENXIO; 1837 goto errout; 1838 } 1839 if (IS_VNI(ill)) { 1840 mutex_exit(&ill->ill_lock); 1841 ill_refrele(ill); 1842 err = EINVAL; 1843 goto errout; 1844 } 1845 } 1846 mutex_enter(&connp->conn_lock); 1847 1848 if (connp->conn_dhcpinit_ill != NULL) 
{ 1849 /* 1850 * We've locked the conn so conn_cleanup_ill() 1851 * cannot clear conn_dhcpinit_ill -- so it's 1852 * safe to access the ill. 1853 */ 1854 ill_t *oill = connp->conn_dhcpinit_ill; 1855 1856 ASSERT(oill->ill_dhcpinit != 0); 1857 atomic_dec_32(&oill->ill_dhcpinit); 1858 ill_set_inputfn(connp->conn_dhcpinit_ill); 1859 connp->conn_dhcpinit_ill = NULL; 1860 } 1861 1862 if (ill != NULL) { 1863 connp->conn_dhcpinit_ill = ill; 1864 atomic_inc_32(&ill->ill_dhcpinit); 1865 ill_set_inputfn(ill); 1866 mutex_exit(&connp->conn_lock); 1867 mutex_exit(&ill->ill_lock); 1868 ill_refrele(ill); 1869 } else { 1870 mutex_exit(&connp->conn_lock); 1871 } 1872 } 1873 1874 /* 1875 * Common case of OK return with outval same as inval. 1876 */ 1877 if (invalp != outvalp) { 1878 /* don't trust bcopy for identical src/dst */ 1879 (void) bcopy(invalp, outvalp, inlen); 1880 } 1881 *outlenp = inlen; 1882 1883 /* 1884 * If this was not ancillary data, then we rebuild the headers, 1885 * update the IRE/NCE, and IPsec as needed. 1886 * Since the label depends on the destination we go through 1887 * ip_set_destination first. 1888 */ 1889 if (coa->coa_ancillary) { 1890 return (0); 1891 } 1892 1893 if (coa->coa_changed & COA_ROUTE_CHANGED) { 1894 in6_addr_t saddr, faddr, nexthop; 1895 in_port_t fport; 1896 1897 /* 1898 * We clear lastdst to make sure we pick up the change 1899 * next time sending. 1900 * If we are connected we re-cache the information. 1901 * We ignore errors to preserve BSD behavior. 1902 * Note that we don't redo IPsec policy lookup here 1903 * since the final destination (or source) didn't change. 
1904 */ 1905 mutex_enter(&connp->conn_lock); 1906 connp->conn_v6lastdst = ipv6_all_zeros; 1907 1908 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa, 1909 &connp->conn_faddr_v6, &nexthop); 1910 saddr = connp->conn_saddr_v6; 1911 faddr = connp->conn_faddr_v6; 1912 fport = connp->conn_fport; 1913 mutex_exit(&connp->conn_lock); 1914 1915 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) && 1916 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) { 1917 (void) ip_attr_connect(connp, coa->coa_ixa, 1918 &saddr, &faddr, &nexthop, fport, NULL, NULL, 1919 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST); 1920 } 1921 } 1922 1923 ixa_refrele(coa->coa_ixa); 1924 1925 if (coa->coa_changed & COA_HEADER_CHANGED) { 1926 /* 1927 * Rebuild the header template if we are connected. 1928 * Otherwise clear conn_v6lastdst so we rebuild the header 1929 * in the data path. 1930 */ 1931 mutex_enter(&connp->conn_lock); 1932 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 1933 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 1934 err = udp_build_hdr_template(connp, 1935 &connp->conn_saddr_v6, &connp->conn_faddr_v6, 1936 connp->conn_fport, connp->conn_flowinfo); 1937 if (err != 0) { 1938 mutex_exit(&connp->conn_lock); 1939 return (err); 1940 } 1941 } else { 1942 connp->conn_v6lastdst = ipv6_all_zeros; 1943 } 1944 mutex_exit(&connp->conn_lock); 1945 } 1946 if (coa->coa_changed & COA_RCVBUF_CHANGED) { 1947 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 1948 connp->conn_rcvbuf); 1949 } 1950 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { 1951 connp->conn_wq->q_hiwat = connp->conn_sndbuf; 1952 } 1953 if (coa->coa_changed & COA_WROFF_CHANGED) { 1954 /* Increase wroff if needed */ 1955 uint_t wroff; 1956 1957 mutex_enter(&connp->conn_lock); 1958 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra; 1959 if (udp->udp_nat_t_endpoint) 1960 wroff += sizeof (uint32_t); 1961 if (wroff > connp->conn_wroff) { 1962 connp->conn_wroff = wroff; 1963 mutex_exit(&connp->conn_lock); 1964 (void) 
proto_set_tx_wroff(connp->conn_rq, connp, wroff); 1965 } else { 1966 mutex_exit(&connp->conn_lock); 1967 } 1968 } 1969 return (err); 1970 } 1971 1972 /* This routine sets socket options. */ 1973 int 1974 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 1975 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 1976 void *thisdg_attrs, cred_t *cr) 1977 { 1978 conn_t *connp = Q_TO_CONN(q); 1979 int error; 1980 1981 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 1982 outlenp, outvalp, thisdg_attrs, cr); 1983 return (error); 1984 } 1985 1986 /* 1987 * Setup IP and UDP headers. 1988 * Returns NULL on allocation failure, in which case data_mp is freed. 1989 */ 1990 mblk_t * 1991 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, 1992 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport, 1993 uint32_t flowinfo, mblk_t *data_mp, int *errorp) 1994 { 1995 mblk_t *mp; 1996 udpha_t *udpha; 1997 udp_stack_t *us = connp->conn_netstack->netstack_udp; 1998 uint_t data_len; 1999 uint32_t cksum; 2000 udp_t *udp = connp->conn_udp; 2001 boolean_t insert_spi = udp->udp_nat_t_endpoint; 2002 uint_t ulp_hdr_len; 2003 2004 data_len = msgdsize(data_mp); 2005 ulp_hdr_len = UDPH_SIZE; 2006 if (insert_spi) 2007 ulp_hdr_len += sizeof (uint32_t); 2008 2009 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, 2010 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); 2011 if (mp == NULL) { 2012 ASSERT(*errorp != 0); 2013 return (NULL); 2014 } 2015 2016 data_len += ulp_hdr_len; 2017 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; 2018 2019 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); 2020 udpha->uha_src_port = connp->conn_lport; 2021 udpha->uha_dst_port = dstport; 2022 udpha->uha_checksum = 0; 2023 udpha->uha_length = htons(data_len); 2024 2025 /* 2026 * If there was a routing option/header then conn_prepend_hdr 2027 * has massaged it and placed the 
pseudo-header checksum difference 2028 * in the cksum argument. 2029 * 2030 * Setup header length and prepare for ULP checksum done in IP. 2031 * 2032 * We make it easy for IP to include our pseudo header 2033 * by putting our length in uha_checksum. 2034 * The IP source, destination, and length have already been set by 2035 * conn_prepend_hdr. 2036 */ 2037 cksum += data_len; 2038 cksum = (cksum >> 16) + (cksum & 0xFFFF); 2039 ASSERT(cksum < 0x10000); 2040 2041 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2042 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2043 2044 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); 2045 2046 /* IP does the checksum if uha_checksum is non-zero */ 2047 if (us->us_do_checksum) { 2048 if (cksum == 0) 2049 udpha->uha_checksum = 0xffff; 2050 else 2051 udpha->uha_checksum = htons(cksum); 2052 } else { 2053 udpha->uha_checksum = 0; 2054 } 2055 } else { 2056 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2057 2058 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); 2059 if (cksum == 0) 2060 udpha->uha_checksum = 0xffff; 2061 else 2062 udpha->uha_checksum = htons(cksum); 2063 } 2064 2065 /* Insert all-0s SPI now. */ 2066 if (insert_spi) 2067 *((uint32_t *)(udpha + 1)) = 0; 2068 2069 return (mp); 2070 } 2071 2072 static int 2073 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, 2074 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo) 2075 { 2076 udpha_t *udpha; 2077 int error; 2078 2079 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2080 /* 2081 * We clear lastdst to make sure we don't use the lastdst path 2082 * next time sending since we might not have set v6dst yet. 2083 */ 2084 connp->conn_v6lastdst = ipv6_all_zeros; 2085 2086 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst, 2087 flowinfo); 2088 if (error != 0) 2089 return (error); 2090 2091 /* 2092 * Any routing header/option has been massaged. The checksum difference 2093 * is stored in conn_sum. 
2094 */ 2095 udpha = (udpha_t *)connp->conn_ht_ulp; 2096 udpha->uha_src_port = connp->conn_lport; 2097 udpha->uha_dst_port = dstport; 2098 udpha->uha_checksum = 0; 2099 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */ 2100 return (0); 2101 } 2102 2103 static mblk_t * 2104 udp_queue_fallback(udp_t *udp, mblk_t *mp) 2105 { 2106 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 2107 if (IPCL_IS_NONSTR(udp->udp_connp)) { 2108 /* 2109 * fallback has started but messages have not been moved yet 2110 */ 2111 if (udp->udp_fallback_queue_head == NULL) { 2112 ASSERT(udp->udp_fallback_queue_tail == NULL); 2113 udp->udp_fallback_queue_head = mp; 2114 udp->udp_fallback_queue_tail = mp; 2115 } else { 2116 ASSERT(udp->udp_fallback_queue_tail != NULL); 2117 udp->udp_fallback_queue_tail->b_next = mp; 2118 udp->udp_fallback_queue_tail = mp; 2119 } 2120 return (NULL); 2121 } else { 2122 /* 2123 * Fallback completed, let the caller putnext() the mblk. 2124 */ 2125 return (mp); 2126 } 2127 } 2128 2129 /* 2130 * Deliver data to ULP. In case we have a socket, and it's falling back to 2131 * TPI, then we'll queue the mp for later processing. 
2132 */ 2133 static void 2134 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2135 { 2136 if (IPCL_IS_NONSTR(connp)) { 2137 udp_t *udp = connp->conn_udp; 2138 int error; 2139 2140 ASSERT(len == msgdsize(mp)); 2141 if ((*connp->conn_upcalls->su_recv) 2142 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2143 mutex_enter(&udp->udp_recv_lock); 2144 if (error == ENOSPC) { 2145 /* 2146 * let's confirm while holding the lock 2147 */ 2148 if ((*connp->conn_upcalls->su_recv) 2149 (connp->conn_upper_handle, NULL, 0, 0, 2150 &error, NULL) < 0) { 2151 ASSERT(error == ENOSPC); 2152 if (error == ENOSPC) { 2153 connp->conn_flow_cntrld = 2154 B_TRUE; 2155 } 2156 } 2157 mutex_exit(&udp->udp_recv_lock); 2158 } else { 2159 ASSERT(error == EOPNOTSUPP); 2160 mp = udp_queue_fallback(udp, mp); 2161 mutex_exit(&udp->udp_recv_lock); 2162 if (mp != NULL) 2163 putnext(connp->conn_rq, mp); 2164 } 2165 } 2166 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2167 } else { 2168 if (is_system_labeled()) { 2169 ASSERT(ira->ira_cred != NULL); 2170 /* 2171 * Provide for protocols above UDP such as RPC 2172 * NOPID leaves db_cpid unchanged. 2173 */ 2174 mblk_setcred(mp, ira->ira_cred, NOPID); 2175 } 2176 2177 putnext(connp->conn_rq, mp); 2178 } 2179 } 2180 2181 /* 2182 * This is the inbound data path. 2183 * IP has already pulled up the IP plus UDP headers and verified alignment 2184 * etc. 
2185 */ 2186 /* ARGSUSED2 */ 2187 static void 2188 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2189 { 2190 conn_t *connp = (conn_t *)arg1; 2191 struct T_unitdata_ind *tudi; 2192 uchar_t *rptr; /* Pointer to IP header */ 2193 int hdr_length; /* Length of IP+UDP headers */ 2194 int udi_size; /* Size of T_unitdata_ind */ 2195 int pkt_len; 2196 udp_t *udp; 2197 udpha_t *udpha; 2198 ip_pkt_t ipps; 2199 ip6_t *ip6h; 2200 mblk_t *mp1; 2201 uint32_t udp_ipv4_options_len; 2202 crb_t recv_ancillary; 2203 udp_stack_t *us; 2204 2205 ASSERT(connp->conn_flags & IPCL_UDPCONN); 2206 2207 udp = connp->conn_udp; 2208 us = udp->udp_us; 2209 rptr = mp->b_rptr; 2210 2211 ASSERT(DB_TYPE(mp) == M_DATA); 2212 ASSERT(OK_32PTR(rptr)); 2213 ASSERT(ira->ira_pktlen == msgdsize(mp)); 2214 pkt_len = ira->ira_pktlen; 2215 2216 /* 2217 * Get a snapshot of these and allow other threads to change 2218 * them after that. We need the same recv_ancillary when determining 2219 * the size as when adding the ancillary data items. 2220 */ 2221 mutex_enter(&connp->conn_lock); 2222 udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len; 2223 recv_ancillary = connp->conn_recv_ancillary; 2224 mutex_exit(&connp->conn_lock); 2225 2226 hdr_length = ira->ira_ip_hdr_length; 2227 2228 /* 2229 * IP inspected the UDP header thus all of it must be in the mblk. 2230 * UDP length check is performed for IPv6 packets and IPv4 packets 2231 * to check if the size of the packet as specified 2232 * by the UDP header is the same as the length derived from the IP 2233 * header. 
2234 */ 2235 udpha = (udpha_t *)(rptr + hdr_length); 2236 if (pkt_len != ntohs(udpha->uha_length) + hdr_length) 2237 goto tossit; 2238 2239 hdr_length += UDPH_SIZE; 2240 ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */ 2241 2242 /* Initialize regardless of IP version */ 2243 ipps.ipp_fields = 0; 2244 2245 if (((ira->ira_flags & IRAF_IPV4_OPTIONS) || 2246 udp_ipv4_options_len > 0) && 2247 connp->conn_family == AF_INET) { 2248 int err; 2249 2250 /* 2251 * Record/update udp_recv_ipp with the lock 2252 * held. Not needed for AF_INET6 sockets 2253 * since they don't support a getsockopt of IP_OPTIONS. 2254 */ 2255 mutex_enter(&connp->conn_lock); 2256 err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp, 2257 B_TRUE); 2258 if (err != 0) { 2259 /* Allocation failed. Drop packet */ 2260 mutex_exit(&connp->conn_lock); 2261 freemsg(mp); 2262 UDPS_BUMP_MIB(us, udpInErrors); 2263 return; 2264 } 2265 mutex_exit(&connp->conn_lock); 2266 } 2267 2268 if (recv_ancillary.crb_all != 0) { 2269 /* 2270 * Record packet information in the ip_pkt_t 2271 */ 2272 if (ira->ira_flags & IRAF_IS_IPV4) { 2273 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION); 2274 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 2275 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 2276 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr)); 2277 2278 (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE); 2279 } else { 2280 uint8_t nexthdrp; 2281 2282 ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION); 2283 /* 2284 * IPv6 packets can only be received by applications 2285 * that are prepared to receive IPv6 addresses. 2286 * The IP fanout must ensure this. 
2287 */ 2288 ASSERT(connp->conn_family == AF_INET6); 2289 2290 ip6h = (ip6_t *)rptr; 2291 2292 /* We don't care about the length, but need the ipp */ 2293 hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps, 2294 &nexthdrp); 2295 ASSERT(hdr_length == ira->ira_ip_hdr_length); 2296 /* Restore */ 2297 hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE; 2298 ASSERT(nexthdrp == IPPROTO_UDP); 2299 } 2300 } 2301 2302 /* 2303 * This is the inbound data path. Packets are passed upstream as 2304 * T_UNITDATA_IND messages. 2305 */ 2306 if (connp->conn_family == AF_INET) { 2307 sin_t *sin; 2308 2309 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 2310 2311 /* 2312 * Normally only send up the source address. 2313 * If any ancillary data items are wanted we add those. 2314 */ 2315 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 2316 if (recv_ancillary.crb_all != 0) { 2317 udi_size += conn_recvancillary_size(connp, 2318 recv_ancillary, ira, mp, &ipps); 2319 } 2320 2321 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 2322 mp1 = allocb(udi_size, BPRI_MED); 2323 if (mp1 == NULL) { 2324 freemsg(mp); 2325 UDPS_BUMP_MIB(us, udpInErrors); 2326 return; 2327 } 2328 mp1->b_cont = mp; 2329 mp1->b_datap->db_type = M_PROTO; 2330 tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2331 mp1->b_wptr = (uchar_t *)tudi + udi_size; 2332 tudi->PRIM_type = T_UNITDATA_IND; 2333 tudi->SRC_length = sizeof (sin_t); 2334 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2335 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2336 sizeof (sin_t); 2337 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 2338 tudi->OPT_length = udi_size; 2339 sin = (sin_t *)&tudi[1]; 2340 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 2341 sin->sin_port = udpha->uha_src_port; 2342 sin->sin_family = connp->conn_family; 2343 *(uint32_t *)&sin->sin_zero[0] = 0; 2344 *(uint32_t *)&sin->sin_zero[4] = 0; 2345 2346 /* 2347 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 2348 * IP_RECVTTL has been set. 2349 */ 2350 if (udi_size != 0) { 2351 conn_recvancillary_add(connp, recv_ancillary, ira, 2352 &ipps, (uchar_t *)&sin[1], udi_size); 2353 } 2354 } else { 2355 sin6_t *sin6; 2356 2357 /* 2358 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 2359 * 2360 * Normally we only send up the address. If receiving of any 2361 * optional receive side information is enabled, we also send 2362 * that up as options. 
2363 */ 2364 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 2365 2366 if (recv_ancillary.crb_all != 0) { 2367 udi_size += conn_recvancillary_size(connp, 2368 recv_ancillary, ira, mp, &ipps); 2369 } 2370 2371 mp1 = allocb(udi_size, BPRI_MED); 2372 if (mp1 == NULL) { 2373 freemsg(mp); 2374 UDPS_BUMP_MIB(us, udpInErrors); 2375 return; 2376 } 2377 mp1->b_cont = mp; 2378 mp1->b_datap->db_type = M_PROTO; 2379 tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2380 mp1->b_wptr = (uchar_t *)tudi + udi_size; 2381 tudi->PRIM_type = T_UNITDATA_IND; 2382 tudi->SRC_length = sizeof (sin6_t); 2383 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2384 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2385 sizeof (sin6_t); 2386 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 2387 tudi->OPT_length = udi_size; 2388 sin6 = (sin6_t *)&tudi[1]; 2389 if (ira->ira_flags & IRAF_IS_IPV4) { 2390 in6_addr_t v6dst; 2391 2392 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 2393 &sin6->sin6_addr); 2394 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 2395 &v6dst); 2396 sin6->sin6_flowinfo = 0; 2397 sin6->sin6_scope_id = 0; 2398 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 2399 IPCL_ZONEID(connp), us->us_netstack); 2400 } else { 2401 ip6h = (ip6_t *)rptr; 2402 2403 sin6->sin6_addr = ip6h->ip6_src; 2404 /* No sin6_flowinfo per API */ 2405 sin6->sin6_flowinfo = 0; 2406 /* For link-scope pass up scope id */ 2407 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 2408 sin6->sin6_scope_id = ira->ira_ruifindex; 2409 else 2410 sin6->sin6_scope_id = 0; 2411 sin6->__sin6_src_id = ip_srcid_find_addr( 2412 &ip6h->ip6_dst, IPCL_ZONEID(connp), 2413 us->us_netstack); 2414 } 2415 sin6->sin6_port = udpha->uha_src_port; 2416 sin6->sin6_family = connp->conn_family; 2417 2418 if (udi_size != 0) { 2419 conn_recvancillary_add(connp, recv_ancillary, ira, 2420 &ipps, (uchar_t *)&sin6[1], udi_size); 2421 } 2422 } 2423 2424 /* 2425 * DTrace this UDP input as udp:::receive (this is for IPv4, 
IPv6 and 2426 * loopback traffic). 2427 */ 2428 DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa, 2429 void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha); 2430 2431 /* Walk past the headers unless IP_RECVHDR was set. */ 2432 if (!udp->udp_rcvhdr) { 2433 mp->b_rptr = rptr + hdr_length; 2434 pkt_len -= hdr_length; 2435 } 2436 2437 UDPS_BUMP_MIB(us, udpHCInDatagrams); 2438 udp_ulp_recv(connp, mp1, pkt_len, ira); 2439 return; 2440 2441 tossit: 2442 freemsg(mp); 2443 UDPS_BUMP_MIB(us, udpInErrors); 2444 } 2445 2446 /* 2447 * This routine creates a T_UDERROR_IND message and passes it upstream. 2448 * The address and options are copied from the T_UNITDATA_REQ message 2449 * passed in mp. This message is freed. 2450 */ 2451 static void 2452 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2453 { 2454 struct T_unitdata_req *tudr; 2455 mblk_t *mp1; 2456 uchar_t *destaddr; 2457 t_scalar_t destlen; 2458 uchar_t *optaddr; 2459 t_scalar_t optlen; 2460 2461 if ((mp->b_wptr < mp->b_rptr) || 2462 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2463 goto done; 2464 } 2465 tudr = (struct T_unitdata_req *)mp->b_rptr; 2466 destaddr = mp->b_rptr + tudr->DEST_offset; 2467 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2468 destaddr + tudr->DEST_length < mp->b_rptr || 2469 destaddr + tudr->DEST_length > mp->b_wptr) { 2470 goto done; 2471 } 2472 optaddr = mp->b_rptr + tudr->OPT_offset; 2473 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2474 optaddr + tudr->OPT_length < mp->b_rptr || 2475 optaddr + tudr->OPT_length > mp->b_wptr) { 2476 goto done; 2477 } 2478 destlen = tudr->DEST_length; 2479 optlen = tudr->OPT_length; 2480 2481 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2482 (char *)optaddr, optlen, err); 2483 if (mp1 != NULL) 2484 qreply(q, mp1); 2485 2486 done: 2487 freemsg(mp); 2488 } 2489 2490 /* 2491 * This routine removes a port number association from a stream. It 2492 * is called by udp_wput to handle T_UNBIND_REQ messages. 
2493 */ 2494 static void 2495 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2496 { 2497 conn_t *connp = Q_TO_CONN(q); 2498 int error; 2499 2500 error = udp_do_unbind(connp); 2501 if (error) { 2502 if (error < 0) 2503 udp_err_ack(q, mp, -error, 0); 2504 else 2505 udp_err_ack(q, mp, TSYSERR, error); 2506 return; 2507 } 2508 2509 mp = mi_tpi_ok_ack_alloc(mp); 2510 ASSERT(mp != NULL); 2511 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 2512 qreply(q, mp); 2513 } 2514 2515 /* 2516 * Don't let port fall into the privileged range. 2517 * Since the extra privileged ports can be arbitrary we also 2518 * ensure that we exclude those from consideration. 2519 * us->us_epriv_ports is not sorted thus we loop over it until 2520 * there are no changes. 2521 */ 2522 static in_port_t 2523 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 2524 { 2525 int i; 2526 in_port_t nextport; 2527 boolean_t restart = B_FALSE; 2528 udp_stack_t *us = udp->udp_us; 2529 2530 if (random && udp_random_anon_port != 0) { 2531 (void) random_get_pseudo_bytes((uint8_t *)&port, 2532 sizeof (in_port_t)); 2533 /* 2534 * Unless changed by a sys admin, the smallest anon port 2535 * is 32768 and the largest anon port is 65535. It is 2536 * very likely (50%) for the random port to be smaller 2537 * than the smallest anon port. When that happens, 2538 * add port % (anon port range) to the smallest anon 2539 * port to get the random port. It should fall into the 2540 * valid anon port range. 
2541 */ 2542 if ((port < us->us_smallest_anon_port) || 2543 (port > us->us_largest_anon_port)) { 2544 port = us->us_smallest_anon_port + 2545 port % (us->us_largest_anon_port - 2546 us->us_smallest_anon_port); 2547 } 2548 } 2549 2550 retry: 2551 if (port < us->us_smallest_anon_port) 2552 port = us->us_smallest_anon_port; 2553 2554 if (port > us->us_largest_anon_port) { 2555 port = us->us_smallest_anon_port; 2556 if (restart) 2557 return (0); 2558 restart = B_TRUE; 2559 } 2560 2561 if (port < us->us_smallest_nonpriv_port) 2562 port = us->us_smallest_nonpriv_port; 2563 2564 for (i = 0; i < us->us_num_epriv_ports; i++) { 2565 if (port == us->us_epriv_ports[i]) { 2566 port++; 2567 /* 2568 * Make sure that the port is in the 2569 * valid range. 2570 */ 2571 goto retry; 2572 } 2573 } 2574 2575 if (is_system_labeled() && 2576 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 2577 port, IPPROTO_UDP, B_TRUE)) != 0) { 2578 port = nextport; 2579 goto retry; 2580 } 2581 2582 return (port); 2583 } 2584 2585 /* 2586 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 2587 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 2588 * the TPI options, otherwise we take them from msg_control. 2589 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 2590 * Always consumes mp; never consumes tudr_mp. 
2591 */ 2592 static int 2593 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, 2594 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) 2595 { 2596 udp_t *udp = connp->conn_udp; 2597 udp_stack_t *us = udp->udp_us; 2598 int error; 2599 ip_xmit_attr_t *ixa; 2600 ip_pkt_t *ipp; 2601 in6_addr_t v6src; 2602 in6_addr_t v6dst; 2603 in6_addr_t v6nexthop; 2604 in_port_t dstport; 2605 uint32_t flowinfo; 2606 uint_t srcid; 2607 int is_absreq_failure = 0; 2608 conn_opt_arg_t coas, *coa; 2609 2610 ASSERT(tudr_mp != NULL || msg != NULL); 2611 2612 /* 2613 * Get ixa before checking state to handle a disconnect race. 2614 * 2615 * We need an exclusive copy of conn_ixa since the ancillary data 2616 * options might modify it. That copy has no pointers hence we 2617 * need to set them up once we've parsed the ancillary data. 2618 */ 2619 ixa = conn_get_ixa_exclusive(connp); 2620 if (ixa == NULL) { 2621 UDPS_BUMP_MIB(us, udpOutErrors); 2622 freemsg(mp); 2623 return (ENOMEM); 2624 } 2625 ASSERT(cr != NULL); 2626 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2627 ixa->ixa_cred = cr; 2628 ixa->ixa_cpid = pid; 2629 if (is_system_labeled()) { 2630 /* We need to restart with a label based on the cred */ 2631 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 2632 } 2633 2634 /* In case previous destination was multicast or multirt */ 2635 ip_attr_newdst(ixa); 2636 2637 /* Get a copy of conn_xmit_ipp since the options might change it */ 2638 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); 2639 if (ipp == NULL) { 2640 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2641 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2642 ixa->ixa_cpid = connp->conn_cpid; 2643 ixa_refrele(ixa); 2644 UDPS_BUMP_MIB(us, udpOutErrors); 2645 freemsg(mp); 2646 return (ENOMEM); 2647 } 2648 mutex_enter(&connp->conn_lock); 2649 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); 2650 mutex_exit(&connp->conn_lock); 2651 if (error != 0) { 2652 UDPS_BUMP_MIB(us, udpOutErrors); 2653 
freemsg(mp); 2654 goto done; 2655 } 2656 2657 /* 2658 * Parse the options and update ixa and ipp as a result. 2659 * Note that ixa_tsl can be updated if SCM_UCRED. 2660 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. 2661 */ 2662 2663 coa = &coas; 2664 coa->coa_connp = connp; 2665 coa->coa_ixa = ixa; 2666 coa->coa_ipp = ipp; 2667 coa->coa_ancillary = B_TRUE; 2668 coa->coa_changed = 0; 2669 2670 if (msg != NULL) { 2671 error = process_auxiliary_options(connp, msg->msg_control, 2672 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); 2673 } else { 2674 struct T_unitdata_req *tudr; 2675 2676 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; 2677 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 2678 error = tpi_optcom_buf(connp->conn_wq, tudr_mp, 2679 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, 2680 coa, &is_absreq_failure); 2681 } 2682 if (error != 0) { 2683 /* 2684 * Note: No special action needed in this 2685 * module for "is_absreq_failure" 2686 */ 2687 freemsg(mp); 2688 UDPS_BUMP_MIB(us, udpOutErrors); 2689 goto done; 2690 } 2691 ASSERT(is_absreq_failure == 0); 2692 2693 mutex_enter(&connp->conn_lock); 2694 /* 2695 * If laddr is unspecified then we look at sin6_src_id. 2696 * We will give precedence to a source address set with IPV6_PKTINFO 2697 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 2698 * want ip_attr_connect to select a source (since it can fail) when 2699 * IPV6_PKTINFO is specified. 2700 * If this doesn't result in a source address then we get a source 2701 * from ip_attr_connect() below. 
2702 */ 2703 v6src = connp->conn_saddr_v6; 2704 if (sin != NULL) { 2705 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 2706 dstport = sin->sin_port; 2707 flowinfo = 0; 2708 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 2709 ixa->ixa_flags |= IXAF_IS_IPV4; 2710 } else if (sin6 != NULL) { 2711 v6dst = sin6->sin6_addr; 2712 dstport = sin6->sin6_port; 2713 flowinfo = sin6->sin6_flowinfo; 2714 srcid = sin6->__sin6_src_id; 2715 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 2716 ixa->ixa_scopeid = sin6->sin6_scope_id; 2717 ixa->ixa_flags |= IXAF_SCOPEID_SET; 2718 } else { 2719 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 2720 } 2721 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 2722 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 2723 connp->conn_netstack); 2724 } 2725 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 2726 ixa->ixa_flags |= IXAF_IS_IPV4; 2727 else 2728 ixa->ixa_flags &= ~IXAF_IS_IPV4; 2729 } else { 2730 /* Connected case */ 2731 v6dst = connp->conn_faddr_v6; 2732 dstport = connp->conn_fport; 2733 flowinfo = connp->conn_flowinfo; 2734 } 2735 mutex_exit(&connp->conn_lock); 2736 2737 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ 2738 if (ipp->ipp_fields & IPPF_ADDR) { 2739 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2740 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 2741 v6src = ipp->ipp_addr; 2742 } else { 2743 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 2744 v6src = ipp->ipp_addr; 2745 } 2746 } 2747 2748 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop); 2749 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 2750 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 2751 2752 switch (error) { 2753 case 0: 2754 break; 2755 case EADDRNOTAVAIL: 2756 /* 2757 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
2758 * Don't have the application see that errno 2759 */ 2760 error = ENETUNREACH; 2761 goto failed; 2762 case ENETDOWN: 2763 /* 2764 * Have !ipif_addr_ready address; drop packet silently 2765 * until we can get applications to not send until we 2766 * are ready. 2767 */ 2768 error = 0; 2769 goto failed; 2770 case EHOSTUNREACH: 2771 case ENETUNREACH: 2772 if (ixa->ixa_ire != NULL) { 2773 /* 2774 * Let conn_ip_output/ire_send_noroute return 2775 * the error and send any local ICMP error. 2776 */ 2777 error = 0; 2778 break; 2779 } 2780 /* FALLTHRU */ 2781 default: 2782 failed: 2783 freemsg(mp); 2784 UDPS_BUMP_MIB(us, udpOutErrors); 2785 goto done; 2786 } 2787 2788 /* 2789 * We might be going to a different destination than last time, 2790 * thus check that TX allows the communication and compute any 2791 * needed label. 2792 * 2793 * TSOL Note: We have an exclusive ipp and ixa for this thread so we 2794 * don't have to worry about concurrent threads. 2795 */ 2796 if (is_system_labeled()) { 2797 /* Using UDP MLP requires SCM_UCRED from user */ 2798 if (connp->conn_mlp_type != mlptSingle && 2799 !((ixa->ixa_flags & IXAF_UCRED_TSL))) { 2800 UDPS_BUMP_MIB(us, udpOutErrors); 2801 error = ECONNREFUSED; 2802 freemsg(mp); 2803 goto done; 2804 } 2805 /* 2806 * Check whether Trusted Solaris policy allows communication 2807 * with this host, and pretend that the destination is 2808 * unreachable if not. 2809 * Compute any needed label and place it in ipp_label_v4/v6. 2810 * 2811 * Later conn_build_hdr_template/conn_prepend_hdr takes 2812 * ipp_label_v4/v6 to form the packet. 2813 * 2814 * Tsol note: We have ipp structure local to this thread so 2815 * no locking is needed. 
2816 */ 2817 error = conn_update_label(connp, ixa, &v6dst, ipp); 2818 if (error != 0) { 2819 freemsg(mp); 2820 UDPS_BUMP_MIB(us, udpOutErrors); 2821 goto done; 2822 } 2823 } 2824 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport, 2825 flowinfo, mp, &error); 2826 if (mp == NULL) { 2827 ASSERT(error != 0); 2828 UDPS_BUMP_MIB(us, udpOutErrors); 2829 goto done; 2830 } 2831 if (ixa->ixa_pktlen > IP_MAXPACKET) { 2832 error = EMSGSIZE; 2833 UDPS_BUMP_MIB(us, udpOutErrors); 2834 freemsg(mp); 2835 goto done; 2836 } 2837 /* We're done. Pass the packet to ip. */ 2838 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 2839 2840 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 2841 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 2842 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 2843 2844 error = conn_ip_output(mp, ixa); 2845 /* No udpOutErrors if an error since IP increases its error counter */ 2846 switch (error) { 2847 case 0: 2848 break; 2849 case EWOULDBLOCK: 2850 (void) ixa_check_drain_insert(connp, ixa); 2851 error = 0; 2852 break; 2853 case EADDRNOTAVAIL: 2854 /* 2855 * IXAF_VERIFY_SOURCE tells us to pick a better source. 2856 * Don't have the application see that errno 2857 */ 2858 error = ENETUNREACH; 2859 /* FALLTHRU */ 2860 default: 2861 mutex_enter(&connp->conn_lock); 2862 /* 2863 * Clear the source and v6lastdst so we call ip_attr_connect 2864 * for the next packet and try to pick a better source. 
2865 */ 2866 if (connp->conn_mcbc_bind) 2867 connp->conn_saddr_v6 = ipv6_all_zeros; 2868 else 2869 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 2870 connp->conn_v6lastdst = ipv6_all_zeros; 2871 mutex_exit(&connp->conn_lock); 2872 break; 2873 } 2874 done: 2875 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2876 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2877 ixa->ixa_cpid = connp->conn_cpid; 2878 ixa_refrele(ixa); 2879 ip_pkt_free(ipp); 2880 kmem_free(ipp, sizeof (*ipp)); 2881 return (error); 2882 } 2883 2884 /* 2885 * Handle sending an M_DATA for a connected socket. 2886 * Handles both IPv4 and IPv6. 2887 */ 2888 static int 2889 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) 2890 { 2891 udp_t *udp = connp->conn_udp; 2892 udp_stack_t *us = udp->udp_us; 2893 int error; 2894 ip_xmit_attr_t *ixa; 2895 2896 /* 2897 * If no other thread is using conn_ixa this just gets a reference to 2898 * conn_ixa. Otherwise we get a safe copy of conn_ixa. 2899 */ 2900 ixa = conn_get_ixa(connp, B_FALSE); 2901 if (ixa == NULL) { 2902 UDPS_BUMP_MIB(us, udpOutErrors); 2903 freemsg(mp); 2904 return (ENOMEM); 2905 } 2906 2907 ASSERT(cr != NULL); 2908 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2909 ixa->ixa_cred = cr; 2910 ixa->ixa_cpid = pid; 2911 2912 mutex_enter(&connp->conn_lock); 2913 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6, 2914 connp->conn_fport, connp->conn_flowinfo, &error); 2915 2916 if (mp == NULL) { 2917 ASSERT(error != 0); 2918 mutex_exit(&connp->conn_lock); 2919 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2920 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2921 ixa->ixa_cpid = connp->conn_cpid; 2922 ixa_refrele(ixa); 2923 UDPS_BUMP_MIB(us, udpOutErrors); 2924 freemsg(mp); 2925 return (error); 2926 } 2927 2928 /* 2929 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 2930 * safe copy, then we need to fill in any pointers in it. 
2931 */ 2932 if (ixa->ixa_ire == NULL) { 2933 in6_addr_t faddr, saddr; 2934 in6_addr_t nexthop; 2935 in_port_t fport; 2936 2937 saddr = connp->conn_saddr_v6; 2938 faddr = connp->conn_faddr_v6; 2939 fport = connp->conn_fport; 2940 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop); 2941 mutex_exit(&connp->conn_lock); 2942 2943 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, 2944 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | 2945 IPDF_IPSEC); 2946 switch (error) { 2947 case 0: 2948 break; 2949 case EADDRNOTAVAIL: 2950 /* 2951 * IXAF_VERIFY_SOURCE tells us to pick a better source. 2952 * Don't have the application see that errno 2953 */ 2954 error = ENETUNREACH; 2955 goto failed; 2956 case ENETDOWN: 2957 /* 2958 * Have !ipif_addr_ready address; drop packet silently 2959 * until we can get applications to not send until we 2960 * are ready. 2961 */ 2962 error = 0; 2963 goto failed; 2964 case EHOSTUNREACH: 2965 case ENETUNREACH: 2966 if (ixa->ixa_ire != NULL) { 2967 /* 2968 * Let conn_ip_output/ire_send_noroute return 2969 * the error and send any local ICMP error. 2970 */ 2971 error = 0; 2972 break; 2973 } 2974 /* FALLTHRU */ 2975 default: 2976 failed: 2977 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2978 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2979 ixa->ixa_cpid = connp->conn_cpid; 2980 ixa_refrele(ixa); 2981 freemsg(mp); 2982 UDPS_BUMP_MIB(us, udpOutErrors); 2983 return (error); 2984 } 2985 } else { 2986 /* Done with conn_t */ 2987 mutex_exit(&connp->conn_lock); 2988 } 2989 ASSERT(ixa->ixa_ire != NULL); 2990 2991 /* We're done. Pass the packet to ip. 
*/ 2992 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 2993 2994 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 2995 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 2996 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 2997 2998 error = conn_ip_output(mp, ixa); 2999 /* No udpOutErrors if an error since IP increases its error counter */ 3000 switch (error) { 3001 case 0: 3002 break; 3003 case EWOULDBLOCK: 3004 (void) ixa_check_drain_insert(connp, ixa); 3005 error = 0; 3006 break; 3007 case EADDRNOTAVAIL: 3008 /* 3009 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3010 * Don't have the application see that errno 3011 */ 3012 error = ENETUNREACH; 3013 break; 3014 } 3015 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3016 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3017 ixa->ixa_cpid = connp->conn_cpid; 3018 ixa_refrele(ixa); 3019 return (error); 3020 } 3021 3022 /* 3023 * Handle sending an M_DATA to the last destination. 3024 * Handles both IPv4 and IPv6. 3025 * 3026 * NOTE: The caller must hold conn_lock and we drop it here. 
3027 */ 3028 static int 3029 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, 3030 ip_xmit_attr_t *ixa) 3031 { 3032 udp_t *udp = connp->conn_udp; 3033 udp_stack_t *us = udp->udp_us; 3034 int error; 3035 3036 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3037 ASSERT(ixa != NULL); 3038 3039 ASSERT(cr != NULL); 3040 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3041 ixa->ixa_cred = cr; 3042 ixa->ixa_cpid = pid; 3043 3044 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, 3045 connp->conn_lastdstport, connp->conn_lastflowinfo, &error); 3046 3047 if (mp == NULL) { 3048 ASSERT(error != 0); 3049 mutex_exit(&connp->conn_lock); 3050 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3051 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3052 ixa->ixa_cpid = connp->conn_cpid; 3053 ixa_refrele(ixa); 3054 UDPS_BUMP_MIB(us, udpOutErrors); 3055 freemsg(mp); 3056 return (error); 3057 } 3058 3059 /* 3060 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3061 * safe copy, then we need to fill in any pointers in it. 3062 */ 3063 if (ixa->ixa_ire == NULL) { 3064 in6_addr_t lastdst, lastsrc; 3065 in6_addr_t nexthop; 3066 in_port_t lastport; 3067 3068 lastsrc = connp->conn_v6lastsrc; 3069 lastdst = connp->conn_v6lastdst; 3070 lastport = connp->conn_lastdstport; 3071 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); 3072 mutex_exit(&connp->conn_lock); 3073 3074 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, 3075 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | 3076 IPDF_VERIFY_DST | IPDF_IPSEC); 3077 switch (error) { 3078 case 0: 3079 break; 3080 case EADDRNOTAVAIL: 3081 /* 3082 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3083 * Don't have the application see that errno 3084 */ 3085 error = ENETUNREACH; 3086 goto failed; 3087 case ENETDOWN: 3088 /* 3089 * Have !ipif_addr_ready address; drop packet silently 3090 * until we can get applications to not send until we 3091 * are ready. 
3092 */ 3093 error = 0; 3094 goto failed; 3095 case EHOSTUNREACH: 3096 case ENETUNREACH: 3097 if (ixa->ixa_ire != NULL) { 3098 /* 3099 * Let conn_ip_output/ire_send_noroute return 3100 * the error and send any local ICMP error. 3101 */ 3102 error = 0; 3103 break; 3104 } 3105 /* FALLTHRU */ 3106 default: 3107 failed: 3108 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3109 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3110 ixa->ixa_cpid = connp->conn_cpid; 3111 ixa_refrele(ixa); 3112 freemsg(mp); 3113 UDPS_BUMP_MIB(us, udpOutErrors); 3114 return (error); 3115 } 3116 } else { 3117 /* Done with conn_t */ 3118 mutex_exit(&connp->conn_lock); 3119 } 3120 3121 /* We're done. Pass the packet to ip. */ 3122 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 3123 3124 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3125 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3126 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3127 3128 error = conn_ip_output(mp, ixa); 3129 /* No udpOutErrors if an error since IP increases its error counter */ 3130 switch (error) { 3131 case 0: 3132 break; 3133 case EWOULDBLOCK: 3134 (void) ixa_check_drain_insert(connp, ixa); 3135 error = 0; 3136 break; 3137 case EADDRNOTAVAIL: 3138 /* 3139 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3140 * Don't have the application see that errno 3141 */ 3142 error = ENETUNREACH; 3143 /* FALLTHRU */ 3144 default: 3145 mutex_enter(&connp->conn_lock); 3146 /* 3147 * Clear the source and v6lastdst so we call ip_attr_connect 3148 * for the next packet and try to pick a better source. 
*/
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_v6lastdst = ipv6_all_zeros;
		mutex_exit(&connp->conn_lock);
		break;
	}
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = connp->conn_cred;	/* Restore */
	ixa->ixa_cpid = connp->conn_cpid;
	ixa_refrele(ixa);
	return (error);
}


/*
 * Prepend the header template and then fill in the source and
 * flowinfo. The caller needs to handle the destination address since
 * its setting is different if rthdr or source route.
 *
 * The caller must hold conn_lock (asserted below).
 *
 * connp    - connection whose header template (conn_ht_iphc) is copied.
 * ixa      - transmit attributes; ixa_pktlen and ixa_ip_hdr_length are
 *            updated here, and IXAF_IS_IPV4 selects the IPv4 or IPv6 path.
 * mp       - payload; consumed (freed) on failure.
 * v6src    - source address used unless an IPPF_ADDR address of the
 *            matching family is set in conn_xmit_ipp.
 * dstport  - stored as-is into uha_dst_port.
 * flowinfo - merged into ip6_vcf on the IPv6 path only.
 * errorp   - set to EMSGSIZE or ENOMEM when NULL is returned.
 *
 * Returns NULL if allocation failed or if the packet would exceed
 * IP_MAXPACKET. When it returns NULL it sets errorp and mp has been freed.
 * Otherwise returns the head of the message, which is either mp itself or a
 * newly allocated mblk linked in front of mp.
 */
static mblk_t *
udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
    const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
{
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;
	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
	uint_t		pktlen;
	uint_t		alloclen;
	uint_t		copylen;
	uint8_t		*iph;
	uint_t		ip_hdr_length;
	udpha_t		*udpha;
	uint32_t	cksum;
	ip_pkt_t	*ipp;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/*
	 * Copy the header template and leave space for an SPI
	 */
	copylen = connp->conn_ht_iphc_len;
	alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
	pktlen = alloclen + msgdsize(mp);
	if (pktlen > IP_MAXPACKET) {
		freemsg(mp);
		*errorp = EMSGSIZE;
		return (NULL);
	}
	ixa->ixa_pktlen = pktlen;

	/* check/fix buffer config, setup pointers into it */
	iph = mp->b_rptr - alloclen;
	/*
	 * The headers are written in front of the payload in place only when
	 * the data block's reference count is 1, there is enough room before
	 * b_rptr, and the resulting header start is 32-bit aligned; otherwise
	 * a separate mblk is allocated for the headers.
	 */
	if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
		mblk_t *mp1;

		mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			*errorp = ENOMEM;
			return (NULL);
		}
		/* Place the headers at the very end of the new buffer */
		mp1->b_wptr = DB_LIM(mp1);
		mp1->b_cont = mp;
		mp = mp1;
		iph = (mp->b_wptr - alloclen);
	}
	mp->b_rptr = iph;
	bcopy(connp->conn_ht_iphc, iph, copylen);
	ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);

	ixa->ixa_ip_hdr_length = ip_hdr_length;
	udpha = (udpha_t *)(iph + ip_hdr_length);

	/*
	 * Setup header length and prepare for ULP checksum done in IP.
	 * udp_build_hdr_template has already massaged any routing header
	 * and placed the result in conn_sum.
	 *
	 * We make it easy for IP to include our pseudo header
	 * by putting our length in uha_checksum.
	 */
	cksum = pktlen - ip_hdr_length;
	udpha->uha_length = htons(cksum);

	/* Add conn_sum and fold the carry back into the low 16 bits */
	cksum += connp->conn_sum;
	cksum = (cksum >> 16) + (cksum & 0xFFFF);
	ASSERT(cksum < 0x10000);

	ipp = &connp->conn_xmit_ipp;
	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ipha_t	*ipha = (ipha_t *)iph;

		ipha->ipha_length = htons((uint16_t)pktlen);

		/* IP does the checksum if uha_checksum is non-zero */
		if (us->us_do_checksum)
			udpha->uha_checksum = htons(cksum);

		/* if IP_PKTINFO specified an address it wins over bind() */
		if ((ipp->ipp_fields & IPPF_ADDR) &&
		    IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
			ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
			ipha->ipha_src = ipp->ipp_addr_v4;
		} else {
			IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
		}
	} else {
		ip6_t *ip6h = (ip6_t *)iph;

		ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN));
		/* Unlike the IPv4 path, this is not gated on us_do_checksum */
		udpha->uha_checksum = htons(cksum);

		/* if IP_PKTINFO specified an address it wins over bind() */
		if ((ipp->ipp_fields & IPPF_ADDR) &&
		    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
			ip6h->ip6_src = ipp->ipp_addr;
		} else {
			ip6h->ip6_src = *v6src;
		}
		/* Default version bits combined with the caller's flowinfo */
		ip6h->ip6_vcf =
		    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
		    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
		if (ipp->ipp_fields & IPPF_TCLASS) {
			/* Overrides the class part of flowinfo */
			ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
			    ipp->ipp_tclass);
		}
	}

	/* Insert all-0s SPI now. */
	if (insert_spi)
		*((uint32_t *)(udpha + 1)) = 0;

	udpha->uha_dst_port = dstport;
	return (mp);
}

/*
 * Send a T_UDERR_IND in response to an M_DATA
 *
 * The address reported in the indication is built from the connection's
 * foreign address and port (conn_faddr_*, conn_fport), read under conn_lock.
 * If mi_tpi_uderror_ind() returns NULL nothing is sent. The caller is
 * expected to have checked that connp->conn_rq is usable; it is not checked
 * here.
 */
static void
udp_ud_err_connected(conn_t *connp, t_scalar_t error)
{
	struct sockaddr_storage ss;
	sin_t		*sin;
	sin6_t		*sin6;
	struct sockaddr	*addr;
	socklen_t	addrlen;
	mblk_t		*mp1;

	mutex_enter(&connp->conn_lock);
	/* Initialize addr and addrlen as if they're passed in */
	if (connp->conn_family == AF_INET) {
		sin = (sin_t *)&ss;
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_port = connp->conn_fport;
		sin->sin_addr.s_addr = connp->conn_faddr_v4;
		addr = (struct sockaddr *)sin;
		addrlen = sizeof (*sin);
	} else {
		sin6 = (sin6_t *)&ss;
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_port = connp->conn_fport;
		sin6->sin6_flowinfo = connp->conn_flowinfo;
		sin6->sin6_addr = connp->conn_faddr_v6;
		/* Report a scope id only for link-scope peers that have one */
		if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
		} else {
			sin6->sin6_scope_id = 0;
		}
		sin6->__sin6_src_id = 0;
		addr = (struct sockaddr *)sin6;
		addrlen = sizeof (*sin6);
	}
	mutex_exit(&connp->conn_lock);

	mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
	if (mp1 != NULL)
		putnext(connp->conn_rq, mp1);
}

This mode 3346 * is valid when we are directly beneath the stream head, and thus sockfs 3347 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3348 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3349 * connected endpoints. 3350 */ 3351 void 3352 udp_wput(queue_t *q, mblk_t *mp) 3353 { 3354 sin6_t *sin6; 3355 sin_t *sin = NULL; 3356 uint_t srcid; 3357 conn_t *connp = Q_TO_CONN(q); 3358 udp_t *udp = connp->conn_udp; 3359 int error = 0; 3360 struct sockaddr *addr = NULL; 3361 socklen_t addrlen; 3362 udp_stack_t *us = udp->udp_us; 3363 struct T_unitdata_req *tudr; 3364 mblk_t *data_mp; 3365 ushort_t ipversion; 3366 cred_t *cr; 3367 pid_t pid; 3368 3369 /* 3370 * We directly handle several cases here: T_UNITDATA_REQ message 3371 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3372 * socket. 3373 */ 3374 switch (DB_TYPE(mp)) { 3375 case M_DATA: 3376 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3377 /* Not connected; address is required */ 3378 UDPS_BUMP_MIB(us, udpOutErrors); 3379 UDP_DBGSTAT(us, udp_data_notconn); 3380 UDP_STAT(us, udp_out_err_notconn); 3381 freemsg(mp); 3382 return; 3383 } 3384 /* 3385 * All Solaris components should pass a db_credp 3386 * for this message, hence we ASSERT. 3387 * On production kernels we return an error to be robust against 3388 * random streams modules sitting on top of us. 
3389 */ 3390 cr = msg_getcred(mp, &pid); 3391 ASSERT(cr != NULL); 3392 if (cr == NULL) { 3393 UDPS_BUMP_MIB(us, udpOutErrors); 3394 freemsg(mp); 3395 return; 3396 } 3397 ASSERT(udp->udp_issocket); 3398 UDP_DBGSTAT(us, udp_data_conn); 3399 error = udp_output_connected(connp, mp, cr, pid); 3400 if (error != 0) { 3401 UDP_STAT(us, udp_out_err_output); 3402 if (connp->conn_rq != NULL) 3403 udp_ud_err_connected(connp, (t_scalar_t)error); 3404 #ifdef DEBUG 3405 printf("udp_output_connected returned %d\n", error); 3406 #endif 3407 } 3408 return; 3409 3410 case M_PROTO: 3411 case M_PCPROTO: 3412 tudr = (struct T_unitdata_req *)mp->b_rptr; 3413 if (MBLKL(mp) < sizeof (*tudr) || 3414 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3415 udp_wput_other(q, mp); 3416 return; 3417 } 3418 break; 3419 3420 default: 3421 udp_wput_other(q, mp); 3422 return; 3423 } 3424 3425 /* Handle valid T_UNITDATA_REQ here */ 3426 data_mp = mp->b_cont; 3427 if (data_mp == NULL) { 3428 error = EPROTO; 3429 goto ud_error2; 3430 } 3431 mp->b_cont = NULL; 3432 3433 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3434 error = EADDRNOTAVAIL; 3435 goto ud_error2; 3436 } 3437 3438 /* 3439 * All Solaris components should pass a db_credp 3440 * for this TPI message, hence we should ASSERT. 3441 * However, RPC (svc_clts_ksend) does this odd thing where it 3442 * passes the options from a T_UNITDATA_IND unchanged in a 3443 * T_UNITDATA_REQ. While that is the right thing to do for 3444 * some options, SCM_UCRED being the key one, this also makes it 3445 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3446 */ 3447 cr = msg_getcred(mp, &pid); 3448 if (cr == NULL) { 3449 cr = connp->conn_cred; 3450 pid = connp->conn_cpid; 3451 } 3452 3453 /* 3454 * If a port has not been bound to the stream, fail. 3455 * This is not a problem when sockfs is directly 3456 * above us, because it will ensure that the socket 3457 * is first bound before allowing data to be sent. 
3458 */ 3459 if (udp->udp_state == TS_UNBND) { 3460 error = EPROTO; 3461 goto ud_error2; 3462 } 3463 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3464 addrlen = tudr->DEST_length; 3465 3466 switch (connp->conn_family) { 3467 case AF_INET6: 3468 sin6 = (sin6_t *)addr; 3469 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3470 (sin6->sin6_family != AF_INET6)) { 3471 error = EADDRNOTAVAIL; 3472 goto ud_error2; 3473 } 3474 3475 srcid = sin6->__sin6_src_id; 3476 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3477 /* 3478 * Destination is a non-IPv4-compatible IPv6 address. 3479 * Send out an IPv6 format packet. 3480 */ 3481 3482 /* 3483 * If the local address is a mapped address return 3484 * an error. 3485 * It would be possible to send an IPv6 packet but the 3486 * response would never make it back to the application 3487 * since it is bound to a mapped address. 3488 */ 3489 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3490 error = EADDRNOTAVAIL; 3491 goto ud_error2; 3492 } 3493 3494 UDP_DBGSTAT(us, udp_out_ipv6); 3495 3496 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3497 sin6->sin6_addr = ipv6_loopback; 3498 ipversion = IPV6_VERSION; 3499 } else { 3500 if (connp->conn_ipv6_v6only) { 3501 error = EADDRNOTAVAIL; 3502 goto ud_error2; 3503 } 3504 3505 /* 3506 * If the local address is not zero or a mapped address 3507 * return an error. It would be possible to send an 3508 * IPv4 packet but the response would never make it 3509 * back to the application since it is bound to a 3510 * non-mapped address. 
3511 */ 3512 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3513 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3514 error = EADDRNOTAVAIL; 3515 goto ud_error2; 3516 } 3517 UDP_DBGSTAT(us, udp_out_mapped); 3518 3519 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3520 V4_PART_OF_V6(sin6->sin6_addr) = 3521 htonl(INADDR_LOOPBACK); 3522 } 3523 ipversion = IPV4_VERSION; 3524 } 3525 3526 if (tudr->OPT_length != 0) { 3527 /* 3528 * If we are connected then the destination needs to be 3529 * the same as the connected one. 3530 */ 3531 if (udp->udp_state == TS_DATA_XFER && 3532 !conn_same_as_last_v6(connp, sin6)) { 3533 error = EISCONN; 3534 goto ud_error2; 3535 } 3536 UDP_STAT(us, udp_out_opt); 3537 error = udp_output_ancillary(connp, NULL, sin6, 3538 data_mp, mp, NULL, cr, pid); 3539 } else { 3540 ip_xmit_attr_t *ixa; 3541 3542 /* 3543 * We have to allocate an ip_xmit_attr_t before we grab 3544 * conn_lock and we need to hold conn_lock once we've 3545 * checked conn_same_as_last_v6 to handle concurrent 3546 * send* calls on a socket. 
3547 */ 3548 ixa = conn_get_ixa(connp, B_FALSE); 3549 if (ixa == NULL) { 3550 error = ENOMEM; 3551 goto ud_error2; 3552 } 3553 mutex_enter(&connp->conn_lock); 3554 3555 if (conn_same_as_last_v6(connp, sin6) && 3556 connp->conn_lastsrcid == srcid && 3557 ipsec_outbound_policy_current(ixa)) { 3558 UDP_DBGSTAT(us, udp_out_lastdst); 3559 /* udp_output_lastdst drops conn_lock */ 3560 error = udp_output_lastdst(connp, data_mp, cr, 3561 pid, ixa); 3562 } else { 3563 UDP_DBGSTAT(us, udp_out_diffdst); 3564 /* udp_output_newdst drops conn_lock */ 3565 error = udp_output_newdst(connp, data_mp, NULL, 3566 sin6, ipversion, cr, pid, ixa); 3567 } 3568 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3569 } 3570 if (error == 0) { 3571 freeb(mp); 3572 return; 3573 } 3574 break; 3575 3576 case AF_INET: 3577 sin = (sin_t *)addr; 3578 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 3579 (sin->sin_family != AF_INET)) { 3580 error = EADDRNOTAVAIL; 3581 goto ud_error2; 3582 } 3583 UDP_DBGSTAT(us, udp_out_ipv4); 3584 if (sin->sin_addr.s_addr == INADDR_ANY) 3585 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 3586 ipversion = IPV4_VERSION; 3587 3588 srcid = 0; 3589 if (tudr->OPT_length != 0) { 3590 /* 3591 * If we are connected then the destination needs to be 3592 * the same as the connected one. 3593 */ 3594 if (udp->udp_state == TS_DATA_XFER && 3595 !conn_same_as_last_v4(connp, sin)) { 3596 error = EISCONN; 3597 goto ud_error2; 3598 } 3599 UDP_STAT(us, udp_out_opt); 3600 error = udp_output_ancillary(connp, sin, NULL, 3601 data_mp, mp, NULL, cr, pid); 3602 } else { 3603 ip_xmit_attr_t *ixa; 3604 3605 /* 3606 * We have to allocate an ip_xmit_attr_t before we grab 3607 * conn_lock and we need to hold conn_lock once we've 3608 * checked conn_same_as_last_v4 to handle concurrent 3609 * send* calls on a socket. 
3610 */ 3611 ixa = conn_get_ixa(connp, B_FALSE); 3612 if (ixa == NULL) { 3613 error = ENOMEM; 3614 goto ud_error2; 3615 } 3616 mutex_enter(&connp->conn_lock); 3617 3618 if (conn_same_as_last_v4(connp, sin) && 3619 ipsec_outbound_policy_current(ixa)) { 3620 UDP_DBGSTAT(us, udp_out_lastdst); 3621 /* udp_output_lastdst drops conn_lock */ 3622 error = udp_output_lastdst(connp, data_mp, cr, 3623 pid, ixa); 3624 } else { 3625 UDP_DBGSTAT(us, udp_out_diffdst); 3626 /* udp_output_newdst drops conn_lock */ 3627 error = udp_output_newdst(connp, data_mp, sin, 3628 NULL, ipversion, cr, pid, ixa); 3629 } 3630 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3631 } 3632 if (error == 0) { 3633 freeb(mp); 3634 return; 3635 } 3636 break; 3637 } 3638 UDP_STAT(us, udp_out_err_output); 3639 ASSERT(mp != NULL); 3640 /* mp is freed by the following routine */ 3641 udp_ud_err(q, mp, (t_scalar_t)error); 3642 return; 3643 3644 ud_error2: 3645 UDPS_BUMP_MIB(us, udpOutErrors); 3646 freemsg(data_mp); 3647 UDP_STAT(us, udp_out_err_output); 3648 ASSERT(mp != NULL); 3649 /* mp is freed by the following routine */ 3650 udp_ud_err(q, mp, (t_scalar_t)error); 3651 } 3652 3653 /* 3654 * Handle the case of the IP address, port, flow label being different 3655 * for both IPv4 and IPv6. 3656 * 3657 * NOTE: The caller must hold conn_lock and we drop it here. 
3658 */ 3659 static int 3660 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 3661 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 3662 { 3663 uint_t srcid; 3664 uint32_t flowinfo; 3665 udp_t *udp = connp->conn_udp; 3666 int error = 0; 3667 ip_xmit_attr_t *oldixa; 3668 udp_stack_t *us = udp->udp_us; 3669 in6_addr_t v6src; 3670 in6_addr_t v6dst; 3671 in6_addr_t v6nexthop; 3672 in_port_t dstport; 3673 3674 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3675 ASSERT(ixa != NULL); 3676 /* 3677 * We hold conn_lock across all the use and modifications of 3678 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 3679 * stay consistent. 3680 */ 3681 3682 ASSERT(cr != NULL); 3683 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3684 ixa->ixa_cred = cr; 3685 ixa->ixa_cpid = pid; 3686 if (is_system_labeled()) { 3687 /* We need to restart with a label based on the cred */ 3688 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3689 } 3690 3691 /* 3692 * If we are connected then the destination needs to be the 3693 * same as the connected one, which is not the case here since we 3694 * checked for that above. 3695 */ 3696 if (udp->udp_state == TS_DATA_XFER) { 3697 mutex_exit(&connp->conn_lock); 3698 error = EISCONN; 3699 goto ud_error; 3700 } 3701 3702 /* In case previous destination was multicast or multirt */ 3703 ip_attr_newdst(ixa); 3704 3705 /* 3706 * If laddr is unspecified then we look at sin6_src_id. 3707 * We will give precedence to a source address set with IPV6_PKTINFO 3708 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 3709 * want ip_attr_connect to select a source (since it can fail) when 3710 * IPV6_PKTINFO is specified. 3711 * If this doesn't result in a source address then we get a source 3712 * from ip_attr_connect() below. 
3713 */ 3714 v6src = connp->conn_saddr_v6; 3715 if (sin != NULL) { 3716 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 3717 dstport = sin->sin_port; 3718 flowinfo = 0; 3719 srcid = 0; 3720 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3721 if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) { 3722 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3723 connp->conn_netstack); 3724 } 3725 ixa->ixa_flags |= IXAF_IS_IPV4; 3726 } else { 3727 v6dst = sin6->sin6_addr; 3728 dstport = sin6->sin6_port; 3729 flowinfo = sin6->sin6_flowinfo; 3730 srcid = sin6->__sin6_src_id; 3731 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 3732 ixa->ixa_scopeid = sin6->sin6_scope_id; 3733 ixa->ixa_flags |= IXAF_SCOPEID_SET; 3734 } else { 3735 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3736 } 3737 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 3738 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3739 connp->conn_netstack); 3740 } 3741 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 3742 ixa->ixa_flags |= IXAF_IS_IPV4; 3743 else 3744 ixa->ixa_flags &= ~IXAF_IS_IPV4; 3745 } 3746 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ 3747 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) { 3748 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 3749 3750 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3751 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3752 v6src = ipp->ipp_addr; 3753 } else { 3754 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3755 v6src = ipp->ipp_addr; 3756 } 3757 } 3758 3759 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 3760 mutex_exit(&connp->conn_lock); 3761 3762 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 3763 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 3764 switch (error) { 3765 case 0: 3766 break; 3767 case EADDRNOTAVAIL: 3768 /* 3769 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3770 * Don't have the application see that errno 3771 */ 3772 error = ENETUNREACH; 3773 goto failed; 3774 case ENETDOWN: 3775 /* 3776 * Have !ipif_addr_ready address; drop packet silently 3777 * until we can get applications to not send until we 3778 * are ready. 3779 */ 3780 error = 0; 3781 goto failed; 3782 case EHOSTUNREACH: 3783 case ENETUNREACH: 3784 if (ixa->ixa_ire != NULL) { 3785 /* 3786 * Let conn_ip_output/ire_send_noroute return 3787 * the error and send any local ICMP error. 3788 */ 3789 error = 0; 3790 break; 3791 } 3792 /* FALLTHRU */ 3793 failed: 3794 default: 3795 goto ud_error; 3796 } 3797 3798 3799 /* 3800 * Cluster note: we let the cluster hook know that we are sending to a 3801 * new address and/or port. 3802 */ 3803 if (cl_inet_connect2 != NULL) { 3804 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 3805 if (error != 0) { 3806 error = EHOSTUNREACH; 3807 goto ud_error; 3808 } 3809 } 3810 3811 mutex_enter(&connp->conn_lock); 3812 /* 3813 * While we dropped the lock some other thread might have connected 3814 * this socket. If so we bail out with EISCONN to ensure that the 3815 * connecting thread is the one that updates conn_ixa, conn_ht_* 3816 * and conn_*last*. 3817 */ 3818 if (udp->udp_state == TS_DATA_XFER) { 3819 mutex_exit(&connp->conn_lock); 3820 error = EISCONN; 3821 goto ud_error; 3822 } 3823 3824 /* 3825 * We need to rebuild the headers if 3826 * - we are labeling packets (could be different for different 3827 * destinations) 3828 * - we have a source route (or routing header) since we need to 3829 * massage that to get the pseudo-header checksum 3830 * - the IP version is different than the last time 3831 * - a socket option with COA_HEADER_CHANGED has been set which 3832 * set conn_v6lastdst to zero. 3833 * 3834 * Otherwise the prepend function will just update the src, dst, 3835 * dstport, and flow label. 
3836 */ 3837 if (is_system_labeled()) { 3838 /* TX MLP requires SCM_UCRED and don't have that here */ 3839 if (connp->conn_mlp_type != mlptSingle) { 3840 mutex_exit(&connp->conn_lock); 3841 error = ECONNREFUSED; 3842 goto ud_error; 3843 } 3844 /* 3845 * Check whether Trusted Solaris policy allows communication 3846 * with this host, and pretend that the destination is 3847 * unreachable if not. 3848 * Compute any needed label and place it in ipp_label_v4/v6. 3849 * 3850 * Later conn_build_hdr_template/conn_prepend_hdr takes 3851 * ipp_label_v4/v6 to form the packet. 3852 * 3853 * Tsol note: Since we hold conn_lock we know no other 3854 * thread manipulates conn_xmit_ipp. 3855 */ 3856 error = conn_update_label(connp, ixa, &v6dst, 3857 &connp->conn_xmit_ipp); 3858 if (error != 0) { 3859 mutex_exit(&connp->conn_lock); 3860 goto ud_error; 3861 } 3862 /* Rebuild the header template */ 3863 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 3864 flowinfo); 3865 if (error != 0) { 3866 mutex_exit(&connp->conn_lock); 3867 goto ud_error; 3868 } 3869 } else if ((connp->conn_xmit_ipp.ipp_fields & 3870 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 3871 ipversion != connp->conn_lastipversion || 3872 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 3873 /* Rebuild the header template */ 3874 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 3875 flowinfo); 3876 if (error != 0) { 3877 mutex_exit(&connp->conn_lock); 3878 goto ud_error; 3879 } 3880 } else { 3881 /* Simply update the destination address if no source route */ 3882 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3883 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 3884 3885 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 3886 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 3887 ipha->ipha_fragment_offset_and_flags |= 3888 IPH_DF_HTONS; 3889 } else { 3890 ipha->ipha_fragment_offset_and_flags &= 3891 ~IPH_DF_HTONS; 3892 } 3893 } else { 3894 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 3895 ip6h->ip6_dst = v6dst; 3896 } 3897 
} 3898 3899 /* 3900 * Remember the dst/dstport etc which corresponds to the built header 3901 * template and conn_ixa. 3902 */ 3903 oldixa = conn_replace_ixa(connp, ixa); 3904 connp->conn_v6lastdst = v6dst; 3905 connp->conn_lastipversion = ipversion; 3906 connp->conn_lastdstport = dstport; 3907 connp->conn_lastflowinfo = flowinfo; 3908 connp->conn_lastscopeid = ixa->ixa_scopeid; 3909 connp->conn_lastsrcid = srcid; 3910 /* Also remember a source to use together with lastdst */ 3911 connp->conn_v6lastsrc = v6src; 3912 3913 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 3914 dstport, flowinfo, &error); 3915 3916 /* Done with conn_t */ 3917 mutex_exit(&connp->conn_lock); 3918 ixa_refrele(oldixa); 3919 3920 if (data_mp == NULL) { 3921 ASSERT(error != 0); 3922 goto ud_error; 3923 } 3924 3925 /* We're done. Pass the packet to ip. */ 3926 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 3927 3928 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3929 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *, 3930 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]); 3931 3932 error = conn_ip_output(data_mp, ixa); 3933 /* No udpOutErrors if an error since IP increases its error counter */ 3934 switch (error) { 3935 case 0: 3936 break; 3937 case EWOULDBLOCK: 3938 (void) ixa_check_drain_insert(connp, ixa); 3939 error = 0; 3940 break; 3941 case EADDRNOTAVAIL: 3942 /* 3943 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3944 * Don't have the application see that errno 3945 */ 3946 error = ENETUNREACH; 3947 /* FALLTHRU */ 3948 default: 3949 mutex_enter(&connp->conn_lock); 3950 /* 3951 * Clear the source and v6lastdst so we call ip_attr_connect 3952 * for the next packet and try to pick a better source. 
3953 */ 3954 if (connp->conn_mcbc_bind) 3955 connp->conn_saddr_v6 = ipv6_all_zeros; 3956 else 3957 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3958 connp->conn_v6lastdst = ipv6_all_zeros; 3959 mutex_exit(&connp->conn_lock); 3960 break; 3961 } 3962 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3963 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3964 ixa->ixa_cpid = connp->conn_cpid; 3965 ixa_refrele(ixa); 3966 return (error); 3967 3968 ud_error: 3969 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3970 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3971 ixa->ixa_cpid = connp->conn_cpid; 3972 ixa_refrele(ixa); 3973 3974 freemsg(data_mp); 3975 UDPS_BUMP_MIB(us, udpOutErrors); 3976 UDP_STAT(us, udp_out_err_output); 3977 return (error); 3978 } 3979 3980 /* ARGSUSED */ 3981 static void 3982 udp_wput_fallback(queue_t *wq, mblk_t *mp) 3983 { 3984 #ifdef DEBUG 3985 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 3986 #endif 3987 freemsg(mp); 3988 } 3989 3990 3991 /* 3992 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
3993 */ 3994 static void 3995 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 3996 { 3997 void *data; 3998 mblk_t *datamp = mp->b_cont; 3999 conn_t *connp = Q_TO_CONN(q); 4000 udp_t *udp = connp->conn_udp; 4001 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 4002 4003 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 4004 cmdp->cb_error = EPROTO; 4005 qreply(q, mp); 4006 return; 4007 } 4008 data = datamp->b_rptr; 4009 4010 mutex_enter(&connp->conn_lock); 4011 switch (cmdp->cb_cmd) { 4012 case TI_GETPEERNAME: 4013 if (udp->udp_state != TS_DATA_XFER) 4014 cmdp->cb_error = ENOTCONN; 4015 else 4016 cmdp->cb_error = conn_getpeername(connp, data, 4017 &cmdp->cb_len); 4018 break; 4019 case TI_GETMYNAME: 4020 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); 4021 break; 4022 default: 4023 cmdp->cb_error = EINVAL; 4024 break; 4025 } 4026 mutex_exit(&connp->conn_lock); 4027 4028 qreply(q, mp); 4029 } 4030 4031 static void 4032 udp_use_pure_tpi(udp_t *udp) 4033 { 4034 conn_t *connp = udp->udp_connp; 4035 4036 mutex_enter(&connp->conn_lock); 4037 udp->udp_issocket = B_FALSE; 4038 mutex_exit(&connp->conn_lock); 4039 UDP_STAT(udp->udp_us, udp_sock_fallback); 4040 } 4041 4042 static void 4043 udp_wput_other(queue_t *q, mblk_t *mp) 4044 { 4045 uchar_t *rptr = mp->b_rptr; 4046 struct iocblk *iocp; 4047 conn_t *connp = Q_TO_CONN(q); 4048 udp_t *udp = connp->conn_udp; 4049 cred_t *cr; 4050 4051 switch (mp->b_datap->db_type) { 4052 case M_CMD: 4053 udp_wput_cmdblk(q, mp); 4054 return; 4055 4056 case M_PROTO: 4057 case M_PCPROTO: 4058 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4059 /* 4060 * If the message does not contain a PRIM_type, 4061 * throw it away. 
4062 */ 4063 freemsg(mp); 4064 return; 4065 } 4066 switch (((t_primp_t)rptr)->type) { 4067 case T_ADDR_REQ: 4068 udp_addr_req(q, mp); 4069 return; 4070 case O_T_BIND_REQ: 4071 case T_BIND_REQ: 4072 udp_tpi_bind(q, mp); 4073 return; 4074 case T_CONN_REQ: 4075 udp_tpi_connect(q, mp); 4076 return; 4077 case T_CAPABILITY_REQ: 4078 udp_capability_req(q, mp); 4079 return; 4080 case T_INFO_REQ: 4081 udp_info_req(q, mp); 4082 return; 4083 case T_UNITDATA_REQ: 4084 /* 4085 * If a T_UNITDATA_REQ gets here, the address must 4086 * be bad. Valid T_UNITDATA_REQs are handled 4087 * in udp_wput. 4088 */ 4089 udp_ud_err(q, mp, EADDRNOTAVAIL); 4090 return; 4091 case T_UNBIND_REQ: 4092 udp_tpi_unbind(q, mp); 4093 return; 4094 case T_SVR4_OPTMGMT_REQ: 4095 /* 4096 * All Solaris components should pass a db_credp 4097 * for this TPI message, hence we ASSERT. 4098 * But in case there is some other M_PROTO that looks 4099 * like a TPI message sent by some other kernel 4100 * component, we check and return an error. 4101 */ 4102 cr = msg_getcred(mp, NULL); 4103 ASSERT(cr != NULL); 4104 if (cr == NULL) { 4105 udp_err_ack(q, mp, TSYSERR, EINVAL); 4106 return; 4107 } 4108 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 4109 cr)) { 4110 svr4_optcom_req(q, mp, cr, &udp_opt_obj); 4111 } 4112 return; 4113 4114 case T_OPTMGMT_REQ: 4115 /* 4116 * All Solaris components should pass a db_credp 4117 * for this TPI message, hence we ASSERT. 4118 * But in case there is some other M_PROTO that looks 4119 * like a TPI message sent by some other kernel 4120 * component, we check and return an error. 4121 */ 4122 cr = msg_getcred(mp, NULL); 4123 ASSERT(cr != NULL); 4124 if (cr == NULL) { 4125 udp_err_ack(q, mp, TSYSERR, EINVAL); 4126 return; 4127 } 4128 tpi_optcom_req(q, mp, cr, &udp_opt_obj); 4129 return; 4130 4131 case T_DISCON_REQ: 4132 udp_tpi_disconnect(q, mp); 4133 return; 4134 4135 /* The following TPI message is not supported by udp. 
*/ 4136 case O_T_CONN_RES: 4137 case T_CONN_RES: 4138 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4139 return; 4140 4141 /* The following 3 TPI requests are illegal for udp. */ 4142 case T_DATA_REQ: 4143 case T_EXDATA_REQ: 4144 case T_ORDREL_REQ: 4145 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4146 return; 4147 default: 4148 break; 4149 } 4150 break; 4151 case M_FLUSH: 4152 if (*rptr & FLUSHW) 4153 flushq(q, FLUSHDATA); 4154 break; 4155 case M_IOCTL: 4156 iocp = (struct iocblk *)mp->b_rptr; 4157 switch (iocp->ioc_cmd) { 4158 case TI_GETPEERNAME: 4159 if (udp->udp_state != TS_DATA_XFER) { 4160 /* 4161 * If a default destination address has not 4162 * been associated with the stream, then we 4163 * don't know the peer's name. 4164 */ 4165 iocp->ioc_error = ENOTCONN; 4166 iocp->ioc_count = 0; 4167 mp->b_datap->db_type = M_IOCACK; 4168 qreply(q, mp); 4169 return; 4170 } 4171 /* FALLTHRU */ 4172 case TI_GETMYNAME: 4173 /* 4174 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4175 * need to copyin the user's strbuf structure. 4176 * Processing will continue in the M_IOCDATA case 4177 * below. 4178 */ 4179 mi_copyin(q, mp, NULL, 4180 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4181 return; 4182 case _SIOCSOCKFALLBACK: 4183 /* 4184 * Either sockmod is about to be popped and the 4185 * socket would now be treated as a plain stream, 4186 * or a module is about to be pushed so we have 4187 * to follow pure TPI semantics. 4188 */ 4189 if (!udp->udp_issocket) { 4190 DB_TYPE(mp) = M_IOCNAK; 4191 iocp->ioc_error = EINVAL; 4192 } else { 4193 udp_use_pure_tpi(udp); 4194 4195 DB_TYPE(mp) = M_IOCACK; 4196 iocp->ioc_error = 0; 4197 } 4198 iocp->ioc_count = 0; 4199 iocp->ioc_rval = 0; 4200 qreply(q, mp); 4201 return; 4202 default: 4203 break; 4204 } 4205 break; 4206 case M_IOCDATA: 4207 udp_wput_iocdata(q, mp); 4208 return; 4209 default: 4210 /* Unrecognized messages are passed through without change. 
*/ 4211 break; 4212 } 4213 ip_wput_nondata(q, mp); 4214 } 4215 4216 /* 4217 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 4218 * messages. 4219 */ 4220 static void 4221 udp_wput_iocdata(queue_t *q, mblk_t *mp) 4222 { 4223 mblk_t *mp1; 4224 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4225 STRUCT_HANDLE(strbuf, sb); 4226 uint_t addrlen; 4227 conn_t *connp = Q_TO_CONN(q); 4228 udp_t *udp = connp->conn_udp; 4229 4230 /* Make sure it is one of ours. */ 4231 switch (iocp->ioc_cmd) { 4232 case TI_GETMYNAME: 4233 case TI_GETPEERNAME: 4234 break; 4235 default: 4236 ip_wput_nondata(q, mp); 4237 return; 4238 } 4239 4240 switch (mi_copy_state(q, mp, &mp1)) { 4241 case -1: 4242 return; 4243 case MI_COPY_CASE(MI_COPY_IN, 1): 4244 break; 4245 case MI_COPY_CASE(MI_COPY_OUT, 1): 4246 /* 4247 * The address has been copied out, so now 4248 * copyout the strbuf. 4249 */ 4250 mi_copyout(q, mp); 4251 return; 4252 case MI_COPY_CASE(MI_COPY_OUT, 2): 4253 /* 4254 * The address and strbuf have been copied out. 4255 * We're done, so just acknowledge the original 4256 * M_IOCTL. 4257 */ 4258 mi_copy_done(q, mp, 0); 4259 return; 4260 default: 4261 /* 4262 * Something strange has happened, so acknowledge 4263 * the original M_IOCTL with an EPROTO error. 4264 */ 4265 mi_copy_done(q, mp, EPROTO); 4266 return; 4267 } 4268 4269 /* 4270 * Now we have the strbuf structure for TI_GETMYNAME 4271 * and TI_GETPEERNAME. Next we copyout the requested 4272 * address and then we'll copyout the strbuf. 
4273 */ 4274 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 4275 4276 if (connp->conn_family == AF_INET) 4277 addrlen = sizeof (sin_t); 4278 else 4279 addrlen = sizeof (sin6_t); 4280 4281 if (STRUCT_FGET(sb, maxlen) < addrlen) { 4282 mi_copy_done(q, mp, EINVAL); 4283 return; 4284 } 4285 4286 switch (iocp->ioc_cmd) { 4287 case TI_GETMYNAME: 4288 break; 4289 case TI_GETPEERNAME: 4290 if (udp->udp_state != TS_DATA_XFER) { 4291 mi_copy_done(q, mp, ENOTCONN); 4292 return; 4293 } 4294 break; 4295 } 4296 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 4297 if (!mp1) 4298 return; 4299 4300 STRUCT_FSET(sb, len, addrlen); 4301 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4302 case TI_GETMYNAME: 4303 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, 4304 &addrlen); 4305 break; 4306 case TI_GETPEERNAME: 4307 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, 4308 &addrlen); 4309 break; 4310 } 4311 mp1->b_wptr += addrlen; 4312 /* Copy out the address */ 4313 mi_copyout(q, mp); 4314 } 4315 4316 void 4317 udp_ddi_g_init(void) 4318 { 4319 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 4320 udp_opt_obj.odb_opt_arr_cnt); 4321 4322 /* 4323 * We want to be informed each time a stack is created or 4324 * destroyed in the kernel, so we can maintain the 4325 * set of udp_stack_t's. 4326 */ 4327 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 4328 } 4329 4330 void 4331 udp_ddi_g_destroy(void) 4332 { 4333 netstack_unregister(NS_UDP); 4334 } 4335 4336 #define INET_NAME "ip" 4337 4338 /* 4339 * Initialize the UDP stack instance. 
4340 */ 4341 static void * 4342 udp_stack_init(netstackid_t stackid, netstack_t *ns) 4343 { 4344 udp_stack_t *us; 4345 int i; 4346 int error = 0; 4347 major_t major; 4348 size_t arrsz; 4349 4350 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 4351 us->us_netstack = ns; 4352 4353 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 4354 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 4355 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 4356 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 4357 4358 /* 4359 * The smallest anonymous port in the priviledged port range which UDP 4360 * looks for free port. Use in the option UDP_ANONPRIVBIND. 4361 */ 4362 us->us_min_anonpriv_port = 512; 4363 4364 us->us_bind_fanout_size = udp_bind_fanout_size; 4365 4366 /* Roundup variable that might have been modified in /etc/system */ 4367 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 4368 /* Not a power of two. Round up to nearest power of two */ 4369 for (i = 0; i < 31; i++) { 4370 if (us->us_bind_fanout_size < (1 << i)) 4371 break; 4372 } 4373 us->us_bind_fanout_size = 1 << i; 4374 } 4375 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 4376 sizeof (udp_fanout_t), KM_SLEEP); 4377 for (i = 0; i < us->us_bind_fanout_size; i++) { 4378 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 4379 NULL); 4380 } 4381 4382 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t); 4383 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, 4384 KM_SLEEP); 4385 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz); 4386 4387 /* Allocate the per netstack stats */ 4388 mutex_enter(&cpu_lock); 4389 us->us_sc_cnt = MAX(ncpus, boot_ncpus); 4390 mutex_exit(&cpu_lock); 4391 us->us_sc = kmem_zalloc(max_ncpus * sizeof (udp_stats_cpu_t *), 4392 KM_SLEEP); 4393 for (i = 0; i < us->us_sc_cnt; i++) { 4394 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), 4395 KM_SLEEP); 4396 } 4397 4398 us->us_kstat = udp_kstat2_init(stackid); 4399 us->us_mibkp = 
udp_kstat_init(stackid); 4400 4401 major = mod_name_to_major(INET_NAME); 4402 error = ldi_ident_from_major(major, &us->us_ldi_ident); 4403 ASSERT(error == 0); 4404 return (us); 4405 } 4406 4407 /* 4408 * Free the UDP stack instance. 4409 */ 4410 static void 4411 udp_stack_fini(netstackid_t stackid, void *arg) 4412 { 4413 udp_stack_t *us = (udp_stack_t *)arg; 4414 int i; 4415 4416 for (i = 0; i < us->us_bind_fanout_size; i++) { 4417 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 4418 } 4419 4420 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 4421 sizeof (udp_fanout_t)); 4422 4423 us->us_bind_fanout = NULL; 4424 4425 for (i = 0; i < us->us_sc_cnt; i++) 4426 kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t)); 4427 kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *)); 4428 4429 kmem_free(us->us_propinfo_tbl, 4430 udp_propinfo_count * sizeof (mod_prop_info_t)); 4431 us->us_propinfo_tbl = NULL; 4432 4433 udp_kstat_fini(stackid, us->us_mibkp); 4434 us->us_mibkp = NULL; 4435 4436 udp_kstat2_fini(stackid, us->us_kstat); 4437 us->us_kstat = NULL; 4438 4439 mutex_destroy(&us->us_epriv_port_lock); 4440 ldi_ident_release(us->us_ldi_ident); 4441 kmem_free(us, sizeof (*us)); 4442 } 4443 4444 static size_t 4445 udp_set_rcv_hiwat(udp_t *udp, size_t size) 4446 { 4447 udp_stack_t *us = udp->udp_us; 4448 4449 /* We add a bit of extra buffering */ 4450 size += size >> 1; 4451 if (size > us->us_max_buf) 4452 size = us->us_max_buf; 4453 4454 udp->udp_rcv_hiwat = size; 4455 return (size); 4456 } 4457 4458 /* 4459 * For the lower queue so that UDP can be a dummy mux. 
4460 * Nobody should be sending 4461 * packets up this stream 4462 */ 4463 static void 4464 udp_lrput(queue_t *q, mblk_t *mp) 4465 { 4466 switch (mp->b_datap->db_type) { 4467 case M_FLUSH: 4468 /* Turn around */ 4469 if (*mp->b_rptr & FLUSHW) { 4470 *mp->b_rptr &= ~FLUSHR; 4471 qreply(q, mp); 4472 return; 4473 } 4474 break; 4475 } 4476 freemsg(mp); 4477 } 4478 4479 /* 4480 * For the lower queue so that UDP can be a dummy mux. 4481 * Nobody should be sending packets down this stream. 4482 */ 4483 /* ARGSUSED */ 4484 void 4485 udp_lwput(queue_t *q, mblk_t *mp) 4486 { 4487 freemsg(mp); 4488 } 4489 4490 /* 4491 * When a CPU is added, we need to allocate the per CPU stats struct. 4492 */ 4493 void 4494 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid) 4495 { 4496 int i; 4497 4498 if (cpu_seqid < us->us_sc_cnt) 4499 return; 4500 for (i = us->us_sc_cnt; i <= cpu_seqid; i++) { 4501 ASSERT(us->us_sc[i] == NULL); 4502 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), 4503 KM_SLEEP); 4504 } 4505 membar_producer(); 4506 us->us_sc_cnt = cpu_seqid + 1; 4507 } 4508 4509 /* 4510 * Below routines for UDP socket module. 4511 */ 4512 4513 static conn_t * 4514 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp) 4515 { 4516 udp_t *udp; 4517 conn_t *connp; 4518 zoneid_t zoneid; 4519 netstack_t *ns; 4520 udp_stack_t *us; 4521 int len; 4522 4523 ASSERT(errorp != NULL); 4524 4525 if ((*errorp = secpolicy_basic_net_access(credp)) != 0) 4526 return (NULL); 4527 4528 ns = netstack_find_by_cred(credp); 4529 ASSERT(ns != NULL); 4530 us = ns->netstack_udp; 4531 ASSERT(us != NULL); 4532 4533 /* 4534 * For exclusive stacks we set the zoneid to zero 4535 * to make UDP operate as if in the global zone. 
4536 */ 4537 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 4538 zoneid = GLOBAL_ZONEID; 4539 else 4540 zoneid = crgetzoneid(credp); 4541 4542 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 4543 4544 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 4545 if (connp == NULL) { 4546 netstack_rele(ns); 4547 *errorp = ENOMEM; 4548 return (NULL); 4549 } 4550 udp = connp->conn_udp; 4551 4552 /* 4553 * ipcl_conn_create did a netstack_hold. Undo the hold that was 4554 * done by netstack_find_by_cred() 4555 */ 4556 netstack_rele(ns); 4557 4558 /* 4559 * Since this conn_t/udp_t is not yet visible to anybody else we don't 4560 * need to lock anything. 4561 */ 4562 ASSERT(connp->conn_proto == IPPROTO_UDP); 4563 ASSERT(connp->conn_udp == udp); 4564 ASSERT(udp->udp_connp == connp); 4565 4566 /* Set the initial state of the stream and the privilege status. */ 4567 udp->udp_state = TS_UNBND; 4568 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 4569 if (isv6) { 4570 connp->conn_family = AF_INET6; 4571 connp->conn_ipversion = IPV6_VERSION; 4572 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 4573 connp->conn_default_ttl = us->us_ipv6_hoplimit; 4574 len = sizeof (ip6_t) + UDPH_SIZE; 4575 } else { 4576 connp->conn_family = AF_INET; 4577 connp->conn_ipversion = IPV4_VERSION; 4578 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 4579 connp->conn_default_ttl = us->us_ipv4_ttl; 4580 len = sizeof (ipha_t) + UDPH_SIZE; 4581 } 4582 4583 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 4584 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 4585 4586 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 4587 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 4588 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 4589 connp->conn_ixa->ixa_zoneid = zoneid; 4590 4591 connp->conn_zoneid = zoneid; 4592 4593 /* 4594 * If the caller has the process-wide flag set, then default to MAC 4595 * exempt mode. 
This allows read-down to unlabeled hosts. 4596 */ 4597 if (getpflags(NET_MAC_AWARE, credp) != 0) 4598 connp->conn_mac_mode = CONN_MAC_AWARE; 4599 4600 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 4601 4602 udp->udp_us = us; 4603 4604 connp->conn_rcvbuf = us->us_recv_hiwat; 4605 connp->conn_sndbuf = us->us_xmit_hiwat; 4606 connp->conn_sndlowat = us->us_xmit_lowat; 4607 connp->conn_rcvlowat = udp_mod_info.mi_lowat; 4608 4609 connp->conn_wroff = len + us->us_wroff_extra; 4610 connp->conn_so_type = SOCK_DGRAM; 4611 4612 connp->conn_recv = udp_input; 4613 connp->conn_recvicmp = udp_icmp_input; 4614 crhold(credp); 4615 connp->conn_cred = credp; 4616 connp->conn_cpid = curproc->p_pid; 4617 connp->conn_open_time = ddi_get_lbolt64(); 4618 /* Cache things in ixa without an extra refhold */ 4619 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 4620 connp->conn_ixa->ixa_cred = connp->conn_cred; 4621 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 4622 if (is_system_labeled()) 4623 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 4624 4625 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 4626 4627 if (us->us_pmtu_discovery) 4628 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 4629 4630 return (connp); 4631 } 4632 4633 sock_lower_handle_t 4634 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 4635 uint_t *smodep, int *errorp, int flags, cred_t *credp) 4636 { 4637 udp_t *udp = NULL; 4638 udp_stack_t *us; 4639 conn_t *connp; 4640 boolean_t isv6; 4641 4642 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 4643 (proto != 0 && proto != IPPROTO_UDP)) { 4644 *errorp = EPROTONOSUPPORT; 4645 return (NULL); 4646 } 4647 4648 if (family == AF_INET6) 4649 isv6 = B_TRUE; 4650 else 4651 isv6 = B_FALSE; 4652 4653 connp = udp_do_open(credp, isv6, flags, errorp); 4654 if (connp == NULL) 4655 return (NULL); 4656 4657 udp = connp->conn_udp; 4658 ASSERT(udp != NULL); 4659 us = udp->udp_us; 4660 ASSERT(us != 
NULL); 4661 4662 udp->udp_issocket = B_TRUE; 4663 connp->conn_flags |= IPCL_NONSTR; 4664 4665 /* 4666 * Set flow control 4667 * Since this conn_t/udp_t is not yet visible to anybody else we don't 4668 * need to lock anything. 4669 */ 4670 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); 4671 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; 4672 4673 connp->conn_flow_cntrld = B_FALSE; 4674 4675 mutex_enter(&connp->conn_lock); 4676 connp->conn_state_flags &= ~CONN_INCIPIENT; 4677 mutex_exit(&connp->conn_lock); 4678 4679 *errorp = 0; 4680 *smodep = SM_ATOMIC; 4681 *sock_downcalls = &sock_udp_downcalls; 4682 return ((sock_lower_handle_t)connp); 4683 } 4684 4685 /* ARGSUSED3 */ 4686 void 4687 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 4688 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 4689 { 4690 conn_t *connp = (conn_t *)proto_handle; 4691 struct sock_proto_props sopp; 4692 4693 /* All Solaris components should pass a cred for this operation. */ 4694 ASSERT(cr != NULL); 4695 4696 connp->conn_upcalls = sock_upcalls; 4697 connp->conn_upper_handle = sock_handle; 4698 4699 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 4700 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 4701 sopp.sopp_wroff = connp->conn_wroff; 4702 sopp.sopp_maxblk = INFPSZ; 4703 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 4704 sopp.sopp_rxlowat = connp->conn_rcvlowat; 4705 sopp.sopp_maxaddrlen = sizeof (sin6_t); 4706 sopp.sopp_maxpsz = 4707 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 4708 UDP_MAXPACKET_IPV6; 4709 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : 4710 udp_mod_info.mi_minpsz; 4711 4712 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 4713 &sopp); 4714 } 4715 4716 static void 4717 udp_do_close(conn_t *connp) 4718 { 4719 udp_t *udp; 4720 4721 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 4722 udp = connp->conn_udp; 4723 4724 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 4725 /* 4726 * Running in cluster mode - register unbind information 4727 */ 4728 if (connp->conn_ipversion == IPV4_VERSION) { 4729 (*cl_inet_unbind)( 4730 connp->conn_netstack->netstack_stackid, 4731 IPPROTO_UDP, AF_INET, 4732 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 4733 (in_port_t)connp->conn_lport, NULL); 4734 } else { 4735 (*cl_inet_unbind)( 4736 connp->conn_netstack->netstack_stackid, 4737 IPPROTO_UDP, AF_INET6, 4738 (uint8_t *)&(connp->conn_laddr_v6), 4739 (in_port_t)connp->conn_lport, NULL); 4740 } 4741 } 4742 4743 udp_bind_hash_remove(udp, B_FALSE); 4744 4745 ip_quiesce_conn(connp); 4746 4747 if (!IPCL_IS_NONSTR(connp)) { 4748 ASSERT(connp->conn_wq != NULL); 4749 ASSERT(connp->conn_rq != NULL); 4750 qprocsoff(connp->conn_rq); 4751 } 4752 4753 udp_close_free(connp); 4754 4755 /* 4756 * Now we are truly single threaded on this stream, and can 4757 * delete the things hanging off the connp, and finally the connp. 4758 * We removed this connp from the fanout list, it cannot be 4759 * accessed thru the fanouts, and we already waited for the 4760 * conn_ref to drop to 0. We are already in close, so 4761 * there cannot be any other thread from the top. qprocsoff 4762 * has completed, and service has completed or won't run in 4763 * future. 
4764 */ 4765 ASSERT(connp->conn_ref == 1); 4766 4767 if (!IPCL_IS_NONSTR(connp)) { 4768 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 4769 } else { 4770 ip_free_helper_stream(connp); 4771 } 4772 4773 connp->conn_ref--; 4774 ipcl_conn_destroy(connp); 4775 } 4776 4777 /* ARGSUSED1 */ 4778 int 4779 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 4780 { 4781 conn_t *connp = (conn_t *)proto_handle; 4782 4783 /* All Solaris components should pass a cred for this operation. */ 4784 ASSERT(cr != NULL); 4785 4786 udp_do_close(connp); 4787 return (0); 4788 } 4789 4790 static int 4791 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 4792 boolean_t bind_to_req_port_only) 4793 { 4794 sin_t *sin; 4795 sin6_t *sin6; 4796 udp_t *udp = connp->conn_udp; 4797 int error = 0; 4798 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 4799 in_port_t port; /* Host byte order */ 4800 in_port_t requested_port; /* Host byte order */ 4801 int count; 4802 ipaddr_t v4src; /* Set if AF_INET */ 4803 in6_addr_t v6src; 4804 int loopmax; 4805 udp_fanout_t *udpf; 4806 in_port_t lport; /* Network byte order */ 4807 uint_t scopeid = 0; 4808 zoneid_t zoneid = IPCL_ZONEID(connp); 4809 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4810 boolean_t is_inaddr_any; 4811 mlp_type_t addrtype, mlptype; 4812 udp_stack_t *us = udp->udp_us; 4813 4814 switch (len) { 4815 case sizeof (sin_t): /* Complete IPv4 address */ 4816 sin = (sin_t *)sa; 4817 4818 if (sin == NULL || !OK_32PTR((char *)sin)) 4819 return (EINVAL); 4820 4821 if (connp->conn_family != AF_INET || 4822 sin->sin_family != AF_INET) { 4823 return (EAFNOSUPPORT); 4824 } 4825 v4src = sin->sin_addr.s_addr; 4826 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 4827 if (v4src != INADDR_ANY) { 4828 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 4829 B_TRUE); 4830 } 4831 port = ntohs(sin->sin_port); 4832 break; 4833 4834 case sizeof (sin6_t): /* complete IPv6 address */ 4835 sin6 = (sin6_t *)sa; 
4836 4837 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 4838 return (EINVAL); 4839 4840 if (connp->conn_family != AF_INET6 || 4841 sin6->sin6_family != AF_INET6) { 4842 return (EAFNOSUPPORT); 4843 } 4844 v6src = sin6->sin6_addr; 4845 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 4846 if (connp->conn_ipv6_v6only) 4847 return (EADDRNOTAVAIL); 4848 4849 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); 4850 if (v4src != INADDR_ANY) { 4851 laddr_type = ip_laddr_verify_v4(v4src, 4852 zoneid, ipst, B_FALSE); 4853 } 4854 } else { 4855 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 4856 if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 4857 scopeid = sin6->sin6_scope_id; 4858 laddr_type = ip_laddr_verify_v6(&v6src, 4859 zoneid, ipst, B_TRUE, scopeid); 4860 } 4861 } 4862 port = ntohs(sin6->sin6_port); 4863 break; 4864 4865 default: /* Invalid request */ 4866 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 4867 "udp_bind: bad ADDR_length length %u", len); 4868 return (-TBADADDR); 4869 } 4870 4871 /* Is the local address a valid unicast, multicast, or broadcast? */ 4872 if (laddr_type == IPVL_BAD) 4873 return (EADDRNOTAVAIL); 4874 4875 requested_port = port; 4876 4877 if (requested_port == 0 || !bind_to_req_port_only) 4878 bind_to_req_port_only = B_FALSE; 4879 else /* T_BIND_REQ and requested_port != 0 */ 4880 bind_to_req_port_only = B_TRUE; 4881 4882 if (requested_port == 0) { 4883 /* 4884 * If the application passed in zero for the port number, it 4885 * doesn't care which port number we bind to. Get one in the 4886 * valid range. 4887 */ 4888 if (connp->conn_anon_priv_bind) { 4889 port = udp_get_next_priv_port(udp); 4890 } else { 4891 port = udp_update_next_port(udp, 4892 us->us_next_port_to_try, B_TRUE); 4893 } 4894 } else { 4895 /* 4896 * If the port is in the well-known privileged range, 4897 * make sure the caller was privileged. 
4898 */ 4899 int i; 4900 boolean_t priv = B_FALSE; 4901 4902 if (port < us->us_smallest_nonpriv_port) { 4903 priv = B_TRUE; 4904 } else { 4905 for (i = 0; i < us->us_num_epriv_ports; i++) { 4906 if (port == us->us_epriv_ports[i]) { 4907 priv = B_TRUE; 4908 break; 4909 } 4910 } 4911 } 4912 4913 if (priv) { 4914 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 4915 return (-TACCES); 4916 } 4917 } 4918 4919 if (port == 0) 4920 return (-TNOADDR); 4921 4922 /* 4923 * The state must be TS_UNBND. TPI mandates that users must send 4924 * TPI primitives only 1 at a time and wait for the response before 4925 * sending the next primitive. 4926 */ 4927 mutex_enter(&connp->conn_lock); 4928 if (udp->udp_state != TS_UNBND) { 4929 mutex_exit(&connp->conn_lock); 4930 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 4931 "udp_bind: bad state, %u", udp->udp_state); 4932 return (-TOUTSTATE); 4933 } 4934 /* 4935 * Copy the source address into our udp structure. This address 4936 * may still be zero; if so, IP will fill in the correct address 4937 * each time an outbound packet is passed to it. Since the udp is 4938 * not yet in the bind hash list, we don't grab the uf_lock to 4939 * change conn_ipversion 4940 */ 4941 if (connp->conn_family == AF_INET) { 4942 ASSERT(sin != NULL); 4943 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); 4944 } else { 4945 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 4946 /* 4947 * no need to hold the uf_lock to set the conn_ipversion 4948 * since we are not yet in the fanout list 4949 */ 4950 connp->conn_ipversion = IPV4_VERSION; 4951 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 4952 } else { 4953 connp->conn_ipversion = IPV6_VERSION; 4954 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 4955 } 4956 } 4957 4958 /* 4959 * If conn_reuseaddr is not set, then we have to make sure that 4960 * the IP address and port number the application requested 4961 * (or we selected for the application) is not being used by 4962 * another stream. 
If another stream is already using the 4963 * requested IP address and port, the behavior depends on 4964 * "bind_to_req_port_only". If set the bind fails; otherwise we 4965 * search for any an unused port to bind to the stream. 4966 * 4967 * As per the BSD semantics, as modified by the Deering multicast 4968 * changes, if udp_reuseaddr is set, then we allow multiple binds 4969 * to the same port independent of the local IP address. 4970 * 4971 * This is slightly different than in SunOS 4.X which did not 4972 * support IP multicast. Note that the change implemented by the 4973 * Deering multicast code effects all binds - not only binding 4974 * to IP multicast addresses. 4975 * 4976 * Note that when binding to port zero we ignore SO_REUSEADDR in 4977 * order to guarantee a unique port. 4978 */ 4979 4980 count = 0; 4981 if (connp->conn_anon_priv_bind) { 4982 /* 4983 * loopmax = (IPPORT_RESERVED-1) - 4984 * us->us_min_anonpriv_port + 1 4985 */ 4986 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 4987 } else { 4988 loopmax = us->us_largest_anon_port - 4989 us->us_smallest_anon_port + 1; 4990 } 4991 4992 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 4993 4994 for (;;) { 4995 udp_t *udp1; 4996 boolean_t found_exclbind = B_FALSE; 4997 conn_t *connp1; 4998 4999 /* 5000 * Walk through the list of udp streams bound to 5001 * requested port with the same IP address. 5002 */ 5003 lport = htons(port); 5004 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 5005 us->us_bind_fanout_size)]; 5006 mutex_enter(&udpf->uf_lock); 5007 for (udp1 = udpf->uf_udp; udp1 != NULL; 5008 udp1 = udp1->udp_bind_hash) { 5009 connp1 = udp1->udp_connp; 5010 5011 if (lport != connp1->conn_lport) 5012 continue; 5013 5014 /* 5015 * On a labeled system, we must treat bindings to ports 5016 * on shared IP addresses by sockets with MAC exemption 5017 * privilege as being in all zones, as there's 5018 * otherwise no way to identify the right receiver. 
5019 */ 5020 if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) 5021 continue; 5022 5023 /* 5024 * If UDP_EXCLBIND is set for either the bound or 5025 * binding endpoint, the semantics of bind 5026 * is changed according to the following chart. 5027 * 5028 * spec = specified address (v4 or v6) 5029 * unspec = unspecified address (v4 or v6) 5030 * A = specified addresses are different for endpoints 5031 * 5032 * bound bind to allowed? 5033 * ------------------------------------- 5034 * unspec unspec no 5035 * unspec spec no 5036 * spec unspec no 5037 * spec spec yes if A 5038 * 5039 * For labeled systems, SO_MAC_EXEMPT behaves the same 5040 * as UDP_EXCLBIND, except that zoneid is ignored. 5041 */ 5042 if (connp1->conn_exclbind || connp->conn_exclbind || 5043 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 5044 if (V6_OR_V4_INADDR_ANY( 5045 connp1->conn_bound_addr_v6) || 5046 is_inaddr_any || 5047 IN6_ARE_ADDR_EQUAL( 5048 &connp1->conn_bound_addr_v6, 5049 &v6src)) { 5050 found_exclbind = B_TRUE; 5051 break; 5052 } 5053 continue; 5054 } 5055 5056 /* 5057 * Check ipversion to allow IPv4 and IPv6 sockets to 5058 * have disjoint port number spaces. 5059 */ 5060 if (connp->conn_ipversion != connp1->conn_ipversion) { 5061 5062 /* 5063 * On the first time through the loop, if the 5064 * the user intentionally specified a 5065 * particular port number, then ignore any 5066 * bindings of the other protocol that may 5067 * conflict. This allows the user to bind IPv6 5068 * alone and get both v4 and v6, or bind both 5069 * both and get each seperately. On subsequent 5070 * times through the loop, we're checking a 5071 * port that we chose (not the user) and thus 5072 * we do not allow casual duplicate bindings. 5073 */ 5074 if (count == 0 && requested_port != 0) 5075 continue; 5076 } 5077 5078 /* 5079 * No difference depending on SO_REUSEADDR. 
5080 * 5081 * If existing port is bound to a 5082 * non-wildcard IP address and 5083 * the requesting stream is bound to 5084 * a distinct different IP addresses 5085 * (non-wildcard, also), keep going. 5086 */ 5087 if (!is_inaddr_any && 5088 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && 5089 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, 5090 &v6src)) { 5091 continue; 5092 } 5093 break; 5094 } 5095 5096 if (!found_exclbind && 5097 (connp->conn_reuseaddr && requested_port != 0)) { 5098 break; 5099 } 5100 5101 if (udp1 == NULL) { 5102 /* 5103 * No other stream has this IP address 5104 * and port number. We can use it. 5105 */ 5106 break; 5107 } 5108 mutex_exit(&udpf->uf_lock); 5109 if (bind_to_req_port_only) { 5110 /* 5111 * We get here only when requested port 5112 * is bound (and only first of the for() 5113 * loop iteration). 5114 * 5115 * The semantics of this bind request 5116 * require it to fail so we return from 5117 * the routine (and exit the loop). 5118 * 5119 */ 5120 mutex_exit(&connp->conn_lock); 5121 return (-TADDRBUSY); 5122 } 5123 5124 if (connp->conn_anon_priv_bind) { 5125 port = udp_get_next_priv_port(udp); 5126 } else { 5127 if ((count == 0) && (requested_port != 0)) { 5128 /* 5129 * If the application wants us to find 5130 * a port, get one to start with. Set 5131 * requested_port to 0, so that we will 5132 * update us->us_next_port_to_try below. 5133 */ 5134 port = udp_update_next_port(udp, 5135 us->us_next_port_to_try, B_TRUE); 5136 requested_port = 0; 5137 } else { 5138 port = udp_update_next_port(udp, port + 1, 5139 B_FALSE); 5140 } 5141 } 5142 5143 if (port == 0 || ++count >= loopmax) { 5144 /* 5145 * We've tried every possible port number and 5146 * there are none available, so send an error 5147 * to the user. 5148 */ 5149 mutex_exit(&connp->conn_lock); 5150 return (-TNOADDR); 5151 } 5152 } 5153 5154 /* 5155 * Copy the source address into our udp structure. 
This address 5156 * may still be zero; if so, ip_attr_connect will fill in the correct 5157 * address when a packet is about to be sent. 5158 * If we are binding to a broadcast or multicast address then 5159 * we just set the conn_bound_addr since we don't want to use 5160 * that as the source address when sending. 5161 */ 5162 connp->conn_bound_addr_v6 = v6src; 5163 connp->conn_laddr_v6 = v6src; 5164 if (scopeid != 0) { 5165 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 5166 connp->conn_ixa->ixa_scopeid = scopeid; 5167 connp->conn_incoming_ifindex = scopeid; 5168 } else { 5169 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5170 connp->conn_incoming_ifindex = connp->conn_bound_if; 5171 } 5172 5173 switch (laddr_type) { 5174 case IPVL_UNICAST_UP: 5175 case IPVL_UNICAST_DOWN: 5176 connp->conn_saddr_v6 = v6src; 5177 connp->conn_mcbc_bind = B_FALSE; 5178 break; 5179 case IPVL_MCAST: 5180 case IPVL_BCAST: 5181 /* ip_set_destination will pick a source address later */ 5182 connp->conn_saddr_v6 = ipv6_all_zeros; 5183 connp->conn_mcbc_bind = B_TRUE; 5184 break; 5185 } 5186 5187 /* Any errors after this point should use late_error */ 5188 connp->conn_lport = lport; 5189 5190 /* 5191 * Now reset the next anonymous port if the application requested 5192 * an anonymous port, or we handed out the next anonymous port. 5193 */ 5194 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { 5195 us->us_next_port_to_try = port + 1; 5196 } 5197 5198 /* Initialize the T_BIND_ACK. 
*/ 5199 if (connp->conn_family == AF_INET) { 5200 sin->sin_port = connp->conn_lport; 5201 } else { 5202 sin6->sin6_port = connp->conn_lport; 5203 } 5204 udp->udp_state = TS_IDLE; 5205 udp_bind_hash_insert(udpf, udp); 5206 mutex_exit(&udpf->uf_lock); 5207 mutex_exit(&connp->conn_lock); 5208 5209 if (cl_inet_bind) { 5210 /* 5211 * Running in cluster mode - register bind information 5212 */ 5213 if (connp->conn_ipversion == IPV4_VERSION) { 5214 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5215 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, 5216 (in_port_t)connp->conn_lport, NULL); 5217 } else { 5218 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5219 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, 5220 (in_port_t)connp->conn_lport, NULL); 5221 } 5222 } 5223 5224 mutex_enter(&connp->conn_lock); 5225 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 5226 if (is_system_labeled() && (!connp->conn_anon_port || 5227 connp->conn_anon_mlp)) { 5228 uint16_t mlpport; 5229 zone_t *zone; 5230 5231 zone = crgetzone(cr); 5232 connp->conn_mlp_type = 5233 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : 5234 mlptSingle; 5235 addrtype = tsol_mlp_addr_type( 5236 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 5237 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 5238 if (addrtype == mlptSingle) { 5239 error = -TNOADDR; 5240 mutex_exit(&connp->conn_lock); 5241 goto late_error; 5242 } 5243 mlpport = connp->conn_anon_port ? PMAPPORT : port; 5244 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 5245 addrtype); 5246 5247 /* 5248 * It is a coding error to attempt to bind an MLP port 5249 * without first setting SOL_SOCKET/SCM_UCRED. 5250 */ 5251 if (mlptype != mlptSingle && 5252 connp->conn_mlp_type == mlptSingle) { 5253 error = EINVAL; 5254 mutex_exit(&connp->conn_lock); 5255 goto late_error; 5256 } 5257 5258 /* 5259 * It is an access violation to attempt to bind an MLP port 5260 * without NET_BINDMLP privilege. 
5261 */ 5262 if (mlptype != mlptSingle && 5263 secpolicy_net_bindmlp(cr) != 0) { 5264 if (connp->conn_debug) { 5265 (void) strlog(UDP_MOD_ID, 0, 1, 5266 SL_ERROR|SL_TRACE, 5267 "udp_bind: no priv for multilevel port %d", 5268 mlpport); 5269 } 5270 error = -TACCES; 5271 mutex_exit(&connp->conn_lock); 5272 goto late_error; 5273 } 5274 5275 /* 5276 * If we're specifically binding a shared IP address and the 5277 * port is MLP on shared addresses, then check to see if this 5278 * zone actually owns the MLP. Reject if not. 5279 */ 5280 if (mlptype == mlptShared && addrtype == mlptShared) { 5281 /* 5282 * No need to handle exclusive-stack zones since 5283 * ALL_ZONES only applies to the shared stack. 5284 */ 5285 zoneid_t mlpzone; 5286 5287 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 5288 htons(mlpport)); 5289 if (connp->conn_zoneid != mlpzone) { 5290 if (connp->conn_debug) { 5291 (void) strlog(UDP_MOD_ID, 0, 1, 5292 SL_ERROR|SL_TRACE, 5293 "udp_bind: attempt to bind port " 5294 "%d on shared addr in zone %d " 5295 "(should be %d)", 5296 mlpport, connp->conn_zoneid, 5297 mlpzone); 5298 } 5299 error = -TACCES; 5300 mutex_exit(&connp->conn_lock); 5301 goto late_error; 5302 } 5303 } 5304 if (connp->conn_anon_port) { 5305 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, 5306 port, B_TRUE); 5307 if (error != 0) { 5308 if (connp->conn_debug) { 5309 (void) strlog(UDP_MOD_ID, 0, 1, 5310 SL_ERROR|SL_TRACE, 5311 "udp_bind: cannot establish anon " 5312 "MLP for port %d", port); 5313 } 5314 error = -TACCES; 5315 mutex_exit(&connp->conn_lock); 5316 goto late_error; 5317 } 5318 } 5319 connp->conn_mlp_type = mlptype; 5320 } 5321 5322 /* 5323 * We create an initial header template here to make a subsequent 5324 * sendto have a starting point. Since conn_last_dst is zero the 5325 * first sendto will always follow the 'dst changed' code path. 5326 * Note that we defer massaging options and the related checksum 5327 * adjustment until we have a destination address. 
5328 */ 5329 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5330 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5331 if (error != 0) { 5332 mutex_exit(&connp->conn_lock); 5333 goto late_error; 5334 } 5335 /* Just in case */ 5336 connp->conn_faddr_v6 = ipv6_all_zeros; 5337 connp->conn_fport = 0; 5338 connp->conn_v6lastdst = ipv6_all_zeros; 5339 mutex_exit(&connp->conn_lock); 5340 5341 error = ip_laddr_fanout_insert(connp); 5342 if (error != 0) 5343 goto late_error; 5344 5345 /* Bind succeeded */ 5346 return (0); 5347 5348 late_error: 5349 /* We had already picked the port number, and then the bind failed */ 5350 mutex_enter(&connp->conn_lock); 5351 udpf = &us->us_bind_fanout[ 5352 UDP_BIND_HASH(connp->conn_lport, 5353 us->us_bind_fanout_size)]; 5354 mutex_enter(&udpf->uf_lock); 5355 connp->conn_saddr_v6 = ipv6_all_zeros; 5356 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5357 connp->conn_laddr_v6 = ipv6_all_zeros; 5358 if (scopeid != 0) { 5359 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5360 connp->conn_incoming_ifindex = connp->conn_bound_if; 5361 } 5362 udp->udp_state = TS_UNBND; 5363 udp_bind_hash_remove(udp, B_TRUE); 5364 connp->conn_lport = 0; 5365 mutex_exit(&udpf->uf_lock); 5366 connp->conn_anon_port = B_FALSE; 5367 connp->conn_mlp_type = mlptSingle; 5368 5369 connp->conn_v6lastdst = ipv6_all_zeros; 5370 5371 /* Restore the header that was built above - different source address */ 5372 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5373 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5374 mutex_exit(&connp->conn_lock); 5375 return (error); 5376 } 5377 5378 int 5379 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5380 socklen_t len, cred_t *cr) 5381 { 5382 int error; 5383 conn_t *connp; 5384 5385 /* All Solaris components should pass a cred for this operation. 
*/ 5386 ASSERT(cr != NULL); 5387 5388 connp = (conn_t *)proto_handle; 5389 5390 if (sa == NULL) 5391 error = udp_do_unbind(connp); 5392 else 5393 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 5394 5395 if (error < 0) { 5396 if (error == -TOUTSTATE) 5397 error = EINVAL; 5398 else 5399 error = proto_tlitosyserr(-error); 5400 } 5401 5402 return (error); 5403 } 5404 5405 static int 5406 udp_implicit_bind(conn_t *connp, cred_t *cr) 5407 { 5408 sin6_t sin6addr; 5409 sin_t *sin; 5410 sin6_t *sin6; 5411 socklen_t len; 5412 int error; 5413 5414 /* All Solaris components should pass a cred for this operation. */ 5415 ASSERT(cr != NULL); 5416 5417 if (connp->conn_family == AF_INET) { 5418 len = sizeof (struct sockaddr_in); 5419 sin = (sin_t *)&sin6addr; 5420 *sin = sin_null; 5421 sin->sin_family = AF_INET; 5422 sin->sin_addr.s_addr = INADDR_ANY; 5423 } else { 5424 ASSERT(connp->conn_family == AF_INET6); 5425 len = sizeof (sin6_t); 5426 sin6 = (sin6_t *)&sin6addr; 5427 *sin6 = sin6_null; 5428 sin6->sin6_family = AF_INET6; 5429 V6_SET_ZERO(sin6->sin6_addr); 5430 } 5431 5432 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len, 5433 cr, B_FALSE); 5434 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5435 } 5436 5437 /* 5438 * This routine removes a port number association from a stream. It 5439 * is called by udp_unbind and udp_tpi_unbind. 
5440 */ 5441 static int 5442 udp_do_unbind(conn_t *connp) 5443 { 5444 udp_t *udp = connp->conn_udp; 5445 udp_fanout_t *udpf; 5446 udp_stack_t *us = udp->udp_us; 5447 5448 if (cl_inet_unbind != NULL) { 5449 /* 5450 * Running in cluster mode - register unbind information 5451 */ 5452 if (connp->conn_ipversion == IPV4_VERSION) { 5453 (*cl_inet_unbind)( 5454 connp->conn_netstack->netstack_stackid, 5455 IPPROTO_UDP, AF_INET, 5456 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5457 (in_port_t)connp->conn_lport, NULL); 5458 } else { 5459 (*cl_inet_unbind)( 5460 connp->conn_netstack->netstack_stackid, 5461 IPPROTO_UDP, AF_INET6, 5462 (uint8_t *)&(connp->conn_laddr_v6), 5463 (in_port_t)connp->conn_lport, NULL); 5464 } 5465 } 5466 5467 mutex_enter(&connp->conn_lock); 5468 /* If a bind has not been done, we can't unbind. */ 5469 if (udp->udp_state == TS_UNBND) { 5470 mutex_exit(&connp->conn_lock); 5471 return (-TOUTSTATE); 5472 } 5473 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5474 us->us_bind_fanout_size)]; 5475 mutex_enter(&udpf->uf_lock); 5476 udp_bind_hash_remove(udp, B_TRUE); 5477 connp->conn_saddr_v6 = ipv6_all_zeros; 5478 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5479 connp->conn_laddr_v6 = ipv6_all_zeros; 5480 connp->conn_mcbc_bind = B_FALSE; 5481 connp->conn_lport = 0; 5482 /* In case we were also connected */ 5483 connp->conn_faddr_v6 = ipv6_all_zeros; 5484 connp->conn_fport = 0; 5485 mutex_exit(&udpf->uf_lock); 5486 5487 connp->conn_v6lastdst = ipv6_all_zeros; 5488 udp->udp_state = TS_UNBND; 5489 5490 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5491 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5492 mutex_exit(&connp->conn_lock); 5493 5494 ip_unbind(connp); 5495 5496 return (0); 5497 } 5498 5499 /* 5500 * It associates a default destination address with the stream. 
5501 */ 5502 static int 5503 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 5504 cred_t *cr, pid_t pid) 5505 { 5506 sin6_t *sin6; 5507 sin_t *sin; 5508 in6_addr_t v6dst; 5509 ipaddr_t v4dst; 5510 uint16_t dstport; 5511 uint32_t flowinfo; 5512 udp_fanout_t *udpf; 5513 udp_t *udp, *udp1; 5514 ushort_t ipversion; 5515 udp_stack_t *us; 5516 int error; 5517 conn_t *connp1; 5518 ip_xmit_attr_t *ixa; 5519 ip_xmit_attr_t *oldixa; 5520 uint_t scopeid = 0; 5521 uint_t srcid = 0; 5522 in6_addr_t v6src = connp->conn_saddr_v6; 5523 5524 udp = connp->conn_udp; 5525 us = udp->udp_us; 5526 5527 /* 5528 * Address has been verified by the caller 5529 */ 5530 switch (len) { 5531 default: 5532 /* 5533 * Should never happen 5534 */ 5535 return (EINVAL); 5536 5537 case sizeof (sin_t): 5538 sin = (sin_t *)sa; 5539 v4dst = sin->sin_addr.s_addr; 5540 dstport = sin->sin_port; 5541 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5542 ASSERT(connp->conn_ipversion == IPV4_VERSION); 5543 ipversion = IPV4_VERSION; 5544 break; 5545 5546 case sizeof (sin6_t): 5547 sin6 = (sin6_t *)sa; 5548 v6dst = sin6->sin6_addr; 5549 dstport = sin6->sin6_port; 5550 srcid = sin6->__sin6_src_id; 5551 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5552 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 5553 connp->conn_netstack); 5554 } 5555 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 5556 if (connp->conn_ipv6_v6only) 5557 return (EADDRNOTAVAIL); 5558 5559 /* 5560 * Destination adress is mapped IPv6 address. 5561 * Source bound address should be unspecified or 5562 * IPv6 mapped address as well. 
5563 */ 5564 if (!IN6_IS_ADDR_UNSPECIFIED( 5565 &connp->conn_bound_addr_v6) && 5566 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { 5567 return (EADDRNOTAVAIL); 5568 } 5569 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 5570 ipversion = IPV4_VERSION; 5571 flowinfo = 0; 5572 } else { 5573 ipversion = IPV6_VERSION; 5574 flowinfo = sin6->sin6_flowinfo; 5575 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 5576 scopeid = sin6->sin6_scope_id; 5577 } 5578 break; 5579 } 5580 5581 if (dstport == 0) 5582 return (-TBADADDR); 5583 5584 /* 5585 * If there is a different thread using conn_ixa then we get a new 5586 * copy and cut the old one loose from conn_ixa. Otherwise we use 5587 * conn_ixa and prevent any other thread from using/changing it. 5588 * Once connect() is done other threads can use conn_ixa since the 5589 * refcnt will be back at one. 5590 * We defer updating conn_ixa until later to handle any concurrent 5591 * conn_ixa_cleanup thread. 5592 */ 5593 ixa = conn_get_ixa(connp, B_FALSE); 5594 if (ixa == NULL) 5595 return (ENOMEM); 5596 5597 mutex_enter(&connp->conn_lock); 5598 /* 5599 * This udp_t must have bound to a port already before doing a connect. 5600 * Reject if a connect is in progress (we drop conn_lock during 5601 * udp_do_connect). 
5602 */ 5603 if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) { 5604 mutex_exit(&connp->conn_lock); 5605 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5606 "udp_connect: bad state, %u", udp->udp_state); 5607 ixa_refrele(ixa); 5608 return (-TOUTSTATE); 5609 } 5610 ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL); 5611 5612 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5613 us->us_bind_fanout_size)]; 5614 5615 mutex_enter(&udpf->uf_lock); 5616 if (udp->udp_state == TS_DATA_XFER) { 5617 /* Already connected - clear out state */ 5618 if (connp->conn_mcbc_bind) 5619 connp->conn_saddr_v6 = ipv6_all_zeros; 5620 else 5621 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 5622 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 5623 connp->conn_faddr_v6 = ipv6_all_zeros; 5624 connp->conn_fport = 0; 5625 udp->udp_state = TS_IDLE; 5626 } 5627 5628 connp->conn_fport = dstport; 5629 connp->conn_ipversion = ipversion; 5630 if (ipversion == IPV4_VERSION) { 5631 /* 5632 * Interpret a zero destination to mean loopback. 5633 * Update the T_CONN_REQ (sin/sin6) since it is used to 5634 * generate the T_CONN_CON. 5635 */ 5636 if (v4dst == INADDR_ANY) { 5637 v4dst = htonl(INADDR_LOOPBACK); 5638 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5639 if (connp->conn_family == AF_INET) { 5640 sin->sin_addr.s_addr = v4dst; 5641 } else { 5642 sin6->sin6_addr = v6dst; 5643 } 5644 } 5645 connp->conn_faddr_v6 = v6dst; 5646 connp->conn_flowinfo = 0; 5647 } else { 5648 ASSERT(connp->conn_ipversion == IPV6_VERSION); 5649 /* 5650 * Interpret a zero destination to mean loopback. 5651 * Update the T_CONN_REQ (sin/sin6) since it is used to 5652 * generate the T_CONN_CON. 
5653 */ 5654 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 5655 v6dst = ipv6_loopback; 5656 sin6->sin6_addr = v6dst; 5657 } 5658 connp->conn_faddr_v6 = v6dst; 5659 connp->conn_flowinfo = flowinfo; 5660 } 5661 mutex_exit(&udpf->uf_lock); 5662 5663 /* 5664 * We update our cred/cpid based on the caller of connect 5665 */ 5666 if (connp->conn_cred != cr) { 5667 crhold(cr); 5668 crfree(connp->conn_cred); 5669 connp->conn_cred = cr; 5670 } 5671 connp->conn_cpid = pid; 5672 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 5673 ixa->ixa_cred = cr; 5674 ixa->ixa_cpid = pid; 5675 if (is_system_labeled()) { 5676 /* We need to restart with a label based on the cred */ 5677 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 5678 } 5679 5680 if (scopeid != 0) { 5681 ixa->ixa_flags |= IXAF_SCOPEID_SET; 5682 ixa->ixa_scopeid = scopeid; 5683 connp->conn_incoming_ifindex = scopeid; 5684 } else { 5685 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5686 connp->conn_incoming_ifindex = connp->conn_bound_if; 5687 } 5688 /* 5689 * conn_connect will drop conn_lock and reacquire it. 5690 * To prevent a send* from messing with this udp_t while the lock 5691 * is dropped we set udp_state and clear conn_v6lastdst. 5692 * That will make all send* fail with EISCONN. 5693 */ 5694 connp->conn_v6lastdst = ipv6_all_zeros; 5695 udp->udp_state = TS_WCON_CREQ; 5696 5697 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC); 5698 mutex_exit(&connp->conn_lock); 5699 if (error != 0) 5700 goto connect_failed; 5701 5702 /* 5703 * The addresses have been verified. Time to insert in 5704 * the correct fanout list. 
5705 */ 5706 error = ipcl_conn_insert(connp); 5707 if (error != 0) 5708 goto connect_failed; 5709 5710 mutex_enter(&connp->conn_lock); 5711 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5712 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5713 if (error != 0) { 5714 mutex_exit(&connp->conn_lock); 5715 goto connect_failed; 5716 } 5717 5718 udp->udp_state = TS_DATA_XFER; 5719 /* Record this as the "last" send even though we haven't sent any */ 5720 connp->conn_v6lastdst = connp->conn_faddr_v6; 5721 connp->conn_lastipversion = connp->conn_ipversion; 5722 connp->conn_lastdstport = connp->conn_fport; 5723 connp->conn_lastflowinfo = connp->conn_flowinfo; 5724 connp->conn_lastscopeid = scopeid; 5725 connp->conn_lastsrcid = srcid; 5726 /* Also remember a source to use together with lastdst */ 5727 connp->conn_v6lastsrc = v6src; 5728 5729 oldixa = conn_replace_ixa(connp, ixa); 5730 mutex_exit(&connp->conn_lock); 5731 ixa_refrele(oldixa); 5732 5733 /* 5734 * We've picked a source address above. Now we can 5735 * verify that the src/port/dst/port is unique for all 5736 * connections in TS_DATA_XFER, skipping ourselves. 
5737 */ 5738 mutex_enter(&udpf->uf_lock); 5739 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 5740 if (udp1->udp_state != TS_DATA_XFER) 5741 continue; 5742 5743 if (udp1 == udp) 5744 continue; 5745 5746 connp1 = udp1->udp_connp; 5747 if (connp->conn_lport != connp1->conn_lport || 5748 connp->conn_ipversion != connp1->conn_ipversion || 5749 dstport != connp1->conn_fport || 5750 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 5751 &connp1->conn_laddr_v6) || 5752 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) || 5753 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) || 5754 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid))) 5755 continue; 5756 mutex_exit(&udpf->uf_lock); 5757 error = -TBADADDR; 5758 goto connect_failed; 5759 } 5760 if (cl_inet_connect2 != NULL) { 5761 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 5762 if (error != 0) { 5763 mutex_exit(&udpf->uf_lock); 5764 error = -TBADADDR; 5765 goto connect_failed; 5766 } 5767 } 5768 mutex_exit(&udpf->uf_lock); 5769 5770 ixa_refrele(ixa); 5771 return (0); 5772 5773 connect_failed: 5774 if (ixa != NULL) 5775 ixa_refrele(ixa); 5776 mutex_enter(&connp->conn_lock); 5777 mutex_enter(&udpf->uf_lock); 5778 udp->udp_state = TS_IDLE; 5779 connp->conn_faddr_v6 = ipv6_all_zeros; 5780 connp->conn_fport = 0; 5781 /* In case the source address was set above */ 5782 if (connp->conn_mcbc_bind) 5783 connp->conn_saddr_v6 = ipv6_all_zeros; 5784 else 5785 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 5786 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 5787 mutex_exit(&udpf->uf_lock); 5788 5789 connp->conn_v6lastdst = ipv6_all_zeros; 5790 connp->conn_flowinfo = 0; 5791 5792 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5793 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5794 mutex_exit(&connp->conn_lock); 5795 return (error); 5796 } 5797 5798 static int 5799 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5800 socklen_t len, sock_connid_t *id, 
cred_t *cr) 5801 { 5802 conn_t *connp = (conn_t *)proto_handle; 5803 udp_t *udp = connp->conn_udp; 5804 int error; 5805 boolean_t did_bind = B_FALSE; 5806 pid_t pid = curproc->p_pid; 5807 5808 /* All Solaris components should pass a cred for this operation. */ 5809 ASSERT(cr != NULL); 5810 5811 if (sa == NULL) { 5812 /* 5813 * Disconnect 5814 * Make sure we are connected 5815 */ 5816 if (udp->udp_state != TS_DATA_XFER) 5817 return (EINVAL); 5818 5819 error = udp_disconnect(connp); 5820 return (error); 5821 } 5822 5823 error = proto_verify_ip_addr(connp->conn_family, sa, len); 5824 if (error != 0) 5825 goto done; 5826 5827 /* do an implicit bind if necessary */ 5828 if (udp->udp_state == TS_UNBND) { 5829 error = udp_implicit_bind(connp, cr); 5830 /* 5831 * We could be racing with an actual bind, in which case 5832 * we would see EPROTO. We cross our fingers and try 5833 * to connect. 5834 */ 5835 if (!(error == 0 || error == EPROTO)) 5836 goto done; 5837 did_bind = B_TRUE; 5838 } 5839 /* 5840 * set SO_DGRAM_ERRIND 5841 */ 5842 connp->conn_dgram_errind = B_TRUE; 5843 5844 error = udp_do_connect(connp, sa, len, cr, pid); 5845 5846 if (error != 0 && did_bind) { 5847 int unbind_err; 5848 5849 unbind_err = udp_do_unbind(connp); 5850 ASSERT(unbind_err == 0); 5851 } 5852 5853 if (error == 0) { 5854 *id = 0; 5855 (*connp->conn_upcalls->su_connected) 5856 (connp->conn_upper_handle, 0, NULL, -1); 5857 } else if (error < 0) { 5858 error = proto_tlitosyserr(-error); 5859 } 5860 5861 done: 5862 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 5863 /* 5864 * No need to hold locks to set state 5865 * after connect failure socket state is undefined 5866 * We set the state only to imitate old sockfs behavior 5867 */ 5868 udp->udp_state = TS_IDLE; 5869 } 5870 return (error); 5871 } 5872 5873 int 5874 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 5875 cred_t *cr) 5876 { 5877 sin6_t *sin6; 5878 sin_t *sin = NULL; 5879 uint_t srcid; 5880 conn_t *connp = 
(conn_t *)proto_handle; 5881 udp_t *udp = connp->conn_udp; 5882 int error = 0; 5883 udp_stack_t *us = udp->udp_us; 5884 ushort_t ipversion; 5885 pid_t pid = curproc->p_pid; 5886 ip_xmit_attr_t *ixa; 5887 5888 ASSERT(DB_TYPE(mp) == M_DATA); 5889 5890 /* All Solaris components should pass a cred for this operation. */ 5891 ASSERT(cr != NULL); 5892 5893 /* do an implicit bind if necessary */ 5894 if (udp->udp_state == TS_UNBND) { 5895 error = udp_implicit_bind(connp, cr); 5896 /* 5897 * We could be racing with an actual bind, in which case 5898 * we would see EPROTO. We cross our fingers and try 5899 * to connect. 5900 */ 5901 if (!(error == 0 || error == EPROTO)) { 5902 freemsg(mp); 5903 return (error); 5904 } 5905 } 5906 5907 /* Connected? */ 5908 if (msg->msg_name == NULL) { 5909 if (udp->udp_state != TS_DATA_XFER) { 5910 UDPS_BUMP_MIB(us, udpOutErrors); 5911 return (EDESTADDRREQ); 5912 } 5913 if (msg->msg_controllen != 0) { 5914 error = udp_output_ancillary(connp, NULL, NULL, mp, 5915 NULL, msg, cr, pid); 5916 } else { 5917 error = udp_output_connected(connp, mp, cr, pid); 5918 } 5919 if (us->us_sendto_ignerr) 5920 return (0); 5921 else 5922 return (error); 5923 } 5924 if (udp->udp_state == TS_DATA_XFER) { 5925 UDPS_BUMP_MIB(us, udpOutErrors); 5926 return (EISCONN); 5927 } 5928 error = proto_verify_ip_addr(connp->conn_family, 5929 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 5930 if (error != 0) { 5931 UDPS_BUMP_MIB(us, udpOutErrors); 5932 return (error); 5933 } 5934 switch (connp->conn_family) { 5935 case AF_INET6: 5936 sin6 = (sin6_t *)msg->msg_name; 5937 5938 srcid = sin6->__sin6_src_id; 5939 5940 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5941 /* 5942 * Destination is a non-IPv4-compatible IPv6 address. 5943 * Send out an IPv6 format packet. 5944 */ 5945 5946 /* 5947 * If the local address is a mapped address return 5948 * an error. 
5949 * It would be possible to send an IPv6 packet but the 5950 * response would never make it back to the application 5951 * since it is bound to a mapped address. 5952 */ 5953 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 5954 UDPS_BUMP_MIB(us, udpOutErrors); 5955 return (EADDRNOTAVAIL); 5956 } 5957 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 5958 sin6->sin6_addr = ipv6_loopback; 5959 ipversion = IPV6_VERSION; 5960 } else { 5961 if (connp->conn_ipv6_v6only) { 5962 UDPS_BUMP_MIB(us, udpOutErrors); 5963 return (EADDRNOTAVAIL); 5964 } 5965 5966 /* 5967 * If the local address is not zero or a mapped address 5968 * return an error. It would be possible to send an 5969 * IPv4 packet but the response would never make it 5970 * back to the application since it is bound to a 5971 * non-mapped address. 5972 */ 5973 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 5974 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 5975 UDPS_BUMP_MIB(us, udpOutErrors); 5976 return (EADDRNOTAVAIL); 5977 } 5978 5979 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 5980 V4_PART_OF_V6(sin6->sin6_addr) = 5981 htonl(INADDR_LOOPBACK); 5982 } 5983 ipversion = IPV4_VERSION; 5984 } 5985 5986 /* 5987 * We have to allocate an ip_xmit_attr_t before we grab 5988 * conn_lock and we need to hold conn_lock once we've check 5989 * conn_same_as_last_v6 to handle concurrent send* calls on a 5990 * socket. 
5991 */ 5992 if (msg->msg_controllen == 0) { 5993 ixa = conn_get_ixa(connp, B_FALSE); 5994 if (ixa == NULL) { 5995 UDPS_BUMP_MIB(us, udpOutErrors); 5996 return (ENOMEM); 5997 } 5998 } else { 5999 ixa = NULL; 6000 } 6001 mutex_enter(&connp->conn_lock); 6002 if (udp->udp_delayed_error != 0) { 6003 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 6004 6005 error = udp->udp_delayed_error; 6006 udp->udp_delayed_error = 0; 6007 6008 /* Compare IP address, port, and family */ 6009 6010 if (sin6->sin6_port == sin2->sin6_port && 6011 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 6012 &sin2->sin6_addr) && 6013 sin6->sin6_family == sin2->sin6_family) { 6014 mutex_exit(&connp->conn_lock); 6015 UDPS_BUMP_MIB(us, udpOutErrors); 6016 if (ixa != NULL) 6017 ixa_refrele(ixa); 6018 return (error); 6019 } 6020 } 6021 6022 if (msg->msg_controllen != 0) { 6023 mutex_exit(&connp->conn_lock); 6024 ASSERT(ixa == NULL); 6025 error = udp_output_ancillary(connp, NULL, sin6, mp, 6026 NULL, msg, cr, pid); 6027 } else if (conn_same_as_last_v6(connp, sin6) && 6028 connp->conn_lastsrcid == srcid && 6029 ipsec_outbound_policy_current(ixa)) { 6030 /* udp_output_lastdst drops conn_lock */ 6031 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6032 } else { 6033 /* udp_output_newdst drops conn_lock */ 6034 error = udp_output_newdst(connp, mp, NULL, sin6, 6035 ipversion, cr, pid, ixa); 6036 } 6037 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6038 if (us->us_sendto_ignerr) 6039 return (0); 6040 else 6041 return (error); 6042 case AF_INET: 6043 sin = (sin_t *)msg->msg_name; 6044 6045 ipversion = IPV4_VERSION; 6046 6047 if (sin->sin_addr.s_addr == INADDR_ANY) 6048 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 6049 6050 /* 6051 * We have to allocate an ip_xmit_attr_t before we grab 6052 * conn_lock and we need to hold conn_lock once we've check 6053 * conn_same_as_last_v6 to handle concurrent send* on a socket. 
6054 */ 6055 if (msg->msg_controllen == 0) { 6056 ixa = conn_get_ixa(connp, B_FALSE); 6057 if (ixa == NULL) { 6058 UDPS_BUMP_MIB(us, udpOutErrors); 6059 return (ENOMEM); 6060 } 6061 } else { 6062 ixa = NULL; 6063 } 6064 mutex_enter(&connp->conn_lock); 6065 if (udp->udp_delayed_error != 0) { 6066 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 6067 6068 error = udp->udp_delayed_error; 6069 udp->udp_delayed_error = 0; 6070 6071 /* Compare IP address and port */ 6072 6073 if (sin->sin_port == sin2->sin_port && 6074 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) { 6075 mutex_exit(&connp->conn_lock); 6076 UDPS_BUMP_MIB(us, udpOutErrors); 6077 if (ixa != NULL) 6078 ixa_refrele(ixa); 6079 return (error); 6080 } 6081 } 6082 if (msg->msg_controllen != 0) { 6083 mutex_exit(&connp->conn_lock); 6084 ASSERT(ixa == NULL); 6085 error = udp_output_ancillary(connp, sin, NULL, mp, 6086 NULL, msg, cr, pid); 6087 } else if (conn_same_as_last_v4(connp, sin) && 6088 ipsec_outbound_policy_current(ixa)) { 6089 /* udp_output_lastdst drops conn_lock */ 6090 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6091 } else { 6092 /* udp_output_newdst drops conn_lock */ 6093 error = udp_output_newdst(connp, mp, sin, NULL, 6094 ipversion, cr, pid, ixa); 6095 } 6096 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6097 if (us->us_sendto_ignerr) 6098 return (0); 6099 else 6100 return (error); 6101 default: 6102 return (EINVAL); 6103 } 6104 } 6105 6106 int 6107 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 6108 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb, 6109 sock_quiesce_arg_t *arg) 6110 { 6111 conn_t *connp = (conn_t *)proto_handle; 6112 udp_t *udp; 6113 struct T_capability_ack tca; 6114 struct sockaddr_in6 laddr, faddr; 6115 socklen_t laddrlen, faddrlen; 6116 short opts; 6117 struct stroptions *stropt; 6118 mblk_t *mp, *stropt_mp; 6119 int error; 6120 6121 udp = connp->conn_udp; 6122 6123 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 6124 6125 /* 6126 * 
setup the fallback stream that was allocated 6127 */ 6128 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6129 connp->conn_minor_arena = WR(q)->q_ptr; 6130 6131 RD(q)->q_ptr = WR(q)->q_ptr = connp; 6132 6133 WR(q)->q_qinfo = &udp_winit; 6134 6135 connp->conn_rq = RD(q); 6136 connp->conn_wq = WR(q); 6137 6138 /* Notify stream head about options before sending up data */ 6139 stropt_mp->b_datap->db_type = M_SETOPTS; 6140 stropt_mp->b_wptr += sizeof (*stropt); 6141 stropt = (struct stroptions *)stropt_mp->b_rptr; 6142 stropt->so_flags = SO_WROFF | SO_HIWAT; 6143 stropt->so_wroff = connp->conn_wroff; 6144 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 6145 putnext(RD(q), stropt_mp); 6146 6147 /* 6148 * Free the helper stream 6149 */ 6150 ip_free_helper_stream(connp); 6151 6152 if (!issocket) 6153 udp_use_pure_tpi(udp); 6154 6155 /* 6156 * Collect the information needed to sync with the sonode 6157 */ 6158 udp_do_capability_ack(udp, &tca, TC1_INFO); 6159 6160 laddrlen = faddrlen = sizeof (sin6_t); 6161 (void) udp_getsockname((sock_lower_handle_t)connp, 6162 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6163 error = udp_getpeername((sock_lower_handle_t)connp, 6164 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6165 if (error != 0) 6166 faddrlen = 0; 6167 6168 opts = 0; 6169 if (connp->conn_dgram_errind) 6170 opts |= SO_DGRAM_ERRIND; 6171 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 6172 opts |= SO_DONTROUTE; 6173 6174 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca, 6175 (struct sockaddr *)&laddr, laddrlen, 6176 (struct sockaddr *)&faddr, faddrlen, opts); 6177 6178 mutex_enter(&udp->udp_recv_lock); 6179 /* 6180 * Attempts to send data up during fallback will result in it being 6181 * queued in udp_t. First push up the datagrams obtained from the 6182 * socket, then any packets queued in udp_t. 
6183 */ 6184 if (mp != NULL) { 6185 mp->b_next = udp->udp_fallback_queue_head; 6186 udp->udp_fallback_queue_head = mp; 6187 } 6188 while (udp->udp_fallback_queue_head != NULL) { 6189 mp = udp->udp_fallback_queue_head; 6190 udp->udp_fallback_queue_head = mp->b_next; 6191 mutex_exit(&udp->udp_recv_lock); 6192 mp->b_next = NULL; 6193 putnext(RD(q), mp); 6194 mutex_enter(&udp->udp_recv_lock); 6195 } 6196 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6197 /* 6198 * No longer a streams less socket 6199 */ 6200 mutex_enter(&connp->conn_lock); 6201 connp->conn_flags &= ~IPCL_NONSTR; 6202 mutex_exit(&connp->conn_lock); 6203 6204 mutex_exit(&udp->udp_recv_lock); 6205 6206 ASSERT(connp->conn_ref >= 1); 6207 6208 return (0); 6209 } 6210 6211 /* ARGSUSED3 */ 6212 int 6213 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6214 socklen_t *salenp, cred_t *cr) 6215 { 6216 conn_t *connp = (conn_t *)proto_handle; 6217 udp_t *udp = connp->conn_udp; 6218 int error; 6219 6220 /* All Solaris components should pass a cred for this operation. */ 6221 ASSERT(cr != NULL); 6222 6223 mutex_enter(&connp->conn_lock); 6224 if (udp->udp_state != TS_DATA_XFER) 6225 error = ENOTCONN; 6226 else 6227 error = conn_getpeername(connp, sa, salenp); 6228 mutex_exit(&connp->conn_lock); 6229 return (error); 6230 } 6231 6232 /* ARGSUSED3 */ 6233 int 6234 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6235 socklen_t *salenp, cred_t *cr) 6236 { 6237 conn_t *connp = (conn_t *)proto_handle; 6238 int error; 6239 6240 /* All Solaris components should pass a cred for this operation. 
*/ 6241 ASSERT(cr != NULL); 6242 6243 mutex_enter(&connp->conn_lock); 6244 error = conn_getsockname(connp, sa, salenp); 6245 mutex_exit(&connp->conn_lock); 6246 return (error); 6247 } 6248 6249 int 6250 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6251 void *optvalp, socklen_t *optlen, cred_t *cr) 6252 { 6253 conn_t *connp = (conn_t *)proto_handle; 6254 int error; 6255 t_uscalar_t max_optbuf_len; 6256 void *optvalp_buf; 6257 int len; 6258 6259 /* All Solaris components should pass a cred for this operation. */ 6260 ASSERT(cr != NULL); 6261 6262 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6263 udp_opt_obj.odb_opt_des_arr, 6264 udp_opt_obj.odb_opt_arr_cnt, 6265 B_FALSE, B_TRUE, cr); 6266 if (error != 0) { 6267 if (error < 0) 6268 error = proto_tlitosyserr(-error); 6269 return (error); 6270 } 6271 6272 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6273 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6274 if (len == -1) { 6275 kmem_free(optvalp_buf, max_optbuf_len); 6276 return (EINVAL); 6277 } 6278 6279 /* 6280 * update optlen and copy option value 6281 */ 6282 t_uscalar_t size = MIN(len, *optlen); 6283 6284 bcopy(optvalp_buf, optvalp, size); 6285 bcopy(&size, optlen, sizeof (size)); 6286 6287 kmem_free(optvalp_buf, max_optbuf_len); 6288 return (0); 6289 } 6290 6291 int 6292 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6293 const void *optvalp, socklen_t optlen, cred_t *cr) 6294 { 6295 conn_t *connp = (conn_t *)proto_handle; 6296 int error; 6297 6298 /* All Solaris components should pass a cred for this operation. 
*/ 6299 ASSERT(cr != NULL); 6300 6301 error = proto_opt_check(level, option_name, optlen, NULL, 6302 udp_opt_obj.odb_opt_des_arr, 6303 udp_opt_obj.odb_opt_arr_cnt, 6304 B_TRUE, B_FALSE, cr); 6305 6306 if (error != 0) { 6307 if (error < 0) 6308 error = proto_tlitosyserr(-error); 6309 return (error); 6310 } 6311 6312 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6313 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6314 NULL, cr); 6315 6316 ASSERT(error >= 0); 6317 6318 return (error); 6319 } 6320 6321 void 6322 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6323 { 6324 conn_t *connp = (conn_t *)proto_handle; 6325 udp_t *udp = connp->conn_udp; 6326 6327 mutex_enter(&udp->udp_recv_lock); 6328 connp->conn_flow_cntrld = B_FALSE; 6329 mutex_exit(&udp->udp_recv_lock); 6330 } 6331 6332 /* ARGSUSED2 */ 6333 int 6334 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6335 { 6336 conn_t *connp = (conn_t *)proto_handle; 6337 6338 /* All Solaris components should pass a cred for this operation. */ 6339 ASSERT(cr != NULL); 6340 6341 /* shut down the send side */ 6342 if (how != SHUT_RD) 6343 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6344 SOCK_OPCTL_SHUT_SEND, 0); 6345 /* shut down the recv side */ 6346 if (how != SHUT_WR) 6347 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6348 SOCK_OPCTL_SHUT_RECV, 0); 6349 return (0); 6350 } 6351 6352 int 6353 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6354 int mode, int32_t *rvalp, cred_t *cr) 6355 { 6356 conn_t *connp = (conn_t *)proto_handle; 6357 int error; 6358 6359 /* All Solaris components should pass a cred for this operation. */ 6360 ASSERT(cr != NULL); 6361 6362 /* 6363 * If we don't have a helper stream then create one. 6364 * ip_create_helper_stream takes care of locking the conn_t, 6365 * so this check for NULL is just a performance optimization. 
6366 */ 6367 if (connp->conn_helper_info == NULL) { 6368 udp_stack_t *us = connp->conn_udp->udp_us; 6369 6370 ASSERT(us->us_ldi_ident != NULL); 6371 6372 /* 6373 * Create a helper stream for non-STREAMS socket. 6374 */ 6375 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6376 if (error != 0) { 6377 ip0dbg(("tcp_ioctl: create of IP helper stream " 6378 "failed %d\n", error)); 6379 return (error); 6380 } 6381 } 6382 6383 switch (cmd) { 6384 case _SIOCSOCKFALLBACK: 6385 case TI_GETPEERNAME: 6386 case TI_GETMYNAME: 6387 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6388 cmd)); 6389 error = EINVAL; 6390 break; 6391 default: 6392 /* 6393 * Pass on to IP using helper stream 6394 */ 6395 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6396 cmd, arg, mode, cr, rvalp); 6397 break; 6398 } 6399 return (error); 6400 } 6401 6402 /* ARGSUSED */ 6403 int 6404 udp_accept(sock_lower_handle_t lproto_handle, 6405 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6406 cred_t *cr) 6407 { 6408 return (EOPNOTSUPP); 6409 } 6410 6411 /* ARGSUSED */ 6412 int 6413 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6414 { 6415 return (EOPNOTSUPP); 6416 } 6417 6418 sock_downcalls_t sock_udp_downcalls = { 6419 udp_activate, /* sd_activate */ 6420 udp_accept, /* sd_accept */ 6421 udp_bind, /* sd_bind */ 6422 udp_listen, /* sd_listen */ 6423 udp_connect, /* sd_connect */ 6424 udp_getpeername, /* sd_getpeername */ 6425 udp_getsockname, /* sd_getsockname */ 6426 udp_getsockopt, /* sd_getsockopt */ 6427 udp_setsockopt, /* sd_setsockopt */ 6428 udp_send, /* sd_send */ 6429 NULL, /* sd_send_uio */ 6430 NULL, /* sd_recv_uio */ 6431 NULL, /* sd_poll */ 6432 udp_shutdown, /* sd_shutdown */ 6433 udp_clr_flowctrl, /* sd_setflowctrl */ 6434 udp_ioctl, /* sd_ioctl */ 6435 udp_close /* sd_close */ 6436 }; 6437