1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright 2013 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/suntpi.h> 39 #include <sys/xti_inet.h> 40 #include <sys/kmem.h> 41 #include <sys/cred_impl.h> 42 #include <sys/policy.h> 43 #include <sys/priv.h> 44 #include <sys/ucred.h> 45 #include <sys/zone.h> 46 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/sockio.h> 50 #include <sys/vtrace.h> 51 #include <sys/sdt.h> 52 #include <sys/debug.h> 53 #include <sys/isa_defs.h> 54 #include <sys/random.h> 55 #include <netinet/in.h> 56 #include <netinet/ip6.h> 57 #include <netinet/icmp6.h> 58 #include <netinet/udp.h> 59 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip_impl.h> 63 #include <inet/ipsec_impl.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ire.h> 66 #include <inet/ip_if.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_ndp.h> 69 #include <inet/proto_set.h> 70 #include <inet/mib2.h> 71 #include <inet/optcom.h> 72 #include <inet/snmpcom.h> 73 #include <inet/kstatcom.h> 74 #include <inet/ipclassifier.h> 75 #include <sys/squeue_impl.h> 76 #include <inet/ipnet.h> 77 #include <sys/ethernet.h> 78 79 #include <sys/tsol/label.h> 80 #include <sys/tsol/tnet.h> 81 #include <rpc/pmap_prot.h> 82 83 #include <inet/udp_impl.h> 84 85 /* 86 * Synchronization notes: 87 * 88 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 89 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 90 * protects the contents of the udp_t. uf_lock protects the address and the 91 * fanout information. 92 * The lock order is conn_lock -> uf_lock. 93 * 94 * The fanout lock uf_lock: 95 * When a UDP endpoint is bound to a local port, it is inserted into 96 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 97 * The size of the array is controlled by the udp_bind_fanout_size variable. 98 * This variable can be changed in /etc/system if the default value is 99 * not large enough. Each bind hash bucket is protected by a per bucket 100 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 101 * structure and a few other fields in the udp_t. A UDP endpoint is removed 102 * from the bind hash list only when it is being unbound or being closed. 103 * The per bucket lock also protects a UDP endpoint's state changes. 104 * 105 * Plumbing notes: 106 * UDP is always a device driver. For compatibility with mibopen() code 107 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 108 * dummy module. 109 * 110 * The above implies that we don't support any intermediate module to 111 * reside in between /dev/ip and udp -- in fact, we never supported such 112 * scenario in the past as the inter-layer communication semantics have 113 * always been private. 114 */ 115 116 /* For /etc/system control */ 117 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 118 119 static void udp_addr_req(queue_t *q, mblk_t *mp); 120 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 121 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 122 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 123 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 124 const in6_addr_t *, in_port_t, uint32_t); 125 static void udp_capability_req(queue_t *q, mblk_t *mp); 126 static int udp_tpi_close(queue_t *q, int flags); 127 static void udp_close_free(conn_t *); 128 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 129 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 130 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 131 int sys_error); 132 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 133 t_scalar_t tlierr, int sys_error); 134 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 135 cred_t *cr); 136 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 137 char *value, caddr_t cp, cred_t *cr); 138 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 139 char *value, caddr_t cp, cred_t *cr); 140 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 141 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 142 ip_recv_attr_t *ira); 143 static void udp_info_req(queue_t *q, mblk_t *mp); 144 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 145 static void udp_lrput(queue_t *, mblk_t *); 146 static void udp_lwput(queue_t *, mblk_t *); 147 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 148 cred_t *credp, boolean_t isv6); 149 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 150 cred_t *credp); 151 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 152 cred_t *credp); 153 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 154 int udp_opt_set(conn_t *connp, uint_t optset_context, 155 int level, int name, uint_t inlen, 156 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 157 void *thisdg_attrs, cred_t *cr); 158 int udp_opt_get(conn_t *connp, int level, int name, 159 uchar_t *ptr); 160 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 161 pid_t pid); 162 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 163 pid_t pid, ip_xmit_attr_t *ixa); 164 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 165 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 166 ip_xmit_attr_t *ixa); 167 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 168 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 169 int *); 170 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 171 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 172 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 173 static void udp_ud_err_connected(conn_t *, t_scalar_t); 174 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 175 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 176 boolean_t random); 177 static void udp_wput_other(queue_t *q, mblk_t *mp); 178 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 179 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 180 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 181 182 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 183 static void udp_stack_fini(netstackid_t stackid, void *arg); 184 185 /* Common routines for TPI and socket module */ 186 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 187 188 /* Common routine for TPI and socket module */ 189 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); 190 static void udp_do_close(conn_t *); 191 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 192 boolean_t); 193 static int udp_do_unbind(conn_t *); 194 195 int udp_getsockname(sock_lower_handle_t, 196 struct sockaddr *, socklen_t *, cred_t *); 197 int udp_getpeername(sock_lower_handle_t, 198 struct sockaddr *, socklen_t *, cred_t *); 199 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 200 cred_t *, pid_t); 201 202 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) 203 204 /* 205 * Checks if the given destination addr/port is allowed out. 206 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 207 * Called for each connect() and for sendto()/sendmsg() to a different 208 * destination. 209 * For connect(), called in udp_connect(). 210 * For sendto()/sendmsg(), called in udp_output_newdst(). 211 * 212 * This macro assumes that the cl_inet_connect2 hook is not NULL. 213 * Please check this before calling this macro. 214 * 215 * void 216 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 217 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 218 */ 219 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 220 (err) = 0; \ 221 /* \ 222 * Running in cluster mode - check and register active \ 223 * "connection" information \ 224 */ \ 225 if ((cp)->conn_ipversion == IPV4_VERSION) \ 226 (err) = (*cl_inet_connect2)( \ 227 (cp)->conn_netstack->netstack_stackid, \ 228 IPPROTO_UDP, is_outgoing, AF_INET, \ 229 (uint8_t *)&((cp)->conn_laddr_v4), \ 230 (cp)->conn_lport, \ 231 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 232 (in_port_t)(fport), NULL); \ 233 else \ 234 (err) = (*cl_inet_connect2)( \ 235 (cp)->conn_netstack->netstack_stackid, \ 236 IPPROTO_UDP, is_outgoing, AF_INET6, \ 237 (uint8_t *)&((cp)->conn_laddr_v6), \ 238 (cp)->conn_lport, \ 239 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 240 } 241 242 static struct module_info udp_mod_info = { 243 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 244 }; 245 246 /* 247 * Entry points for UDP as a device. 248 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 249 */ 250 static struct qinit udp_rinitv4 = { 251 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 252 }; 253 254 static struct qinit udp_rinitv6 = { 255 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 256 }; 257 258 static struct qinit udp_winit = { 259 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info 260 }; 261 262 /* UDP entry point during fallback */ 263 struct qinit udp_fallback_sock_winit = { 264 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 265 }; 266 267 /* 268 * UDP needs to handle I_LINK and I_PLINK since ifconfig 269 * likes to use it as a place to hang the various streams. 270 */ 271 static struct qinit udp_lrinit = { 272 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 273 }; 274 275 static struct qinit udp_lwinit = { 276 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 277 }; 278 279 /* For AF_INET aka /dev/udp */ 280 struct streamtab udpinfov4 = { 281 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 282 }; 283 284 /* For AF_INET6 aka /dev/udp6 */ 285 struct streamtab udpinfov6 = { 286 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 287 }; 288 289 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 290 291 /* Default structure copied into T_INFO_ACK messages */ 292 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 293 T_INFO_ACK, 294 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 295 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 296 T_INVALID, /* CDATA_size. udp does not support connect data. */ 297 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 298 sizeof (sin_t), /* ADDR_size. */ 299 0, /* OPT_size - not initialized here */ 300 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 301 T_CLTS, /* SERV_type. udp supports connection-less. */ 302 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 303 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 304 }; 305 306 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 307 308 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 309 T_INFO_ACK, 310 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 311 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 312 T_INVALID, /* CDATA_size. udp does not support connect data. */ 313 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 314 sizeof (sin6_t), /* ADDR_size. */ 315 0, /* OPT_size - not initialized here */ 316 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 317 T_CLTS, /* SERV_type. udp supports connection-less. */ 318 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 319 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 320 }; 321 322 /* 323 * UDP tunables related declarations. Definitions are in udp_tunables.c 324 */ 325 extern mod_prop_info_t udp_propinfo_tbl[]; 326 extern int udp_propinfo_count; 327 328 /* Setable in /etc/system */ 329 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 330 uint32_t udp_random_anon_port = 1; 331 332 /* 333 * Hook functions to enable cluster networking. 334 * On non-clustered systems these vectors must always be NULL 335 */ 336 337 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 338 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 339 void *args) = NULL; 340 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 341 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 342 void *args) = NULL; 343 344 typedef union T_primitives *t_primp_t; 345 346 /* 347 * Return the next anonymous port in the privileged port range for 348 * bind checking. 349 * 350 * Trusted Extension (TX) notes: TX allows administrator to mark or 351 * reserve ports as Multilevel ports (MLP). MLP has special function 352 * on TX systems. Once a port is made MLP, it's not available as 353 * ordinary port. This creates "holes" in the port name space. It 354 * may be necessary to skip the "holes" find a suitable anon port. 355 */ 356 static in_port_t 357 udp_get_next_priv_port(udp_t *udp) 358 { 359 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 360 in_port_t nextport; 361 boolean_t restart = B_FALSE; 362 udp_stack_t *us = udp->udp_us; 363 364 retry: 365 if (next_priv_port < us->us_min_anonpriv_port || 366 next_priv_port >= IPPORT_RESERVED) { 367 next_priv_port = IPPORT_RESERVED - 1; 368 if (restart) 369 return (0); 370 restart = B_TRUE; 371 } 372 373 if (is_system_labeled() && 374 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 375 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 376 next_priv_port = nextport; 377 goto retry; 378 } 379 380 return (next_priv_port--); 381 } 382 383 /* 384 * Hash list removal routine for udp_t structures. 385 */ 386 static void 387 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 388 { 389 udp_t *udpnext; 390 kmutex_t *lockp; 391 udp_stack_t *us = udp->udp_us; 392 conn_t *connp = udp->udp_connp; 393 394 if (udp->udp_ptpbhn == NULL) 395 return; 396 397 /* 398 * Extract the lock pointer in case there are concurrent 399 * hash_remove's for this instance. 400 */ 401 ASSERT(connp->conn_lport != 0); 402 if (!caller_holds_lock) { 403 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 404 us->us_bind_fanout_size)].uf_lock; 405 ASSERT(lockp != NULL); 406 mutex_enter(lockp); 407 } 408 if (udp->udp_ptpbhn != NULL) { 409 udpnext = udp->udp_bind_hash; 410 if (udpnext != NULL) { 411 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 412 udp->udp_bind_hash = NULL; 413 } 414 *udp->udp_ptpbhn = udpnext; 415 udp->udp_ptpbhn = NULL; 416 } 417 if (!caller_holds_lock) { 418 mutex_exit(lockp); 419 } 420 } 421 422 static void 423 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 424 { 425 conn_t *connp = udp->udp_connp; 426 udp_t **udpp; 427 udp_t *udpnext; 428 conn_t *connext; 429 430 ASSERT(MUTEX_HELD(&uf->uf_lock)); 431 ASSERT(udp->udp_ptpbhn == NULL); 432 udpp = &uf->uf_udp; 433 udpnext = udpp[0]; 434 if (udpnext != NULL) { 435 /* 436 * If the new udp bound to the INADDR_ANY address 437 * and the first one in the list is not bound to 438 * INADDR_ANY we skip all entries until we find the 439 * first one bound to INADDR_ANY. 440 * This makes sure that applications binding to a 441 * specific address get preference over those binding to 442 * INADDR_ANY. 443 */ 444 connext = udpnext->udp_connp; 445 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 446 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 447 while ((udpnext = udpp[0]) != NULL && 448 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 449 udpp = &(udpnext->udp_bind_hash); 450 } 451 if (udpnext != NULL) 452 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 453 } else { 454 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 455 } 456 } 457 udp->udp_bind_hash = udpnext; 458 udp->udp_ptpbhn = udpp; 459 udpp[0] = udp; 460 } 461 462 /* 463 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 464 * passed to udp_wput. 465 * It associates a port number and local address with the stream. 466 * It calls IP to verify the local IP address, and calls IP to insert 467 * the conn_t in the fanout table. 468 * If everything is ok it then sends the T_BIND_ACK back up. 469 * 470 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 471 * without setting SO_REUSEADDR. This is needed so that they 472 * can be viewed as two independent transport protocols. 473 * However, anonymouns ports are allocated from the same range to avoid 474 * duplicating the us->us_next_port_to_try. 475 */ 476 static void 477 udp_tpi_bind(queue_t *q, mblk_t *mp) 478 { 479 sin_t *sin; 480 sin6_t *sin6; 481 mblk_t *mp1; 482 struct T_bind_req *tbr; 483 conn_t *connp; 484 udp_t *udp; 485 int error; 486 struct sockaddr *sa; 487 cred_t *cr; 488 489 /* 490 * All Solaris components should pass a db_credp 491 * for this TPI message, hence we ASSERT. 492 * But in case there is some other M_PROTO that looks 493 * like a TPI message sent by some other kernel 494 * component, we check and return an error. 495 */ 496 cr = msg_getcred(mp, NULL); 497 ASSERT(cr != NULL); 498 if (cr == NULL) { 499 udp_err_ack(q, mp, TSYSERR, EINVAL); 500 return; 501 } 502 503 connp = Q_TO_CONN(q); 504 udp = connp->conn_udp; 505 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 506 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 507 "udp_bind: bad req, len %u", 508 (uint_t)(mp->b_wptr - mp->b_rptr)); 509 udp_err_ack(q, mp, TPROTO, 0); 510 return; 511 } 512 if (udp->udp_state != TS_UNBND) { 513 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 514 "udp_bind: bad state, %u", udp->udp_state); 515 udp_err_ack(q, mp, TOUTSTATE, 0); 516 return; 517 } 518 /* 519 * Reallocate the message to make sure we have enough room for an 520 * address. 521 */ 522 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 523 if (mp1 == NULL) { 524 udp_err_ack(q, mp, TSYSERR, ENOMEM); 525 return; 526 } 527 528 mp = mp1; 529 530 /* Reset the message type in preparation for shipping it back. */ 531 DB_TYPE(mp) = M_PCPROTO; 532 533 tbr = (struct T_bind_req *)mp->b_rptr; 534 switch (tbr->ADDR_length) { 535 case 0: /* Request for a generic port */ 536 tbr->ADDR_offset = sizeof (struct T_bind_req); 537 if (connp->conn_family == AF_INET) { 538 tbr->ADDR_length = sizeof (sin_t); 539 sin = (sin_t *)&tbr[1]; 540 *sin = sin_null; 541 sin->sin_family = AF_INET; 542 mp->b_wptr = (uchar_t *)&sin[1]; 543 sa = (struct sockaddr *)sin; 544 } else { 545 ASSERT(connp->conn_family == AF_INET6); 546 tbr->ADDR_length = sizeof (sin6_t); 547 sin6 = (sin6_t *)&tbr[1]; 548 *sin6 = sin6_null; 549 sin6->sin6_family = AF_INET6; 550 mp->b_wptr = (uchar_t *)&sin6[1]; 551 sa = (struct sockaddr *)sin6; 552 } 553 break; 554 555 case sizeof (sin_t): /* Complete IPv4 address */ 556 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 557 sizeof (sin_t)); 558 if (sa == NULL || !OK_32PTR((char *)sa)) { 559 udp_err_ack(q, mp, TSYSERR, EINVAL); 560 return; 561 } 562 if (connp->conn_family != AF_INET || 563 sa->sa_family != AF_INET) { 564 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 565 return; 566 } 567 break; 568 569 case sizeof (sin6_t): /* complete IPv6 address */ 570 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 571 sizeof (sin6_t)); 572 if (sa == NULL || !OK_32PTR((char *)sa)) { 573 udp_err_ack(q, mp, TSYSERR, EINVAL); 574 return; 575 } 576 if (connp->conn_family != AF_INET6 || 577 sa->sa_family != AF_INET6) { 578 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 579 return; 580 } 581 break; 582 583 default: /* Invalid request */ 584 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 585 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 586 udp_err_ack(q, mp, TBADADDR, 0); 587 return; 588 } 589 590 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 591 tbr->PRIM_type != O_T_BIND_REQ); 592 593 if (error != 0) { 594 if (error > 0) { 595 udp_err_ack(q, mp, TSYSERR, error); 596 } else { 597 udp_err_ack(q, mp, -error, 0); 598 } 599 } else { 600 tbr->PRIM_type = T_BIND_ACK; 601 qreply(q, mp); 602 } 603 } 604 605 /* 606 * This routine handles each T_CONN_REQ message passed to udp. It 607 * associates a default destination address with the stream. 608 * 609 * After various error checks are completed, udp_connect() lays 610 * the target address and port into the composite header template. 611 * Then we ask IP for information, including a source address if we didn't 612 * already have one. Finally we send up the T_OK_ACK reply message. 613 */ 614 static void 615 udp_tpi_connect(queue_t *q, mblk_t *mp) 616 { 617 conn_t *connp = Q_TO_CONN(q); 618 int error; 619 socklen_t len; 620 struct sockaddr *sa; 621 struct T_conn_req *tcr; 622 cred_t *cr; 623 pid_t pid; 624 /* 625 * All Solaris components should pass a db_credp 626 * for this TPI message, hence we ASSERT. 627 * But in case there is some other M_PROTO that looks 628 * like a TPI message sent by some other kernel 629 * component, we check and return an error. 630 */ 631 cr = msg_getcred(mp, &pid); 632 ASSERT(cr != NULL); 633 if (cr == NULL) { 634 udp_err_ack(q, mp, TSYSERR, EINVAL); 635 return; 636 } 637 638 tcr = (struct T_conn_req *)mp->b_rptr; 639 640 /* A bit of sanity checking */ 641 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 642 udp_err_ack(q, mp, TPROTO, 0); 643 return; 644 } 645 646 if (tcr->OPT_length != 0) { 647 udp_err_ack(q, mp, TBADOPT, 0); 648 return; 649 } 650 651 /* 652 * Determine packet type based on type of address passed in 653 * the request should contain an IPv4 or IPv6 address. 654 * Make sure that address family matches the type of 655 * family of the address passed down. 656 */ 657 len = tcr->DEST_length; 658 switch (tcr->DEST_length) { 659 default: 660 udp_err_ack(q, mp, TBADADDR, 0); 661 return; 662 663 case sizeof (sin_t): 664 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 665 sizeof (sin_t)); 666 break; 667 668 case sizeof (sin6_t): 669 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 670 sizeof (sin6_t)); 671 break; 672 } 673 674 error = proto_verify_ip_addr(connp->conn_family, sa, len); 675 if (error != 0) { 676 udp_err_ack(q, mp, TSYSERR, error); 677 return; 678 } 679 680 error = udp_do_connect(connp, sa, len, cr, pid); 681 if (error != 0) { 682 if (error < 0) 683 udp_err_ack(q, mp, -error, 0); 684 else 685 udp_err_ack(q, mp, TSYSERR, error); 686 } else { 687 mblk_t *mp1; 688 /* 689 * We have to send a connection confirmation to 690 * keep TLI happy. 691 */ 692 if (connp->conn_family == AF_INET) { 693 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 694 sizeof (sin_t), NULL, 0); 695 } else { 696 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 697 sizeof (sin6_t), NULL, 0); 698 } 699 if (mp1 == NULL) { 700 udp_err_ack(q, mp, TSYSERR, ENOMEM); 701 return; 702 } 703 704 /* 705 * Send ok_ack for T_CONN_REQ 706 */ 707 mp = mi_tpi_ok_ack_alloc(mp); 708 if (mp == NULL) { 709 /* Unable to reuse the T_CONN_REQ for the ack. */ 710 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 711 return; 712 } 713 714 putnext(connp->conn_rq, mp); 715 putnext(connp->conn_rq, mp1); 716 } 717 } 718 719 static int 720 udp_tpi_close(queue_t *q, int flags) 721 { 722 conn_t *connp; 723 724 if (flags & SO_FALLBACK) { 725 /* 726 * stream is being closed while in fallback 727 * simply free the resources that were allocated 728 */ 729 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 730 qprocsoff(q); 731 goto done; 732 } 733 734 connp = Q_TO_CONN(q); 735 udp_do_close(connp); 736 done: 737 q->q_ptr = WR(q)->q_ptr = NULL; 738 return (0); 739 } 740 741 static void 742 udp_close_free(conn_t *connp) 743 { 744 udp_t *udp = connp->conn_udp; 745 746 /* If there are any options associated with the stream, free them. */ 747 if (udp->udp_recv_ipp.ipp_fields != 0) 748 ip_pkt_free(&udp->udp_recv_ipp); 749 750 /* 751 * Clear any fields which the kmem_cache constructor clears. 752 * Only udp_connp needs to be preserved. 753 * TBD: We should make this more efficient to avoid clearing 754 * everything. 755 */ 756 ASSERT(udp->udp_connp == connp); 757 bzero(udp, sizeof (udp_t)); 758 udp->udp_connp = connp; 759 } 760 761 static int 762 udp_do_disconnect(conn_t *connp) 763 { 764 udp_t *udp; 765 udp_fanout_t *udpf; 766 udp_stack_t *us; 767 int error; 768 769 udp = connp->conn_udp; 770 us = udp->udp_us; 771 mutex_enter(&connp->conn_lock); 772 if (udp->udp_state != TS_DATA_XFER) { 773 mutex_exit(&connp->conn_lock); 774 return (-TOUTSTATE); 775 } 776 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 777 us->us_bind_fanout_size)]; 778 mutex_enter(&udpf->uf_lock); 779 if (connp->conn_mcbc_bind) 780 connp->conn_saddr_v6 = ipv6_all_zeros; 781 else 782 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 783 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 784 connp->conn_faddr_v6 = ipv6_all_zeros; 785 connp->conn_fport = 0; 786 udp->udp_state = TS_IDLE; 787 mutex_exit(&udpf->uf_lock); 788 789 /* Remove any remnants of mapped address binding */ 790 if (connp->conn_family == AF_INET6) 791 connp->conn_ipversion = IPV6_VERSION; 792 793 connp->conn_v6lastdst = ipv6_all_zeros; 794 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 795 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 796 mutex_exit(&connp->conn_lock); 797 if (error != 0) 798 return (error); 799 800 /* 801 * Tell IP to remove the full binding and revert 802 * to the local address binding. 803 */ 804 return (ip_laddr_fanout_insert(connp)); 805 } 806 807 static void 808 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 809 { 810 conn_t *connp = Q_TO_CONN(q); 811 int error; 812 813 /* 814 * Allocate the largest primitive we need to send back 815 * T_error_ack is > than T_ok_ack 816 */ 817 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 818 if (mp == NULL) { 819 /* Unable to reuse the T_DISCON_REQ for the ack. */ 820 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 821 return; 822 } 823 824 error = udp_do_disconnect(connp); 825 826 if (error != 0) { 827 if (error < 0) { 828 udp_err_ack(q, mp, -error, 0); 829 } else { 830 udp_err_ack(q, mp, TSYSERR, error); 831 } 832 } else { 833 mp = mi_tpi_ok_ack_alloc(mp); 834 ASSERT(mp != NULL); 835 qreply(q, mp); 836 } 837 } 838 839 int 840 udp_disconnect(conn_t *connp) 841 { 842 int error; 843 844 connp->conn_dgram_errind = B_FALSE; 845 error = udp_do_disconnect(connp); 846 if (error < 0) 847 error = proto_tlitosyserr(-error); 848 849 return (error); 850 } 851 852 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 853 static void 854 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 855 { 856 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 857 qreply(q, mp); 858 } 859 860 /* Shorthand to generate and send TPI error acks to our client */ 861 static void 862 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 863 t_scalar_t t_error, int sys_error) 864 { 865 struct T_error_ack *teackp; 866 867 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 868 M_PCPROTO, T_ERROR_ACK)) != NULL) { 869 teackp = (struct T_error_ack *)mp->b_rptr; 870 teackp->ERROR_prim = primitive; 871 teackp->TLI_error = t_error; 872 teackp->UNIX_error = sys_error; 873 qreply(q, mp); 874 } 875 } 876 877 /* At minimum we need 4 bytes of UDP header */ 878 #define ICMP_MIN_UDP_HDR 4 879 880 /* 881 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 882 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 883 * Assumes that IP has pulled up everything up to and including the ICMP header. 884 */ 885 /* ARGSUSED2 */ 886 static void 887 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 888 { 889 conn_t *connp = (conn_t *)arg1; 890 icmph_t *icmph; 891 ipha_t *ipha; 892 int iph_hdr_length; 893 udpha_t *udpha; 894 sin_t sin; 895 sin6_t sin6; 896 mblk_t *mp1; 897 int error = 0; 898 udp_t *udp = connp->conn_udp; 899 900 ipha = (ipha_t *)mp->b_rptr; 901 902 ASSERT(OK_32PTR(mp->b_rptr)); 903 904 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 905 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 906 udp_icmp_error_ipv6(connp, mp, ira); 907 return; 908 } 909 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 910 911 /* Skip past the outer IP and ICMP headers */ 912 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 913 iph_hdr_length = ira->ira_ip_hdr_length; 914 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 915 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 916 917 /* Skip past the inner IP and find the ULP header */ 918 iph_hdr_length = IPH_HDR_LENGTH(ipha); 919 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 920 921 switch (icmph->icmph_type) { 922 case ICMP_DEST_UNREACHABLE: 923 switch (icmph->icmph_code) { 924 case ICMP_FRAGMENTATION_NEEDED: { 925 ipha_t *ipha; 926 ip_xmit_attr_t *ixa; 927 /* 928 * IP has already adjusted the path MTU. 929 * But we need to adjust DF for IPv4. 930 */ 931 if (connp->conn_ipversion != IPV4_VERSION) 932 break; 933 934 ixa = conn_get_ixa(connp, B_FALSE); 935 if (ixa == NULL || ixa->ixa_ire == NULL) { 936 /* 937 * Some other thread holds conn_ixa. We will 938 * redo this on the next ICMP too big. 939 */ 940 if (ixa != NULL) 941 ixa_refrele(ixa); 942 break; 943 } 944 (void) ip_get_pmtu(ixa); 945 946 mutex_enter(&connp->conn_lock); 947 ipha = (ipha_t *)connp->conn_ht_iphc; 948 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 949 ipha->ipha_fragment_offset_and_flags |= 950 IPH_DF_HTONS; 951 } else { 952 ipha->ipha_fragment_offset_and_flags &= 953 ~IPH_DF_HTONS; 954 } 955 mutex_exit(&connp->conn_lock); 956 ixa_refrele(ixa); 957 break; 958 } 959 case ICMP_PORT_UNREACHABLE: 960 case ICMP_PROTOCOL_UNREACHABLE: 961 error = ECONNREFUSED; 962 break; 963 default: 964 /* Transient errors */ 965 break; 966 } 967 break; 968 default: 969 /* Transient errors */ 970 break; 971 } 972 if (error == 0) { 973 freemsg(mp); 974 return; 975 } 976 977 /* 978 * Deliver T_UDERROR_IND when the application has asked for it. 979 * The socket layer enables this automatically when connected. 980 */ 981 if (!connp->conn_dgram_errind) { 982 freemsg(mp); 983 return; 984 } 985 986 switch (connp->conn_family) { 987 case AF_INET: 988 sin = sin_null; 989 sin.sin_family = AF_INET; 990 sin.sin_addr.s_addr = ipha->ipha_dst; 991 sin.sin_port = udpha->uha_dst_port; 992 if (IPCL_IS_NONSTR(connp)) { 993 mutex_enter(&connp->conn_lock); 994 if (udp->udp_state == TS_DATA_XFER) { 995 if (sin.sin_port == connp->conn_fport && 996 sin.sin_addr.s_addr == 997 connp->conn_faddr_v4) { 998 mutex_exit(&connp->conn_lock); 999 (*connp->conn_upcalls->su_set_error) 1000 (connp->conn_upper_handle, error); 1001 goto done; 1002 } 1003 } else { 1004 udp->udp_delayed_error = error; 1005 *((sin_t *)&udp->udp_delayed_addr) = sin; 1006 } 1007 mutex_exit(&connp->conn_lock); 1008 } else { 1009 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1010 NULL, 0, error); 1011 if (mp1 != NULL) 1012 putnext(connp->conn_rq, mp1); 1013 } 1014 break; 1015 case AF_INET6: 1016 sin6 = sin6_null; 1017 sin6.sin6_family = AF_INET6; 1018 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1019 sin6.sin6_port = udpha->uha_dst_port; 1020 if (IPCL_IS_NONSTR(connp)) { 1021 mutex_enter(&connp->conn_lock); 1022 if (udp->udp_state == TS_DATA_XFER) { 1023 if (sin6.sin6_port == connp->conn_fport && 1024 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1025 &connp->conn_faddr_v6)) { 1026 mutex_exit(&connp->conn_lock); 1027 (*connp->conn_upcalls->su_set_error) 1028 (connp->conn_upper_handle, error); 1029 goto done; 1030 } 1031 } else { 1032 udp->udp_delayed_error = error; 1033 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1034 } 1035 mutex_exit(&connp->conn_lock); 1036 } else { 1037 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1038 NULL, 0, error); 1039 if (mp1 != NULL) 1040 putnext(connp->conn_rq, mp1); 1041 } 1042 break; 1043 } 1044 done: 1045 freemsg(mp); 1046 } 1047 1048 /* 1049 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1050 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1051 * Assumes that IP has pulled up all the extension headers as well as the 1052 * ICMPv6 header. 1053 */ 1054 static void 1055 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1056 { 1057 icmp6_t *icmp6; 1058 ip6_t *ip6h, *outer_ip6h; 1059 uint16_t iph_hdr_length; 1060 uint8_t *nexthdrp; 1061 udpha_t *udpha; 1062 sin6_t sin6; 1063 mblk_t *mp1; 1064 int error = 0; 1065 udp_t *udp = connp->conn_udp; 1066 udp_stack_t *us = udp->udp_us; 1067 1068 outer_ip6h = (ip6_t *)mp->b_rptr; 1069 #ifdef DEBUG 1070 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1071 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1072 else 1073 iph_hdr_length = IPV6_HDR_LEN; 1074 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1075 #endif 1076 /* Skip past the outer IP and ICMP headers */ 1077 iph_hdr_length = ira->ira_ip_hdr_length; 1078 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1079 1080 /* Skip past the inner IP and find the ULP header */ 1081 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1082 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1083 freemsg(mp); 1084 return; 1085 } 1086 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1087 1088 switch (icmp6->icmp6_type) { 1089 case ICMP6_DST_UNREACH: 1090 switch (icmp6->icmp6_code) { 1091 case ICMP6_DST_UNREACH_NOPORT: 1092 error = ECONNREFUSED; 1093 break; 1094 case ICMP6_DST_UNREACH_ADMIN: 1095 case ICMP6_DST_UNREACH_NOROUTE: 1096 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1097 case ICMP6_DST_UNREACH_ADDR: 1098 /* Transient errors */ 1099 break; 1100 default: 1101 break; 1102 } 1103 break; 1104 case ICMP6_PACKET_TOO_BIG: { 1105 struct T_unitdata_ind *tudi; 1106 struct T_opthdr *toh; 1107 size_t udi_size; 1108 mblk_t *newmp; 1109 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1110 sizeof (struct ip6_mtuinfo); 1111 sin6_t *sin6; 1112 struct ip6_mtuinfo *mtuinfo; 1113 1114 /* 1115 * If the application has requested to receive path mtu 1116 * information, send up an empty message containing an 1117 * IPV6_PATHMTU ancillary data item. 1118 */ 1119 if (!connp->conn_ipv6_recvpathmtu) 1120 break; 1121 1122 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1123 opt_length; 1124 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1125 UDPS_BUMP_MIB(us, udpInErrors); 1126 break; 1127 } 1128 1129 /* 1130 * newmp->b_cont is left to NULL on purpose. This is an 1131 * empty message containing only ancillary data. 1132 */ 1133 newmp->b_datap->db_type = M_PROTO; 1134 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1135 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1136 tudi->PRIM_type = T_UNITDATA_IND; 1137 tudi->SRC_length = sizeof (sin6_t); 1138 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1139 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1140 tudi->OPT_length = opt_length; 1141 1142 sin6 = (sin6_t *)&tudi[1]; 1143 bzero(sin6, sizeof (sin6_t)); 1144 sin6->sin6_family = AF_INET6; 1145 sin6->sin6_addr = connp->conn_faddr_v6; 1146 1147 toh = (struct T_opthdr *)&sin6[1]; 1148 toh->level = IPPROTO_IPV6; 1149 toh->name = IPV6_PATHMTU; 1150 toh->len = opt_length; 1151 toh->status = 0; 1152 1153 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1154 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1155 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1156 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1157 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1158 /* 1159 * We've consumed everything we need from the original 1160 * message. Free it, then send our empty message. 1161 */ 1162 freemsg(mp); 1163 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); 1164 return; 1165 } 1166 case ICMP6_TIME_EXCEEDED: 1167 /* Transient errors */ 1168 break; 1169 case ICMP6_PARAM_PROB: 1170 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1171 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1172 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1173 (uchar_t *)nexthdrp) { 1174 error = ECONNREFUSED; 1175 break; 1176 } 1177 break; 1178 } 1179 if (error == 0) { 1180 freemsg(mp); 1181 return; 1182 } 1183 1184 /* 1185 * Deliver T_UDERROR_IND when the application has asked for it. 1186 * The socket layer enables this automatically when connected. 1187 */ 1188 if (!connp->conn_dgram_errind) { 1189 freemsg(mp); 1190 return; 1191 } 1192 1193 sin6 = sin6_null; 1194 sin6.sin6_family = AF_INET6; 1195 sin6.sin6_addr = ip6h->ip6_dst; 1196 sin6.sin6_port = udpha->uha_dst_port; 1197 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1198 1199 if (IPCL_IS_NONSTR(connp)) { 1200 mutex_enter(&connp->conn_lock); 1201 if (udp->udp_state == TS_DATA_XFER) { 1202 if (sin6.sin6_port == connp->conn_fport && 1203 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1204 &connp->conn_faddr_v6)) { 1205 mutex_exit(&connp->conn_lock); 1206 (*connp->conn_upcalls->su_set_error) 1207 (connp->conn_upper_handle, error); 1208 goto done; 1209 } 1210 } else { 1211 udp->udp_delayed_error = error; 1212 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1213 } 1214 mutex_exit(&connp->conn_lock); 1215 } else { 1216 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1217 NULL, 0, error); 1218 if (mp1 != NULL) 1219 putnext(connp->conn_rq, mp1); 1220 } 1221 done: 1222 freemsg(mp); 1223 } 1224 1225 /* 1226 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1227 * The local address is filled in if endpoint is bound. The remote address 1228 * is filled in if remote address has been precified ("connected endpoint") 1229 * (The concept of connected CLTS sockets is alien to published TPI 1230 * but we support it anyway). 1231 */ 1232 static void 1233 udp_addr_req(queue_t *q, mblk_t *mp) 1234 { 1235 struct sockaddr *sa; 1236 mblk_t *ackmp; 1237 struct T_addr_ack *taa; 1238 udp_t *udp = Q_TO_UDP(q); 1239 conn_t *connp = udp->udp_connp; 1240 uint_t addrlen; 1241 1242 /* Make it large enough for worst case */ 1243 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1244 2 * sizeof (sin6_t), 1); 1245 if (ackmp == NULL) { 1246 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1247 return; 1248 } 1249 taa = (struct T_addr_ack *)ackmp->b_rptr; 1250 1251 bzero(taa, sizeof (struct T_addr_ack)); 1252 ackmp->b_wptr = (uchar_t *)&taa[1]; 1253 1254 taa->PRIM_type = T_ADDR_ACK; 1255 ackmp->b_datap->db_type = M_PCPROTO; 1256 1257 if (connp->conn_family == AF_INET) 1258 addrlen = sizeof (sin_t); 1259 else 1260 addrlen = sizeof (sin6_t); 1261 1262 mutex_enter(&connp->conn_lock); 1263 /* 1264 * Note: Following code assumes 32 bit alignment of basic 1265 * data structures like sin_t and struct T_addr_ack. 1266 */ 1267 if (udp->udp_state != TS_UNBND) { 1268 /* 1269 * Fill in local address first 1270 */ 1271 taa->LOCADDR_offset = sizeof (*taa); 1272 taa->LOCADDR_length = addrlen; 1273 sa = (struct sockaddr *)&taa[1]; 1274 (void) conn_getsockname(connp, sa, &addrlen); 1275 ackmp->b_wptr += addrlen; 1276 } 1277 if (udp->udp_state == TS_DATA_XFER) { 1278 /* 1279 * connected, fill remote address too 1280 */ 1281 taa->REMADDR_length = addrlen; 1282 /* assumed 32-bit alignment */ 1283 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1284 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1285 (void) conn_getpeername(connp, sa, &addrlen); 1286 ackmp->b_wptr += addrlen; 1287 } 1288 mutex_exit(&connp->conn_lock); 1289 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1290 qreply(q, ackmp); 1291 } 1292 1293 static void 1294 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1295 { 1296 conn_t *connp = udp->udp_connp; 1297 1298 if (connp->conn_family == AF_INET) { 1299 *tap = udp_g_t_info_ack_ipv4; 1300 } else { 1301 *tap = udp_g_t_info_ack_ipv6; 1302 } 1303 tap->CURRENT_state = udp->udp_state; 1304 tap->OPT_size = udp_max_optsize; 1305 } 1306 1307 static void 1308 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1309 t_uscalar_t cap_bits1) 1310 { 1311 tcap->CAP_bits1 = 0; 1312 1313 if (cap_bits1 & TC1_INFO) { 1314 udp_copy_info(&tcap->INFO_ack, udp); 1315 tcap->CAP_bits1 |= TC1_INFO; 1316 } 1317 } 1318 1319 /* 1320 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1321 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1322 * udp_g_t_info_ack. The current state of the stream is copied from 1323 * udp_state. 1324 */ 1325 static void 1326 udp_capability_req(queue_t *q, mblk_t *mp) 1327 { 1328 t_uscalar_t cap_bits1; 1329 struct T_capability_ack *tcap; 1330 udp_t *udp = Q_TO_UDP(q); 1331 1332 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1333 1334 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1335 mp->b_datap->db_type, T_CAPABILITY_ACK); 1336 if (!mp) 1337 return; 1338 1339 tcap = (struct T_capability_ack *)mp->b_rptr; 1340 udp_do_capability_ack(udp, tcap, cap_bits1); 1341 1342 qreply(q, mp); 1343 } 1344 1345 /* 1346 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1347 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1348 * The current state of the stream is copied from udp_state. 1349 */ 1350 static void 1351 udp_info_req(queue_t *q, mblk_t *mp) 1352 { 1353 udp_t *udp = Q_TO_UDP(q); 1354 1355 /* Create a T_INFO_ACK message. */ 1356 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1357 T_INFO_ACK); 1358 if (!mp) 1359 return; 1360 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1361 qreply(q, mp); 1362 } 1363 1364 /* For /dev/udp aka AF_INET open */ 1365 static int 1366 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1367 { 1368 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1369 } 1370 1371 /* For /dev/udp6 aka AF_INET6 open */ 1372 static int 1373 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1374 { 1375 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1376 } 1377 1378 /* 1379 * This is the open routine for udp. It allocates a udp_t structure for 1380 * the stream and, on the first open of the module, creates an ND table. 1381 */ 1382 static int 1383 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1384 boolean_t isv6) 1385 { 1386 udp_t *udp; 1387 conn_t *connp; 1388 dev_t conn_dev; 1389 vmem_t *minor_arena; 1390 int err; 1391 1392 /* If the stream is already open, return immediately. */ 1393 if (q->q_ptr != NULL) 1394 return (0); 1395 1396 if (sflag == MODOPEN) 1397 return (EINVAL); 1398 1399 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1400 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1401 minor_arena = ip_minor_arena_la; 1402 } else { 1403 /* 1404 * Either minor numbers in the large arena were exhausted 1405 * or a non socket application is doing the open. 1406 * Try to allocate from the small arena. 1407 */ 1408 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1409 return (EBUSY); 1410 1411 minor_arena = ip_minor_arena_sa; 1412 } 1413 1414 if (flag & SO_FALLBACK) { 1415 /* 1416 * Non streams socket needs a stream to fallback to 1417 */ 1418 RD(q)->q_ptr = (void *)conn_dev; 1419 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1420 WR(q)->q_ptr = (void *)minor_arena; 1421 qprocson(q); 1422 return (0); 1423 } 1424 1425 connp = udp_do_open(credp, isv6, KM_SLEEP, &err); 1426 if (connp == NULL) { 1427 inet_minor_free(minor_arena, conn_dev); 1428 return (err); 1429 } 1430 udp = connp->conn_udp; 1431 1432 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1433 connp->conn_dev = conn_dev; 1434 connp->conn_minor_arena = minor_arena; 1435 1436 /* 1437 * Initialize the udp_t structure for this stream. 1438 */ 1439 q->q_ptr = connp; 1440 WR(q)->q_ptr = connp; 1441 connp->conn_rq = q; 1442 connp->conn_wq = WR(q); 1443 1444 /* 1445 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1446 * need to lock anything. 1447 */ 1448 ASSERT(connp->conn_proto == IPPROTO_UDP); 1449 ASSERT(connp->conn_udp == udp); 1450 ASSERT(udp->udp_connp == connp); 1451 1452 if (flag & SO_SOCKSTR) { 1453 udp->udp_issocket = B_TRUE; 1454 } 1455 1456 WR(q)->q_hiwat = connp->conn_sndbuf; 1457 WR(q)->q_lowat = connp->conn_sndlowat; 1458 1459 qprocson(q); 1460 1461 /* Set the Stream head write offset and high watermark. */ 1462 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1463 (void) proto_set_rx_hiwat(q, connp, 1464 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1465 1466 mutex_enter(&connp->conn_lock); 1467 connp->conn_state_flags &= ~CONN_INCIPIENT; 1468 mutex_exit(&connp->conn_lock); 1469 return (0); 1470 } 1471 1472 /* 1473 * Which UDP options OK to set through T_UNITDATA_REQ... 1474 */ 1475 /* ARGSUSED */ 1476 static boolean_t 1477 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1478 { 1479 return (B_TRUE); 1480 } 1481 1482 /* 1483 * This routine gets default values of certain options whose default 1484 * values are maintained by protcol specific code 1485 */ 1486 int 1487 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1488 { 1489 udp_t *udp = Q_TO_UDP(q); 1490 udp_stack_t *us = udp->udp_us; 1491 int *i1 = (int *)ptr; 1492 1493 switch (level) { 1494 case IPPROTO_IP: 1495 switch (name) { 1496 case IP_MULTICAST_TTL: 1497 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1498 return (sizeof (uchar_t)); 1499 case IP_MULTICAST_LOOP: 1500 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1501 return (sizeof (uchar_t)); 1502 } 1503 break; 1504 case IPPROTO_IPV6: 1505 switch (name) { 1506 case IPV6_MULTICAST_HOPS: 1507 *i1 = IP_DEFAULT_MULTICAST_TTL; 1508 return (sizeof (int)); 1509 case IPV6_MULTICAST_LOOP: 1510 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1511 return (sizeof (int)); 1512 case IPV6_UNICAST_HOPS: 1513 *i1 = us->us_ipv6_hoplimit; 1514 return (sizeof (int)); 1515 } 1516 break; 1517 } 1518 return (-1); 1519 } 1520 1521 /* 1522 * This routine retrieves the current status of socket options. 1523 * It returns the size of the option retrieved, or -1. 1524 */ 1525 int 1526 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1527 uchar_t *ptr) 1528 { 1529 int *i1 = (int *)ptr; 1530 udp_t *udp = connp->conn_udp; 1531 int len; 1532 conn_opt_arg_t coas; 1533 int retval; 1534 1535 coas.coa_connp = connp; 1536 coas.coa_ixa = connp->conn_ixa; 1537 coas.coa_ipp = &connp->conn_xmit_ipp; 1538 coas.coa_ancillary = B_FALSE; 1539 coas.coa_changed = 0; 1540 1541 /* 1542 * We assume that the optcom framework has checked for the set 1543 * of levels and names that are supported, hence we don't worry 1544 * about rejecting based on that. 1545 * First check for UDP specific handling, then pass to common routine. 1546 */ 1547 switch (level) { 1548 case IPPROTO_IP: 1549 /* 1550 * Only allow IPv4 option processing on IPv4 sockets. 1551 */ 1552 if (connp->conn_family != AF_INET) 1553 return (-1); 1554 1555 switch (name) { 1556 case IP_OPTIONS: 1557 case T_IP_OPTIONS: 1558 mutex_enter(&connp->conn_lock); 1559 if (!(udp->udp_recv_ipp.ipp_fields & 1560 IPPF_IPV4_OPTIONS)) { 1561 mutex_exit(&connp->conn_lock); 1562 return (0); 1563 } 1564 1565 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1566 ASSERT(len != 0); 1567 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1568 mutex_exit(&connp->conn_lock); 1569 return (len); 1570 } 1571 break; 1572 case IPPROTO_UDP: 1573 switch (name) { 1574 case UDP_NAT_T_ENDPOINT: 1575 mutex_enter(&connp->conn_lock); 1576 *i1 = udp->udp_nat_t_endpoint; 1577 mutex_exit(&connp->conn_lock); 1578 return (sizeof (int)); 1579 case UDP_RCVHDR: 1580 mutex_enter(&connp->conn_lock); 1581 *i1 = udp->udp_rcvhdr ? 1 : 0; 1582 mutex_exit(&connp->conn_lock); 1583 return (sizeof (int)); 1584 } 1585 } 1586 mutex_enter(&connp->conn_lock); 1587 retval = conn_opt_get(&coas, level, name, ptr); 1588 mutex_exit(&connp->conn_lock); 1589 return (retval); 1590 } 1591 1592 /* 1593 * This routine retrieves the current status of socket options. 1594 * It returns the size of the option retrieved, or -1. 1595 */ 1596 int 1597 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1598 { 1599 conn_t *connp = Q_TO_CONN(q); 1600 int err; 1601 1602 err = udp_opt_get(connp, level, name, ptr); 1603 return (err); 1604 } 1605 1606 /* 1607 * This routine sets socket options. 1608 */ 1609 int 1610 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1611 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1612 { 1613 conn_t *connp = coa->coa_connp; 1614 ip_xmit_attr_t *ixa = coa->coa_ixa; 1615 udp_t *udp = connp->conn_udp; 1616 udp_stack_t *us = udp->udp_us; 1617 int *i1 = (int *)invalp; 1618 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1619 int error; 1620 1621 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1622 /* 1623 * First do UDP specific sanity checks and handle UDP specific 1624 * options. Note that some IPPROTO_UDP options are handled 1625 * by conn_opt_set. 1626 */ 1627 switch (level) { 1628 case SOL_SOCKET: 1629 switch (name) { 1630 case SO_SNDBUF: 1631 if (*i1 > us->us_max_buf) { 1632 return (ENOBUFS); 1633 } 1634 break; 1635 case SO_RCVBUF: 1636 if (*i1 > us->us_max_buf) { 1637 return (ENOBUFS); 1638 } 1639 break; 1640 1641 case SCM_UCRED: { 1642 struct ucred_s *ucr; 1643 cred_t *newcr; 1644 ts_label_t *tsl; 1645 1646 /* 1647 * Only sockets that have proper privileges and are 1648 * bound to MLPs will have any other value here, so 1649 * this implicitly tests for privilege to set label. 1650 */ 1651 if (connp->conn_mlp_type == mlptSingle) 1652 break; 1653 1654 ucr = (struct ucred_s *)invalp; 1655 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || 1656 ucr->uc_labeloff < sizeof (*ucr) || 1657 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1658 return (EINVAL); 1659 if (!checkonly) { 1660 /* 1661 * Set ixa_tsl to the new label. 1662 * We assume that crgetzoneid doesn't change 1663 * as part of the SCM_UCRED. 1664 */ 1665 ASSERT(cr != NULL); 1666 if ((tsl = crgetlabel(cr)) == NULL) 1667 return (EINVAL); 1668 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1669 tsl->tsl_doi, KM_NOSLEEP); 1670 if (newcr == NULL) 1671 return (ENOSR); 1672 ASSERT(newcr->cr_label != NULL); 1673 /* 1674 * Move the hold on the cr_label to ixa_tsl by 1675 * setting cr_label to NULL. Then release newcr. 1676 */ 1677 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1678 ixa->ixa_flags |= IXAF_UCRED_TSL; 1679 newcr->cr_label = NULL; 1680 crfree(newcr); 1681 coa->coa_changed |= COA_HEADER_CHANGED; 1682 coa->coa_changed |= COA_WROFF_CHANGED; 1683 } 1684 /* Fully handled this option. */ 1685 return (0); 1686 } 1687 } 1688 break; 1689 case IPPROTO_UDP: 1690 switch (name) { 1691 case UDP_NAT_T_ENDPOINT: 1692 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1693 return (error); 1694 } 1695 1696 /* 1697 * Use conn_family instead so we can avoid ambiguitites 1698 * with AF_INET6 sockets that may switch from IPv4 1699 * to IPv6. 1700 */ 1701 if (connp->conn_family != AF_INET) { 1702 return (EAFNOSUPPORT); 1703 } 1704 1705 if (!checkonly) { 1706 mutex_enter(&connp->conn_lock); 1707 udp->udp_nat_t_endpoint = onoff; 1708 mutex_exit(&connp->conn_lock); 1709 coa->coa_changed |= COA_HEADER_CHANGED; 1710 coa->coa_changed |= COA_WROFF_CHANGED; 1711 } 1712 /* Fully handled this option. */ 1713 return (0); 1714 case UDP_RCVHDR: 1715 mutex_enter(&connp->conn_lock); 1716 udp->udp_rcvhdr = onoff; 1717 mutex_exit(&connp->conn_lock); 1718 return (0); 1719 } 1720 break; 1721 } 1722 error = conn_opt_set(coa, level, name, inlen, invalp, 1723 checkonly, cr); 1724 return (error); 1725 } 1726 1727 /* 1728 * This routine sets socket options. 1729 */ 1730 int 1731 udp_opt_set(conn_t *connp, uint_t optset_context, int level, 1732 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 1733 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) 1734 { 1735 udp_t *udp = connp->conn_udp; 1736 int err; 1737 conn_opt_arg_t coas, *coa; 1738 boolean_t checkonly; 1739 udp_stack_t *us = udp->udp_us; 1740 1741 switch (optset_context) { 1742 case SETFN_OPTCOM_CHECKONLY: 1743 checkonly = B_TRUE; 1744 /* 1745 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1746 * inlen != 0 implies value supplied and 1747 * we have to "pretend" to set it. 1748 * inlen == 0 implies that there is no 1749 * value part in T_CHECK request and just validation 1750 * done elsewhere should be enough, we just return here. 1751 */ 1752 if (inlen == 0) { 1753 *outlenp = 0; 1754 return (0); 1755 } 1756 break; 1757 case SETFN_OPTCOM_NEGOTIATE: 1758 checkonly = B_FALSE; 1759 break; 1760 case SETFN_UD_NEGOTIATE: 1761 case SETFN_CONN_NEGOTIATE: 1762 checkonly = B_FALSE; 1763 /* 1764 * Negotiating local and "association-related" options 1765 * through T_UNITDATA_REQ. 1766 * 1767 * Following routine can filter out ones we do not 1768 * want to be "set" this way. 1769 */ 1770 if (!udp_opt_allow_udr_set(level, name)) { 1771 *outlenp = 0; 1772 return (EINVAL); 1773 } 1774 break; 1775 default: 1776 /* 1777 * We should never get here 1778 */ 1779 *outlenp = 0; 1780 return (EINVAL); 1781 } 1782 1783 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1784 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1785 1786 if (thisdg_attrs != NULL) { 1787 /* Options from T_UNITDATA_REQ */ 1788 coa = (conn_opt_arg_t *)thisdg_attrs; 1789 ASSERT(coa->coa_connp == connp); 1790 ASSERT(coa->coa_ixa != NULL); 1791 ASSERT(coa->coa_ipp != NULL); 1792 ASSERT(coa->coa_ancillary); 1793 } else { 1794 coa = &coas; 1795 coas.coa_connp = connp; 1796 /* Get a reference on conn_ixa to prevent concurrent mods */ 1797 coas.coa_ixa = conn_get_ixa(connp, B_TRUE); 1798 if (coas.coa_ixa == NULL) { 1799 *outlenp = 0; 1800 return (ENOMEM); 1801 } 1802 coas.coa_ipp = &connp->conn_xmit_ipp; 1803 coas.coa_ancillary = B_FALSE; 1804 coas.coa_changed = 0; 1805 } 1806 1807 err = udp_do_opt_set(coa, level, name, inlen, invalp, 1808 cr, checkonly); 1809 if (err != 0) { 1810 errout: 1811 if (!coa->coa_ancillary) 1812 ixa_refrele(coa->coa_ixa); 1813 *outlenp = 0; 1814 return (err); 1815 } 1816 /* Handle DHCPINIT here outside of lock */ 1817 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) { 1818 uint_t ifindex; 1819 ill_t *ill; 1820 1821 ifindex = *(uint_t *)invalp; 1822 if (ifindex == 0) { 1823 ill = NULL; 1824 } else { 1825 ill = ill_lookup_on_ifindex(ifindex, B_FALSE, 1826 coa->coa_ixa->ixa_ipst); 1827 if (ill == NULL) { 1828 err = ENXIO; 1829 goto errout; 1830 } 1831 1832 mutex_enter(&ill->ill_lock); 1833 if (ill->ill_state_flags & ILL_CONDEMNED) { 1834 mutex_exit(&ill->ill_lock); 1835 ill_refrele(ill); 1836 err = ENXIO; 1837 goto errout; 1838 } 1839 if (IS_VNI(ill)) { 1840 mutex_exit(&ill->ill_lock); 1841 ill_refrele(ill); 1842 err = EINVAL; 1843 goto errout; 1844 } 1845 } 1846 mutex_enter(&connp->conn_lock); 1847 1848 if (connp->conn_dhcpinit_ill != NULL) { 1849 /* 1850 * We've locked the conn so conn_cleanup_ill() 1851 * cannot clear conn_dhcpinit_ill -- so it's 1852 * safe to access the ill. 1853 */ 1854 ill_t *oill = connp->conn_dhcpinit_ill; 1855 1856 ASSERT(oill->ill_dhcpinit != 0); 1857 atomic_dec_32(&oill->ill_dhcpinit); 1858 ill_set_inputfn(connp->conn_dhcpinit_ill); 1859 connp->conn_dhcpinit_ill = NULL; 1860 } 1861 1862 if (ill != NULL) { 1863 connp->conn_dhcpinit_ill = ill; 1864 atomic_inc_32(&ill->ill_dhcpinit); 1865 ill_set_inputfn(ill); 1866 mutex_exit(&connp->conn_lock); 1867 mutex_exit(&ill->ill_lock); 1868 ill_refrele(ill); 1869 } else { 1870 mutex_exit(&connp->conn_lock); 1871 } 1872 } 1873 1874 /* 1875 * Common case of OK return with outval same as inval. 1876 */ 1877 if (invalp != outvalp) { 1878 /* don't trust bcopy for identical src/dst */ 1879 (void) bcopy(invalp, outvalp, inlen); 1880 } 1881 *outlenp = inlen; 1882 1883 /* 1884 * If this was not ancillary data, then we rebuild the headers, 1885 * update the IRE/NCE, and IPsec as needed. 1886 * Since the label depends on the destination we go through 1887 * ip_set_destination first. 1888 */ 1889 if (coa->coa_ancillary) { 1890 return (0); 1891 } 1892 1893 if (coa->coa_changed & COA_ROUTE_CHANGED) { 1894 in6_addr_t saddr, faddr, nexthop; 1895 in_port_t fport; 1896 1897 /* 1898 * We clear lastdst to make sure we pick up the change 1899 * next time sending. 1900 * If we are connected we re-cache the information. 1901 * We ignore errors to preserve BSD behavior. 1902 * Note that we don't redo IPsec policy lookup here 1903 * since the final destination (or source) didn't change. 1904 */ 1905 mutex_enter(&connp->conn_lock); 1906 connp->conn_v6lastdst = ipv6_all_zeros; 1907 1908 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa, 1909 &connp->conn_faddr_v6, &nexthop); 1910 saddr = connp->conn_saddr_v6; 1911 faddr = connp->conn_faddr_v6; 1912 fport = connp->conn_fport; 1913 mutex_exit(&connp->conn_lock); 1914 1915 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) && 1916 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) { 1917 (void) ip_attr_connect(connp, coa->coa_ixa, 1918 &saddr, &faddr, &nexthop, fport, NULL, NULL, 1919 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST); 1920 } 1921 } 1922 1923 ixa_refrele(coa->coa_ixa); 1924 1925 if (coa->coa_changed & COA_HEADER_CHANGED) { 1926 /* 1927 * Rebuild the header template if we are connected. 1928 * Otherwise clear conn_v6lastdst so we rebuild the header 1929 * in the data path. 1930 */ 1931 mutex_enter(&connp->conn_lock); 1932 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 1933 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 1934 err = udp_build_hdr_template(connp, 1935 &connp->conn_saddr_v6, &connp->conn_faddr_v6, 1936 connp->conn_fport, connp->conn_flowinfo); 1937 if (err != 0) { 1938 mutex_exit(&connp->conn_lock); 1939 return (err); 1940 } 1941 } else { 1942 connp->conn_v6lastdst = ipv6_all_zeros; 1943 } 1944 mutex_exit(&connp->conn_lock); 1945 } 1946 if (coa->coa_changed & COA_RCVBUF_CHANGED) { 1947 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 1948 connp->conn_rcvbuf); 1949 } 1950 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { 1951 connp->conn_wq->q_hiwat = connp->conn_sndbuf; 1952 } 1953 if (coa->coa_changed & COA_WROFF_CHANGED) { 1954 /* Increase wroff if needed */ 1955 uint_t wroff; 1956 1957 mutex_enter(&connp->conn_lock); 1958 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra; 1959 if (udp->udp_nat_t_endpoint) 1960 wroff += sizeof (uint32_t); 1961 if (wroff > connp->conn_wroff) { 1962 connp->conn_wroff = wroff; 1963 mutex_exit(&connp->conn_lock); 1964 (void) proto_set_tx_wroff(connp->conn_rq, connp, wroff); 1965 } else { 1966 mutex_exit(&connp->conn_lock); 1967 } 1968 } 1969 return (err); 1970 } 1971 1972 /* This routine sets socket options. */ 1973 int 1974 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 1975 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 1976 void *thisdg_attrs, cred_t *cr) 1977 { 1978 conn_t *connp = Q_TO_CONN(q); 1979 int error; 1980 1981 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 1982 outlenp, outvalp, thisdg_attrs, cr); 1983 return (error); 1984 } 1985 1986 /* 1987 * Setup IP and UDP headers. 1988 * Returns NULL on allocation failure, in which case data_mp is freed. 1989 */ 1990 mblk_t * 1991 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, 1992 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport, 1993 uint32_t flowinfo, mblk_t *data_mp, int *errorp) 1994 { 1995 mblk_t *mp; 1996 udpha_t *udpha; 1997 udp_stack_t *us = connp->conn_netstack->netstack_udp; 1998 uint_t data_len; 1999 uint32_t cksum; 2000 udp_t *udp = connp->conn_udp; 2001 boolean_t insert_spi = udp->udp_nat_t_endpoint; 2002 uint_t ulp_hdr_len; 2003 2004 data_len = msgdsize(data_mp); 2005 ulp_hdr_len = UDPH_SIZE; 2006 if (insert_spi) 2007 ulp_hdr_len += sizeof (uint32_t); 2008 2009 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, 2010 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); 2011 if (mp == NULL) { 2012 ASSERT(*errorp != 0); 2013 return (NULL); 2014 } 2015 2016 data_len += ulp_hdr_len; 2017 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; 2018 2019 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); 2020 udpha->uha_src_port = connp->conn_lport; 2021 udpha->uha_dst_port = dstport; 2022 udpha->uha_checksum = 0; 2023 udpha->uha_length = htons(data_len); 2024 2025 /* 2026 * If there was a routing option/header then conn_prepend_hdr 2027 * has massaged it and placed the pseudo-header checksum difference 2028 * in the cksum argument. 2029 * 2030 * Setup header length and prepare for ULP checksum done in IP. 2031 * 2032 * We make it easy for IP to include our pseudo header 2033 * by putting our length in uha_checksum. 2034 * The IP source, destination, and length have already been set by 2035 * conn_prepend_hdr. 2036 */ 2037 cksum += data_len; 2038 cksum = (cksum >> 16) + (cksum & 0xFFFF); 2039 ASSERT(cksum < 0x10000); 2040 2041 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2042 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2043 2044 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); 2045 2046 /* IP does the checksum if uha_checksum is non-zero */ 2047 if (us->us_do_checksum) { 2048 if (cksum == 0) 2049 udpha->uha_checksum = 0xffff; 2050 else 2051 udpha->uha_checksum = htons(cksum); 2052 } else { 2053 udpha->uha_checksum = 0; 2054 } 2055 } else { 2056 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2057 2058 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); 2059 if (cksum == 0) 2060 udpha->uha_checksum = 0xffff; 2061 else 2062 udpha->uha_checksum = htons(cksum); 2063 } 2064 2065 /* Insert all-0s SPI now. */ 2066 if (insert_spi) 2067 *((uint32_t *)(udpha + 1)) = 0; 2068 2069 return (mp); 2070 } 2071 2072 static int 2073 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, 2074 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo) 2075 { 2076 udpha_t *udpha; 2077 int error; 2078 2079 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2080 /* 2081 * We clear lastdst to make sure we don't use the lastdst path 2082 * next time sending since we might not have set v6dst yet. 2083 */ 2084 connp->conn_v6lastdst = ipv6_all_zeros; 2085 2086 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst, 2087 flowinfo); 2088 if (error != 0) 2089 return (error); 2090 2091 /* 2092 * Any routing header/option has been massaged. The checksum difference 2093 * is stored in conn_sum. 2094 */ 2095 udpha = (udpha_t *)connp->conn_ht_ulp; 2096 udpha->uha_src_port = connp->conn_lport; 2097 udpha->uha_dst_port = dstport; 2098 udpha->uha_checksum = 0; 2099 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */ 2100 return (0); 2101 } 2102 2103 static mblk_t * 2104 udp_queue_fallback(udp_t *udp, mblk_t *mp) 2105 { 2106 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 2107 if (IPCL_IS_NONSTR(udp->udp_connp)) { 2108 /* 2109 * fallback has started but messages have not been moved yet 2110 */ 2111 if (udp->udp_fallback_queue_head == NULL) { 2112 ASSERT(udp->udp_fallback_queue_tail == NULL); 2113 udp->udp_fallback_queue_head = mp; 2114 udp->udp_fallback_queue_tail = mp; 2115 } else { 2116 ASSERT(udp->udp_fallback_queue_tail != NULL); 2117 udp->udp_fallback_queue_tail->b_next = mp; 2118 udp->udp_fallback_queue_tail = mp; 2119 } 2120 return (NULL); 2121 } else { 2122 /* 2123 * Fallback completed, let the caller putnext() the mblk. 2124 */ 2125 return (mp); 2126 } 2127 } 2128 2129 /* 2130 * Deliver data to ULP. In case we have a socket, and it's falling back to 2131 * TPI, then we'll queue the mp for later processing. 2132 */ 2133 static void 2134 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2135 { 2136 if (IPCL_IS_NONSTR(connp)) { 2137 udp_t *udp = connp->conn_udp; 2138 int error; 2139 2140 ASSERT(len == msgdsize(mp)); 2141 if ((*connp->conn_upcalls->su_recv) 2142 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2143 mutex_enter(&udp->udp_recv_lock); 2144 if (error == ENOSPC) { 2145 /* 2146 * let's confirm while holding the lock 2147 */ 2148 if ((*connp->conn_upcalls->su_recv) 2149 (connp->conn_upper_handle, NULL, 0, 0, 2150 &error, NULL) < 0) { 2151 ASSERT(error == ENOSPC); 2152 if (error == ENOSPC) { 2153 connp->conn_flow_cntrld = 2154 B_TRUE; 2155 } 2156 } 2157 mutex_exit(&udp->udp_recv_lock); 2158 } else { 2159 ASSERT(error == EOPNOTSUPP); 2160 mp = udp_queue_fallback(udp, mp); 2161 mutex_exit(&udp->udp_recv_lock); 2162 if (mp != NULL) 2163 putnext(connp->conn_rq, mp); 2164 } 2165 } 2166 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2167 } else { 2168 if (is_system_labeled()) { 2169 ASSERT(ira->ira_cred != NULL); 2170 /* 2171 * Provide for protocols above UDP such as RPC 2172 * NOPID leaves db_cpid unchanged. 2173 */ 2174 mblk_setcred(mp, ira->ira_cred, NOPID); 2175 } 2176 2177 putnext(connp->conn_rq, mp); 2178 } 2179 } 2180 2181 /* 2182 * This is the inbound data path. 2183 * IP has already pulled up the IP plus UDP headers and verified alignment 2184 * etc. 2185 */ 2186 /* ARGSUSED2 */ 2187 static void 2188 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2189 { 2190 conn_t *connp = (conn_t *)arg1; 2191 struct T_unitdata_ind *tudi; 2192 uchar_t *rptr; /* Pointer to IP header */ 2193 int hdr_length; /* Length of IP+UDP headers */ 2194 int udi_size; /* Size of T_unitdata_ind */ 2195 int pkt_len; 2196 udp_t *udp; 2197 udpha_t *udpha; 2198 ip_pkt_t ipps; 2199 ip6_t *ip6h; 2200 mblk_t *mp1; 2201 uint32_t udp_ipv4_options_len; 2202 crb_t recv_ancillary; 2203 udp_stack_t *us; 2204 2205 ASSERT(connp->conn_flags & IPCL_UDPCONN); 2206 2207 udp = connp->conn_udp; 2208 us = udp->udp_us; 2209 rptr = mp->b_rptr; 2210 2211 ASSERT(DB_TYPE(mp) == M_DATA); 2212 ASSERT(OK_32PTR(rptr)); 2213 ASSERT(ira->ira_pktlen == msgdsize(mp)); 2214 pkt_len = ira->ira_pktlen; 2215 2216 /* 2217 * Get a snapshot of these and allow other threads to change 2218 * them after that. We need the same recv_ancillary when determining 2219 * the size as when adding the ancillary data items. 2220 */ 2221 mutex_enter(&connp->conn_lock); 2222 udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len; 2223 recv_ancillary = connp->conn_recv_ancillary; 2224 mutex_exit(&connp->conn_lock); 2225 2226 hdr_length = ira->ira_ip_hdr_length; 2227 2228 /* 2229 * IP inspected the UDP header thus all of it must be in the mblk. 2230 * UDP length check is performed for IPv6 packets and IPv4 packets 2231 * to check if the size of the packet as specified 2232 * by the UDP header is the same as the length derived from the IP 2233 * header. 2234 */ 2235 udpha = (udpha_t *)(rptr + hdr_length); 2236 if (pkt_len != ntohs(udpha->uha_length) + hdr_length) 2237 goto tossit; 2238 2239 hdr_length += UDPH_SIZE; 2240 ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */ 2241 2242 /* Initialize regardless of IP version */ 2243 ipps.ipp_fields = 0; 2244 2245 if (((ira->ira_flags & IRAF_IPV4_OPTIONS) || 2246 udp_ipv4_options_len > 0) && 2247 connp->conn_family == AF_INET) { 2248 int err; 2249 2250 /* 2251 * Record/update udp_recv_ipp with the lock 2252 * held. Not needed for AF_INET6 sockets 2253 * since they don't support a getsockopt of IP_OPTIONS. 2254 */ 2255 mutex_enter(&connp->conn_lock); 2256 err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp, 2257 B_TRUE); 2258 if (err != 0) { 2259 /* Allocation failed. Drop packet */ 2260 mutex_exit(&connp->conn_lock); 2261 freemsg(mp); 2262 UDPS_BUMP_MIB(us, udpInErrors); 2263 return; 2264 } 2265 mutex_exit(&connp->conn_lock); 2266 } 2267 2268 if (recv_ancillary.crb_all != 0) { 2269 /* 2270 * Record packet information in the ip_pkt_t 2271 */ 2272 if (ira->ira_flags & IRAF_IS_IPV4) { 2273 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION); 2274 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 2275 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 2276 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr)); 2277 2278 (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE); 2279 } else { 2280 uint8_t nexthdrp; 2281 2282 ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION); 2283 /* 2284 * IPv6 packets can only be received by applications 2285 * that are prepared to receive IPv6 addresses. 2286 * The IP fanout must ensure this. 2287 */ 2288 ASSERT(connp->conn_family == AF_INET6); 2289 2290 ip6h = (ip6_t *)rptr; 2291 2292 /* We don't care about the length, but need the ipp */ 2293 hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps, 2294 &nexthdrp); 2295 ASSERT(hdr_length == ira->ira_ip_hdr_length); 2296 /* Restore */ 2297 hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE; 2298 ASSERT(nexthdrp == IPPROTO_UDP); 2299 } 2300 } 2301 2302 /* 2303 * This is the inbound data path. Packets are passed upstream as 2304 * T_UNITDATA_IND messages. 2305 */ 2306 if (connp->conn_family == AF_INET) { 2307 sin_t *sin; 2308 2309 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 2310 2311 /* 2312 * Normally only send up the source address. 2313 * If any ancillary data items are wanted we add those. 2314 */ 2315 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 2316 if (recv_ancillary.crb_all != 0) { 2317 udi_size += conn_recvancillary_size(connp, 2318 recv_ancillary, ira, mp, &ipps); 2319 } 2320 2321 /* Allocate a message block for the T_UNITDATA_IND structure. */ 2322 mp1 = allocb(udi_size, BPRI_MED); 2323 if (mp1 == NULL) { 2324 freemsg(mp); 2325 UDPS_BUMP_MIB(us, udpInErrors); 2326 return; 2327 } 2328 mp1->b_cont = mp; 2329 mp1->b_datap->db_type = M_PROTO; 2330 tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2331 mp1->b_wptr = (uchar_t *)tudi + udi_size; 2332 tudi->PRIM_type = T_UNITDATA_IND; 2333 tudi->SRC_length = sizeof (sin_t); 2334 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2335 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2336 sizeof (sin_t); 2337 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 2338 tudi->OPT_length = udi_size; 2339 sin = (sin_t *)&tudi[1]; 2340 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 2341 sin->sin_port = udpha->uha_src_port; 2342 sin->sin_family = connp->conn_family; 2343 *(uint32_t *)&sin->sin_zero[0] = 0; 2344 *(uint32_t *)&sin->sin_zero[4] = 0; 2345 2346 /* 2347 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 2348 * IP_RECVTTL has been set. 2349 */ 2350 if (udi_size != 0) { 2351 conn_recvancillary_add(connp, recv_ancillary, ira, 2352 &ipps, (uchar_t *)&sin[1], udi_size); 2353 } 2354 } else { 2355 sin6_t *sin6; 2356 2357 /* 2358 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 2359 * 2360 * Normally we only send up the address. If receiving of any 2361 * optional receive side information is enabled, we also send 2362 * that up as options. 2363 */ 2364 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 2365 2366 if (recv_ancillary.crb_all != 0) { 2367 udi_size += conn_recvancillary_size(connp, 2368 recv_ancillary, ira, mp, &ipps); 2369 } 2370 2371 mp1 = allocb(udi_size, BPRI_MED); 2372 if (mp1 == NULL) { 2373 freemsg(mp); 2374 UDPS_BUMP_MIB(us, udpInErrors); 2375 return; 2376 } 2377 mp1->b_cont = mp; 2378 mp1->b_datap->db_type = M_PROTO; 2379 tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2380 mp1->b_wptr = (uchar_t *)tudi + udi_size; 2381 tudi->PRIM_type = T_UNITDATA_IND; 2382 tudi->SRC_length = sizeof (sin6_t); 2383 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2384 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2385 sizeof (sin6_t); 2386 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 2387 tudi->OPT_length = udi_size; 2388 sin6 = (sin6_t *)&tudi[1]; 2389 if (ira->ira_flags & IRAF_IS_IPV4) { 2390 in6_addr_t v6dst; 2391 2392 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 2393 &sin6->sin6_addr); 2394 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 2395 &v6dst); 2396 sin6->sin6_flowinfo = 0; 2397 sin6->sin6_scope_id = 0; 2398 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 2399 IPCL_ZONEID(connp), us->us_netstack); 2400 } else { 2401 ip6h = (ip6_t *)rptr; 2402 2403 sin6->sin6_addr = ip6h->ip6_src; 2404 /* No sin6_flowinfo per API */ 2405 sin6->sin6_flowinfo = 0; 2406 /* For link-scope pass up scope id */ 2407 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 2408 sin6->sin6_scope_id = ira->ira_ruifindex; 2409 else 2410 sin6->sin6_scope_id = 0; 2411 sin6->__sin6_src_id = ip_srcid_find_addr( 2412 &ip6h->ip6_dst, IPCL_ZONEID(connp), 2413 us->us_netstack); 2414 } 2415 sin6->sin6_port = udpha->uha_src_port; 2416 sin6->sin6_family = connp->conn_family; 2417 2418 if (udi_size != 0) { 2419 conn_recvancillary_add(connp, recv_ancillary, ira, 2420 &ipps, (uchar_t *)&sin6[1], udi_size); 2421 } 2422 } 2423 2424 /* 2425 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and 2426 * loopback traffic). 2427 */ 2428 DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa, 2429 void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha); 2430 2431 /* Walk past the headers unless IP_RECVHDR was set. */ 2432 if (!udp->udp_rcvhdr) { 2433 mp->b_rptr = rptr + hdr_length; 2434 pkt_len -= hdr_length; 2435 } 2436 2437 UDPS_BUMP_MIB(us, udpHCInDatagrams); 2438 udp_ulp_recv(connp, mp1, pkt_len, ira); 2439 return; 2440 2441 tossit: 2442 freemsg(mp); 2443 UDPS_BUMP_MIB(us, udpInErrors); 2444 } 2445 2446 /* 2447 * This routine creates a T_UDERROR_IND message and passes it upstream. 2448 * The address and options are copied from the T_UNITDATA_REQ message 2449 * passed in mp. This message is freed. 2450 */ 2451 static void 2452 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2453 { 2454 struct T_unitdata_req *tudr; 2455 mblk_t *mp1; 2456 uchar_t *destaddr; 2457 t_scalar_t destlen; 2458 uchar_t *optaddr; 2459 t_scalar_t optlen; 2460 2461 if ((mp->b_wptr < mp->b_rptr) || 2462 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2463 goto done; 2464 } 2465 tudr = (struct T_unitdata_req *)mp->b_rptr; 2466 destaddr = mp->b_rptr + tudr->DEST_offset; 2467 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2468 destaddr + tudr->DEST_length < mp->b_rptr || 2469 destaddr + tudr->DEST_length > mp->b_wptr) { 2470 goto done; 2471 } 2472 optaddr = mp->b_rptr + tudr->OPT_offset; 2473 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2474 optaddr + tudr->OPT_length < mp->b_rptr || 2475 optaddr + tudr->OPT_length > mp->b_wptr) { 2476 goto done; 2477 } 2478 destlen = tudr->DEST_length; 2479 optlen = tudr->OPT_length; 2480 2481 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2482 (char *)optaddr, optlen, err); 2483 if (mp1 != NULL) 2484 qreply(q, mp1); 2485 2486 done: 2487 freemsg(mp); 2488 } 2489 2490 /* 2491 * This routine removes a port number association from a stream. It 2492 * is called by udp_wput to handle T_UNBIND_REQ messages. 2493 */ 2494 static void 2495 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2496 { 2497 conn_t *connp = Q_TO_CONN(q); 2498 int error; 2499 2500 error = udp_do_unbind(connp); 2501 if (error) { 2502 if (error < 0) 2503 udp_err_ack(q, mp, -error, 0); 2504 else 2505 udp_err_ack(q, mp, TSYSERR, error); 2506 return; 2507 } 2508 2509 mp = mi_tpi_ok_ack_alloc(mp); 2510 ASSERT(mp != NULL); 2511 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 2512 qreply(q, mp); 2513 } 2514 2515 /* 2516 * Don't let port fall into the privileged range. 2517 * Since the extra privileged ports can be arbitrary we also 2518 * ensure that we exclude those from consideration. 2519 * us->us_epriv_ports is not sorted thus we loop over it until 2520 * there are no changes. 2521 */ 2522 static in_port_t 2523 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 2524 { 2525 int i, bump; 2526 in_port_t nextport; 2527 boolean_t restart = B_FALSE; 2528 udp_stack_t *us = udp->udp_us; 2529 2530 if (random && udp_random_anon_port != 0) { 2531 (void) random_get_pseudo_bytes((uint8_t *)&port, 2532 sizeof (in_port_t)); 2533 /* 2534 * Unless changed by a sys admin, the smallest anon port 2535 * is 32768 and the largest anon port is 65535. It is 2536 * very likely (50%) for the random port to be smaller 2537 * than the smallest anon port. When that happens, 2538 * add port % (anon port range) to the smallest anon 2539 * port to get the random port. It should fall into the 2540 * valid anon port range. 2541 */ 2542 if ((port < us->us_smallest_anon_port) || 2543 (port > us->us_largest_anon_port)) { 2544 if (us->us_smallest_anon_port == 2545 us->us_largest_anon_port) { 2546 bump = 0; 2547 } else { 2548 bump = port % (us->us_largest_anon_port - 2549 us->us_smallest_anon_port); 2550 } 2551 2552 port = us->us_smallest_anon_port + bump; 2553 } 2554 } 2555 2556 retry: 2557 if (port < us->us_smallest_anon_port) 2558 port = us->us_smallest_anon_port; 2559 2560 if (port > us->us_largest_anon_port) { 2561 port = us->us_smallest_anon_port; 2562 if (restart) 2563 return (0); 2564 restart = B_TRUE; 2565 } 2566 2567 if (port < us->us_smallest_nonpriv_port) 2568 port = us->us_smallest_nonpriv_port; 2569 2570 for (i = 0; i < us->us_num_epriv_ports; i++) { 2571 if (port == us->us_epriv_ports[i]) { 2572 port++; 2573 /* 2574 * Make sure that the port is in the 2575 * valid range. 2576 */ 2577 goto retry; 2578 } 2579 } 2580 2581 if (is_system_labeled() && 2582 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 2583 port, IPPROTO_UDP, B_TRUE)) != 0) { 2584 port = nextport; 2585 goto retry; 2586 } 2587 2588 return (port); 2589 } 2590 2591 /* 2592 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 2593 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 2594 * the TPI options, otherwise we take them from msg_control. 2595 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 2596 * Always consumes mp; never consumes tudr_mp. 2597 */ 2598 static int 2599 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, 2600 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) 2601 { 2602 udp_t *udp = connp->conn_udp; 2603 udp_stack_t *us = udp->udp_us; 2604 int error; 2605 ip_xmit_attr_t *ixa; 2606 ip_pkt_t *ipp; 2607 in6_addr_t v6src; 2608 in6_addr_t v6dst; 2609 in6_addr_t v6nexthop; 2610 in_port_t dstport; 2611 uint32_t flowinfo; 2612 uint_t srcid; 2613 int is_absreq_failure = 0; 2614 conn_opt_arg_t coas, *coa; 2615 2616 ASSERT(tudr_mp != NULL || msg != NULL); 2617 2618 /* 2619 * Get ixa before checking state to handle a disconnect race. 2620 * 2621 * We need an exclusive copy of conn_ixa since the ancillary data 2622 * options might modify it. That copy has no pointers hence we 2623 * need to set them up once we've parsed the ancillary data. 2624 */ 2625 ixa = conn_get_ixa_exclusive(connp); 2626 if (ixa == NULL) { 2627 UDPS_BUMP_MIB(us, udpOutErrors); 2628 freemsg(mp); 2629 return (ENOMEM); 2630 } 2631 ASSERT(cr != NULL); 2632 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2633 ixa->ixa_cred = cr; 2634 ixa->ixa_cpid = pid; 2635 if (is_system_labeled()) { 2636 /* We need to restart with a label based on the cred */ 2637 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 2638 } 2639 2640 /* In case previous destination was multicast or multirt */ 2641 ip_attr_newdst(ixa); 2642 2643 /* Get a copy of conn_xmit_ipp since the options might change it */ 2644 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); 2645 if (ipp == NULL) { 2646 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2647 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2648 ixa->ixa_cpid = connp->conn_cpid; 2649 ixa_refrele(ixa); 2650 UDPS_BUMP_MIB(us, udpOutErrors); 2651 freemsg(mp); 2652 return (ENOMEM); 2653 } 2654 mutex_enter(&connp->conn_lock); 2655 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); 2656 mutex_exit(&connp->conn_lock); 2657 if (error != 0) { 2658 UDPS_BUMP_MIB(us, udpOutErrors); 2659 freemsg(mp); 2660 goto done; 2661 } 2662 2663 /* 2664 * Parse the options and update ixa and ipp as a result. 2665 * Note that ixa_tsl can be updated if SCM_UCRED. 2666 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. 2667 */ 2668 2669 coa = &coas; 2670 coa->coa_connp = connp; 2671 coa->coa_ixa = ixa; 2672 coa->coa_ipp = ipp; 2673 coa->coa_ancillary = B_TRUE; 2674 coa->coa_changed = 0; 2675 2676 if (msg != NULL) { 2677 error = process_auxiliary_options(connp, msg->msg_control, 2678 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); 2679 } else { 2680 struct T_unitdata_req *tudr; 2681 2682 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; 2683 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 2684 error = tpi_optcom_buf(connp->conn_wq, tudr_mp, 2685 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, 2686 coa, &is_absreq_failure); 2687 } 2688 if (error != 0) { 2689 /* 2690 * Note: No special action needed in this 2691 * module for "is_absreq_failure" 2692 */ 2693 freemsg(mp); 2694 UDPS_BUMP_MIB(us, udpOutErrors); 2695 goto done; 2696 } 2697 ASSERT(is_absreq_failure == 0); 2698 2699 mutex_enter(&connp->conn_lock); 2700 /* 2701 * If laddr is unspecified then we look at sin6_src_id. 2702 * We will give precedence to a source address set with IPV6_PKTINFO 2703 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 2704 * want ip_attr_connect to select a source (since it can fail) when 2705 * IPV6_PKTINFO is specified. 2706 * If this doesn't result in a source address then we get a source 2707 * from ip_attr_connect() below. 2708 */ 2709 v6src = connp->conn_saddr_v6; 2710 if (sin != NULL) { 2711 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 2712 dstport = sin->sin_port; 2713 flowinfo = 0; 2714 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 2715 ixa->ixa_flags |= IXAF_IS_IPV4; 2716 } else if (sin6 != NULL) { 2717 v6dst = sin6->sin6_addr; 2718 dstport = sin6->sin6_port; 2719 flowinfo = sin6->sin6_flowinfo; 2720 srcid = sin6->__sin6_src_id; 2721 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 2722 ixa->ixa_scopeid = sin6->sin6_scope_id; 2723 ixa->ixa_flags |= IXAF_SCOPEID_SET; 2724 } else { 2725 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 2726 } 2727 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 2728 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 2729 connp->conn_netstack); 2730 } 2731 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 2732 ixa->ixa_flags |= IXAF_IS_IPV4; 2733 else 2734 ixa->ixa_flags &= ~IXAF_IS_IPV4; 2735 } else { 2736 /* Connected case */ 2737 v6dst = connp->conn_faddr_v6; 2738 dstport = connp->conn_fport; 2739 flowinfo = connp->conn_flowinfo; 2740 } 2741 mutex_exit(&connp->conn_lock); 2742 2743 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ 2744 if (ipp->ipp_fields & IPPF_ADDR) { 2745 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2746 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 2747 v6src = ipp->ipp_addr; 2748 } else { 2749 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 2750 v6src = ipp->ipp_addr; 2751 } 2752 } 2753 2754 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop); 2755 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 2756 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 2757 2758 switch (error) { 2759 case 0: 2760 break; 2761 case EADDRNOTAVAIL: 2762 /* 2763 * IXAF_VERIFY_SOURCE tells us to pick a better source. 2764 * Don't have the application see that errno 2765 */ 2766 error = ENETUNREACH; 2767 goto failed; 2768 case ENETDOWN: 2769 /* 2770 * Have !ipif_addr_ready address; drop packet silently 2771 * until we can get applications to not send until we 2772 * are ready. 2773 */ 2774 error = 0; 2775 goto failed; 2776 case EHOSTUNREACH: 2777 case ENETUNREACH: 2778 if (ixa->ixa_ire != NULL) { 2779 /* 2780 * Let conn_ip_output/ire_send_noroute return 2781 * the error and send any local ICMP error. 2782 */ 2783 error = 0; 2784 break; 2785 } 2786 /* FALLTHRU */ 2787 default: 2788 failed: 2789 freemsg(mp); 2790 UDPS_BUMP_MIB(us, udpOutErrors); 2791 goto done; 2792 } 2793 2794 /* 2795 * We might be going to a different destination than last time, 2796 * thus check that TX allows the communication and compute any 2797 * needed label. 2798 * 2799 * TSOL Note: We have an exclusive ipp and ixa for this thread so we 2800 * don't have to worry about concurrent threads. 2801 */ 2802 if (is_system_labeled()) { 2803 /* Using UDP MLP requires SCM_UCRED from user */ 2804 if (connp->conn_mlp_type != mlptSingle && 2805 !((ixa->ixa_flags & IXAF_UCRED_TSL))) { 2806 UDPS_BUMP_MIB(us, udpOutErrors); 2807 error = ECONNREFUSED; 2808 freemsg(mp); 2809 goto done; 2810 } 2811 /* 2812 * Check whether Trusted Solaris policy allows communication 2813 * with this host, and pretend that the destination is 2814 * unreachable if not. 2815 * Compute any needed label and place it in ipp_label_v4/v6. 2816 * 2817 * Later conn_build_hdr_template/conn_prepend_hdr takes 2818 * ipp_label_v4/v6 to form the packet. 2819 * 2820 * Tsol note: We have ipp structure local to this thread so 2821 * no locking is needed. 2822 */ 2823 error = conn_update_label(connp, ixa, &v6dst, ipp); 2824 if (error != 0) { 2825 freemsg(mp); 2826 UDPS_BUMP_MIB(us, udpOutErrors); 2827 goto done; 2828 } 2829 } 2830 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport, 2831 flowinfo, mp, &error); 2832 if (mp == NULL) { 2833 ASSERT(error != 0); 2834 UDPS_BUMP_MIB(us, udpOutErrors); 2835 goto done; 2836 } 2837 if (ixa->ixa_pktlen > IP_MAXPACKET) { 2838 error = EMSGSIZE; 2839 UDPS_BUMP_MIB(us, udpOutErrors); 2840 freemsg(mp); 2841 goto done; 2842 } 2843 /* We're done. Pass the packet to ip. */ 2844 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 2845 2846 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 2847 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 2848 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 2849 2850 error = conn_ip_output(mp, ixa); 2851 /* No udpOutErrors if an error since IP increases its error counter */ 2852 switch (error) { 2853 case 0: 2854 break; 2855 case EWOULDBLOCK: 2856 (void) ixa_check_drain_insert(connp, ixa); 2857 error = 0; 2858 break; 2859 case EADDRNOTAVAIL: 2860 /* 2861 * IXAF_VERIFY_SOURCE tells us to pick a better source. 2862 * Don't have the application see that errno 2863 */ 2864 error = ENETUNREACH; 2865 /* FALLTHRU */ 2866 default: 2867 mutex_enter(&connp->conn_lock); 2868 /* 2869 * Clear the source and v6lastdst so we call ip_attr_connect 2870 * for the next packet and try to pick a better source. 2871 */ 2872 if (connp->conn_mcbc_bind) 2873 connp->conn_saddr_v6 = ipv6_all_zeros; 2874 else 2875 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 2876 connp->conn_v6lastdst = ipv6_all_zeros; 2877 mutex_exit(&connp->conn_lock); 2878 break; 2879 } 2880 done: 2881 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2882 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2883 ixa->ixa_cpid = connp->conn_cpid; 2884 ixa_refrele(ixa); 2885 ip_pkt_free(ipp); 2886 kmem_free(ipp, sizeof (*ipp)); 2887 return (error); 2888 } 2889 2890 /* 2891 * Handle sending an M_DATA for a connected socket. 2892 * Handles both IPv4 and IPv6. 2893 */ 2894 static int 2895 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) 2896 { 2897 udp_t *udp = connp->conn_udp; 2898 udp_stack_t *us = udp->udp_us; 2899 int error; 2900 ip_xmit_attr_t *ixa; 2901 2902 /* 2903 * If no other thread is using conn_ixa this just gets a reference to 2904 * conn_ixa. Otherwise we get a safe copy of conn_ixa. 2905 */ 2906 ixa = conn_get_ixa(connp, B_FALSE); 2907 if (ixa == NULL) { 2908 UDPS_BUMP_MIB(us, udpOutErrors); 2909 freemsg(mp); 2910 return (ENOMEM); 2911 } 2912 2913 ASSERT(cr != NULL); 2914 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2915 ixa->ixa_cred = cr; 2916 ixa->ixa_cpid = pid; 2917 2918 mutex_enter(&connp->conn_lock); 2919 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6, 2920 connp->conn_fport, connp->conn_flowinfo, &error); 2921 2922 if (mp == NULL) { 2923 ASSERT(error != 0); 2924 mutex_exit(&connp->conn_lock); 2925 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2926 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2927 ixa->ixa_cpid = connp->conn_cpid; 2928 ixa_refrele(ixa); 2929 UDPS_BUMP_MIB(us, udpOutErrors); 2930 freemsg(mp); 2931 return (error); 2932 } 2933 2934 /* 2935 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 2936 * safe copy, then we need to fill in any pointers in it. 2937 */ 2938 if (ixa->ixa_ire == NULL) { 2939 in6_addr_t faddr, saddr; 2940 in6_addr_t nexthop; 2941 in_port_t fport; 2942 2943 saddr = connp->conn_saddr_v6; 2944 faddr = connp->conn_faddr_v6; 2945 fport = connp->conn_fport; 2946 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop); 2947 mutex_exit(&connp->conn_lock); 2948 2949 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, 2950 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | 2951 IPDF_IPSEC); 2952 switch (error) { 2953 case 0: 2954 break; 2955 case EADDRNOTAVAIL: 2956 /* 2957 * IXAF_VERIFY_SOURCE tells us to pick a better source. 2958 * Don't have the application see that errno 2959 */ 2960 error = ENETUNREACH; 2961 goto failed; 2962 case ENETDOWN: 2963 /* 2964 * Have !ipif_addr_ready address; drop packet silently 2965 * until we can get applications to not send until we 2966 * are ready. 2967 */ 2968 error = 0; 2969 goto failed; 2970 case EHOSTUNREACH: 2971 case ENETUNREACH: 2972 if (ixa->ixa_ire != NULL) { 2973 /* 2974 * Let conn_ip_output/ire_send_noroute return 2975 * the error and send any local ICMP error. 2976 */ 2977 error = 0; 2978 break; 2979 } 2980 /* FALLTHRU */ 2981 default: 2982 failed: 2983 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2984 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2985 ixa->ixa_cpid = connp->conn_cpid; 2986 ixa_refrele(ixa); 2987 freemsg(mp); 2988 UDPS_BUMP_MIB(us, udpOutErrors); 2989 return (error); 2990 } 2991 } else { 2992 /* Done with conn_t */ 2993 mutex_exit(&connp->conn_lock); 2994 } 2995 ASSERT(ixa->ixa_ire != NULL); 2996 2997 /* We're done. Pass the packet to ip. */ 2998 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 2999 3000 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3001 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3002 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3003 3004 error = conn_ip_output(mp, ixa); 3005 /* No udpOutErrors if an error since IP increases its error counter */ 3006 switch (error) { 3007 case 0: 3008 break; 3009 case EWOULDBLOCK: 3010 (void) ixa_check_drain_insert(connp, ixa); 3011 error = 0; 3012 break; 3013 case EADDRNOTAVAIL: 3014 /* 3015 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3016 * Don't have the application see that errno 3017 */ 3018 error = ENETUNREACH; 3019 break; 3020 } 3021 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3022 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3023 ixa->ixa_cpid = connp->conn_cpid; 3024 ixa_refrele(ixa); 3025 return (error); 3026 } 3027 3028 /* 3029 * Handle sending an M_DATA to the last destination. 3030 * Handles both IPv4 and IPv6. 3031 * 3032 * NOTE: The caller must hold conn_lock and we drop it here. 3033 */ 3034 static int 3035 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, 3036 ip_xmit_attr_t *ixa) 3037 { 3038 udp_t *udp = connp->conn_udp; 3039 udp_stack_t *us = udp->udp_us; 3040 int error; 3041 3042 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3043 ASSERT(ixa != NULL); 3044 3045 ASSERT(cr != NULL); 3046 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3047 ixa->ixa_cred = cr; 3048 ixa->ixa_cpid = pid; 3049 3050 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, 3051 connp->conn_lastdstport, connp->conn_lastflowinfo, &error); 3052 3053 if (mp == NULL) { 3054 ASSERT(error != 0); 3055 mutex_exit(&connp->conn_lock); 3056 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3057 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3058 ixa->ixa_cpid = connp->conn_cpid; 3059 ixa_refrele(ixa); 3060 UDPS_BUMP_MIB(us, udpOutErrors); 3061 freemsg(mp); 3062 return (error); 3063 } 3064 3065 /* 3066 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3067 * safe copy, then we need to fill in any pointers in it. 3068 */ 3069 if (ixa->ixa_ire == NULL) { 3070 in6_addr_t lastdst, lastsrc; 3071 in6_addr_t nexthop; 3072 in_port_t lastport; 3073 3074 lastsrc = connp->conn_v6lastsrc; 3075 lastdst = connp->conn_v6lastdst; 3076 lastport = connp->conn_lastdstport; 3077 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); 3078 mutex_exit(&connp->conn_lock); 3079 3080 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, 3081 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | 3082 IPDF_VERIFY_DST | IPDF_IPSEC); 3083 switch (error) { 3084 case 0: 3085 break; 3086 case EADDRNOTAVAIL: 3087 /* 3088 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3089 * Don't have the application see that errno 3090 */ 3091 error = ENETUNREACH; 3092 goto failed; 3093 case ENETDOWN: 3094 /* 3095 * Have !ipif_addr_ready address; drop packet silently 3096 * until we can get applications to not send until we 3097 * are ready. 3098 */ 3099 error = 0; 3100 goto failed; 3101 case EHOSTUNREACH: 3102 case ENETUNREACH: 3103 if (ixa->ixa_ire != NULL) { 3104 /* 3105 * Let conn_ip_output/ire_send_noroute return 3106 * the error and send any local ICMP error. 3107 */ 3108 error = 0; 3109 break; 3110 } 3111 /* FALLTHRU */ 3112 default: 3113 failed: 3114 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3115 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3116 ixa->ixa_cpid = connp->conn_cpid; 3117 ixa_refrele(ixa); 3118 freemsg(mp); 3119 UDPS_BUMP_MIB(us, udpOutErrors); 3120 return (error); 3121 } 3122 } else { 3123 /* Done with conn_t */ 3124 mutex_exit(&connp->conn_lock); 3125 } 3126 3127 /* We're done. Pass the packet to ip. */ 3128 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 3129 3130 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3131 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3132 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3133 3134 error = conn_ip_output(mp, ixa); 3135 /* No udpOutErrors if an error since IP increases its error counter */ 3136 switch (error) { 3137 case 0: 3138 break; 3139 case EWOULDBLOCK: 3140 (void) ixa_check_drain_insert(connp, ixa); 3141 error = 0; 3142 break; 3143 case EADDRNOTAVAIL: 3144 /* 3145 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3146 * Don't have the application see that errno 3147 */ 3148 error = ENETUNREACH; 3149 /* FALLTHRU */ 3150 default: 3151 mutex_enter(&connp->conn_lock); 3152 /* 3153 * Clear the source and v6lastdst so we call ip_attr_connect 3154 * for the next packet and try to pick a better source. 3155 */ 3156 if (connp->conn_mcbc_bind) 3157 connp->conn_saddr_v6 = ipv6_all_zeros; 3158 else 3159 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3160 connp->conn_v6lastdst = ipv6_all_zeros; 3161 mutex_exit(&connp->conn_lock); 3162 break; 3163 } 3164 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3165 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3166 ixa->ixa_cpid = connp->conn_cpid; 3167 ixa_refrele(ixa); 3168 return (error); 3169 } 3170 3171 3172 /* 3173 * Prepend the header template and then fill in the source and 3174 * flowinfo. The caller needs to handle the destination address since 3175 * it's setting is different if rthdr or source route. 3176 * 3177 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET. 3178 * When it returns NULL it sets errorp. 3179 */ 3180 static mblk_t * 3181 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, 3182 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) 3183 { 3184 udp_t *udp = connp->conn_udp; 3185 udp_stack_t *us = udp->udp_us; 3186 boolean_t insert_spi = udp->udp_nat_t_endpoint; 3187 uint_t pktlen; 3188 uint_t alloclen; 3189 uint_t copylen; 3190 uint8_t *iph; 3191 uint_t ip_hdr_length; 3192 udpha_t *udpha; 3193 uint32_t cksum; 3194 ip_pkt_t *ipp; 3195 3196 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3197 3198 /* 3199 * Copy the header template and leave space for an SPI 3200 */ 3201 copylen = connp->conn_ht_iphc_len; 3202 alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0); 3203 pktlen = alloclen + msgdsize(mp); 3204 if (pktlen > IP_MAXPACKET) { 3205 freemsg(mp); 3206 *errorp = EMSGSIZE; 3207 return (NULL); 3208 } 3209 ixa->ixa_pktlen = pktlen; 3210 3211 /* check/fix buffer config, setup pointers into it */ 3212 iph = mp->b_rptr - alloclen; 3213 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { 3214 mblk_t *mp1; 3215 3216 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); 3217 if (mp1 == NULL) { 3218 freemsg(mp); 3219 *errorp = ENOMEM; 3220 return (NULL); 3221 } 3222 mp1->b_wptr = DB_LIM(mp1); 3223 mp1->b_cont = mp; 3224 mp = mp1; 3225 iph = (mp->b_wptr - alloclen); 3226 } 3227 mp->b_rptr = iph; 3228 bcopy(connp->conn_ht_iphc, iph, copylen); 3229 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); 3230 3231 ixa->ixa_ip_hdr_length = ip_hdr_length; 3232 udpha = (udpha_t *)(iph + ip_hdr_length); 3233 3234 /* 3235 * Setup header length and prepare for ULP checksum done in IP. 3236 * udp_build_hdr_template has already massaged any routing header 3237 * and placed the result in conn_sum. 3238 * 3239 * We make it easy for IP to include our pseudo header 3240 * by putting our length in uha_checksum. 3241 */ 3242 cksum = pktlen - ip_hdr_length; 3243 udpha->uha_length = htons(cksum); 3244 3245 cksum += connp->conn_sum; 3246 cksum = (cksum >> 16) + (cksum & 0xFFFF); 3247 ASSERT(cksum < 0x10000); 3248 3249 ipp = &connp->conn_xmit_ipp; 3250 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3251 ipha_t *ipha = (ipha_t *)iph; 3252 3253 ipha->ipha_length = htons((uint16_t)pktlen); 3254 3255 /* IP does the checksum if uha_checksum is non-zero */ 3256 if (us->us_do_checksum) 3257 udpha->uha_checksum = htons(cksum); 3258 3259 /* if IP_PKTINFO specified an addres it wins over bind() */ 3260 if ((ipp->ipp_fields & IPPF_ADDR) && 3261 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3262 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); 3263 ipha->ipha_src = ipp->ipp_addr_v4; 3264 } else { 3265 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 3266 } 3267 } else { 3268 ip6_t *ip6h = (ip6_t *)iph; 3269 3270 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); 3271 udpha->uha_checksum = htons(cksum); 3272 3273 /* if IP_PKTINFO specified an addres it wins over bind() */ 3274 if ((ipp->ipp_fields & IPPF_ADDR) && 3275 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3276 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); 3277 ip6h->ip6_src = ipp->ipp_addr; 3278 } else { 3279 ip6h->ip6_src = *v6src; 3280 } 3281 ip6h->ip6_vcf = 3282 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 3283 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 3284 if (ipp->ipp_fields & IPPF_TCLASS) { 3285 /* Overrides the class part of flowinfo */ 3286 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 3287 ipp->ipp_tclass); 3288 } 3289 } 3290 3291 /* Insert all-0s SPI now. */ 3292 if (insert_spi) 3293 *((uint32_t *)(udpha + 1)) = 0; 3294 3295 udpha->uha_dst_port = dstport; 3296 return (mp); 3297 } 3298 3299 /* 3300 * Send a T_UDERR_IND in response to an M_DATA 3301 */ 3302 static void 3303 udp_ud_err_connected(conn_t *connp, t_scalar_t error) 3304 { 3305 struct sockaddr_storage ss; 3306 sin_t *sin; 3307 sin6_t *sin6; 3308 struct sockaddr *addr; 3309 socklen_t addrlen; 3310 mblk_t *mp1; 3311 3312 mutex_enter(&connp->conn_lock); 3313 /* Initialize addr and addrlen as if they're passed in */ 3314 if (connp->conn_family == AF_INET) { 3315 sin = (sin_t *)&ss; 3316 *sin = sin_null; 3317 sin->sin_family = AF_INET; 3318 sin->sin_port = connp->conn_fport; 3319 sin->sin_addr.s_addr = connp->conn_faddr_v4; 3320 addr = (struct sockaddr *)sin; 3321 addrlen = sizeof (*sin); 3322 } else { 3323 sin6 = (sin6_t *)&ss; 3324 *sin6 = sin6_null; 3325 sin6->sin6_family = AF_INET6; 3326 sin6->sin6_port = connp->conn_fport; 3327 sin6->sin6_flowinfo = connp->conn_flowinfo; 3328 sin6->sin6_addr = connp->conn_faddr_v6; 3329 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && 3330 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 3331 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; 3332 } else { 3333 sin6->sin6_scope_id = 0; 3334 } 3335 sin6->__sin6_src_id = 0; 3336 addr = (struct sockaddr *)sin6; 3337 addrlen = sizeof (*sin6); 3338 } 3339 mutex_exit(&connp->conn_lock); 3340 3341 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); 3342 if (mp1 != NULL) 3343 putnext(connp->conn_rq, mp1); 3344 } 3345 3346 /* 3347 * This routine handles all messages passed downstream. It either 3348 * consumes the message or passes it downstream; it never queues a 3349 * a message. 3350 * 3351 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 3352 * is valid when we are directly beneath the stream head, and thus sockfs 3353 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3354 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3355 * connected endpoints. 3356 */ 3357 void 3358 udp_wput(queue_t *q, mblk_t *mp) 3359 { 3360 sin6_t *sin6; 3361 sin_t *sin = NULL; 3362 uint_t srcid; 3363 conn_t *connp = Q_TO_CONN(q); 3364 udp_t *udp = connp->conn_udp; 3365 int error = 0; 3366 struct sockaddr *addr = NULL; 3367 socklen_t addrlen; 3368 udp_stack_t *us = udp->udp_us; 3369 struct T_unitdata_req *tudr; 3370 mblk_t *data_mp; 3371 ushort_t ipversion; 3372 cred_t *cr; 3373 pid_t pid; 3374 3375 /* 3376 * We directly handle several cases here: T_UNITDATA_REQ message 3377 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3378 * socket. 3379 */ 3380 switch (DB_TYPE(mp)) { 3381 case M_DATA: 3382 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3383 /* Not connected; address is required */ 3384 UDPS_BUMP_MIB(us, udpOutErrors); 3385 UDP_DBGSTAT(us, udp_data_notconn); 3386 UDP_STAT(us, udp_out_err_notconn); 3387 freemsg(mp); 3388 return; 3389 } 3390 /* 3391 * All Solaris components should pass a db_credp 3392 * for this message, hence we ASSERT. 3393 * On production kernels we return an error to be robust against 3394 * random streams modules sitting on top of us. 3395 */ 3396 cr = msg_getcred(mp, &pid); 3397 ASSERT(cr != NULL); 3398 if (cr == NULL) { 3399 UDPS_BUMP_MIB(us, udpOutErrors); 3400 freemsg(mp); 3401 return; 3402 } 3403 ASSERT(udp->udp_issocket); 3404 UDP_DBGSTAT(us, udp_data_conn); 3405 error = udp_output_connected(connp, mp, cr, pid); 3406 if (error != 0) { 3407 UDP_STAT(us, udp_out_err_output); 3408 if (connp->conn_rq != NULL) 3409 udp_ud_err_connected(connp, (t_scalar_t)error); 3410 #ifdef DEBUG 3411 printf("udp_output_connected returned %d\n", error); 3412 #endif 3413 } 3414 return; 3415 3416 case M_PROTO: 3417 case M_PCPROTO: 3418 tudr = (struct T_unitdata_req *)mp->b_rptr; 3419 if (MBLKL(mp) < sizeof (*tudr) || 3420 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3421 udp_wput_other(q, mp); 3422 return; 3423 } 3424 break; 3425 3426 default: 3427 udp_wput_other(q, mp); 3428 return; 3429 } 3430 3431 /* Handle valid T_UNITDATA_REQ here */ 3432 data_mp = mp->b_cont; 3433 if (data_mp == NULL) { 3434 error = EPROTO; 3435 goto ud_error2; 3436 } 3437 mp->b_cont = NULL; 3438 3439 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3440 error = EADDRNOTAVAIL; 3441 goto ud_error2; 3442 } 3443 3444 /* 3445 * All Solaris components should pass a db_credp 3446 * for this TPI message, hence we should ASSERT. 3447 * However, RPC (svc_clts_ksend) does this odd thing where it 3448 * passes the options from a T_UNITDATA_IND unchanged in a 3449 * T_UNITDATA_REQ. While that is the right thing to do for 3450 * some options, SCM_UCRED being the key one, this also makes it 3451 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3452 */ 3453 cr = msg_getcred(mp, &pid); 3454 if (cr == NULL) { 3455 cr = connp->conn_cred; 3456 pid = connp->conn_cpid; 3457 } 3458 3459 /* 3460 * If a port has not been bound to the stream, fail. 3461 * This is not a problem when sockfs is directly 3462 * above us, because it will ensure that the socket 3463 * is first bound before allowing data to be sent. 3464 */ 3465 if (udp->udp_state == TS_UNBND) { 3466 error = EPROTO; 3467 goto ud_error2; 3468 } 3469 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3470 addrlen = tudr->DEST_length; 3471 3472 switch (connp->conn_family) { 3473 case AF_INET6: 3474 sin6 = (sin6_t *)addr; 3475 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3476 (sin6->sin6_family != AF_INET6)) { 3477 error = EADDRNOTAVAIL; 3478 goto ud_error2; 3479 } 3480 3481 srcid = sin6->__sin6_src_id; 3482 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3483 /* 3484 * Destination is a non-IPv4-compatible IPv6 address. 3485 * Send out an IPv6 format packet. 3486 */ 3487 3488 /* 3489 * If the local address is a mapped address return 3490 * an error. 3491 * It would be possible to send an IPv6 packet but the 3492 * response would never make it back to the application 3493 * since it is bound to a mapped address. 3494 */ 3495 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3496 error = EADDRNOTAVAIL; 3497 goto ud_error2; 3498 } 3499 3500 UDP_DBGSTAT(us, udp_out_ipv6); 3501 3502 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3503 sin6->sin6_addr = ipv6_loopback; 3504 ipversion = IPV6_VERSION; 3505 } else { 3506 if (connp->conn_ipv6_v6only) { 3507 error = EADDRNOTAVAIL; 3508 goto ud_error2; 3509 } 3510 3511 /* 3512 * If the local address is not zero or a mapped address 3513 * return an error. It would be possible to send an 3514 * IPv4 packet but the response would never make it 3515 * back to the application since it is bound to a 3516 * non-mapped address. 3517 */ 3518 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3519 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3520 error = EADDRNOTAVAIL; 3521 goto ud_error2; 3522 } 3523 UDP_DBGSTAT(us, udp_out_mapped); 3524 3525 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3526 V4_PART_OF_V6(sin6->sin6_addr) = 3527 htonl(INADDR_LOOPBACK); 3528 } 3529 ipversion = IPV4_VERSION; 3530 } 3531 3532 if (tudr->OPT_length != 0) { 3533 /* 3534 * If we are connected then the destination needs to be 3535 * the same as the connected one. 3536 */ 3537 if (udp->udp_state == TS_DATA_XFER && 3538 !conn_same_as_last_v6(connp, sin6)) { 3539 error = EISCONN; 3540 goto ud_error2; 3541 } 3542 UDP_STAT(us, udp_out_opt); 3543 error = udp_output_ancillary(connp, NULL, sin6, 3544 data_mp, mp, NULL, cr, pid); 3545 } else { 3546 ip_xmit_attr_t *ixa; 3547 3548 /* 3549 * We have to allocate an ip_xmit_attr_t before we grab 3550 * conn_lock and we need to hold conn_lock once we've 3551 * checked conn_same_as_last_v6 to handle concurrent 3552 * send* calls on a socket. 3553 */ 3554 ixa = conn_get_ixa(connp, B_FALSE); 3555 if (ixa == NULL) { 3556 error = ENOMEM; 3557 goto ud_error2; 3558 } 3559 mutex_enter(&connp->conn_lock); 3560 3561 if (conn_same_as_last_v6(connp, sin6) && 3562 connp->conn_lastsrcid == srcid && 3563 ipsec_outbound_policy_current(ixa)) { 3564 UDP_DBGSTAT(us, udp_out_lastdst); 3565 /* udp_output_lastdst drops conn_lock */ 3566 error = udp_output_lastdst(connp, data_mp, cr, 3567 pid, ixa); 3568 } else { 3569 UDP_DBGSTAT(us, udp_out_diffdst); 3570 /* udp_output_newdst drops conn_lock */ 3571 error = udp_output_newdst(connp, data_mp, NULL, 3572 sin6, ipversion, cr, pid, ixa); 3573 } 3574 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3575 } 3576 if (error == 0) { 3577 freeb(mp); 3578 return; 3579 } 3580 break; 3581 3582 case AF_INET: 3583 sin = (sin_t *)addr; 3584 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 3585 (sin->sin_family != AF_INET)) { 3586 error = EADDRNOTAVAIL; 3587 goto ud_error2; 3588 } 3589 UDP_DBGSTAT(us, udp_out_ipv4); 3590 if (sin->sin_addr.s_addr == INADDR_ANY) 3591 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 3592 ipversion = IPV4_VERSION; 3593 3594 srcid = 0; 3595 if (tudr->OPT_length != 0) { 3596 /* 3597 * If we are connected then the destination needs to be 3598 * the same as the connected one. 3599 */ 3600 if (udp->udp_state == TS_DATA_XFER && 3601 !conn_same_as_last_v4(connp, sin)) { 3602 error = EISCONN; 3603 goto ud_error2; 3604 } 3605 UDP_STAT(us, udp_out_opt); 3606 error = udp_output_ancillary(connp, sin, NULL, 3607 data_mp, mp, NULL, cr, pid); 3608 } else { 3609 ip_xmit_attr_t *ixa; 3610 3611 /* 3612 * We have to allocate an ip_xmit_attr_t before we grab 3613 * conn_lock and we need to hold conn_lock once we've 3614 * checked conn_same_as_last_v4 to handle concurrent 3615 * send* calls on a socket. 3616 */ 3617 ixa = conn_get_ixa(connp, B_FALSE); 3618 if (ixa == NULL) { 3619 error = ENOMEM; 3620 goto ud_error2; 3621 } 3622 mutex_enter(&connp->conn_lock); 3623 3624 if (conn_same_as_last_v4(connp, sin) && 3625 ipsec_outbound_policy_current(ixa)) { 3626 UDP_DBGSTAT(us, udp_out_lastdst); 3627 /* udp_output_lastdst drops conn_lock */ 3628 error = udp_output_lastdst(connp, data_mp, cr, 3629 pid, ixa); 3630 } else { 3631 UDP_DBGSTAT(us, udp_out_diffdst); 3632 /* udp_output_newdst drops conn_lock */ 3633 error = udp_output_newdst(connp, data_mp, sin, 3634 NULL, ipversion, cr, pid, ixa); 3635 } 3636 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3637 } 3638 if (error == 0) { 3639 freeb(mp); 3640 return; 3641 } 3642 break; 3643 } 3644 UDP_STAT(us, udp_out_err_output); 3645 ASSERT(mp != NULL); 3646 /* mp is freed by the following routine */ 3647 udp_ud_err(q, mp, (t_scalar_t)error); 3648 return; 3649 3650 ud_error2: 3651 UDPS_BUMP_MIB(us, udpOutErrors); 3652 freemsg(data_mp); 3653 UDP_STAT(us, udp_out_err_output); 3654 ASSERT(mp != NULL); 3655 /* mp is freed by the following routine */ 3656 udp_ud_err(q, mp, (t_scalar_t)error); 3657 } 3658 3659 /* 3660 * Handle the case of the IP address, port, flow label being different 3661 * for both IPv4 and IPv6. 3662 * 3663 * NOTE: The caller must hold conn_lock and we drop it here. 3664 */ 3665 static int 3666 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 3667 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 3668 { 3669 uint_t srcid; 3670 uint32_t flowinfo; 3671 udp_t *udp = connp->conn_udp; 3672 int error = 0; 3673 ip_xmit_attr_t *oldixa; 3674 udp_stack_t *us = udp->udp_us; 3675 in6_addr_t v6src; 3676 in6_addr_t v6dst; 3677 in6_addr_t v6nexthop; 3678 in_port_t dstport; 3679 3680 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3681 ASSERT(ixa != NULL); 3682 /* 3683 * We hold conn_lock across all the use and modifications of 3684 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 3685 * stay consistent. 3686 */ 3687 3688 ASSERT(cr != NULL); 3689 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3690 ixa->ixa_cred = cr; 3691 ixa->ixa_cpid = pid; 3692 if (is_system_labeled()) { 3693 /* We need to restart with a label based on the cred */ 3694 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3695 } 3696 3697 /* 3698 * If we are connected then the destination needs to be the 3699 * same as the connected one, which is not the case here since we 3700 * checked for that above. 3701 */ 3702 if (udp->udp_state == TS_DATA_XFER) { 3703 mutex_exit(&connp->conn_lock); 3704 error = EISCONN; 3705 goto ud_error; 3706 } 3707 3708 /* In case previous destination was multicast or multirt */ 3709 ip_attr_newdst(ixa); 3710 3711 /* 3712 * If laddr is unspecified then we look at sin6_src_id. 3713 * We will give precedence to a source address set with IPV6_PKTINFO 3714 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 3715 * want ip_attr_connect to select a source (since it can fail) when 3716 * IPV6_PKTINFO is specified. 3717 * If this doesn't result in a source address then we get a source 3718 * from ip_attr_connect() below. 3719 */ 3720 v6src = connp->conn_saddr_v6; 3721 if (sin != NULL) { 3722 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 3723 dstport = sin->sin_port; 3724 flowinfo = 0; 3725 srcid = 0; 3726 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3727 if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) { 3728 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3729 connp->conn_netstack); 3730 } 3731 ixa->ixa_flags |= IXAF_IS_IPV4; 3732 } else { 3733 v6dst = sin6->sin6_addr; 3734 dstport = sin6->sin6_port; 3735 flowinfo = sin6->sin6_flowinfo; 3736 srcid = sin6->__sin6_src_id; 3737 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 3738 ixa->ixa_scopeid = sin6->sin6_scope_id; 3739 ixa->ixa_flags |= IXAF_SCOPEID_SET; 3740 } else { 3741 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3742 } 3743 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 3744 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3745 connp->conn_netstack); 3746 } 3747 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 3748 ixa->ixa_flags |= IXAF_IS_IPV4; 3749 else 3750 ixa->ixa_flags &= ~IXAF_IS_IPV4; 3751 } 3752 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ 3753 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) { 3754 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 3755 3756 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3757 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3758 v6src = ipp->ipp_addr; 3759 } else { 3760 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3761 v6src = ipp->ipp_addr; 3762 } 3763 } 3764 3765 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 3766 mutex_exit(&connp->conn_lock); 3767 3768 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 3769 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 3770 switch (error) { 3771 case 0: 3772 break; 3773 case EADDRNOTAVAIL: 3774 /* 3775 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3776 * Don't have the application see that errno 3777 */ 3778 error = ENETUNREACH; 3779 goto failed; 3780 case ENETDOWN: 3781 /* 3782 * Have !ipif_addr_ready address; drop packet silently 3783 * until we can get applications to not send until we 3784 * are ready. 3785 */ 3786 error = 0; 3787 goto failed; 3788 case EHOSTUNREACH: 3789 case ENETUNREACH: 3790 if (ixa->ixa_ire != NULL) { 3791 /* 3792 * Let conn_ip_output/ire_send_noroute return 3793 * the error and send any local ICMP error. 3794 */ 3795 error = 0; 3796 break; 3797 } 3798 /* FALLTHRU */ 3799 failed: 3800 default: 3801 goto ud_error; 3802 } 3803 3804 3805 /* 3806 * Cluster note: we let the cluster hook know that we are sending to a 3807 * new address and/or port. 3808 */ 3809 if (cl_inet_connect2 != NULL) { 3810 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 3811 if (error != 0) { 3812 error = EHOSTUNREACH; 3813 goto ud_error; 3814 } 3815 } 3816 3817 mutex_enter(&connp->conn_lock); 3818 /* 3819 * While we dropped the lock some other thread might have connected 3820 * this socket. If so we bail out with EISCONN to ensure that the 3821 * connecting thread is the one that updates conn_ixa, conn_ht_* 3822 * and conn_*last*. 3823 */ 3824 if (udp->udp_state == TS_DATA_XFER) { 3825 mutex_exit(&connp->conn_lock); 3826 error = EISCONN; 3827 goto ud_error; 3828 } 3829 3830 /* 3831 * We need to rebuild the headers if 3832 * - we are labeling packets (could be different for different 3833 * destinations) 3834 * - we have a source route (or routing header) since we need to 3835 * massage that to get the pseudo-header checksum 3836 * - the IP version is different than the last time 3837 * - a socket option with COA_HEADER_CHANGED has been set which 3838 * set conn_v6lastdst to zero. 3839 * 3840 * Otherwise the prepend function will just update the src, dst, 3841 * dstport, and flow label. 3842 */ 3843 if (is_system_labeled()) { 3844 /* TX MLP requires SCM_UCRED and don't have that here */ 3845 if (connp->conn_mlp_type != mlptSingle) { 3846 mutex_exit(&connp->conn_lock); 3847 error = ECONNREFUSED; 3848 goto ud_error; 3849 } 3850 /* 3851 * Check whether Trusted Solaris policy allows communication 3852 * with this host, and pretend that the destination is 3853 * unreachable if not. 3854 * Compute any needed label and place it in ipp_label_v4/v6. 3855 * 3856 * Later conn_build_hdr_template/conn_prepend_hdr takes 3857 * ipp_label_v4/v6 to form the packet. 3858 * 3859 * Tsol note: Since we hold conn_lock we know no other 3860 * thread manipulates conn_xmit_ipp. 3861 */ 3862 error = conn_update_label(connp, ixa, &v6dst, 3863 &connp->conn_xmit_ipp); 3864 if (error != 0) { 3865 mutex_exit(&connp->conn_lock); 3866 goto ud_error; 3867 } 3868 /* Rebuild the header template */ 3869 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 3870 flowinfo); 3871 if (error != 0) { 3872 mutex_exit(&connp->conn_lock); 3873 goto ud_error; 3874 } 3875 } else if ((connp->conn_xmit_ipp.ipp_fields & 3876 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 3877 ipversion != connp->conn_lastipversion || 3878 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 3879 /* Rebuild the header template */ 3880 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 3881 flowinfo); 3882 if (error != 0) { 3883 mutex_exit(&connp->conn_lock); 3884 goto ud_error; 3885 } 3886 } else { 3887 /* Simply update the destination address if no source route */ 3888 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3889 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 3890 3891 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 3892 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 3893 ipha->ipha_fragment_offset_and_flags |= 3894 IPH_DF_HTONS; 3895 } else { 3896 ipha->ipha_fragment_offset_and_flags &= 3897 ~IPH_DF_HTONS; 3898 } 3899 } else { 3900 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 3901 ip6h->ip6_dst = v6dst; 3902 } 3903 } 3904 3905 /* 3906 * Remember the dst/dstport etc which corresponds to the built header 3907 * template and conn_ixa. 3908 */ 3909 oldixa = conn_replace_ixa(connp, ixa); 3910 connp->conn_v6lastdst = v6dst; 3911 connp->conn_lastipversion = ipversion; 3912 connp->conn_lastdstport = dstport; 3913 connp->conn_lastflowinfo = flowinfo; 3914 connp->conn_lastscopeid = ixa->ixa_scopeid; 3915 connp->conn_lastsrcid = srcid; 3916 /* Also remember a source to use together with lastdst */ 3917 connp->conn_v6lastsrc = v6src; 3918 3919 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 3920 dstport, flowinfo, &error); 3921 3922 /* Done with conn_t */ 3923 mutex_exit(&connp->conn_lock); 3924 ixa_refrele(oldixa); 3925 3926 if (data_mp == NULL) { 3927 ASSERT(error != 0); 3928 goto ud_error; 3929 } 3930 3931 /* We're done. Pass the packet to ip. */ 3932 UDPS_BUMP_MIB(us, udpHCOutDatagrams); 3933 3934 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3935 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *, 3936 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]); 3937 3938 error = conn_ip_output(data_mp, ixa); 3939 /* No udpOutErrors if an error since IP increases its error counter */ 3940 switch (error) { 3941 case 0: 3942 break; 3943 case EWOULDBLOCK: 3944 (void) ixa_check_drain_insert(connp, ixa); 3945 error = 0; 3946 break; 3947 case EADDRNOTAVAIL: 3948 /* 3949 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3950 * Don't have the application see that errno 3951 */ 3952 error = ENETUNREACH; 3953 /* FALLTHRU */ 3954 default: 3955 mutex_enter(&connp->conn_lock); 3956 /* 3957 * Clear the source and v6lastdst so we call ip_attr_connect 3958 * for the next packet and try to pick a better source. 3959 */ 3960 if (connp->conn_mcbc_bind) 3961 connp->conn_saddr_v6 = ipv6_all_zeros; 3962 else 3963 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3964 connp->conn_v6lastdst = ipv6_all_zeros; 3965 mutex_exit(&connp->conn_lock); 3966 break; 3967 } 3968 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3969 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3970 ixa->ixa_cpid = connp->conn_cpid; 3971 ixa_refrele(ixa); 3972 return (error); 3973 3974 ud_error: 3975 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3976 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3977 ixa->ixa_cpid = connp->conn_cpid; 3978 ixa_refrele(ixa); 3979 3980 freemsg(data_mp); 3981 UDPS_BUMP_MIB(us, udpOutErrors); 3982 UDP_STAT(us, udp_out_err_output); 3983 return (error); 3984 } 3985 3986 /* ARGSUSED */ 3987 static void 3988 udp_wput_fallback(queue_t *wq, mblk_t *mp) 3989 { 3990 #ifdef DEBUG 3991 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 3992 #endif 3993 freemsg(mp); 3994 } 3995 3996 3997 /* 3998 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 3999 */ 4000 static void 4001 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 4002 { 4003 void *data; 4004 mblk_t *datamp = mp->b_cont; 4005 conn_t *connp = Q_TO_CONN(q); 4006 udp_t *udp = connp->conn_udp; 4007 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 4008 4009 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 4010 cmdp->cb_error = EPROTO; 4011 qreply(q, mp); 4012 return; 4013 } 4014 data = datamp->b_rptr; 4015 4016 mutex_enter(&connp->conn_lock); 4017 switch (cmdp->cb_cmd) { 4018 case TI_GETPEERNAME: 4019 if (udp->udp_state != TS_DATA_XFER) 4020 cmdp->cb_error = ENOTCONN; 4021 else 4022 cmdp->cb_error = conn_getpeername(connp, data, 4023 &cmdp->cb_len); 4024 break; 4025 case TI_GETMYNAME: 4026 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); 4027 break; 4028 default: 4029 cmdp->cb_error = EINVAL; 4030 break; 4031 } 4032 mutex_exit(&connp->conn_lock); 4033 4034 qreply(q, mp); 4035 } 4036 4037 static void 4038 udp_use_pure_tpi(udp_t *udp) 4039 { 4040 conn_t *connp = udp->udp_connp; 4041 4042 mutex_enter(&connp->conn_lock); 4043 udp->udp_issocket = B_FALSE; 4044 mutex_exit(&connp->conn_lock); 4045 UDP_STAT(udp->udp_us, udp_sock_fallback); 4046 } 4047 4048 static void 4049 udp_wput_other(queue_t *q, mblk_t *mp) 4050 { 4051 uchar_t *rptr = mp->b_rptr; 4052 struct iocblk *iocp; 4053 conn_t *connp = Q_TO_CONN(q); 4054 udp_t *udp = connp->conn_udp; 4055 cred_t *cr; 4056 4057 switch (mp->b_datap->db_type) { 4058 case M_CMD: 4059 udp_wput_cmdblk(q, mp); 4060 return; 4061 4062 case M_PROTO: 4063 case M_PCPROTO: 4064 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4065 /* 4066 * If the message does not contain a PRIM_type, 4067 * throw it away. 4068 */ 4069 freemsg(mp); 4070 return; 4071 } 4072 switch (((t_primp_t)rptr)->type) { 4073 case T_ADDR_REQ: 4074 udp_addr_req(q, mp); 4075 return; 4076 case O_T_BIND_REQ: 4077 case T_BIND_REQ: 4078 udp_tpi_bind(q, mp); 4079 return; 4080 case T_CONN_REQ: 4081 udp_tpi_connect(q, mp); 4082 return; 4083 case T_CAPABILITY_REQ: 4084 udp_capability_req(q, mp); 4085 return; 4086 case T_INFO_REQ: 4087 udp_info_req(q, mp); 4088 return; 4089 case T_UNITDATA_REQ: 4090 /* 4091 * If a T_UNITDATA_REQ gets here, the address must 4092 * be bad. Valid T_UNITDATA_REQs are handled 4093 * in udp_wput. 4094 */ 4095 udp_ud_err(q, mp, EADDRNOTAVAIL); 4096 return; 4097 case T_UNBIND_REQ: 4098 udp_tpi_unbind(q, mp); 4099 return; 4100 case T_SVR4_OPTMGMT_REQ: 4101 /* 4102 * All Solaris components should pass a db_credp 4103 * for this TPI message, hence we ASSERT. 4104 * But in case there is some other M_PROTO that looks 4105 * like a TPI message sent by some other kernel 4106 * component, we check and return an error. 4107 */ 4108 cr = msg_getcred(mp, NULL); 4109 ASSERT(cr != NULL); 4110 if (cr == NULL) { 4111 udp_err_ack(q, mp, TSYSERR, EINVAL); 4112 return; 4113 } 4114 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 4115 cr)) { 4116 svr4_optcom_req(q, mp, cr, &udp_opt_obj); 4117 } 4118 return; 4119 4120 case T_OPTMGMT_REQ: 4121 /* 4122 * All Solaris components should pass a db_credp 4123 * for this TPI message, hence we ASSERT. 4124 * But in case there is some other M_PROTO that looks 4125 * like a TPI message sent by some other kernel 4126 * component, we check and return an error. 4127 */ 4128 cr = msg_getcred(mp, NULL); 4129 ASSERT(cr != NULL); 4130 if (cr == NULL) { 4131 udp_err_ack(q, mp, TSYSERR, EINVAL); 4132 return; 4133 } 4134 tpi_optcom_req(q, mp, cr, &udp_opt_obj); 4135 return; 4136 4137 case T_DISCON_REQ: 4138 udp_tpi_disconnect(q, mp); 4139 return; 4140 4141 /* The following TPI message is not supported by udp. */ 4142 case O_T_CONN_RES: 4143 case T_CONN_RES: 4144 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4145 return; 4146 4147 /* The following 3 TPI requests are illegal for udp. */ 4148 case T_DATA_REQ: 4149 case T_EXDATA_REQ: 4150 case T_ORDREL_REQ: 4151 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4152 return; 4153 default: 4154 break; 4155 } 4156 break; 4157 case M_FLUSH: 4158 if (*rptr & FLUSHW) 4159 flushq(q, FLUSHDATA); 4160 break; 4161 case M_IOCTL: 4162 iocp = (struct iocblk *)mp->b_rptr; 4163 switch (iocp->ioc_cmd) { 4164 case TI_GETPEERNAME: 4165 if (udp->udp_state != TS_DATA_XFER) { 4166 /* 4167 * If a default destination address has not 4168 * been associated with the stream, then we 4169 * don't know the peer's name. 4170 */ 4171 iocp->ioc_error = ENOTCONN; 4172 iocp->ioc_count = 0; 4173 mp->b_datap->db_type = M_IOCACK; 4174 qreply(q, mp); 4175 return; 4176 } 4177 /* FALLTHRU */ 4178 case TI_GETMYNAME: 4179 /* 4180 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4181 * need to copyin the user's strbuf structure. 4182 * Processing will continue in the M_IOCDATA case 4183 * below. 4184 */ 4185 mi_copyin(q, mp, NULL, 4186 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4187 return; 4188 case _SIOCSOCKFALLBACK: 4189 /* 4190 * Either sockmod is about to be popped and the 4191 * socket would now be treated as a plain stream, 4192 * or a module is about to be pushed so we have 4193 * to follow pure TPI semantics. 4194 */ 4195 if (!udp->udp_issocket) { 4196 DB_TYPE(mp) = M_IOCNAK; 4197 iocp->ioc_error = EINVAL; 4198 } else { 4199 udp_use_pure_tpi(udp); 4200 4201 DB_TYPE(mp) = M_IOCACK; 4202 iocp->ioc_error = 0; 4203 } 4204 iocp->ioc_count = 0; 4205 iocp->ioc_rval = 0; 4206 qreply(q, mp); 4207 return; 4208 default: 4209 break; 4210 } 4211 break; 4212 case M_IOCDATA: 4213 udp_wput_iocdata(q, mp); 4214 return; 4215 default: 4216 /* Unrecognized messages are passed through without change. */ 4217 break; 4218 } 4219 ip_wput_nondata(q, mp); 4220 } 4221 4222 /* 4223 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 4224 * messages. 4225 */ 4226 static void 4227 udp_wput_iocdata(queue_t *q, mblk_t *mp) 4228 { 4229 mblk_t *mp1; 4230 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4231 STRUCT_HANDLE(strbuf, sb); 4232 uint_t addrlen; 4233 conn_t *connp = Q_TO_CONN(q); 4234 udp_t *udp = connp->conn_udp; 4235 4236 /* Make sure it is one of ours. */ 4237 switch (iocp->ioc_cmd) { 4238 case TI_GETMYNAME: 4239 case TI_GETPEERNAME: 4240 break; 4241 default: 4242 ip_wput_nondata(q, mp); 4243 return; 4244 } 4245 4246 switch (mi_copy_state(q, mp, &mp1)) { 4247 case -1: 4248 return; 4249 case MI_COPY_CASE(MI_COPY_IN, 1): 4250 break; 4251 case MI_COPY_CASE(MI_COPY_OUT, 1): 4252 /* 4253 * The address has been copied out, so now 4254 * copyout the strbuf. 4255 */ 4256 mi_copyout(q, mp); 4257 return; 4258 case MI_COPY_CASE(MI_COPY_OUT, 2): 4259 /* 4260 * The address and strbuf have been copied out. 4261 * We're done, so just acknowledge the original 4262 * M_IOCTL. 4263 */ 4264 mi_copy_done(q, mp, 0); 4265 return; 4266 default: 4267 /* 4268 * Something strange has happened, so acknowledge 4269 * the original M_IOCTL with an EPROTO error. 4270 */ 4271 mi_copy_done(q, mp, EPROTO); 4272 return; 4273 } 4274 4275 /* 4276 * Now we have the strbuf structure for TI_GETMYNAME 4277 * and TI_GETPEERNAME. Next we copyout the requested 4278 * address and then we'll copyout the strbuf. 4279 */ 4280 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 4281 4282 if (connp->conn_family == AF_INET) 4283 addrlen = sizeof (sin_t); 4284 else 4285 addrlen = sizeof (sin6_t); 4286 4287 if (STRUCT_FGET(sb, maxlen) < addrlen) { 4288 mi_copy_done(q, mp, EINVAL); 4289 return; 4290 } 4291 4292 switch (iocp->ioc_cmd) { 4293 case TI_GETMYNAME: 4294 break; 4295 case TI_GETPEERNAME: 4296 if (udp->udp_state != TS_DATA_XFER) { 4297 mi_copy_done(q, mp, ENOTCONN); 4298 return; 4299 } 4300 break; 4301 } 4302 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 4303 if (!mp1) 4304 return; 4305 4306 STRUCT_FSET(sb, len, addrlen); 4307 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4308 case TI_GETMYNAME: 4309 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, 4310 &addrlen); 4311 break; 4312 case TI_GETPEERNAME: 4313 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, 4314 &addrlen); 4315 break; 4316 } 4317 mp1->b_wptr += addrlen; 4318 /* Copy out the address */ 4319 mi_copyout(q, mp); 4320 } 4321 4322 void 4323 udp_ddi_g_init(void) 4324 { 4325 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 4326 udp_opt_obj.odb_opt_arr_cnt); 4327 4328 /* 4329 * We want to be informed each time a stack is created or 4330 * destroyed in the kernel, so we can maintain the 4331 * set of udp_stack_t's. 4332 */ 4333 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 4334 } 4335 4336 void 4337 udp_ddi_g_destroy(void) 4338 { 4339 netstack_unregister(NS_UDP); 4340 } 4341 4342 #define INET_NAME "ip" 4343 4344 /* 4345 * Initialize the UDP stack instance. 4346 */ 4347 static void * 4348 udp_stack_init(netstackid_t stackid, netstack_t *ns) 4349 { 4350 udp_stack_t *us; 4351 int i; 4352 int error = 0; 4353 major_t major; 4354 size_t arrsz; 4355 4356 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 4357 us->us_netstack = ns; 4358 4359 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 4360 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 4361 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 4362 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 4363 4364 /* 4365 * The smallest anonymous port in the priviledged port range which UDP 4366 * looks for free port. Use in the option UDP_ANONPRIVBIND. 4367 */ 4368 us->us_min_anonpriv_port = 512; 4369 4370 us->us_bind_fanout_size = udp_bind_fanout_size; 4371 4372 /* Roundup variable that might have been modified in /etc/system */ 4373 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 4374 /* Not a power of two. Round up to nearest power of two */ 4375 for (i = 0; i < 31; i++) { 4376 if (us->us_bind_fanout_size < (1 << i)) 4377 break; 4378 } 4379 us->us_bind_fanout_size = 1 << i; 4380 } 4381 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 4382 sizeof (udp_fanout_t), KM_SLEEP); 4383 for (i = 0; i < us->us_bind_fanout_size; i++) { 4384 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 4385 NULL); 4386 } 4387 4388 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t); 4389 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, 4390 KM_SLEEP); 4391 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz); 4392 4393 /* Allocate the per netstack stats */ 4394 mutex_enter(&cpu_lock); 4395 us->us_sc_cnt = MAX(ncpus, boot_ncpus); 4396 mutex_exit(&cpu_lock); 4397 us->us_sc = kmem_zalloc(max_ncpus * sizeof (udp_stats_cpu_t *), 4398 KM_SLEEP); 4399 for (i = 0; i < us->us_sc_cnt; i++) { 4400 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), 4401 KM_SLEEP); 4402 } 4403 4404 us->us_kstat = udp_kstat2_init(stackid); 4405 us->us_mibkp = udp_kstat_init(stackid); 4406 4407 major = mod_name_to_major(INET_NAME); 4408 error = ldi_ident_from_major(major, &us->us_ldi_ident); 4409 ASSERT(error == 0); 4410 return (us); 4411 } 4412 4413 /* 4414 * Free the UDP stack instance. 4415 */ 4416 static void 4417 udp_stack_fini(netstackid_t stackid, void *arg) 4418 { 4419 udp_stack_t *us = (udp_stack_t *)arg; 4420 int i; 4421 4422 for (i = 0; i < us->us_bind_fanout_size; i++) { 4423 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 4424 } 4425 4426 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 4427 sizeof (udp_fanout_t)); 4428 4429 us->us_bind_fanout = NULL; 4430 4431 for (i = 0; i < us->us_sc_cnt; i++) 4432 kmem_free(us->us_sc[i], sizeof (udp_stats_cpu_t)); 4433 kmem_free(us->us_sc, max_ncpus * sizeof (udp_stats_cpu_t *)); 4434 4435 kmem_free(us->us_propinfo_tbl, 4436 udp_propinfo_count * sizeof (mod_prop_info_t)); 4437 us->us_propinfo_tbl = NULL; 4438 4439 udp_kstat_fini(stackid, us->us_mibkp); 4440 us->us_mibkp = NULL; 4441 4442 udp_kstat2_fini(stackid, us->us_kstat); 4443 us->us_kstat = NULL; 4444 4445 mutex_destroy(&us->us_epriv_port_lock); 4446 ldi_ident_release(us->us_ldi_ident); 4447 kmem_free(us, sizeof (*us)); 4448 } 4449 4450 static size_t 4451 udp_set_rcv_hiwat(udp_t *udp, size_t size) 4452 { 4453 udp_stack_t *us = udp->udp_us; 4454 4455 /* We add a bit of extra buffering */ 4456 size += size >> 1; 4457 if (size > us->us_max_buf) 4458 size = us->us_max_buf; 4459 4460 udp->udp_rcv_hiwat = size; 4461 return (size); 4462 } 4463 4464 /* 4465 * For the lower queue so that UDP can be a dummy mux. 4466 * Nobody should be sending 4467 * packets up this stream 4468 */ 4469 static void 4470 udp_lrput(queue_t *q, mblk_t *mp) 4471 { 4472 switch (mp->b_datap->db_type) { 4473 case M_FLUSH: 4474 /* Turn around */ 4475 if (*mp->b_rptr & FLUSHW) { 4476 *mp->b_rptr &= ~FLUSHR; 4477 qreply(q, mp); 4478 return; 4479 } 4480 break; 4481 } 4482 freemsg(mp); 4483 } 4484 4485 /* 4486 * For the lower queue so that UDP can be a dummy mux. 4487 * Nobody should be sending packets down this stream. 4488 */ 4489 /* ARGSUSED */ 4490 void 4491 udp_lwput(queue_t *q, mblk_t *mp) 4492 { 4493 freemsg(mp); 4494 } 4495 4496 /* 4497 * When a CPU is added, we need to allocate the per CPU stats struct. 4498 */ 4499 void 4500 udp_stack_cpu_add(udp_stack_t *us, processorid_t cpu_seqid) 4501 { 4502 int i; 4503 4504 if (cpu_seqid < us->us_sc_cnt) 4505 return; 4506 for (i = us->us_sc_cnt; i <= cpu_seqid; i++) { 4507 ASSERT(us->us_sc[i] == NULL); 4508 us->us_sc[i] = kmem_zalloc(sizeof (udp_stats_cpu_t), 4509 KM_SLEEP); 4510 } 4511 membar_producer(); 4512 us->us_sc_cnt = cpu_seqid + 1; 4513 } 4514 4515 /* 4516 * Below routines for UDP socket module. 4517 */ 4518 4519 static conn_t * 4520 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp) 4521 { 4522 udp_t *udp; 4523 conn_t *connp; 4524 zoneid_t zoneid; 4525 netstack_t *ns; 4526 udp_stack_t *us; 4527 int len; 4528 4529 ASSERT(errorp != NULL); 4530 4531 if ((*errorp = secpolicy_basic_net_access(credp)) != 0) 4532 return (NULL); 4533 4534 ns = netstack_find_by_cred(credp); 4535 ASSERT(ns != NULL); 4536 us = ns->netstack_udp; 4537 ASSERT(us != NULL); 4538 4539 /* 4540 * For exclusive stacks we set the zoneid to zero 4541 * to make UDP operate as if in the global zone. 4542 */ 4543 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 4544 zoneid = GLOBAL_ZONEID; 4545 else 4546 zoneid = crgetzoneid(credp); 4547 4548 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 4549 4550 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 4551 if (connp == NULL) { 4552 netstack_rele(ns); 4553 *errorp = ENOMEM; 4554 return (NULL); 4555 } 4556 udp = connp->conn_udp; 4557 4558 /* 4559 * ipcl_conn_create did a netstack_hold. Undo the hold that was 4560 * done by netstack_find_by_cred() 4561 */ 4562 netstack_rele(ns); 4563 4564 /* 4565 * Since this conn_t/udp_t is not yet visible to anybody else we don't 4566 * need to lock anything. 4567 */ 4568 ASSERT(connp->conn_proto == IPPROTO_UDP); 4569 ASSERT(connp->conn_udp == udp); 4570 ASSERT(udp->udp_connp == connp); 4571 4572 /* Set the initial state of the stream and the privilege status. */ 4573 udp->udp_state = TS_UNBND; 4574 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 4575 if (isv6) { 4576 connp->conn_family = AF_INET6; 4577 connp->conn_ipversion = IPV6_VERSION; 4578 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 4579 connp->conn_default_ttl = us->us_ipv6_hoplimit; 4580 len = sizeof (ip6_t) + UDPH_SIZE; 4581 } else { 4582 connp->conn_family = AF_INET; 4583 connp->conn_ipversion = IPV4_VERSION; 4584 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 4585 connp->conn_default_ttl = us->us_ipv4_ttl; 4586 len = sizeof (ipha_t) + UDPH_SIZE; 4587 } 4588 4589 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 4590 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 4591 4592 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 4593 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 4594 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 4595 connp->conn_ixa->ixa_zoneid = zoneid; 4596 4597 connp->conn_zoneid = zoneid; 4598 4599 /* 4600 * If the caller has the process-wide flag set, then default to MAC 4601 * exempt mode. This allows read-down to unlabeled hosts. 4602 */ 4603 if (getpflags(NET_MAC_AWARE, credp) != 0) 4604 connp->conn_mac_mode = CONN_MAC_AWARE; 4605 4606 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 4607 4608 udp->udp_us = us; 4609 4610 connp->conn_rcvbuf = us->us_recv_hiwat; 4611 connp->conn_sndbuf = us->us_xmit_hiwat; 4612 connp->conn_sndlowat = us->us_xmit_lowat; 4613 connp->conn_rcvlowat = udp_mod_info.mi_lowat; 4614 4615 connp->conn_wroff = len + us->us_wroff_extra; 4616 connp->conn_so_type = SOCK_DGRAM; 4617 4618 connp->conn_recv = udp_input; 4619 connp->conn_recvicmp = udp_icmp_input; 4620 crhold(credp); 4621 connp->conn_cred = credp; 4622 connp->conn_cpid = curproc->p_pid; 4623 connp->conn_open_time = ddi_get_lbolt64(); 4624 /* Cache things in ixa without an extra refhold */ 4625 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 4626 connp->conn_ixa->ixa_cred = connp->conn_cred; 4627 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 4628 if (is_system_labeled()) 4629 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 4630 4631 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 4632 4633 if (us->us_pmtu_discovery) 4634 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 4635 4636 return (connp); 4637 } 4638 4639 sock_lower_handle_t 4640 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 4641 uint_t *smodep, int *errorp, int flags, cred_t *credp) 4642 { 4643 udp_t *udp = NULL; 4644 udp_stack_t *us; 4645 conn_t *connp; 4646 boolean_t isv6; 4647 4648 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 4649 (proto != 0 && proto != IPPROTO_UDP)) { 4650 *errorp = EPROTONOSUPPORT; 4651 return (NULL); 4652 } 4653 4654 if (family == AF_INET6) 4655 isv6 = B_TRUE; 4656 else 4657 isv6 = B_FALSE; 4658 4659 connp = udp_do_open(credp, isv6, flags, errorp); 4660 if (connp == NULL) 4661 return (NULL); 4662 4663 udp = connp->conn_udp; 4664 ASSERT(udp != NULL); 4665 us = udp->udp_us; 4666 ASSERT(us != NULL); 4667 4668 udp->udp_issocket = B_TRUE; 4669 connp->conn_flags |= IPCL_NONSTR; 4670 4671 /* 4672 * Set flow control 4673 * Since this conn_t/udp_t is not yet visible to anybody else we don't 4674 * need to lock anything. 4675 */ 4676 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); 4677 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; 4678 4679 connp->conn_flow_cntrld = B_FALSE; 4680 4681 mutex_enter(&connp->conn_lock); 4682 connp->conn_state_flags &= ~CONN_INCIPIENT; 4683 mutex_exit(&connp->conn_lock); 4684 4685 *errorp = 0; 4686 *smodep = SM_ATOMIC; 4687 *sock_downcalls = &sock_udp_downcalls; 4688 return ((sock_lower_handle_t)connp); 4689 } 4690 4691 /* ARGSUSED3 */ 4692 void 4693 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 4694 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 4695 { 4696 conn_t *connp = (conn_t *)proto_handle; 4697 struct sock_proto_props sopp; 4698 4699 /* All Solaris components should pass a cred for this operation. */ 4700 ASSERT(cr != NULL); 4701 4702 connp->conn_upcalls = sock_upcalls; 4703 connp->conn_upper_handle = sock_handle; 4704 4705 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 4706 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 4707 sopp.sopp_wroff = connp->conn_wroff; 4708 sopp.sopp_maxblk = INFPSZ; 4709 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 4710 sopp.sopp_rxlowat = connp->conn_rcvlowat; 4711 sopp.sopp_maxaddrlen = sizeof (sin6_t); 4712 sopp.sopp_maxpsz = 4713 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 4714 UDP_MAXPACKET_IPV6; 4715 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 4716 udp_mod_info.mi_minpsz; 4717 4718 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 4719 &sopp); 4720 } 4721 4722 static void 4723 udp_do_close(conn_t *connp) 4724 { 4725 udp_t *udp; 4726 4727 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 4728 udp = connp->conn_udp; 4729 4730 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 4731 /* 4732 * Running in cluster mode - register unbind information 4733 */ 4734 if (connp->conn_ipversion == IPV4_VERSION) { 4735 (*cl_inet_unbind)( 4736 connp->conn_netstack->netstack_stackid, 4737 IPPROTO_UDP, AF_INET, 4738 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 4739 (in_port_t)connp->conn_lport, NULL); 4740 } else { 4741 (*cl_inet_unbind)( 4742 connp->conn_netstack->netstack_stackid, 4743 IPPROTO_UDP, AF_INET6, 4744 (uint8_t *)&(connp->conn_laddr_v6), 4745 (in_port_t)connp->conn_lport, NULL); 4746 } 4747 } 4748 4749 udp_bind_hash_remove(udp, B_FALSE); 4750 4751 ip_quiesce_conn(connp); 4752 4753 if (!IPCL_IS_NONSTR(connp)) { 4754 ASSERT(connp->conn_wq != NULL); 4755 ASSERT(connp->conn_rq != NULL); 4756 qprocsoff(connp->conn_rq); 4757 } 4758 4759 udp_close_free(connp); 4760 4761 /* 4762 * Now we are truly single threaded on this stream, and can 4763 * delete the things hanging off the connp, and finally the connp. 4764 * We removed this connp from the fanout list, it cannot be 4765 * accessed thru the fanouts, and we already waited for the 4766 * conn_ref to drop to 0. We are already in close, so 4767 * there cannot be any other thread from the top. qprocsoff 4768 * has completed, and service has completed or won't run in 4769 * future. 4770 */ 4771 ASSERT(connp->conn_ref == 1); 4772 4773 if (!IPCL_IS_NONSTR(connp)) { 4774 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 4775 } else { 4776 ip_free_helper_stream(connp); 4777 } 4778 4779 connp->conn_ref--; 4780 ipcl_conn_destroy(connp); 4781 } 4782 4783 /* ARGSUSED1 */ 4784 int 4785 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 4786 { 4787 conn_t *connp = (conn_t *)proto_handle; 4788 4789 /* All Solaris components should pass a cred for this operation. */ 4790 ASSERT(cr != NULL); 4791 4792 udp_do_close(connp); 4793 return (0); 4794 } 4795 4796 static int 4797 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 4798 boolean_t bind_to_req_port_only) 4799 { 4800 sin_t *sin; 4801 sin6_t *sin6; 4802 udp_t *udp = connp->conn_udp; 4803 int error = 0; 4804 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 4805 in_port_t port; /* Host byte order */ 4806 in_port_t requested_port; /* Host byte order */ 4807 int count; 4808 ipaddr_t v4src; /* Set if AF_INET */ 4809 in6_addr_t v6src; 4810 int loopmax; 4811 udp_fanout_t *udpf; 4812 in_port_t lport; /* Network byte order */ 4813 uint_t scopeid = 0; 4814 zoneid_t zoneid = IPCL_ZONEID(connp); 4815 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4816 boolean_t is_inaddr_any; 4817 mlp_type_t addrtype, mlptype; 4818 udp_stack_t *us = udp->udp_us; 4819 4820 switch (len) { 4821 case sizeof (sin_t): /* Complete IPv4 address */ 4822 sin = (sin_t *)sa; 4823 4824 if (sin == NULL || !OK_32PTR((char *)sin)) 4825 return (EINVAL); 4826 4827 if (connp->conn_family != AF_INET || 4828 sin->sin_family != AF_INET) { 4829 return (EAFNOSUPPORT); 4830 } 4831 v4src = sin->sin_addr.s_addr; 4832 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 4833 if (v4src != INADDR_ANY) { 4834 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 4835 B_TRUE); 4836 } 4837 port = ntohs(sin->sin_port); 4838 break; 4839 4840 case sizeof (sin6_t): /* complete IPv6 address */ 4841 sin6 = (sin6_t *)sa; 4842 4843 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 4844 return (EINVAL); 4845 4846 if (connp->conn_family != AF_INET6 || 4847 sin6->sin6_family != AF_INET6) { 4848 return (EAFNOSUPPORT); 4849 } 4850 v6src = sin6->sin6_addr; 4851 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 4852 if (connp->conn_ipv6_v6only) 4853 return (EADDRNOTAVAIL); 4854 4855 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); 4856 if (v4src != INADDR_ANY) { 4857 laddr_type = ip_laddr_verify_v4(v4src, 4858 zoneid, ipst, B_FALSE); 4859 } 4860 } else { 4861 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 4862 if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 4863 scopeid = sin6->sin6_scope_id; 4864 laddr_type = ip_laddr_verify_v6(&v6src, 4865 zoneid, ipst, B_TRUE, scopeid); 4866 } 4867 } 4868 port = ntohs(sin6->sin6_port); 4869 break; 4870 4871 default: /* Invalid request */ 4872 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 4873 "udp_bind: bad ADDR_length length %u", len); 4874 return (-TBADADDR); 4875 } 4876 4877 /* Is the local address a valid unicast, multicast, or broadcast? */ 4878 if (laddr_type == IPVL_BAD) 4879 return (EADDRNOTAVAIL); 4880 4881 requested_port = port; 4882 4883 if (requested_port == 0 || !bind_to_req_port_only) 4884 bind_to_req_port_only = B_FALSE; 4885 else /* T_BIND_REQ and requested_port != 0 */ 4886 bind_to_req_port_only = B_TRUE; 4887 4888 if (requested_port == 0) { 4889 /* 4890 * If the application passed in zero for the port number, it 4891 * doesn't care which port number we bind to. Get one in the 4892 * valid range. 4893 */ 4894 if (connp->conn_anon_priv_bind) { 4895 port = udp_get_next_priv_port(udp); 4896 } else { 4897 port = udp_update_next_port(udp, 4898 us->us_next_port_to_try, B_TRUE); 4899 } 4900 } else { 4901 /* 4902 * If the port is in the well-known privileged range, 4903 * make sure the caller was privileged. 4904 */ 4905 int i; 4906 boolean_t priv = B_FALSE; 4907 4908 if (port < us->us_smallest_nonpriv_port) { 4909 priv = B_TRUE; 4910 } else { 4911 for (i = 0; i < us->us_num_epriv_ports; i++) { 4912 if (port == us->us_epriv_ports[i]) { 4913 priv = B_TRUE; 4914 break; 4915 } 4916 } 4917 } 4918 4919 if (priv) { 4920 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 4921 return (-TACCES); 4922 } 4923 } 4924 4925 if (port == 0) 4926 return (-TNOADDR); 4927 4928 /* 4929 * The state must be TS_UNBND. TPI mandates that users must send 4930 * TPI primitives only 1 at a time and wait for the response before 4931 * sending the next primitive. 4932 */ 4933 mutex_enter(&connp->conn_lock); 4934 if (udp->udp_state != TS_UNBND) { 4935 mutex_exit(&connp->conn_lock); 4936 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 4937 "udp_bind: bad state, %u", udp->udp_state); 4938 return (-TOUTSTATE); 4939 } 4940 /* 4941 * Copy the source address into our udp structure. This address 4942 * may still be zero; if so, IP will fill in the correct address 4943 * each time an outbound packet is passed to it. Since the udp is 4944 * not yet in the bind hash list, we don't grab the uf_lock to 4945 * change conn_ipversion 4946 */ 4947 if (connp->conn_family == AF_INET) { 4948 ASSERT(sin != NULL); 4949 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); 4950 } else { 4951 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 4952 /* 4953 * no need to hold the uf_lock to set the conn_ipversion 4954 * since we are not yet in the fanout list 4955 */ 4956 connp->conn_ipversion = IPV4_VERSION; 4957 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 4958 } else { 4959 connp->conn_ipversion = IPV6_VERSION; 4960 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 4961 } 4962 } 4963 4964 /* 4965 * If conn_reuseaddr is not set, then we have to make sure that 4966 * the IP address and port number the application requested 4967 * (or we selected for the application) is not being used by 4968 * another stream. If another stream is already using the 4969 * requested IP address and port, the behavior depends on 4970 * "bind_to_req_port_only". If set the bind fails; otherwise we 4971 * search for any an unused port to bind to the stream. 4972 * 4973 * As per the BSD semantics, as modified by the Deering multicast 4974 * changes, if udp_reuseaddr is set, then we allow multiple binds 4975 * to the same port independent of the local IP address. 4976 * 4977 * This is slightly different than in SunOS 4.X which did not 4978 * support IP multicast. Note that the change implemented by the 4979 * Deering multicast code effects all binds - not only binding 4980 * to IP multicast addresses. 4981 * 4982 * Note that when binding to port zero we ignore SO_REUSEADDR in 4983 * order to guarantee a unique port. 4984 */ 4985 4986 count = 0; 4987 if (connp->conn_anon_priv_bind) { 4988 /* 4989 * loopmax = (IPPORT_RESERVED-1) - 4990 * us->us_min_anonpriv_port + 1 4991 */ 4992 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 4993 } else { 4994 loopmax = us->us_largest_anon_port - 4995 us->us_smallest_anon_port + 1; 4996 } 4997 4998 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 4999 5000 for (;;) { 5001 udp_t *udp1; 5002 boolean_t found_exclbind = B_FALSE; 5003 conn_t *connp1; 5004 5005 /* 5006 * Walk through the list of udp streams bound to 5007 * requested port with the same IP address. 5008 */ 5009 lport = htons(port); 5010 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 5011 us->us_bind_fanout_size)]; 5012 mutex_enter(&udpf->uf_lock); 5013 for (udp1 = udpf->uf_udp; udp1 != NULL; 5014 udp1 = udp1->udp_bind_hash) { 5015 connp1 = udp1->udp_connp; 5016 5017 if (lport != connp1->conn_lport) 5018 continue; 5019 5020 /* 5021 * On a labeled system, we must treat bindings to ports 5022 * on shared IP addresses by sockets with MAC exemption 5023 * privilege as being in all zones, as there's 5024 * otherwise no way to identify the right receiver. 5025 */ 5026 if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) 5027 continue; 5028 5029 /* 5030 * If UDP_EXCLBIND is set for either the bound or 5031 * binding endpoint, the semantics of bind 5032 * is changed according to the following chart. 5033 * 5034 * spec = specified address (v4 or v6) 5035 * unspec = unspecified address (v4 or v6) 5036 * A = specified addresses are different for endpoints 5037 * 5038 * bound bind to allowed? 5039 * ------------------------------------- 5040 * unspec unspec no 5041 * unspec spec no 5042 * spec unspec no 5043 * spec spec yes if A 5044 * 5045 * For labeled systems, SO_MAC_EXEMPT behaves the same 5046 * as UDP_EXCLBIND, except that zoneid is ignored. 5047 */ 5048 if (connp1->conn_exclbind || connp->conn_exclbind || 5049 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 5050 if (V6_OR_V4_INADDR_ANY( 5051 connp1->conn_bound_addr_v6) || 5052 is_inaddr_any || 5053 IN6_ARE_ADDR_EQUAL( 5054 &connp1->conn_bound_addr_v6, 5055 &v6src)) { 5056 found_exclbind = B_TRUE; 5057 break; 5058 } 5059 continue; 5060 } 5061 5062 /* 5063 * Check ipversion to allow IPv4 and IPv6 sockets to 5064 * have disjoint port number spaces. 5065 */ 5066 if (connp->conn_ipversion != connp1->conn_ipversion) { 5067 5068 /* 5069 * On the first time through the loop, if the 5070 * the user intentionally specified a 5071 * particular port number, then ignore any 5072 * bindings of the other protocol that may 5073 * conflict. This allows the user to bind IPv6 5074 * alone and get both v4 and v6, or bind both 5075 * both and get each seperately. On subsequent 5076 * times through the loop, we're checking a 5077 * port that we chose (not the user) and thus 5078 * we do not allow casual duplicate bindings. 5079 */ 5080 if (count == 0 && requested_port != 0) 5081 continue; 5082 } 5083 5084 /* 5085 * No difference depending on SO_REUSEADDR. 5086 * 5087 * If existing port is bound to a 5088 * non-wildcard IP address and 5089 * the requesting stream is bound to 5090 * a distinct different IP addresses 5091 * (non-wildcard, also), keep going. 5092 */ 5093 if (!is_inaddr_any && 5094 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && 5095 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, 5096 &v6src)) { 5097 continue; 5098 } 5099 break; 5100 } 5101 5102 if (!found_exclbind && 5103 (connp->conn_reuseaddr && requested_port != 0)) { 5104 break; 5105 } 5106 5107 if (udp1 == NULL) { 5108 /* 5109 * No other stream has this IP address 5110 * and port number. We can use it. 5111 */ 5112 break; 5113 } 5114 mutex_exit(&udpf->uf_lock); 5115 if (bind_to_req_port_only) { 5116 /* 5117 * We get here only when requested port 5118 * is bound (and only first of the for() 5119 * loop iteration). 5120 * 5121 * The semantics of this bind request 5122 * require it to fail so we return from 5123 * the routine (and exit the loop). 5124 * 5125 */ 5126 mutex_exit(&connp->conn_lock); 5127 return (-TADDRBUSY); 5128 } 5129 5130 if (connp->conn_anon_priv_bind) { 5131 port = udp_get_next_priv_port(udp); 5132 } else { 5133 if ((count == 0) && (requested_port != 0)) { 5134 /* 5135 * If the application wants us to find 5136 * a port, get one to start with. Set 5137 * requested_port to 0, so that we will 5138 * update us->us_next_port_to_try below. 5139 */ 5140 port = udp_update_next_port(udp, 5141 us->us_next_port_to_try, B_TRUE); 5142 requested_port = 0; 5143 } else { 5144 port = udp_update_next_port(udp, port + 1, 5145 B_FALSE); 5146 } 5147 } 5148 5149 if (port == 0 || ++count >= loopmax) { 5150 /* 5151 * We've tried every possible port number and 5152 * there are none available, so send an error 5153 * to the user. 5154 */ 5155 mutex_exit(&connp->conn_lock); 5156 return (-TNOADDR); 5157 } 5158 } 5159 5160 /* 5161 * Copy the source address into our udp structure. This address 5162 * may still be zero; if so, ip_attr_connect will fill in the correct 5163 * address when a packet is about to be sent. 5164 * If we are binding to a broadcast or multicast address then 5165 * we just set the conn_bound_addr since we don't want to use 5166 * that as the source address when sending. 5167 */ 5168 connp->conn_bound_addr_v6 = v6src; 5169 connp->conn_laddr_v6 = v6src; 5170 if (scopeid != 0) { 5171 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 5172 connp->conn_ixa->ixa_scopeid = scopeid; 5173 connp->conn_incoming_ifindex = scopeid; 5174 } else { 5175 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5176 connp->conn_incoming_ifindex = connp->conn_bound_if; 5177 } 5178 5179 switch (laddr_type) { 5180 case IPVL_UNICAST_UP: 5181 case IPVL_UNICAST_DOWN: 5182 connp->conn_saddr_v6 = v6src; 5183 connp->conn_mcbc_bind = B_FALSE; 5184 break; 5185 case IPVL_MCAST: 5186 case IPVL_BCAST: 5187 /* ip_set_destination will pick a source address later */ 5188 connp->conn_saddr_v6 = ipv6_all_zeros; 5189 connp->conn_mcbc_bind = B_TRUE; 5190 break; 5191 } 5192 5193 /* Any errors after this point should use late_error */ 5194 connp->conn_lport = lport; 5195 5196 /* 5197 * Now reset the next anonymous port if the application requested 5198 * an anonymous port, or we handed out the next anonymous port. 5199 */ 5200 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { 5201 us->us_next_port_to_try = port + 1; 5202 } 5203 5204 /* Initialize the T_BIND_ACK. */ 5205 if (connp->conn_family == AF_INET) { 5206 sin->sin_port = connp->conn_lport; 5207 } else { 5208 sin6->sin6_port = connp->conn_lport; 5209 } 5210 udp->udp_state = TS_IDLE; 5211 udp_bind_hash_insert(udpf, udp); 5212 mutex_exit(&udpf->uf_lock); 5213 mutex_exit(&connp->conn_lock); 5214 5215 if (cl_inet_bind) { 5216 /* 5217 * Running in cluster mode - register bind information 5218 */ 5219 if (connp->conn_ipversion == IPV4_VERSION) { 5220 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5221 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, 5222 (in_port_t)connp->conn_lport, NULL); 5223 } else { 5224 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5225 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, 5226 (in_port_t)connp->conn_lport, NULL); 5227 } 5228 } 5229 5230 mutex_enter(&connp->conn_lock); 5231 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 5232 if (is_system_labeled() && (!connp->conn_anon_port || 5233 connp->conn_anon_mlp)) { 5234 uint16_t mlpport; 5235 zone_t *zone; 5236 5237 zone = crgetzone(cr); 5238 connp->conn_mlp_type = 5239 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : 5240 mlptSingle; 5241 addrtype = tsol_mlp_addr_type( 5242 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 5243 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 5244 if (addrtype == mlptSingle) { 5245 error = -TNOADDR; 5246 mutex_exit(&connp->conn_lock); 5247 goto late_error; 5248 } 5249 mlpport = connp->conn_anon_port ? PMAPPORT : port; 5250 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 5251 addrtype); 5252 5253 /* 5254 * It is a coding error to attempt to bind an MLP port 5255 * without first setting SOL_SOCKET/SCM_UCRED. 5256 */ 5257 if (mlptype != mlptSingle && 5258 connp->conn_mlp_type == mlptSingle) { 5259 error = EINVAL; 5260 mutex_exit(&connp->conn_lock); 5261 goto late_error; 5262 } 5263 5264 /* 5265 * It is an access violation to attempt to bind an MLP port 5266 * without NET_BINDMLP privilege. 5267 */ 5268 if (mlptype != mlptSingle && 5269 secpolicy_net_bindmlp(cr) != 0) { 5270 if (connp->conn_debug) { 5271 (void) strlog(UDP_MOD_ID, 0, 1, 5272 SL_ERROR|SL_TRACE, 5273 "udp_bind: no priv for multilevel port %d", 5274 mlpport); 5275 } 5276 error = -TACCES; 5277 mutex_exit(&connp->conn_lock); 5278 goto late_error; 5279 } 5280 5281 /* 5282 * If we're specifically binding a shared IP address and the 5283 * port is MLP on shared addresses, then check to see if this 5284 * zone actually owns the MLP. Reject if not. 5285 */ 5286 if (mlptype == mlptShared && addrtype == mlptShared) { 5287 /* 5288 * No need to handle exclusive-stack zones since 5289 * ALL_ZONES only applies to the shared stack. 5290 */ 5291 zoneid_t mlpzone; 5292 5293 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 5294 htons(mlpport)); 5295 if (connp->conn_zoneid != mlpzone) { 5296 if (connp->conn_debug) { 5297 (void) strlog(UDP_MOD_ID, 0, 1, 5298 SL_ERROR|SL_TRACE, 5299 "udp_bind: attempt to bind port " 5300 "%d on shared addr in zone %d " 5301 "(should be %d)", 5302 mlpport, connp->conn_zoneid, 5303 mlpzone); 5304 } 5305 error = -TACCES; 5306 mutex_exit(&connp->conn_lock); 5307 goto late_error; 5308 } 5309 } 5310 if (connp->conn_anon_port) { 5311 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, 5312 port, B_TRUE); 5313 if (error != 0) { 5314 if (connp->conn_debug) { 5315 (void) strlog(UDP_MOD_ID, 0, 1, 5316 SL_ERROR|SL_TRACE, 5317 "udp_bind: cannot establish anon " 5318 "MLP for port %d", port); 5319 } 5320 error = -TACCES; 5321 mutex_exit(&connp->conn_lock); 5322 goto late_error; 5323 } 5324 } 5325 connp->conn_mlp_type = mlptype; 5326 } 5327 5328 /* 5329 * We create an initial header template here to make a subsequent 5330 * sendto have a starting point. Since conn_last_dst is zero the 5331 * first sendto will always follow the 'dst changed' code path. 5332 * Note that we defer massaging options and the related checksum 5333 * adjustment until we have a destination address. 5334 */ 5335 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5336 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5337 if (error != 0) { 5338 mutex_exit(&connp->conn_lock); 5339 goto late_error; 5340 } 5341 /* Just in case */ 5342 connp->conn_faddr_v6 = ipv6_all_zeros; 5343 connp->conn_fport = 0; 5344 connp->conn_v6lastdst = ipv6_all_zeros; 5345 mutex_exit(&connp->conn_lock); 5346 5347 error = ip_laddr_fanout_insert(connp); 5348 if (error != 0) 5349 goto late_error; 5350 5351 /* Bind succeeded */ 5352 return (0); 5353 5354 late_error: 5355 /* We had already picked the port number, and then the bind failed */ 5356 mutex_enter(&connp->conn_lock); 5357 udpf = &us->us_bind_fanout[ 5358 UDP_BIND_HASH(connp->conn_lport, 5359 us->us_bind_fanout_size)]; 5360 mutex_enter(&udpf->uf_lock); 5361 connp->conn_saddr_v6 = ipv6_all_zeros; 5362 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5363 connp->conn_laddr_v6 = ipv6_all_zeros; 5364 if (scopeid != 0) { 5365 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5366 connp->conn_incoming_ifindex = connp->conn_bound_if; 5367 } 5368 udp->udp_state = TS_UNBND; 5369 udp_bind_hash_remove(udp, B_TRUE); 5370 connp->conn_lport = 0; 5371 mutex_exit(&udpf->uf_lock); 5372 connp->conn_anon_port = B_FALSE; 5373 connp->conn_mlp_type = mlptSingle; 5374 5375 connp->conn_v6lastdst = ipv6_all_zeros; 5376 5377 /* Restore the header that was built above - different source address */ 5378 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5379 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5380 mutex_exit(&connp->conn_lock); 5381 return (error); 5382 } 5383 5384 int 5385 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5386 socklen_t len, cred_t *cr) 5387 { 5388 int error; 5389 conn_t *connp; 5390 5391 /* All Solaris components should pass a cred for this operation. */ 5392 ASSERT(cr != NULL); 5393 5394 connp = (conn_t *)proto_handle; 5395 5396 if (sa == NULL) 5397 error = udp_do_unbind(connp); 5398 else 5399 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 5400 5401 if (error < 0) { 5402 if (error == -TOUTSTATE) 5403 error = EINVAL; 5404 else 5405 error = proto_tlitosyserr(-error); 5406 } 5407 5408 return (error); 5409 } 5410 5411 static int 5412 udp_implicit_bind(conn_t *connp, cred_t *cr) 5413 { 5414 sin6_t sin6addr; 5415 sin_t *sin; 5416 sin6_t *sin6; 5417 socklen_t len; 5418 int error; 5419 5420 /* All Solaris components should pass a cred for this operation. */ 5421 ASSERT(cr != NULL); 5422 5423 if (connp->conn_family == AF_INET) { 5424 len = sizeof (struct sockaddr_in); 5425 sin = (sin_t *)&sin6addr; 5426 *sin = sin_null; 5427 sin->sin_family = AF_INET; 5428 sin->sin_addr.s_addr = INADDR_ANY; 5429 } else { 5430 ASSERT(connp->conn_family == AF_INET6); 5431 len = sizeof (sin6_t); 5432 sin6 = (sin6_t *)&sin6addr; 5433 *sin6 = sin6_null; 5434 sin6->sin6_family = AF_INET6; 5435 V6_SET_ZERO(sin6->sin6_addr); 5436 } 5437 5438 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len, 5439 cr, B_FALSE); 5440 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5441 } 5442 5443 /* 5444 * This routine removes a port number association from a stream. It 5445 * is called by udp_unbind and udp_tpi_unbind. 5446 */ 5447 static int 5448 udp_do_unbind(conn_t *connp) 5449 { 5450 udp_t *udp = connp->conn_udp; 5451 udp_fanout_t *udpf; 5452 udp_stack_t *us = udp->udp_us; 5453 5454 if (cl_inet_unbind != NULL) { 5455 /* 5456 * Running in cluster mode - register unbind information 5457 */ 5458 if (connp->conn_ipversion == IPV4_VERSION) { 5459 (*cl_inet_unbind)( 5460 connp->conn_netstack->netstack_stackid, 5461 IPPROTO_UDP, AF_INET, 5462 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5463 (in_port_t)connp->conn_lport, NULL); 5464 } else { 5465 (*cl_inet_unbind)( 5466 connp->conn_netstack->netstack_stackid, 5467 IPPROTO_UDP, AF_INET6, 5468 (uint8_t *)&(connp->conn_laddr_v6), 5469 (in_port_t)connp->conn_lport, NULL); 5470 } 5471 } 5472 5473 mutex_enter(&connp->conn_lock); 5474 /* If a bind has not been done, we can't unbind. */ 5475 if (udp->udp_state == TS_UNBND) { 5476 mutex_exit(&connp->conn_lock); 5477 return (-TOUTSTATE); 5478 } 5479 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5480 us->us_bind_fanout_size)]; 5481 mutex_enter(&udpf->uf_lock); 5482 udp_bind_hash_remove(udp, B_TRUE); 5483 connp->conn_saddr_v6 = ipv6_all_zeros; 5484 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5485 connp->conn_laddr_v6 = ipv6_all_zeros; 5486 connp->conn_mcbc_bind = B_FALSE; 5487 connp->conn_lport = 0; 5488 /* In case we were also connected */ 5489 connp->conn_faddr_v6 = ipv6_all_zeros; 5490 connp->conn_fport = 0; 5491 mutex_exit(&udpf->uf_lock); 5492 5493 connp->conn_v6lastdst = ipv6_all_zeros; 5494 udp->udp_state = TS_UNBND; 5495 5496 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5497 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5498 mutex_exit(&connp->conn_lock); 5499 5500 ip_unbind(connp); 5501 5502 return (0); 5503 } 5504 5505 /* 5506 * It associates a default destination address with the stream. 5507 */ 5508 static int 5509 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 5510 cred_t *cr, pid_t pid) 5511 { 5512 sin6_t *sin6; 5513 sin_t *sin; 5514 in6_addr_t v6dst; 5515 ipaddr_t v4dst; 5516 uint16_t dstport; 5517 uint32_t flowinfo; 5518 udp_fanout_t *udpf; 5519 udp_t *udp, *udp1; 5520 ushort_t ipversion; 5521 udp_stack_t *us; 5522 int error; 5523 conn_t *connp1; 5524 ip_xmit_attr_t *ixa; 5525 ip_xmit_attr_t *oldixa; 5526 uint_t scopeid = 0; 5527 uint_t srcid = 0; 5528 in6_addr_t v6src = connp->conn_saddr_v6; 5529 5530 udp = connp->conn_udp; 5531 us = udp->udp_us; 5532 5533 /* 5534 * Address has been verified by the caller 5535 */ 5536 switch (len) { 5537 default: 5538 /* 5539 * Should never happen 5540 */ 5541 return (EINVAL); 5542 5543 case sizeof (sin_t): 5544 sin = (sin_t *)sa; 5545 v4dst = sin->sin_addr.s_addr; 5546 dstport = sin->sin_port; 5547 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5548 ASSERT(connp->conn_ipversion == IPV4_VERSION); 5549 ipversion = IPV4_VERSION; 5550 break; 5551 5552 case sizeof (sin6_t): 5553 sin6 = (sin6_t *)sa; 5554 v6dst = sin6->sin6_addr; 5555 dstport = sin6->sin6_port; 5556 srcid = sin6->__sin6_src_id; 5557 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5558 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 5559 connp->conn_netstack); 5560 } 5561 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 5562 if (connp->conn_ipv6_v6only) 5563 return (EADDRNOTAVAIL); 5564 5565 /* 5566 * Destination adress is mapped IPv6 address. 5567 * Source bound address should be unspecified or 5568 * IPv6 mapped address as well. 5569 */ 5570 if (!IN6_IS_ADDR_UNSPECIFIED( 5571 &connp->conn_bound_addr_v6) && 5572 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { 5573 return (EADDRNOTAVAIL); 5574 } 5575 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 5576 ipversion = IPV4_VERSION; 5577 flowinfo = 0; 5578 } else { 5579 ipversion = IPV6_VERSION; 5580 flowinfo = sin6->sin6_flowinfo; 5581 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 5582 scopeid = sin6->sin6_scope_id; 5583 } 5584 break; 5585 } 5586 5587 if (dstport == 0) 5588 return (-TBADADDR); 5589 5590 /* 5591 * If there is a different thread using conn_ixa then we get a new 5592 * copy and cut the old one loose from conn_ixa. Otherwise we use 5593 * conn_ixa and prevent any other thread from using/changing it. 5594 * Once connect() is done other threads can use conn_ixa since the 5595 * refcnt will be back at one. 5596 * We defer updating conn_ixa until later to handle any concurrent 5597 * conn_ixa_cleanup thread. 5598 */ 5599 ixa = conn_get_ixa(connp, B_FALSE); 5600 if (ixa == NULL) 5601 return (ENOMEM); 5602 5603 mutex_enter(&connp->conn_lock); 5604 /* 5605 * This udp_t must have bound to a port already before doing a connect. 5606 * Reject if a connect is in progress (we drop conn_lock during 5607 * udp_do_connect). 5608 */ 5609 if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) { 5610 mutex_exit(&connp->conn_lock); 5611 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5612 "udp_connect: bad state, %u", udp->udp_state); 5613 ixa_refrele(ixa); 5614 return (-TOUTSTATE); 5615 } 5616 ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL); 5617 5618 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5619 us->us_bind_fanout_size)]; 5620 5621 mutex_enter(&udpf->uf_lock); 5622 if (udp->udp_state == TS_DATA_XFER) { 5623 /* Already connected - clear out state */ 5624 if (connp->conn_mcbc_bind) 5625 connp->conn_saddr_v6 = ipv6_all_zeros; 5626 else 5627 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 5628 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 5629 connp->conn_faddr_v6 = ipv6_all_zeros; 5630 connp->conn_fport = 0; 5631 udp->udp_state = TS_IDLE; 5632 } 5633 5634 connp->conn_fport = dstport; 5635 connp->conn_ipversion = ipversion; 5636 if (ipversion == IPV4_VERSION) { 5637 /* 5638 * Interpret a zero destination to mean loopback. 5639 * Update the T_CONN_REQ (sin/sin6) since it is used to 5640 * generate the T_CONN_CON. 5641 */ 5642 if (v4dst == INADDR_ANY) { 5643 v4dst = htonl(INADDR_LOOPBACK); 5644 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 5645 if (connp->conn_family == AF_INET) { 5646 sin->sin_addr.s_addr = v4dst; 5647 } else { 5648 sin6->sin6_addr = v6dst; 5649 } 5650 } 5651 connp->conn_faddr_v6 = v6dst; 5652 connp->conn_flowinfo = 0; 5653 } else { 5654 ASSERT(connp->conn_ipversion == IPV6_VERSION); 5655 /* 5656 * Interpret a zero destination to mean loopback. 5657 * Update the T_CONN_REQ (sin/sin6) since it is used to 5658 * generate the T_CONN_CON. 5659 */ 5660 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 5661 v6dst = ipv6_loopback; 5662 sin6->sin6_addr = v6dst; 5663 } 5664 connp->conn_faddr_v6 = v6dst; 5665 connp->conn_flowinfo = flowinfo; 5666 } 5667 mutex_exit(&udpf->uf_lock); 5668 5669 /* 5670 * We update our cred/cpid based on the caller of connect 5671 */ 5672 if (connp->conn_cred != cr) { 5673 crhold(cr); 5674 crfree(connp->conn_cred); 5675 connp->conn_cred = cr; 5676 } 5677 connp->conn_cpid = pid; 5678 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 5679 ixa->ixa_cred = cr; 5680 ixa->ixa_cpid = pid; 5681 if (is_system_labeled()) { 5682 /* We need to restart with a label based on the cred */ 5683 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 5684 } 5685 5686 if (scopeid != 0) { 5687 ixa->ixa_flags |= IXAF_SCOPEID_SET; 5688 ixa->ixa_scopeid = scopeid; 5689 connp->conn_incoming_ifindex = scopeid; 5690 } else { 5691 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5692 connp->conn_incoming_ifindex = connp->conn_bound_if; 5693 } 5694 /* 5695 * conn_connect will drop conn_lock and reacquire it. 5696 * To prevent a send* from messing with this udp_t while the lock 5697 * is dropped we set udp_state and clear conn_v6lastdst. 5698 * That will make all send* fail with EISCONN. 5699 */ 5700 connp->conn_v6lastdst = ipv6_all_zeros; 5701 udp->udp_state = TS_WCON_CREQ; 5702 5703 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC); 5704 mutex_exit(&connp->conn_lock); 5705 if (error != 0) 5706 goto connect_failed; 5707 5708 /* 5709 * The addresses have been verified. Time to insert in 5710 * the correct fanout list. 5711 */ 5712 error = ipcl_conn_insert(connp); 5713 if (error != 0) 5714 goto connect_failed; 5715 5716 mutex_enter(&connp->conn_lock); 5717 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5718 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5719 if (error != 0) { 5720 mutex_exit(&connp->conn_lock); 5721 goto connect_failed; 5722 } 5723 5724 udp->udp_state = TS_DATA_XFER; 5725 /* Record this as the "last" send even though we haven't sent any */ 5726 connp->conn_v6lastdst = connp->conn_faddr_v6; 5727 connp->conn_lastipversion = connp->conn_ipversion; 5728 connp->conn_lastdstport = connp->conn_fport; 5729 connp->conn_lastflowinfo = connp->conn_flowinfo; 5730 connp->conn_lastscopeid = scopeid; 5731 connp->conn_lastsrcid = srcid; 5732 /* Also remember a source to use together with lastdst */ 5733 connp->conn_v6lastsrc = v6src; 5734 5735 oldixa = conn_replace_ixa(connp, ixa); 5736 mutex_exit(&connp->conn_lock); 5737 ixa_refrele(oldixa); 5738 5739 /* 5740 * We've picked a source address above. Now we can 5741 * verify that the src/port/dst/port is unique for all 5742 * connections in TS_DATA_XFER, skipping ourselves. 5743 */ 5744 mutex_enter(&udpf->uf_lock); 5745 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 5746 if (udp1->udp_state != TS_DATA_XFER) 5747 continue; 5748 5749 if (udp1 == udp) 5750 continue; 5751 5752 connp1 = udp1->udp_connp; 5753 if (connp->conn_lport != connp1->conn_lport || 5754 connp->conn_ipversion != connp1->conn_ipversion || 5755 dstport != connp1->conn_fport || 5756 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 5757 &connp1->conn_laddr_v6) || 5758 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) || 5759 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) || 5760 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid))) 5761 continue; 5762 mutex_exit(&udpf->uf_lock); 5763 error = -TBADADDR; 5764 goto connect_failed; 5765 } 5766 if (cl_inet_connect2 != NULL) { 5767 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 5768 if (error != 0) { 5769 mutex_exit(&udpf->uf_lock); 5770 error = -TBADADDR; 5771 goto connect_failed; 5772 } 5773 } 5774 mutex_exit(&udpf->uf_lock); 5775 5776 ixa_refrele(ixa); 5777 return (0); 5778 5779 connect_failed: 5780 if (ixa != NULL) 5781 ixa_refrele(ixa); 5782 mutex_enter(&connp->conn_lock); 5783 mutex_enter(&udpf->uf_lock); 5784 udp->udp_state = TS_IDLE; 5785 connp->conn_faddr_v6 = ipv6_all_zeros; 5786 connp->conn_fport = 0; 5787 /* In case the source address was set above */ 5788 if (connp->conn_mcbc_bind) 5789 connp->conn_saddr_v6 = ipv6_all_zeros; 5790 else 5791 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 5792 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 5793 mutex_exit(&udpf->uf_lock); 5794 5795 connp->conn_v6lastdst = ipv6_all_zeros; 5796 connp->conn_flowinfo = 0; 5797 5798 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5799 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5800 mutex_exit(&connp->conn_lock); 5801 return (error); 5802 } 5803 5804 static int 5805 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 5806 socklen_t len, sock_connid_t *id, cred_t *cr) 5807 { 5808 conn_t *connp = (conn_t *)proto_handle; 5809 udp_t *udp = connp->conn_udp; 5810 int error; 5811 boolean_t did_bind = B_FALSE; 5812 pid_t pid = curproc->p_pid; 5813 5814 /* All Solaris components should pass a cred for this operation. */ 5815 ASSERT(cr != NULL); 5816 5817 if (sa == NULL) { 5818 /* 5819 * Disconnect 5820 * Make sure we are connected 5821 */ 5822 if (udp->udp_state != TS_DATA_XFER) 5823 return (EINVAL); 5824 5825 error = udp_disconnect(connp); 5826 return (error); 5827 } 5828 5829 error = proto_verify_ip_addr(connp->conn_family, sa, len); 5830 if (error != 0) 5831 goto done; 5832 5833 /* do an implicit bind if necessary */ 5834 if (udp->udp_state == TS_UNBND) { 5835 error = udp_implicit_bind(connp, cr); 5836 /* 5837 * We could be racing with an actual bind, in which case 5838 * we would see EPROTO. We cross our fingers and try 5839 * to connect. 5840 */ 5841 if (!(error == 0 || error == EPROTO)) 5842 goto done; 5843 did_bind = B_TRUE; 5844 } 5845 /* 5846 * set SO_DGRAM_ERRIND 5847 */ 5848 connp->conn_dgram_errind = B_TRUE; 5849 5850 error = udp_do_connect(connp, sa, len, cr, pid); 5851 5852 if (error != 0 && did_bind) { 5853 int unbind_err; 5854 5855 unbind_err = udp_do_unbind(connp); 5856 ASSERT(unbind_err == 0); 5857 } 5858 5859 if (error == 0) { 5860 *id = 0; 5861 (*connp->conn_upcalls->su_connected) 5862 (connp->conn_upper_handle, 0, NULL, -1); 5863 } else if (error < 0) { 5864 error = proto_tlitosyserr(-error); 5865 } 5866 5867 done: 5868 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 5869 /* 5870 * No need to hold locks to set state 5871 * after connect failure socket state is undefined 5872 * We set the state only to imitate old sockfs behavior 5873 */ 5874 udp->udp_state = TS_IDLE; 5875 } 5876 return (error); 5877 } 5878 5879 int 5880 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 5881 cred_t *cr) 5882 { 5883 sin6_t *sin6; 5884 sin_t *sin = NULL; 5885 uint_t srcid; 5886 conn_t *connp = (conn_t *)proto_handle; 5887 udp_t *udp = connp->conn_udp; 5888 int error = 0; 5889 udp_stack_t *us = udp->udp_us; 5890 ushort_t ipversion; 5891 pid_t pid = curproc->p_pid; 5892 ip_xmit_attr_t *ixa; 5893 5894 ASSERT(DB_TYPE(mp) == M_DATA); 5895 5896 /* All Solaris components should pass a cred for this operation. */ 5897 ASSERT(cr != NULL); 5898 5899 /* do an implicit bind if necessary */ 5900 if (udp->udp_state == TS_UNBND) { 5901 error = udp_implicit_bind(connp, cr); 5902 /* 5903 * We could be racing with an actual bind, in which case 5904 * we would see EPROTO. We cross our fingers and try 5905 * to connect. 5906 */ 5907 if (!(error == 0 || error == EPROTO)) { 5908 freemsg(mp); 5909 return (error); 5910 } 5911 } 5912 5913 /* Connected? */ 5914 if (msg->msg_name == NULL) { 5915 if (udp->udp_state != TS_DATA_XFER) { 5916 UDPS_BUMP_MIB(us, udpOutErrors); 5917 return (EDESTADDRREQ); 5918 } 5919 if (msg->msg_controllen != 0) { 5920 error = udp_output_ancillary(connp, NULL, NULL, mp, 5921 NULL, msg, cr, pid); 5922 } else { 5923 error = udp_output_connected(connp, mp, cr, pid); 5924 } 5925 if (us->us_sendto_ignerr) 5926 return (0); 5927 else 5928 return (error); 5929 } 5930 if (udp->udp_state == TS_DATA_XFER) { 5931 UDPS_BUMP_MIB(us, udpOutErrors); 5932 return (EISCONN); 5933 } 5934 error = proto_verify_ip_addr(connp->conn_family, 5935 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 5936 if (error != 0) { 5937 UDPS_BUMP_MIB(us, udpOutErrors); 5938 return (error); 5939 } 5940 switch (connp->conn_family) { 5941 case AF_INET6: 5942 sin6 = (sin6_t *)msg->msg_name; 5943 5944 srcid = sin6->__sin6_src_id; 5945 5946 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5947 /* 5948 * Destination is a non-IPv4-compatible IPv6 address. 5949 * Send out an IPv6 format packet. 5950 */ 5951 5952 /* 5953 * If the local address is a mapped address return 5954 * an error. 5955 * It would be possible to send an IPv6 packet but the 5956 * response would never make it back to the application 5957 * since it is bound to a mapped address. 5958 */ 5959 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 5960 UDPS_BUMP_MIB(us, udpOutErrors); 5961 return (EADDRNOTAVAIL); 5962 } 5963 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 5964 sin6->sin6_addr = ipv6_loopback; 5965 ipversion = IPV6_VERSION; 5966 } else { 5967 if (connp->conn_ipv6_v6only) { 5968 UDPS_BUMP_MIB(us, udpOutErrors); 5969 return (EADDRNOTAVAIL); 5970 } 5971 5972 /* 5973 * If the local address is not zero or a mapped address 5974 * return an error. It would be possible to send an 5975 * IPv4 packet but the response would never make it 5976 * back to the application since it is bound to a 5977 * non-mapped address. 5978 */ 5979 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 5980 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 5981 UDPS_BUMP_MIB(us, udpOutErrors); 5982 return (EADDRNOTAVAIL); 5983 } 5984 5985 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 5986 V4_PART_OF_V6(sin6->sin6_addr) = 5987 htonl(INADDR_LOOPBACK); 5988 } 5989 ipversion = IPV4_VERSION; 5990 } 5991 5992 /* 5993 * We have to allocate an ip_xmit_attr_t before we grab 5994 * conn_lock and we need to hold conn_lock once we've check 5995 * conn_same_as_last_v6 to handle concurrent send* calls on a 5996 * socket. 5997 */ 5998 if (msg->msg_controllen == 0) { 5999 ixa = conn_get_ixa(connp, B_FALSE); 6000 if (ixa == NULL) { 6001 UDPS_BUMP_MIB(us, udpOutErrors); 6002 return (ENOMEM); 6003 } 6004 } else { 6005 ixa = NULL; 6006 } 6007 mutex_enter(&connp->conn_lock); 6008 if (udp->udp_delayed_error != 0) { 6009 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 6010 6011 error = udp->udp_delayed_error; 6012 udp->udp_delayed_error = 0; 6013 6014 /* Compare IP address, port, and family */ 6015 6016 if (sin6->sin6_port == sin2->sin6_port && 6017 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 6018 &sin2->sin6_addr) && 6019 sin6->sin6_family == sin2->sin6_family) { 6020 mutex_exit(&connp->conn_lock); 6021 UDPS_BUMP_MIB(us, udpOutErrors); 6022 if (ixa != NULL) 6023 ixa_refrele(ixa); 6024 return (error); 6025 } 6026 } 6027 6028 if (msg->msg_controllen != 0) { 6029 mutex_exit(&connp->conn_lock); 6030 ASSERT(ixa == NULL); 6031 error = udp_output_ancillary(connp, NULL, sin6, mp, 6032 NULL, msg, cr, pid); 6033 } else if (conn_same_as_last_v6(connp, sin6) && 6034 connp->conn_lastsrcid == srcid && 6035 ipsec_outbound_policy_current(ixa)) { 6036 /* udp_output_lastdst drops conn_lock */ 6037 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6038 } else { 6039 /* udp_output_newdst drops conn_lock */ 6040 error = udp_output_newdst(connp, mp, NULL, sin6, 6041 ipversion, cr, pid, ixa); 6042 } 6043 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6044 if (us->us_sendto_ignerr) 6045 return (0); 6046 else 6047 return (error); 6048 case AF_INET: 6049 sin = (sin_t *)msg->msg_name; 6050 6051 ipversion = IPV4_VERSION; 6052 6053 if (sin->sin_addr.s_addr == INADDR_ANY) 6054 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 6055 6056 /* 6057 * We have to allocate an ip_xmit_attr_t before we grab 6058 * conn_lock and we need to hold conn_lock once we've check 6059 * conn_same_as_last_v6 to handle concurrent send* on a socket. 6060 */ 6061 if (msg->msg_controllen == 0) { 6062 ixa = conn_get_ixa(connp, B_FALSE); 6063 if (ixa == NULL) { 6064 UDPS_BUMP_MIB(us, udpOutErrors); 6065 return (ENOMEM); 6066 } 6067 } else { 6068 ixa = NULL; 6069 } 6070 mutex_enter(&connp->conn_lock); 6071 if (udp->udp_delayed_error != 0) { 6072 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 6073 6074 error = udp->udp_delayed_error; 6075 udp->udp_delayed_error = 0; 6076 6077 /* Compare IP address and port */ 6078 6079 if (sin->sin_port == sin2->sin_port && 6080 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) { 6081 mutex_exit(&connp->conn_lock); 6082 UDPS_BUMP_MIB(us, udpOutErrors); 6083 if (ixa != NULL) 6084 ixa_refrele(ixa); 6085 return (error); 6086 } 6087 } 6088 if (msg->msg_controllen != 0) { 6089 mutex_exit(&connp->conn_lock); 6090 ASSERT(ixa == NULL); 6091 error = udp_output_ancillary(connp, sin, NULL, mp, 6092 NULL, msg, cr, pid); 6093 } else if (conn_same_as_last_v4(connp, sin) && 6094 ipsec_outbound_policy_current(ixa)) { 6095 /* udp_output_lastdst drops conn_lock */ 6096 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6097 } else { 6098 /* udp_output_newdst drops conn_lock */ 6099 error = udp_output_newdst(connp, mp, sin, NULL, 6100 ipversion, cr, pid, ixa); 6101 } 6102 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6103 if (us->us_sendto_ignerr) 6104 return (0); 6105 else 6106 return (error); 6107 default: 6108 return (EINVAL); 6109 } 6110 } 6111 6112 int 6113 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 6114 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb, 6115 sock_quiesce_arg_t *arg) 6116 { 6117 conn_t *connp = (conn_t *)proto_handle; 6118 udp_t *udp; 6119 struct T_capability_ack tca; 6120 struct sockaddr_in6 laddr, faddr; 6121 socklen_t laddrlen, faddrlen; 6122 short opts; 6123 struct stroptions *stropt; 6124 mblk_t *mp, *stropt_mp; 6125 int error; 6126 6127 udp = connp->conn_udp; 6128 6129 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 6130 6131 /* 6132 * setup the fallback stream that was allocated 6133 */ 6134 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6135 connp->conn_minor_arena = WR(q)->q_ptr; 6136 6137 RD(q)->q_ptr = WR(q)->q_ptr = connp; 6138 6139 WR(q)->q_qinfo = &udp_winit; 6140 6141 connp->conn_rq = RD(q); 6142 connp->conn_wq = WR(q); 6143 6144 /* Notify stream head about options before sending up data */ 6145 stropt_mp->b_datap->db_type = M_SETOPTS; 6146 stropt_mp->b_wptr += sizeof (*stropt); 6147 stropt = (struct stroptions *)stropt_mp->b_rptr; 6148 stropt->so_flags = SO_WROFF | SO_HIWAT; 6149 stropt->so_wroff = connp->conn_wroff; 6150 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 6151 putnext(RD(q), stropt_mp); 6152 6153 /* 6154 * Free the helper stream 6155 */ 6156 ip_free_helper_stream(connp); 6157 6158 if (!issocket) 6159 udp_use_pure_tpi(udp); 6160 6161 /* 6162 * Collect the information needed to sync with the sonode 6163 */ 6164 udp_do_capability_ack(udp, &tca, TC1_INFO); 6165 6166 laddrlen = faddrlen = sizeof (sin6_t); 6167 (void) udp_getsockname((sock_lower_handle_t)connp, 6168 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6169 error = udp_getpeername((sock_lower_handle_t)connp, 6170 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6171 if (error != 0) 6172 faddrlen = 0; 6173 6174 opts = 0; 6175 if (connp->conn_dgram_errind) 6176 opts |= SO_DGRAM_ERRIND; 6177 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 6178 opts |= SO_DONTROUTE; 6179 6180 mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca, 6181 (struct sockaddr *)&laddr, laddrlen, 6182 (struct sockaddr *)&faddr, faddrlen, opts); 6183 6184 mutex_enter(&udp->udp_recv_lock); 6185 /* 6186 * Attempts to send data up during fallback will result in it being 6187 * queued in udp_t. First push up the datagrams obtained from the 6188 * socket, then any packets queued in udp_t. 6189 */ 6190 if (mp != NULL) { 6191 mp->b_next = udp->udp_fallback_queue_head; 6192 udp->udp_fallback_queue_head = mp; 6193 } 6194 while (udp->udp_fallback_queue_head != NULL) { 6195 mp = udp->udp_fallback_queue_head; 6196 udp->udp_fallback_queue_head = mp->b_next; 6197 mutex_exit(&udp->udp_recv_lock); 6198 mp->b_next = NULL; 6199 putnext(RD(q), mp); 6200 mutex_enter(&udp->udp_recv_lock); 6201 } 6202 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6203 /* 6204 * No longer a streams less socket 6205 */ 6206 mutex_enter(&connp->conn_lock); 6207 connp->conn_flags &= ~IPCL_NONSTR; 6208 mutex_exit(&connp->conn_lock); 6209 6210 mutex_exit(&udp->udp_recv_lock); 6211 6212 ASSERT(connp->conn_ref >= 1); 6213 6214 return (0); 6215 } 6216 6217 /* ARGSUSED3 */ 6218 int 6219 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6220 socklen_t *salenp, cred_t *cr) 6221 { 6222 conn_t *connp = (conn_t *)proto_handle; 6223 udp_t *udp = connp->conn_udp; 6224 int error; 6225 6226 /* All Solaris components should pass a cred for this operation. */ 6227 ASSERT(cr != NULL); 6228 6229 mutex_enter(&connp->conn_lock); 6230 if (udp->udp_state != TS_DATA_XFER) 6231 error = ENOTCONN; 6232 else 6233 error = conn_getpeername(connp, sa, salenp); 6234 mutex_exit(&connp->conn_lock); 6235 return (error); 6236 } 6237 6238 /* ARGSUSED3 */ 6239 int 6240 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6241 socklen_t *salenp, cred_t *cr) 6242 { 6243 conn_t *connp = (conn_t *)proto_handle; 6244 int error; 6245 6246 /* All Solaris components should pass a cred for this operation. */ 6247 ASSERT(cr != NULL); 6248 6249 mutex_enter(&connp->conn_lock); 6250 error = conn_getsockname(connp, sa, salenp); 6251 mutex_exit(&connp->conn_lock); 6252 return (error); 6253 } 6254 6255 int 6256 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6257 void *optvalp, socklen_t *optlen, cred_t *cr) 6258 { 6259 conn_t *connp = (conn_t *)proto_handle; 6260 int error; 6261 t_uscalar_t max_optbuf_len; 6262 void *optvalp_buf; 6263 int len; 6264 6265 /* All Solaris components should pass a cred for this operation. */ 6266 ASSERT(cr != NULL); 6267 6268 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6269 udp_opt_obj.odb_opt_des_arr, 6270 udp_opt_obj.odb_opt_arr_cnt, 6271 B_FALSE, B_TRUE, cr); 6272 if (error != 0) { 6273 if (error < 0) 6274 error = proto_tlitosyserr(-error); 6275 return (error); 6276 } 6277 6278 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6279 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6280 if (len == -1) { 6281 kmem_free(optvalp_buf, max_optbuf_len); 6282 return (EINVAL); 6283 } 6284 6285 /* 6286 * update optlen and copy option value 6287 */ 6288 t_uscalar_t size = MIN(len, *optlen); 6289 6290 bcopy(optvalp_buf, optvalp, size); 6291 bcopy(&size, optlen, sizeof (size)); 6292 6293 kmem_free(optvalp_buf, max_optbuf_len); 6294 return (0); 6295 } 6296 6297 int 6298 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6299 const void *optvalp, socklen_t optlen, cred_t *cr) 6300 { 6301 conn_t *connp = (conn_t *)proto_handle; 6302 int error; 6303 6304 /* All Solaris components should pass a cred for this operation. */ 6305 ASSERT(cr != NULL); 6306 6307 error = proto_opt_check(level, option_name, optlen, NULL, 6308 udp_opt_obj.odb_opt_des_arr, 6309 udp_opt_obj.odb_opt_arr_cnt, 6310 B_TRUE, B_FALSE, cr); 6311 6312 if (error != 0) { 6313 if (error < 0) 6314 error = proto_tlitosyserr(-error); 6315 return (error); 6316 } 6317 6318 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6319 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6320 NULL, cr); 6321 6322 ASSERT(error >= 0); 6323 6324 return (error); 6325 } 6326 6327 void 6328 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6329 { 6330 conn_t *connp = (conn_t *)proto_handle; 6331 udp_t *udp = connp->conn_udp; 6332 6333 mutex_enter(&udp->udp_recv_lock); 6334 connp->conn_flow_cntrld = B_FALSE; 6335 mutex_exit(&udp->udp_recv_lock); 6336 } 6337 6338 /* ARGSUSED2 */ 6339 int 6340 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6341 { 6342 conn_t *connp = (conn_t *)proto_handle; 6343 6344 /* All Solaris components should pass a cred for this operation. */ 6345 ASSERT(cr != NULL); 6346 6347 /* shut down the send side */ 6348 if (how != SHUT_RD) 6349 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6350 SOCK_OPCTL_SHUT_SEND, 0); 6351 /* shut down the recv side */ 6352 if (how != SHUT_WR) 6353 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6354 SOCK_OPCTL_SHUT_RECV, 0); 6355 return (0); 6356 } 6357 6358 int 6359 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6360 int mode, int32_t *rvalp, cred_t *cr) 6361 { 6362 conn_t *connp = (conn_t *)proto_handle; 6363 int error; 6364 6365 /* All Solaris components should pass a cred for this operation. */ 6366 ASSERT(cr != NULL); 6367 6368 /* 6369 * If we don't have a helper stream then create one. 6370 * ip_create_helper_stream takes care of locking the conn_t, 6371 * so this check for NULL is just a performance optimization. 6372 */ 6373 if (connp->conn_helper_info == NULL) { 6374 udp_stack_t *us = connp->conn_udp->udp_us; 6375 6376 ASSERT(us->us_ldi_ident != NULL); 6377 6378 /* 6379 * Create a helper stream for non-STREAMS socket. 6380 */ 6381 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6382 if (error != 0) { 6383 ip0dbg(("tcp_ioctl: create of IP helper stream " 6384 "failed %d\n", error)); 6385 return (error); 6386 } 6387 } 6388 6389 switch (cmd) { 6390 case _SIOCSOCKFALLBACK: 6391 case TI_GETPEERNAME: 6392 case TI_GETMYNAME: 6393 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6394 cmd)); 6395 error = EINVAL; 6396 break; 6397 default: 6398 /* 6399 * Pass on to IP using helper stream 6400 */ 6401 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6402 cmd, arg, mode, cr, rvalp); 6403 break; 6404 } 6405 return (error); 6406 } 6407 6408 /* ARGSUSED */ 6409 int 6410 udp_accept(sock_lower_handle_t lproto_handle, 6411 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6412 cred_t *cr) 6413 { 6414 return (EOPNOTSUPP); 6415 } 6416 6417 /* ARGSUSED */ 6418 int 6419 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6420 { 6421 return (EOPNOTSUPP); 6422 } 6423 6424 sock_downcalls_t sock_udp_downcalls = { 6425 udp_activate, /* sd_activate */ 6426 udp_accept, /* sd_accept */ 6427 udp_bind, /* sd_bind */ 6428 udp_listen, /* sd_listen */ 6429 udp_connect, /* sd_connect */ 6430 udp_getpeername, /* sd_getpeername */ 6431 udp_getsockname, /* sd_getsockname */ 6432 udp_getsockopt, /* sd_getsockopt */ 6433 udp_setsockopt, /* sd_setsockopt */ 6434 udp_send, /* sd_send */ 6435 NULL, /* sd_send_uio */ 6436 NULL, /* sd_recv_uio */ 6437 NULL, /* sd_poll */ 6438 udp_shutdown, /* sd_shutdown */ 6439 udp_clr_flowctrl, /* sd_setflowctrl */ 6440 udp_ioctl, /* sd_ioctl */ 6441 udp_close /* sd_close */ 6442 }; 6443