1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* Copyright (c) 1990 Mentat Inc. */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/strlog.h> 30 #include <sys/strsun.h> 31 #define _SUN_TPI_VERSION 2 32 #include <sys/tihdr.h> 33 #include <sys/timod.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/strsubr.h> 37 #include <sys/suntpi.h> 38 #include <sys/xti_inet.h> 39 #include <sys/kmem.h> 40 #include <sys/cred_impl.h> 41 #include <sys/policy.h> 42 #include <sys/priv.h> 43 #include <sys/ucred.h> 44 #include <sys/zone.h> 45 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/sockio.h> 49 #include <sys/vtrace.h> 50 #include <sys/sdt.h> 51 #include <sys/debug.h> 52 #include <sys/isa_defs.h> 53 #include <sys/random.h> 54 #include <netinet/in.h> 55 #include <netinet/ip6.h> 56 #include <netinet/icmp6.h> 57 #include <netinet/udp.h> 58 59 #include <inet/common.h> 60 #include <inet/ip.h> 61 #include <inet/ip_impl.h> 62 #include <inet/ipsec_impl.h> 63 #include <inet/ip6.h> 64 #include 
<inet/ip_ire.h> 65 #include <inet/ip_if.h> 66 #include <inet/ip_multi.h> 67 #include <inet/ip_ndp.h> 68 #include <inet/proto_set.h> 69 #include <inet/mib2.h> 70 #include <inet/optcom.h> 71 #include <inet/snmpcom.h> 72 #include <inet/kstatcom.h> 73 #include <inet/ipclassifier.h> 74 #include <sys/squeue_impl.h> 75 #include <inet/ipnet.h> 76 #include <sys/ethernet.h> 77 78 #include <sys/tsol/label.h> 79 #include <sys/tsol/tnet.h> 80 #include <rpc/pmap_prot.h> 81 82 #include <inet/udp_impl.h> 83 84 /* 85 * Synchronization notes: 86 * 87 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 88 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 89 * protects the contents of the udp_t. uf_lock protects the address and the 90 * fanout information. 91 * The lock order is conn_lock -> uf_lock. 92 * 93 * The fanout lock uf_lock: 94 * When a UDP endpoint is bound to a local port, it is inserted into 95 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 96 * The size of the array is controlled by the udp_bind_fanout_size variable. 97 * This variable can be changed in /etc/system if the default value is 98 * not large enough. Each bind hash bucket is protected by a per bucket 99 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 100 * structure and a few other fields in the udp_t. A UDP endpoint is removed 101 * from the bind hash list only when it is being unbound or being closed. 102 * The per bucket lock also protects a UDP endpoint's state changes. 103 * 104 * Plumbing notes: 105 * UDP is always a device driver. For compatibility with mibopen() code 106 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 107 * dummy module. 
108 * 109 * The above implies that we don't support any intermediate module to 110 * reside in between /dev/ip and udp -- in fact, we never supported such 111 * scenario in the past as the inter-layer communication semantics have 112 * always been private. 113 */ 114 115 /* For /etc/system control */ 116 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 117 118 static void udp_addr_req(queue_t *q, mblk_t *mp); 119 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 120 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 121 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 122 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 123 const in6_addr_t *, in_port_t, uint32_t); 124 static void udp_capability_req(queue_t *q, mblk_t *mp); 125 static int udp_tpi_close(queue_t *q, int flags); 126 static void udp_close_free(conn_t *); 127 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 128 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 129 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 130 int sys_error); 131 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 132 t_scalar_t tlierr, int sys_error); 133 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 134 cred_t *cr); 135 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 136 char *value, caddr_t cp, cred_t *cr); 137 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 138 char *value, caddr_t cp, cred_t *cr); 139 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 140 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 141 ip_recv_attr_t *ira); 142 static void udp_info_req(queue_t *q, mblk_t *mp); 143 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 144 static void udp_lrput(queue_t *, mblk_t *); 145 static void udp_lwput(queue_t *, mblk_t *); 146 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 147 cred_t *credp, 
boolean_t isv6); 148 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 149 cred_t *credp); 150 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 151 cred_t *credp); 152 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 153 int udp_opt_set(conn_t *connp, uint_t optset_context, 154 int level, int name, uint_t inlen, 155 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 156 void *thisdg_attrs, cred_t *cr); 157 int udp_opt_get(conn_t *connp, int level, int name, 158 uchar_t *ptr); 159 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 160 pid_t pid); 161 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 162 pid_t pid, ip_xmit_attr_t *ixa); 163 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 164 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 165 ip_xmit_attr_t *ixa); 166 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 167 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 168 int *); 169 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 170 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 171 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 172 static void udp_ud_err_connected(conn_t *, t_scalar_t); 173 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 174 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 175 boolean_t random); 176 static void udp_wput_other(queue_t *q, mblk_t *mp); 177 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 178 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 179 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 180 181 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 182 static void udp_stack_fini(netstackid_t stackid, void *arg); 183 184 static void *udp_kstat_init(netstackid_t stackid); 185 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 186 
static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 187 static void udp_kstat2_fini(netstackid_t, kstat_t *); 188 static int udp_kstat_update(kstat_t *kp, int rw); 189 190 191 /* Common routines for TPI and socket module */ 192 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 193 194 /* Common routine for TPI and socket module */ 195 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); 196 static void udp_do_close(conn_t *); 197 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 198 boolean_t); 199 static int udp_do_unbind(conn_t *); 200 201 int udp_getsockname(sock_lower_handle_t, 202 struct sockaddr *, socklen_t *, cred_t *); 203 int udp_getpeername(sock_lower_handle_t, 204 struct sockaddr *, socklen_t *, cred_t *); 205 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 206 cred_t *, pid_t); 207 208 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) 209 210 /* 211 * Checks if the given destination addr/port is allowed out. 212 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 213 * Called for each connect() and for sendto()/sendmsg() to a different 214 * destination. 215 * For connect(), called in udp_connect(). 216 * For sendto()/sendmsg(), called in udp_output_newdst(). 217 * 218 * This macro assumes that the cl_inet_connect2 hook is not NULL. 219 * Please check this before calling this macro. 
220 * 221 * void 222 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 223 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 224 */ 225 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 226 (err) = 0; \ 227 /* \ 228 * Running in cluster mode - check and register active \ 229 * "connection" information \ 230 */ \ 231 if ((cp)->conn_ipversion == IPV4_VERSION) \ 232 (err) = (*cl_inet_connect2)( \ 233 (cp)->conn_netstack->netstack_stackid, \ 234 IPPROTO_UDP, is_outgoing, AF_INET, \ 235 (uint8_t *)&((cp)->conn_laddr_v4), \ 236 (cp)->conn_lport, \ 237 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 238 (in_port_t)(fport), NULL); \ 239 else \ 240 (err) = (*cl_inet_connect2)( \ 241 (cp)->conn_netstack->netstack_stackid, \ 242 IPPROTO_UDP, is_outgoing, AF_INET6, \ 243 (uint8_t *)&((cp)->conn_laddr_v6), \ 244 (cp)->conn_lport, \ 245 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 246 } 247 248 static struct module_info udp_mod_info = { 249 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 250 }; 251 252 /* 253 * Entry points for UDP as a device. 254 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 255 */ 256 static struct qinit udp_rinitv4 = { 257 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 258 }; 259 260 static struct qinit udp_rinitv6 = { 261 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 262 }; 263 264 static struct qinit udp_winit = { 265 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info 266 }; 267 268 /* UDP entry point during fallback */ 269 struct qinit udp_fallback_sock_winit = { 270 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 271 }; 272 273 /* 274 * UDP needs to handle I_LINK and I_PLINK since ifconfig 275 * likes to use it as a place to hang the various streams. 
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* Largest IPv4 UDP payload: maximum IP packet less UDP and simple IP headers */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest IPv6 UDP payload: maximum IP packet less UDP and IPv6 headers */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of udp_g_t_info_ack_ipv4 */
static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/*
 * UDP tunables related declarations. Definitions are in udp_tunables.c
 */
extern mod_prop_info_t udp_propinfo_tbl[];
extern int udp_propinfo_count;

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP). MLP has special function
 * on TX systems. Once a port is made MLP, it's not available as
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" to find a suitable anon port.
361 */ 362 static in_port_t 363 udp_get_next_priv_port(udp_t *udp) 364 { 365 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 366 in_port_t nextport; 367 boolean_t restart = B_FALSE; 368 udp_stack_t *us = udp->udp_us; 369 370 retry: 371 if (next_priv_port < us->us_min_anonpriv_port || 372 next_priv_port >= IPPORT_RESERVED) { 373 next_priv_port = IPPORT_RESERVED - 1; 374 if (restart) 375 return (0); 376 restart = B_TRUE; 377 } 378 379 if (is_system_labeled() && 380 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 381 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 382 next_priv_port = nextport; 383 goto retry; 384 } 385 386 return (next_priv_port--); 387 } 388 389 /* 390 * Hash list removal routine for udp_t structures. 391 */ 392 static void 393 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 394 { 395 udp_t *udpnext; 396 kmutex_t *lockp; 397 udp_stack_t *us = udp->udp_us; 398 conn_t *connp = udp->udp_connp; 399 400 if (udp->udp_ptpbhn == NULL) 401 return; 402 403 /* 404 * Extract the lock pointer in case there are concurrent 405 * hash_remove's for this instance. 
406 */ 407 ASSERT(connp->conn_lport != 0); 408 if (!caller_holds_lock) { 409 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 410 us->us_bind_fanout_size)].uf_lock; 411 ASSERT(lockp != NULL); 412 mutex_enter(lockp); 413 } 414 if (udp->udp_ptpbhn != NULL) { 415 udpnext = udp->udp_bind_hash; 416 if (udpnext != NULL) { 417 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 418 udp->udp_bind_hash = NULL; 419 } 420 *udp->udp_ptpbhn = udpnext; 421 udp->udp_ptpbhn = NULL; 422 } 423 if (!caller_holds_lock) { 424 mutex_exit(lockp); 425 } 426 } 427 428 static void 429 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 430 { 431 conn_t *connp = udp->udp_connp; 432 udp_t **udpp; 433 udp_t *udpnext; 434 conn_t *connext; 435 436 ASSERT(MUTEX_HELD(&uf->uf_lock)); 437 ASSERT(udp->udp_ptpbhn == NULL); 438 udpp = &uf->uf_udp; 439 udpnext = udpp[0]; 440 if (udpnext != NULL) { 441 /* 442 * If the new udp bound to the INADDR_ANY address 443 * and the first one in the list is not bound to 444 * INADDR_ANY we skip all entries until we find the 445 * first one bound to INADDR_ANY. 446 * This makes sure that applications binding to a 447 * specific address get preference over those binding to 448 * INADDR_ANY. 449 */ 450 connext = udpnext->udp_connp; 451 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 452 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 453 while ((udpnext = udpp[0]) != NULL && 454 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 455 udpp = &(udpnext->udp_bind_hash); 456 } 457 if (udpnext != NULL) 458 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 459 } else { 460 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 461 } 462 } 463 udp->udp_bind_hash = udpnext; 464 udp->udp_ptpbhn = udpp; 465 udpp[0] = udp; 466 } 467 468 /* 469 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 470 * passed to udp_wput. 471 * It associates a port number and local address with the stream. 
472 * It calls IP to verify the local IP address, and calls IP to insert 473 * the conn_t in the fanout table. 474 * If everything is ok it then sends the T_BIND_ACK back up. 475 * 476 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 477 * without setting SO_REUSEADDR. This is needed so that they 478 * can be viewed as two independent transport protocols. 479 * However, anonymouns ports are allocated from the same range to avoid 480 * duplicating the us->us_next_port_to_try. 481 */ 482 static void 483 udp_tpi_bind(queue_t *q, mblk_t *mp) 484 { 485 sin_t *sin; 486 sin6_t *sin6; 487 mblk_t *mp1; 488 struct T_bind_req *tbr; 489 conn_t *connp; 490 udp_t *udp; 491 int error; 492 struct sockaddr *sa; 493 cred_t *cr; 494 495 /* 496 * All Solaris components should pass a db_credp 497 * for this TPI message, hence we ASSERT. 498 * But in case there is some other M_PROTO that looks 499 * like a TPI message sent by some other kernel 500 * component, we check and return an error. 501 */ 502 cr = msg_getcred(mp, NULL); 503 ASSERT(cr != NULL); 504 if (cr == NULL) { 505 udp_err_ack(q, mp, TSYSERR, EINVAL); 506 return; 507 } 508 509 connp = Q_TO_CONN(q); 510 udp = connp->conn_udp; 511 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 512 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 513 "udp_bind: bad req, len %u", 514 (uint_t)(mp->b_wptr - mp->b_rptr)); 515 udp_err_ack(q, mp, TPROTO, 0); 516 return; 517 } 518 if (udp->udp_state != TS_UNBND) { 519 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 520 "udp_bind: bad state, %u", udp->udp_state); 521 udp_err_ack(q, mp, TOUTSTATE, 0); 522 return; 523 } 524 /* 525 * Reallocate the message to make sure we have enough room for an 526 * address. 527 */ 528 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 529 if (mp1 == NULL) { 530 udp_err_ack(q, mp, TSYSERR, ENOMEM); 531 return; 532 } 533 534 mp = mp1; 535 536 /* Reset the message type in preparation for shipping it back. 
*/ 537 DB_TYPE(mp) = M_PCPROTO; 538 539 tbr = (struct T_bind_req *)mp->b_rptr; 540 switch (tbr->ADDR_length) { 541 case 0: /* Request for a generic port */ 542 tbr->ADDR_offset = sizeof (struct T_bind_req); 543 if (connp->conn_family == AF_INET) { 544 tbr->ADDR_length = sizeof (sin_t); 545 sin = (sin_t *)&tbr[1]; 546 *sin = sin_null; 547 sin->sin_family = AF_INET; 548 mp->b_wptr = (uchar_t *)&sin[1]; 549 sa = (struct sockaddr *)sin; 550 } else { 551 ASSERT(connp->conn_family == AF_INET6); 552 tbr->ADDR_length = sizeof (sin6_t); 553 sin6 = (sin6_t *)&tbr[1]; 554 *sin6 = sin6_null; 555 sin6->sin6_family = AF_INET6; 556 mp->b_wptr = (uchar_t *)&sin6[1]; 557 sa = (struct sockaddr *)sin6; 558 } 559 break; 560 561 case sizeof (sin_t): /* Complete IPv4 address */ 562 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 563 sizeof (sin_t)); 564 if (sa == NULL || !OK_32PTR((char *)sa)) { 565 udp_err_ack(q, mp, TSYSERR, EINVAL); 566 return; 567 } 568 if (connp->conn_family != AF_INET || 569 sa->sa_family != AF_INET) { 570 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 571 return; 572 } 573 break; 574 575 case sizeof (sin6_t): /* complete IPv6 address */ 576 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 577 sizeof (sin6_t)); 578 if (sa == NULL || !OK_32PTR((char *)sa)) { 579 udp_err_ack(q, mp, TSYSERR, EINVAL); 580 return; 581 } 582 if (connp->conn_family != AF_INET6 || 583 sa->sa_family != AF_INET6) { 584 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 585 return; 586 } 587 break; 588 589 default: /* Invalid request */ 590 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 591 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 592 udp_err_ack(q, mp, TBADADDR, 0); 593 return; 594 } 595 596 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 597 tbr->PRIM_type != O_T_BIND_REQ); 598 599 if (error != 0) { 600 if (error > 0) { 601 udp_err_ack(q, mp, TSYSERR, error); 602 } else { 603 udp_err_ack(q, mp, -error, 0); 604 } 605 } else { 606 tbr->PRIM_type = T_BIND_ACK; 
607 qreply(q, mp); 608 } 609 } 610 611 /* 612 * This routine handles each T_CONN_REQ message passed to udp. It 613 * associates a default destination address with the stream. 614 * 615 * After various error checks are completed, udp_connect() lays 616 * the target address and port into the composite header template. 617 * Then we ask IP for information, including a source address if we didn't 618 * already have one. Finally we send up the T_OK_ACK reply message. 619 */ 620 static void 621 udp_tpi_connect(queue_t *q, mblk_t *mp) 622 { 623 conn_t *connp = Q_TO_CONN(q); 624 int error; 625 socklen_t len; 626 struct sockaddr *sa; 627 struct T_conn_req *tcr; 628 cred_t *cr; 629 pid_t pid; 630 /* 631 * All Solaris components should pass a db_credp 632 * for this TPI message, hence we ASSERT. 633 * But in case there is some other M_PROTO that looks 634 * like a TPI message sent by some other kernel 635 * component, we check and return an error. 636 */ 637 cr = msg_getcred(mp, &pid); 638 ASSERT(cr != NULL); 639 if (cr == NULL) { 640 udp_err_ack(q, mp, TSYSERR, EINVAL); 641 return; 642 } 643 644 tcr = (struct T_conn_req *)mp->b_rptr; 645 646 /* A bit of sanity checking */ 647 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 648 udp_err_ack(q, mp, TPROTO, 0); 649 return; 650 } 651 652 if (tcr->OPT_length != 0) { 653 udp_err_ack(q, mp, TBADOPT, 0); 654 return; 655 } 656 657 /* 658 * Determine packet type based on type of address passed in 659 * the request should contain an IPv4 or IPv6 address. 660 * Make sure that address family matches the type of 661 * family of the address passed down. 
662 */ 663 len = tcr->DEST_length; 664 switch (tcr->DEST_length) { 665 default: 666 udp_err_ack(q, mp, TBADADDR, 0); 667 return; 668 669 case sizeof (sin_t): 670 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 671 sizeof (sin_t)); 672 break; 673 674 case sizeof (sin6_t): 675 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 676 sizeof (sin6_t)); 677 break; 678 } 679 680 error = proto_verify_ip_addr(connp->conn_family, sa, len); 681 if (error != 0) { 682 udp_err_ack(q, mp, TSYSERR, error); 683 return; 684 } 685 686 error = udp_do_connect(connp, sa, len, cr, pid); 687 if (error != 0) { 688 if (error < 0) 689 udp_err_ack(q, mp, -error, 0); 690 else 691 udp_err_ack(q, mp, TSYSERR, error); 692 } else { 693 mblk_t *mp1; 694 /* 695 * We have to send a connection confirmation to 696 * keep TLI happy. 697 */ 698 if (connp->conn_family == AF_INET) { 699 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 700 sizeof (sin_t), NULL, 0); 701 } else { 702 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 703 sizeof (sin6_t), NULL, 0); 704 } 705 if (mp1 == NULL) { 706 udp_err_ack(q, mp, TSYSERR, ENOMEM); 707 return; 708 } 709 710 /* 711 * Send ok_ack for T_CONN_REQ 712 */ 713 mp = mi_tpi_ok_ack_alloc(mp); 714 if (mp == NULL) { 715 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 716 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 717 return; 718 } 719 720 putnext(connp->conn_rq, mp); 721 putnext(connp->conn_rq, mp1); 722 } 723 } 724 725 static int 726 udp_tpi_close(queue_t *q, int flags) 727 { 728 conn_t *connp; 729 730 if (flags & SO_FALLBACK) { 731 /* 732 * stream is being closed while in fallback 733 * simply free the resources that were allocated 734 */ 735 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 736 qprocsoff(q); 737 goto done; 738 } 739 740 connp = Q_TO_CONN(q); 741 udp_do_close(connp); 742 done: 743 q->q_ptr = WR(q)->q_ptr = NULL; 744 return (0); 745 } 746 747 static void 748 udp_close_free(conn_t *connp) 749 { 750 udp_t *udp = connp->conn_udp; 751 752 /* If there are any options associated with the stream, free them. */ 753 if (udp->udp_recv_ipp.ipp_fields != 0) 754 ip_pkt_free(&udp->udp_recv_ipp); 755 756 /* 757 * Clear any fields which the kmem_cache constructor clears. 758 * Only udp_connp needs to be preserved. 759 * TBD: We should make this more efficient to avoid clearing 760 * everything. 
761 */ 762 ASSERT(udp->udp_connp == connp); 763 bzero(udp, sizeof (udp_t)); 764 udp->udp_connp = connp; 765 } 766 767 static int 768 udp_do_disconnect(conn_t *connp) 769 { 770 udp_t *udp; 771 udp_fanout_t *udpf; 772 udp_stack_t *us; 773 int error; 774 775 udp = connp->conn_udp; 776 us = udp->udp_us; 777 mutex_enter(&connp->conn_lock); 778 if (udp->udp_state != TS_DATA_XFER) { 779 mutex_exit(&connp->conn_lock); 780 return (-TOUTSTATE); 781 } 782 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 783 us->us_bind_fanout_size)]; 784 mutex_enter(&udpf->uf_lock); 785 if (connp->conn_mcbc_bind) 786 connp->conn_saddr_v6 = ipv6_all_zeros; 787 else 788 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 789 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 790 connp->conn_faddr_v6 = ipv6_all_zeros; 791 connp->conn_fport = 0; 792 udp->udp_state = TS_IDLE; 793 mutex_exit(&udpf->uf_lock); 794 795 /* Remove any remnants of mapped address binding */ 796 if (connp->conn_family == AF_INET6) 797 connp->conn_ipversion = IPV6_VERSION; 798 799 connp->conn_v6lastdst = ipv6_all_zeros; 800 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 801 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 802 mutex_exit(&connp->conn_lock); 803 if (error != 0) 804 return (error); 805 806 /* 807 * Tell IP to remove the full binding and revert 808 * to the local address binding. 809 */ 810 return (ip_laddr_fanout_insert(connp)); 811 } 812 813 static void 814 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 815 { 816 conn_t *connp = Q_TO_CONN(q); 817 int error; 818 819 /* 820 * Allocate the largest primitive we need to send back 821 * T_error_ack is > than T_ok_ack 822 */ 823 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 824 if (mp == NULL) { 825 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 826 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 827 return; 828 } 829 830 error = udp_do_disconnect(connp); 831 832 if (error != 0) { 833 if (error < 0) { 834 udp_err_ack(q, mp, -error, 0); 835 } else { 836 udp_err_ack(q, mp, TSYSERR, error); 837 } 838 } else { 839 mp = mi_tpi_ok_ack_alloc(mp); 840 ASSERT(mp != NULL); 841 qreply(q, mp); 842 } 843 } 844 845 int 846 udp_disconnect(conn_t *connp) 847 { 848 int error; 849 850 connp->conn_dgram_errind = B_FALSE; 851 error = udp_do_disconnect(connp); 852 if (error < 0) 853 error = proto_tlitosyserr(-error); 854 855 return (error); 856 } 857 858 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 859 static void 860 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 861 { 862 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 863 qreply(q, mp); 864 } 865 866 /* Shorthand to generate and send TPI error acks to our client */ 867 static void 868 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 869 t_scalar_t t_error, int sys_error) 870 { 871 struct T_error_ack *teackp; 872 873 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 874 M_PCPROTO, T_ERROR_ACK)) != NULL) { 875 teackp = (struct T_error_ack *)mp->b_rptr; 876 teackp->ERROR_prim = primitive; 877 teackp->TLI_error = t_error; 878 teackp->UNIX_error = sys_error; 879 qreply(q, mp); 880 } 881 } 882 883 /* At minimum we need 4 bytes of UDP header */ 884 #define ICMP_MIN_UDP_HDR 4 885 886 /* 887 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 888 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 889 * Assumes that IP has pulled up everything up to and including the ICMP header. 
890 */ 891 /* ARGSUSED2 */ 892 static void 893 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 894 { 895 conn_t *connp = (conn_t *)arg1; 896 icmph_t *icmph; 897 ipha_t *ipha; 898 int iph_hdr_length; 899 udpha_t *udpha; 900 sin_t sin; 901 sin6_t sin6; 902 mblk_t *mp1; 903 int error = 0; 904 udp_t *udp = connp->conn_udp; 905 906 ipha = (ipha_t *)mp->b_rptr; 907 908 ASSERT(OK_32PTR(mp->b_rptr)); 909 910 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 911 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 912 udp_icmp_error_ipv6(connp, mp, ira); 913 return; 914 } 915 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 916 917 /* Skip past the outer IP and ICMP headers */ 918 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 919 iph_hdr_length = ira->ira_ip_hdr_length; 920 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 921 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 922 923 /* Skip past the inner IP and find the ULP header */ 924 iph_hdr_length = IPH_HDR_LENGTH(ipha); 925 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 926 927 switch (icmph->icmph_type) { 928 case ICMP_DEST_UNREACHABLE: 929 switch (icmph->icmph_code) { 930 case ICMP_FRAGMENTATION_NEEDED: { 931 ipha_t *ipha; 932 ip_xmit_attr_t *ixa; 933 /* 934 * IP has already adjusted the path MTU. 935 * But we need to adjust DF for IPv4. 936 */ 937 if (connp->conn_ipversion != IPV4_VERSION) 938 break; 939 940 ixa = conn_get_ixa(connp, B_FALSE); 941 if (ixa == NULL || ixa->ixa_ire == NULL) { 942 /* 943 * Some other thread holds conn_ixa. We will 944 * redo this on the next ICMP too big. 
945 */ 946 if (ixa != NULL) 947 ixa_refrele(ixa); 948 break; 949 } 950 (void) ip_get_pmtu(ixa); 951 952 mutex_enter(&connp->conn_lock); 953 ipha = (ipha_t *)connp->conn_ht_iphc; 954 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 955 ipha->ipha_fragment_offset_and_flags |= 956 IPH_DF_HTONS; 957 } else { 958 ipha->ipha_fragment_offset_and_flags &= 959 ~IPH_DF_HTONS; 960 } 961 mutex_exit(&connp->conn_lock); 962 ixa_refrele(ixa); 963 break; 964 } 965 case ICMP_PORT_UNREACHABLE: 966 case ICMP_PROTOCOL_UNREACHABLE: 967 error = ECONNREFUSED; 968 break; 969 default: 970 /* Transient errors */ 971 break; 972 } 973 break; 974 default: 975 /* Transient errors */ 976 break; 977 } 978 if (error == 0) { 979 freemsg(mp); 980 return; 981 } 982 983 /* 984 * Deliver T_UDERROR_IND when the application has asked for it. 985 * The socket layer enables this automatically when connected. 986 */ 987 if (!connp->conn_dgram_errind) { 988 freemsg(mp); 989 return; 990 } 991 992 switch (connp->conn_family) { 993 case AF_INET: 994 sin = sin_null; 995 sin.sin_family = AF_INET; 996 sin.sin_addr.s_addr = ipha->ipha_dst; 997 sin.sin_port = udpha->uha_dst_port; 998 if (IPCL_IS_NONSTR(connp)) { 999 mutex_enter(&connp->conn_lock); 1000 if (udp->udp_state == TS_DATA_XFER) { 1001 if (sin.sin_port == connp->conn_fport && 1002 sin.sin_addr.s_addr == 1003 connp->conn_faddr_v4) { 1004 mutex_exit(&connp->conn_lock); 1005 (*connp->conn_upcalls->su_set_error) 1006 (connp->conn_upper_handle, error); 1007 goto done; 1008 } 1009 } else { 1010 udp->udp_delayed_error = error; 1011 *((sin_t *)&udp->udp_delayed_addr) = sin; 1012 } 1013 mutex_exit(&connp->conn_lock); 1014 } else { 1015 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1016 NULL, 0, error); 1017 if (mp1 != NULL) 1018 putnext(connp->conn_rq, mp1); 1019 } 1020 break; 1021 case AF_INET6: 1022 sin6 = sin6_null; 1023 sin6.sin6_family = AF_INET6; 1024 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1025 sin6.sin6_port = udpha->uha_dst_port; 1026 if 
(IPCL_IS_NONSTR(connp)) { 1027 mutex_enter(&connp->conn_lock); 1028 if (udp->udp_state == TS_DATA_XFER) { 1029 if (sin6.sin6_port == connp->conn_fport && 1030 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1031 &connp->conn_faddr_v6)) { 1032 mutex_exit(&connp->conn_lock); 1033 (*connp->conn_upcalls->su_set_error) 1034 (connp->conn_upper_handle, error); 1035 goto done; 1036 } 1037 } else { 1038 udp->udp_delayed_error = error; 1039 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1040 } 1041 mutex_exit(&connp->conn_lock); 1042 } else { 1043 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1044 NULL, 0, error); 1045 if (mp1 != NULL) 1046 putnext(connp->conn_rq, mp1); 1047 } 1048 break; 1049 } 1050 done: 1051 freemsg(mp); 1052 } 1053 1054 /* 1055 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1056 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1057 * Assumes that IP has pulled up all the extension headers as well as the 1058 * ICMPv6 header. 
 *
 * mp is consumed on every path through this function: it is freed here,
 * and in the ICMP6_PACKET_TOO_BIG case a freshly allocated T_UNITDATA_IND
 * carrying an IPV6_PATHMTU option is passed to udp_ulp_recv() in its place.
 */
static void
udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
{
	icmp6_t		*icmp6;
	ip6_t		*ip6h, *outer_ip6h;
	uint16_t	iph_hdr_length;
	uint8_t		*nexthdrp;
	udpha_t		*udpha;
	sin6_t		sin6;
	mblk_t		*mp1;
	int		error = 0;
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;

	outer_ip6h = (ip6_t *)mp->b_rptr;
#ifdef DEBUG
	/* Cross-check the header length that IP recorded in ira. */
	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
	else
		iph_hdr_length = IPV6_HDR_LEN;
	ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
#endif
	/* Skip past the outer IP and ICMP headers */
	iph_hdr_length = ira->ira_ip_hdr_length;
	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];

	/* Skip past the inner IP and find the ULP header */
	ip6h = (ip6_t *)&icmp6[1];	/* Inner IP header */
	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
		freemsg(mp);
		return;
	}
	/* iph_hdr_length now holds the length of the inner IP header(s). */
	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);

	/*
	 * Map the ICMPv6 type/code to an errno.  error stays 0 for
	 * everything treated as transient, in which case the message is
	 * dropped below without notifying the application.
	 */
	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		switch (icmp6->icmp6_code) {
		case ICMP6_DST_UNREACH_NOPORT:
			error = ECONNREFUSED;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
		case ICMP6_DST_UNREACH_NOROUTE:
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
		case ICMP6_DST_UNREACH_ADDR:
			/* Transient errors */
			break;
		default:
			break;
		}
		break;
	case ICMP6_PACKET_TOO_BIG: {
		struct T_unitdata_ind	*tudi;
		struct T_opthdr		*toh;
		size_t			udi_size;
		mblk_t			*newmp;
		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
		    sizeof (struct ip6_mtuinfo);
		/* NOTE: this pointer shadows the function-scope sin6_t sin6. */
		sin6_t			*sin6;
		struct ip6_mtuinfo	*mtuinfo;

		/*
		 * If the application has requested to receive path mtu
		 * information, send up an empty message containing an
		 * IPV6_PATHMTU ancillary data item.
		 */
		if (!connp->conn_ipv6_recvpathmtu)
			break;

		/* T_unitdata_ind + source sin6_t + one T_opthdr/ip6_mtuinfo */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
		    opt_length;
		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			break;
		}

		/*
		 * newmp->b_cont is left to NULL on purpose.  This is an
		 * empty message containing only ancillary data.
		 */
		newmp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
		newmp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
		tudi->OPT_length = opt_length;

		/* Source address: the connected peer (port left as zero). */
		sin6 = (sin6_t *)&tudi[1];
		bzero(sin6, sizeof (sin6_t));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = connp->conn_faddr_v6;

		toh = (struct T_opthdr *)&sin6[1];
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_PATHMTU;
		toh->len = opt_length;
		toh->status = 0;

		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
		/*
		 * NOTE(review): icmp6_mtu is copied as found in the packet,
		 * with no ntohl().  Confirm whether consumers of ip6m_mtu
		 * expect host byte order here.
		 */
		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
		/*
		 * We've consumed everything we need from the original
		 * message.  Free it, then send our empty message.
		 */
		freemsg(mp);
		udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
		return;
	}
	case ICMP6_TIME_EXCEEDED:
		/* Transient errors */
		break;
	case ICMP6_PARAM_PROB:
		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
		    (uchar_t *)nexthdrp) {
			error = ECONNREFUSED;
			break;
		}
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!connp->conn_dgram_errind) {
		freemsg(mp);
		return;
	}

	/* Describe the destination the failed datagram was sent to. */
	sin6 = sin6_null;
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = ip6h->ip6_dst;
	sin6.sin6_port = udpha->uha_dst_port;
	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;

	if (IPCL_IS_NONSTR(connp)) {
		mutex_enter(&connp->conn_lock);
		if (udp->udp_state == TS_DATA_XFER) {
			/*
			 * Connected: only report the error when it refers to
			 * the connected peer.  conn_lock is dropped before
			 * making the upcall.
			 */
			if (sin6.sin6_port == connp->conn_fport &&
			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
			    &connp->conn_faddr_v6)) {
				mutex_exit(&connp->conn_lock);
				(*connp->conn_upcalls->su_set_error)
				    (connp->conn_upper_handle, error);
				goto done;
			}
		} else {
			/* Not connected: record the error and its address. */
			udp->udp_delayed_error = error;
			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
		}
		mutex_exit(&connp->conn_lock);
	} else {
		/* STREAMS endpoint: send a T_UDERROR_IND upstream. */
		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
		    NULL, 0, error);
		if (mp1 != NULL)
			putnext(connp->conn_rq, mp1);
	}
done:
	freemsg(mp);
}

/*
 * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
 * The local address is filled in if endpoint is bound.
The remote address 1234 * is filled in if remote address has been precified ("connected endpoint") 1235 * (The concept of connected CLTS sockets is alien to published TPI 1236 * but we support it anyway). 1237 */ 1238 static void 1239 udp_addr_req(queue_t *q, mblk_t *mp) 1240 { 1241 struct sockaddr *sa; 1242 mblk_t *ackmp; 1243 struct T_addr_ack *taa; 1244 udp_t *udp = Q_TO_UDP(q); 1245 conn_t *connp = udp->udp_connp; 1246 uint_t addrlen; 1247 1248 /* Make it large enough for worst case */ 1249 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1250 2 * sizeof (sin6_t), 1); 1251 if (ackmp == NULL) { 1252 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1253 return; 1254 } 1255 taa = (struct T_addr_ack *)ackmp->b_rptr; 1256 1257 bzero(taa, sizeof (struct T_addr_ack)); 1258 ackmp->b_wptr = (uchar_t *)&taa[1]; 1259 1260 taa->PRIM_type = T_ADDR_ACK; 1261 ackmp->b_datap->db_type = M_PCPROTO; 1262 1263 if (connp->conn_family == AF_INET) 1264 addrlen = sizeof (sin_t); 1265 else 1266 addrlen = sizeof (sin6_t); 1267 1268 mutex_enter(&connp->conn_lock); 1269 /* 1270 * Note: Following code assumes 32 bit alignment of basic 1271 * data structures like sin_t and struct T_addr_ack. 
1272 */ 1273 if (udp->udp_state != TS_UNBND) { 1274 /* 1275 * Fill in local address first 1276 */ 1277 taa->LOCADDR_offset = sizeof (*taa); 1278 taa->LOCADDR_length = addrlen; 1279 sa = (struct sockaddr *)&taa[1]; 1280 (void) conn_getsockname(connp, sa, &addrlen); 1281 ackmp->b_wptr += addrlen; 1282 } 1283 if (udp->udp_state == TS_DATA_XFER) { 1284 /* 1285 * connected, fill remote address too 1286 */ 1287 taa->REMADDR_length = addrlen; 1288 /* assumed 32-bit alignment */ 1289 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1290 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1291 (void) conn_getpeername(connp, sa, &addrlen); 1292 ackmp->b_wptr += addrlen; 1293 } 1294 mutex_exit(&connp->conn_lock); 1295 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1296 qreply(q, ackmp); 1297 } 1298 1299 static void 1300 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1301 { 1302 conn_t *connp = udp->udp_connp; 1303 1304 if (connp->conn_family == AF_INET) { 1305 *tap = udp_g_t_info_ack_ipv4; 1306 } else { 1307 *tap = udp_g_t_info_ack_ipv6; 1308 } 1309 tap->CURRENT_state = udp->udp_state; 1310 tap->OPT_size = udp_max_optsize; 1311 } 1312 1313 static void 1314 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1315 t_uscalar_t cap_bits1) 1316 { 1317 tcap->CAP_bits1 = 0; 1318 1319 if (cap_bits1 & TC1_INFO) { 1320 udp_copy_info(&tcap->INFO_ack, udp); 1321 tcap->CAP_bits1 |= TC1_INFO; 1322 } 1323 } 1324 1325 /* 1326 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1327 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1328 * udp_g_t_info_ack. The current state of the stream is copied from 1329 * udp_state. 
1330 */ 1331 static void 1332 udp_capability_req(queue_t *q, mblk_t *mp) 1333 { 1334 t_uscalar_t cap_bits1; 1335 struct T_capability_ack *tcap; 1336 udp_t *udp = Q_TO_UDP(q); 1337 1338 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1339 1340 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1341 mp->b_datap->db_type, T_CAPABILITY_ACK); 1342 if (!mp) 1343 return; 1344 1345 tcap = (struct T_capability_ack *)mp->b_rptr; 1346 udp_do_capability_ack(udp, tcap, cap_bits1); 1347 1348 qreply(q, mp); 1349 } 1350 1351 /* 1352 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1353 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1354 * The current state of the stream is copied from udp_state. 1355 */ 1356 static void 1357 udp_info_req(queue_t *q, mblk_t *mp) 1358 { 1359 udp_t *udp = Q_TO_UDP(q); 1360 1361 /* Create a T_INFO_ACK message. */ 1362 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1363 T_INFO_ACK); 1364 if (!mp) 1365 return; 1366 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1367 qreply(q, mp); 1368 } 1369 1370 /* For /dev/udp aka AF_INET open */ 1371 static int 1372 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1373 { 1374 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1375 } 1376 1377 /* For /dev/udp6 aka AF_INET6 open */ 1378 static int 1379 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1380 { 1381 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1382 } 1383 1384 /* 1385 * This is the open routine for udp. It allocates a udp_t structure for 1386 * the stream and, on the first open of the module, creates an ND table. 1387 */ 1388 static int 1389 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1390 boolean_t isv6) 1391 { 1392 udp_t *udp; 1393 conn_t *connp; 1394 dev_t conn_dev; 1395 vmem_t *minor_arena; 1396 int err; 1397 1398 /* If the stream is already open, return immediately. 
*/ 1399 if (q->q_ptr != NULL) 1400 return (0); 1401 1402 if (sflag == MODOPEN) 1403 return (EINVAL); 1404 1405 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1406 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1407 minor_arena = ip_minor_arena_la; 1408 } else { 1409 /* 1410 * Either minor numbers in the large arena were exhausted 1411 * or a non socket application is doing the open. 1412 * Try to allocate from the small arena. 1413 */ 1414 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1415 return (EBUSY); 1416 1417 minor_arena = ip_minor_arena_sa; 1418 } 1419 1420 if (flag & SO_FALLBACK) { 1421 /* 1422 * Non streams socket needs a stream to fallback to 1423 */ 1424 RD(q)->q_ptr = (void *)conn_dev; 1425 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1426 WR(q)->q_ptr = (void *)minor_arena; 1427 qprocson(q); 1428 return (0); 1429 } 1430 1431 connp = udp_do_open(credp, isv6, KM_SLEEP, &err); 1432 if (connp == NULL) { 1433 inet_minor_free(minor_arena, conn_dev); 1434 return (err); 1435 } 1436 udp = connp->conn_udp; 1437 1438 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1439 connp->conn_dev = conn_dev; 1440 connp->conn_minor_arena = minor_arena; 1441 1442 /* 1443 * Initialize the udp_t structure for this stream. 1444 */ 1445 q->q_ptr = connp; 1446 WR(q)->q_ptr = connp; 1447 connp->conn_rq = q; 1448 connp->conn_wq = WR(q); 1449 1450 /* 1451 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1452 * need to lock anything. 1453 */ 1454 ASSERT(connp->conn_proto == IPPROTO_UDP); 1455 ASSERT(connp->conn_udp == udp); 1456 ASSERT(udp->udp_connp == connp); 1457 1458 if (flag & SO_SOCKSTR) { 1459 udp->udp_issocket = B_TRUE; 1460 } 1461 1462 WR(q)->q_hiwat = connp->conn_sndbuf; 1463 WR(q)->q_lowat = connp->conn_sndlowat; 1464 1465 qprocson(q); 1466 1467 /* Set the Stream head write offset and high watermark. 
*/ 1468 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1469 (void) proto_set_rx_hiwat(q, connp, 1470 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1471 1472 mutex_enter(&connp->conn_lock); 1473 connp->conn_state_flags &= ~CONN_INCIPIENT; 1474 mutex_exit(&connp->conn_lock); 1475 return (0); 1476 } 1477 1478 /* 1479 * Which UDP options OK to set through T_UNITDATA_REQ... 1480 */ 1481 /* ARGSUSED */ 1482 static boolean_t 1483 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1484 { 1485 return (B_TRUE); 1486 } 1487 1488 /* 1489 * This routine gets default values of certain options whose default 1490 * values are maintained by protcol specific code 1491 */ 1492 int 1493 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1494 { 1495 udp_t *udp = Q_TO_UDP(q); 1496 udp_stack_t *us = udp->udp_us; 1497 int *i1 = (int *)ptr; 1498 1499 switch (level) { 1500 case IPPROTO_IP: 1501 switch (name) { 1502 case IP_MULTICAST_TTL: 1503 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1504 return (sizeof (uchar_t)); 1505 case IP_MULTICAST_LOOP: 1506 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1507 return (sizeof (uchar_t)); 1508 } 1509 break; 1510 case IPPROTO_IPV6: 1511 switch (name) { 1512 case IPV6_MULTICAST_HOPS: 1513 *i1 = IP_DEFAULT_MULTICAST_TTL; 1514 return (sizeof (int)); 1515 case IPV6_MULTICAST_LOOP: 1516 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1517 return (sizeof (int)); 1518 case IPV6_UNICAST_HOPS: 1519 *i1 = us->us_ipv6_hoplimit; 1520 return (sizeof (int)); 1521 } 1522 break; 1523 } 1524 return (-1); 1525 } 1526 1527 /* 1528 * This routine retrieves the current status of socket options. 1529 * It returns the size of the option retrieved, or -1. 
1530 */ 1531 int 1532 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1533 uchar_t *ptr) 1534 { 1535 int *i1 = (int *)ptr; 1536 udp_t *udp = connp->conn_udp; 1537 int len; 1538 conn_opt_arg_t coas; 1539 int retval; 1540 1541 coas.coa_connp = connp; 1542 coas.coa_ixa = connp->conn_ixa; 1543 coas.coa_ipp = &connp->conn_xmit_ipp; 1544 coas.coa_ancillary = B_FALSE; 1545 coas.coa_changed = 0; 1546 1547 /* 1548 * We assume that the optcom framework has checked for the set 1549 * of levels and names that are supported, hence we don't worry 1550 * about rejecting based on that. 1551 * First check for UDP specific handling, then pass to common routine. 1552 */ 1553 switch (level) { 1554 case IPPROTO_IP: 1555 /* 1556 * Only allow IPv4 option processing on IPv4 sockets. 1557 */ 1558 if (connp->conn_family != AF_INET) 1559 return (-1); 1560 1561 switch (name) { 1562 case IP_OPTIONS: 1563 case T_IP_OPTIONS: 1564 mutex_enter(&connp->conn_lock); 1565 if (!(udp->udp_recv_ipp.ipp_fields & 1566 IPPF_IPV4_OPTIONS)) { 1567 mutex_exit(&connp->conn_lock); 1568 return (0); 1569 } 1570 1571 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1572 ASSERT(len != 0); 1573 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1574 mutex_exit(&connp->conn_lock); 1575 return (len); 1576 } 1577 break; 1578 case IPPROTO_UDP: 1579 switch (name) { 1580 case UDP_NAT_T_ENDPOINT: 1581 mutex_enter(&connp->conn_lock); 1582 *i1 = udp->udp_nat_t_endpoint; 1583 mutex_exit(&connp->conn_lock); 1584 return (sizeof (int)); 1585 case UDP_RCVHDR: 1586 mutex_enter(&connp->conn_lock); 1587 *i1 = udp->udp_rcvhdr ? 1 : 0; 1588 mutex_exit(&connp->conn_lock); 1589 return (sizeof (int)); 1590 } 1591 } 1592 mutex_enter(&connp->conn_lock); 1593 retval = conn_opt_get(&coas, level, name, ptr); 1594 mutex_exit(&connp->conn_lock); 1595 return (retval); 1596 } 1597 1598 /* 1599 * This routine retrieves the current status of socket options. 1600 * It returns the size of the option retrieved, or -1. 
1601 */ 1602 int 1603 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1604 { 1605 conn_t *connp = Q_TO_CONN(q); 1606 int err; 1607 1608 err = udp_opt_get(connp, level, name, ptr); 1609 return (err); 1610 } 1611 1612 /* 1613 * This routine sets socket options. 1614 */ 1615 int 1616 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1617 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1618 { 1619 conn_t *connp = coa->coa_connp; 1620 ip_xmit_attr_t *ixa = coa->coa_ixa; 1621 udp_t *udp = connp->conn_udp; 1622 udp_stack_t *us = udp->udp_us; 1623 int *i1 = (int *)invalp; 1624 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1625 int error; 1626 1627 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1628 /* 1629 * First do UDP specific sanity checks and handle UDP specific 1630 * options. Note that some IPPROTO_UDP options are handled 1631 * by conn_opt_set. 1632 */ 1633 switch (level) { 1634 case SOL_SOCKET: 1635 switch (name) { 1636 case SO_SNDBUF: 1637 if (*i1 > us->us_max_buf) { 1638 return (ENOBUFS); 1639 } 1640 break; 1641 case SO_RCVBUF: 1642 if (*i1 > us->us_max_buf) { 1643 return (ENOBUFS); 1644 } 1645 break; 1646 1647 case SCM_UCRED: { 1648 struct ucred_s *ucr; 1649 cred_t *newcr; 1650 ts_label_t *tsl; 1651 1652 /* 1653 * Only sockets that have proper privileges and are 1654 * bound to MLPs will have any other value here, so 1655 * this implicitly tests for privilege to set label. 1656 */ 1657 if (connp->conn_mlp_type == mlptSingle) 1658 break; 1659 1660 ucr = (struct ucred_s *)invalp; 1661 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || 1662 ucr->uc_labeloff < sizeof (*ucr) || 1663 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1664 return (EINVAL); 1665 if (!checkonly) { 1666 /* 1667 * Set ixa_tsl to the new label. 1668 * We assume that crgetzoneid doesn't change 1669 * as part of the SCM_UCRED. 
1670 */ 1671 ASSERT(cr != NULL); 1672 if ((tsl = crgetlabel(cr)) == NULL) 1673 return (EINVAL); 1674 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1675 tsl->tsl_doi, KM_NOSLEEP); 1676 if (newcr == NULL) 1677 return (ENOSR); 1678 ASSERT(newcr->cr_label != NULL); 1679 /* 1680 * Move the hold on the cr_label to ixa_tsl by 1681 * setting cr_label to NULL. Then release newcr. 1682 */ 1683 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1684 ixa->ixa_flags |= IXAF_UCRED_TSL; 1685 newcr->cr_label = NULL; 1686 crfree(newcr); 1687 coa->coa_changed |= COA_HEADER_CHANGED; 1688 coa->coa_changed |= COA_WROFF_CHANGED; 1689 } 1690 /* Fully handled this option. */ 1691 return (0); 1692 } 1693 } 1694 break; 1695 case IPPROTO_UDP: 1696 switch (name) { 1697 case UDP_NAT_T_ENDPOINT: 1698 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1699 return (error); 1700 } 1701 1702 /* 1703 * Use conn_family instead so we can avoid ambiguitites 1704 * with AF_INET6 sockets that may switch from IPv4 1705 * to IPv6. 1706 */ 1707 if (connp->conn_family != AF_INET) { 1708 return (EAFNOSUPPORT); 1709 } 1710 1711 if (!checkonly) { 1712 mutex_enter(&connp->conn_lock); 1713 udp->udp_nat_t_endpoint = onoff; 1714 mutex_exit(&connp->conn_lock); 1715 coa->coa_changed |= COA_HEADER_CHANGED; 1716 coa->coa_changed |= COA_WROFF_CHANGED; 1717 } 1718 /* Fully handled this option. */ 1719 return (0); 1720 case UDP_RCVHDR: 1721 mutex_enter(&connp->conn_lock); 1722 udp->udp_rcvhdr = onoff; 1723 mutex_exit(&connp->conn_lock); 1724 return (0); 1725 } 1726 break; 1727 } 1728 error = conn_opt_set(coa, level, name, inlen, invalp, 1729 checkonly, cr); 1730 return (error); 1731 } 1732 1733 /* 1734 * This routine sets socket options. 
 *
 * It maps optset_context to set/check-only semantics, obtains the
 * conn_opt_arg_t to work on (the caller's for T_UNITDATA_REQ ancillary
 * options, otherwise a local one holding a reference on conn_ixa), calls
 * udp_do_opt_set(), and for non-ancillary options then applies the side
 * effects flagged in coa_changed (route, header template, buffer sizes,
 * write offset).
 *
 * On success *outlenp is set to inlen and the value is copied to outvalp;
 * on most failures *outlenp is set to 0 and an errno value is returned.
 */
int
udp_opt_set(conn_t *connp, uint_t optset_context, int level,
    int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
    uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
{
	udp_t		*udp = connp->conn_udp;
	int		err;
	conn_opt_arg_t	coas, *coa;
	boolean_t	checkonly;
	udp_stack_t	*us = udp->udp_us;

	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		checkonly = B_TRUE;
		/*
		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
		 * inlen != 0 implies value supplied and
		 * 	we have to "pretend" to set it.
		 * inlen == 0 implies that there is no
		 * 	value part in T_CHECK request and just validation
		 * done elsewhere should be enough, we just return here.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			return (0);
		}
		break;
	case SETFN_OPTCOM_NEGOTIATE:
		checkonly = B_FALSE;
		break;
	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		checkonly = B_FALSE;
		/*
		 * Negotiating local and "association-related" options
		 * through T_UNITDATA_REQ.
		 *
		 * Following routine can filter out ones we do not
		 * want to be "set" this way.
		 */
		if (!udp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		/*
		 * We should never get here
		 */
		*outlenp = 0;
		return (EINVAL);
	}

	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	if (thisdg_attrs != NULL) {
		/* Options from T_UNITDATA_REQ */
		coa = (conn_opt_arg_t *)thisdg_attrs;
		ASSERT(coa->coa_connp == connp);
		ASSERT(coa->coa_ixa != NULL);
		ASSERT(coa->coa_ipp != NULL);
		ASSERT(coa->coa_ancillary);
	} else {
		coa = &coas;
		coas.coa_connp = connp;
		/* Get a reference on conn_ixa to prevent concurrent mods */
		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
		if (coas.coa_ixa == NULL) {
			*outlenp = 0;
			return (ENOMEM);
		}
		coas.coa_ipp = &connp->conn_xmit_ipp;
		coas.coa_ancillary = B_FALSE;
		coas.coa_changed = 0;
	}

	err = udp_do_opt_set(coa, level, name, inlen, invalp,
	    cr, checkonly);
	if (err != 0) {
		/*
		 * Common error exit, also reached by goto from the
		 * IP_DHCPINIT_IF handling below.  The ixa reference is only
		 * dropped when it was taken above (non-ancillary case).
		 */
errout:
		if (!coa->coa_ancillary)
			ixa_refrele(coa->coa_ixa);
		*outlenp = 0;
		return (err);
	}
	/* Handle DHCPINIT here outside of lock */
	if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
		uint_t	ifindex;
		ill_t	*ill;

		ifindex = *(uint_t *)invalp;
		if (ifindex == 0) {
			/* An ifindex of zero clears any existing binding. */
			ill = NULL;
		} else {
			ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
			    coa->coa_ixa->ixa_ipst);
			if (ill == NULL) {
				err = ENXIO;
				goto errout;
			}

			/*
			 * ill_lock is taken here and, on success, stays held
			 * until after conn_dhcpinit_ill has been updated
			 * below.
			 */
			mutex_enter(&ill->ill_lock);
			if (ill->ill_state_flags & ILL_CONDEMNED) {
				mutex_exit(&ill->ill_lock);
				ill_refrele(ill);
				err = ENXIO;
				goto errout;
			}
			if (IS_VNI(ill)) {
				mutex_exit(&ill->ill_lock);
				ill_refrele(ill);
				err = EINVAL;
				goto errout;
			}
		}
		mutex_enter(&connp->conn_lock);

		if (connp->conn_dhcpinit_ill != NULL) {
			/*
			 * We've locked the conn so conn_cleanup_ill()
			 * cannot clear conn_dhcpinit_ill -- so it's
			 * safe to access the ill.
			 */
			ill_t *oill = connp->conn_dhcpinit_ill;

			ASSERT(oill->ill_dhcpinit != 0);
			atomic_dec_32(&oill->ill_dhcpinit);
			ill_set_inputfn(connp->conn_dhcpinit_ill);
			connp->conn_dhcpinit_ill = NULL;
		}

		if (ill != NULL) {
			connp->conn_dhcpinit_ill = ill;
			atomic_inc_32(&ill->ill_dhcpinit);
			ill_set_inputfn(ill);
			/* Drop the locks, then the lookup reference. */
			mutex_exit(&connp->conn_lock);
			mutex_exit(&ill->ill_lock);
			ill_refrele(ill);
		} else {
			mutex_exit(&connp->conn_lock);
		}
	}

	/*
	 * Common case of OK return with outval same as inval.
	 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;

	/*
	 * If this was not ancillary data, then we rebuild the headers,
	 * update the IRE/NCE, and IPsec as needed.
	 * Since the label depends on the destination we go through
	 * ip_set_destination first.
	 */
	if (coa->coa_ancillary) {
		return (0);
	}

	if (coa->coa_changed & COA_ROUTE_CHANGED) {
		in6_addr_t saddr, faddr, nexthop;
		in_port_t fport;

		/*
		 * We clear lastdst to make sure we pick up the change
		 * next time sending.
		 * If we are connected we re-cache the information.
		 * We ignore errors to preserve BSD behavior.
		 * Note that we don't redo IPsec policy lookup here
		 * since the final destination (or source) didn't change.
		 */
		mutex_enter(&connp->conn_lock);
		connp->conn_v6lastdst = ipv6_all_zeros;

		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
		    &connp->conn_faddr_v6, &nexthop);
		/* Snapshot the addresses so conn_lock can be dropped. */
		saddr = connp->conn_saddr_v6;
		faddr = connp->conn_faddr_v6;
		fport = connp->conn_fport;
		mutex_exit(&connp->conn_lock);

		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
			(void) ip_attr_connect(connp, coa->coa_ixa,
			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
		}
	}

	/* Done with the ixa; coa->coa_ixa is not used past this point. */
	ixa_refrele(coa->coa_ixa);

	if (coa->coa_changed & COA_HEADER_CHANGED) {
		/*
		 * Rebuild the header template if we are connected.
		 * Otherwise clear conn_v6lastdst so we rebuild the header
		 * in the data path.
		 */
		mutex_enter(&connp->conn_lock);
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
			err = udp_build_hdr_template(connp,
			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
			    connp->conn_fport, connp->conn_flowinfo);
			if (err != 0) {
				/*
				 * NOTE: unlike the errout path, *outlenp is
				 * left as inlen on this failure.
				 */
				mutex_exit(&connp->conn_lock);
				return (err);
			}
		} else {
			connp->conn_v6lastdst = ipv6_all_zeros;
		}
		mutex_exit(&connp->conn_lock);
	}
	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
		    connp->conn_rcvbuf);
	}
	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
	}
	if (coa->coa_changed & COA_WROFF_CHANGED) {
		/* Increase wroff if needed */
		uint_t wroff;

		mutex_enter(&connp->conn_lock);
		wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
		/* Leave room for the extra 32-bit word of a NAT-T endpoint. */
		if (udp->udp_nat_t_endpoint)
			wroff += sizeof (uint32_t);
		if (wroff > connp->conn_wroff) {
			connp->conn_wroff = wroff;
			mutex_exit(&connp->conn_lock);
			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
		} else {
			mutex_exit(&connp->conn_lock);
		}
	}
	return (err);
}

/*
 * This routine sets socket options.
 * It is the TPI (queue based) wrapper around udp_opt_set().
 */
int
udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
    uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    void *thisdg_attrs, cred_t *cr)
{
	conn_t	*connp = Q_TO_CONN(q);
	int error;

	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
	    outlenp, outvalp, thisdg_attrs, cr);
	return (error);
}

/*
 * Setup IP and UDP headers.
 * Returns NULL on allocation failure, in which case data_mp is freed.
 *
 * On success the returned mblk starts with the IP header followed by the
 * UDP header (and a zeroed 32-bit SPI word for NAT-T endpoints), and
 * ixa->ixa_pktlen has been set to the total packet length.  On failure
 * *errorp holds the error set by conn_prepend_hdr().
 */
mblk_t *
udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
    const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
    uint32_t flowinfo, mblk_t *data_mp, int *errorp)
{
	mblk_t		*mp;
	udpha_t		*udpha;
	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
	uint_t		data_len;
	uint32_t	cksum;
	udp_t		*udp = connp->conn_udp;
	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
	uint_t		ulp_hdr_len;

	data_len = msgdsize(data_mp);
	ulp_hdr_len = UDPH_SIZE;
	if (insert_spi)
		ulp_hdr_len += sizeof (uint32_t);

	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
	if (mp == NULL) {
		ASSERT(*errorp != 0);
		return (NULL);
	}

	/* From here on data_len is the UDP length: ULP header + payload. */
	data_len += ulp_hdr_len;
	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;

	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
	udpha->uha_src_port = connp->conn_lport;
	udpha->uha_dst_port = dstport;
	udpha->uha_checksum = 0;
	udpha->uha_length = htons(data_len);

	/*
	 * If there was a routing option/header then conn_prepend_hdr
	 * has massaged it and placed the pseudo-header checksum difference
	 * in the cksum argument.
	 *
	 * Setup header length and prepare for ULP checksum done in IP.
	 *
	 * We make it easy for IP to include our pseudo header
	 * by putting our length in uha_checksum.
	 * The IP source, destination, and length have already been set by
	 * conn_prepend_hdr.
	 */
	cksum += data_len;
	/* Fold the carry back into the low 16 bits. */
	cksum = (cksum >> 16) + (cksum & 0xFFFF);
	ASSERT(cksum < 0x10000);

	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ipha_t	*ipha = (ipha_t *)mp->b_rptr;

		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);

		/* IP does the checksum if uha_checksum is non-zero */
		if (us->us_do_checksum) {
			if (cksum == 0)
				udpha->uha_checksum = 0xffff;
			else
				udpha->uha_checksum = htons(cksum);
		} else {
			udpha->uha_checksum = 0;
		}
	} else {
		ip6_t *ip6h = (ip6_t *)mp->b_rptr;

		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
		/* Unlike the IPv4 branch, us_do_checksum is not consulted. */
		if (cksum == 0)
			udpha->uha_checksum = 0xffff;
		else
			udpha->uha_checksum = htons(cksum);
	}

	/* Insert all-0s SPI now. */
	if (insert_spi)
		*((uint32_t *)(udpha + 1)) = 0;

	return (mp);
}

/*
 * Build the cached IP + UDP header template for connp via
 * conn_build_hdr_template() and fill in the UDP ports.
 * The caller must hold conn_lock.
 * Returns 0 on success or the error from conn_build_hdr_template().
 */
static int
udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
    const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
{
	udpha_t		*udpha;
	int		error;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	/*
	 * We clear lastdst to make sure we don't use the lastdst path
	 * next time sending since we might not have set v6dst yet.
	 */
	connp->conn_v6lastdst = ipv6_all_zeros;

	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
	    flowinfo);
	if (error != 0)
		return (error);

	/*
	 * Any routing header/option has been massaged. The checksum difference
	 * is stored in conn_sum.
	 */
	udpha = (udpha_t *)connp->conn_ht_ulp;
	udpha->uha_src_port = connp->conn_lport;
	udpha->uha_dst_port = dstport;
	udpha->uha_checksum = 0;
	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
	return (0);
}

/*
 * Queue mp on the udp fallback queue while the endpoint is still a
 * non-STREAMS socket.  The caller must hold udp_recv_lock.
 * Returns NULL when mp was queued, or mp itself when the endpoint is no
 * longer non-STREAMS and the caller should putnext() it.
 */
static mblk_t *
udp_queue_fallback(udp_t *udp, mblk_t *mp)
{
	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
	if (IPCL_IS_NONSTR(udp->udp_connp)) {
		/*
		 * fallback has started but messages have not been moved yet
		 */
		if (udp->udp_fallback_queue_head == NULL) {
			ASSERT(udp->udp_fallback_queue_tail == NULL);
			udp->udp_fallback_queue_head = mp;
			udp->udp_fallback_queue_tail = mp;
		} else {
			/* Append to the tail; messages are linked by b_next. */
			ASSERT(udp->udp_fallback_queue_tail != NULL);
			udp->udp_fallback_queue_tail->b_next = mp;
			udp->udp_fallback_queue_tail = mp;
		}
		return (NULL);
	} else {
		/*
		 * Fallback completed, let the caller putnext() the mblk.
		 */
		return (mp);
	}
}

/*
 * Deliver data to ULP. In case we have a socket, and it's falling back to
 * TPI, then we'll queue the mp for later processing.
2138 */ 2139 static void 2140 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2141 { 2142 if (IPCL_IS_NONSTR(connp)) { 2143 udp_t *udp = connp->conn_udp; 2144 int error; 2145 2146 ASSERT(len == msgdsize(mp)); 2147 if ((*connp->conn_upcalls->su_recv) 2148 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2149 mutex_enter(&udp->udp_recv_lock); 2150 if (error == ENOSPC) { 2151 /* 2152 * let's confirm while holding the lock 2153 */ 2154 if ((*connp->conn_upcalls->su_recv) 2155 (connp->conn_upper_handle, NULL, 0, 0, 2156 &error, NULL) < 0) { 2157 ASSERT(error == ENOSPC); 2158 if (error == ENOSPC) { 2159 connp->conn_flow_cntrld = 2160 B_TRUE; 2161 } 2162 } 2163 mutex_exit(&udp->udp_recv_lock); 2164 } else { 2165 ASSERT(error == EOPNOTSUPP); 2166 mp = udp_queue_fallback(udp, mp); 2167 mutex_exit(&udp->udp_recv_lock); 2168 if (mp != NULL) 2169 putnext(connp->conn_rq, mp); 2170 } 2171 } 2172 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2173 } else { 2174 if (is_system_labeled()) { 2175 ASSERT(ira->ira_cred != NULL); 2176 /* 2177 * Provide for protocols above UDP such as RPC 2178 * NOPID leaves db_cpid unchanged. 2179 */ 2180 mblk_setcred(mp, ira->ira_cred, NOPID); 2181 } 2182 2183 putnext(connp->conn_rq, mp); 2184 } 2185 } 2186 2187 /* 2188 * This is the inbound data path. 2189 * IP has already pulled up the IP plus UDP headers and verified alignment 2190 * etc. 
*
 * arg1 is the conn_t the packet was fanned out to, mp is the M_DATA packet
 * starting at the IP header and ira describes how it was received; arg2 is
 * unused.
 *
 * A T_UNITDATA_IND M_PROTO mblk is allocated, filled in with the sender's
 * address (a sin_t for AF_INET conns, a sin6_t otherwise) plus any ancillary
 * data selected by conn_recv_ancillary, linked in front of mp and handed to
 * udp_ulp_recv().  If the UDP length does not match the length derived from
 * the IP header, or an allocation fails, the packet is freed and
 * udpInErrors is bumped.
 */
/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t			*connp = (conn_t *)arg1;
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;		/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			udi_size;	/* Size of T_unitdata_ind */
	int			pkt_len;
	udp_t			*udp;
	udpha_t			*udpha;
	ip_pkt_t		ipps;
	ip6_t			*ip6h;
	mblk_t			*mp1;
	uint32_t		udp_ipv4_options_len;
	crb_t			recv_ancillary;
	udp_stack_t		*us;

	ASSERT(connp->conn_flags & IPCL_UDPCONN);

	udp = connp->conn_udp;
	us = udp->udp_us;
	rptr = mp->b_rptr;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(OK_32PTR(rptr));
	ASSERT(ira->ira_pktlen == msgdsize(mp));
	pkt_len = ira->ira_pktlen;

	/*
	 * Get a snapshot of these and allow other threads to change
	 * them after that. We need the same recv_ancillary when determining
	 * the size as when adding the ancillary data items.
	 */
	mutex_enter(&connp->conn_lock);
	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
	recv_ancillary = connp->conn_recv_ancillary;
	mutex_exit(&connp->conn_lock);

	hdr_length = ira->ira_ip_hdr_length;

	/*
	 * IP inspected the UDP header thus all of it must be in the mblk.
	 * UDP length check is performed for IPv6 packets and IPv4 packets
	 * to check if the size of the packet as specified
	 * by the UDP header is the same as the length derived from the IP
	 * header.
	 */
	udpha = (udpha_t *)(rptr + hdr_length);
	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
		goto tossit;

	/* From here on hdr_length covers the IP header plus the UDP header. */
	hdr_length += UDPH_SIZE;
	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */

	/* Initialize regardless of IP version */
	ipps.ipp_fields = 0;

	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
	    udp_ipv4_options_len > 0) &&
	    connp->conn_family == AF_INET) {
		int	err;

		/*
		 * Record/update udp_recv_ipp with the lock
		 * held. Not needed for AF_INET6 sockets
		 * since they don't support a getsockopt of IP_OPTIONS.
		 */
		mutex_enter(&connp->conn_lock);
		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
		    B_TRUE);
		if (err != 0) {
			/* Allocation failed. Drop packet */
			mutex_exit(&connp->conn_lock);
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mutex_exit(&connp->conn_lock);
	}

	if (recv_ancillary.crb_all != 0) {
		/*
		 * Record packet information in the ip_pkt_t
		 */
		if (ira->ira_flags & IRAF_IS_IPV4) {
			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));

			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
		} else {
			uint8_t nexthdrp;

			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
			/*
			 * IPv6 packets can only be received by applications
			 * that are prepared to receive IPv6 addresses.
			 * The IP fanout must ensure this.
			 */
			ASSERT(connp->conn_family == AF_INET6);

			ip6h = (ip6_t *)rptr;

			/* We don't care about the length, but need the ipp */
			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
			    &nexthdrp);
			ASSERT(hdr_length == ira->ira_ip_hdr_length);
			/* Restore */
			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
			ASSERT(nexthdrp == IPPROTO_UDP);
		}
	}

	/*
	 * This is the inbound data path. Packets are passed upstream as
	 * T_UNITDATA_IND messages.
	 */
	if (connp->conn_family == AF_INET) {
		sin_t *sin;

		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);

		/*
		 * Normally only send up the source address.
		 * If any ancillary data items are wanted we add those.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin_t);
		/* udi_size now becomes the size of the options area only. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
		tudi->OPT_length = udi_size;
		sin = (sin_t *)&tudi[1];
		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
		sin->sin_port = udpha->uha_src_port;
		sin->sin_family = connp->conn_family;
		*(uint32_t *)&sin->sin_zero[0] = 0;
		*(uint32_t *)&sin->sin_zero[4] = 0;

		/*
		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
		 * IP_RECVTTL has been set.
		 */
		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin[1], udi_size);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		/* udi_size now becomes the size of the options area only. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ira->ira_flags & IRAF_IS_IPV4) {
			in6_addr_t v6dst;

			/* IPv4 packet on an AF_INET6 conn: use mapped addrs */
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    IPCL_ZONEID(connp), us->us_netstack);
		} else {
			ip6h = (ip6_t *)rptr;

			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope pass up scope id */
			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ira->ira_ruifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
			    us->us_netstack);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = connp->conn_family;

		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin6[1], udi_size);
		}
	}

	/*
	 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
	 * loopback traffic).
	 */
	DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
	    void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);

	/* Walk past the headers unless IP_RECVHDR was set. */
	if (!udp->udp_rcvhdr) {
		mp->b_rptr = rptr + hdr_length;
		pkt_len -= hdr_length;
	}

	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
	/* mp1 is the T_UNITDATA_IND with the data mblk chained on b_cont. */
	udp_ulp_recv(connp, mp1, pkt_len, ira);
	return;

tossit:
	freemsg(mp);
	BUMP_MIB(&us->us_udp_mib, udpInErrors);
}

/*
 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
 * information that can be changing beneath us.
 *
 * Up to five replies are sent on q with qreply(): the UDP MIB counters
 * (using mpctl itself), the IPv4 endpoint table, the IPv4 MLP attribute
 * table, the IPv6 endpoint table and the IPv6 MLP attribute table.  The two
 * attribute tables are freed instead of sent when they are empty.  Only
 * endpoints whose conn_zoneid matches that of the requesting conn are
 * reported.
 *
 * Returns a copy of the original mpctl made before it was modified, or 0 if
 * mpctl is unusable or one of the working copies could not be allocated
 * (in which case mpctl is freed).
 */
mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	mblk_t			*mp_conn_ctl;
	mblk_t			*mp_attr_ctl;
	mblk_t			*mp6_conn_ctl;
	mblk_t			*mp6_attr_ctl;
	mblk_t			*mp_conn_tail;
	mblk_t			*mp_attr_tail;
	mblk_t			*mp6_conn_tail;
	mblk_t			*mp6_attr_tail;
	struct opthdr		*optp;
	mib2_udpEntry_t		ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int			state;
	zoneid_t		zoneid;
	int			i;
	connf_t			*connfp;
	conn_t			*connp = Q_TO_CONN(q);
	int			v4_conn_idx;
	int			v6_conn_idx;
	boolean_t		needattr;
	udp_t			*udp;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
	mblk_t			*mp2ctl;

	/*
	 * make a copy of the original message
	 */
	mp2ctl = copymsg(mpctl);

	/*
	 * mp6_attr_ctl is the last copy attempted below, so it is NULL
	 * whenever the failure branch is taken after reaching it and does
	 * not need to be freed there.
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		freemsg(mpctl);
		freemsg(mp2ctl);
		return (0);
	}

	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	/* First reply: the UDP MIB counters themselves. */
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
	    sizeof (us->us_udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every UDP conn in the global hash of this IP stack. */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDPCONN))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/*
			 * Build the MLP attribute entry; it is only appended
			 * below when needattr ends up set.
			 */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}
			if (connp->conn_anon_mlp) {
				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
				needattr = B_TRUE;
			}
			switch (connp->conn_mac_mode) {
			case CONN_MAC_DEFAULT:
				break;
			case CONN_MAC_AWARE:
				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
				needattr = B_TRUE;
				break;
			case CONN_MAC_IMPLICIT:
				mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT;
				needattr = B_TRUE;
				break;
			}
			/* conn_ixa is examined with conn_lock held. */
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER &&
			    connp->conn_ixa->ixa_tsl != NULL) {
				ts_label_t *tsl;

				tsl = connp->conn_ixa->ixa_tsl;
				mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
				mlp.tme_doi = label2doi(tsl);
				mlp.tme_label = *label2bslabel(tsl);
				needattr = B_TRUE;
			}
			mutex_exit(&connp->conn_lock);

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (connp->conn_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress = connp->conn_laddr_v4;
				ude.udpLocalPort = ntohs(connp->conn_lport);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v4;
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude.udpCreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				/* Index of this conn within the IPv4 table. */
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (connp->conn_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state = state;
				ude6.udp6LocalAddress = connp->conn_laddr_v6;
				ude6.udp6LocalPort = ntohs(connp->conn_lport);
				mutex_enter(&connp->conn_lock);
				if (connp->conn_ixa->ixa_flags &
				    IXAF_SCOPEID_SET) {
					ude6.udp6IfIndex =
					    connp->conn_ixa->ixa_scopeid;
				} else {
					ude6.udp6IfIndex = connp->conn_bound_if;
				}
				mutex_exit(&connp->conn_lock);
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v6;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude6.udp6CreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				/* Index of this conn within the IPv6 table. */
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (mp2ctl);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs
 * to do the appropriate locking.
2737 */ 2738 /* ARGSUSED */ 2739 int 2740 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 2741 uchar_t *ptr, int len) 2742 { 2743 switch (level) { 2744 case MIB2_UDP: 2745 return (0); 2746 default: 2747 return (1); 2748 } 2749 } 2750 2751 /* 2752 * This routine creates a T_UDERROR_IND message and passes it upstream. 2753 * The address and options are copied from the T_UNITDATA_REQ message 2754 * passed in mp. This message is freed. 2755 */ 2756 static void 2757 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2758 { 2759 struct T_unitdata_req *tudr; 2760 mblk_t *mp1; 2761 uchar_t *destaddr; 2762 t_scalar_t destlen; 2763 uchar_t *optaddr; 2764 t_scalar_t optlen; 2765 2766 if ((mp->b_wptr < mp->b_rptr) || 2767 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2768 goto done; 2769 } 2770 tudr = (struct T_unitdata_req *)mp->b_rptr; 2771 destaddr = mp->b_rptr + tudr->DEST_offset; 2772 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2773 destaddr + tudr->DEST_length < mp->b_rptr || 2774 destaddr + tudr->DEST_length > mp->b_wptr) { 2775 goto done; 2776 } 2777 optaddr = mp->b_rptr + tudr->OPT_offset; 2778 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2779 optaddr + tudr->OPT_length < mp->b_rptr || 2780 optaddr + tudr->OPT_length > mp->b_wptr) { 2781 goto done; 2782 } 2783 destlen = tudr->DEST_length; 2784 optlen = tudr->OPT_length; 2785 2786 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2787 (char *)optaddr, optlen, err); 2788 if (mp1 != NULL) 2789 qreply(q, mp1); 2790 2791 done: 2792 freemsg(mp); 2793 } 2794 2795 /* 2796 * This routine removes a port number association from a stream. It 2797 * is called by udp_wput to handle T_UNBIND_REQ messages. 
2798 */ 2799 static void 2800 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2801 { 2802 conn_t *connp = Q_TO_CONN(q); 2803 int error; 2804 2805 error = udp_do_unbind(connp); 2806 if (error) { 2807 if (error < 0) 2808 udp_err_ack(q, mp, -error, 0); 2809 else 2810 udp_err_ack(q, mp, TSYSERR, error); 2811 return; 2812 } 2813 2814 mp = mi_tpi_ok_ack_alloc(mp); 2815 ASSERT(mp != NULL); 2816 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 2817 qreply(q, mp); 2818 } 2819 2820 /* 2821 * Don't let port fall into the privileged range. 2822 * Since the extra privileged ports can be arbitrary we also 2823 * ensure that we exclude those from consideration. 2824 * us->us_epriv_ports is not sorted thus we loop over it until 2825 * there are no changes. 2826 */ 2827 static in_port_t 2828 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 2829 { 2830 int i; 2831 in_port_t nextport; 2832 boolean_t restart = B_FALSE; 2833 udp_stack_t *us = udp->udp_us; 2834 2835 if (random && udp_random_anon_port != 0) { 2836 (void) random_get_pseudo_bytes((uint8_t *)&port, 2837 sizeof (in_port_t)); 2838 /* 2839 * Unless changed by a sys admin, the smallest anon port 2840 * is 32768 and the largest anon port is 65535. It is 2841 * very likely (50%) for the random port to be smaller 2842 * than the smallest anon port. When that happens, 2843 * add port % (anon port range) to the smallest anon 2844 * port to get the random port. It should fall into the 2845 * valid anon port range. 
2846 */ 2847 if (port < us->us_smallest_anon_port) { 2848 port = us->us_smallest_anon_port + 2849 port % (us->us_largest_anon_port - 2850 us->us_smallest_anon_port); 2851 } 2852 } 2853 2854 retry: 2855 if (port < us->us_smallest_anon_port) 2856 port = us->us_smallest_anon_port; 2857 2858 if (port > us->us_largest_anon_port) { 2859 port = us->us_smallest_anon_port; 2860 if (restart) 2861 return (0); 2862 restart = B_TRUE; 2863 } 2864 2865 if (port < us->us_smallest_nonpriv_port) 2866 port = us->us_smallest_nonpriv_port; 2867 2868 for (i = 0; i < us->us_num_epriv_ports; i++) { 2869 if (port == us->us_epriv_ports[i]) { 2870 port++; 2871 /* 2872 * Make sure that the port is in the 2873 * valid range. 2874 */ 2875 goto retry; 2876 } 2877 } 2878 2879 if (is_system_labeled() && 2880 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 2881 port, IPPROTO_UDP, B_TRUE)) != 0) { 2882 port = nextport; 2883 goto retry; 2884 } 2885 2886 return (port); 2887 } 2888 2889 /* 2890 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 2891 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 2892 * the TPI options, otherwise we take them from msg_control. 2893 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 2894 * Always consumes mp; never consumes tudr_mp. 2895 */ 2896 static int 2897 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, 2898 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) 2899 { 2900 udp_t *udp = connp->conn_udp; 2901 udp_stack_t *us = udp->udp_us; 2902 int error; 2903 ip_xmit_attr_t *ixa; 2904 ip_pkt_t *ipp; 2905 in6_addr_t v6src; 2906 in6_addr_t v6dst; 2907 in6_addr_t v6nexthop; 2908 in_port_t dstport; 2909 uint32_t flowinfo; 2910 uint_t srcid; 2911 int is_absreq_failure = 0; 2912 conn_opt_arg_t coas, *coa; 2913 2914 ASSERT(tudr_mp != NULL || msg != NULL); 2915 2916 /* 2917 * Get ixa before checking state to handle a disconnect race. 
*
	 * We need an exclusive copy of conn_ixa since the ancillary data
	 * options might modify it. That copy has no pointers hence we
	 * need to set them up once we've parsed the ancillary data.
	 */
	ixa = conn_get_ixa_exclusive(connp);
	if (ixa == NULL) {
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		freemsg(mp);
		return (ENOMEM);
	}
	ASSERT(cr != NULL);
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	/* Use the caller's cred/pid for this send; restored before return. */
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;
	if (is_system_labeled()) {
		/* We need to restart with a label based on the cred */
		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
	}

	/* In case previous destination was multicast or multirt */
	ip_attr_newdst(ixa);

	/* Get a copy of conn_xmit_ipp since the options might change it */
	ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
	if (ipp == NULL) {
		/* Cannot use the done: path since there is no ipp to free. */
		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
		ixa->ixa_cred = connp->conn_cred;	/* Restore */
		ixa->ixa_cpid = connp->conn_cpid;
		ixa_refrele(ixa);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		freemsg(mp);
		return (ENOMEM);
	}
	mutex_enter(&connp->conn_lock);
	error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
	mutex_exit(&connp->conn_lock);
	if (error != 0) {
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		freemsg(mp);
		goto done;
	}

	/*
	 * Parse the options and update ixa and ipp as a result.
	 * Note that ixa_tsl can be updated if SCM_UCRED.
	 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
	 */

	coa = &coas;
	coa->coa_connp = connp;
	coa->coa_ixa = ixa;
	coa->coa_ipp = ipp;
	coa->coa_ancillary = B_TRUE;
	coa->coa_changed = 0;

	if (msg != NULL) {
		/* Socket caller: ancillary data comes from msg_control. */
		error = process_auxiliary_options(connp, msg->msg_control,
		    msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
	} else {
		struct T_unitdata_req *tudr;

		/* TPI caller: ancillary data comes from the TPI options. */
		tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
		ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
		error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
		    &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
		    coa, &is_absreq_failure);
	}
	if (error != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		freemsg(mp);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		goto done;
	}
	ASSERT(is_absreq_failure == 0);

	mutex_enter(&connp->conn_lock);
	/*
	 * If laddr is unspecified then we look at sin6_src_id.
	 * We will give precedence to a source address set with IPV6_PKTINFO
	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
	 * want ip_attr_connect to select a source (since it can fail) when
	 * IPV6_PKTINFO is specified.
	 * If this doesn't result in a source address then we get a source
	 * from ip_attr_connect() below.
	 */
	v6src = connp->conn_saddr_v6;
	if (sin != NULL) {
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
		dstport = sin->sin_port;
		flowinfo = 0;
		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		ixa->ixa_flags |= IXAF_IS_IPV4;
	} else if (sin6 != NULL) {
		v6dst = sin6->sin6_addr;
		dstport = sin6->sin6_port;
		flowinfo = sin6->sin6_flowinfo;
		srcid = sin6->__sin6_src_id;
		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
			ixa->ixa_scopeid = sin6->sin6_scope_id;
			ixa->ixa_flags |= IXAF_SCOPEID_SET;
		} else {
			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		}
		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
			    connp->conn_netstack);
		}
		if (IN6_IS_ADDR_V4MAPPED(&v6dst))
			ixa->ixa_flags |= IXAF_IS_IPV4;
		else
			ixa->ixa_flags &= ~IXAF_IS_IPV4;
	} else {
		/* Connected case */
		v6dst = connp->conn_faddr_v6;
		dstport = connp->conn_fport;
		flowinfo = connp->conn_flowinfo;
	}
	mutex_exit(&connp->conn_lock);

	/* Handle IPV6_PKTINFO setting source address. */
	if (IN6_IS_ADDR_UNSPECIFIED(&v6src) &&
	    (ipp->ipp_fields & IPPF_ADDR)) {
		/* Only take ipp_addr if its family matches the destination. */
		if (ixa->ixa_flags & IXAF_IS_IPV4) {
			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
				v6src = ipp->ipp_addr;
		} else {
			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
				v6src = ipp->ipp_addr;
		}
	}

	ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);

	switch (error) {
	case 0:
		break;
	case EADDRNOTAVAIL:
		/*
		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
		 * Don't have the application see that errno
		 */
		error = ENETUNREACH;
		goto failed;
	case ENETDOWN:
		/*
		 * Have !ipif_addr_ready address; drop packet silently
		 * until we can get applications to not send until we
		 * are ready.
		 */
		error = 0;
		goto failed;
	case EHOSTUNREACH:
	case ENETUNREACH:
		if (ixa->ixa_ire != NULL) {
			/*
			 * Let conn_ip_output/ire_send_noroute return
			 * the error and send any local ICMP error.
			 */
			error = 0;
			break;
		}
		/* FALLTHRU */
	default:
	failed:
		freemsg(mp);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		goto done;
	}

	/*
	 * We might be going to a different destination than last time,
	 * thus check that TX allows the communication and compute any
	 * needed label.
	 *
	 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
	 * don't have to worry about concurrent threads.
	 */
	if (is_system_labeled()) {
		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
			error = ECONNREFUSED;
			freemsg(mp);
			goto done;
		}
		/*
		 * Check whether Trusted Solaris policy allows communication
		 * with this host, and pretend that the destination is
		 * unreachable if not.
		 * Compute any needed label and place it in ipp_label_v4/v6.
		 *
		 * Later conn_build_hdr_template/conn_prepend_hdr takes
		 * ipp_label_v4/v6 to form the packet.
		 *
		 * Tsol note: We have ipp structure local to this thread so
		 * no locking is needed.
		 */
		error = conn_update_label(connp, ixa, &v6dst, ipp);
		if (error != 0) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
			goto done;
		}
	}
	mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
	    flowinfo, mp, &error);
	if (mp == NULL) {
		ASSERT(error != 0);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		goto done;
	}
	if (ixa->ixa_pktlen > IP_MAXPACKET) {
		error = EMSGSIZE;
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		freemsg(mp);
		goto done;
	}
	/* We're done. Pass the packet to ip. */
	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);

	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);

	error = conn_ip_output(mp, ixa);
	/* No udpOutErrors if an error since IP increases its error counter */
	switch (error) {
	case 0:
		break;
	case EWOULDBLOCK:
		/* Reported to the caller as success. */
		(void) ixa_check_drain_insert(connp, ixa);
		error = 0;
		break;
	case EADDRNOTAVAIL:
		/*
		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
		 * Don't have the application see that errno
		 */
		error = ENETUNREACH;
		/* FALLTHRU */
	default:
		mutex_enter(&connp->conn_lock);
		/*
		 * Clear the source and v6lastdst so we call ip_attr_connect
		 * for the next packet and try to pick a better source.
		 */
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_v6lastdst = ipv6_all_zeros;
		mutex_exit(&connp->conn_lock);
		break;
	}
done:
	/*
	 * Common exit: put the conn's own cred/pid back on the ixa, drop our
	 * reference to it and release the private copy of the ip_pkt_t.
	 */
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = connp->conn_cred;	/* Restore */
	ixa->ixa_cpid = connp->conn_cpid;
	ixa_refrele(ixa);
	ip_pkt_free(ipp);
	kmem_free(ipp, sizeof (*ipp));
	return (error);
}

/*
 * Handle sending an M_DATA for a connected socket.
 * Handles both IPv4 and IPv6.
 *
 * cr and pid are placed on the ixa for the duration of the send and the
 * conn's own values are restored before returning.  mp is either freed on
 * the local failure paths or passed on to conn_ip_output().  Returns 0 or
 * an errno value; ENETDOWN from ip_attr_connect() and EWOULDBLOCK from
 * conn_ip_output() are reported as 0.
 */
static int
udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
{
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;
	int		error;
	ip_xmit_attr_t	*ixa;

	/*
	 * If no other thread is using conn_ixa this just gets a reference to
	 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
	 */
	ixa = conn_get_ixa(connp, B_FALSE);
	if (ixa == NULL) {
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		freemsg(mp);
		return (ENOMEM);
	}

	ASSERT(cr != NULL);
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;

	mutex_enter(&connp->conn_lock);
	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
	    connp->conn_fport, connp->conn_flowinfo, &error);

	if (mp == NULL) {
		ASSERT(error != 0);
		mutex_exit(&connp->conn_lock);
		ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
		ixa->ixa_cred = connp->conn_cred;	/* Restore */
		ixa->ixa_cpid = connp->conn_cpid;
		ixa_refrele(ixa);
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		/*
		 * mp is NULL on this path.
		 * NOTE(review): this call looks redundant; confirm that
		 * freemsg() tolerates a NULL argument.
		 */
		freemsg(mp);
		return (error);
	}

	/*
	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
	 * safe copy, then we need to fill in any pointers in it.
	 */
	if (ixa->ixa_ire == NULL) {
		in6_addr_t	faddr, saddr;
		in6_addr_t	nexthop;
		in_port_t	fport;

		/* Snapshot the addresses before conn_lock is dropped. */
		saddr = connp->conn_saddr_v6;
		faddr = connp->conn_faddr_v6;
		fport = connp->conn_fport;
		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
		mutex_exit(&connp->conn_lock);

		error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
		    fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
		    IPDF_IPSEC);
		switch (error) {
		case 0:
			break;
		case EADDRNOTAVAIL:
			/*
			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
			 * Don't have the application see that errno
			 */
			error = ENETUNREACH;
			goto failed;
		case ENETDOWN:
			/*
			 * Have !ipif_addr_ready address; drop packet silently
			 * until we can get applications to not send until we
			 * are ready.
			 */
			error = 0;
			goto failed;
		case EHOSTUNREACH:
		case ENETUNREACH:
			if (ixa->ixa_ire != NULL) {
				/*
				 * Let conn_ip_output/ire_send_noroute return
				 * the error and send any local ICMP error.
				 */
				error = 0;
				break;
			}
			/* FALLTHRU */
		default:
		failed:
			ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
			ixa->ixa_cred = connp->conn_cred;	/* Restore */
			ixa->ixa_cpid = connp->conn_cpid;
			ixa_refrele(ixa);
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
			return (error);
		}
	} else {
		/* Done with conn_t */
		mutex_exit(&connp->conn_lock);
	}
	ASSERT(ixa->ixa_ire != NULL);

	/* We're done. Pass the packet to ip. */
	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);

	DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa,
	    void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *,
	    &mp->b_rptr[ixa->ixa_ip_hdr_length]);

	error = conn_ip_output(mp, ixa);
	/* No udpOutErrors if an error since IP increases its error counter */
	switch (error) {
	case 0:
		break;
	case EWOULDBLOCK:
		/* Reported to the caller as success. */
		(void) ixa_check_drain_insert(connp, ixa);
		error = 0;
		break;
	case EADDRNOTAVAIL:
		/*
		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
		 * Don't have the application see that errno
		 */
		error = ENETUNREACH;
		break;
	}
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = connp->conn_cred;	/* Restore */
	ixa->ixa_cpid = connp->conn_cpid;
	ixa_refrele(ixa);
	return (error);
}

/*
 * Handle sending an M_DATA to the last destination.
 * Handles both IPv4 and IPv6.
 *
 * NOTE: The caller must hold conn_lock and we drop it here.
3332 */ 3333 static int 3334 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, 3335 ip_xmit_attr_t *ixa) 3336 { 3337 udp_t *udp = connp->conn_udp; 3338 udp_stack_t *us = udp->udp_us; 3339 int error; 3340 3341 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3342 ASSERT(ixa != NULL); 3343 3344 ASSERT(cr != NULL); 3345 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3346 ixa->ixa_cred = cr; 3347 ixa->ixa_cpid = pid; 3348 3349 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, 3350 connp->conn_lastdstport, connp->conn_lastflowinfo, &error); 3351 3352 if (mp == NULL) { 3353 ASSERT(error != 0); 3354 mutex_exit(&connp->conn_lock); 3355 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3356 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3357 ixa->ixa_cpid = connp->conn_cpid; 3358 ixa_refrele(ixa); 3359 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3360 freemsg(mp); 3361 return (error); 3362 } 3363 3364 /* 3365 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3366 * safe copy, then we need to fill in any pointers in it. 3367 */ 3368 if (ixa->ixa_ire == NULL) { 3369 in6_addr_t lastdst, lastsrc; 3370 in6_addr_t nexthop; 3371 in_port_t lastport; 3372 3373 lastsrc = connp->conn_v6lastsrc; 3374 lastdst = connp->conn_v6lastdst; 3375 lastport = connp->conn_lastdstport; 3376 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); 3377 mutex_exit(&connp->conn_lock); 3378 3379 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, 3380 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | 3381 IPDF_VERIFY_DST | IPDF_IPSEC); 3382 switch (error) { 3383 case 0: 3384 break; 3385 case EADDRNOTAVAIL: 3386 /* 3387 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3388 * Don't have the application see that errno 3389 */ 3390 error = ENETUNREACH; 3391 goto failed; 3392 case ENETDOWN: 3393 /* 3394 * Have !ipif_addr_ready address; drop packet silently 3395 * until we can get applications to not send until we 3396 * are ready. 
3397 */ 3398 error = 0; 3399 goto failed; 3400 case EHOSTUNREACH: 3401 case ENETUNREACH: 3402 if (ixa->ixa_ire != NULL) { 3403 /* 3404 * Let conn_ip_output/ire_send_noroute return 3405 * the error and send any local ICMP error. 3406 */ 3407 error = 0; 3408 break; 3409 } 3410 /* FALLTHRU */ 3411 default: 3412 failed: 3413 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3414 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3415 ixa->ixa_cpid = connp->conn_cpid; 3416 ixa_refrele(ixa); 3417 freemsg(mp); 3418 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3419 return (error); 3420 } 3421 } else { 3422 /* Done with conn_t */ 3423 mutex_exit(&connp->conn_lock); 3424 } 3425 3426 /* We're done. Pass the packet to ip. */ 3427 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3428 3429 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3430 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3431 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3432 3433 error = conn_ip_output(mp, ixa); 3434 /* No udpOutErrors if an error since IP increases its error counter */ 3435 switch (error) { 3436 case 0: 3437 break; 3438 case EWOULDBLOCK: 3439 (void) ixa_check_drain_insert(connp, ixa); 3440 error = 0; 3441 break; 3442 case EADDRNOTAVAIL: 3443 /* 3444 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3445 * Don't have the application see that errno 3446 */ 3447 error = ENETUNREACH; 3448 /* FALLTHRU */ 3449 default: 3450 mutex_enter(&connp->conn_lock); 3451 /* 3452 * Clear the source and v6lastdst so we call ip_attr_connect 3453 * for the next packet and try to pick a better source. 
3454 */ 3455 if (connp->conn_mcbc_bind) 3456 connp->conn_saddr_v6 = ipv6_all_zeros; 3457 else 3458 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3459 connp->conn_v6lastdst = ipv6_all_zeros; 3460 mutex_exit(&connp->conn_lock); 3461 break; 3462 } 3463 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3464 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3465 ixa->ixa_cpid = connp->conn_cpid; 3466 ixa_refrele(ixa); 3467 return (error); 3468 } 3469 3470 3471 /* 3472 * Prepend the header template and then fill in the source and 3473 * flowinfo. The caller needs to handle the destination address since 3474 * it's setting is different if rthdr or source route. 3475 * 3476 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET. 3477 * When it returns NULL it sets errorp. 3478 */ 3479 static mblk_t * 3480 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, 3481 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) 3482 { 3483 udp_t *udp = connp->conn_udp; 3484 udp_stack_t *us = udp->udp_us; 3485 boolean_t insert_spi = udp->udp_nat_t_endpoint; 3486 uint_t pktlen; 3487 uint_t alloclen; 3488 uint_t copylen; 3489 uint8_t *iph; 3490 uint_t ip_hdr_length; 3491 udpha_t *udpha; 3492 uint32_t cksum; 3493 ip_pkt_t *ipp; 3494 3495 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3496 3497 /* 3498 * Copy the header template and leave space for an SPI 3499 */ 3500 copylen = connp->conn_ht_iphc_len; 3501 alloclen = copylen + (insert_spi ? 
sizeof (uint32_t) : 0); 3502 pktlen = alloclen + msgdsize(mp); 3503 if (pktlen > IP_MAXPACKET) { 3504 freemsg(mp); 3505 *errorp = EMSGSIZE; 3506 return (NULL); 3507 } 3508 ixa->ixa_pktlen = pktlen; 3509 3510 /* check/fix buffer config, setup pointers into it */ 3511 iph = mp->b_rptr - alloclen; 3512 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { 3513 mblk_t *mp1; 3514 3515 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); 3516 if (mp1 == NULL) { 3517 freemsg(mp); 3518 *errorp = ENOMEM; 3519 return (NULL); 3520 } 3521 mp1->b_wptr = DB_LIM(mp1); 3522 mp1->b_cont = mp; 3523 mp = mp1; 3524 iph = (mp->b_wptr - alloclen); 3525 } 3526 mp->b_rptr = iph; 3527 bcopy(connp->conn_ht_iphc, iph, copylen); 3528 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); 3529 3530 ixa->ixa_ip_hdr_length = ip_hdr_length; 3531 udpha = (udpha_t *)(iph + ip_hdr_length); 3532 3533 /* 3534 * Setup header length and prepare for ULP checksum done in IP. 3535 * udp_build_hdr_template has already massaged any routing header 3536 * and placed the result in conn_sum. 3537 * 3538 * We make it easy for IP to include our pseudo header 3539 * by putting our length in uha_checksum. 
3540 */ 3541 cksum = pktlen - ip_hdr_length; 3542 udpha->uha_length = htons(cksum); 3543 3544 cksum += connp->conn_sum; 3545 cksum = (cksum >> 16) + (cksum & 0xFFFF); 3546 ASSERT(cksum < 0x10000); 3547 3548 ipp = &connp->conn_xmit_ipp; 3549 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3550 ipha_t *ipha = (ipha_t *)iph; 3551 3552 ipha->ipha_length = htons((uint16_t)pktlen); 3553 3554 /* IP does the checksum if uha_checksum is non-zero */ 3555 if (us->us_do_checksum) 3556 udpha->uha_checksum = htons(cksum); 3557 3558 /* if IP_PKTINFO specified an addres it wins over bind() */ 3559 if ((ipp->ipp_fields & IPPF_ADDR) && 3560 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3561 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); 3562 ipha->ipha_src = ipp->ipp_addr_v4; 3563 } else { 3564 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 3565 } 3566 } else { 3567 ip6_t *ip6h = (ip6_t *)iph; 3568 3569 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); 3570 udpha->uha_checksum = htons(cksum); 3571 3572 /* if IP_PKTINFO specified an addres it wins over bind() */ 3573 if ((ipp->ipp_fields & IPPF_ADDR) && 3574 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3575 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); 3576 ip6h->ip6_src = ipp->ipp_addr; 3577 } else { 3578 ip6h->ip6_src = *v6src; 3579 } 3580 ip6h->ip6_vcf = 3581 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 3582 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 3583 if (ipp->ipp_fields & IPPF_TCLASS) { 3584 /* Overrides the class part of flowinfo */ 3585 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 3586 ipp->ipp_tclass); 3587 } 3588 } 3589 3590 /* Insert all-0s SPI now. 
*/ 3591 if (insert_spi) 3592 *((uint32_t *)(udpha + 1)) = 0; 3593 3594 udpha->uha_dst_port = dstport; 3595 return (mp); 3596 } 3597 3598 /* 3599 * Send a T_UDERR_IND in response to an M_DATA 3600 */ 3601 static void 3602 udp_ud_err_connected(conn_t *connp, t_scalar_t error) 3603 { 3604 struct sockaddr_storage ss; 3605 sin_t *sin; 3606 sin6_t *sin6; 3607 struct sockaddr *addr; 3608 socklen_t addrlen; 3609 mblk_t *mp1; 3610 3611 mutex_enter(&connp->conn_lock); 3612 /* Initialize addr and addrlen as if they're passed in */ 3613 if (connp->conn_family == AF_INET) { 3614 sin = (sin_t *)&ss; 3615 *sin = sin_null; 3616 sin->sin_family = AF_INET; 3617 sin->sin_port = connp->conn_fport; 3618 sin->sin_addr.s_addr = connp->conn_faddr_v4; 3619 addr = (struct sockaddr *)sin; 3620 addrlen = sizeof (*sin); 3621 } else { 3622 sin6 = (sin6_t *)&ss; 3623 *sin6 = sin6_null; 3624 sin6->sin6_family = AF_INET6; 3625 sin6->sin6_port = connp->conn_fport; 3626 sin6->sin6_flowinfo = connp->conn_flowinfo; 3627 sin6->sin6_addr = connp->conn_faddr_v6; 3628 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && 3629 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 3630 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; 3631 } else { 3632 sin6->sin6_scope_id = 0; 3633 } 3634 sin6->__sin6_src_id = 0; 3635 addr = (struct sockaddr *)sin6; 3636 addrlen = sizeof (*sin6); 3637 } 3638 mutex_exit(&connp->conn_lock); 3639 3640 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); 3641 if (mp1 != NULL) 3642 putnext(connp->conn_rq, mp1); 3643 } 3644 3645 /* 3646 * This routine handles all messages passed downstream. It either 3647 * consumes the message or passes it downstream; it never queues a 3648 * a message. 3649 * 3650 * Also entry point for sockfs when udp is in "direct sockfs" mode. 
This mode 3651 * is valid when we are directly beneath the stream head, and thus sockfs 3652 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3653 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3654 * connected endpoints. 3655 */ 3656 void 3657 udp_wput(queue_t *q, mblk_t *mp) 3658 { 3659 sin6_t *sin6; 3660 sin_t *sin = NULL; 3661 uint_t srcid; 3662 conn_t *connp = Q_TO_CONN(q); 3663 udp_t *udp = connp->conn_udp; 3664 int error = 0; 3665 struct sockaddr *addr = NULL; 3666 socklen_t addrlen; 3667 udp_stack_t *us = udp->udp_us; 3668 struct T_unitdata_req *tudr; 3669 mblk_t *data_mp; 3670 ushort_t ipversion; 3671 cred_t *cr; 3672 pid_t pid; 3673 3674 /* 3675 * We directly handle several cases here: T_UNITDATA_REQ message 3676 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3677 * socket. 3678 */ 3679 switch (DB_TYPE(mp)) { 3680 case M_DATA: 3681 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3682 /* Not connected; address is required */ 3683 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3684 UDP_DBGSTAT(us, udp_data_notconn); 3685 UDP_STAT(us, udp_out_err_notconn); 3686 freemsg(mp); 3687 return; 3688 } 3689 /* 3690 * All Solaris components should pass a db_credp 3691 * for this message, hence we ASSERT. 3692 * On production kernels we return an error to be robust against 3693 * random streams modules sitting on top of us. 
3694 */ 3695 cr = msg_getcred(mp, &pid); 3696 ASSERT(cr != NULL); 3697 if (cr == NULL) { 3698 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3699 freemsg(mp); 3700 return; 3701 } 3702 ASSERT(udp->udp_issocket); 3703 UDP_DBGSTAT(us, udp_data_conn); 3704 error = udp_output_connected(connp, mp, cr, pid); 3705 if (error != 0) { 3706 UDP_STAT(us, udp_out_err_output); 3707 if (connp->conn_rq != NULL) 3708 udp_ud_err_connected(connp, (t_scalar_t)error); 3709 #ifdef DEBUG 3710 printf("udp_output_connected returned %d\n", error); 3711 #endif 3712 } 3713 return; 3714 3715 case M_PROTO: 3716 case M_PCPROTO: 3717 tudr = (struct T_unitdata_req *)mp->b_rptr; 3718 if (MBLKL(mp) < sizeof (*tudr) || 3719 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3720 udp_wput_other(q, mp); 3721 return; 3722 } 3723 break; 3724 3725 default: 3726 udp_wput_other(q, mp); 3727 return; 3728 } 3729 3730 /* Handle valid T_UNITDATA_REQ here */ 3731 data_mp = mp->b_cont; 3732 if (data_mp == NULL) { 3733 error = EPROTO; 3734 goto ud_error2; 3735 } 3736 mp->b_cont = NULL; 3737 3738 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3739 error = EADDRNOTAVAIL; 3740 goto ud_error2; 3741 } 3742 3743 /* 3744 * All Solaris components should pass a db_credp 3745 * for this TPI message, hence we should ASSERT. 3746 * However, RPC (svc_clts_ksend) does this odd thing where it 3747 * passes the options from a T_UNITDATA_IND unchanged in a 3748 * T_UNITDATA_REQ. While that is the right thing to do for 3749 * some options, SCM_UCRED being the key one, this also makes it 3750 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3751 */ 3752 cr = msg_getcred(mp, &pid); 3753 if (cr == NULL) { 3754 cr = connp->conn_cred; 3755 pid = connp->conn_cpid; 3756 } 3757 3758 /* 3759 * If a port has not been bound to the stream, fail. 3760 * This is not a problem when sockfs is directly 3761 * above us, because it will ensure that the socket 3762 * is first bound before allowing data to be sent. 
3763 */ 3764 if (udp->udp_state == TS_UNBND) { 3765 error = EPROTO; 3766 goto ud_error2; 3767 } 3768 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3769 addrlen = tudr->DEST_length; 3770 3771 switch (connp->conn_family) { 3772 case AF_INET6: 3773 sin6 = (sin6_t *)addr; 3774 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3775 (sin6->sin6_family != AF_INET6)) { 3776 error = EADDRNOTAVAIL; 3777 goto ud_error2; 3778 } 3779 3780 srcid = sin6->__sin6_src_id; 3781 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3782 /* 3783 * Destination is a non-IPv4-compatible IPv6 address. 3784 * Send out an IPv6 format packet. 3785 */ 3786 3787 /* 3788 * If the local address is a mapped address return 3789 * an error. 3790 * It would be possible to send an IPv6 packet but the 3791 * response would never make it back to the application 3792 * since it is bound to a mapped address. 3793 */ 3794 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3795 error = EADDRNOTAVAIL; 3796 goto ud_error2; 3797 } 3798 3799 UDP_DBGSTAT(us, udp_out_ipv6); 3800 3801 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3802 sin6->sin6_addr = ipv6_loopback; 3803 ipversion = IPV6_VERSION; 3804 } else { 3805 if (connp->conn_ipv6_v6only) { 3806 error = EADDRNOTAVAIL; 3807 goto ud_error2; 3808 } 3809 3810 /* 3811 * If the local address is not zero or a mapped address 3812 * return an error. It would be possible to send an 3813 * IPv4 packet but the response would never make it 3814 * back to the application since it is bound to a 3815 * non-mapped address. 
3816 */ 3817 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3818 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3819 error = EADDRNOTAVAIL; 3820 goto ud_error2; 3821 } 3822 UDP_DBGSTAT(us, udp_out_mapped); 3823 3824 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3825 V4_PART_OF_V6(sin6->sin6_addr) = 3826 htonl(INADDR_LOOPBACK); 3827 } 3828 ipversion = IPV4_VERSION; 3829 } 3830 3831 if (tudr->OPT_length != 0) { 3832 /* 3833 * If we are connected then the destination needs to be 3834 * the same as the connected one. 3835 */ 3836 if (udp->udp_state == TS_DATA_XFER && 3837 !conn_same_as_last_v6(connp, sin6)) { 3838 error = EISCONN; 3839 goto ud_error2; 3840 } 3841 UDP_STAT(us, udp_out_opt); 3842 error = udp_output_ancillary(connp, NULL, sin6, 3843 data_mp, mp, NULL, cr, pid); 3844 } else { 3845 ip_xmit_attr_t *ixa; 3846 3847 /* 3848 * We have to allocate an ip_xmit_attr_t before we grab 3849 * conn_lock and we need to hold conn_lock once we've 3850 * checked conn_same_as_last_v6 to handle concurrent 3851 * send* calls on a socket. 
3852 */ 3853 ixa = conn_get_ixa(connp, B_FALSE); 3854 if (ixa == NULL) { 3855 error = ENOMEM; 3856 goto ud_error2; 3857 } 3858 mutex_enter(&connp->conn_lock); 3859 3860 if (conn_same_as_last_v6(connp, sin6) && 3861 connp->conn_lastsrcid == srcid && 3862 ipsec_outbound_policy_current(ixa)) { 3863 UDP_DBGSTAT(us, udp_out_lastdst); 3864 /* udp_output_lastdst drops conn_lock */ 3865 error = udp_output_lastdst(connp, data_mp, cr, 3866 pid, ixa); 3867 } else { 3868 UDP_DBGSTAT(us, udp_out_diffdst); 3869 /* udp_output_newdst drops conn_lock */ 3870 error = udp_output_newdst(connp, data_mp, NULL, 3871 sin6, ipversion, cr, pid, ixa); 3872 } 3873 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3874 } 3875 if (error == 0) { 3876 freeb(mp); 3877 return; 3878 } 3879 break; 3880 3881 case AF_INET: 3882 sin = (sin_t *)addr; 3883 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 3884 (sin->sin_family != AF_INET)) { 3885 error = EADDRNOTAVAIL; 3886 goto ud_error2; 3887 } 3888 UDP_DBGSTAT(us, udp_out_ipv4); 3889 if (sin->sin_addr.s_addr == INADDR_ANY) 3890 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 3891 ipversion = IPV4_VERSION; 3892 3893 srcid = 0; 3894 if (tudr->OPT_length != 0) { 3895 /* 3896 * If we are connected then the destination needs to be 3897 * the same as the connected one. 3898 */ 3899 if (udp->udp_state == TS_DATA_XFER && 3900 !conn_same_as_last_v4(connp, sin)) { 3901 error = EISCONN; 3902 goto ud_error2; 3903 } 3904 UDP_STAT(us, udp_out_opt); 3905 error = udp_output_ancillary(connp, sin, NULL, 3906 data_mp, mp, NULL, cr, pid); 3907 } else { 3908 ip_xmit_attr_t *ixa; 3909 3910 /* 3911 * We have to allocate an ip_xmit_attr_t before we grab 3912 * conn_lock and we need to hold conn_lock once we've 3913 * checked conn_same_as_last_v4 to handle concurrent 3914 * send* calls on a socket. 
3915 */ 3916 ixa = conn_get_ixa(connp, B_FALSE); 3917 if (ixa == NULL) { 3918 error = ENOMEM; 3919 goto ud_error2; 3920 } 3921 mutex_enter(&connp->conn_lock); 3922 3923 if (conn_same_as_last_v4(connp, sin) && 3924 ipsec_outbound_policy_current(ixa)) { 3925 UDP_DBGSTAT(us, udp_out_lastdst); 3926 /* udp_output_lastdst drops conn_lock */ 3927 error = udp_output_lastdst(connp, data_mp, cr, 3928 pid, ixa); 3929 } else { 3930 UDP_DBGSTAT(us, udp_out_diffdst); 3931 /* udp_output_newdst drops conn_lock */ 3932 error = udp_output_newdst(connp, data_mp, sin, 3933 NULL, ipversion, cr, pid, ixa); 3934 } 3935 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3936 } 3937 if (error == 0) { 3938 freeb(mp); 3939 return; 3940 } 3941 break; 3942 } 3943 UDP_STAT(us, udp_out_err_output); 3944 ASSERT(mp != NULL); 3945 /* mp is freed by the following routine */ 3946 udp_ud_err(q, mp, (t_scalar_t)error); 3947 return; 3948 3949 ud_error2: 3950 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3951 freemsg(data_mp); 3952 UDP_STAT(us, udp_out_err_output); 3953 ASSERT(mp != NULL); 3954 /* mp is freed by the following routine */ 3955 udp_ud_err(q, mp, (t_scalar_t)error); 3956 } 3957 3958 /* 3959 * Handle the case of the IP address, port, flow label being different 3960 * for both IPv4 and IPv6. 3961 * 3962 * NOTE: The caller must hold conn_lock and we drop it here. 
3963 */ 3964 static int 3965 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 3966 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 3967 { 3968 uint_t srcid; 3969 uint32_t flowinfo; 3970 udp_t *udp = connp->conn_udp; 3971 int error = 0; 3972 ip_xmit_attr_t *oldixa; 3973 udp_stack_t *us = udp->udp_us; 3974 in6_addr_t v6src; 3975 in6_addr_t v6dst; 3976 in6_addr_t v6nexthop; 3977 in_port_t dstport; 3978 3979 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3980 ASSERT(ixa != NULL); 3981 /* 3982 * We hold conn_lock across all the use and modifications of 3983 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 3984 * stay consistent. 3985 */ 3986 3987 ASSERT(cr != NULL); 3988 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3989 ixa->ixa_cred = cr; 3990 ixa->ixa_cpid = pid; 3991 if (is_system_labeled()) { 3992 /* We need to restart with a label based on the cred */ 3993 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3994 } 3995 3996 /* 3997 * If we are connected then the destination needs to be the 3998 * same as the connected one, which is not the case here since we 3999 * checked for that above. 4000 */ 4001 if (udp->udp_state == TS_DATA_XFER) { 4002 mutex_exit(&connp->conn_lock); 4003 error = EISCONN; 4004 goto ud_error; 4005 } 4006 4007 /* In case previous destination was multicast or multirt */ 4008 ip_attr_newdst(ixa); 4009 4010 /* 4011 * If laddr is unspecified then we look at sin6_src_id. 4012 * We will give precedence to a source address set with IPV6_PKTINFO 4013 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 4014 * want ip_attr_connect to select a source (since it can fail) when 4015 * IPV6_PKTINFO is specified. 4016 * If this doesn't result in a source address then we get a source 4017 * from ip_attr_connect() below. 
4018 */ 4019 v6src = connp->conn_saddr_v6; 4020 if (sin != NULL) { 4021 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 4022 dstport = sin->sin_port; 4023 flowinfo = 0; 4024 srcid = 0; 4025 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4026 if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) { 4027 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4028 connp->conn_netstack); 4029 } 4030 ixa->ixa_flags |= IXAF_IS_IPV4; 4031 } else { 4032 v6dst = sin6->sin6_addr; 4033 dstport = sin6->sin6_port; 4034 flowinfo = sin6->sin6_flowinfo; 4035 srcid = sin6->__sin6_src_id; 4036 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 4037 ixa->ixa_scopeid = sin6->sin6_scope_id; 4038 ixa->ixa_flags |= IXAF_SCOPEID_SET; 4039 } else { 4040 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4041 } 4042 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 4043 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4044 connp->conn_netstack); 4045 } 4046 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 4047 ixa->ixa_flags |= IXAF_IS_IPV4; 4048 else 4049 ixa->ixa_flags &= ~IXAF_IS_IPV4; 4050 } 4051 /* Handle IPV6_PKTINFO setting source address. */ 4052 if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && 4053 (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR)) { 4054 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 4055 4056 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4057 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4058 v6src = ipp->ipp_addr; 4059 } else { 4060 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4061 v6src = ipp->ipp_addr; 4062 } 4063 } 4064 4065 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 4066 mutex_exit(&connp->conn_lock); 4067 4068 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 4069 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 4070 switch (error) { 4071 case 0: 4072 break; 4073 case EADDRNOTAVAIL: 4074 /* 4075 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
4076 * Don't have the application see that errno 4077 */ 4078 error = ENETUNREACH; 4079 goto failed; 4080 case ENETDOWN: 4081 /* 4082 * Have !ipif_addr_ready address; drop packet silently 4083 * until we can get applications to not send until we 4084 * are ready. 4085 */ 4086 error = 0; 4087 goto failed; 4088 case EHOSTUNREACH: 4089 case ENETUNREACH: 4090 if (ixa->ixa_ire != NULL) { 4091 /* 4092 * Let conn_ip_output/ire_send_noroute return 4093 * the error and send any local ICMP error. 4094 */ 4095 error = 0; 4096 break; 4097 } 4098 /* FALLTHRU */ 4099 failed: 4100 default: 4101 goto ud_error; 4102 } 4103 4104 4105 /* 4106 * Cluster note: we let the cluster hook know that we are sending to a 4107 * new address and/or port. 4108 */ 4109 if (cl_inet_connect2 != NULL) { 4110 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 4111 if (error != 0) { 4112 error = EHOSTUNREACH; 4113 goto ud_error; 4114 } 4115 } 4116 4117 mutex_enter(&connp->conn_lock); 4118 /* 4119 * While we dropped the lock some other thread might have connected 4120 * this socket. If so we bail out with EISCONN to ensure that the 4121 * connecting thread is the one that updates conn_ixa, conn_ht_* 4122 * and conn_*last*. 4123 */ 4124 if (udp->udp_state == TS_DATA_XFER) { 4125 mutex_exit(&connp->conn_lock); 4126 error = EISCONN; 4127 goto ud_error; 4128 } 4129 4130 /* 4131 * We need to rebuild the headers if 4132 * - we are labeling packets (could be different for different 4133 * destinations) 4134 * - we have a source route (or routing header) since we need to 4135 * massage that to get the pseudo-header checksum 4136 * - the IP version is different than the last time 4137 * - a socket option with COA_HEADER_CHANGED has been set which 4138 * set conn_v6lastdst to zero. 4139 * 4140 * Otherwise the prepend function will just update the src, dst, 4141 * dstport, and flow label. 
4142 */ 4143 if (is_system_labeled()) { 4144 /* TX MLP requires SCM_UCRED and don't have that here */ 4145 if (connp->conn_mlp_type != mlptSingle) { 4146 mutex_exit(&connp->conn_lock); 4147 error = ECONNREFUSED; 4148 goto ud_error; 4149 } 4150 /* 4151 * Check whether Trusted Solaris policy allows communication 4152 * with this host, and pretend that the destination is 4153 * unreachable if not. 4154 * Compute any needed label and place it in ipp_label_v4/v6. 4155 * 4156 * Later conn_build_hdr_template/conn_prepend_hdr takes 4157 * ipp_label_v4/v6 to form the packet. 4158 * 4159 * Tsol note: Since we hold conn_lock we know no other 4160 * thread manipulates conn_xmit_ipp. 4161 */ 4162 error = conn_update_label(connp, ixa, &v6dst, 4163 &connp->conn_xmit_ipp); 4164 if (error != 0) { 4165 mutex_exit(&connp->conn_lock); 4166 goto ud_error; 4167 } 4168 /* Rebuild the header template */ 4169 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4170 flowinfo); 4171 if (error != 0) { 4172 mutex_exit(&connp->conn_lock); 4173 goto ud_error; 4174 } 4175 } else if ((connp->conn_xmit_ipp.ipp_fields & 4176 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 4177 ipversion != connp->conn_lastipversion || 4178 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 4179 /* Rebuild the header template */ 4180 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4181 flowinfo); 4182 if (error != 0) { 4183 mutex_exit(&connp->conn_lock); 4184 goto ud_error; 4185 } 4186 } else { 4187 /* Simply update the destination address if no source route */ 4188 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4189 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 4190 4191 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 4192 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 4193 ipha->ipha_fragment_offset_and_flags |= 4194 IPH_DF_HTONS; 4195 } else { 4196 ipha->ipha_fragment_offset_and_flags &= 4197 ~IPH_DF_HTONS; 4198 } 4199 } else { 4200 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 4201 ip6h->ip6_dst = v6dst; 4202 } 4203 
} 4204 4205 /* 4206 * Remember the dst/dstport etc which corresponds to the built header 4207 * template and conn_ixa. 4208 */ 4209 oldixa = conn_replace_ixa(connp, ixa); 4210 connp->conn_v6lastdst = v6dst; 4211 connp->conn_lastipversion = ipversion; 4212 connp->conn_lastdstport = dstport; 4213 connp->conn_lastflowinfo = flowinfo; 4214 connp->conn_lastscopeid = ixa->ixa_scopeid; 4215 connp->conn_lastsrcid = srcid; 4216 /* Also remember a source to use together with lastdst */ 4217 connp->conn_v6lastsrc = v6src; 4218 4219 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 4220 dstport, flowinfo, &error); 4221 4222 /* Done with conn_t */ 4223 mutex_exit(&connp->conn_lock); 4224 ixa_refrele(oldixa); 4225 4226 if (data_mp == NULL) { 4227 ASSERT(error != 0); 4228 goto ud_error; 4229 } 4230 4231 /* We're done. Pass the packet to ip. */ 4232 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 4233 4234 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 4235 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *, 4236 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]); 4237 4238 error = conn_ip_output(data_mp, ixa); 4239 /* No udpOutErrors if an error since IP increases its error counter */ 4240 switch (error) { 4241 case 0: 4242 break; 4243 case EWOULDBLOCK: 4244 (void) ixa_check_drain_insert(connp, ixa); 4245 error = 0; 4246 break; 4247 case EADDRNOTAVAIL: 4248 /* 4249 * IXAF_VERIFY_SOURCE tells us to pick a better source. 4250 * Don't have the application see that errno 4251 */ 4252 error = ENETUNREACH; 4253 /* FALLTHRU */ 4254 default: 4255 mutex_enter(&connp->conn_lock); 4256 /* 4257 * Clear the source and v6lastdst so we call ip_attr_connect 4258 * for the next packet and try to pick a better source. 
4259 */ 4260 if (connp->conn_mcbc_bind) 4261 connp->conn_saddr_v6 = ipv6_all_zeros; 4262 else 4263 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 4264 connp->conn_v6lastdst = ipv6_all_zeros; 4265 mutex_exit(&connp->conn_lock); 4266 break; 4267 } 4268 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4269 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4270 ixa->ixa_cpid = connp->conn_cpid; 4271 ixa_refrele(ixa); 4272 return (error); 4273 4274 ud_error: 4275 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4276 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4277 ixa->ixa_cpid = connp->conn_cpid; 4278 ixa_refrele(ixa); 4279 4280 freemsg(data_mp); 4281 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 4282 UDP_STAT(us, udp_out_err_output); 4283 return (error); 4284 } 4285 4286 /* ARGSUSED */ 4287 static void 4288 udp_wput_fallback(queue_t *wq, mblk_t *mp) 4289 { 4290 #ifdef DEBUG 4291 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 4292 #endif 4293 freemsg(mp); 4294 } 4295 4296 4297 /* 4298 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
4299 */ 4300 static void 4301 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 4302 { 4303 void *data; 4304 mblk_t *datamp = mp->b_cont; 4305 conn_t *connp = Q_TO_CONN(q); 4306 udp_t *udp = connp->conn_udp; 4307 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 4308 4309 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 4310 cmdp->cb_error = EPROTO; 4311 qreply(q, mp); 4312 return; 4313 } 4314 data = datamp->b_rptr; 4315 4316 mutex_enter(&connp->conn_lock); 4317 switch (cmdp->cb_cmd) { 4318 case TI_GETPEERNAME: 4319 if (udp->udp_state != TS_DATA_XFER) 4320 cmdp->cb_error = ENOTCONN; 4321 else 4322 cmdp->cb_error = conn_getpeername(connp, data, 4323 &cmdp->cb_len); 4324 break; 4325 case TI_GETMYNAME: 4326 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); 4327 break; 4328 default: 4329 cmdp->cb_error = EINVAL; 4330 break; 4331 } 4332 mutex_exit(&connp->conn_lock); 4333 4334 qreply(q, mp); 4335 } 4336 4337 static void 4338 udp_use_pure_tpi(udp_t *udp) 4339 { 4340 conn_t *connp = udp->udp_connp; 4341 4342 mutex_enter(&connp->conn_lock); 4343 udp->udp_issocket = B_FALSE; 4344 mutex_exit(&connp->conn_lock); 4345 UDP_STAT(udp->udp_us, udp_sock_fallback); 4346 } 4347 4348 static void 4349 udp_wput_other(queue_t *q, mblk_t *mp) 4350 { 4351 uchar_t *rptr = mp->b_rptr; 4352 struct iocblk *iocp; 4353 conn_t *connp = Q_TO_CONN(q); 4354 udp_t *udp = connp->conn_udp; 4355 cred_t *cr; 4356 4357 switch (mp->b_datap->db_type) { 4358 case M_CMD: 4359 udp_wput_cmdblk(q, mp); 4360 return; 4361 4362 case M_PROTO: 4363 case M_PCPROTO: 4364 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4365 /* 4366 * If the message does not contain a PRIM_type, 4367 * throw it away. 
4368 */ 4369 freemsg(mp); 4370 return; 4371 } 4372 switch (((t_primp_t)rptr)->type) { 4373 case T_ADDR_REQ: 4374 udp_addr_req(q, mp); 4375 return; 4376 case O_T_BIND_REQ: 4377 case T_BIND_REQ: 4378 udp_tpi_bind(q, mp); 4379 return; 4380 case T_CONN_REQ: 4381 udp_tpi_connect(q, mp); 4382 return; 4383 case T_CAPABILITY_REQ: 4384 udp_capability_req(q, mp); 4385 return; 4386 case T_INFO_REQ: 4387 udp_info_req(q, mp); 4388 return; 4389 case T_UNITDATA_REQ: 4390 /* 4391 * If a T_UNITDATA_REQ gets here, the address must 4392 * be bad. Valid T_UNITDATA_REQs are handled 4393 * in udp_wput. 4394 */ 4395 udp_ud_err(q, mp, EADDRNOTAVAIL); 4396 return; 4397 case T_UNBIND_REQ: 4398 udp_tpi_unbind(q, mp); 4399 return; 4400 case T_SVR4_OPTMGMT_REQ: 4401 /* 4402 * All Solaris components should pass a db_credp 4403 * for this TPI message, hence we ASSERT. 4404 * But in case there is some other M_PROTO that looks 4405 * like a TPI message sent by some other kernel 4406 * component, we check and return an error. 4407 */ 4408 cr = msg_getcred(mp, NULL); 4409 ASSERT(cr != NULL); 4410 if (cr == NULL) { 4411 udp_err_ack(q, mp, TSYSERR, EINVAL); 4412 return; 4413 } 4414 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 4415 cr)) { 4416 svr4_optcom_req(q, mp, cr, &udp_opt_obj); 4417 } 4418 return; 4419 4420 case T_OPTMGMT_REQ: 4421 /* 4422 * All Solaris components should pass a db_credp 4423 * for this TPI message, hence we ASSERT. 4424 * But in case there is some other M_PROTO that looks 4425 * like a TPI message sent by some other kernel 4426 * component, we check and return an error. 4427 */ 4428 cr = msg_getcred(mp, NULL); 4429 ASSERT(cr != NULL); 4430 if (cr == NULL) { 4431 udp_err_ack(q, mp, TSYSERR, EINVAL); 4432 return; 4433 } 4434 tpi_optcom_req(q, mp, cr, &udp_opt_obj); 4435 return; 4436 4437 case T_DISCON_REQ: 4438 udp_tpi_disconnect(q, mp); 4439 return; 4440 4441 /* The following TPI message is not supported by udp. 
*/ 4442 case O_T_CONN_RES: 4443 case T_CONN_RES: 4444 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4445 return; 4446 4447 /* The following 3 TPI requests are illegal for udp. */ 4448 case T_DATA_REQ: 4449 case T_EXDATA_REQ: 4450 case T_ORDREL_REQ: 4451 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4452 return; 4453 default: 4454 break; 4455 } 4456 break; 4457 case M_FLUSH: 4458 if (*rptr & FLUSHW) 4459 flushq(q, FLUSHDATA); 4460 break; 4461 case M_IOCTL: 4462 iocp = (struct iocblk *)mp->b_rptr; 4463 switch (iocp->ioc_cmd) { 4464 case TI_GETPEERNAME: 4465 if (udp->udp_state != TS_DATA_XFER) { 4466 /* 4467 * If a default destination address has not 4468 * been associated with the stream, then we 4469 * don't know the peer's name. 4470 */ 4471 iocp->ioc_error = ENOTCONN; 4472 iocp->ioc_count = 0; 4473 mp->b_datap->db_type = M_IOCACK; 4474 qreply(q, mp); 4475 return; 4476 } 4477 /* FALLTHRU */ 4478 case TI_GETMYNAME: 4479 /* 4480 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4481 * need to copyin the user's strbuf structure. 4482 * Processing will continue in the M_IOCDATA case 4483 * below. 4484 */ 4485 mi_copyin(q, mp, NULL, 4486 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4487 return; 4488 case _SIOCSOCKFALLBACK: 4489 /* 4490 * Either sockmod is about to be popped and the 4491 * socket would now be treated as a plain stream, 4492 * or a module is about to be pushed so we have 4493 * to follow pure TPI semantics. 4494 */ 4495 if (!udp->udp_issocket) { 4496 DB_TYPE(mp) = M_IOCNAK; 4497 iocp->ioc_error = EINVAL; 4498 } else { 4499 udp_use_pure_tpi(udp); 4500 4501 DB_TYPE(mp) = M_IOCACK; 4502 iocp->ioc_error = 0; 4503 } 4504 iocp->ioc_count = 0; 4505 iocp->ioc_rval = 0; 4506 qreply(q, mp); 4507 return; 4508 default: 4509 break; 4510 } 4511 break; 4512 case M_IOCDATA: 4513 udp_wput_iocdata(q, mp); 4514 return; 4515 default: 4516 /* Unrecognized messages are passed through without change. 
*/ 4517 break; 4518 } 4519 ip_wput_nondata(q, mp); 4520 } 4521 4522 /* 4523 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 4524 * messages. 4525 */ 4526 static void 4527 udp_wput_iocdata(queue_t *q, mblk_t *mp) 4528 { 4529 mblk_t *mp1; 4530 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4531 STRUCT_HANDLE(strbuf, sb); 4532 uint_t addrlen; 4533 conn_t *connp = Q_TO_CONN(q); 4534 udp_t *udp = connp->conn_udp; 4535 4536 /* Make sure it is one of ours. */ 4537 switch (iocp->ioc_cmd) { 4538 case TI_GETMYNAME: 4539 case TI_GETPEERNAME: 4540 break; 4541 default: 4542 ip_wput_nondata(q, mp); 4543 return; 4544 } 4545 4546 switch (mi_copy_state(q, mp, &mp1)) { 4547 case -1: 4548 return; 4549 case MI_COPY_CASE(MI_COPY_IN, 1): 4550 break; 4551 case MI_COPY_CASE(MI_COPY_OUT, 1): 4552 /* 4553 * The address has been copied out, so now 4554 * copyout the strbuf. 4555 */ 4556 mi_copyout(q, mp); 4557 return; 4558 case MI_COPY_CASE(MI_COPY_OUT, 2): 4559 /* 4560 * The address and strbuf have been copied out. 4561 * We're done, so just acknowledge the original 4562 * M_IOCTL. 4563 */ 4564 mi_copy_done(q, mp, 0); 4565 return; 4566 default: 4567 /* 4568 * Something strange has happened, so acknowledge 4569 * the original M_IOCTL with an EPROTO error. 4570 */ 4571 mi_copy_done(q, mp, EPROTO); 4572 return; 4573 } 4574 4575 /* 4576 * Now we have the strbuf structure for TI_GETMYNAME 4577 * and TI_GETPEERNAME. Next we copyout the requested 4578 * address and then we'll copyout the strbuf. 
4579 */ 4580 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 4581 4582 if (connp->conn_family == AF_INET) 4583 addrlen = sizeof (sin_t); 4584 else 4585 addrlen = sizeof (sin6_t); 4586 4587 if (STRUCT_FGET(sb, maxlen) < addrlen) { 4588 mi_copy_done(q, mp, EINVAL); 4589 return; 4590 } 4591 4592 switch (iocp->ioc_cmd) { 4593 case TI_GETMYNAME: 4594 break; 4595 case TI_GETPEERNAME: 4596 if (udp->udp_state != TS_DATA_XFER) { 4597 mi_copy_done(q, mp, ENOTCONN); 4598 return; 4599 } 4600 break; 4601 } 4602 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 4603 if (!mp1) 4604 return; 4605 4606 STRUCT_FSET(sb, len, addrlen); 4607 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4608 case TI_GETMYNAME: 4609 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, 4610 &addrlen); 4611 break; 4612 case TI_GETPEERNAME: 4613 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, 4614 &addrlen); 4615 break; 4616 } 4617 mp1->b_wptr += addrlen; 4618 /* Copy out the address */ 4619 mi_copyout(q, mp); 4620 } 4621 4622 void 4623 udp_ddi_g_init(void) 4624 { 4625 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 4626 udp_opt_obj.odb_opt_arr_cnt); 4627 4628 /* 4629 * We want to be informed each time a stack is created or 4630 * destroyed in the kernel, so we can maintain the 4631 * set of udp_stack_t's. 4632 */ 4633 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 4634 } 4635 4636 void 4637 udp_ddi_g_destroy(void) 4638 { 4639 netstack_unregister(NS_UDP); 4640 } 4641 4642 #define INET_NAME "ip" 4643 4644 /* 4645 * Initialize the UDP stack instance. 
4646 */ 4647 static void * 4648 udp_stack_init(netstackid_t stackid, netstack_t *ns) 4649 { 4650 udp_stack_t *us; 4651 int i; 4652 int error = 0; 4653 major_t major; 4654 size_t arrsz; 4655 4656 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 4657 us->us_netstack = ns; 4658 4659 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 4660 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 4661 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 4662 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 4663 4664 /* 4665 * The smallest anonymous port in the priviledged port range which UDP 4666 * looks for free port. Use in the option UDP_ANONPRIVBIND. 4667 */ 4668 us->us_min_anonpriv_port = 512; 4669 4670 us->us_bind_fanout_size = udp_bind_fanout_size; 4671 4672 /* Roundup variable that might have been modified in /etc/system */ 4673 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 4674 /* Not a power of two. Round up to nearest power of two */ 4675 for (i = 0; i < 31; i++) { 4676 if (us->us_bind_fanout_size < (1 << i)) 4677 break; 4678 } 4679 us->us_bind_fanout_size = 1 << i; 4680 } 4681 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 4682 sizeof (udp_fanout_t), KM_SLEEP); 4683 for (i = 0; i < us->us_bind_fanout_size; i++) { 4684 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 4685 NULL); 4686 } 4687 4688 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t); 4689 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, 4690 KM_SLEEP); 4691 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz); 4692 4693 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 4694 us->us_mibkp = udp_kstat_init(stackid); 4695 4696 major = mod_name_to_major(INET_NAME); 4697 error = ldi_ident_from_major(major, &us->us_ldi_ident); 4698 ASSERT(error == 0); 4699 return (us); 4700 } 4701 4702 /* 4703 * Free the UDP stack instance. 
4704 */ 4705 static void 4706 udp_stack_fini(netstackid_t stackid, void *arg) 4707 { 4708 udp_stack_t *us = (udp_stack_t *)arg; 4709 int i; 4710 4711 for (i = 0; i < us->us_bind_fanout_size; i++) { 4712 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 4713 } 4714 4715 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 4716 sizeof (udp_fanout_t)); 4717 4718 us->us_bind_fanout = NULL; 4719 4720 kmem_free(us->us_propinfo_tbl, 4721 udp_propinfo_count * sizeof (mod_prop_info_t)); 4722 us->us_propinfo_tbl = NULL; 4723 4724 udp_kstat_fini(stackid, us->us_mibkp); 4725 us->us_mibkp = NULL; 4726 4727 udp_kstat2_fini(stackid, us->us_kstat); 4728 us->us_kstat = NULL; 4729 bzero(&us->us_statistics, sizeof (us->us_statistics)); 4730 4731 mutex_destroy(&us->us_epriv_port_lock); 4732 ldi_ident_release(us->us_ldi_ident); 4733 kmem_free(us, sizeof (*us)); 4734 } 4735 4736 static void * 4737 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 4738 { 4739 kstat_t *ksp; 4740 4741 udp_stat_t template = { 4742 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 4743 { "udp_out_opt", KSTAT_DATA_UINT64 }, 4744 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 4745 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 4746 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 4747 #ifdef DEBUG 4748 { "udp_data_conn", KSTAT_DATA_UINT64 }, 4749 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 4750 { "udp_out_lastdst", KSTAT_DATA_UINT64 }, 4751 { "udp_out_diffdst", KSTAT_DATA_UINT64 }, 4752 { "udp_out_ipv6", KSTAT_DATA_UINT64 }, 4753 { "udp_out_mapped", KSTAT_DATA_UINT64 }, 4754 { "udp_out_ipv4", KSTAT_DATA_UINT64 }, 4755 #endif 4756 }; 4757 4758 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 4759 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4760 KSTAT_FLAG_VIRTUAL, stackid); 4761 4762 if (ksp == NULL) 4763 return (NULL); 4764 4765 bcopy(&template, us_statisticsp, sizeof (template)); 4766 ksp->ks_data = (void *)us_statisticsp; 4767 ksp->ks_private = (void *)(uintptr_t)stackid; 4768 
4769 kstat_install(ksp); 4770 return (ksp); 4771 } 4772 4773 static void 4774 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 4775 { 4776 if (ksp != NULL) { 4777 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4778 kstat_delete_netstack(ksp, stackid); 4779 } 4780 } 4781 4782 static void * 4783 udp_kstat_init(netstackid_t stackid) 4784 { 4785 kstat_t *ksp; 4786 4787 udp_named_kstat_t template = { 4788 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 4789 { "inErrors", KSTAT_DATA_UINT32, 0 }, 4790 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 4791 { "entrySize", KSTAT_DATA_INT32, 0 }, 4792 { "entry6Size", KSTAT_DATA_INT32, 0 }, 4793 { "outErrors", KSTAT_DATA_UINT32, 0 }, 4794 }; 4795 4796 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 4797 KSTAT_TYPE_NAMED, 4798 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 4799 4800 if (ksp == NULL || ksp->ks_data == NULL) 4801 return (NULL); 4802 4803 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 4804 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 4805 4806 bcopy(&template, ksp->ks_data, sizeof (template)); 4807 ksp->ks_update = udp_kstat_update; 4808 ksp->ks_private = (void *)(uintptr_t)stackid; 4809 4810 kstat_install(ksp); 4811 return (ksp); 4812 } 4813 4814 static void 4815 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4816 { 4817 if (ksp != NULL) { 4818 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4819 kstat_delete_netstack(ksp, stackid); 4820 } 4821 } 4822 4823 static int 4824 udp_kstat_update(kstat_t *kp, int rw) 4825 { 4826 udp_named_kstat_t *udpkp; 4827 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 4828 netstack_t *ns; 4829 udp_stack_t *us; 4830 4831 if ((kp == NULL) || (kp->ks_data == NULL)) 4832 return (EIO); 4833 4834 if (rw == KSTAT_WRITE) 4835 return (EACCES); 4836 4837 ns = netstack_find_by_stackid(stackid); 4838 if (ns == NULL) 4839 return (-1); 4840 us = ns->netstack_udp; 4841 if (us == NULL) { 4842 netstack_rele(ns); 4843 
return (-1); 4844 } 4845 udpkp = (udp_named_kstat_t *)kp->ks_data; 4846 4847 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 4848 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 4849 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 4850 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 4851 netstack_rele(ns); 4852 return (0); 4853 } 4854 4855 static size_t 4856 udp_set_rcv_hiwat(udp_t *udp, size_t size) 4857 { 4858 udp_stack_t *us = udp->udp_us; 4859 4860 /* We add a bit of extra buffering */ 4861 size += size >> 1; 4862 if (size > us->us_max_buf) 4863 size = us->us_max_buf; 4864 4865 udp->udp_rcv_hiwat = size; 4866 return (size); 4867 } 4868 4869 /* 4870 * For the lower queue so that UDP can be a dummy mux. 4871 * Nobody should be sending 4872 * packets up this stream 4873 */ 4874 static void 4875 udp_lrput(queue_t *q, mblk_t *mp) 4876 { 4877 switch (mp->b_datap->db_type) { 4878 case M_FLUSH: 4879 /* Turn around */ 4880 if (*mp->b_rptr & FLUSHW) { 4881 *mp->b_rptr &= ~FLUSHR; 4882 qreply(q, mp); 4883 return; 4884 } 4885 break; 4886 } 4887 freemsg(mp); 4888 } 4889 4890 /* 4891 * For the lower queue so that UDP can be a dummy mux. 4892 * Nobody should be sending packets down this stream. 4893 */ 4894 /* ARGSUSED */ 4895 void 4896 udp_lwput(queue_t *q, mblk_t *mp) 4897 { 4898 freemsg(mp); 4899 } 4900 4901 /* 4902 * Below routines for UDP socket module. 
4903 */ 4904 4905 static conn_t * 4906 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp) 4907 { 4908 udp_t *udp; 4909 conn_t *connp; 4910 zoneid_t zoneid; 4911 netstack_t *ns; 4912 udp_stack_t *us; 4913 int len; 4914 4915 ASSERT(errorp != NULL); 4916 4917 if ((*errorp = secpolicy_basic_net_access(credp)) != 0) 4918 return (NULL); 4919 4920 ns = netstack_find_by_cred(credp); 4921 ASSERT(ns != NULL); 4922 us = ns->netstack_udp; 4923 ASSERT(us != NULL); 4924 4925 /* 4926 * For exclusive stacks we set the zoneid to zero 4927 * to make UDP operate as if in the global zone. 4928 */ 4929 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 4930 zoneid = GLOBAL_ZONEID; 4931 else 4932 zoneid = crgetzoneid(credp); 4933 4934 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 4935 4936 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 4937 if (connp == NULL) { 4938 netstack_rele(ns); 4939 *errorp = ENOMEM; 4940 return (NULL); 4941 } 4942 udp = connp->conn_udp; 4943 4944 /* 4945 * ipcl_conn_create did a netstack_hold. Undo the hold that was 4946 * done by netstack_find_by_cred() 4947 */ 4948 netstack_rele(ns); 4949 4950 /* 4951 * Since this conn_t/udp_t is not yet visible to anybody else we don't 4952 * need to lock anything. 4953 */ 4954 ASSERT(connp->conn_proto == IPPROTO_UDP); 4955 ASSERT(connp->conn_udp == udp); 4956 ASSERT(udp->udp_connp == connp); 4957 4958 /* Set the initial state of the stream and the privilege status. 
*/ 4959 udp->udp_state = TS_UNBND; 4960 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 4961 if (isv6) { 4962 connp->conn_family = AF_INET6; 4963 connp->conn_ipversion = IPV6_VERSION; 4964 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 4965 connp->conn_default_ttl = us->us_ipv6_hoplimit; 4966 len = sizeof (ip6_t) + UDPH_SIZE; 4967 } else { 4968 connp->conn_family = AF_INET; 4969 connp->conn_ipversion = IPV4_VERSION; 4970 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 4971 connp->conn_default_ttl = us->us_ipv4_ttl; 4972 len = sizeof (ipha_t) + UDPH_SIZE; 4973 } 4974 4975 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 4976 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 4977 4978 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 4979 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 4980 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 4981 connp->conn_ixa->ixa_zoneid = zoneid; 4982 4983 connp->conn_zoneid = zoneid; 4984 4985 /* 4986 * If the caller has the process-wide flag set, then default to MAC 4987 * exempt mode. This allows read-down to unlabeled hosts. 
4988 */ 4989 if (getpflags(NET_MAC_AWARE, credp) != 0) 4990 connp->conn_mac_mode = CONN_MAC_AWARE; 4991 4992 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 4993 4994 udp->udp_us = us; 4995 4996 connp->conn_rcvbuf = us->us_recv_hiwat; 4997 connp->conn_sndbuf = us->us_xmit_hiwat; 4998 connp->conn_sndlowat = us->us_xmit_lowat; 4999 connp->conn_rcvlowat = udp_mod_info.mi_lowat; 5000 5001 connp->conn_wroff = len + us->us_wroff_extra; 5002 connp->conn_so_type = SOCK_DGRAM; 5003 5004 connp->conn_recv = udp_input; 5005 connp->conn_recvicmp = udp_icmp_input; 5006 crhold(credp); 5007 connp->conn_cred = credp; 5008 connp->conn_cpid = curproc->p_pid; 5009 connp->conn_open_time = ddi_get_lbolt64(); 5010 /* Cache things in ixa without an extra refhold */ 5011 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 5012 connp->conn_ixa->ixa_cred = connp->conn_cred; 5013 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 5014 if (is_system_labeled()) 5015 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 5016 5017 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 5018 5019 if (us->us_pmtu_discovery) 5020 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 5021 5022 return (connp); 5023 } 5024 5025 sock_lower_handle_t 5026 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 5027 uint_t *smodep, int *errorp, int flags, cred_t *credp) 5028 { 5029 udp_t *udp = NULL; 5030 udp_stack_t *us; 5031 conn_t *connp; 5032 boolean_t isv6; 5033 5034 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 5035 (proto != 0 && proto != IPPROTO_UDP)) { 5036 *errorp = EPROTONOSUPPORT; 5037 return (NULL); 5038 } 5039 5040 if (family == AF_INET6) 5041 isv6 = B_TRUE; 5042 else 5043 isv6 = B_FALSE; 5044 5045 connp = udp_do_open(credp, isv6, flags, errorp); 5046 if (connp == NULL) 5047 return (NULL); 5048 5049 udp = connp->conn_udp; 5050 ASSERT(udp != NULL); 5051 us = udp->udp_us; 5052 ASSERT(us != NULL); 5053 5054 udp->udp_issocket = 
B_TRUE; 5055 connp->conn_flags |= IPCL_NONSTR; 5056 5057 /* 5058 * Set flow control 5059 * Since this conn_t/udp_t is not yet visible to anybody else we don't 5060 * need to lock anything. 5061 */ 5062 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); 5063 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; 5064 5065 connp->conn_flow_cntrld = B_FALSE; 5066 5067 mutex_enter(&connp->conn_lock); 5068 connp->conn_state_flags &= ~CONN_INCIPIENT; 5069 mutex_exit(&connp->conn_lock); 5070 5071 *errorp = 0; 5072 *smodep = SM_ATOMIC; 5073 *sock_downcalls = &sock_udp_downcalls; 5074 return ((sock_lower_handle_t)connp); 5075 } 5076 5077 /* ARGSUSED3 */ 5078 void 5079 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 5080 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 5081 { 5082 conn_t *connp = (conn_t *)proto_handle; 5083 struct sock_proto_props sopp; 5084 5085 /* All Solaris components should pass a cred for this operation. */ 5086 ASSERT(cr != NULL); 5087 5088 connp->conn_upcalls = sock_upcalls; 5089 connp->conn_upper_handle = sock_handle; 5090 5091 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 5092 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 5093 sopp.sopp_wroff = connp->conn_wroff; 5094 sopp.sopp_maxblk = INFPSZ; 5095 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 5096 sopp.sopp_rxlowat = connp->conn_rcvlowat; 5097 sopp.sopp_maxaddrlen = sizeof (sin6_t); 5098 sopp.sopp_maxpsz = 5099 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 5100 UDP_MAXPACKET_IPV6; 5101 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : 5102 udp_mod_info.mi_minpsz; 5103 5104 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 5105 &sopp); 5106 } 5107 5108 static void 5109 udp_do_close(conn_t *connp) 5110 { 5111 udp_t *udp; 5112 5113 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 5114 udp = connp->conn_udp; 5115 5116 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 5117 /* 5118 * Running in cluster mode - register unbind information 5119 */ 5120 if (connp->conn_ipversion == IPV4_VERSION) { 5121 (*cl_inet_unbind)( 5122 connp->conn_netstack->netstack_stackid, 5123 IPPROTO_UDP, AF_INET, 5124 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5125 (in_port_t)connp->conn_lport, NULL); 5126 } else { 5127 (*cl_inet_unbind)( 5128 connp->conn_netstack->netstack_stackid, 5129 IPPROTO_UDP, AF_INET6, 5130 (uint8_t *)&(connp->conn_laddr_v6), 5131 (in_port_t)connp->conn_lport, NULL); 5132 } 5133 } 5134 5135 udp_bind_hash_remove(udp, B_FALSE); 5136 5137 ip_quiesce_conn(connp); 5138 5139 if (!IPCL_IS_NONSTR(connp)) { 5140 ASSERT(connp->conn_wq != NULL); 5141 ASSERT(connp->conn_rq != NULL); 5142 qprocsoff(connp->conn_rq); 5143 } 5144 5145 udp_close_free(connp); 5146 5147 /* 5148 * Now we are truly single threaded on this stream, and can 5149 * delete the things hanging off the connp, and finally the connp. 5150 * We removed this connp from the fanout list, it cannot be 5151 * accessed thru the fanouts, and we already waited for the 5152 * conn_ref to drop to 0. We are already in close, so 5153 * there cannot be any other thread from the top. qprocsoff 5154 * has completed, and service has completed or won't run in 5155 * future. 
5156 */ 5157 ASSERT(connp->conn_ref == 1); 5158 5159 if (!IPCL_IS_NONSTR(connp)) { 5160 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 5161 } else { 5162 ip_free_helper_stream(connp); 5163 } 5164 5165 connp->conn_ref--; 5166 ipcl_conn_destroy(connp); 5167 } 5168 5169 /* ARGSUSED1 */ 5170 int 5171 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 5172 { 5173 conn_t *connp = (conn_t *)proto_handle; 5174 5175 /* All Solaris components should pass a cred for this operation. */ 5176 ASSERT(cr != NULL); 5177 5178 udp_do_close(connp); 5179 return (0); 5180 } 5181 5182 static int 5183 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 5184 boolean_t bind_to_req_port_only) 5185 { 5186 sin_t *sin; 5187 sin6_t *sin6; 5188 udp_t *udp = connp->conn_udp; 5189 int error = 0; 5190 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 5191 in_port_t port; /* Host byte order */ 5192 in_port_t requested_port; /* Host byte order */ 5193 int count; 5194 ipaddr_t v4src; /* Set if AF_INET */ 5195 in6_addr_t v6src; 5196 int loopmax; 5197 udp_fanout_t *udpf; 5198 in_port_t lport; /* Network byte order */ 5199 uint_t scopeid = 0; 5200 zoneid_t zoneid = IPCL_ZONEID(connp); 5201 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5202 boolean_t is_inaddr_any; 5203 mlp_type_t addrtype, mlptype; 5204 udp_stack_t *us = udp->udp_us; 5205 5206 switch (len) { 5207 case sizeof (sin_t): /* Complete IPv4 address */ 5208 sin = (sin_t *)sa; 5209 5210 if (sin == NULL || !OK_32PTR((char *)sin)) 5211 return (EINVAL); 5212 5213 if (connp->conn_family != AF_INET || 5214 sin->sin_family != AF_INET) { 5215 return (EAFNOSUPPORT); 5216 } 5217 v4src = sin->sin_addr.s_addr; 5218 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 5219 if (v4src != INADDR_ANY) { 5220 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 5221 B_TRUE); 5222 } 5223 port = ntohs(sin->sin_port); 5224 break; 5225 5226 case sizeof (sin6_t): /* complete IPv6 address */ 5227 sin6 = (sin6_t *)sa; 
5228 5229 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 5230 return (EINVAL); 5231 5232 if (connp->conn_family != AF_INET6 || 5233 sin6->sin6_family != AF_INET6) { 5234 return (EAFNOSUPPORT); 5235 } 5236 v6src = sin6->sin6_addr; 5237 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5238 if (connp->conn_ipv6_v6only) 5239 return (EADDRNOTAVAIL); 5240 5241 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); 5242 if (v4src != INADDR_ANY) { 5243 laddr_type = ip_laddr_verify_v4(v4src, 5244 zoneid, ipst, B_FALSE); 5245 } 5246 } else { 5247 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5248 if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 5249 scopeid = sin6->sin6_scope_id; 5250 laddr_type = ip_laddr_verify_v6(&v6src, 5251 zoneid, ipst, B_TRUE, scopeid); 5252 } 5253 } 5254 port = ntohs(sin6->sin6_port); 5255 break; 5256 5257 default: /* Invalid request */ 5258 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5259 "udp_bind: bad ADDR_length length %u", len); 5260 return (-TBADADDR); 5261 } 5262 5263 /* Is the local address a valid unicast, multicast, or broadcast? */ 5264 if (laddr_type == IPVL_BAD) 5265 return (EADDRNOTAVAIL); 5266 5267 requested_port = port; 5268 5269 if (requested_port == 0 || !bind_to_req_port_only) 5270 bind_to_req_port_only = B_FALSE; 5271 else /* T_BIND_REQ and requested_port != 0 */ 5272 bind_to_req_port_only = B_TRUE; 5273 5274 if (requested_port == 0) { 5275 /* 5276 * If the application passed in zero for the port number, it 5277 * doesn't care which port number we bind to. Get one in the 5278 * valid range. 5279 */ 5280 if (connp->conn_anon_priv_bind) { 5281 port = udp_get_next_priv_port(udp); 5282 } else { 5283 port = udp_update_next_port(udp, 5284 us->us_next_port_to_try, B_TRUE); 5285 } 5286 } else { 5287 /* 5288 * If the port is in the well-known privileged range, 5289 * make sure the caller was privileged. 
5290 */ 5291 int i; 5292 boolean_t priv = B_FALSE; 5293 5294 if (port < us->us_smallest_nonpriv_port) { 5295 priv = B_TRUE; 5296 } else { 5297 for (i = 0; i < us->us_num_epriv_ports; i++) { 5298 if (port == us->us_epriv_ports[i]) { 5299 priv = B_TRUE; 5300 break; 5301 } 5302 } 5303 } 5304 5305 if (priv) { 5306 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 5307 return (-TACCES); 5308 } 5309 } 5310 5311 if (port == 0) 5312 return (-TNOADDR); 5313 5314 /* 5315 * The state must be TS_UNBND. TPI mandates that users must send 5316 * TPI primitives only 1 at a time and wait for the response before 5317 * sending the next primitive. 5318 */ 5319 mutex_enter(&connp->conn_lock); 5320 if (udp->udp_state != TS_UNBND) { 5321 mutex_exit(&connp->conn_lock); 5322 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5323 "udp_bind: bad state, %u", udp->udp_state); 5324 return (-TOUTSTATE); 5325 } 5326 /* 5327 * Copy the source address into our udp structure. This address 5328 * may still be zero; if so, IP will fill in the correct address 5329 * each time an outbound packet is passed to it. Since the udp is 5330 * not yet in the bind hash list, we don't grab the uf_lock to 5331 * change conn_ipversion 5332 */ 5333 if (connp->conn_family == AF_INET) { 5334 ASSERT(sin != NULL); 5335 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); 5336 } else { 5337 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5338 /* 5339 * no need to hold the uf_lock to set the conn_ipversion 5340 * since we are not yet in the fanout list 5341 */ 5342 connp->conn_ipversion = IPV4_VERSION; 5343 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5344 } else { 5345 connp->conn_ipversion = IPV6_VERSION; 5346 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5347 } 5348 } 5349 5350 /* 5351 * If conn_reuseaddr is not set, then we have to make sure that 5352 * the IP address and port number the application requested 5353 * (or we selected for the application) is not being used by 5354 * another stream. 
If another stream is already using the 5355 * requested IP address and port, the behavior depends on 5356 * "bind_to_req_port_only". If set the bind fails; otherwise we 5357 * search for any an unused port to bind to the stream. 5358 * 5359 * As per the BSD semantics, as modified by the Deering multicast 5360 * changes, if udp_reuseaddr is set, then we allow multiple binds 5361 * to the same port independent of the local IP address. 5362 * 5363 * This is slightly different than in SunOS 4.X which did not 5364 * support IP multicast. Note that the change implemented by the 5365 * Deering multicast code effects all binds - not only binding 5366 * to IP multicast addresses. 5367 * 5368 * Note that when binding to port zero we ignore SO_REUSEADDR in 5369 * order to guarantee a unique port. 5370 */ 5371 5372 count = 0; 5373 if (connp->conn_anon_priv_bind) { 5374 /* 5375 * loopmax = (IPPORT_RESERVED-1) - 5376 * us->us_min_anonpriv_port + 1 5377 */ 5378 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 5379 } else { 5380 loopmax = us->us_largest_anon_port - 5381 us->us_smallest_anon_port + 1; 5382 } 5383 5384 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 5385 5386 for (;;) { 5387 udp_t *udp1; 5388 boolean_t found_exclbind = B_FALSE; 5389 conn_t *connp1; 5390 5391 /* 5392 * Walk through the list of udp streams bound to 5393 * requested port with the same IP address. 5394 */ 5395 lport = htons(port); 5396 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 5397 us->us_bind_fanout_size)]; 5398 mutex_enter(&udpf->uf_lock); 5399 for (udp1 = udpf->uf_udp; udp1 != NULL; 5400 udp1 = udp1->udp_bind_hash) { 5401 connp1 = udp1->udp_connp; 5402 5403 if (lport != connp1->conn_lport) 5404 continue; 5405 5406 /* 5407 * On a labeled system, we must treat bindings to ports 5408 * on shared IP addresses by sockets with MAC exemption 5409 * privilege as being in all zones, as there's 5410 * otherwise no way to identify the right receiver. 
5411 */ 5412 if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) 5413 continue; 5414 5415 /* 5416 * If UDP_EXCLBIND is set for either the bound or 5417 * binding endpoint, the semantics of bind 5418 * is changed according to the following chart. 5419 * 5420 * spec = specified address (v4 or v6) 5421 * unspec = unspecified address (v4 or v6) 5422 * A = specified addresses are different for endpoints 5423 * 5424 * bound bind to allowed? 5425 * ------------------------------------- 5426 * unspec unspec no 5427 * unspec spec no 5428 * spec unspec no 5429 * spec spec yes if A 5430 * 5431 * For labeled systems, SO_MAC_EXEMPT behaves the same 5432 * as UDP_EXCLBIND, except that zoneid is ignored. 5433 */ 5434 if (connp1->conn_exclbind || connp->conn_exclbind || 5435 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 5436 if (V6_OR_V4_INADDR_ANY( 5437 connp1->conn_bound_addr_v6) || 5438 is_inaddr_any || 5439 IN6_ARE_ADDR_EQUAL( 5440 &connp1->conn_bound_addr_v6, 5441 &v6src)) { 5442 found_exclbind = B_TRUE; 5443 break; 5444 } 5445 continue; 5446 } 5447 5448 /* 5449 * Check ipversion to allow IPv4 and IPv6 sockets to 5450 * have disjoint port number spaces. 5451 */ 5452 if (connp->conn_ipversion != connp1->conn_ipversion) { 5453 5454 /* 5455 * On the first time through the loop, if the 5456 * the user intentionally specified a 5457 * particular port number, then ignore any 5458 * bindings of the other protocol that may 5459 * conflict. This allows the user to bind IPv6 5460 * alone and get both v4 and v6, or bind both 5461 * both and get each seperately. On subsequent 5462 * times through the loop, we're checking a 5463 * port that we chose (not the user) and thus 5464 * we do not allow casual duplicate bindings. 5465 */ 5466 if (count == 0 && requested_port != 0) 5467 continue; 5468 } 5469 5470 /* 5471 * No difference depending on SO_REUSEADDR. 
5472 * 5473 * If existing port is bound to a 5474 * non-wildcard IP address and 5475 * the requesting stream is bound to 5476 * a distinct different IP addresses 5477 * (non-wildcard, also), keep going. 5478 */ 5479 if (!is_inaddr_any && 5480 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && 5481 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, 5482 &v6src)) { 5483 continue; 5484 } 5485 break; 5486 } 5487 5488 if (!found_exclbind && 5489 (connp->conn_reuseaddr && requested_port != 0)) { 5490 break; 5491 } 5492 5493 if (udp1 == NULL) { 5494 /* 5495 * No other stream has this IP address 5496 * and port number. We can use it. 5497 */ 5498 break; 5499 } 5500 mutex_exit(&udpf->uf_lock); 5501 if (bind_to_req_port_only) { 5502 /* 5503 * We get here only when requested port 5504 * is bound (and only first of the for() 5505 * loop iteration). 5506 * 5507 * The semantics of this bind request 5508 * require it to fail so we return from 5509 * the routine (and exit the loop). 5510 * 5511 */ 5512 mutex_exit(&connp->conn_lock); 5513 return (-TADDRBUSY); 5514 } 5515 5516 if (connp->conn_anon_priv_bind) { 5517 port = udp_get_next_priv_port(udp); 5518 } else { 5519 if ((count == 0) && (requested_port != 0)) { 5520 /* 5521 * If the application wants us to find 5522 * a port, get one to start with. Set 5523 * requested_port to 0, so that we will 5524 * update us->us_next_port_to_try below. 5525 */ 5526 port = udp_update_next_port(udp, 5527 us->us_next_port_to_try, B_TRUE); 5528 requested_port = 0; 5529 } else { 5530 port = udp_update_next_port(udp, port + 1, 5531 B_FALSE); 5532 } 5533 } 5534 5535 if (port == 0 || ++count >= loopmax) { 5536 /* 5537 * We've tried every possible port number and 5538 * there are none available, so send an error 5539 * to the user. 5540 */ 5541 mutex_exit(&connp->conn_lock); 5542 return (-TNOADDR); 5543 } 5544 } 5545 5546 /* 5547 * Copy the source address into our udp structure. 
This address 5548 * may still be zero; if so, ip_attr_connect will fill in the correct 5549 * address when a packet is about to be sent. 5550 * If we are binding to a broadcast or multicast address then 5551 * we just set the conn_bound_addr since we don't want to use 5552 * that as the source address when sending. 5553 */ 5554 connp->conn_bound_addr_v6 = v6src; 5555 connp->conn_laddr_v6 = v6src; 5556 if (scopeid != 0) { 5557 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 5558 connp->conn_ixa->ixa_scopeid = scopeid; 5559 connp->conn_incoming_ifindex = scopeid; 5560 } else { 5561 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5562 connp->conn_incoming_ifindex = connp->conn_bound_if; 5563 } 5564 5565 switch (laddr_type) { 5566 case IPVL_UNICAST_UP: 5567 case IPVL_UNICAST_DOWN: 5568 connp->conn_saddr_v6 = v6src; 5569 connp->conn_mcbc_bind = B_FALSE; 5570 break; 5571 case IPVL_MCAST: 5572 case IPVL_BCAST: 5573 /* ip_set_destination will pick a source address later */ 5574 connp->conn_saddr_v6 = ipv6_all_zeros; 5575 connp->conn_mcbc_bind = B_TRUE; 5576 break; 5577 } 5578 5579 /* Any errors after this point should use late_error */ 5580 connp->conn_lport = lport; 5581 5582 /* 5583 * Now reset the next anonymous port if the application requested 5584 * an anonymous port, or we handed out the next anonymous port. 5585 */ 5586 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { 5587 us->us_next_port_to_try = port + 1; 5588 } 5589 5590 /* Initialize the T_BIND_ACK. 
*/ 5591 if (connp->conn_family == AF_INET) { 5592 sin->sin_port = connp->conn_lport; 5593 } else { 5594 sin6->sin6_port = connp->conn_lport; 5595 } 5596 udp->udp_state = TS_IDLE; 5597 udp_bind_hash_insert(udpf, udp); 5598 mutex_exit(&udpf->uf_lock); 5599 mutex_exit(&connp->conn_lock); 5600 5601 if (cl_inet_bind) { 5602 /* 5603 * Running in cluster mode - register bind information 5604 */ 5605 if (connp->conn_ipversion == IPV4_VERSION) { 5606 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5607 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, 5608 (in_port_t)connp->conn_lport, NULL); 5609 } else { 5610 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5611 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, 5612 (in_port_t)connp->conn_lport, NULL); 5613 } 5614 } 5615 5616 mutex_enter(&connp->conn_lock); 5617 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 5618 if (is_system_labeled() && (!connp->conn_anon_port || 5619 connp->conn_anon_mlp)) { 5620 uint16_t mlpport; 5621 zone_t *zone; 5622 5623 zone = crgetzone(cr); 5624 connp->conn_mlp_type = 5625 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : 5626 mlptSingle; 5627 addrtype = tsol_mlp_addr_type( 5628 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 5629 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 5630 if (addrtype == mlptSingle) { 5631 error = -TNOADDR; 5632 mutex_exit(&connp->conn_lock); 5633 goto late_error; 5634 } 5635 mlpport = connp->conn_anon_port ? PMAPPORT : port; 5636 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 5637 addrtype); 5638 5639 /* 5640 * It is a coding error to attempt to bind an MLP port 5641 * without first setting SOL_SOCKET/SCM_UCRED. 5642 */ 5643 if (mlptype != mlptSingle && 5644 connp->conn_mlp_type == mlptSingle) { 5645 error = EINVAL; 5646 mutex_exit(&connp->conn_lock); 5647 goto late_error; 5648 } 5649 5650 /* 5651 * It is an access violation to attempt to bind an MLP port 5652 * without NET_BINDMLP privilege. 
5653 */ 5654 if (mlptype != mlptSingle && 5655 secpolicy_net_bindmlp(cr) != 0) { 5656 if (connp->conn_debug) { 5657 (void) strlog(UDP_MOD_ID, 0, 1, 5658 SL_ERROR|SL_TRACE, 5659 "udp_bind: no priv for multilevel port %d", 5660 mlpport); 5661 } 5662 error = -TACCES; 5663 mutex_exit(&connp->conn_lock); 5664 goto late_error; 5665 } 5666 5667 /* 5668 * If we're specifically binding a shared IP address and the 5669 * port is MLP on shared addresses, then check to see if this 5670 * zone actually owns the MLP. Reject if not. 5671 */ 5672 if (mlptype == mlptShared && addrtype == mlptShared) { 5673 /* 5674 * No need to handle exclusive-stack zones since 5675 * ALL_ZONES only applies to the shared stack. 5676 */ 5677 zoneid_t mlpzone; 5678 5679 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 5680 htons(mlpport)); 5681 if (connp->conn_zoneid != mlpzone) { 5682 if (connp->conn_debug) { 5683 (void) strlog(UDP_MOD_ID, 0, 1, 5684 SL_ERROR|SL_TRACE, 5685 "udp_bind: attempt to bind port " 5686 "%d on shared addr in zone %d " 5687 "(should be %d)", 5688 mlpport, connp->conn_zoneid, 5689 mlpzone); 5690 } 5691 error = -TACCES; 5692 mutex_exit(&connp->conn_lock); 5693 goto late_error; 5694 } 5695 } 5696 if (connp->conn_anon_port) { 5697 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, 5698 port, B_TRUE); 5699 if (error != 0) { 5700 if (connp->conn_debug) { 5701 (void) strlog(UDP_MOD_ID, 0, 1, 5702 SL_ERROR|SL_TRACE, 5703 "udp_bind: cannot establish anon " 5704 "MLP for port %d", port); 5705 } 5706 error = -TACCES; 5707 mutex_exit(&connp->conn_lock); 5708 goto late_error; 5709 } 5710 } 5711 connp->conn_mlp_type = mlptype; 5712 } 5713 5714 /* 5715 * We create an initial header template here to make a subsequent 5716 * sendto have a starting point. Since conn_last_dst is zero the 5717 * first sendto will always follow the 'dst changed' code path. 5718 * Note that we defer massaging options and the related checksum 5719 * adjustment until we have a destination address. 
5720 */ 5721 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5722 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5723 if (error != 0) { 5724 mutex_exit(&connp->conn_lock); 5725 goto late_error; 5726 } 5727 /* Just in case */ 5728 connp->conn_faddr_v6 = ipv6_all_zeros; 5729 connp->conn_fport = 0; 5730 connp->conn_v6lastdst = ipv6_all_zeros; 5731 mutex_exit(&connp->conn_lock); 5732 5733 error = ip_laddr_fanout_insert(connp); 5734 if (error != 0) 5735 goto late_error; 5736 5737 /* Bind succeeded */ 5738 return (0); 5739 5740 late_error: 5741 /* We had already picked the port number, and then the bind failed */ 5742 mutex_enter(&connp->conn_lock); 5743 udpf = &us->us_bind_fanout[ 5744 UDP_BIND_HASH(connp->conn_lport, 5745 us->us_bind_fanout_size)]; 5746 mutex_enter(&udpf->uf_lock); 5747 connp->conn_saddr_v6 = ipv6_all_zeros; 5748 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5749 connp->conn_laddr_v6 = ipv6_all_zeros; 5750 if (scopeid != 0) { 5751 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5752 connp->conn_incoming_ifindex = connp->conn_bound_if; 5753 } 5754 udp->udp_state = TS_UNBND; 5755 udp_bind_hash_remove(udp, B_TRUE); 5756 connp->conn_lport = 0; 5757 mutex_exit(&udpf->uf_lock); 5758 connp->conn_anon_port = B_FALSE; 5759 connp->conn_mlp_type = mlptSingle; 5760 5761 connp->conn_v6lastdst = ipv6_all_zeros; 5762 5763 /* Restore the header that was built above - different source address */ 5764 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5765 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5766 mutex_exit(&connp->conn_lock); 5767 return (error); 5768 } 5769 5770 int 5771 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5772 socklen_t len, cred_t *cr) 5773 { 5774 int error; 5775 conn_t *connp; 5776 5777 /* All Solaris components should pass a cred for this operation. 
*/ 5778 ASSERT(cr != NULL); 5779 5780 connp = (conn_t *)proto_handle; 5781 5782 if (sa == NULL) 5783 error = udp_do_unbind(connp); 5784 else 5785 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 5786 5787 if (error < 0) { 5788 if (error == -TOUTSTATE) 5789 error = EINVAL; 5790 else 5791 error = proto_tlitosyserr(-error); 5792 } 5793 5794 return (error); 5795 } 5796 5797 static int 5798 udp_implicit_bind(conn_t *connp, cred_t *cr) 5799 { 5800 sin6_t sin6addr; 5801 sin_t *sin; 5802 sin6_t *sin6; 5803 socklen_t len; 5804 int error; 5805 5806 /* All Solaris components should pass a cred for this operation. */ 5807 ASSERT(cr != NULL); 5808 5809 if (connp->conn_family == AF_INET) { 5810 len = sizeof (struct sockaddr_in); 5811 sin = (sin_t *)&sin6addr; 5812 *sin = sin_null; 5813 sin->sin_family = AF_INET; 5814 sin->sin_addr.s_addr = INADDR_ANY; 5815 } else { 5816 ASSERT(connp->conn_family == AF_INET6); 5817 len = sizeof (sin6_t); 5818 sin6 = (sin6_t *)&sin6addr; 5819 *sin6 = sin6_null; 5820 sin6->sin6_family = AF_INET6; 5821 V6_SET_ZERO(sin6->sin6_addr); 5822 } 5823 5824 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len, 5825 cr, B_FALSE); 5826 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5827 } 5828 5829 /* 5830 * This routine removes a port number association from a stream. It 5831 * is called by udp_unbind and udp_tpi_unbind. 
5832 */ 5833 static int 5834 udp_do_unbind(conn_t *connp) 5835 { 5836 udp_t *udp = connp->conn_udp; 5837 udp_fanout_t *udpf; 5838 udp_stack_t *us = udp->udp_us; 5839 5840 if (cl_inet_unbind != NULL) { 5841 /* 5842 * Running in cluster mode - register unbind information 5843 */ 5844 if (connp->conn_ipversion == IPV4_VERSION) { 5845 (*cl_inet_unbind)( 5846 connp->conn_netstack->netstack_stackid, 5847 IPPROTO_UDP, AF_INET, 5848 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5849 (in_port_t)connp->conn_lport, NULL); 5850 } else { 5851 (*cl_inet_unbind)( 5852 connp->conn_netstack->netstack_stackid, 5853 IPPROTO_UDP, AF_INET6, 5854 (uint8_t *)&(connp->conn_laddr_v6), 5855 (in_port_t)connp->conn_lport, NULL); 5856 } 5857 } 5858 5859 mutex_enter(&connp->conn_lock); 5860 /* If a bind has not been done, we can't unbind. */ 5861 if (udp->udp_state == TS_UNBND) { 5862 mutex_exit(&connp->conn_lock); 5863 return (-TOUTSTATE); 5864 } 5865 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5866 us->us_bind_fanout_size)]; 5867 mutex_enter(&udpf->uf_lock); 5868 udp_bind_hash_remove(udp, B_TRUE); 5869 connp->conn_saddr_v6 = ipv6_all_zeros; 5870 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5871 connp->conn_laddr_v6 = ipv6_all_zeros; 5872 connp->conn_mcbc_bind = B_FALSE; 5873 connp->conn_lport = 0; 5874 /* In case we were also connected */ 5875 connp->conn_faddr_v6 = ipv6_all_zeros; 5876 connp->conn_fport = 0; 5877 mutex_exit(&udpf->uf_lock); 5878 5879 connp->conn_v6lastdst = ipv6_all_zeros; 5880 udp->udp_state = TS_UNBND; 5881 5882 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5883 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5884 mutex_exit(&connp->conn_lock); 5885 5886 ip_unbind(connp); 5887 5888 return (0); 5889 } 5890 5891 /* 5892 * It associates a default destination address with the stream. 
 *
 * sa/len describe the destination; len selects between a sin_t and a
 * sin6_t.  cr and pid are recorded on the conn_t and on the transmit
 * attributes as the identity of the caller of connect.
 *
 * Returns 0 on success.  Failures come back in two forms that callers
 * must both handle: positive errno values (EINVAL, EADDRNOTAVAIL, ENOMEM)
 * and negated TLI errors (-TBADADDR, -TOUTSTATE).  Errors from
 * conn_connect(), ipcl_conn_insert() and udp_build_hdr_template() are
 * passed through unchanged.
 */
static int
udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
    cred_t *cr, pid_t pid)
{
	sin6_t		*sin6;
	sin_t		*sin;
	in6_addr_t 	v6dst;
	ipaddr_t 	v4dst;
	uint16_t 	dstport;
	uint32_t 	flowinfo;
	udp_fanout_t	*udpf;
	udp_t		*udp, *udp1;
	ushort_t	ipversion;
	udp_stack_t	*us;
	int		error;
	conn_t		*connp1;
	ip_xmit_attr_t	*ixa;
	ip_xmit_attr_t	*oldixa;
	uint_t		scopeid = 0;
	uint_t		srcid = 0;
	in6_addr_t	v6src = connp->conn_saddr_v6;

	udp = connp->conn_udp;
	us = udp->udp_us;

	/*
	 * Address has been verified by the caller
	 */
	switch (len) {
	default:
		/*
		 * Should never happen
		 */
		return (EINVAL);

	case sizeof (sin_t):
		sin = (sin_t *)sa;
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(connp->conn_ipversion == IPV4_VERSION);
		ipversion = IPV4_VERSION;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)sa;
		v6dst = sin6->sin6_addr;
		dstport = sin6->sin6_port;
		srcid = sin6->__sin6_src_id;
		/* A source id only matters when no source is set yet. */
		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
			    connp->conn_netstack);
		}
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			if (connp->conn_ipv6_v6only)
				return (EADDRNOTAVAIL);

			/*
			 * Destination address is mapped IPv6 address.
			 * Source bound address should be unspecified or
			 * IPv6 mapped address as well.
			 */
			if (!IN6_IS_ADDR_UNSPECIFIED(
			    &connp->conn_bound_addr_v6) &&
			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
				return (EADDRNOTAVAIL);
			}
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			ipversion = IPV4_VERSION;
			flowinfo = 0;
		} else {
			ipversion = IPV6_VERSION;
			flowinfo = sin6->sin6_flowinfo;
			/* The scope id is only honoured for link-locals. */
			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
				scopeid = sin6->sin6_scope_id;
		}
		break;
	}

	/* A zero destination port is rejected as a bad address. */
	if (dstport == 0)
		return (-TBADADDR);

	/*
	 * If there is a different thread using conn_ixa then we get a new
	 * copy and cut the old one loose from conn_ixa. Otherwise we use
	 * conn_ixa and prevent any other thread from using/changing it.
	 * Once connect() is done other threads can use conn_ixa since the
	 * refcnt will be back at one.
	 * We defer updating conn_ixa until later to handle any concurrent
	 * conn_ixa_cleanup thread.
	 */
	ixa = conn_get_ixa(connp, B_FALSE);
	if (ixa == NULL)
		return (ENOMEM);

	ASSERT(ixa->ixa_refcnt >= 2);
	ASSERT(ixa == connp->conn_ixa);

	mutex_enter(&connp->conn_lock);
	/*
	 * This udp_t must have bound to a port already before doing a connect.
	 * Reject if a connect is in progress (we drop conn_lock during
	 * udp_do_connect).
	 */
	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
		mutex_exit(&connp->conn_lock);
		/*
		 * NOTE(review): udp_state is re-read for the log message
		 * after conn_lock has been dropped, so the logged value may
		 * differ from the one tested above.
		 */
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		ixa_refrele(ixa);
		return (-TOUTSTATE);
	}
	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);

	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
	    us->us_bind_fanout_size)];

	/* Lock order is conn_lock -> uf_lock. */
	mutex_enter(&udpf->uf_lock);
	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_faddr_v6 = ipv6_all_zeros;
		connp->conn_fport = 0;
		udp->udp_state = TS_IDLE;
	}

	connp->conn_fport = dstport;
	connp->conn_ipversion = ipversion;
	if (ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (connp->conn_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		connp->conn_faddr_v6 = v6dst;
		connp->conn_flowinfo = 0;
	} else {
		ASSERT(connp->conn_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		connp->conn_faddr_v6 = v6dst;
		connp->conn_flowinfo = flowinfo;
	}
	mutex_exit(&udpf->uf_lock);

	/*
	 * We update our cred/cpid based on the caller of connect
	 */
	if (connp->conn_cred != cr) {
		/* Take the new hold before dropping the old one. */
		crhold(cr);
		crfree(connp->conn_cred);
		connp->conn_cred = cr;
	}
	connp->conn_cpid = pid;
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;
	if (is_system_labeled()) {
		/* We need to restart with a label based on the cred */
		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
	}

	if (scopeid != 0) {
		ixa->ixa_flags |= IXAF_SCOPEID_SET;
		ixa->ixa_scopeid = scopeid;
		connp->conn_incoming_ifindex = scopeid;
	} else {
		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		connp->conn_incoming_ifindex = connp->conn_bound_if;
	}
	/*
	 * conn_connect will drop conn_lock and reacquire it.
	 * To prevent a send* from messing with this udp_t while the lock
	 * is dropped we set udp_state and clear conn_v6lastdst.
	 * That will make all send* fail with EISCONN.
	 */
	connp->conn_v6lastdst = ipv6_all_zeros;
	udp->udp_state = TS_WCON_CREQ;

	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
	mutex_exit(&connp->conn_lock);
	if (error != 0)
		goto connect_failed;

	/*
	 * The addresses have been verified. Time to insert in
	 * the correct fanout list.
	 */
	error = ipcl_conn_insert(connp);
	if (error != 0)
		goto connect_failed;

	mutex_enter(&connp->conn_lock);
	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		goto connect_failed;
	}

	udp->udp_state = TS_DATA_XFER;
	/* Record this as the "last" send even though we haven't sent any */
	connp->conn_v6lastdst = connp->conn_faddr_v6;
	connp->conn_lastipversion = connp->conn_ipversion;
	connp->conn_lastdstport = connp->conn_fport;
	connp->conn_lastflowinfo = connp->conn_flowinfo;
	connp->conn_lastscopeid = scopeid;
	connp->conn_lastsrcid = srcid;
	/* Also remember a source to use together with lastdst */
	connp->conn_v6lastsrc = v6src;

	/* Publish ixa as conn_ixa and drop the reference to the old one. */
	oldixa = conn_replace_ixa(connp, ixa);
	mutex_exit(&connp->conn_lock);
	ixa_refrele(oldixa);

	/*
	 * We've picked a source address above. Now we can
	 * verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER, skipping ourselves.
6132 */ 6133 mutex_enter(&udpf->uf_lock); 6134 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 6135 if (udp1->udp_state != TS_DATA_XFER) 6136 continue; 6137 6138 if (udp1 == udp) 6139 continue; 6140 6141 connp1 = udp1->udp_connp; 6142 if (connp->conn_lport != connp1->conn_lport || 6143 connp->conn_ipversion != connp1->conn_ipversion || 6144 dstport != connp1->conn_fport || 6145 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 6146 &connp1->conn_laddr_v6) || 6147 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) || 6148 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) || 6149 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid))) 6150 continue; 6151 mutex_exit(&udpf->uf_lock); 6152 error = -TBADADDR; 6153 goto connect_failed; 6154 } 6155 if (cl_inet_connect2 != NULL) { 6156 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 6157 if (error != 0) { 6158 mutex_exit(&udpf->uf_lock); 6159 error = -TBADADDR; 6160 goto connect_failed; 6161 } 6162 } 6163 mutex_exit(&udpf->uf_lock); 6164 6165 ixa_refrele(ixa); 6166 return (0); 6167 6168 connect_failed: 6169 if (ixa != NULL) 6170 ixa_refrele(ixa); 6171 mutex_enter(&connp->conn_lock); 6172 mutex_enter(&udpf->uf_lock); 6173 udp->udp_state = TS_IDLE; 6174 connp->conn_faddr_v6 = ipv6_all_zeros; 6175 connp->conn_fport = 0; 6176 /* In case the source address was set above */ 6177 if (connp->conn_mcbc_bind) 6178 connp->conn_saddr_v6 = ipv6_all_zeros; 6179 else 6180 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 6181 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 6182 mutex_exit(&udpf->uf_lock); 6183 6184 connp->conn_v6lastdst = ipv6_all_zeros; 6185 connp->conn_flowinfo = 0; 6186 6187 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 6188 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 6189 mutex_exit(&connp->conn_lock); 6190 return (error); 6191 } 6192 6193 static int 6194 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 6195 socklen_t len, sock_connid_t *id, 
cred_t *cr) 6196 { 6197 conn_t *connp = (conn_t *)proto_handle; 6198 udp_t *udp = connp->conn_udp; 6199 int error; 6200 boolean_t did_bind = B_FALSE; 6201 pid_t pid = curproc->p_pid; 6202 6203 /* All Solaris components should pass a cred for this operation. */ 6204 ASSERT(cr != NULL); 6205 6206 if (sa == NULL) { 6207 /* 6208 * Disconnect 6209 * Make sure we are connected 6210 */ 6211 if (udp->udp_state != TS_DATA_XFER) 6212 return (EINVAL); 6213 6214 error = udp_disconnect(connp); 6215 return (error); 6216 } 6217 6218 error = proto_verify_ip_addr(connp->conn_family, sa, len); 6219 if (error != 0) 6220 goto done; 6221 6222 /* do an implicit bind if necessary */ 6223 if (udp->udp_state == TS_UNBND) { 6224 error = udp_implicit_bind(connp, cr); 6225 /* 6226 * We could be racing with an actual bind, in which case 6227 * we would see EPROTO. We cross our fingers and try 6228 * to connect. 6229 */ 6230 if (!(error == 0 || error == EPROTO)) 6231 goto done; 6232 did_bind = B_TRUE; 6233 } 6234 /* 6235 * set SO_DGRAM_ERRIND 6236 */ 6237 connp->conn_dgram_errind = B_TRUE; 6238 6239 error = udp_do_connect(connp, sa, len, cr, pid); 6240 6241 if (error != 0 && did_bind) { 6242 int unbind_err; 6243 6244 unbind_err = udp_do_unbind(connp); 6245 ASSERT(unbind_err == 0); 6246 } 6247 6248 if (error == 0) { 6249 *id = 0; 6250 (*connp->conn_upcalls->su_connected) 6251 (connp->conn_upper_handle, 0, NULL, -1); 6252 } else if (error < 0) { 6253 error = proto_tlitosyserr(-error); 6254 } 6255 6256 done: 6257 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 6258 /* 6259 * No need to hold locks to set state 6260 * after connect failure socket state is undefined 6261 * We set the state only to imitate old sockfs behavior 6262 */ 6263 udp->udp_state = TS_IDLE; 6264 } 6265 return (error); 6266 } 6267 6268 int 6269 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6270 cred_t *cr) 6271 { 6272 sin6_t *sin6; 6273 sin_t *sin = NULL; 6274 uint_t srcid; 6275 conn_t *connp = 
(conn_t *)proto_handle; 6276 udp_t *udp = connp->conn_udp; 6277 int error = 0; 6278 udp_stack_t *us = udp->udp_us; 6279 ushort_t ipversion; 6280 pid_t pid = curproc->p_pid; 6281 ip_xmit_attr_t *ixa; 6282 6283 ASSERT(DB_TYPE(mp) == M_DATA); 6284 6285 /* All Solaris components should pass a cred for this operation. */ 6286 ASSERT(cr != NULL); 6287 6288 /* do an implicit bind if necessary */ 6289 if (udp->udp_state == TS_UNBND) { 6290 error = udp_implicit_bind(connp, cr); 6291 /* 6292 * We could be racing with an actual bind, in which case 6293 * we would see EPROTO. We cross our fingers and try 6294 * to connect. 6295 */ 6296 if (!(error == 0 || error == EPROTO)) { 6297 freemsg(mp); 6298 return (error); 6299 } 6300 } 6301 6302 /* Connected? */ 6303 if (msg->msg_name == NULL) { 6304 if (udp->udp_state != TS_DATA_XFER) { 6305 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6306 return (EDESTADDRREQ); 6307 } 6308 if (msg->msg_controllen != 0) { 6309 error = udp_output_ancillary(connp, NULL, NULL, mp, 6310 NULL, msg, cr, pid); 6311 } else { 6312 error = udp_output_connected(connp, mp, cr, pid); 6313 } 6314 if (us->us_sendto_ignerr) 6315 return (0); 6316 else 6317 return (error); 6318 } 6319 if (udp->udp_state == TS_DATA_XFER) { 6320 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6321 return (EISCONN); 6322 } 6323 error = proto_verify_ip_addr(connp->conn_family, 6324 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6325 if (error != 0) { 6326 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6327 return (error); 6328 } 6329 switch (connp->conn_family) { 6330 case AF_INET6: 6331 sin6 = (sin6_t *)msg->msg_name; 6332 6333 srcid = sin6->__sin6_src_id; 6334 6335 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6336 /* 6337 * Destination is a non-IPv4-compatible IPv6 address. 6338 * Send out an IPv6 format packet. 6339 */ 6340 6341 /* 6342 * If the local address is a mapped address return 6343 * an error. 
			 * It would be possible to send an IPv6 packet but the
			 * response would never make it back to the application
			 * since it is bound to a mapped address.
			 */
			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}
			/* An unspecified destination means loopback. */
			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				sin6->sin6_addr = ipv6_loopback;
			ipversion = IPV6_VERSION;
		} else {
			if (connp->conn_ipv6_v6only) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}

			/*
			 * If the local address is not zero or a mapped address
			 * return an error. It would be possible to send an
			 * IPv4 packet but the response would never make it
			 * back to the application since it is bound to a
			 * non-mapped address.
			 */
			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}

			/* A zero IPv4 destination means loopback. */
			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
				V4_PART_OF_V6(sin6->sin6_addr) =
				    htonl(INADDR_LOOPBACK);
			}
			ipversion = IPV4_VERSION;
		}

		/*
		 * We have to allocate an ip_xmit_attr_t before we grab
		 * conn_lock and we need to hold conn_lock once we've checked
		 * conn_same_as_last_v6 to handle concurrent send* calls on a
		 * socket.
		 */
		if (msg->msg_controllen == 0) {
			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (ENOMEM);
			}
		} else {
			/* The ancillary path is called without an ixa. */
			ixa = NULL;
		}
		mutex_enter(&connp->conn_lock);
		if (udp->udp_delayed_error != 0) {
			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;

			/* The delayed error is consumed whether or not it matches. */
			error = udp->udp_delayed_error;
			udp->udp_delayed_error = 0;

			/* Compare IP address, port, and family */

			if (sin6->sin6_port == sin2->sin6_port &&
			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
			    &sin2->sin6_addr) &&
			    sin6->sin6_family == sin2->sin6_family) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				if (ixa != NULL)
					ixa_refrele(ixa);
				return (error);
			}
		}

		if (msg->msg_controllen != 0) {
			mutex_exit(&connp->conn_lock);
			ASSERT(ixa == NULL);
			error = udp_output_ancillary(connp, NULL, sin6, mp,
			    NULL, msg, cr, pid);
		} else if (conn_same_as_last_v6(connp, sin6) &&
		    connp->conn_lastsrcid == srcid &&
		    ipsec_outbound_policy_current(ixa)) {
			/* udp_output_lastdst drops conn_lock */
			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
		} else {
			/* udp_output_newdst drops conn_lock */
			error = udp_output_newdst(connp, mp, NULL, sin6,
			    ipversion, cr, pid, ixa);
		}
		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
		/* us_sendto_ignerr hides output errors from the caller. */
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	case AF_INET:
		sin = (sin_t *)msg->msg_name;

		ipversion = IPV4_VERSION;

		/* A zero destination means loopback. */
		if (sin->sin_addr.s_addr == INADDR_ANY)
			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);

		/*
		 * We have to allocate an ip_xmit_attr_t before we grab
		 * conn_lock and we need to hold conn_lock once we've checked
		 * conn_same_as_last_v4 to handle concurrent send* on a socket.
		 */
		if (msg->msg_controllen == 0) {
			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (ENOMEM);
			}
		} else {
			/* The ancillary path is called without an ixa. */
			ixa = NULL;
		}
		mutex_enter(&connp->conn_lock);
		if (udp->udp_delayed_error != 0) {
			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;

			/* The delayed error is consumed whether or not it matches. */
			error = udp->udp_delayed_error;
			udp->udp_delayed_error = 0;

			/* Compare IP address and port */

			if (sin->sin_port == sin2->sin_port &&
			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				if (ixa != NULL)
					ixa_refrele(ixa);
				return (error);
			}
		}
		if (msg->msg_controllen != 0) {
			mutex_exit(&connp->conn_lock);
			ASSERT(ixa == NULL);
			error = udp_output_ancillary(connp, sin, NULL, mp,
			    NULL, msg, cr, pid);
		} else if (conn_same_as_last_v4(connp, sin) &&
		    ipsec_outbound_policy_current(ixa)) {
			/* udp_output_lastdst drops conn_lock */
			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
		} else {
			/* udp_output_newdst drops conn_lock */
			error = udp_output_newdst(connp, mp, sin, NULL,
			    ipversion, cr, pid, ixa);
		}
		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
		/* us_sendto_ignerr hides output errors from the caller. */
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	default:
		return (EINVAL);
	}
}

/*
 * Attach the endpoint to the stream queue q: the conn_t is wired to the
 * queue pair, stream head options are sent up, the helper stream is freed,
 * the current addresses and options are reported through quiesced_cb, and
 * any packets queued on the udp_t during the fallback are pushed up.
 * Always returns 0.
 */
int
udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
    boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb)
{
	conn_t	*connp = (conn_t *)proto_handle;
	udp_t	*udp;
	struct T_capability_ack tca;
	struct sockaddr_in6 laddr, faddr;
	socklen_t laddrlen, faddrlen;
	short opts;
	struct stroptions *stropt;
	mblk_t *stropt_mp;
	int error;

	udp = connp->conn_udp;

	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);

	/*
	 * setup the fallback
stream that was allocated 6521 */ 6522 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6523 connp->conn_minor_arena = WR(q)->q_ptr; 6524 6525 RD(q)->q_ptr = WR(q)->q_ptr = connp; 6526 6527 WR(q)->q_qinfo = &udp_winit; 6528 6529 connp->conn_rq = RD(q); 6530 connp->conn_wq = WR(q); 6531 6532 /* Notify stream head about options before sending up data */ 6533 stropt_mp->b_datap->db_type = M_SETOPTS; 6534 stropt_mp->b_wptr += sizeof (*stropt); 6535 stropt = (struct stroptions *)stropt_mp->b_rptr; 6536 stropt->so_flags = SO_WROFF | SO_HIWAT; 6537 stropt->so_wroff = connp->conn_wroff; 6538 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 6539 putnext(RD(q), stropt_mp); 6540 6541 /* 6542 * Free the helper stream 6543 */ 6544 ip_free_helper_stream(connp); 6545 6546 if (!issocket) 6547 udp_use_pure_tpi(udp); 6548 6549 /* 6550 * Collect the information needed to sync with the sonode 6551 */ 6552 udp_do_capability_ack(udp, &tca, TC1_INFO); 6553 6554 laddrlen = faddrlen = sizeof (sin6_t); 6555 (void) udp_getsockname((sock_lower_handle_t)connp, 6556 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6557 error = udp_getpeername((sock_lower_handle_t)connp, 6558 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6559 if (error != 0) 6560 faddrlen = 0; 6561 6562 opts = 0; 6563 if (connp->conn_dgram_errind) 6564 opts |= SO_DGRAM_ERRIND; 6565 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 6566 opts |= SO_DONTROUTE; 6567 6568 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 6569 (struct sockaddr *)&laddr, laddrlen, 6570 (struct sockaddr *)&faddr, faddrlen, opts); 6571 6572 mutex_enter(&udp->udp_recv_lock); 6573 /* 6574 * Attempts to send data up during fallback will result in it being 6575 * queued in udp_t. Now we push up any queued packets. 
6576 */ 6577 while (udp->udp_fallback_queue_head != NULL) { 6578 mblk_t *mp; 6579 mp = udp->udp_fallback_queue_head; 6580 udp->udp_fallback_queue_head = mp->b_next; 6581 mutex_exit(&udp->udp_recv_lock); 6582 mp->b_next = NULL; 6583 putnext(RD(q), mp); 6584 mutex_enter(&udp->udp_recv_lock); 6585 } 6586 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6587 /* 6588 * No longer a streams less socket 6589 */ 6590 mutex_enter(&connp->conn_lock); 6591 connp->conn_flags &= ~IPCL_NONSTR; 6592 mutex_exit(&connp->conn_lock); 6593 6594 mutex_exit(&udp->udp_recv_lock); 6595 6596 ASSERT(connp->conn_ref >= 1); 6597 6598 return (0); 6599 } 6600 6601 /* ARGSUSED3 */ 6602 int 6603 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6604 socklen_t *salenp, cred_t *cr) 6605 { 6606 conn_t *connp = (conn_t *)proto_handle; 6607 udp_t *udp = connp->conn_udp; 6608 int error; 6609 6610 /* All Solaris components should pass a cred for this operation. */ 6611 ASSERT(cr != NULL); 6612 6613 mutex_enter(&connp->conn_lock); 6614 if (udp->udp_state != TS_DATA_XFER) 6615 error = ENOTCONN; 6616 else 6617 error = conn_getpeername(connp, sa, salenp); 6618 mutex_exit(&connp->conn_lock); 6619 return (error); 6620 } 6621 6622 /* ARGSUSED3 */ 6623 int 6624 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6625 socklen_t *salenp, cred_t *cr) 6626 { 6627 conn_t *connp = (conn_t *)proto_handle; 6628 int error; 6629 6630 /* All Solaris components should pass a cred for this operation. 
*/ 6631 ASSERT(cr != NULL); 6632 6633 mutex_enter(&connp->conn_lock); 6634 error = conn_getsockname(connp, sa, salenp); 6635 mutex_exit(&connp->conn_lock); 6636 return (error); 6637 } 6638 6639 int 6640 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6641 void *optvalp, socklen_t *optlen, cred_t *cr) 6642 { 6643 conn_t *connp = (conn_t *)proto_handle; 6644 int error; 6645 t_uscalar_t max_optbuf_len; 6646 void *optvalp_buf; 6647 int len; 6648 6649 /* All Solaris components should pass a cred for this operation. */ 6650 ASSERT(cr != NULL); 6651 6652 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6653 udp_opt_obj.odb_opt_des_arr, 6654 udp_opt_obj.odb_opt_arr_cnt, 6655 B_FALSE, B_TRUE, cr); 6656 if (error != 0) { 6657 if (error < 0) 6658 error = proto_tlitosyserr(-error); 6659 return (error); 6660 } 6661 6662 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6663 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6664 if (len == -1) { 6665 kmem_free(optvalp_buf, max_optbuf_len); 6666 return (EINVAL); 6667 } 6668 6669 /* 6670 * update optlen and copy option value 6671 */ 6672 t_uscalar_t size = MIN(len, *optlen); 6673 6674 bcopy(optvalp_buf, optvalp, size); 6675 bcopy(&size, optlen, sizeof (size)); 6676 6677 kmem_free(optvalp_buf, max_optbuf_len); 6678 return (0); 6679 } 6680 6681 int 6682 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6683 const void *optvalp, socklen_t optlen, cred_t *cr) 6684 { 6685 conn_t *connp = (conn_t *)proto_handle; 6686 int error; 6687 6688 /* All Solaris components should pass a cred for this operation. 
*/ 6689 ASSERT(cr != NULL); 6690 6691 error = proto_opt_check(level, option_name, optlen, NULL, 6692 udp_opt_obj.odb_opt_des_arr, 6693 udp_opt_obj.odb_opt_arr_cnt, 6694 B_TRUE, B_FALSE, cr); 6695 6696 if (error != 0) { 6697 if (error < 0) 6698 error = proto_tlitosyserr(-error); 6699 return (error); 6700 } 6701 6702 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6703 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6704 NULL, cr); 6705 6706 ASSERT(error >= 0); 6707 6708 return (error); 6709 } 6710 6711 void 6712 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6713 { 6714 conn_t *connp = (conn_t *)proto_handle; 6715 udp_t *udp = connp->conn_udp; 6716 6717 mutex_enter(&udp->udp_recv_lock); 6718 connp->conn_flow_cntrld = B_FALSE; 6719 mutex_exit(&udp->udp_recv_lock); 6720 } 6721 6722 /* ARGSUSED2 */ 6723 int 6724 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6725 { 6726 conn_t *connp = (conn_t *)proto_handle; 6727 6728 /* All Solaris components should pass a cred for this operation. */ 6729 ASSERT(cr != NULL); 6730 6731 /* shut down the send side */ 6732 if (how != SHUT_RD) 6733 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6734 SOCK_OPCTL_SHUT_SEND, 0); 6735 /* shut down the recv side */ 6736 if (how != SHUT_WR) 6737 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6738 SOCK_OPCTL_SHUT_RECV, 0); 6739 return (0); 6740 } 6741 6742 int 6743 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6744 int mode, int32_t *rvalp, cred_t *cr) 6745 { 6746 conn_t *connp = (conn_t *)proto_handle; 6747 int error; 6748 6749 /* All Solaris components should pass a cred for this operation. */ 6750 ASSERT(cr != NULL); 6751 6752 /* 6753 * If we don't have a helper stream then create one. 6754 * ip_create_helper_stream takes care of locking the conn_t, 6755 * so this check for NULL is just a performance optimization. 
6756 */ 6757 if (connp->conn_helper_info == NULL) { 6758 udp_stack_t *us = connp->conn_udp->udp_us; 6759 6760 ASSERT(us->us_ldi_ident != NULL); 6761 6762 /* 6763 * Create a helper stream for non-STREAMS socket. 6764 */ 6765 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6766 if (error != 0) { 6767 ip0dbg(("tcp_ioctl: create of IP helper stream " 6768 "failed %d\n", error)); 6769 return (error); 6770 } 6771 } 6772 6773 switch (cmd) { 6774 case _SIOCSOCKFALLBACK: 6775 case TI_GETPEERNAME: 6776 case TI_GETMYNAME: 6777 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6778 cmd)); 6779 error = EINVAL; 6780 break; 6781 default: 6782 /* 6783 * Pass on to IP using helper stream 6784 */ 6785 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6786 cmd, arg, mode, cr, rvalp); 6787 break; 6788 } 6789 return (error); 6790 } 6791 6792 /* ARGSUSED */ 6793 int 6794 udp_accept(sock_lower_handle_t lproto_handle, 6795 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6796 cred_t *cr) 6797 { 6798 return (EOPNOTSUPP); 6799 } 6800 6801 /* ARGSUSED */ 6802 int 6803 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6804 { 6805 return (EOPNOTSUPP); 6806 } 6807 6808 sock_downcalls_t sock_udp_downcalls = { 6809 udp_activate, /* sd_activate */ 6810 udp_accept, /* sd_accept */ 6811 udp_bind, /* sd_bind */ 6812 udp_listen, /* sd_listen */ 6813 udp_connect, /* sd_connect */ 6814 udp_getpeername, /* sd_getpeername */ 6815 udp_getsockname, /* sd_getsockname */ 6816 udp_getsockopt, /* sd_getsockopt */ 6817 udp_setsockopt, /* sd_setsockopt */ 6818 udp_send, /* sd_send */ 6819 NULL, /* sd_send_uio */ 6820 NULL, /* sd_recv_uio */ 6821 NULL, /* sd_poll */ 6822 udp_shutdown, /* sd_shutdown */ 6823 udp_clr_flowctrl, /* sd_setflowctrl */ 6824 udp_ioctl, /* sd_ioctl */ 6825 udp_close /* sd_close */ 6826 }; 6827