1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 /* Copyright (c) 1990 Mentat Inc. */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/stropts.h> 29 #include <sys/strlog.h> 30 #include <sys/strsun.h> 31 #define _SUN_TPI_VERSION 2 32 #include <sys/tihdr.h> 33 #include <sys/timod.h> 34 #include <sys/ddi.h> 35 #include <sys/sunddi.h> 36 #include <sys/strsubr.h> 37 #include <sys/suntpi.h> 38 #include <sys/xti_inet.h> 39 #include <sys/kmem.h> 40 #include <sys/cred_impl.h> 41 #include <sys/policy.h> 42 #include <sys/priv.h> 43 #include <sys/ucred.h> 44 #include <sys/zone.h> 45 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/sockio.h> 49 #include <sys/vtrace.h> 50 #include <sys/sdt.h> 51 #include <sys/debug.h> 52 #include <sys/isa_defs.h> 53 #include <sys/random.h> 54 #include <netinet/in.h> 55 #include <netinet/ip6.h> 56 #include <netinet/icmp6.h> 57 #include <netinet/udp.h> 58 59 #include <inet/common.h> 60 #include <inet/ip.h> 61 #include <inet/ip_impl.h> 62 #include <inet/ipsec_impl.h> 63 #include <inet/ip6.h> 64 #include 
<inet/ip_ire.h> 65 #include <inet/ip_if.h> 66 #include <inet/ip_multi.h> 67 #include <inet/ip_ndp.h> 68 #include <inet/proto_set.h> 69 #include <inet/mib2.h> 70 #include <inet/optcom.h> 71 #include <inet/snmpcom.h> 72 #include <inet/kstatcom.h> 73 #include <inet/ipclassifier.h> 74 #include <sys/squeue_impl.h> 75 #include <inet/ipnet.h> 76 #include <sys/ethernet.h> 77 78 #include <sys/tsol/label.h> 79 #include <sys/tsol/tnet.h> 80 #include <rpc/pmap_prot.h> 81 82 #include <inet/udp_impl.h> 83 84 /* 85 * Synchronization notes: 86 * 87 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 88 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 89 * protects the contents of the udp_t. uf_lock protects the address and the 90 * fanout information. 91 * The lock order is conn_lock -> uf_lock. 92 * 93 * The fanout lock uf_lock: 94 * When a UDP endpoint is bound to a local port, it is inserted into 95 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 96 * The size of the array is controlled by the udp_bind_fanout_size variable. 97 * This variable can be changed in /etc/system if the default value is 98 * not large enough. Each bind hash bucket is protected by a per bucket 99 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 100 * structure and a few other fields in the udp_t. A UDP endpoint is removed 101 * from the bind hash list only when it is being unbound or being closed. 102 * The per bucket lock also protects a UDP endpoint's state changes. 103 * 104 * Plumbing notes: 105 * UDP is always a device driver. For compatibility with mibopen() code 106 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 107 * dummy module. 
108 * 109 * The above implies that we don't support any intermediate module to 110 * reside in between /dev/ip and udp -- in fact, we never supported such 111 * scenario in the past as the inter-layer communication semantics have 112 * always been private. 113 */ 114 115 /* For /etc/system control */ 116 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 117 118 static void udp_addr_req(queue_t *q, mblk_t *mp); 119 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 120 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 121 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 122 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 123 const in6_addr_t *, in_port_t, uint32_t); 124 static void udp_capability_req(queue_t *q, mblk_t *mp); 125 static int udp_tpi_close(queue_t *q, int flags); 126 static void udp_close_free(conn_t *); 127 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 128 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 129 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 130 int sys_error); 131 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 132 t_scalar_t tlierr, int sys_error); 133 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 134 cred_t *cr); 135 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 136 char *value, caddr_t cp, cred_t *cr); 137 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 138 char *value, caddr_t cp, cred_t *cr); 139 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 140 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 141 ip_recv_attr_t *ira); 142 static void udp_info_req(queue_t *q, mblk_t *mp); 143 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 144 static void udp_lrput(queue_t *, mblk_t *); 145 static void udp_lwput(queue_t *, mblk_t *); 146 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 147 cred_t *credp, 
boolean_t isv6); 148 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 149 cred_t *credp); 150 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 151 cred_t *credp); 152 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 153 int udp_opt_set(conn_t *connp, uint_t optset_context, 154 int level, int name, uint_t inlen, 155 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 156 void *thisdg_attrs, cred_t *cr); 157 int udp_opt_get(conn_t *connp, int level, int name, 158 uchar_t *ptr); 159 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 160 pid_t pid); 161 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 162 pid_t pid, ip_xmit_attr_t *ixa); 163 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 164 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 165 ip_xmit_attr_t *ixa); 166 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 167 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 168 int *); 169 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 170 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 171 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 172 static void udp_ud_err_connected(conn_t *, t_scalar_t); 173 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 174 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 175 boolean_t random); 176 static void udp_wput_other(queue_t *q, mblk_t *mp); 177 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 178 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 179 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 180 181 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 182 static void udp_stack_fini(netstackid_t stackid, void *arg); 183 184 static void *udp_kstat_init(netstackid_t stackid); 185 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 186 
static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 187 static void udp_kstat2_fini(netstackid_t, kstat_t *); 188 static int udp_kstat_update(kstat_t *kp, int rw); 189 190 191 /* Common routines for TPI and socket module */ 192 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 193 194 /* Common routine for TPI and socket module */ 195 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); 196 static void udp_do_close(conn_t *); 197 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 198 boolean_t); 199 static int udp_do_unbind(conn_t *); 200 201 int udp_getsockname(sock_lower_handle_t, 202 struct sockaddr *, socklen_t *, cred_t *); 203 int udp_getpeername(sock_lower_handle_t, 204 struct sockaddr *, socklen_t *, cred_t *); 205 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 206 cred_t *, pid_t); 207 208 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) 209 210 /* 211 * Checks if the given destination addr/port is allowed out. 212 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 213 * Called for each connect() and for sendto()/sendmsg() to a different 214 * destination. 215 * For connect(), called in udp_connect(). 216 * For sendto()/sendmsg(), called in udp_output_newdst(). 217 * 218 * This macro assumes that the cl_inet_connect2 hook is not NULL. 219 * Please check this before calling this macro. 
220 * 221 * void 222 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 223 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 224 */ 225 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 226 (err) = 0; \ 227 /* \ 228 * Running in cluster mode - check and register active \ 229 * "connection" information \ 230 */ \ 231 if ((cp)->conn_ipversion == IPV4_VERSION) \ 232 (err) = (*cl_inet_connect2)( \ 233 (cp)->conn_netstack->netstack_stackid, \ 234 IPPROTO_UDP, is_outgoing, AF_INET, \ 235 (uint8_t *)&((cp)->conn_laddr_v4), \ 236 (cp)->conn_lport, \ 237 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 238 (in_port_t)(fport), NULL); \ 239 else \ 240 (err) = (*cl_inet_connect2)( \ 241 (cp)->conn_netstack->netstack_stackid, \ 242 IPPROTO_UDP, is_outgoing, AF_INET6, \ 243 (uint8_t *)&((cp)->conn_laddr_v6), \ 244 (cp)->conn_lport, \ 245 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 246 } 247 248 static struct module_info udp_mod_info = { 249 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 250 }; 251 252 /* 253 * Entry points for UDP as a device. 254 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 255 */ 256 static struct qinit udp_rinitv4 = { 257 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 258 }; 259 260 static struct qinit udp_rinitv6 = { 261 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 262 }; 263 264 static struct qinit udp_winit = { 265 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info 266 }; 267 268 /* UDP entry point during fallback */ 269 struct qinit udp_fallback_sock_winit = { 270 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 271 }; 272 273 /* 274 * UDP needs to handle I_LINK and I_PLINK since ifconfig 275 * likes to use it as a place to hang the various streams. 
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* Largest UDP payload that fits in one IPv4 packet with a simple header. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest UDP payload that fits in one IPv6 packet with no extensions. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of udp_g_t_info_ack_ipv4. */
static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/*
 * UDP tunables related declarations. Definitions are in udp_tunables.c
 */
extern mod_prop_info_t udp_propinfo_tbl[];
extern int udp_propinfo_count;

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP). MLP has special function
 * on TX systems. Once a port is made MLP, it's not available as
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" to find a suitable anon port.
361 */ 362 static in_port_t 363 udp_get_next_priv_port(udp_t *udp) 364 { 365 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 366 in_port_t nextport; 367 boolean_t restart = B_FALSE; 368 udp_stack_t *us = udp->udp_us; 369 370 retry: 371 if (next_priv_port < us->us_min_anonpriv_port || 372 next_priv_port >= IPPORT_RESERVED) { 373 next_priv_port = IPPORT_RESERVED - 1; 374 if (restart) 375 return (0); 376 restart = B_TRUE; 377 } 378 379 if (is_system_labeled() && 380 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 381 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 382 next_priv_port = nextport; 383 goto retry; 384 } 385 386 return (next_priv_port--); 387 } 388 389 /* 390 * Hash list removal routine for udp_t structures. 391 */ 392 static void 393 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 394 { 395 udp_t *udpnext; 396 kmutex_t *lockp; 397 udp_stack_t *us = udp->udp_us; 398 conn_t *connp = udp->udp_connp; 399 400 if (udp->udp_ptpbhn == NULL) 401 return; 402 403 /* 404 * Extract the lock pointer in case there are concurrent 405 * hash_remove's for this instance. 
406 */ 407 ASSERT(connp->conn_lport != 0); 408 if (!caller_holds_lock) { 409 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 410 us->us_bind_fanout_size)].uf_lock; 411 ASSERT(lockp != NULL); 412 mutex_enter(lockp); 413 } 414 if (udp->udp_ptpbhn != NULL) { 415 udpnext = udp->udp_bind_hash; 416 if (udpnext != NULL) { 417 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 418 udp->udp_bind_hash = NULL; 419 } 420 *udp->udp_ptpbhn = udpnext; 421 udp->udp_ptpbhn = NULL; 422 } 423 if (!caller_holds_lock) { 424 mutex_exit(lockp); 425 } 426 } 427 428 static void 429 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 430 { 431 conn_t *connp = udp->udp_connp; 432 udp_t **udpp; 433 udp_t *udpnext; 434 conn_t *connext; 435 436 ASSERT(MUTEX_HELD(&uf->uf_lock)); 437 ASSERT(udp->udp_ptpbhn == NULL); 438 udpp = &uf->uf_udp; 439 udpnext = udpp[0]; 440 if (udpnext != NULL) { 441 /* 442 * If the new udp bound to the INADDR_ANY address 443 * and the first one in the list is not bound to 444 * INADDR_ANY we skip all entries until we find the 445 * first one bound to INADDR_ANY. 446 * This makes sure that applications binding to a 447 * specific address get preference over those binding to 448 * INADDR_ANY. 449 */ 450 connext = udpnext->udp_connp; 451 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 452 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 453 while ((udpnext = udpp[0]) != NULL && 454 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 455 udpp = &(udpnext->udp_bind_hash); 456 } 457 if (udpnext != NULL) 458 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 459 } else { 460 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 461 } 462 } 463 udp->udp_bind_hash = udpnext; 464 udp->udp_ptpbhn = udpp; 465 udpp[0] = udp; 466 } 467 468 /* 469 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 470 * passed to udp_wput. 471 * It associates a port number and local address with the stream. 
472 * It calls IP to verify the local IP address, and calls IP to insert 473 * the conn_t in the fanout table. 474 * If everything is ok it then sends the T_BIND_ACK back up. 475 * 476 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 477 * without setting SO_REUSEADDR. This is needed so that they 478 * can be viewed as two independent transport protocols. 479 * However, anonymouns ports are allocated from the same range to avoid 480 * duplicating the us->us_next_port_to_try. 481 */ 482 static void 483 udp_tpi_bind(queue_t *q, mblk_t *mp) 484 { 485 sin_t *sin; 486 sin6_t *sin6; 487 mblk_t *mp1; 488 struct T_bind_req *tbr; 489 conn_t *connp; 490 udp_t *udp; 491 int error; 492 struct sockaddr *sa; 493 cred_t *cr; 494 495 /* 496 * All Solaris components should pass a db_credp 497 * for this TPI message, hence we ASSERT. 498 * But in case there is some other M_PROTO that looks 499 * like a TPI message sent by some other kernel 500 * component, we check and return an error. 501 */ 502 cr = msg_getcred(mp, NULL); 503 ASSERT(cr != NULL); 504 if (cr == NULL) { 505 udp_err_ack(q, mp, TSYSERR, EINVAL); 506 return; 507 } 508 509 connp = Q_TO_CONN(q); 510 udp = connp->conn_udp; 511 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 512 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 513 "udp_bind: bad req, len %u", 514 (uint_t)(mp->b_wptr - mp->b_rptr)); 515 udp_err_ack(q, mp, TPROTO, 0); 516 return; 517 } 518 if (udp->udp_state != TS_UNBND) { 519 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 520 "udp_bind: bad state, %u", udp->udp_state); 521 udp_err_ack(q, mp, TOUTSTATE, 0); 522 return; 523 } 524 /* 525 * Reallocate the message to make sure we have enough room for an 526 * address. 527 */ 528 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 529 if (mp1 == NULL) { 530 udp_err_ack(q, mp, TSYSERR, ENOMEM); 531 return; 532 } 533 534 mp = mp1; 535 536 /* Reset the message type in preparation for shipping it back. 
*/ 537 DB_TYPE(mp) = M_PCPROTO; 538 539 tbr = (struct T_bind_req *)mp->b_rptr; 540 switch (tbr->ADDR_length) { 541 case 0: /* Request for a generic port */ 542 tbr->ADDR_offset = sizeof (struct T_bind_req); 543 if (connp->conn_family == AF_INET) { 544 tbr->ADDR_length = sizeof (sin_t); 545 sin = (sin_t *)&tbr[1]; 546 *sin = sin_null; 547 sin->sin_family = AF_INET; 548 mp->b_wptr = (uchar_t *)&sin[1]; 549 sa = (struct sockaddr *)sin; 550 } else { 551 ASSERT(connp->conn_family == AF_INET6); 552 tbr->ADDR_length = sizeof (sin6_t); 553 sin6 = (sin6_t *)&tbr[1]; 554 *sin6 = sin6_null; 555 sin6->sin6_family = AF_INET6; 556 mp->b_wptr = (uchar_t *)&sin6[1]; 557 sa = (struct sockaddr *)sin6; 558 } 559 break; 560 561 case sizeof (sin_t): /* Complete IPv4 address */ 562 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 563 sizeof (sin_t)); 564 if (sa == NULL || !OK_32PTR((char *)sa)) { 565 udp_err_ack(q, mp, TSYSERR, EINVAL); 566 return; 567 } 568 if (connp->conn_family != AF_INET || 569 sa->sa_family != AF_INET) { 570 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 571 return; 572 } 573 break; 574 575 case sizeof (sin6_t): /* complete IPv6 address */ 576 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 577 sizeof (sin6_t)); 578 if (sa == NULL || !OK_32PTR((char *)sa)) { 579 udp_err_ack(q, mp, TSYSERR, EINVAL); 580 return; 581 } 582 if (connp->conn_family != AF_INET6 || 583 sa->sa_family != AF_INET6) { 584 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 585 return; 586 } 587 break; 588 589 default: /* Invalid request */ 590 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 591 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 592 udp_err_ack(q, mp, TBADADDR, 0); 593 return; 594 } 595 596 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 597 tbr->PRIM_type != O_T_BIND_REQ); 598 599 if (error != 0) { 600 if (error > 0) { 601 udp_err_ack(q, mp, TSYSERR, error); 602 } else { 603 udp_err_ack(q, mp, -error, 0); 604 } 605 } else { 606 tbr->PRIM_type = T_BIND_ACK; 
607 qreply(q, mp); 608 } 609 } 610 611 /* 612 * This routine handles each T_CONN_REQ message passed to udp. It 613 * associates a default destination address with the stream. 614 * 615 * After various error checks are completed, udp_connect() lays 616 * the target address and port into the composite header template. 617 * Then we ask IP for information, including a source address if we didn't 618 * already have one. Finally we send up the T_OK_ACK reply message. 619 */ 620 static void 621 udp_tpi_connect(queue_t *q, mblk_t *mp) 622 { 623 conn_t *connp = Q_TO_CONN(q); 624 int error; 625 socklen_t len; 626 struct sockaddr *sa; 627 struct T_conn_req *tcr; 628 cred_t *cr; 629 pid_t pid; 630 /* 631 * All Solaris components should pass a db_credp 632 * for this TPI message, hence we ASSERT. 633 * But in case there is some other M_PROTO that looks 634 * like a TPI message sent by some other kernel 635 * component, we check and return an error. 636 */ 637 cr = msg_getcred(mp, &pid); 638 ASSERT(cr != NULL); 639 if (cr == NULL) { 640 udp_err_ack(q, mp, TSYSERR, EINVAL); 641 return; 642 } 643 644 tcr = (struct T_conn_req *)mp->b_rptr; 645 646 /* A bit of sanity checking */ 647 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 648 udp_err_ack(q, mp, TPROTO, 0); 649 return; 650 } 651 652 if (tcr->OPT_length != 0) { 653 udp_err_ack(q, mp, TBADOPT, 0); 654 return; 655 } 656 657 /* 658 * Determine packet type based on type of address passed in 659 * the request should contain an IPv4 or IPv6 address. 660 * Make sure that address family matches the type of 661 * family of the address passed down. 
662 */ 663 len = tcr->DEST_length; 664 switch (tcr->DEST_length) { 665 default: 666 udp_err_ack(q, mp, TBADADDR, 0); 667 return; 668 669 case sizeof (sin_t): 670 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 671 sizeof (sin_t)); 672 break; 673 674 case sizeof (sin6_t): 675 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 676 sizeof (sin6_t)); 677 break; 678 } 679 680 error = proto_verify_ip_addr(connp->conn_family, sa, len); 681 if (error != 0) { 682 udp_err_ack(q, mp, TSYSERR, error); 683 return; 684 } 685 686 error = udp_do_connect(connp, sa, len, cr, pid); 687 if (error != 0) { 688 if (error < 0) 689 udp_err_ack(q, mp, -error, 0); 690 else 691 udp_err_ack(q, mp, TSYSERR, error); 692 } else { 693 mblk_t *mp1; 694 /* 695 * We have to send a connection confirmation to 696 * keep TLI happy. 697 */ 698 if (connp->conn_family == AF_INET) { 699 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 700 sizeof (sin_t), NULL, 0); 701 } else { 702 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 703 sizeof (sin6_t), NULL, 0); 704 } 705 if (mp1 == NULL) { 706 udp_err_ack(q, mp, TSYSERR, ENOMEM); 707 return; 708 } 709 710 /* 711 * Send ok_ack for T_CONN_REQ 712 */ 713 mp = mi_tpi_ok_ack_alloc(mp); 714 if (mp == NULL) { 715 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 716 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 717 return; 718 } 719 720 putnext(connp->conn_rq, mp); 721 putnext(connp->conn_rq, mp1); 722 } 723 } 724 725 static int 726 udp_tpi_close(queue_t *q, int flags) 727 { 728 conn_t *connp; 729 730 if (flags & SO_FALLBACK) { 731 /* 732 * stream is being closed while in fallback 733 * simply free the resources that were allocated 734 */ 735 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 736 qprocsoff(q); 737 goto done; 738 } 739 740 connp = Q_TO_CONN(q); 741 udp_do_close(connp); 742 done: 743 q->q_ptr = WR(q)->q_ptr = NULL; 744 return (0); 745 } 746 747 static void 748 udp_close_free(conn_t *connp) 749 { 750 udp_t *udp = connp->conn_udp; 751 752 /* If there are any options associated with the stream, free them. */ 753 if (udp->udp_recv_ipp.ipp_fields != 0) 754 ip_pkt_free(&udp->udp_recv_ipp); 755 756 /* 757 * Clear any fields which the kmem_cache constructor clears. 758 * Only udp_connp needs to be preserved. 759 * TBD: We should make this more efficient to avoid clearing 760 * everything. 
761 */ 762 ASSERT(udp->udp_connp == connp); 763 bzero(udp, sizeof (udp_t)); 764 udp->udp_connp = connp; 765 } 766 767 static int 768 udp_do_disconnect(conn_t *connp) 769 { 770 udp_t *udp; 771 udp_fanout_t *udpf; 772 udp_stack_t *us; 773 int error; 774 775 udp = connp->conn_udp; 776 us = udp->udp_us; 777 mutex_enter(&connp->conn_lock); 778 if (udp->udp_state != TS_DATA_XFER) { 779 mutex_exit(&connp->conn_lock); 780 return (-TOUTSTATE); 781 } 782 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 783 us->us_bind_fanout_size)]; 784 mutex_enter(&udpf->uf_lock); 785 if (connp->conn_mcbc_bind) 786 connp->conn_saddr_v6 = ipv6_all_zeros; 787 else 788 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 789 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 790 connp->conn_faddr_v6 = ipv6_all_zeros; 791 connp->conn_fport = 0; 792 udp->udp_state = TS_IDLE; 793 mutex_exit(&udpf->uf_lock); 794 795 /* Remove any remnants of mapped address binding */ 796 if (connp->conn_family == AF_INET6) 797 connp->conn_ipversion = IPV6_VERSION; 798 799 connp->conn_v6lastdst = ipv6_all_zeros; 800 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 801 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 802 mutex_exit(&connp->conn_lock); 803 if (error != 0) 804 return (error); 805 806 /* 807 * Tell IP to remove the full binding and revert 808 * to the local address binding. 809 */ 810 return (ip_laddr_fanout_insert(connp)); 811 } 812 813 static void 814 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 815 { 816 conn_t *connp = Q_TO_CONN(q); 817 int error; 818 819 /* 820 * Allocate the largest primitive we need to send back 821 * T_error_ack is > than T_ok_ack 822 */ 823 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 824 if (mp == NULL) { 825 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 826 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 827 return; 828 } 829 830 error = udp_do_disconnect(connp); 831 832 if (error != 0) { 833 if (error < 0) { 834 udp_err_ack(q, mp, -error, 0); 835 } else { 836 udp_err_ack(q, mp, TSYSERR, error); 837 } 838 } else { 839 mp = mi_tpi_ok_ack_alloc(mp); 840 ASSERT(mp != NULL); 841 qreply(q, mp); 842 } 843 } 844 845 int 846 udp_disconnect(conn_t *connp) 847 { 848 int error; 849 850 connp->conn_dgram_errind = B_FALSE; 851 error = udp_do_disconnect(connp); 852 if (error < 0) 853 error = proto_tlitosyserr(-error); 854 855 return (error); 856 } 857 858 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 859 static void 860 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 861 { 862 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 863 qreply(q, mp); 864 } 865 866 /* Shorthand to generate and send TPI error acks to our client */ 867 static void 868 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 869 t_scalar_t t_error, int sys_error) 870 { 871 struct T_error_ack *teackp; 872 873 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 874 M_PCPROTO, T_ERROR_ACK)) != NULL) { 875 teackp = (struct T_error_ack *)mp->b_rptr; 876 teackp->ERROR_prim = primitive; 877 teackp->TLI_error = t_error; 878 teackp->UNIX_error = sys_error; 879 qreply(q, mp); 880 } 881 } 882 883 /* At minimum we need 4 bytes of UDP header */ 884 #define ICMP_MIN_UDP_HDR 4 885 886 /* 887 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 888 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 889 * Assumes that IP has pulled up everything up to and including the ICMP header. 
890 */ 891 /* ARGSUSED2 */ 892 static void 893 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 894 { 895 conn_t *connp = (conn_t *)arg1; 896 icmph_t *icmph; 897 ipha_t *ipha; 898 int iph_hdr_length; 899 udpha_t *udpha; 900 sin_t sin; 901 sin6_t sin6; 902 mblk_t *mp1; 903 int error = 0; 904 udp_t *udp = connp->conn_udp; 905 906 ipha = (ipha_t *)mp->b_rptr; 907 908 ASSERT(OK_32PTR(mp->b_rptr)); 909 910 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 911 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 912 udp_icmp_error_ipv6(connp, mp, ira); 913 return; 914 } 915 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 916 917 /* Skip past the outer IP and ICMP headers */ 918 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 919 iph_hdr_length = ira->ira_ip_hdr_length; 920 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 921 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 922 923 /* Skip past the inner IP and find the ULP header */ 924 iph_hdr_length = IPH_HDR_LENGTH(ipha); 925 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 926 927 switch (icmph->icmph_type) { 928 case ICMP_DEST_UNREACHABLE: 929 switch (icmph->icmph_code) { 930 case ICMP_FRAGMENTATION_NEEDED: { 931 ipha_t *ipha; 932 ip_xmit_attr_t *ixa; 933 /* 934 * IP has already adjusted the path MTU. 935 * But we need to adjust DF for IPv4. 936 */ 937 if (connp->conn_ipversion != IPV4_VERSION) 938 break; 939 940 ixa = conn_get_ixa(connp, B_FALSE); 941 if (ixa == NULL || ixa->ixa_ire == NULL) { 942 /* 943 * Some other thread holds conn_ixa. We will 944 * redo this on the next ICMP too big. 
945 */ 946 if (ixa != NULL) 947 ixa_refrele(ixa); 948 break; 949 } 950 (void) ip_get_pmtu(ixa); 951 952 mutex_enter(&connp->conn_lock); 953 ipha = (ipha_t *)connp->conn_ht_iphc; 954 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 955 ipha->ipha_fragment_offset_and_flags |= 956 IPH_DF_HTONS; 957 } else { 958 ipha->ipha_fragment_offset_and_flags &= 959 ~IPH_DF_HTONS; 960 } 961 mutex_exit(&connp->conn_lock); 962 ixa_refrele(ixa); 963 break; 964 } 965 case ICMP_PORT_UNREACHABLE: 966 case ICMP_PROTOCOL_UNREACHABLE: 967 error = ECONNREFUSED; 968 break; 969 default: 970 /* Transient errors */ 971 break; 972 } 973 break; 974 default: 975 /* Transient errors */ 976 break; 977 } 978 if (error == 0) { 979 freemsg(mp); 980 return; 981 } 982 983 /* 984 * Deliver T_UDERROR_IND when the application has asked for it. 985 * The socket layer enables this automatically when connected. 986 */ 987 if (!connp->conn_dgram_errind) { 988 freemsg(mp); 989 return; 990 } 991 992 switch (connp->conn_family) { 993 case AF_INET: 994 sin = sin_null; 995 sin.sin_family = AF_INET; 996 sin.sin_addr.s_addr = ipha->ipha_dst; 997 sin.sin_port = udpha->uha_dst_port; 998 if (IPCL_IS_NONSTR(connp)) { 999 mutex_enter(&connp->conn_lock); 1000 if (udp->udp_state == TS_DATA_XFER) { 1001 if (sin.sin_port == connp->conn_fport && 1002 sin.sin_addr.s_addr == 1003 connp->conn_faddr_v4) { 1004 mutex_exit(&connp->conn_lock); 1005 (*connp->conn_upcalls->su_set_error) 1006 (connp->conn_upper_handle, error); 1007 goto done; 1008 } 1009 } else { 1010 udp->udp_delayed_error = error; 1011 *((sin_t *)&udp->udp_delayed_addr) = sin; 1012 } 1013 mutex_exit(&connp->conn_lock); 1014 } else { 1015 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1016 NULL, 0, error); 1017 if (mp1 != NULL) 1018 putnext(connp->conn_rq, mp1); 1019 } 1020 break; 1021 case AF_INET6: 1022 sin6 = sin6_null; 1023 sin6.sin6_family = AF_INET6; 1024 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1025 sin6.sin6_port = udpha->uha_dst_port; 1026 if 
(IPCL_IS_NONSTR(connp)) { 1027 mutex_enter(&connp->conn_lock); 1028 if (udp->udp_state == TS_DATA_XFER) { 1029 if (sin6.sin6_port == connp->conn_fport && 1030 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1031 &connp->conn_faddr_v6)) { 1032 mutex_exit(&connp->conn_lock); 1033 (*connp->conn_upcalls->su_set_error) 1034 (connp->conn_upper_handle, error); 1035 goto done; 1036 } 1037 } else { 1038 udp->udp_delayed_error = error; 1039 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1040 } 1041 mutex_exit(&connp->conn_lock); 1042 } else { 1043 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1044 NULL, 0, error); 1045 if (mp1 != NULL) 1046 putnext(connp->conn_rq, mp1); 1047 } 1048 break; 1049 } 1050 done: 1051 freemsg(mp); 1052 } 1053 1054 /* 1055 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1056 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1057 * Assumes that IP has pulled up all the extension headers as well as the 1058 * ICMPv6 header. 
1059 */ 1060 static void 1061 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1062 { 1063 icmp6_t *icmp6; 1064 ip6_t *ip6h, *outer_ip6h; 1065 uint16_t iph_hdr_length; 1066 uint8_t *nexthdrp; 1067 udpha_t *udpha; 1068 sin6_t sin6; 1069 mblk_t *mp1; 1070 int error = 0; 1071 udp_t *udp = connp->conn_udp; 1072 udp_stack_t *us = udp->udp_us; 1073 1074 outer_ip6h = (ip6_t *)mp->b_rptr; 1075 #ifdef DEBUG 1076 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1077 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1078 else 1079 iph_hdr_length = IPV6_HDR_LEN; 1080 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1081 #endif 1082 /* Skip past the outer IP and ICMP headers */ 1083 iph_hdr_length = ira->ira_ip_hdr_length; 1084 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1085 1086 /* Skip past the inner IP and find the ULP header */ 1087 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1088 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1089 freemsg(mp); 1090 return; 1091 } 1092 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1093 1094 switch (icmp6->icmp6_type) { 1095 case ICMP6_DST_UNREACH: 1096 switch (icmp6->icmp6_code) { 1097 case ICMP6_DST_UNREACH_NOPORT: 1098 error = ECONNREFUSED; 1099 break; 1100 case ICMP6_DST_UNREACH_ADMIN: 1101 case ICMP6_DST_UNREACH_NOROUTE: 1102 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1103 case ICMP6_DST_UNREACH_ADDR: 1104 /* Transient errors */ 1105 break; 1106 default: 1107 break; 1108 } 1109 break; 1110 case ICMP6_PACKET_TOO_BIG: { 1111 struct T_unitdata_ind *tudi; 1112 struct T_opthdr *toh; 1113 size_t udi_size; 1114 mblk_t *newmp; 1115 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1116 sizeof (struct ip6_mtuinfo); 1117 sin6_t *sin6; 1118 struct ip6_mtuinfo *mtuinfo; 1119 1120 /* 1121 * If the application has requested to receive path mtu 1122 * information, send up an empty message containing an 1123 * IPV6_PATHMTU ancillary data item. 
1124 */ 1125 if (!connp->conn_ipv6_recvpathmtu) 1126 break; 1127 1128 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1129 opt_length; 1130 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1131 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1132 break; 1133 } 1134 1135 /* 1136 * newmp->b_cont is left to NULL on purpose. This is an 1137 * empty message containing only ancillary data. 1138 */ 1139 newmp->b_datap->db_type = M_PROTO; 1140 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1141 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1142 tudi->PRIM_type = T_UNITDATA_IND; 1143 tudi->SRC_length = sizeof (sin6_t); 1144 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1145 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1146 tudi->OPT_length = opt_length; 1147 1148 sin6 = (sin6_t *)&tudi[1]; 1149 bzero(sin6, sizeof (sin6_t)); 1150 sin6->sin6_family = AF_INET6; 1151 sin6->sin6_addr = connp->conn_faddr_v6; 1152 1153 toh = (struct T_opthdr *)&sin6[1]; 1154 toh->level = IPPROTO_IPV6; 1155 toh->name = IPV6_PATHMTU; 1156 toh->len = opt_length; 1157 toh->status = 0; 1158 1159 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1160 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1161 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1162 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1163 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1164 /* 1165 * We've consumed everything we need from the original 1166 * message. Free it, then send our empty message. 
1167 */ 1168 freemsg(mp); 1169 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); 1170 return; 1171 } 1172 case ICMP6_TIME_EXCEEDED: 1173 /* Transient errors */ 1174 break; 1175 case ICMP6_PARAM_PROB: 1176 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1177 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1178 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1179 (uchar_t *)nexthdrp) { 1180 error = ECONNREFUSED; 1181 break; 1182 } 1183 break; 1184 } 1185 if (error == 0) { 1186 freemsg(mp); 1187 return; 1188 } 1189 1190 /* 1191 * Deliver T_UDERROR_IND when the application has asked for it. 1192 * The socket layer enables this automatically when connected. 1193 */ 1194 if (!connp->conn_dgram_errind) { 1195 freemsg(mp); 1196 return; 1197 } 1198 1199 sin6 = sin6_null; 1200 sin6.sin6_family = AF_INET6; 1201 sin6.sin6_addr = ip6h->ip6_dst; 1202 sin6.sin6_port = udpha->uha_dst_port; 1203 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1204 1205 if (IPCL_IS_NONSTR(connp)) { 1206 mutex_enter(&connp->conn_lock); 1207 if (udp->udp_state == TS_DATA_XFER) { 1208 if (sin6.sin6_port == connp->conn_fport && 1209 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1210 &connp->conn_faddr_v6)) { 1211 mutex_exit(&connp->conn_lock); 1212 (*connp->conn_upcalls->su_set_error) 1213 (connp->conn_upper_handle, error); 1214 goto done; 1215 } 1216 } else { 1217 udp->udp_delayed_error = error; 1218 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1219 } 1220 mutex_exit(&connp->conn_lock); 1221 } else { 1222 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1223 NULL, 0, error); 1224 if (mp1 != NULL) 1225 putnext(connp->conn_rq, mp1); 1226 } 1227 done: 1228 freemsg(mp); 1229 } 1230 1231 /* 1232 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1233 * The local address is filled in if endpoint is bound. 
The remote address 1234 * is filled in if remote address has been precified ("connected endpoint") 1235 * (The concept of connected CLTS sockets is alien to published TPI 1236 * but we support it anyway). 1237 */ 1238 static void 1239 udp_addr_req(queue_t *q, mblk_t *mp) 1240 { 1241 struct sockaddr *sa; 1242 mblk_t *ackmp; 1243 struct T_addr_ack *taa; 1244 udp_t *udp = Q_TO_UDP(q); 1245 conn_t *connp = udp->udp_connp; 1246 uint_t addrlen; 1247 1248 /* Make it large enough for worst case */ 1249 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1250 2 * sizeof (sin6_t), 1); 1251 if (ackmp == NULL) { 1252 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1253 return; 1254 } 1255 taa = (struct T_addr_ack *)ackmp->b_rptr; 1256 1257 bzero(taa, sizeof (struct T_addr_ack)); 1258 ackmp->b_wptr = (uchar_t *)&taa[1]; 1259 1260 taa->PRIM_type = T_ADDR_ACK; 1261 ackmp->b_datap->db_type = M_PCPROTO; 1262 1263 if (connp->conn_family == AF_INET) 1264 addrlen = sizeof (sin_t); 1265 else 1266 addrlen = sizeof (sin6_t); 1267 1268 mutex_enter(&connp->conn_lock); 1269 /* 1270 * Note: Following code assumes 32 bit alignment of basic 1271 * data structures like sin_t and struct T_addr_ack. 
1272 */ 1273 if (udp->udp_state != TS_UNBND) { 1274 /* 1275 * Fill in local address first 1276 */ 1277 taa->LOCADDR_offset = sizeof (*taa); 1278 taa->LOCADDR_length = addrlen; 1279 sa = (struct sockaddr *)&taa[1]; 1280 (void) conn_getsockname(connp, sa, &addrlen); 1281 ackmp->b_wptr += addrlen; 1282 } 1283 if (udp->udp_state == TS_DATA_XFER) { 1284 /* 1285 * connected, fill remote address too 1286 */ 1287 taa->REMADDR_length = addrlen; 1288 /* assumed 32-bit alignment */ 1289 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1290 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1291 (void) conn_getpeername(connp, sa, &addrlen); 1292 ackmp->b_wptr += addrlen; 1293 } 1294 mutex_exit(&connp->conn_lock); 1295 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1296 qreply(q, ackmp); 1297 } 1298 1299 static void 1300 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1301 { 1302 conn_t *connp = udp->udp_connp; 1303 1304 if (connp->conn_family == AF_INET) { 1305 *tap = udp_g_t_info_ack_ipv4; 1306 } else { 1307 *tap = udp_g_t_info_ack_ipv6; 1308 } 1309 tap->CURRENT_state = udp->udp_state; 1310 tap->OPT_size = udp_max_optsize; 1311 } 1312 1313 static void 1314 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1315 t_uscalar_t cap_bits1) 1316 { 1317 tcap->CAP_bits1 = 0; 1318 1319 if (cap_bits1 & TC1_INFO) { 1320 udp_copy_info(&tcap->INFO_ack, udp); 1321 tcap->CAP_bits1 |= TC1_INFO; 1322 } 1323 } 1324 1325 /* 1326 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1327 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1328 * udp_g_t_info_ack. The current state of the stream is copied from 1329 * udp_state. 
1330 */ 1331 static void 1332 udp_capability_req(queue_t *q, mblk_t *mp) 1333 { 1334 t_uscalar_t cap_bits1; 1335 struct T_capability_ack *tcap; 1336 udp_t *udp = Q_TO_UDP(q); 1337 1338 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1339 1340 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1341 mp->b_datap->db_type, T_CAPABILITY_ACK); 1342 if (!mp) 1343 return; 1344 1345 tcap = (struct T_capability_ack *)mp->b_rptr; 1346 udp_do_capability_ack(udp, tcap, cap_bits1); 1347 1348 qreply(q, mp); 1349 } 1350 1351 /* 1352 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1353 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1354 * The current state of the stream is copied from udp_state. 1355 */ 1356 static void 1357 udp_info_req(queue_t *q, mblk_t *mp) 1358 { 1359 udp_t *udp = Q_TO_UDP(q); 1360 1361 /* Create a T_INFO_ACK message. */ 1362 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1363 T_INFO_ACK); 1364 if (!mp) 1365 return; 1366 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1367 qreply(q, mp); 1368 } 1369 1370 /* For /dev/udp aka AF_INET open */ 1371 static int 1372 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1373 { 1374 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1375 } 1376 1377 /* For /dev/udp6 aka AF_INET6 open */ 1378 static int 1379 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1380 { 1381 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1382 } 1383 1384 /* 1385 * This is the open routine for udp. It allocates a udp_t structure for 1386 * the stream and, on the first open of the module, creates an ND table. 1387 */ 1388 static int 1389 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1390 boolean_t isv6) 1391 { 1392 udp_t *udp; 1393 conn_t *connp; 1394 dev_t conn_dev; 1395 vmem_t *minor_arena; 1396 int err; 1397 1398 /* If the stream is already open, return immediately. 
*/ 1399 if (q->q_ptr != NULL) 1400 return (0); 1401 1402 if (sflag == MODOPEN) 1403 return (EINVAL); 1404 1405 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1406 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1407 minor_arena = ip_minor_arena_la; 1408 } else { 1409 /* 1410 * Either minor numbers in the large arena were exhausted 1411 * or a non socket application is doing the open. 1412 * Try to allocate from the small arena. 1413 */ 1414 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1415 return (EBUSY); 1416 1417 minor_arena = ip_minor_arena_sa; 1418 } 1419 1420 if (flag & SO_FALLBACK) { 1421 /* 1422 * Non streams socket needs a stream to fallback to 1423 */ 1424 RD(q)->q_ptr = (void *)conn_dev; 1425 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1426 WR(q)->q_ptr = (void *)minor_arena; 1427 qprocson(q); 1428 return (0); 1429 } 1430 1431 connp = udp_do_open(credp, isv6, KM_SLEEP, &err); 1432 if (connp == NULL) { 1433 inet_minor_free(minor_arena, conn_dev); 1434 return (err); 1435 } 1436 udp = connp->conn_udp; 1437 1438 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1439 connp->conn_dev = conn_dev; 1440 connp->conn_minor_arena = minor_arena; 1441 1442 /* 1443 * Initialize the udp_t structure for this stream. 1444 */ 1445 q->q_ptr = connp; 1446 WR(q)->q_ptr = connp; 1447 connp->conn_rq = q; 1448 connp->conn_wq = WR(q); 1449 1450 /* 1451 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1452 * need to lock anything. 1453 */ 1454 ASSERT(connp->conn_proto == IPPROTO_UDP); 1455 ASSERT(connp->conn_udp == udp); 1456 ASSERT(udp->udp_connp == connp); 1457 1458 if (flag & SO_SOCKSTR) { 1459 udp->udp_issocket = B_TRUE; 1460 } 1461 1462 WR(q)->q_hiwat = connp->conn_sndbuf; 1463 WR(q)->q_lowat = connp->conn_sndlowat; 1464 1465 qprocson(q); 1466 1467 /* Set the Stream head write offset and high watermark. 
*/ 1468 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1469 (void) proto_set_rx_hiwat(q, connp, 1470 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1471 1472 mutex_enter(&connp->conn_lock); 1473 connp->conn_state_flags &= ~CONN_INCIPIENT; 1474 mutex_exit(&connp->conn_lock); 1475 return (0); 1476 } 1477 1478 /* 1479 * Which UDP options OK to set through T_UNITDATA_REQ... 1480 */ 1481 /* ARGSUSED */ 1482 static boolean_t 1483 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1484 { 1485 return (B_TRUE); 1486 } 1487 1488 /* 1489 * This routine gets default values of certain options whose default 1490 * values are maintained by protcol specific code 1491 */ 1492 int 1493 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1494 { 1495 udp_t *udp = Q_TO_UDP(q); 1496 udp_stack_t *us = udp->udp_us; 1497 int *i1 = (int *)ptr; 1498 1499 switch (level) { 1500 case IPPROTO_IP: 1501 switch (name) { 1502 case IP_MULTICAST_TTL: 1503 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1504 return (sizeof (uchar_t)); 1505 case IP_MULTICAST_LOOP: 1506 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1507 return (sizeof (uchar_t)); 1508 } 1509 break; 1510 case IPPROTO_IPV6: 1511 switch (name) { 1512 case IPV6_MULTICAST_HOPS: 1513 *i1 = IP_DEFAULT_MULTICAST_TTL; 1514 return (sizeof (int)); 1515 case IPV6_MULTICAST_LOOP: 1516 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1517 return (sizeof (int)); 1518 case IPV6_UNICAST_HOPS: 1519 *i1 = us->us_ipv6_hoplimit; 1520 return (sizeof (int)); 1521 } 1522 break; 1523 } 1524 return (-1); 1525 } 1526 1527 /* 1528 * This routine retrieves the current status of socket options. 1529 * It returns the size of the option retrieved, or -1. 
1530 */ 1531 int 1532 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1533 uchar_t *ptr) 1534 { 1535 int *i1 = (int *)ptr; 1536 udp_t *udp = connp->conn_udp; 1537 int len; 1538 conn_opt_arg_t coas; 1539 int retval; 1540 1541 coas.coa_connp = connp; 1542 coas.coa_ixa = connp->conn_ixa; 1543 coas.coa_ipp = &connp->conn_xmit_ipp; 1544 coas.coa_ancillary = B_FALSE; 1545 coas.coa_changed = 0; 1546 1547 /* 1548 * We assume that the optcom framework has checked for the set 1549 * of levels and names that are supported, hence we don't worry 1550 * about rejecting based on that. 1551 * First check for UDP specific handling, then pass to common routine. 1552 */ 1553 switch (level) { 1554 case IPPROTO_IP: 1555 /* 1556 * Only allow IPv4 option processing on IPv4 sockets. 1557 */ 1558 if (connp->conn_family != AF_INET) 1559 return (-1); 1560 1561 switch (name) { 1562 case IP_OPTIONS: 1563 case T_IP_OPTIONS: 1564 mutex_enter(&connp->conn_lock); 1565 if (!(udp->udp_recv_ipp.ipp_fields & 1566 IPPF_IPV4_OPTIONS)) { 1567 mutex_exit(&connp->conn_lock); 1568 return (0); 1569 } 1570 1571 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1572 ASSERT(len != 0); 1573 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1574 mutex_exit(&connp->conn_lock); 1575 return (len); 1576 } 1577 break; 1578 case IPPROTO_UDP: 1579 switch (name) { 1580 case UDP_NAT_T_ENDPOINT: 1581 mutex_enter(&connp->conn_lock); 1582 *i1 = udp->udp_nat_t_endpoint; 1583 mutex_exit(&connp->conn_lock); 1584 return (sizeof (int)); 1585 case UDP_RCVHDR: 1586 mutex_enter(&connp->conn_lock); 1587 *i1 = udp->udp_rcvhdr ? 1 : 0; 1588 mutex_exit(&connp->conn_lock); 1589 return (sizeof (int)); 1590 } 1591 } 1592 mutex_enter(&connp->conn_lock); 1593 retval = conn_opt_get(&coas, level, name, ptr); 1594 mutex_exit(&connp->conn_lock); 1595 return (retval); 1596 } 1597 1598 /* 1599 * This routine retrieves the current status of socket options. 1600 * It returns the size of the option retrieved, or -1. 
1601 */ 1602 int 1603 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1604 { 1605 conn_t *connp = Q_TO_CONN(q); 1606 int err; 1607 1608 err = udp_opt_get(connp, level, name, ptr); 1609 return (err); 1610 } 1611 1612 /* 1613 * This routine sets socket options. 1614 */ 1615 int 1616 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1617 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1618 { 1619 conn_t *connp = coa->coa_connp; 1620 ip_xmit_attr_t *ixa = coa->coa_ixa; 1621 udp_t *udp = connp->conn_udp; 1622 udp_stack_t *us = udp->udp_us; 1623 int *i1 = (int *)invalp; 1624 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1625 int error; 1626 1627 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1628 /* 1629 * First do UDP specific sanity checks and handle UDP specific 1630 * options. Note that some IPPROTO_UDP options are handled 1631 * by conn_opt_set. 1632 */ 1633 switch (level) { 1634 case SOL_SOCKET: 1635 switch (name) { 1636 case SO_SNDBUF: 1637 if (*i1 > us->us_max_buf) { 1638 return (ENOBUFS); 1639 } 1640 break; 1641 case SO_RCVBUF: 1642 if (*i1 > us->us_max_buf) { 1643 return (ENOBUFS); 1644 } 1645 break; 1646 1647 case SCM_UCRED: { 1648 struct ucred_s *ucr; 1649 cred_t *newcr; 1650 ts_label_t *tsl; 1651 1652 /* 1653 * Only sockets that have proper privileges and are 1654 * bound to MLPs will have any other value here, so 1655 * this implicitly tests for privilege to set label. 1656 */ 1657 if (connp->conn_mlp_type == mlptSingle) 1658 break; 1659 1660 ucr = (struct ucred_s *)invalp; 1661 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || 1662 ucr->uc_labeloff < sizeof (*ucr) || 1663 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1664 return (EINVAL); 1665 if (!checkonly) { 1666 /* 1667 * Set ixa_tsl to the new label. 1668 * We assume that crgetzoneid doesn't change 1669 * as part of the SCM_UCRED. 
1670 */ 1671 ASSERT(cr != NULL); 1672 if ((tsl = crgetlabel(cr)) == NULL) 1673 return (EINVAL); 1674 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1675 tsl->tsl_doi, KM_NOSLEEP); 1676 if (newcr == NULL) 1677 return (ENOSR); 1678 ASSERT(newcr->cr_label != NULL); 1679 /* 1680 * Move the hold on the cr_label to ixa_tsl by 1681 * setting cr_label to NULL. Then release newcr. 1682 */ 1683 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1684 ixa->ixa_flags |= IXAF_UCRED_TSL; 1685 newcr->cr_label = NULL; 1686 crfree(newcr); 1687 coa->coa_changed |= COA_HEADER_CHANGED; 1688 coa->coa_changed |= COA_WROFF_CHANGED; 1689 } 1690 /* Fully handled this option. */ 1691 return (0); 1692 } 1693 } 1694 break; 1695 case IPPROTO_UDP: 1696 switch (name) { 1697 case UDP_NAT_T_ENDPOINT: 1698 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1699 return (error); 1700 } 1701 1702 /* 1703 * Use conn_family instead so we can avoid ambiguitites 1704 * with AF_INET6 sockets that may switch from IPv4 1705 * to IPv6. 1706 */ 1707 if (connp->conn_family != AF_INET) { 1708 return (EAFNOSUPPORT); 1709 } 1710 1711 if (!checkonly) { 1712 mutex_enter(&connp->conn_lock); 1713 udp->udp_nat_t_endpoint = onoff; 1714 mutex_exit(&connp->conn_lock); 1715 coa->coa_changed |= COA_HEADER_CHANGED; 1716 coa->coa_changed |= COA_WROFF_CHANGED; 1717 } 1718 /* Fully handled this option. */ 1719 return (0); 1720 case UDP_RCVHDR: 1721 mutex_enter(&connp->conn_lock); 1722 udp->udp_rcvhdr = onoff; 1723 mutex_exit(&connp->conn_lock); 1724 return (0); 1725 } 1726 break; 1727 } 1728 error = conn_opt_set(coa, level, name, inlen, invalp, 1729 checkonly, cr); 1730 return (error); 1731 } 1732 1733 /* 1734 * This routine sets socket options. 
1735 */ 1736 int 1737 udp_opt_set(conn_t *connp, uint_t optset_context, int level, 1738 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 1739 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) 1740 { 1741 udp_t *udp = connp->conn_udp; 1742 int err; 1743 conn_opt_arg_t coas, *coa; 1744 boolean_t checkonly; 1745 udp_stack_t *us = udp->udp_us; 1746 1747 switch (optset_context) { 1748 case SETFN_OPTCOM_CHECKONLY: 1749 checkonly = B_TRUE; 1750 /* 1751 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1752 * inlen != 0 implies value supplied and 1753 * we have to "pretend" to set it. 1754 * inlen == 0 implies that there is no 1755 * value part in T_CHECK request and just validation 1756 * done elsewhere should be enough, we just return here. 1757 */ 1758 if (inlen == 0) { 1759 *outlenp = 0; 1760 return (0); 1761 } 1762 break; 1763 case SETFN_OPTCOM_NEGOTIATE: 1764 checkonly = B_FALSE; 1765 break; 1766 case SETFN_UD_NEGOTIATE: 1767 case SETFN_CONN_NEGOTIATE: 1768 checkonly = B_FALSE; 1769 /* 1770 * Negotiating local and "association-related" options 1771 * through T_UNITDATA_REQ. 1772 * 1773 * Following routine can filter out ones we do not 1774 * want to be "set" this way. 
1775 */ 1776 if (!udp_opt_allow_udr_set(level, name)) { 1777 *outlenp = 0; 1778 return (EINVAL); 1779 } 1780 break; 1781 default: 1782 /* 1783 * We should never get here 1784 */ 1785 *outlenp = 0; 1786 return (EINVAL); 1787 } 1788 1789 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1790 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1791 1792 if (thisdg_attrs != NULL) { 1793 /* Options from T_UNITDATA_REQ */ 1794 coa = (conn_opt_arg_t *)thisdg_attrs; 1795 ASSERT(coa->coa_connp == connp); 1796 ASSERT(coa->coa_ixa != NULL); 1797 ASSERT(coa->coa_ipp != NULL); 1798 ASSERT(coa->coa_ancillary); 1799 } else { 1800 coa = &coas; 1801 coas.coa_connp = connp; 1802 /* Get a reference on conn_ixa to prevent concurrent mods */ 1803 coas.coa_ixa = conn_get_ixa(connp, B_TRUE); 1804 if (coas.coa_ixa == NULL) { 1805 *outlenp = 0; 1806 return (ENOMEM); 1807 } 1808 coas.coa_ipp = &connp->conn_xmit_ipp; 1809 coas.coa_ancillary = B_FALSE; 1810 coas.coa_changed = 0; 1811 } 1812 1813 err = udp_do_opt_set(coa, level, name, inlen, invalp, 1814 cr, checkonly); 1815 if (err != 0) { 1816 errout: 1817 if (!coa->coa_ancillary) 1818 ixa_refrele(coa->coa_ixa); 1819 *outlenp = 0; 1820 return (err); 1821 } 1822 /* Handle DHCPINIT here outside of lock */ 1823 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) { 1824 uint_t ifindex; 1825 ill_t *ill; 1826 1827 ifindex = *(uint_t *)invalp; 1828 if (ifindex == 0) { 1829 ill = NULL; 1830 } else { 1831 ill = ill_lookup_on_ifindex(ifindex, B_FALSE, 1832 coa->coa_ixa->ixa_ipst); 1833 if (ill == NULL) { 1834 err = ENXIO; 1835 goto errout; 1836 } 1837 1838 mutex_enter(&ill->ill_lock); 1839 if (ill->ill_state_flags & ILL_CONDEMNED) { 1840 mutex_exit(&ill->ill_lock); 1841 ill_refrele(ill); 1842 err = ENXIO; 1843 goto errout; 1844 } 1845 if (IS_VNI(ill)) { 1846 mutex_exit(&ill->ill_lock); 1847 ill_refrele(ill); 1848 err = EINVAL; 1849 goto errout; 1850 } 1851 } 1852 mutex_enter(&connp->conn_lock); 1853 1854 if (connp->conn_dhcpinit_ill != NULL) 
{ 1855 /* 1856 * We've locked the conn so conn_cleanup_ill() 1857 * cannot clear conn_dhcpinit_ill -- so it's 1858 * safe to access the ill. 1859 */ 1860 ill_t *oill = connp->conn_dhcpinit_ill; 1861 1862 ASSERT(oill->ill_dhcpinit != 0); 1863 atomic_dec_32(&oill->ill_dhcpinit); 1864 ill_set_inputfn(connp->conn_dhcpinit_ill); 1865 connp->conn_dhcpinit_ill = NULL; 1866 } 1867 1868 if (ill != NULL) { 1869 connp->conn_dhcpinit_ill = ill; 1870 atomic_inc_32(&ill->ill_dhcpinit); 1871 ill_set_inputfn(ill); 1872 mutex_exit(&connp->conn_lock); 1873 mutex_exit(&ill->ill_lock); 1874 ill_refrele(ill); 1875 } else { 1876 mutex_exit(&connp->conn_lock); 1877 } 1878 } 1879 1880 /* 1881 * Common case of OK return with outval same as inval. 1882 */ 1883 if (invalp != outvalp) { 1884 /* don't trust bcopy for identical src/dst */ 1885 (void) bcopy(invalp, outvalp, inlen); 1886 } 1887 *outlenp = inlen; 1888 1889 /* 1890 * If this was not ancillary data, then we rebuild the headers, 1891 * update the IRE/NCE, and IPsec as needed. 1892 * Since the label depends on the destination we go through 1893 * ip_set_destination first. 1894 */ 1895 if (coa->coa_ancillary) { 1896 return (0); 1897 } 1898 1899 if (coa->coa_changed & COA_ROUTE_CHANGED) { 1900 in6_addr_t saddr, faddr, nexthop; 1901 in_port_t fport; 1902 1903 /* 1904 * We clear lastdst to make sure we pick up the change 1905 * next time sending. 1906 * If we are connected we re-cache the information. 1907 * We ignore errors to preserve BSD behavior. 1908 * Note that we don't redo IPsec policy lookup here 1909 * since the final destination (or source) didn't change. 
1910 */ 1911 mutex_enter(&connp->conn_lock); 1912 connp->conn_v6lastdst = ipv6_all_zeros; 1913 1914 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa, 1915 &connp->conn_faddr_v6, &nexthop); 1916 saddr = connp->conn_saddr_v6; 1917 faddr = connp->conn_faddr_v6; 1918 fport = connp->conn_fport; 1919 mutex_exit(&connp->conn_lock); 1920 1921 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) && 1922 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) { 1923 (void) ip_attr_connect(connp, coa->coa_ixa, 1924 &saddr, &faddr, &nexthop, fport, NULL, NULL, 1925 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST); 1926 } 1927 } 1928 1929 ixa_refrele(coa->coa_ixa); 1930 1931 if (coa->coa_changed & COA_HEADER_CHANGED) { 1932 /* 1933 * Rebuild the header template if we are connected. 1934 * Otherwise clear conn_v6lastdst so we rebuild the header 1935 * in the data path. 1936 */ 1937 mutex_enter(&connp->conn_lock); 1938 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 1939 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 1940 err = udp_build_hdr_template(connp, 1941 &connp->conn_saddr_v6, &connp->conn_faddr_v6, 1942 connp->conn_fport, connp->conn_flowinfo); 1943 if (err != 0) { 1944 mutex_exit(&connp->conn_lock); 1945 return (err); 1946 } 1947 } else { 1948 connp->conn_v6lastdst = ipv6_all_zeros; 1949 } 1950 mutex_exit(&connp->conn_lock); 1951 } 1952 if (coa->coa_changed & COA_RCVBUF_CHANGED) { 1953 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 1954 connp->conn_rcvbuf); 1955 } 1956 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { 1957 connp->conn_wq->q_hiwat = connp->conn_sndbuf; 1958 } 1959 if (coa->coa_changed & COA_WROFF_CHANGED) { 1960 /* Increase wroff if needed */ 1961 uint_t wroff; 1962 1963 mutex_enter(&connp->conn_lock); 1964 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra; 1965 if (udp->udp_nat_t_endpoint) 1966 wroff += sizeof (uint32_t); 1967 if (wroff > connp->conn_wroff) { 1968 connp->conn_wroff = wroff; 1969 mutex_exit(&connp->conn_lock); 1970 (void) 
proto_set_tx_wroff(connp->conn_rq, connp, wroff); 1971 } else { 1972 mutex_exit(&connp->conn_lock); 1973 } 1974 } 1975 return (err); 1976 } 1977 1978 /* This routine sets socket options. */ 1979 int 1980 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 1981 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 1982 void *thisdg_attrs, cred_t *cr) 1983 { 1984 conn_t *connp = Q_TO_CONN(q); 1985 int error; 1986 1987 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 1988 outlenp, outvalp, thisdg_attrs, cr); 1989 return (error); 1990 } 1991 1992 /* 1993 * Setup IP and UDP headers. 1994 * Returns NULL on allocation failure, in which case data_mp is freed. 1995 */ 1996 mblk_t * 1997 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, 1998 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport, 1999 uint32_t flowinfo, mblk_t *data_mp, int *errorp) 2000 { 2001 mblk_t *mp; 2002 udpha_t *udpha; 2003 udp_stack_t *us = connp->conn_netstack->netstack_udp; 2004 uint_t data_len; 2005 uint32_t cksum; 2006 udp_t *udp = connp->conn_udp; 2007 boolean_t insert_spi = udp->udp_nat_t_endpoint; 2008 uint_t ulp_hdr_len; 2009 2010 data_len = msgdsize(data_mp); 2011 ulp_hdr_len = UDPH_SIZE; 2012 if (insert_spi) 2013 ulp_hdr_len += sizeof (uint32_t); 2014 2015 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, 2016 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); 2017 if (mp == NULL) { 2018 ASSERT(*errorp != 0); 2019 return (NULL); 2020 } 2021 2022 data_len += ulp_hdr_len; 2023 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; 2024 2025 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); 2026 udpha->uha_src_port = connp->conn_lport; 2027 udpha->uha_dst_port = dstport; 2028 udpha->uha_checksum = 0; 2029 udpha->uha_length = htons(data_len); 2030 2031 /* 2032 * If there was a routing option/header then conn_prepend_hdr 2033 * has massaged it and placed the 
pseudo-header checksum difference 2034 * in the cksum argument. 2035 * 2036 * Setup header length and prepare for ULP checksum done in IP. 2037 * 2038 * We make it easy for IP to include our pseudo header 2039 * by putting our length in uha_checksum. 2040 * The IP source, destination, and length have already been set by 2041 * conn_prepend_hdr. 2042 */ 2043 cksum += data_len; 2044 cksum = (cksum >> 16) + (cksum & 0xFFFF); 2045 ASSERT(cksum < 0x10000); 2046 2047 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2048 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2049 2050 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); 2051 2052 /* IP does the checksum if uha_checksum is non-zero */ 2053 if (us->us_do_checksum) { 2054 if (cksum == 0) 2055 udpha->uha_checksum = 0xffff; 2056 else 2057 udpha->uha_checksum = htons(cksum); 2058 } else { 2059 udpha->uha_checksum = 0; 2060 } 2061 } else { 2062 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2063 2064 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); 2065 if (cksum == 0) 2066 udpha->uha_checksum = 0xffff; 2067 else 2068 udpha->uha_checksum = htons(cksum); 2069 } 2070 2071 /* Insert all-0s SPI now. */ 2072 if (insert_spi) 2073 *((uint32_t *)(udpha + 1)) = 0; 2074 2075 return (mp); 2076 } 2077 2078 static int 2079 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, 2080 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo) 2081 { 2082 udpha_t *udpha; 2083 int error; 2084 2085 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2086 /* 2087 * We clear lastdst to make sure we don't use the lastdst path 2088 * next time sending since we might not have set v6dst yet. 2089 */ 2090 connp->conn_v6lastdst = ipv6_all_zeros; 2091 2092 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst, 2093 flowinfo); 2094 if (error != 0) 2095 return (error); 2096 2097 /* 2098 * Any routing header/option has been massaged. The checksum difference 2099 * is stored in conn_sum. 
2100 */ 2101 udpha = (udpha_t *)connp->conn_ht_ulp; 2102 udpha->uha_src_port = connp->conn_lport; 2103 udpha->uha_dst_port = dstport; 2104 udpha->uha_checksum = 0; 2105 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */ 2106 return (0); 2107 } 2108 2109 static mblk_t * 2110 udp_queue_fallback(udp_t *udp, mblk_t *mp) 2111 { 2112 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 2113 if (IPCL_IS_NONSTR(udp->udp_connp)) { 2114 /* 2115 * fallback has started but messages have not been moved yet 2116 */ 2117 if (udp->udp_fallback_queue_head == NULL) { 2118 ASSERT(udp->udp_fallback_queue_tail == NULL); 2119 udp->udp_fallback_queue_head = mp; 2120 udp->udp_fallback_queue_tail = mp; 2121 } else { 2122 ASSERT(udp->udp_fallback_queue_tail != NULL); 2123 udp->udp_fallback_queue_tail->b_next = mp; 2124 udp->udp_fallback_queue_tail = mp; 2125 } 2126 return (NULL); 2127 } else { 2128 /* 2129 * Fallback completed, let the caller putnext() the mblk. 2130 */ 2131 return (mp); 2132 } 2133 } 2134 2135 /* 2136 * Deliver data to ULP. In case we have a socket, and it's falling back to 2137 * TPI, then we'll queue the mp for later processing. 
2138 */ 2139 static void 2140 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2141 { 2142 if (IPCL_IS_NONSTR(connp)) { 2143 udp_t *udp = connp->conn_udp; 2144 int error; 2145 2146 ASSERT(len == msgdsize(mp)); 2147 if ((*connp->conn_upcalls->su_recv) 2148 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2149 mutex_enter(&udp->udp_recv_lock); 2150 if (error == ENOSPC) { 2151 /* 2152 * let's confirm while holding the lock 2153 */ 2154 if ((*connp->conn_upcalls->su_recv) 2155 (connp->conn_upper_handle, NULL, 0, 0, 2156 &error, NULL) < 0) { 2157 ASSERT(error == ENOSPC); 2158 if (error == ENOSPC) { 2159 connp->conn_flow_cntrld = 2160 B_TRUE; 2161 } 2162 } 2163 mutex_exit(&udp->udp_recv_lock); 2164 } else { 2165 ASSERT(error == EOPNOTSUPP); 2166 mp = udp_queue_fallback(udp, mp); 2167 mutex_exit(&udp->udp_recv_lock); 2168 if (mp != NULL) 2169 putnext(connp->conn_rq, mp); 2170 } 2171 } 2172 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2173 } else { 2174 if (is_system_labeled()) { 2175 ASSERT(ira->ira_cred != NULL); 2176 /* 2177 * Provide for protocols above UDP such as RPC 2178 * NOPID leaves db_cpid unchanged. 2179 */ 2180 mblk_setcred(mp, ira->ira_cred, NOPID); 2181 } 2182 2183 putnext(connp->conn_rq, mp); 2184 } 2185 } 2186 2187 /* 2188 * This is the inbound data path. 2189 * IP has already pulled up the IP plus UDP headers and verified alignment 2190 * etc. 
*/
/*
 * udp_input() summary:
 *
 * arg1 is the conn_t the packet was fanned out to, mp is the M_DATA
 * message starting at the IP header, arg2 is unused and ira carries the
 * receive attributes (packet length, IP header length, flags).
 *
 * The routine checks the UDP length against the IP-derived length, records
 * IPv4 options and ancillary information when requested, prepends a
 * T_UNITDATA_IND carrying the source address (sin_t for AF_INET sockets,
 * sin6_t otherwise) plus any ancillary options, and hands the result to
 * udp_ulp_recv().  On every failure path mp is freed and udpInErrors is
 * bumped.
 */
/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t			*connp = (conn_t *)arg1;
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;		/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			udi_size;	/* Size of T_unitdata_ind */
	int			pkt_len;
	udp_t			*udp;
	udpha_t			*udpha;
	ip_pkt_t		ipps;
	ip6_t			*ip6h;
	mblk_t			*mp1;
	uint32_t		udp_ipv4_options_len;
	crb_t			recv_ancillary;
	udp_stack_t		*us;

	ASSERT(connp->conn_flags & IPCL_UDPCONN);

	udp = connp->conn_udp;
	us = udp->udp_us;
	rptr = mp->b_rptr;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(OK_32PTR(rptr));
	ASSERT(ira->ira_pktlen == msgdsize(mp));
	pkt_len = ira->ira_pktlen;

	/*
	 * Get a snapshot of these and allow other threads to change
	 * them after that. We need the same recv_ancillary when determining
	 * the size as when adding the ancillary data items.
	 */
	mutex_enter(&connp->conn_lock);
	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
	recv_ancillary = connp->conn_recv_ancillary;
	mutex_exit(&connp->conn_lock);

	hdr_length = ira->ira_ip_hdr_length;

	/*
	 * IP inspected the UDP header thus all of it must be in the mblk.
	 * UDP length check is performed for IPv6 packets and IPv4 packets
	 * to check if the size of the packet as specified
	 * by the UDP header is the same as the length derived from the IP
	 * header.
	 */
	udpha = (udpha_t *)(rptr + hdr_length);
	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
		goto tossit;

	/* From here on hdr_length covers the IP header plus the UDP header. */
	hdr_length += UDPH_SIZE;
	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */

	/* Initialize regardless of IP version */
	ipps.ipp_fields = 0;

	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
	    udp_ipv4_options_len > 0) &&
	    connp->conn_family == AF_INET) {
		int	err;

		/*
		 * Record/update udp_recv_ipp with the lock
		 * held. Not needed for AF_INET6 sockets
		 * since they don't support a getsockopt of IP_OPTIONS.
		 */
		mutex_enter(&connp->conn_lock);
		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
		    B_TRUE);
		if (err != 0) {
			/* Allocation failed. Drop packet */
			mutex_exit(&connp->conn_lock);
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mutex_exit(&connp->conn_lock);
	}

	/* Only parse the headers into ipps when some ancillary data is wanted. */
	if (recv_ancillary.crb_all != 0) {
		/*
		 * Record packet information in the ip_pkt_t
		 */
		if (ira->ira_flags & IRAF_IS_IPV4) {
			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));

			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
		} else {
			uint8_t nexthdrp;

			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
			/*
			 * IPv6 packets can only be received by applications
			 * that are prepared to receive IPv6 addresses.
			 * The IP fanout must ensure this.
			 */
			ASSERT(connp->conn_family == AF_INET6);

			ip6h = (ip6_t *)rptr;

			/* We don't care about the length, but need the ipp */
			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
			    &nexthdrp);
			ASSERT(hdr_length == ira->ira_ip_hdr_length);
			/* Restore */
			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
			ASSERT(nexthdrp == IPPROTO_UDP);
		}
	}

	/*
	 * This is the inbound data path. Packets are passed upstream as
	 * T_UNITDATA_IND messages.
	 */
	if (connp->conn_family == AF_INET) {
		sin_t *sin;

		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);

		/*
		 * Normally only send up the source address.
		 * If any ancillary data items are wanted we add those.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		/* The M_PROTO header leads; the received data hangs off b_cont. */
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin_t);
		/* udi_size now becomes the length of the options area only. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
		tudi->OPT_length = udi_size;
		sin = (sin_t *)&tudi[1];
		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
		sin->sin_port =	udpha->uha_src_port;
		sin->sin_family = connp->conn_family;
		/* Clear all eight bytes of sin_zero with two 32-bit stores. */
		*(uint32_t *)&sin->sin_zero[0] = 0;
		*(uint32_t *)&sin->sin_zero[4] = 0;

		/*
		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
		 * IP_RECVTTL has been set.
		 */
		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin[1], udi_size);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		/* udi_size now becomes the length of the options area only. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ira->ira_flags & IRAF_IS_IPV4) {
			in6_addr_t v6dst;

			/* IPv4 packet on an IPv6 socket: report mapped address */
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    IPCL_ZONEID(connp), us->us_netstack);
		} else {
			ip6h = (ip6_t *)rptr;

			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope pass up scope id */
			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ira->ira_ruifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
			    us->us_netstack);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = connp->conn_family;

		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin6[1], udi_size);
		}
	}

	/*
	 * DTrace this UDP input as udp:::receive (this is for IPv4, IPv6 and
	 * loopback traffic).
	 */
	DTRACE_UDP5(receive, mblk_t *, NULL, ip_xmit_attr_t *, connp->conn_ixa,
	    void_ip_t *, rptr, udp_t *, udp, udpha_t *, udpha);

	/* Walk past the headers unless IP_RECVHDR was set. */
	if (!udp->udp_rcvhdr) {
		mp->b_rptr = rptr + hdr_length;
		pkt_len -= hdr_length;
	}

	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
	udp_ulp_recv(connp, mp1, pkt_len, ira);
	return;

tossit:
	/* Length mismatch: drop the packet and count it as an input error. */
	freemsg(mp);
	BUMP_MIB(&us->us_udp_mib, udpInErrors);
}

/*
 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
 * information that can be changing beneath us.
 *
 * q is the queue the replies are sent on (via qreply) and mpctl is the
 * control message whose b_cont receives the fixed UDP MIB.  Up to five
 * replies are generated: the fixed MIB, the IPv4 endpoint table, the IPv4
 * MLP attribute table, the IPv6 endpoint table and the IPv6 MLP attribute
 * table (the attribute tables are freed rather than sent when empty).
 *
 * Returns a copy of the original mpctl (made before it was filled in), or
 * 0 if mpctl is unusable or any of the copies could not be allocated.
 */
mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	mblk_t			*mp_conn_ctl;
	mblk_t			*mp_attr_ctl;
	mblk_t			*mp6_conn_ctl;
	mblk_t			*mp6_attr_ctl;
	mblk_t			*mp_conn_tail;
	mblk_t			*mp_attr_tail;
	mblk_t			*mp6_conn_tail;
	mblk_t			*mp6_attr_tail;
	struct opthdr		*optp;
	mib2_udpEntry_t		ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int			state;
	zoneid_t		zoneid;
	int			i;
	connf_t			*connfp;
	conn_t			*connp = Q_TO_CONN(q);
	int			v4_conn_idx;
	int			v6_conn_idx;
	boolean_t		needattr;
	udp_t			*udp;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
	mblk_t			*mp2ctl;

	/*
	 * make a copy of the original message
	 *
	 * NOTE(review): this copymsg() runs before mpctl is tested for NULL
	 * just below -- confirm copymsg() tolerates a NULL argument or that
	 * callers never pass one.
	 */
	mp2ctl = copymsg(mpctl);

	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		/*
		 * mp6_attr_ctl is the last copy attempted, so if it was
		 * attempted at all it is the one that failed; there is
		 * nothing of it to free.
		 */
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		freemsg(mpctl);
		freemsg(mp2ctl);
		return (0);
	}

	/* Only endpoints in the requester's zone are reported. */
	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	/* First reply: the fixed UDP MIB, sent using the original mpctl. */
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
	    sizeof (us->us_udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every bucket of the global conn hash looking for UDP conns. */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDPCONN))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/*
			 * Build the MLP attribute entry; needattr records
			 * whether anything in it is worth reporting.
			 */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}
			if (connp->conn_anon_mlp) {
				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
				needattr = B_TRUE;
			}
			switch (connp->conn_mac_mode) {
			case CONN_MAC_DEFAULT:
				break;
			case CONN_MAC_AWARE:
				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
				needattr = B_TRUE;
				break;
			case CONN_MAC_IMPLICIT:
				mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT;
				needattr = B_TRUE;
				break;
			}
			/* conn_ixa is examined under conn_lock. */
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER &&
			    connp->conn_ixa->ixa_tsl != NULL) {
				ts_label_t *tsl;

				tsl = connp->conn_ixa->ixa_tsl;
				mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
				mlp.tme_doi = label2doi(tsl);
				mlp.tme_label = *label2bslabel(tsl);
				needattr = B_TRUE;
			}
			mutex_exit(&connp->conn_lock);

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (connp->conn_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress = connp->conn_laddr_v4;
				ude.udpLocalPort = ntohs(connp->conn_lport);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v4;
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude.udpCreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				/* Tie the attribute entry to this table row. */
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (connp->conn_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state  = state;
				ude6.udp6LocalAddress = connp->conn_laddr_v6;
				ude6.udp6LocalPort = ntohs(connp->conn_lport);
				mutex_enter(&connp->conn_lock);
				if (connp->conn_ixa->ixa_flags &
				    IXAF_SCOPEID_SET) {
					ude6.udp6IfIndex =
					    connp->conn_ixa->ixa_scopeid;
				} else {
					ude6.udp6IfIndex = connp->conn_bound_if;
				}
				mutex_exit(&connp->conn_lock);
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v6;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude6.udp6CreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				/* Tie the attribute entry to this table row. */
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	/* An empty attribute table is discarded instead of being sent. */
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	/* An empty attribute table is discarded instead of being sent. */
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (mp2ctl);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to
 * do the appropriate locking.
2737 */ 2738 /* ARGSUSED */ 2739 int 2740 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 2741 uchar_t *ptr, int len) 2742 { 2743 switch (level) { 2744 case MIB2_UDP: 2745 return (0); 2746 default: 2747 return (1); 2748 } 2749 } 2750 2751 /* 2752 * This routine creates a T_UDERROR_IND message and passes it upstream. 2753 * The address and options are copied from the T_UNITDATA_REQ message 2754 * passed in mp. This message is freed. 2755 */ 2756 static void 2757 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2758 { 2759 struct T_unitdata_req *tudr; 2760 mblk_t *mp1; 2761 uchar_t *destaddr; 2762 t_scalar_t destlen; 2763 uchar_t *optaddr; 2764 t_scalar_t optlen; 2765 2766 if ((mp->b_wptr < mp->b_rptr) || 2767 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2768 goto done; 2769 } 2770 tudr = (struct T_unitdata_req *)mp->b_rptr; 2771 destaddr = mp->b_rptr + tudr->DEST_offset; 2772 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2773 destaddr + tudr->DEST_length < mp->b_rptr || 2774 destaddr + tudr->DEST_length > mp->b_wptr) { 2775 goto done; 2776 } 2777 optaddr = mp->b_rptr + tudr->OPT_offset; 2778 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2779 optaddr + tudr->OPT_length < mp->b_rptr || 2780 optaddr + tudr->OPT_length > mp->b_wptr) { 2781 goto done; 2782 } 2783 destlen = tudr->DEST_length; 2784 optlen = tudr->OPT_length; 2785 2786 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2787 (char *)optaddr, optlen, err); 2788 if (mp1 != NULL) 2789 qreply(q, mp1); 2790 2791 done: 2792 freemsg(mp); 2793 } 2794 2795 /* 2796 * This routine removes a port number association from a stream. It 2797 * is called by udp_wput to handle T_UNBIND_REQ messages. 
2798 */ 2799 static void 2800 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2801 { 2802 conn_t *connp = Q_TO_CONN(q); 2803 int error; 2804 2805 error = udp_do_unbind(connp); 2806 if (error) { 2807 if (error < 0) 2808 udp_err_ack(q, mp, -error, 0); 2809 else 2810 udp_err_ack(q, mp, TSYSERR, error); 2811 return; 2812 } 2813 2814 mp = mi_tpi_ok_ack_alloc(mp); 2815 ASSERT(mp != NULL); 2816 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 2817 qreply(q, mp); 2818 } 2819 2820 /* 2821 * Don't let port fall into the privileged range. 2822 * Since the extra privileged ports can be arbitrary we also 2823 * ensure that we exclude those from consideration. 2824 * us->us_epriv_ports is not sorted thus we loop over it until 2825 * there are no changes. 2826 */ 2827 static in_port_t 2828 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 2829 { 2830 int i; 2831 in_port_t nextport; 2832 boolean_t restart = B_FALSE; 2833 udp_stack_t *us = udp->udp_us; 2834 2835 if (random && udp_random_anon_port != 0) { 2836 (void) random_get_pseudo_bytes((uint8_t *)&port, 2837 sizeof (in_port_t)); 2838 /* 2839 * Unless changed by a sys admin, the smallest anon port 2840 * is 32768 and the largest anon port is 65535. It is 2841 * very likely (50%) for the random port to be smaller 2842 * than the smallest anon port. When that happens, 2843 * add port % (anon port range) to the smallest anon 2844 * port to get the random port. It should fall into the 2845 * valid anon port range. 
2846 */ 2847 if (port < us->us_smallest_anon_port) { 2848 port = us->us_smallest_anon_port + 2849 port % (us->us_largest_anon_port - 2850 us->us_smallest_anon_port); 2851 } 2852 } 2853 2854 retry: 2855 if (port < us->us_smallest_anon_port) 2856 port = us->us_smallest_anon_port; 2857 2858 if (port > us->us_largest_anon_port) { 2859 port = us->us_smallest_anon_port; 2860 if (restart) 2861 return (0); 2862 restart = B_TRUE; 2863 } 2864 2865 if (port < us->us_smallest_nonpriv_port) 2866 port = us->us_smallest_nonpriv_port; 2867 2868 for (i = 0; i < us->us_num_epriv_ports; i++) { 2869 if (port == us->us_epriv_ports[i]) { 2870 port++; 2871 /* 2872 * Make sure that the port is in the 2873 * valid range. 2874 */ 2875 goto retry; 2876 } 2877 } 2878 2879 if (is_system_labeled() && 2880 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 2881 port, IPPROTO_UDP, B_TRUE)) != 0) { 2882 port = nextport; 2883 goto retry; 2884 } 2885 2886 return (port); 2887 } 2888 2889 /* 2890 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 2891 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 2892 * the TPI options, otherwise we take them from msg_control. 2893 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 2894 * Always consumes mp; never consumes tudr_mp. 2895 */ 2896 static int 2897 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, 2898 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) 2899 { 2900 udp_t *udp = connp->conn_udp; 2901 udp_stack_t *us = udp->udp_us; 2902 int error; 2903 ip_xmit_attr_t *ixa; 2904 ip_pkt_t *ipp; 2905 in6_addr_t v6src; 2906 in6_addr_t v6dst; 2907 in6_addr_t v6nexthop; 2908 in_port_t dstport; 2909 uint32_t flowinfo; 2910 uint_t srcid; 2911 int is_absreq_failure = 0; 2912 conn_opt_arg_t coas, *coa; 2913 2914 ASSERT(tudr_mp != NULL || msg != NULL); 2915 2916 /* 2917 * Get ixa before checking state to handle a disconnect race. 
2918 * 2919 * We need an exclusive copy of conn_ixa since the ancillary data 2920 * options might modify it. That copy has no pointers hence we 2921 * need to set them up once we've parsed the ancillary data. 2922 */ 2923 ixa = conn_get_ixa_exclusive(connp); 2924 if (ixa == NULL) { 2925 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 2926 freemsg(mp); 2927 return (ENOMEM); 2928 } 2929 ASSERT(cr != NULL); 2930 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2931 ixa->ixa_cred = cr; 2932 ixa->ixa_cpid = pid; 2933 if (is_system_labeled()) { 2934 /* We need to restart with a label based on the cred */ 2935 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 2936 } 2937 2938 /* In case previous destination was multicast or multirt */ 2939 ip_attr_newdst(ixa); 2940 2941 /* Get a copy of conn_xmit_ipp since the options might change it */ 2942 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); 2943 if (ipp == NULL) { 2944 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 2945 ixa->ixa_cred = connp->conn_cred; /* Restore */ 2946 ixa->ixa_cpid = connp->conn_cpid; 2947 ixa_refrele(ixa); 2948 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 2949 freemsg(mp); 2950 return (ENOMEM); 2951 } 2952 mutex_enter(&connp->conn_lock); 2953 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); 2954 mutex_exit(&connp->conn_lock); 2955 if (error != 0) { 2956 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 2957 freemsg(mp); 2958 goto done; 2959 } 2960 2961 /* 2962 * Parse the options and update ixa and ipp as a result. 2963 * Note that ixa_tsl can be updated if SCM_UCRED. 2964 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. 
2965 */ 2966 2967 coa = &coas; 2968 coa->coa_connp = connp; 2969 coa->coa_ixa = ixa; 2970 coa->coa_ipp = ipp; 2971 coa->coa_ancillary = B_TRUE; 2972 coa->coa_changed = 0; 2973 2974 if (msg != NULL) { 2975 error = process_auxiliary_options(connp, msg->msg_control, 2976 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); 2977 } else { 2978 struct T_unitdata_req *tudr; 2979 2980 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; 2981 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 2982 error = tpi_optcom_buf(connp->conn_wq, tudr_mp, 2983 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, 2984 coa, &is_absreq_failure); 2985 } 2986 if (error != 0) { 2987 /* 2988 * Note: No special action needed in this 2989 * module for "is_absreq_failure" 2990 */ 2991 freemsg(mp); 2992 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 2993 goto done; 2994 } 2995 ASSERT(is_absreq_failure == 0); 2996 2997 mutex_enter(&connp->conn_lock); 2998 /* 2999 * If laddr is unspecified then we look at sin6_src_id. 3000 * We will give precedence to a source address set with IPV6_PKTINFO 3001 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 3002 * want ip_attr_connect to select a source (since it can fail) when 3003 * IPV6_PKTINFO is specified. 3004 * If this doesn't result in a source address then we get a source 3005 * from ip_attr_connect() below. 
3006 */ 3007 v6src = connp->conn_saddr_v6; 3008 if (sin != NULL) { 3009 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 3010 dstport = sin->sin_port; 3011 flowinfo = 0; 3012 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3013 ixa->ixa_flags |= IXAF_IS_IPV4; 3014 } else if (sin6 != NULL) { 3015 v6dst = sin6->sin6_addr; 3016 dstport = sin6->sin6_port; 3017 flowinfo = sin6->sin6_flowinfo; 3018 srcid = sin6->__sin6_src_id; 3019 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 3020 ixa->ixa_scopeid = sin6->sin6_scope_id; 3021 ixa->ixa_flags |= IXAF_SCOPEID_SET; 3022 } else { 3023 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3024 } 3025 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 3026 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3027 connp->conn_netstack); 3028 } 3029 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 3030 ixa->ixa_flags |= IXAF_IS_IPV4; 3031 else 3032 ixa->ixa_flags &= ~IXAF_IS_IPV4; 3033 } else { 3034 /* Connected case */ 3035 v6dst = connp->conn_faddr_v6; 3036 dstport = connp->conn_fport; 3037 flowinfo = connp->conn_flowinfo; 3038 } 3039 mutex_exit(&connp->conn_lock); 3040 3041 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ 3042 if (ipp->ipp_fields & IPPF_ADDR) { 3043 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3044 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3045 v6src = ipp->ipp_addr; 3046 } else { 3047 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3048 v6src = ipp->ipp_addr; 3049 } 3050 } 3051 3052 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop); 3053 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 3054 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 3055 3056 switch (error) { 3057 case 0: 3058 break; 3059 case EADDRNOTAVAIL: 3060 /* 3061 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3062 * Don't have the application see that errno 3063 */ 3064 error = ENETUNREACH; 3065 goto failed; 3066 case ENETDOWN: 3067 /* 3068 * Have !ipif_addr_ready address; drop packet silently 3069 * until we can get applications to not send until we 3070 * are ready. 3071 */ 3072 error = 0; 3073 goto failed; 3074 case EHOSTUNREACH: 3075 case ENETUNREACH: 3076 if (ixa->ixa_ire != NULL) { 3077 /* 3078 * Let conn_ip_output/ire_send_noroute return 3079 * the error and send any local ICMP error. 3080 */ 3081 error = 0; 3082 break; 3083 } 3084 /* FALLTHRU */ 3085 default: 3086 failed: 3087 freemsg(mp); 3088 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3089 goto done; 3090 } 3091 3092 /* 3093 * We might be going to a different destination than last time, 3094 * thus check that TX allows the communication and compute any 3095 * needed label. 3096 * 3097 * TSOL Note: We have an exclusive ipp and ixa for this thread so we 3098 * don't have to worry about concurrent threads. 3099 */ 3100 if (is_system_labeled()) { 3101 /* Using UDP MLP requires SCM_UCRED from user */ 3102 if (connp->conn_mlp_type != mlptSingle && 3103 !((ixa->ixa_flags & IXAF_UCRED_TSL))) { 3104 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3105 error = ECONNREFUSED; 3106 freemsg(mp); 3107 goto done; 3108 } 3109 /* 3110 * Check whether Trusted Solaris policy allows communication 3111 * with this host, and pretend that the destination is 3112 * unreachable if not. 3113 * Compute any needed label and place it in ipp_label_v4/v6. 3114 * 3115 * Later conn_build_hdr_template/conn_prepend_hdr takes 3116 * ipp_label_v4/v6 to form the packet. 3117 * 3118 * Tsol note: We have ipp structure local to this thread so 3119 * no locking is needed. 
3120 */ 3121 error = conn_update_label(connp, ixa, &v6dst, ipp); 3122 if (error != 0) { 3123 freemsg(mp); 3124 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3125 goto done; 3126 } 3127 } 3128 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport, 3129 flowinfo, mp, &error); 3130 if (mp == NULL) { 3131 ASSERT(error != 0); 3132 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3133 goto done; 3134 } 3135 if (ixa->ixa_pktlen > IP_MAXPACKET) { 3136 error = EMSGSIZE; 3137 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3138 freemsg(mp); 3139 goto done; 3140 } 3141 /* We're done. Pass the packet to ip. */ 3142 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3143 3144 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3145 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3146 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3147 3148 error = conn_ip_output(mp, ixa); 3149 /* No udpOutErrors if an error since IP increases its error counter */ 3150 switch (error) { 3151 case 0: 3152 break; 3153 case EWOULDBLOCK: 3154 (void) ixa_check_drain_insert(connp, ixa); 3155 error = 0; 3156 break; 3157 case EADDRNOTAVAIL: 3158 /* 3159 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3160 * Don't have the application see that errno 3161 */ 3162 error = ENETUNREACH; 3163 /* FALLTHRU */ 3164 default: 3165 mutex_enter(&connp->conn_lock); 3166 /* 3167 * Clear the source and v6lastdst so we call ip_attr_connect 3168 * for the next packet and try to pick a better source. 
3169 */ 3170 if (connp->conn_mcbc_bind) 3171 connp->conn_saddr_v6 = ipv6_all_zeros; 3172 else 3173 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3174 connp->conn_v6lastdst = ipv6_all_zeros; 3175 mutex_exit(&connp->conn_lock); 3176 break; 3177 } 3178 done: 3179 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3180 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3181 ixa->ixa_cpid = connp->conn_cpid; 3182 ixa_refrele(ixa); 3183 ip_pkt_free(ipp); 3184 kmem_free(ipp, sizeof (*ipp)); 3185 return (error); 3186 } 3187 3188 /* 3189 * Handle sending an M_DATA for a connected socket. 3190 * Handles both IPv4 and IPv6. 3191 */ 3192 static int 3193 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) 3194 { 3195 udp_t *udp = connp->conn_udp; 3196 udp_stack_t *us = udp->udp_us; 3197 int error; 3198 ip_xmit_attr_t *ixa; 3199 3200 /* 3201 * If no other thread is using conn_ixa this just gets a reference to 3202 * conn_ixa. Otherwise we get a safe copy of conn_ixa. 3203 */ 3204 ixa = conn_get_ixa(connp, B_FALSE); 3205 if (ixa == NULL) { 3206 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3207 freemsg(mp); 3208 return (ENOMEM); 3209 } 3210 3211 ASSERT(cr != NULL); 3212 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3213 ixa->ixa_cred = cr; 3214 ixa->ixa_cpid = pid; 3215 3216 mutex_enter(&connp->conn_lock); 3217 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6, 3218 connp->conn_fport, connp->conn_flowinfo, &error); 3219 3220 if (mp == NULL) { 3221 ASSERT(error != 0); 3222 mutex_exit(&connp->conn_lock); 3223 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3224 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3225 ixa->ixa_cpid = connp->conn_cpid; 3226 ixa_refrele(ixa); 3227 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3228 freemsg(mp); 3229 return (error); 3230 } 3231 3232 /* 3233 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3234 * safe copy, then we need to fill in any pointers in it. 
3235 */ 3236 if (ixa->ixa_ire == NULL) { 3237 in6_addr_t faddr, saddr; 3238 in6_addr_t nexthop; 3239 in_port_t fport; 3240 3241 saddr = connp->conn_saddr_v6; 3242 faddr = connp->conn_faddr_v6; 3243 fport = connp->conn_fport; 3244 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop); 3245 mutex_exit(&connp->conn_lock); 3246 3247 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, 3248 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | 3249 IPDF_IPSEC); 3250 switch (error) { 3251 case 0: 3252 break; 3253 case EADDRNOTAVAIL: 3254 /* 3255 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3256 * Don't have the application see that errno 3257 */ 3258 error = ENETUNREACH; 3259 goto failed; 3260 case ENETDOWN: 3261 /* 3262 * Have !ipif_addr_ready address; drop packet silently 3263 * until we can get applications to not send until we 3264 * are ready. 3265 */ 3266 error = 0; 3267 goto failed; 3268 case EHOSTUNREACH: 3269 case ENETUNREACH: 3270 if (ixa->ixa_ire != NULL) { 3271 /* 3272 * Let conn_ip_output/ire_send_noroute return 3273 * the error and send any local ICMP error. 3274 */ 3275 error = 0; 3276 break; 3277 } 3278 /* FALLTHRU */ 3279 default: 3280 failed: 3281 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3282 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3283 ixa->ixa_cpid = connp->conn_cpid; 3284 ixa_refrele(ixa); 3285 freemsg(mp); 3286 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3287 return (error); 3288 } 3289 } else { 3290 /* Done with conn_t */ 3291 mutex_exit(&connp->conn_lock); 3292 } 3293 ASSERT(ixa->ixa_ire != NULL); 3294 3295 /* We're done. Pass the packet to ip. 
*/ 3296 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3297 3298 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3299 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3300 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3301 3302 error = conn_ip_output(mp, ixa); 3303 /* No udpOutErrors if an error since IP increases its error counter */ 3304 switch (error) { 3305 case 0: 3306 break; 3307 case EWOULDBLOCK: 3308 (void) ixa_check_drain_insert(connp, ixa); 3309 error = 0; 3310 break; 3311 case EADDRNOTAVAIL: 3312 /* 3313 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3314 * Don't have the application see that errno 3315 */ 3316 error = ENETUNREACH; 3317 break; 3318 } 3319 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3320 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3321 ixa->ixa_cpid = connp->conn_cpid; 3322 ixa_refrele(ixa); 3323 return (error); 3324 } 3325 3326 /* 3327 * Handle sending an M_DATA to the last destination. 3328 * Handles both IPv4 and IPv6. 3329 * 3330 * NOTE: The caller must hold conn_lock and we drop it here. 
3331 */ 3332 static int 3333 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, 3334 ip_xmit_attr_t *ixa) 3335 { 3336 udp_t *udp = connp->conn_udp; 3337 udp_stack_t *us = udp->udp_us; 3338 int error; 3339 3340 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3341 ASSERT(ixa != NULL); 3342 3343 ASSERT(cr != NULL); 3344 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3345 ixa->ixa_cred = cr; 3346 ixa->ixa_cpid = pid; 3347 3348 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, 3349 connp->conn_lastdstport, connp->conn_lastflowinfo, &error); 3350 3351 if (mp == NULL) { 3352 ASSERT(error != 0); 3353 mutex_exit(&connp->conn_lock); 3354 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3355 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3356 ixa->ixa_cpid = connp->conn_cpid; 3357 ixa_refrele(ixa); 3358 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3359 freemsg(mp); 3360 return (error); 3361 } 3362 3363 /* 3364 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3365 * safe copy, then we need to fill in any pointers in it. 3366 */ 3367 if (ixa->ixa_ire == NULL) { 3368 in6_addr_t lastdst, lastsrc; 3369 in6_addr_t nexthop; 3370 in_port_t lastport; 3371 3372 lastsrc = connp->conn_v6lastsrc; 3373 lastdst = connp->conn_v6lastdst; 3374 lastport = connp->conn_lastdstport; 3375 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); 3376 mutex_exit(&connp->conn_lock); 3377 3378 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, 3379 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | 3380 IPDF_VERIFY_DST | IPDF_IPSEC); 3381 switch (error) { 3382 case 0: 3383 break; 3384 case EADDRNOTAVAIL: 3385 /* 3386 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3387 * Don't have the application see that errno 3388 */ 3389 error = ENETUNREACH; 3390 goto failed; 3391 case ENETDOWN: 3392 /* 3393 * Have !ipif_addr_ready address; drop packet silently 3394 * until we can get applications to not send until we 3395 * are ready. 
3396 */ 3397 error = 0; 3398 goto failed; 3399 case EHOSTUNREACH: 3400 case ENETUNREACH: 3401 if (ixa->ixa_ire != NULL) { 3402 /* 3403 * Let conn_ip_output/ire_send_noroute return 3404 * the error and send any local ICMP error. 3405 */ 3406 error = 0; 3407 break; 3408 } 3409 /* FALLTHRU */ 3410 default: 3411 failed: 3412 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3413 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3414 ixa->ixa_cpid = connp->conn_cpid; 3415 ixa_refrele(ixa); 3416 freemsg(mp); 3417 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3418 return (error); 3419 } 3420 } else { 3421 /* Done with conn_t */ 3422 mutex_exit(&connp->conn_lock); 3423 } 3424 3425 /* We're done. Pass the packet to ip. */ 3426 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3427 3428 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 3429 void_ip_t *, mp->b_rptr, udp_t *, udp, udpha_t *, 3430 &mp->b_rptr[ixa->ixa_ip_hdr_length]); 3431 3432 error = conn_ip_output(mp, ixa); 3433 /* No udpOutErrors if an error since IP increases its error counter */ 3434 switch (error) { 3435 case 0: 3436 break; 3437 case EWOULDBLOCK: 3438 (void) ixa_check_drain_insert(connp, ixa); 3439 error = 0; 3440 break; 3441 case EADDRNOTAVAIL: 3442 /* 3443 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3444 * Don't have the application see that errno 3445 */ 3446 error = ENETUNREACH; 3447 /* FALLTHRU */ 3448 default: 3449 mutex_enter(&connp->conn_lock); 3450 /* 3451 * Clear the source and v6lastdst so we call ip_attr_connect 3452 * for the next packet and try to pick a better source. 
3453 */ 3454 if (connp->conn_mcbc_bind) 3455 connp->conn_saddr_v6 = ipv6_all_zeros; 3456 else 3457 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3458 connp->conn_v6lastdst = ipv6_all_zeros; 3459 mutex_exit(&connp->conn_lock); 3460 break; 3461 } 3462 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3463 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3464 ixa->ixa_cpid = connp->conn_cpid; 3465 ixa_refrele(ixa); 3466 return (error); 3467 } 3468 3469 3470 /* 3471 * Prepend the header template and then fill in the source and 3472 * flowinfo. The caller needs to handle the destination address since 3473 * it's setting is different if rthdr or source route. 3474 * 3475 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET. 3476 * When it returns NULL it sets errorp. 3477 */ 3478 static mblk_t * 3479 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, 3480 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) 3481 { 3482 udp_t *udp = connp->conn_udp; 3483 udp_stack_t *us = udp->udp_us; 3484 boolean_t insert_spi = udp->udp_nat_t_endpoint; 3485 uint_t pktlen; 3486 uint_t alloclen; 3487 uint_t copylen; 3488 uint8_t *iph; 3489 uint_t ip_hdr_length; 3490 udpha_t *udpha; 3491 uint32_t cksum; 3492 ip_pkt_t *ipp; 3493 3494 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3495 3496 /* 3497 * Copy the header template and leave space for an SPI 3498 */ 3499 copylen = connp->conn_ht_iphc_len; 3500 alloclen = copylen + (insert_spi ? 
sizeof (uint32_t) : 0); 3501 pktlen = alloclen + msgdsize(mp); 3502 if (pktlen > IP_MAXPACKET) { 3503 freemsg(mp); 3504 *errorp = EMSGSIZE; 3505 return (NULL); 3506 } 3507 ixa->ixa_pktlen = pktlen; 3508 3509 /* check/fix buffer config, setup pointers into it */ 3510 iph = mp->b_rptr - alloclen; 3511 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { 3512 mblk_t *mp1; 3513 3514 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); 3515 if (mp1 == NULL) { 3516 freemsg(mp); 3517 *errorp = ENOMEM; 3518 return (NULL); 3519 } 3520 mp1->b_wptr = DB_LIM(mp1); 3521 mp1->b_cont = mp; 3522 mp = mp1; 3523 iph = (mp->b_wptr - alloclen); 3524 } 3525 mp->b_rptr = iph; 3526 bcopy(connp->conn_ht_iphc, iph, copylen); 3527 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); 3528 3529 ixa->ixa_ip_hdr_length = ip_hdr_length; 3530 udpha = (udpha_t *)(iph + ip_hdr_length); 3531 3532 /* 3533 * Setup header length and prepare for ULP checksum done in IP. 3534 * udp_build_hdr_template has already massaged any routing header 3535 * and placed the result in conn_sum. 3536 * 3537 * We make it easy for IP to include our pseudo header 3538 * by putting our length in uha_checksum. 
3539 */ 3540 cksum = pktlen - ip_hdr_length; 3541 udpha->uha_length = htons(cksum); 3542 3543 cksum += connp->conn_sum; 3544 cksum = (cksum >> 16) + (cksum & 0xFFFF); 3545 ASSERT(cksum < 0x10000); 3546 3547 ipp = &connp->conn_xmit_ipp; 3548 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3549 ipha_t *ipha = (ipha_t *)iph; 3550 3551 ipha->ipha_length = htons((uint16_t)pktlen); 3552 3553 /* IP does the checksum if uha_checksum is non-zero */ 3554 if (us->us_do_checksum) 3555 udpha->uha_checksum = htons(cksum); 3556 3557 /* if IP_PKTINFO specified an addres it wins over bind() */ 3558 if ((ipp->ipp_fields & IPPF_ADDR) && 3559 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3560 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); 3561 ipha->ipha_src = ipp->ipp_addr_v4; 3562 } else { 3563 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 3564 } 3565 } else { 3566 ip6_t *ip6h = (ip6_t *)iph; 3567 3568 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); 3569 udpha->uha_checksum = htons(cksum); 3570 3571 /* if IP_PKTINFO specified an addres it wins over bind() */ 3572 if ((ipp->ipp_fields & IPPF_ADDR) && 3573 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3574 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); 3575 ip6h->ip6_src = ipp->ipp_addr; 3576 } else { 3577 ip6h->ip6_src = *v6src; 3578 } 3579 ip6h->ip6_vcf = 3580 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 3581 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 3582 if (ipp->ipp_fields & IPPF_TCLASS) { 3583 /* Overrides the class part of flowinfo */ 3584 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 3585 ipp->ipp_tclass); 3586 } 3587 } 3588 3589 /* Insert all-0s SPI now. 
*/ 3590 if (insert_spi) 3591 *((uint32_t *)(udpha + 1)) = 0; 3592 3593 udpha->uha_dst_port = dstport; 3594 return (mp); 3595 } 3596 3597 /* 3598 * Send a T_UDERR_IND in response to an M_DATA 3599 */ 3600 static void 3601 udp_ud_err_connected(conn_t *connp, t_scalar_t error) 3602 { 3603 struct sockaddr_storage ss; 3604 sin_t *sin; 3605 sin6_t *sin6; 3606 struct sockaddr *addr; 3607 socklen_t addrlen; 3608 mblk_t *mp1; 3609 3610 mutex_enter(&connp->conn_lock); 3611 /* Initialize addr and addrlen as if they're passed in */ 3612 if (connp->conn_family == AF_INET) { 3613 sin = (sin_t *)&ss; 3614 *sin = sin_null; 3615 sin->sin_family = AF_INET; 3616 sin->sin_port = connp->conn_fport; 3617 sin->sin_addr.s_addr = connp->conn_faddr_v4; 3618 addr = (struct sockaddr *)sin; 3619 addrlen = sizeof (*sin); 3620 } else { 3621 sin6 = (sin6_t *)&ss; 3622 *sin6 = sin6_null; 3623 sin6->sin6_family = AF_INET6; 3624 sin6->sin6_port = connp->conn_fport; 3625 sin6->sin6_flowinfo = connp->conn_flowinfo; 3626 sin6->sin6_addr = connp->conn_faddr_v6; 3627 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && 3628 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 3629 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; 3630 } else { 3631 sin6->sin6_scope_id = 0; 3632 } 3633 sin6->__sin6_src_id = 0; 3634 addr = (struct sockaddr *)sin6; 3635 addrlen = sizeof (*sin6); 3636 } 3637 mutex_exit(&connp->conn_lock); 3638 3639 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); 3640 if (mp1 != NULL) 3641 putnext(connp->conn_rq, mp1); 3642 } 3643 3644 /* 3645 * This routine handles all messages passed downstream. It either 3646 * consumes the message or passes it downstream; it never queues a 3647 * a message. 3648 * 3649 * Also entry point for sockfs when udp is in "direct sockfs" mode. 
This mode 3650 * is valid when we are directly beneath the stream head, and thus sockfs 3651 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3652 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3653 * connected endpoints. 3654 */ 3655 void 3656 udp_wput(queue_t *q, mblk_t *mp) 3657 { 3658 sin6_t *sin6; 3659 sin_t *sin = NULL; 3660 uint_t srcid; 3661 conn_t *connp = Q_TO_CONN(q); 3662 udp_t *udp = connp->conn_udp; 3663 int error = 0; 3664 struct sockaddr *addr = NULL; 3665 socklen_t addrlen; 3666 udp_stack_t *us = udp->udp_us; 3667 struct T_unitdata_req *tudr; 3668 mblk_t *data_mp; 3669 ushort_t ipversion; 3670 cred_t *cr; 3671 pid_t pid; 3672 3673 /* 3674 * We directly handle several cases here: T_UNITDATA_REQ message 3675 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3676 * socket. 3677 */ 3678 switch (DB_TYPE(mp)) { 3679 case M_DATA: 3680 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3681 /* Not connected; address is required */ 3682 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3683 UDP_DBGSTAT(us, udp_data_notconn); 3684 UDP_STAT(us, udp_out_err_notconn); 3685 freemsg(mp); 3686 return; 3687 } 3688 /* 3689 * All Solaris components should pass a db_credp 3690 * for this message, hence we ASSERT. 3691 * On production kernels we return an error to be robust against 3692 * random streams modules sitting on top of us. 
3693 */ 3694 cr = msg_getcred(mp, &pid); 3695 ASSERT(cr != NULL); 3696 if (cr == NULL) { 3697 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3698 freemsg(mp); 3699 return; 3700 } 3701 ASSERT(udp->udp_issocket); 3702 UDP_DBGSTAT(us, udp_data_conn); 3703 error = udp_output_connected(connp, mp, cr, pid); 3704 if (error != 0) { 3705 UDP_STAT(us, udp_out_err_output); 3706 if (connp->conn_rq != NULL) 3707 udp_ud_err_connected(connp, (t_scalar_t)error); 3708 #ifdef DEBUG 3709 printf("udp_output_connected returned %d\n", error); 3710 #endif 3711 } 3712 return; 3713 3714 case M_PROTO: 3715 case M_PCPROTO: 3716 tudr = (struct T_unitdata_req *)mp->b_rptr; 3717 if (MBLKL(mp) < sizeof (*tudr) || 3718 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3719 udp_wput_other(q, mp); 3720 return; 3721 } 3722 break; 3723 3724 default: 3725 udp_wput_other(q, mp); 3726 return; 3727 } 3728 3729 /* Handle valid T_UNITDATA_REQ here */ 3730 data_mp = mp->b_cont; 3731 if (data_mp == NULL) { 3732 error = EPROTO; 3733 goto ud_error2; 3734 } 3735 mp->b_cont = NULL; 3736 3737 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3738 error = EADDRNOTAVAIL; 3739 goto ud_error2; 3740 } 3741 3742 /* 3743 * All Solaris components should pass a db_credp 3744 * for this TPI message, hence we should ASSERT. 3745 * However, RPC (svc_clts_ksend) does this odd thing where it 3746 * passes the options from a T_UNITDATA_IND unchanged in a 3747 * T_UNITDATA_REQ. While that is the right thing to do for 3748 * some options, SCM_UCRED being the key one, this also makes it 3749 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3750 */ 3751 cr = msg_getcred(mp, &pid); 3752 if (cr == NULL) { 3753 cr = connp->conn_cred; 3754 pid = connp->conn_cpid; 3755 } 3756 3757 /* 3758 * If a port has not been bound to the stream, fail. 3759 * This is not a problem when sockfs is directly 3760 * above us, because it will ensure that the socket 3761 * is first bound before allowing data to be sent. 
3762 */ 3763 if (udp->udp_state == TS_UNBND) { 3764 error = EPROTO; 3765 goto ud_error2; 3766 } 3767 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3768 addrlen = tudr->DEST_length; 3769 3770 switch (connp->conn_family) { 3771 case AF_INET6: 3772 sin6 = (sin6_t *)addr; 3773 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3774 (sin6->sin6_family != AF_INET6)) { 3775 error = EADDRNOTAVAIL; 3776 goto ud_error2; 3777 } 3778 3779 srcid = sin6->__sin6_src_id; 3780 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3781 /* 3782 * Destination is a non-IPv4-compatible IPv6 address. 3783 * Send out an IPv6 format packet. 3784 */ 3785 3786 /* 3787 * If the local address is a mapped address return 3788 * an error. 3789 * It would be possible to send an IPv6 packet but the 3790 * response would never make it back to the application 3791 * since it is bound to a mapped address. 3792 */ 3793 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3794 error = EADDRNOTAVAIL; 3795 goto ud_error2; 3796 } 3797 3798 UDP_DBGSTAT(us, udp_out_ipv6); 3799 3800 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3801 sin6->sin6_addr = ipv6_loopback; 3802 ipversion = IPV6_VERSION; 3803 } else { 3804 if (connp->conn_ipv6_v6only) { 3805 error = EADDRNOTAVAIL; 3806 goto ud_error2; 3807 } 3808 3809 /* 3810 * If the local address is not zero or a mapped address 3811 * return an error. It would be possible to send an 3812 * IPv4 packet but the response would never make it 3813 * back to the application since it is bound to a 3814 * non-mapped address. 
3815 */ 3816 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3817 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3818 error = EADDRNOTAVAIL; 3819 goto ud_error2; 3820 } 3821 UDP_DBGSTAT(us, udp_out_mapped); 3822 3823 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3824 V4_PART_OF_V6(sin6->sin6_addr) = 3825 htonl(INADDR_LOOPBACK); 3826 } 3827 ipversion = IPV4_VERSION; 3828 } 3829 3830 if (tudr->OPT_length != 0) { 3831 /* 3832 * If we are connected then the destination needs to be 3833 * the same as the connected one. 3834 */ 3835 if (udp->udp_state == TS_DATA_XFER && 3836 !conn_same_as_last_v6(connp, sin6)) { 3837 error = EISCONN; 3838 goto ud_error2; 3839 } 3840 UDP_STAT(us, udp_out_opt); 3841 error = udp_output_ancillary(connp, NULL, sin6, 3842 data_mp, mp, NULL, cr, pid); 3843 } else { 3844 ip_xmit_attr_t *ixa; 3845 3846 /* 3847 * We have to allocate an ip_xmit_attr_t before we grab 3848 * conn_lock and we need to hold conn_lock once we've 3849 * checked conn_same_as_last_v6 to handle concurrent 3850 * send* calls on a socket. 
3851 */ 3852 ixa = conn_get_ixa(connp, B_FALSE); 3853 if (ixa == NULL) { 3854 error = ENOMEM; 3855 goto ud_error2; 3856 } 3857 mutex_enter(&connp->conn_lock); 3858 3859 if (conn_same_as_last_v6(connp, sin6) && 3860 connp->conn_lastsrcid == srcid && 3861 ipsec_outbound_policy_current(ixa)) { 3862 UDP_DBGSTAT(us, udp_out_lastdst); 3863 /* udp_output_lastdst drops conn_lock */ 3864 error = udp_output_lastdst(connp, data_mp, cr, 3865 pid, ixa); 3866 } else { 3867 UDP_DBGSTAT(us, udp_out_diffdst); 3868 /* udp_output_newdst drops conn_lock */ 3869 error = udp_output_newdst(connp, data_mp, NULL, 3870 sin6, ipversion, cr, pid, ixa); 3871 } 3872 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3873 } 3874 if (error == 0) { 3875 freeb(mp); 3876 return; 3877 } 3878 break; 3879 3880 case AF_INET: 3881 sin = (sin_t *)addr; 3882 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 3883 (sin->sin_family != AF_INET)) { 3884 error = EADDRNOTAVAIL; 3885 goto ud_error2; 3886 } 3887 UDP_DBGSTAT(us, udp_out_ipv4); 3888 if (sin->sin_addr.s_addr == INADDR_ANY) 3889 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 3890 ipversion = IPV4_VERSION; 3891 3892 srcid = 0; 3893 if (tudr->OPT_length != 0) { 3894 /* 3895 * If we are connected then the destination needs to be 3896 * the same as the connected one. 3897 */ 3898 if (udp->udp_state == TS_DATA_XFER && 3899 !conn_same_as_last_v4(connp, sin)) { 3900 error = EISCONN; 3901 goto ud_error2; 3902 } 3903 UDP_STAT(us, udp_out_opt); 3904 error = udp_output_ancillary(connp, sin, NULL, 3905 data_mp, mp, NULL, cr, pid); 3906 } else { 3907 ip_xmit_attr_t *ixa; 3908 3909 /* 3910 * We have to allocate an ip_xmit_attr_t before we grab 3911 * conn_lock and we need to hold conn_lock once we've 3912 * checked conn_same_as_last_v4 to handle concurrent 3913 * send* calls on a socket. 
3914 */ 3915 ixa = conn_get_ixa(connp, B_FALSE); 3916 if (ixa == NULL) { 3917 error = ENOMEM; 3918 goto ud_error2; 3919 } 3920 mutex_enter(&connp->conn_lock); 3921 3922 if (conn_same_as_last_v4(connp, sin) && 3923 ipsec_outbound_policy_current(ixa)) { 3924 UDP_DBGSTAT(us, udp_out_lastdst); 3925 /* udp_output_lastdst drops conn_lock */ 3926 error = udp_output_lastdst(connp, data_mp, cr, 3927 pid, ixa); 3928 } else { 3929 UDP_DBGSTAT(us, udp_out_diffdst); 3930 /* udp_output_newdst drops conn_lock */ 3931 error = udp_output_newdst(connp, data_mp, sin, 3932 NULL, ipversion, cr, pid, ixa); 3933 } 3934 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 3935 } 3936 if (error == 0) { 3937 freeb(mp); 3938 return; 3939 } 3940 break; 3941 } 3942 UDP_STAT(us, udp_out_err_output); 3943 ASSERT(mp != NULL); 3944 /* mp is freed by the following routine */ 3945 udp_ud_err(q, mp, (t_scalar_t)error); 3946 return; 3947 3948 ud_error2: 3949 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3950 freemsg(data_mp); 3951 UDP_STAT(us, udp_out_err_output); 3952 ASSERT(mp != NULL); 3953 /* mp is freed by the following routine */ 3954 udp_ud_err(q, mp, (t_scalar_t)error); 3955 } 3956 3957 /* 3958 * Handle the case of the IP address, port, flow label being different 3959 * for both IPv4 and IPv6. 3960 * 3961 * NOTE: The caller must hold conn_lock and we drop it here. 
3962 */ 3963 static int 3964 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 3965 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 3966 { 3967 uint_t srcid; 3968 uint32_t flowinfo; 3969 udp_t *udp = connp->conn_udp; 3970 int error = 0; 3971 ip_xmit_attr_t *oldixa; 3972 udp_stack_t *us = udp->udp_us; 3973 in6_addr_t v6src; 3974 in6_addr_t v6dst; 3975 in6_addr_t v6nexthop; 3976 in_port_t dstport; 3977 3978 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3979 ASSERT(ixa != NULL); 3980 /* 3981 * We hold conn_lock across all the use and modifications of 3982 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 3983 * stay consistent. 3984 */ 3985 3986 ASSERT(cr != NULL); 3987 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3988 ixa->ixa_cred = cr; 3989 ixa->ixa_cpid = pid; 3990 if (is_system_labeled()) { 3991 /* We need to restart with a label based on the cred */ 3992 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3993 } 3994 3995 /* 3996 * If we are connected then the destination needs to be the 3997 * same as the connected one, which is not the case here since we 3998 * checked for that above. 3999 */ 4000 if (udp->udp_state == TS_DATA_XFER) { 4001 mutex_exit(&connp->conn_lock); 4002 error = EISCONN; 4003 goto ud_error; 4004 } 4005 4006 /* In case previous destination was multicast or multirt */ 4007 ip_attr_newdst(ixa); 4008 4009 /* 4010 * If laddr is unspecified then we look at sin6_src_id. 4011 * We will give precedence to a source address set with IPV6_PKTINFO 4012 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 4013 * want ip_attr_connect to select a source (since it can fail) when 4014 * IPV6_PKTINFO is specified. 4015 * If this doesn't result in a source address then we get a source 4016 * from ip_attr_connect() below. 
4017 */ 4018 v6src = connp->conn_saddr_v6; 4019 if (sin != NULL) { 4020 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 4021 dstport = sin->sin_port; 4022 flowinfo = 0; 4023 srcid = 0; 4024 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4025 if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) { 4026 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4027 connp->conn_netstack); 4028 } 4029 ixa->ixa_flags |= IXAF_IS_IPV4; 4030 } else { 4031 v6dst = sin6->sin6_addr; 4032 dstport = sin6->sin6_port; 4033 flowinfo = sin6->sin6_flowinfo; 4034 srcid = sin6->__sin6_src_id; 4035 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 4036 ixa->ixa_scopeid = sin6->sin6_scope_id; 4037 ixa->ixa_flags |= IXAF_SCOPEID_SET; 4038 } else { 4039 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4040 } 4041 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 4042 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4043 connp->conn_netstack); 4044 } 4045 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 4046 ixa->ixa_flags |= IXAF_IS_IPV4; 4047 else 4048 ixa->ixa_flags &= ~IXAF_IS_IPV4; 4049 } 4050 /* Handle IP_PKTINFO/IPV6_PKTINFO setting source address. */ 4051 if (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR) { 4052 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 4053 4054 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4055 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4056 v6src = ipp->ipp_addr; 4057 } else { 4058 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4059 v6src = ipp->ipp_addr; 4060 } 4061 } 4062 4063 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 4064 mutex_exit(&connp->conn_lock); 4065 4066 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 4067 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 4068 switch (error) { 4069 case 0: 4070 break; 4071 case EADDRNOTAVAIL: 4072 /* 4073 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
4074 * Don't have the application see that errno 4075 */ 4076 error = ENETUNREACH; 4077 goto failed; 4078 case ENETDOWN: 4079 /* 4080 * Have !ipif_addr_ready address; drop packet silently 4081 * until we can get applications to not send until we 4082 * are ready. 4083 */ 4084 error = 0; 4085 goto failed; 4086 case EHOSTUNREACH: 4087 case ENETUNREACH: 4088 if (ixa->ixa_ire != NULL) { 4089 /* 4090 * Let conn_ip_output/ire_send_noroute return 4091 * the error and send any local ICMP error. 4092 */ 4093 error = 0; 4094 break; 4095 } 4096 /* FALLTHRU */ 4097 failed: 4098 default: 4099 goto ud_error; 4100 } 4101 4102 4103 /* 4104 * Cluster note: we let the cluster hook know that we are sending to a 4105 * new address and/or port. 4106 */ 4107 if (cl_inet_connect2 != NULL) { 4108 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 4109 if (error != 0) { 4110 error = EHOSTUNREACH; 4111 goto ud_error; 4112 } 4113 } 4114 4115 mutex_enter(&connp->conn_lock); 4116 /* 4117 * While we dropped the lock some other thread might have connected 4118 * this socket. If so we bail out with EISCONN to ensure that the 4119 * connecting thread is the one that updates conn_ixa, conn_ht_* 4120 * and conn_*last*. 4121 */ 4122 if (udp->udp_state == TS_DATA_XFER) { 4123 mutex_exit(&connp->conn_lock); 4124 error = EISCONN; 4125 goto ud_error; 4126 } 4127 4128 /* 4129 * We need to rebuild the headers if 4130 * - we are labeling packets (could be different for different 4131 * destinations) 4132 * - we have a source route (or routing header) since we need to 4133 * massage that to get the pseudo-header checksum 4134 * - the IP version is different than the last time 4135 * - a socket option with COA_HEADER_CHANGED has been set which 4136 * set conn_v6lastdst to zero. 4137 * 4138 * Otherwise the prepend function will just update the src, dst, 4139 * dstport, and flow label. 
4140 */ 4141 if (is_system_labeled()) { 4142 /* TX MLP requires SCM_UCRED and don't have that here */ 4143 if (connp->conn_mlp_type != mlptSingle) { 4144 mutex_exit(&connp->conn_lock); 4145 error = ECONNREFUSED; 4146 goto ud_error; 4147 } 4148 /* 4149 * Check whether Trusted Solaris policy allows communication 4150 * with this host, and pretend that the destination is 4151 * unreachable if not. 4152 * Compute any needed label and place it in ipp_label_v4/v6. 4153 * 4154 * Later conn_build_hdr_template/conn_prepend_hdr takes 4155 * ipp_label_v4/v6 to form the packet. 4156 * 4157 * Tsol note: Since we hold conn_lock we know no other 4158 * thread manipulates conn_xmit_ipp. 4159 */ 4160 error = conn_update_label(connp, ixa, &v6dst, 4161 &connp->conn_xmit_ipp); 4162 if (error != 0) { 4163 mutex_exit(&connp->conn_lock); 4164 goto ud_error; 4165 } 4166 /* Rebuild the header template */ 4167 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4168 flowinfo); 4169 if (error != 0) { 4170 mutex_exit(&connp->conn_lock); 4171 goto ud_error; 4172 } 4173 } else if ((connp->conn_xmit_ipp.ipp_fields & 4174 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 4175 ipversion != connp->conn_lastipversion || 4176 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 4177 /* Rebuild the header template */ 4178 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4179 flowinfo); 4180 if (error != 0) { 4181 mutex_exit(&connp->conn_lock); 4182 goto ud_error; 4183 } 4184 } else { 4185 /* Simply update the destination address if no source route */ 4186 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4187 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 4188 4189 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 4190 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 4191 ipha->ipha_fragment_offset_and_flags |= 4192 IPH_DF_HTONS; 4193 } else { 4194 ipha->ipha_fragment_offset_and_flags &= 4195 ~IPH_DF_HTONS; 4196 } 4197 } else { 4198 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 4199 ip6h->ip6_dst = v6dst; 4200 } 4201 
} 4202 4203 /* 4204 * Remember the dst/dstport etc which corresponds to the built header 4205 * template and conn_ixa. 4206 */ 4207 oldixa = conn_replace_ixa(connp, ixa); 4208 connp->conn_v6lastdst = v6dst; 4209 connp->conn_lastipversion = ipversion; 4210 connp->conn_lastdstport = dstport; 4211 connp->conn_lastflowinfo = flowinfo; 4212 connp->conn_lastscopeid = ixa->ixa_scopeid; 4213 connp->conn_lastsrcid = srcid; 4214 /* Also remember a source to use together with lastdst */ 4215 connp->conn_v6lastsrc = v6src; 4216 4217 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 4218 dstport, flowinfo, &error); 4219 4220 /* Done with conn_t */ 4221 mutex_exit(&connp->conn_lock); 4222 ixa_refrele(oldixa); 4223 4224 if (data_mp == NULL) { 4225 ASSERT(error != 0); 4226 goto ud_error; 4227 } 4228 4229 /* We're done. Pass the packet to ip. */ 4230 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 4231 4232 DTRACE_UDP5(send, mblk_t *, NULL, ip_xmit_attr_t *, ixa, 4233 void_ip_t *, data_mp->b_rptr, udp_t *, udp, udpha_t *, 4234 &data_mp->b_rptr[ixa->ixa_ip_hdr_length]); 4235 4236 error = conn_ip_output(data_mp, ixa); 4237 /* No udpOutErrors if an error since IP increases its error counter */ 4238 switch (error) { 4239 case 0: 4240 break; 4241 case EWOULDBLOCK: 4242 (void) ixa_check_drain_insert(connp, ixa); 4243 error = 0; 4244 break; 4245 case EADDRNOTAVAIL: 4246 /* 4247 * IXAF_VERIFY_SOURCE tells us to pick a better source. 4248 * Don't have the application see that errno 4249 */ 4250 error = ENETUNREACH; 4251 /* FALLTHRU */ 4252 default: 4253 mutex_enter(&connp->conn_lock); 4254 /* 4255 * Clear the source and v6lastdst so we call ip_attr_connect 4256 * for the next packet and try to pick a better source. 
4257 */ 4258 if (connp->conn_mcbc_bind) 4259 connp->conn_saddr_v6 = ipv6_all_zeros; 4260 else 4261 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 4262 connp->conn_v6lastdst = ipv6_all_zeros; 4263 mutex_exit(&connp->conn_lock); 4264 break; 4265 } 4266 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4267 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4268 ixa->ixa_cpid = connp->conn_cpid; 4269 ixa_refrele(ixa); 4270 return (error); 4271 4272 ud_error: 4273 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4274 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4275 ixa->ixa_cpid = connp->conn_cpid; 4276 ixa_refrele(ixa); 4277 4278 freemsg(data_mp); 4279 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 4280 UDP_STAT(us, udp_out_err_output); 4281 return (error); 4282 } 4283 4284 /* ARGSUSED */ 4285 static void 4286 udp_wput_fallback(queue_t *wq, mblk_t *mp) 4287 { 4288 #ifdef DEBUG 4289 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 4290 #endif 4291 freemsg(mp); 4292 } 4293 4294 4295 /* 4296 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
4297 */ 4298 static void 4299 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 4300 { 4301 void *data; 4302 mblk_t *datamp = mp->b_cont; 4303 conn_t *connp = Q_TO_CONN(q); 4304 udp_t *udp = connp->conn_udp; 4305 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 4306 4307 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 4308 cmdp->cb_error = EPROTO; 4309 qreply(q, mp); 4310 return; 4311 } 4312 data = datamp->b_rptr; 4313 4314 mutex_enter(&connp->conn_lock); 4315 switch (cmdp->cb_cmd) { 4316 case TI_GETPEERNAME: 4317 if (udp->udp_state != TS_DATA_XFER) 4318 cmdp->cb_error = ENOTCONN; 4319 else 4320 cmdp->cb_error = conn_getpeername(connp, data, 4321 &cmdp->cb_len); 4322 break; 4323 case TI_GETMYNAME: 4324 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); 4325 break; 4326 default: 4327 cmdp->cb_error = EINVAL; 4328 break; 4329 } 4330 mutex_exit(&connp->conn_lock); 4331 4332 qreply(q, mp); 4333 } 4334 4335 static void 4336 udp_use_pure_tpi(udp_t *udp) 4337 { 4338 conn_t *connp = udp->udp_connp; 4339 4340 mutex_enter(&connp->conn_lock); 4341 udp->udp_issocket = B_FALSE; 4342 mutex_exit(&connp->conn_lock); 4343 UDP_STAT(udp->udp_us, udp_sock_fallback); 4344 } 4345 4346 static void 4347 udp_wput_other(queue_t *q, mblk_t *mp) 4348 { 4349 uchar_t *rptr = mp->b_rptr; 4350 struct iocblk *iocp; 4351 conn_t *connp = Q_TO_CONN(q); 4352 udp_t *udp = connp->conn_udp; 4353 cred_t *cr; 4354 4355 switch (mp->b_datap->db_type) { 4356 case M_CMD: 4357 udp_wput_cmdblk(q, mp); 4358 return; 4359 4360 case M_PROTO: 4361 case M_PCPROTO: 4362 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4363 /* 4364 * If the message does not contain a PRIM_type, 4365 * throw it away. 
4366 */ 4367 freemsg(mp); 4368 return; 4369 } 4370 switch (((t_primp_t)rptr)->type) { 4371 case T_ADDR_REQ: 4372 udp_addr_req(q, mp); 4373 return; 4374 case O_T_BIND_REQ: 4375 case T_BIND_REQ: 4376 udp_tpi_bind(q, mp); 4377 return; 4378 case T_CONN_REQ: 4379 udp_tpi_connect(q, mp); 4380 return; 4381 case T_CAPABILITY_REQ: 4382 udp_capability_req(q, mp); 4383 return; 4384 case T_INFO_REQ: 4385 udp_info_req(q, mp); 4386 return; 4387 case T_UNITDATA_REQ: 4388 /* 4389 * If a T_UNITDATA_REQ gets here, the address must 4390 * be bad. Valid T_UNITDATA_REQs are handled 4391 * in udp_wput. 4392 */ 4393 udp_ud_err(q, mp, EADDRNOTAVAIL); 4394 return; 4395 case T_UNBIND_REQ: 4396 udp_tpi_unbind(q, mp); 4397 return; 4398 case T_SVR4_OPTMGMT_REQ: 4399 /* 4400 * All Solaris components should pass a db_credp 4401 * for this TPI message, hence we ASSERT. 4402 * But in case there is some other M_PROTO that looks 4403 * like a TPI message sent by some other kernel 4404 * component, we check and return an error. 4405 */ 4406 cr = msg_getcred(mp, NULL); 4407 ASSERT(cr != NULL); 4408 if (cr == NULL) { 4409 udp_err_ack(q, mp, TSYSERR, EINVAL); 4410 return; 4411 } 4412 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 4413 cr)) { 4414 svr4_optcom_req(q, mp, cr, &udp_opt_obj); 4415 } 4416 return; 4417 4418 case T_OPTMGMT_REQ: 4419 /* 4420 * All Solaris components should pass a db_credp 4421 * for this TPI message, hence we ASSERT. 4422 * But in case there is some other M_PROTO that looks 4423 * like a TPI message sent by some other kernel 4424 * component, we check and return an error. 4425 */ 4426 cr = msg_getcred(mp, NULL); 4427 ASSERT(cr != NULL); 4428 if (cr == NULL) { 4429 udp_err_ack(q, mp, TSYSERR, EINVAL); 4430 return; 4431 } 4432 tpi_optcom_req(q, mp, cr, &udp_opt_obj); 4433 return; 4434 4435 case T_DISCON_REQ: 4436 udp_tpi_disconnect(q, mp); 4437 return; 4438 4439 /* The following TPI message is not supported by udp. 
*/ 4440 case O_T_CONN_RES: 4441 case T_CONN_RES: 4442 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4443 return; 4444 4445 /* The following 3 TPI requests are illegal for udp. */ 4446 case T_DATA_REQ: 4447 case T_EXDATA_REQ: 4448 case T_ORDREL_REQ: 4449 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4450 return; 4451 default: 4452 break; 4453 } 4454 break; 4455 case M_FLUSH: 4456 if (*rptr & FLUSHW) 4457 flushq(q, FLUSHDATA); 4458 break; 4459 case M_IOCTL: 4460 iocp = (struct iocblk *)mp->b_rptr; 4461 switch (iocp->ioc_cmd) { 4462 case TI_GETPEERNAME: 4463 if (udp->udp_state != TS_DATA_XFER) { 4464 /* 4465 * If a default destination address has not 4466 * been associated with the stream, then we 4467 * don't know the peer's name. 4468 */ 4469 iocp->ioc_error = ENOTCONN; 4470 iocp->ioc_count = 0; 4471 mp->b_datap->db_type = M_IOCACK; 4472 qreply(q, mp); 4473 return; 4474 } 4475 /* FALLTHRU */ 4476 case TI_GETMYNAME: 4477 /* 4478 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4479 * need to copyin the user's strbuf structure. 4480 * Processing will continue in the M_IOCDATA case 4481 * below. 4482 */ 4483 mi_copyin(q, mp, NULL, 4484 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4485 return; 4486 case _SIOCSOCKFALLBACK: 4487 /* 4488 * Either sockmod is about to be popped and the 4489 * socket would now be treated as a plain stream, 4490 * or a module is about to be pushed so we have 4491 * to follow pure TPI semantics. 4492 */ 4493 if (!udp->udp_issocket) { 4494 DB_TYPE(mp) = M_IOCNAK; 4495 iocp->ioc_error = EINVAL; 4496 } else { 4497 udp_use_pure_tpi(udp); 4498 4499 DB_TYPE(mp) = M_IOCACK; 4500 iocp->ioc_error = 0; 4501 } 4502 iocp->ioc_count = 0; 4503 iocp->ioc_rval = 0; 4504 qreply(q, mp); 4505 return; 4506 default: 4507 break; 4508 } 4509 break; 4510 case M_IOCDATA: 4511 udp_wput_iocdata(q, mp); 4512 return; 4513 default: 4514 /* Unrecognized messages are passed through without change. 
*/ 4515 break; 4516 } 4517 ip_wput_nondata(q, mp); 4518 } 4519 4520 /* 4521 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 4522 * messages. 4523 */ 4524 static void 4525 udp_wput_iocdata(queue_t *q, mblk_t *mp) 4526 { 4527 mblk_t *mp1; 4528 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4529 STRUCT_HANDLE(strbuf, sb); 4530 uint_t addrlen; 4531 conn_t *connp = Q_TO_CONN(q); 4532 udp_t *udp = connp->conn_udp; 4533 4534 /* Make sure it is one of ours. */ 4535 switch (iocp->ioc_cmd) { 4536 case TI_GETMYNAME: 4537 case TI_GETPEERNAME: 4538 break; 4539 default: 4540 ip_wput_nondata(q, mp); 4541 return; 4542 } 4543 4544 switch (mi_copy_state(q, mp, &mp1)) { 4545 case -1: 4546 return; 4547 case MI_COPY_CASE(MI_COPY_IN, 1): 4548 break; 4549 case MI_COPY_CASE(MI_COPY_OUT, 1): 4550 /* 4551 * The address has been copied out, so now 4552 * copyout the strbuf. 4553 */ 4554 mi_copyout(q, mp); 4555 return; 4556 case MI_COPY_CASE(MI_COPY_OUT, 2): 4557 /* 4558 * The address and strbuf have been copied out. 4559 * We're done, so just acknowledge the original 4560 * M_IOCTL. 4561 */ 4562 mi_copy_done(q, mp, 0); 4563 return; 4564 default: 4565 /* 4566 * Something strange has happened, so acknowledge 4567 * the original M_IOCTL with an EPROTO error. 4568 */ 4569 mi_copy_done(q, mp, EPROTO); 4570 return; 4571 } 4572 4573 /* 4574 * Now we have the strbuf structure for TI_GETMYNAME 4575 * and TI_GETPEERNAME. Next we copyout the requested 4576 * address and then we'll copyout the strbuf. 
4577 */ 4578 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 4579 4580 if (connp->conn_family == AF_INET) 4581 addrlen = sizeof (sin_t); 4582 else 4583 addrlen = sizeof (sin6_t); 4584 4585 if (STRUCT_FGET(sb, maxlen) < addrlen) { 4586 mi_copy_done(q, mp, EINVAL); 4587 return; 4588 } 4589 4590 switch (iocp->ioc_cmd) { 4591 case TI_GETMYNAME: 4592 break; 4593 case TI_GETPEERNAME: 4594 if (udp->udp_state != TS_DATA_XFER) { 4595 mi_copy_done(q, mp, ENOTCONN); 4596 return; 4597 } 4598 break; 4599 } 4600 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 4601 if (!mp1) 4602 return; 4603 4604 STRUCT_FSET(sb, len, addrlen); 4605 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4606 case TI_GETMYNAME: 4607 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, 4608 &addrlen); 4609 break; 4610 case TI_GETPEERNAME: 4611 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, 4612 &addrlen); 4613 break; 4614 } 4615 mp1->b_wptr += addrlen; 4616 /* Copy out the address */ 4617 mi_copyout(q, mp); 4618 } 4619 4620 void 4621 udp_ddi_g_init(void) 4622 { 4623 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 4624 udp_opt_obj.odb_opt_arr_cnt); 4625 4626 /* 4627 * We want to be informed each time a stack is created or 4628 * destroyed in the kernel, so we can maintain the 4629 * set of udp_stack_t's. 4630 */ 4631 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 4632 } 4633 4634 void 4635 udp_ddi_g_destroy(void) 4636 { 4637 netstack_unregister(NS_UDP); 4638 } 4639 4640 #define INET_NAME "ip" 4641 4642 /* 4643 * Initialize the UDP stack instance. 
4644 */ 4645 static void * 4646 udp_stack_init(netstackid_t stackid, netstack_t *ns) 4647 { 4648 udp_stack_t *us; 4649 int i; 4650 int error = 0; 4651 major_t major; 4652 size_t arrsz; 4653 4654 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 4655 us->us_netstack = ns; 4656 4657 mutex_init(&us->us_epriv_port_lock, NULL, MUTEX_DEFAULT, NULL); 4658 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 4659 us->us_epriv_ports[0] = ULP_DEF_EPRIV_PORT1; 4660 us->us_epriv_ports[1] = ULP_DEF_EPRIV_PORT2; 4661 4662 /* 4663 * The smallest anonymous port in the priviledged port range which UDP 4664 * looks for free port. Use in the option UDP_ANONPRIVBIND. 4665 */ 4666 us->us_min_anonpriv_port = 512; 4667 4668 us->us_bind_fanout_size = udp_bind_fanout_size; 4669 4670 /* Roundup variable that might have been modified in /etc/system */ 4671 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 4672 /* Not a power of two. Round up to nearest power of two */ 4673 for (i = 0; i < 31; i++) { 4674 if (us->us_bind_fanout_size < (1 << i)) 4675 break; 4676 } 4677 us->us_bind_fanout_size = 1 << i; 4678 } 4679 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 4680 sizeof (udp_fanout_t), KM_SLEEP); 4681 for (i = 0; i < us->us_bind_fanout_size; i++) { 4682 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 4683 NULL); 4684 } 4685 4686 arrsz = udp_propinfo_count * sizeof (mod_prop_info_t); 4687 us->us_propinfo_tbl = (mod_prop_info_t *)kmem_alloc(arrsz, 4688 KM_SLEEP); 4689 bcopy(udp_propinfo_tbl, us->us_propinfo_tbl, arrsz); 4690 4691 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 4692 us->us_mibkp = udp_kstat_init(stackid); 4693 4694 major = mod_name_to_major(INET_NAME); 4695 error = ldi_ident_from_major(major, &us->us_ldi_ident); 4696 ASSERT(error == 0); 4697 return (us); 4698 } 4699 4700 /* 4701 * Free the UDP stack instance. 
4702 */ 4703 static void 4704 udp_stack_fini(netstackid_t stackid, void *arg) 4705 { 4706 udp_stack_t *us = (udp_stack_t *)arg; 4707 int i; 4708 4709 for (i = 0; i < us->us_bind_fanout_size; i++) { 4710 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 4711 } 4712 4713 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 4714 sizeof (udp_fanout_t)); 4715 4716 us->us_bind_fanout = NULL; 4717 4718 kmem_free(us->us_propinfo_tbl, 4719 udp_propinfo_count * sizeof (mod_prop_info_t)); 4720 us->us_propinfo_tbl = NULL; 4721 4722 udp_kstat_fini(stackid, us->us_mibkp); 4723 us->us_mibkp = NULL; 4724 4725 udp_kstat2_fini(stackid, us->us_kstat); 4726 us->us_kstat = NULL; 4727 bzero(&us->us_statistics, sizeof (us->us_statistics)); 4728 4729 mutex_destroy(&us->us_epriv_port_lock); 4730 ldi_ident_release(us->us_ldi_ident); 4731 kmem_free(us, sizeof (*us)); 4732 } 4733 4734 static void * 4735 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 4736 { 4737 kstat_t *ksp; 4738 4739 udp_stat_t template = { 4740 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 4741 { "udp_out_opt", KSTAT_DATA_UINT64 }, 4742 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 4743 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 4744 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 4745 #ifdef DEBUG 4746 { "udp_data_conn", KSTAT_DATA_UINT64 }, 4747 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 4748 { "udp_out_lastdst", KSTAT_DATA_UINT64 }, 4749 { "udp_out_diffdst", KSTAT_DATA_UINT64 }, 4750 { "udp_out_ipv6", KSTAT_DATA_UINT64 }, 4751 { "udp_out_mapped", KSTAT_DATA_UINT64 }, 4752 { "udp_out_ipv4", KSTAT_DATA_UINT64 }, 4753 #endif 4754 }; 4755 4756 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 4757 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4758 KSTAT_FLAG_VIRTUAL, stackid); 4759 4760 if (ksp == NULL) 4761 return (NULL); 4762 4763 bcopy(&template, us_statisticsp, sizeof (template)); 4764 ksp->ks_data = (void *)us_statisticsp; 4765 ksp->ks_private = (void *)(uintptr_t)stackid; 4766 
4767 kstat_install(ksp); 4768 return (ksp); 4769 } 4770 4771 static void 4772 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 4773 { 4774 if (ksp != NULL) { 4775 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4776 kstat_delete_netstack(ksp, stackid); 4777 } 4778 } 4779 4780 static void * 4781 udp_kstat_init(netstackid_t stackid) 4782 { 4783 kstat_t *ksp; 4784 4785 udp_named_kstat_t template = { 4786 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 4787 { "inErrors", KSTAT_DATA_UINT32, 0 }, 4788 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 4789 { "entrySize", KSTAT_DATA_INT32, 0 }, 4790 { "entry6Size", KSTAT_DATA_INT32, 0 }, 4791 { "outErrors", KSTAT_DATA_UINT32, 0 }, 4792 }; 4793 4794 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 4795 KSTAT_TYPE_NAMED, 4796 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 4797 4798 if (ksp == NULL || ksp->ks_data == NULL) 4799 return (NULL); 4800 4801 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 4802 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 4803 4804 bcopy(&template, ksp->ks_data, sizeof (template)); 4805 ksp->ks_update = udp_kstat_update; 4806 ksp->ks_private = (void *)(uintptr_t)stackid; 4807 4808 kstat_install(ksp); 4809 return (ksp); 4810 } 4811 4812 static void 4813 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4814 { 4815 if (ksp != NULL) { 4816 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4817 kstat_delete_netstack(ksp, stackid); 4818 } 4819 } 4820 4821 static int 4822 udp_kstat_update(kstat_t *kp, int rw) 4823 { 4824 udp_named_kstat_t *udpkp; 4825 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 4826 netstack_t *ns; 4827 udp_stack_t *us; 4828 4829 if ((kp == NULL) || (kp->ks_data == NULL)) 4830 return (EIO); 4831 4832 if (rw == KSTAT_WRITE) 4833 return (EACCES); 4834 4835 ns = netstack_find_by_stackid(stackid); 4836 if (ns == NULL) 4837 return (-1); 4838 us = ns->netstack_udp; 4839 if (us == NULL) { 4840 netstack_rele(ns); 4841 
return (-1); 4842 } 4843 udpkp = (udp_named_kstat_t *)kp->ks_data; 4844 4845 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 4846 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 4847 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 4848 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 4849 netstack_rele(ns); 4850 return (0); 4851 } 4852 4853 static size_t 4854 udp_set_rcv_hiwat(udp_t *udp, size_t size) 4855 { 4856 udp_stack_t *us = udp->udp_us; 4857 4858 /* We add a bit of extra buffering */ 4859 size += size >> 1; 4860 if (size > us->us_max_buf) 4861 size = us->us_max_buf; 4862 4863 udp->udp_rcv_hiwat = size; 4864 return (size); 4865 } 4866 4867 /* 4868 * For the lower queue so that UDP can be a dummy mux. 4869 * Nobody should be sending 4870 * packets up this stream 4871 */ 4872 static void 4873 udp_lrput(queue_t *q, mblk_t *mp) 4874 { 4875 switch (mp->b_datap->db_type) { 4876 case M_FLUSH: 4877 /* Turn around */ 4878 if (*mp->b_rptr & FLUSHW) { 4879 *mp->b_rptr &= ~FLUSHR; 4880 qreply(q, mp); 4881 return; 4882 } 4883 break; 4884 } 4885 freemsg(mp); 4886 } 4887 4888 /* 4889 * For the lower queue so that UDP can be a dummy mux. 4890 * Nobody should be sending packets down this stream. 4891 */ 4892 /* ARGSUSED */ 4893 void 4894 udp_lwput(queue_t *q, mblk_t *mp) 4895 { 4896 freemsg(mp); 4897 } 4898 4899 /* 4900 * Below routines for UDP socket module. 
4901 */ 4902 4903 static conn_t * 4904 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp) 4905 { 4906 udp_t *udp; 4907 conn_t *connp; 4908 zoneid_t zoneid; 4909 netstack_t *ns; 4910 udp_stack_t *us; 4911 int len; 4912 4913 ASSERT(errorp != NULL); 4914 4915 if ((*errorp = secpolicy_basic_net_access(credp)) != 0) 4916 return (NULL); 4917 4918 ns = netstack_find_by_cred(credp); 4919 ASSERT(ns != NULL); 4920 us = ns->netstack_udp; 4921 ASSERT(us != NULL); 4922 4923 /* 4924 * For exclusive stacks we set the zoneid to zero 4925 * to make UDP operate as if in the global zone. 4926 */ 4927 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 4928 zoneid = GLOBAL_ZONEID; 4929 else 4930 zoneid = crgetzoneid(credp); 4931 4932 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 4933 4934 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 4935 if (connp == NULL) { 4936 netstack_rele(ns); 4937 *errorp = ENOMEM; 4938 return (NULL); 4939 } 4940 udp = connp->conn_udp; 4941 4942 /* 4943 * ipcl_conn_create did a netstack_hold. Undo the hold that was 4944 * done by netstack_find_by_cred() 4945 */ 4946 netstack_rele(ns); 4947 4948 /* 4949 * Since this conn_t/udp_t is not yet visible to anybody else we don't 4950 * need to lock anything. 4951 */ 4952 ASSERT(connp->conn_proto == IPPROTO_UDP); 4953 ASSERT(connp->conn_udp == udp); 4954 ASSERT(udp->udp_connp == connp); 4955 4956 /* Set the initial state of the stream and the privilege status. 
*/ 4957 udp->udp_state = TS_UNBND; 4958 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 4959 if (isv6) { 4960 connp->conn_family = AF_INET6; 4961 connp->conn_ipversion = IPV6_VERSION; 4962 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 4963 connp->conn_default_ttl = us->us_ipv6_hoplimit; 4964 len = sizeof (ip6_t) + UDPH_SIZE; 4965 } else { 4966 connp->conn_family = AF_INET; 4967 connp->conn_ipversion = IPV4_VERSION; 4968 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 4969 connp->conn_default_ttl = us->us_ipv4_ttl; 4970 len = sizeof (ipha_t) + UDPH_SIZE; 4971 } 4972 4973 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 4974 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 4975 4976 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 4977 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 4978 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 4979 connp->conn_ixa->ixa_zoneid = zoneid; 4980 4981 connp->conn_zoneid = zoneid; 4982 4983 /* 4984 * If the caller has the process-wide flag set, then default to MAC 4985 * exempt mode. This allows read-down to unlabeled hosts. 
4986 */ 4987 if (getpflags(NET_MAC_AWARE, credp) != 0) 4988 connp->conn_mac_mode = CONN_MAC_AWARE; 4989 4990 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 4991 4992 udp->udp_us = us; 4993 4994 connp->conn_rcvbuf = us->us_recv_hiwat; 4995 connp->conn_sndbuf = us->us_xmit_hiwat; 4996 connp->conn_sndlowat = us->us_xmit_lowat; 4997 connp->conn_rcvlowat = udp_mod_info.mi_lowat; 4998 4999 connp->conn_wroff = len + us->us_wroff_extra; 5000 connp->conn_so_type = SOCK_DGRAM; 5001 5002 connp->conn_recv = udp_input; 5003 connp->conn_recvicmp = udp_icmp_input; 5004 crhold(credp); 5005 connp->conn_cred = credp; 5006 connp->conn_cpid = curproc->p_pid; 5007 connp->conn_open_time = ddi_get_lbolt64(); 5008 /* Cache things in ixa without an extra refhold */ 5009 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 5010 connp->conn_ixa->ixa_cred = connp->conn_cred; 5011 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 5012 if (is_system_labeled()) 5013 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 5014 5015 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 5016 5017 if (us->us_pmtu_discovery) 5018 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 5019 5020 return (connp); 5021 } 5022 5023 sock_lower_handle_t 5024 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 5025 uint_t *smodep, int *errorp, int flags, cred_t *credp) 5026 { 5027 udp_t *udp = NULL; 5028 udp_stack_t *us; 5029 conn_t *connp; 5030 boolean_t isv6; 5031 5032 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 5033 (proto != 0 && proto != IPPROTO_UDP)) { 5034 *errorp = EPROTONOSUPPORT; 5035 return (NULL); 5036 } 5037 5038 if (family == AF_INET6) 5039 isv6 = B_TRUE; 5040 else 5041 isv6 = B_FALSE; 5042 5043 connp = udp_do_open(credp, isv6, flags, errorp); 5044 if (connp == NULL) 5045 return (NULL); 5046 5047 udp = connp->conn_udp; 5048 ASSERT(udp != NULL); 5049 us = udp->udp_us; 5050 ASSERT(us != NULL); 5051 5052 udp->udp_issocket = 
B_TRUE; 5053 connp->conn_flags |= IPCL_NONSTR; 5054 5055 /* 5056 * Set flow control 5057 * Since this conn_t/udp_t is not yet visible to anybody else we don't 5058 * need to lock anything. 5059 */ 5060 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); 5061 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; 5062 5063 connp->conn_flow_cntrld = B_FALSE; 5064 5065 mutex_enter(&connp->conn_lock); 5066 connp->conn_state_flags &= ~CONN_INCIPIENT; 5067 mutex_exit(&connp->conn_lock); 5068 5069 *errorp = 0; 5070 *smodep = SM_ATOMIC; 5071 *sock_downcalls = &sock_udp_downcalls; 5072 return ((sock_lower_handle_t)connp); 5073 } 5074 5075 /* ARGSUSED3 */ 5076 void 5077 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 5078 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 5079 { 5080 conn_t *connp = (conn_t *)proto_handle; 5081 struct sock_proto_props sopp; 5082 5083 /* All Solaris components should pass a cred for this operation. */ 5084 ASSERT(cr != NULL); 5085 5086 connp->conn_upcalls = sock_upcalls; 5087 connp->conn_upper_handle = sock_handle; 5088 5089 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 5090 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 5091 sopp.sopp_wroff = connp->conn_wroff; 5092 sopp.sopp_maxblk = INFPSZ; 5093 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 5094 sopp.sopp_rxlowat = connp->conn_rcvlowat; 5095 sopp.sopp_maxaddrlen = sizeof (sin6_t); 5096 sopp.sopp_maxpsz = 5097 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 5098 UDP_MAXPACKET_IPV6; 5099 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : 5100 udp_mod_info.mi_minpsz; 5101 5102 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 5103 &sopp); 5104 } 5105 5106 static void 5107 udp_do_close(conn_t *connp) 5108 { 5109 udp_t *udp; 5110 5111 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 5112 udp = connp->conn_udp; 5113 5114 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 5115 /* 5116 * Running in cluster mode - register unbind information 5117 */ 5118 if (connp->conn_ipversion == IPV4_VERSION) { 5119 (*cl_inet_unbind)( 5120 connp->conn_netstack->netstack_stackid, 5121 IPPROTO_UDP, AF_INET, 5122 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5123 (in_port_t)connp->conn_lport, NULL); 5124 } else { 5125 (*cl_inet_unbind)( 5126 connp->conn_netstack->netstack_stackid, 5127 IPPROTO_UDP, AF_INET6, 5128 (uint8_t *)&(connp->conn_laddr_v6), 5129 (in_port_t)connp->conn_lport, NULL); 5130 } 5131 } 5132 5133 udp_bind_hash_remove(udp, B_FALSE); 5134 5135 ip_quiesce_conn(connp); 5136 5137 if (!IPCL_IS_NONSTR(connp)) { 5138 ASSERT(connp->conn_wq != NULL); 5139 ASSERT(connp->conn_rq != NULL); 5140 qprocsoff(connp->conn_rq); 5141 } 5142 5143 udp_close_free(connp); 5144 5145 /* 5146 * Now we are truly single threaded on this stream, and can 5147 * delete the things hanging off the connp, and finally the connp. 5148 * We removed this connp from the fanout list, it cannot be 5149 * accessed thru the fanouts, and we already waited for the 5150 * conn_ref to drop to 0. We are already in close, so 5151 * there cannot be any other thread from the top. qprocsoff 5152 * has completed, and service has completed or won't run in 5153 * future. 
5154 */ 5155 ASSERT(connp->conn_ref == 1); 5156 5157 if (!IPCL_IS_NONSTR(connp)) { 5158 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 5159 } else { 5160 ip_free_helper_stream(connp); 5161 } 5162 5163 connp->conn_ref--; 5164 ipcl_conn_destroy(connp); 5165 } 5166 5167 /* ARGSUSED1 */ 5168 int 5169 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 5170 { 5171 conn_t *connp = (conn_t *)proto_handle; 5172 5173 /* All Solaris components should pass a cred for this operation. */ 5174 ASSERT(cr != NULL); 5175 5176 udp_do_close(connp); 5177 return (0); 5178 } 5179 5180 static int 5181 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 5182 boolean_t bind_to_req_port_only) 5183 { 5184 sin_t *sin; 5185 sin6_t *sin6; 5186 udp_t *udp = connp->conn_udp; 5187 int error = 0; 5188 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 5189 in_port_t port; /* Host byte order */ 5190 in_port_t requested_port; /* Host byte order */ 5191 int count; 5192 ipaddr_t v4src; /* Set if AF_INET */ 5193 in6_addr_t v6src; 5194 int loopmax; 5195 udp_fanout_t *udpf; 5196 in_port_t lport; /* Network byte order */ 5197 uint_t scopeid = 0; 5198 zoneid_t zoneid = IPCL_ZONEID(connp); 5199 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5200 boolean_t is_inaddr_any; 5201 mlp_type_t addrtype, mlptype; 5202 udp_stack_t *us = udp->udp_us; 5203 5204 switch (len) { 5205 case sizeof (sin_t): /* Complete IPv4 address */ 5206 sin = (sin_t *)sa; 5207 5208 if (sin == NULL || !OK_32PTR((char *)sin)) 5209 return (EINVAL); 5210 5211 if (connp->conn_family != AF_INET || 5212 sin->sin_family != AF_INET) { 5213 return (EAFNOSUPPORT); 5214 } 5215 v4src = sin->sin_addr.s_addr; 5216 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 5217 if (v4src != INADDR_ANY) { 5218 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 5219 B_TRUE); 5220 } 5221 port = ntohs(sin->sin_port); 5222 break; 5223 5224 case sizeof (sin6_t): /* complete IPv6 address */ 5225 sin6 = (sin6_t *)sa; 
5226 5227 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 5228 return (EINVAL); 5229 5230 if (connp->conn_family != AF_INET6 || 5231 sin6->sin6_family != AF_INET6) { 5232 return (EAFNOSUPPORT); 5233 } 5234 v6src = sin6->sin6_addr; 5235 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5236 if (connp->conn_ipv6_v6only) 5237 return (EADDRNOTAVAIL); 5238 5239 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); 5240 if (v4src != INADDR_ANY) { 5241 laddr_type = ip_laddr_verify_v4(v4src, 5242 zoneid, ipst, B_FALSE); 5243 } 5244 } else { 5245 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5246 if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 5247 scopeid = sin6->sin6_scope_id; 5248 laddr_type = ip_laddr_verify_v6(&v6src, 5249 zoneid, ipst, B_TRUE, scopeid); 5250 } 5251 } 5252 port = ntohs(sin6->sin6_port); 5253 break; 5254 5255 default: /* Invalid request */ 5256 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5257 "udp_bind: bad ADDR_length length %u", len); 5258 return (-TBADADDR); 5259 } 5260 5261 /* Is the local address a valid unicast, multicast, or broadcast? */ 5262 if (laddr_type == IPVL_BAD) 5263 return (EADDRNOTAVAIL); 5264 5265 requested_port = port; 5266 5267 if (requested_port == 0 || !bind_to_req_port_only) 5268 bind_to_req_port_only = B_FALSE; 5269 else /* T_BIND_REQ and requested_port != 0 */ 5270 bind_to_req_port_only = B_TRUE; 5271 5272 if (requested_port == 0) { 5273 /* 5274 * If the application passed in zero for the port number, it 5275 * doesn't care which port number we bind to. Get one in the 5276 * valid range. 5277 */ 5278 if (connp->conn_anon_priv_bind) { 5279 port = udp_get_next_priv_port(udp); 5280 } else { 5281 port = udp_update_next_port(udp, 5282 us->us_next_port_to_try, B_TRUE); 5283 } 5284 } else { 5285 /* 5286 * If the port is in the well-known privileged range, 5287 * make sure the caller was privileged. 
5288 */ 5289 int i; 5290 boolean_t priv = B_FALSE; 5291 5292 if (port < us->us_smallest_nonpriv_port) { 5293 priv = B_TRUE; 5294 } else { 5295 for (i = 0; i < us->us_num_epriv_ports; i++) { 5296 if (port == us->us_epriv_ports[i]) { 5297 priv = B_TRUE; 5298 break; 5299 } 5300 } 5301 } 5302 5303 if (priv) { 5304 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 5305 return (-TACCES); 5306 } 5307 } 5308 5309 if (port == 0) 5310 return (-TNOADDR); 5311 5312 /* 5313 * The state must be TS_UNBND. TPI mandates that users must send 5314 * TPI primitives only 1 at a time and wait for the response before 5315 * sending the next primitive. 5316 */ 5317 mutex_enter(&connp->conn_lock); 5318 if (udp->udp_state != TS_UNBND) { 5319 mutex_exit(&connp->conn_lock); 5320 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5321 "udp_bind: bad state, %u", udp->udp_state); 5322 return (-TOUTSTATE); 5323 } 5324 /* 5325 * Copy the source address into our udp structure. This address 5326 * may still be zero; if so, IP will fill in the correct address 5327 * each time an outbound packet is passed to it. Since the udp is 5328 * not yet in the bind hash list, we don't grab the uf_lock to 5329 * change conn_ipversion 5330 */ 5331 if (connp->conn_family == AF_INET) { 5332 ASSERT(sin != NULL); 5333 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); 5334 } else { 5335 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5336 /* 5337 * no need to hold the uf_lock to set the conn_ipversion 5338 * since we are not yet in the fanout list 5339 */ 5340 connp->conn_ipversion = IPV4_VERSION; 5341 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5342 } else { 5343 connp->conn_ipversion = IPV6_VERSION; 5344 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5345 } 5346 } 5347 5348 /* 5349 * If conn_reuseaddr is not set, then we have to make sure that 5350 * the IP address and port number the application requested 5351 * (or we selected for the application) is not being used by 5352 * another stream. 
If another stream is already using the 5353 * requested IP address and port, the behavior depends on 5354 * "bind_to_req_port_only". If set the bind fails; otherwise we 5355 * search for any an unused port to bind to the stream. 5356 * 5357 * As per the BSD semantics, as modified by the Deering multicast 5358 * changes, if udp_reuseaddr is set, then we allow multiple binds 5359 * to the same port independent of the local IP address. 5360 * 5361 * This is slightly different than in SunOS 4.X which did not 5362 * support IP multicast. Note that the change implemented by the 5363 * Deering multicast code effects all binds - not only binding 5364 * to IP multicast addresses. 5365 * 5366 * Note that when binding to port zero we ignore SO_REUSEADDR in 5367 * order to guarantee a unique port. 5368 */ 5369 5370 count = 0; 5371 if (connp->conn_anon_priv_bind) { 5372 /* 5373 * loopmax = (IPPORT_RESERVED-1) - 5374 * us->us_min_anonpriv_port + 1 5375 */ 5376 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 5377 } else { 5378 loopmax = us->us_largest_anon_port - 5379 us->us_smallest_anon_port + 1; 5380 } 5381 5382 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 5383 5384 for (;;) { 5385 udp_t *udp1; 5386 boolean_t found_exclbind = B_FALSE; 5387 conn_t *connp1; 5388 5389 /* 5390 * Walk through the list of udp streams bound to 5391 * requested port with the same IP address. 5392 */ 5393 lport = htons(port); 5394 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 5395 us->us_bind_fanout_size)]; 5396 mutex_enter(&udpf->uf_lock); 5397 for (udp1 = udpf->uf_udp; udp1 != NULL; 5398 udp1 = udp1->udp_bind_hash) { 5399 connp1 = udp1->udp_connp; 5400 5401 if (lport != connp1->conn_lport) 5402 continue; 5403 5404 /* 5405 * On a labeled system, we must treat bindings to ports 5406 * on shared IP addresses by sockets with MAC exemption 5407 * privilege as being in all zones, as there's 5408 * otherwise no way to identify the right receiver. 
5409 */ 5410 if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) 5411 continue; 5412 5413 /* 5414 * If UDP_EXCLBIND is set for either the bound or 5415 * binding endpoint, the semantics of bind 5416 * is changed according to the following chart. 5417 * 5418 * spec = specified address (v4 or v6) 5419 * unspec = unspecified address (v4 or v6) 5420 * A = specified addresses are different for endpoints 5421 * 5422 * bound bind to allowed? 5423 * ------------------------------------- 5424 * unspec unspec no 5425 * unspec spec no 5426 * spec unspec no 5427 * spec spec yes if A 5428 * 5429 * For labeled systems, SO_MAC_EXEMPT behaves the same 5430 * as UDP_EXCLBIND, except that zoneid is ignored. 5431 */ 5432 if (connp1->conn_exclbind || connp->conn_exclbind || 5433 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 5434 if (V6_OR_V4_INADDR_ANY( 5435 connp1->conn_bound_addr_v6) || 5436 is_inaddr_any || 5437 IN6_ARE_ADDR_EQUAL( 5438 &connp1->conn_bound_addr_v6, 5439 &v6src)) { 5440 found_exclbind = B_TRUE; 5441 break; 5442 } 5443 continue; 5444 } 5445 5446 /* 5447 * Check ipversion to allow IPv4 and IPv6 sockets to 5448 * have disjoint port number spaces. 5449 */ 5450 if (connp->conn_ipversion != connp1->conn_ipversion) { 5451 5452 /* 5453 * On the first time through the loop, if the 5454 * the user intentionally specified a 5455 * particular port number, then ignore any 5456 * bindings of the other protocol that may 5457 * conflict. This allows the user to bind IPv6 5458 * alone and get both v4 and v6, or bind both 5459 * both and get each seperately. On subsequent 5460 * times through the loop, we're checking a 5461 * port that we chose (not the user) and thus 5462 * we do not allow casual duplicate bindings. 5463 */ 5464 if (count == 0 && requested_port != 0) 5465 continue; 5466 } 5467 5468 /* 5469 * No difference depending on SO_REUSEADDR. 
5470 * 5471 * If existing port is bound to a 5472 * non-wildcard IP address and 5473 * the requesting stream is bound to 5474 * a distinct different IP addresses 5475 * (non-wildcard, also), keep going. 5476 */ 5477 if (!is_inaddr_any && 5478 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && 5479 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, 5480 &v6src)) { 5481 continue; 5482 } 5483 break; 5484 } 5485 5486 if (!found_exclbind && 5487 (connp->conn_reuseaddr && requested_port != 0)) { 5488 break; 5489 } 5490 5491 if (udp1 == NULL) { 5492 /* 5493 * No other stream has this IP address 5494 * and port number. We can use it. 5495 */ 5496 break; 5497 } 5498 mutex_exit(&udpf->uf_lock); 5499 if (bind_to_req_port_only) { 5500 /* 5501 * We get here only when requested port 5502 * is bound (and only first of the for() 5503 * loop iteration). 5504 * 5505 * The semantics of this bind request 5506 * require it to fail so we return from 5507 * the routine (and exit the loop). 5508 * 5509 */ 5510 mutex_exit(&connp->conn_lock); 5511 return (-TADDRBUSY); 5512 } 5513 5514 if (connp->conn_anon_priv_bind) { 5515 port = udp_get_next_priv_port(udp); 5516 } else { 5517 if ((count == 0) && (requested_port != 0)) { 5518 /* 5519 * If the application wants us to find 5520 * a port, get one to start with. Set 5521 * requested_port to 0, so that we will 5522 * update us->us_next_port_to_try below. 5523 */ 5524 port = udp_update_next_port(udp, 5525 us->us_next_port_to_try, B_TRUE); 5526 requested_port = 0; 5527 } else { 5528 port = udp_update_next_port(udp, port + 1, 5529 B_FALSE); 5530 } 5531 } 5532 5533 if (port == 0 || ++count >= loopmax) { 5534 /* 5535 * We've tried every possible port number and 5536 * there are none available, so send an error 5537 * to the user. 5538 */ 5539 mutex_exit(&connp->conn_lock); 5540 return (-TNOADDR); 5541 } 5542 } 5543 5544 /* 5545 * Copy the source address into our udp structure. 
This address 5546 * may still be zero; if so, ip_attr_connect will fill in the correct 5547 * address when a packet is about to be sent. 5548 * If we are binding to a broadcast or multicast address then 5549 * we just set the conn_bound_addr since we don't want to use 5550 * that as the source address when sending. 5551 */ 5552 connp->conn_bound_addr_v6 = v6src; 5553 connp->conn_laddr_v6 = v6src; 5554 if (scopeid != 0) { 5555 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 5556 connp->conn_ixa->ixa_scopeid = scopeid; 5557 connp->conn_incoming_ifindex = scopeid; 5558 } else { 5559 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5560 connp->conn_incoming_ifindex = connp->conn_bound_if; 5561 } 5562 5563 switch (laddr_type) { 5564 case IPVL_UNICAST_UP: 5565 case IPVL_UNICAST_DOWN: 5566 connp->conn_saddr_v6 = v6src; 5567 connp->conn_mcbc_bind = B_FALSE; 5568 break; 5569 case IPVL_MCAST: 5570 case IPVL_BCAST: 5571 /* ip_set_destination will pick a source address later */ 5572 connp->conn_saddr_v6 = ipv6_all_zeros; 5573 connp->conn_mcbc_bind = B_TRUE; 5574 break; 5575 } 5576 5577 /* Any errors after this point should use late_error */ 5578 connp->conn_lport = lport; 5579 5580 /* 5581 * Now reset the next anonymous port if the application requested 5582 * an anonymous port, or we handed out the next anonymous port. 5583 */ 5584 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { 5585 us->us_next_port_to_try = port + 1; 5586 } 5587 5588 /* Initialize the T_BIND_ACK. 
*/ 5589 if (connp->conn_family == AF_INET) { 5590 sin->sin_port = connp->conn_lport; 5591 } else { 5592 sin6->sin6_port = connp->conn_lport; 5593 } 5594 udp->udp_state = TS_IDLE; 5595 udp_bind_hash_insert(udpf, udp); 5596 mutex_exit(&udpf->uf_lock); 5597 mutex_exit(&connp->conn_lock); 5598 5599 if (cl_inet_bind) { 5600 /* 5601 * Running in cluster mode - register bind information 5602 */ 5603 if (connp->conn_ipversion == IPV4_VERSION) { 5604 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5605 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, 5606 (in_port_t)connp->conn_lport, NULL); 5607 } else { 5608 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5609 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, 5610 (in_port_t)connp->conn_lport, NULL); 5611 } 5612 } 5613 5614 mutex_enter(&connp->conn_lock); 5615 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 5616 if (is_system_labeled() && (!connp->conn_anon_port || 5617 connp->conn_anon_mlp)) { 5618 uint16_t mlpport; 5619 zone_t *zone; 5620 5621 zone = crgetzone(cr); 5622 connp->conn_mlp_type = 5623 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : 5624 mlptSingle; 5625 addrtype = tsol_mlp_addr_type( 5626 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 5627 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 5628 if (addrtype == mlptSingle) { 5629 error = -TNOADDR; 5630 mutex_exit(&connp->conn_lock); 5631 goto late_error; 5632 } 5633 mlpport = connp->conn_anon_port ? PMAPPORT : port; 5634 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 5635 addrtype); 5636 5637 /* 5638 * It is a coding error to attempt to bind an MLP port 5639 * without first setting SOL_SOCKET/SCM_UCRED. 5640 */ 5641 if (mlptype != mlptSingle && 5642 connp->conn_mlp_type == mlptSingle) { 5643 error = EINVAL; 5644 mutex_exit(&connp->conn_lock); 5645 goto late_error; 5646 } 5647 5648 /* 5649 * It is an access violation to attempt to bind an MLP port 5650 * without NET_BINDMLP privilege. 
5651 */ 5652 if (mlptype != mlptSingle && 5653 secpolicy_net_bindmlp(cr) != 0) { 5654 if (connp->conn_debug) { 5655 (void) strlog(UDP_MOD_ID, 0, 1, 5656 SL_ERROR|SL_TRACE, 5657 "udp_bind: no priv for multilevel port %d", 5658 mlpport); 5659 } 5660 error = -TACCES; 5661 mutex_exit(&connp->conn_lock); 5662 goto late_error; 5663 } 5664 5665 /* 5666 * If we're specifically binding a shared IP address and the 5667 * port is MLP on shared addresses, then check to see if this 5668 * zone actually owns the MLP. Reject if not. 5669 */ 5670 if (mlptype == mlptShared && addrtype == mlptShared) { 5671 /* 5672 * No need to handle exclusive-stack zones since 5673 * ALL_ZONES only applies to the shared stack. 5674 */ 5675 zoneid_t mlpzone; 5676 5677 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 5678 htons(mlpport)); 5679 if (connp->conn_zoneid != mlpzone) { 5680 if (connp->conn_debug) { 5681 (void) strlog(UDP_MOD_ID, 0, 1, 5682 SL_ERROR|SL_TRACE, 5683 "udp_bind: attempt to bind port " 5684 "%d on shared addr in zone %d " 5685 "(should be %d)", 5686 mlpport, connp->conn_zoneid, 5687 mlpzone); 5688 } 5689 error = -TACCES; 5690 mutex_exit(&connp->conn_lock); 5691 goto late_error; 5692 } 5693 } 5694 if (connp->conn_anon_port) { 5695 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, 5696 port, B_TRUE); 5697 if (error != 0) { 5698 if (connp->conn_debug) { 5699 (void) strlog(UDP_MOD_ID, 0, 1, 5700 SL_ERROR|SL_TRACE, 5701 "udp_bind: cannot establish anon " 5702 "MLP for port %d", port); 5703 } 5704 error = -TACCES; 5705 mutex_exit(&connp->conn_lock); 5706 goto late_error; 5707 } 5708 } 5709 connp->conn_mlp_type = mlptype; 5710 } 5711 5712 /* 5713 * We create an initial header template here to make a subsequent 5714 * sendto have a starting point. Since conn_last_dst is zero the 5715 * first sendto will always follow the 'dst changed' code path. 5716 * Note that we defer massaging options and the related checksum 5717 * adjustment until we have a destination address. 
5718 */ 5719 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5720 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5721 if (error != 0) { 5722 mutex_exit(&connp->conn_lock); 5723 goto late_error; 5724 } 5725 /* Just in case */ 5726 connp->conn_faddr_v6 = ipv6_all_zeros; 5727 connp->conn_fport = 0; 5728 connp->conn_v6lastdst = ipv6_all_zeros; 5729 mutex_exit(&connp->conn_lock); 5730 5731 error = ip_laddr_fanout_insert(connp); 5732 if (error != 0) 5733 goto late_error; 5734 5735 /* Bind succeeded */ 5736 return (0); 5737 5738 late_error: 5739 /* We had already picked the port number, and then the bind failed */ 5740 mutex_enter(&connp->conn_lock); 5741 udpf = &us->us_bind_fanout[ 5742 UDP_BIND_HASH(connp->conn_lport, 5743 us->us_bind_fanout_size)]; 5744 mutex_enter(&udpf->uf_lock); 5745 connp->conn_saddr_v6 = ipv6_all_zeros; 5746 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5747 connp->conn_laddr_v6 = ipv6_all_zeros; 5748 if (scopeid != 0) { 5749 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5750 connp->conn_incoming_ifindex = connp->conn_bound_if; 5751 } 5752 udp->udp_state = TS_UNBND; 5753 udp_bind_hash_remove(udp, B_TRUE); 5754 connp->conn_lport = 0; 5755 mutex_exit(&udpf->uf_lock); 5756 connp->conn_anon_port = B_FALSE; 5757 connp->conn_mlp_type = mlptSingle; 5758 5759 connp->conn_v6lastdst = ipv6_all_zeros; 5760 5761 /* Restore the header that was built above - different source address */ 5762 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5763 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5764 mutex_exit(&connp->conn_lock); 5765 return (error); 5766 } 5767 5768 int 5769 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5770 socklen_t len, cred_t *cr) 5771 { 5772 int error; 5773 conn_t *connp; 5774 5775 /* All Solaris components should pass a cred for this operation. 
*/ 5776 ASSERT(cr != NULL); 5777 5778 connp = (conn_t *)proto_handle; 5779 5780 if (sa == NULL) 5781 error = udp_do_unbind(connp); 5782 else 5783 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 5784 5785 if (error < 0) { 5786 if (error == -TOUTSTATE) 5787 error = EINVAL; 5788 else 5789 error = proto_tlitosyserr(-error); 5790 } 5791 5792 return (error); 5793 } 5794 5795 static int 5796 udp_implicit_bind(conn_t *connp, cred_t *cr) 5797 { 5798 sin6_t sin6addr; 5799 sin_t *sin; 5800 sin6_t *sin6; 5801 socklen_t len; 5802 int error; 5803 5804 /* All Solaris components should pass a cred for this operation. */ 5805 ASSERT(cr != NULL); 5806 5807 if (connp->conn_family == AF_INET) { 5808 len = sizeof (struct sockaddr_in); 5809 sin = (sin_t *)&sin6addr; 5810 *sin = sin_null; 5811 sin->sin_family = AF_INET; 5812 sin->sin_addr.s_addr = INADDR_ANY; 5813 } else { 5814 ASSERT(connp->conn_family == AF_INET6); 5815 len = sizeof (sin6_t); 5816 sin6 = (sin6_t *)&sin6addr; 5817 *sin6 = sin6_null; 5818 sin6->sin6_family = AF_INET6; 5819 V6_SET_ZERO(sin6->sin6_addr); 5820 } 5821 5822 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len, 5823 cr, B_FALSE); 5824 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5825 } 5826 5827 /* 5828 * This routine removes a port number association from a stream. It 5829 * is called by udp_unbind and udp_tpi_unbind. 
5830 */ 5831 static int 5832 udp_do_unbind(conn_t *connp) 5833 { 5834 udp_t *udp = connp->conn_udp; 5835 udp_fanout_t *udpf; 5836 udp_stack_t *us = udp->udp_us; 5837 5838 if (cl_inet_unbind != NULL) { 5839 /* 5840 * Running in cluster mode - register unbind information 5841 */ 5842 if (connp->conn_ipversion == IPV4_VERSION) { 5843 (*cl_inet_unbind)( 5844 connp->conn_netstack->netstack_stackid, 5845 IPPROTO_UDP, AF_INET, 5846 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5847 (in_port_t)connp->conn_lport, NULL); 5848 } else { 5849 (*cl_inet_unbind)( 5850 connp->conn_netstack->netstack_stackid, 5851 IPPROTO_UDP, AF_INET6, 5852 (uint8_t *)&(connp->conn_laddr_v6), 5853 (in_port_t)connp->conn_lport, NULL); 5854 } 5855 } 5856 5857 mutex_enter(&connp->conn_lock); 5858 /* If a bind has not been done, we can't unbind. */ 5859 if (udp->udp_state == TS_UNBND) { 5860 mutex_exit(&connp->conn_lock); 5861 return (-TOUTSTATE); 5862 } 5863 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 5864 us->us_bind_fanout_size)]; 5865 mutex_enter(&udpf->uf_lock); 5866 udp_bind_hash_remove(udp, B_TRUE); 5867 connp->conn_saddr_v6 = ipv6_all_zeros; 5868 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5869 connp->conn_laddr_v6 = ipv6_all_zeros; 5870 connp->conn_mcbc_bind = B_FALSE; 5871 connp->conn_lport = 0; 5872 /* In case we were also connected */ 5873 connp->conn_faddr_v6 = ipv6_all_zeros; 5874 connp->conn_fport = 0; 5875 mutex_exit(&udpf->uf_lock); 5876 5877 connp->conn_v6lastdst = ipv6_all_zeros; 5878 udp->udp_state = TS_UNBND; 5879 5880 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5881 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5882 mutex_exit(&connp->conn_lock); 5883 5884 ip_unbind(connp); 5885 5886 return (0); 5887 } 5888 5889 /* 5890 * It associates a default destination address with the stream. 
*/
/*
 * connp - endpoint to connect; must already be bound.
 * sa/len - destination; len selects whether sa is read as a sin_t or a
 *	sin6_t. Although sa is const-qualified, a zero destination address
 *	is rewritten to loopback through a cast pointer.
 * cr/pid - caller's credential and pid, recorded in the conn_t and the
 *	transmit attributes.
 *
 * Returns 0 on success. Failures are reported either as a positive errno
 * (EINVAL, EADDRNOTAVAIL, ENOMEM, or whatever conn_connect(),
 * ipcl_conn_insert() or udp_build_hdr_template() return) or as a negative
 * TLI code (-TBADADDR, -TOUTSTATE); callers must handle both conventions.
 *
 * On success the endpoint is left in TS_DATA_XFER. Any failure after the
 * state checks goes through connect_failed, which puts the endpoint back in
 * TS_IDLE with the foreign address/port cleared.
 */
static int
udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
    cred_t *cr, pid_t pid)
{
	sin6_t		*sin6;
	sin_t		*sin;
	in6_addr_t	v6dst;
	ipaddr_t	v4dst;
	uint16_t	dstport;
	uint32_t	flowinfo;
	udp_fanout_t	*udpf;
	udp_t		*udp, *udp1;
	ushort_t	ipversion;
	udp_stack_t	*us;
	int		error;
	conn_t		*connp1;
	ip_xmit_attr_t	*ixa;
	ip_xmit_attr_t	*oldixa;
	uint_t		scopeid = 0;
	uint_t		srcid = 0;
	in6_addr_t	v6src = connp->conn_saddr_v6;

	udp = connp->conn_udp;
	us = udp->udp_us;

	/*
	 * Address has been verified by the caller
	 */
	switch (len) {
	default:
		/*
		 * Should never happen
		 */
		return (EINVAL);

	case sizeof (sin_t):
		sin = (sin_t *)sa;
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(connp->conn_ipversion == IPV4_VERSION);
		ipversion = IPV4_VERSION;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)sa;
		v6dst = sin6->sin6_addr;
		dstport = sin6->sin6_port;
		srcid = sin6->__sin6_src_id;
		/* Only consult the source id when no source address is set */
		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
			    connp->conn_netstack);
		}
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			if (connp->conn_ipv6_v6only)
				return (EADDRNOTAVAIL);

			/*
			 * Destination address is mapped IPv6 address.
			 * Source bound address should be unspecified or
			 * IPv6 mapped address as well.
			 */
			if (!IN6_IS_ADDR_UNSPECIFIED(
			    &connp->conn_bound_addr_v6) &&
			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
				return (EADDRNOTAVAIL);
			}
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			ipversion = IPV4_VERSION;
			flowinfo = 0;
		} else {
			ipversion = IPV6_VERSION;
			flowinfo = sin6->sin6_flowinfo;
			/* A scope id is honoured only for link-local peers */
			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
				scopeid = sin6->sin6_scope_id;
		}
		break;
	}

	if (dstport == 0)
		return (-TBADADDR);

	/*
	 * If there is a different thread using conn_ixa then we get a new
	 * copy and cut the old one loose from conn_ixa. Otherwise we use
	 * conn_ixa and prevent any other thread from using/changing it.
	 * Once connect() is done other threads can use conn_ixa since the
	 * refcnt will be back at one.
	 * We defer updating conn_ixa until later to handle any concurrent
	 * conn_ixa_cleanup thread.
	 */
	ixa = conn_get_ixa(connp, B_FALSE);
	if (ixa == NULL)
		return (ENOMEM);

	ASSERT(ixa->ixa_refcnt >= 2);
	ASSERT(ixa == connp->conn_ixa);

	mutex_enter(&connp->conn_lock);
	/*
	 * This udp_t must have bound to a port already before doing a connect.
	 * Reject if a connect is in progress (we drop conn_lock during
	 * udp_do_connect).
	 */
	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
		mutex_exit(&connp->conn_lock);
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		ixa_refrele(ixa);
		return (-TOUTSTATE);
	}
	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);

	/* Bind-hash bucket for our local port; also used at connect_failed */
	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
	    us->us_bind_fanout_size)];

	mutex_enter(&udpf->uf_lock);
	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_faddr_v6 = ipv6_all_zeros;
		connp->conn_fport = 0;
		udp->udp_state = TS_IDLE;
	}

	connp->conn_fport = dstport;
	connp->conn_ipversion = ipversion;
	if (ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (connp->conn_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		connp->conn_faddr_v6 = v6dst;
		connp->conn_flowinfo = 0;
	} else {
		ASSERT(connp->conn_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		connp->conn_faddr_v6 = v6dst;
		connp->conn_flowinfo = flowinfo;
	}
	mutex_exit(&udpf->uf_lock);

	/*
	 * We update our cred/cpid based on the caller of connect
	 */
	if (connp->conn_cred != cr) {
		/* Take the new hold before dropping the old reference */
		crhold(cr);
		crfree(connp->conn_cred);
		connp->conn_cred = cr;
	}
	connp->conn_cpid = pid;
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;
	if (is_system_labeled()) {
		/* We need to restart with a label based on the cred */
		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
	}

	if (scopeid != 0) {
		ixa->ixa_flags |= IXAF_SCOPEID_SET;
		ixa->ixa_scopeid = scopeid;
		connp->conn_incoming_ifindex = scopeid;
	} else {
		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		connp->conn_incoming_ifindex = connp->conn_bound_if;
	}
	/*
	 * conn_connect will drop conn_lock and reacquire it.
	 * To prevent a send* from messing with this udp_t while the lock
	 * is dropped we set udp_state and clear conn_v6lastdst.
	 * That will make all send* fail with EISCONN.
	 */
	connp->conn_v6lastdst = ipv6_all_zeros;
	udp->udp_state = TS_WCON_CREQ;

	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
	mutex_exit(&connp->conn_lock);
	if (error != 0)
		goto connect_failed;

	/*
	 * The addresses have been verified. Time to insert in
	 * the correct fanout list.
	 */
	error = ipcl_conn_insert(connp);
	if (error != 0)
		goto connect_failed;

	mutex_enter(&connp->conn_lock);
	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		goto connect_failed;
	}

	udp->udp_state = TS_DATA_XFER;
	/* Record this as the "last" send even though we haven't sent any */
	connp->conn_v6lastdst = connp->conn_faddr_v6;
	connp->conn_lastipversion = connp->conn_ipversion;
	connp->conn_lastdstport = connp->conn_fport;
	connp->conn_lastflowinfo = connp->conn_flowinfo;
	connp->conn_lastscopeid = scopeid;
	connp->conn_lastsrcid = srcid;
	/* Also remember a source to use together with lastdst */
	connp->conn_v6lastsrc = v6src;

	oldixa = conn_replace_ixa(connp, ixa);
	mutex_exit(&connp->conn_lock);
	ixa_refrele(oldixa);

	/*
	 * We've picked a source address above. Now we can
	 * verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER, skipping ourselves.
	 */
	mutex_enter(&udpf->uf_lock);
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;

		if (udp1 == udp)
			continue;

		connp1 = udp1->udp_connp;
		/* Any differing tuple element or zone means no conflict */
		if (connp->conn_lport != connp1->conn_lport ||
		    connp->conn_ipversion != connp1->conn_ipversion ||
		    dstport != connp1->conn_fport ||
		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
		    &connp1->conn_laddr_v6) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
			continue;
		/* Duplicate connection found */
		mutex_exit(&udpf->uf_lock);
		error = -TBADADDR;
		goto connect_failed;
	}
	if (cl_inet_connect2 != NULL) {
		/* Cluster hook; any failure is reported as -TBADADDR */
		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
		if (error != 0) {
			mutex_exit(&udpf->uf_lock);
			error = -TBADADDR;
			goto connect_failed;
		}
	}
	mutex_exit(&udpf->uf_lock);

	ixa_refrele(ixa);
	return (0);

connect_failed:
	/* Entered with neither conn_lock nor uf_lock held */
	if (ixa != NULL)
		ixa_refrele(ixa);
	mutex_enter(&connp->conn_lock);
	mutex_enter(&udpf->uf_lock);
	udp->udp_state = TS_IDLE;
	connp->conn_faddr_v6 = ipv6_all_zeros;
	connp->conn_fport = 0;
	/* In case the source address was set above */
	if (connp->conn_mcbc_bind)
		connp->conn_saddr_v6 = ipv6_all_zeros;
	else
		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
	mutex_exit(&udpf->uf_lock);

	connp->conn_v6lastdst = ipv6_all_zeros;
	connp->conn_flowinfo = 0;

	/* Rebuild the template for the restored (unconnected) addresses */
	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	return (error);
}

/*
 * Socket connect entry point. A NULL sa requests a disconnect (EINVAL
 * unless the endpoint is in TS_DATA_XFER). Otherwise the address is
 * verified, an implicit bind is done if the endpoint is unbound, and
 * udp_do_connect() performs the connect. On success *id is set to 0 and
 * the su_connected upcall is made. Returns 0 or a positive errno.
 */
static int
udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
    socklen_t len, sock_connid_t *id,
cred_t *cr)
{
	conn_t	*connp = (conn_t *)proto_handle;
	udp_t	*udp = connp->conn_udp;
	int	error;
	boolean_t did_bind = B_FALSE;
	pid_t	pid = curproc->p_pid;

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	if (sa == NULL) {
		/*
		 * Disconnect
		 * Make sure we are connected
		 */
		if (udp->udp_state != TS_DATA_XFER)
			return (EINVAL);

		error = udp_disconnect(connp);
		return (error);
	}

	error = proto_verify_ip_addr(connp->conn_family, sa, len);
	if (error != 0)
		goto done;

	/* do an implicit bind if necessary */
	if (udp->udp_state == TS_UNBND) {
		error = udp_implicit_bind(connp, cr);
		/*
		 * We could be racing with an actual bind, in which case
		 * we would see EPROTO. We cross our fingers and try
		 * to connect.
		 */
		if (!(error == 0 || error == EPROTO))
			goto done;
		/*
		 * NOTE(review): did_bind is also set when the implicit bind
		 * returned EPROTO, i.e. when this thread did not do the
		 * bind itself. A later connect failure then unbinds the
		 * endpoint anyway - confirm that this is intended.
		 */
		did_bind = B_TRUE;
	}
	/*
	 * set SO_DGRAM_ERRIND
	 */
	connp->conn_dgram_errind = B_TRUE;

	error = udp_do_connect(connp, sa, len, cr, pid);

	/* Undo the implicit bind if the connect did not succeed */
	if (error != 0 && did_bind) {
		int unbind_err;

		unbind_err = udp_do_unbind(connp);
		ASSERT(unbind_err == 0);
	}

	if (error == 0) {
		*id = 0;
		(*connp->conn_upcalls->su_connected)
		    (connp->conn_upper_handle, 0, NULL, -1);
	} else if (error < 0) {
		/* Negative values are TLI codes; convert to an errno */
		error = proto_tlitosyserr(-error);
	}

done:
	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
		/*
		 * No need to hold locks to set state
		 * after connect failure socket state is undefined
		 * We set the state only to imitate old sockfs behavior
		 */
		udp->udp_state = TS_IDLE;
	}
	return (error);
}

/*
 * Socket send entry point for a single M_DATA message.
 *
 * An unbound endpoint is implicitly bound first. With no msg_name the
 * endpoint must be connected (EDESTADDRREQ otherwise) and the message goes
 * out via udp_output_connected() or, when ancillary data is present,
 * udp_output_ancillary(). With a msg_name the endpoint must not be
 * connected (EISCONN otherwise); the destination is verified, a zero
 * destination is rewritten to loopback in the caller's sockaddr, and the
 * message goes out via udp_output_ancillary(), udp_output_lastdst() or
 * udp_output_newdst().
 *
 * A pending udp_delayed_error is cleared and, if its recorded address and
 * port match this destination, returned instead of sending.
 *
 * Returns 0 or a positive errno. When us_sendto_ignerr is set, errors from
 * the output routines are discarded and 0 is returned.
 *
 * NOTE(review): mp is freed here only on the implicit-bind failure path;
 * the other early error returns in this function leave mp untouched.
 * Confirm against the caller's ownership contract for failed sends.
 */
int
udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
    cred_t *cr)
{
	sin6_t		*sin6;
	sin_t		*sin = NULL;
	uint_t		srcid;
	conn_t		*connp = (conn_t *)proto_handle;
	udp_t		*udp = connp->conn_udp;
	int		error = 0;
	udp_stack_t	*us = udp->udp_us;
	ushort_t	ipversion;
	pid_t		pid = curproc->p_pid;
	ip_xmit_attr_t	*ixa;

	ASSERT(DB_TYPE(mp) == M_DATA);

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	/* do an implicit bind if necessary */
	if (udp->udp_state == TS_UNBND) {
		error = udp_implicit_bind(connp, cr);
		/*
		 * We could be racing with an actual bind, in which case
		 * we would see EPROTO. We cross our fingers and try
		 * to send.
		 */
		if (!(error == 0 || error == EPROTO)) {
			freemsg(mp);
			return (error);
		}
	}

	/* Connected? */
	if (msg->msg_name == NULL) {
		if (udp->udp_state != TS_DATA_XFER) {
			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
			return (EDESTADDRREQ);
		}
		if (msg->msg_controllen != 0) {
			error = udp_output_ancillary(connp, NULL, NULL, mp,
			    NULL, msg, cr, pid);
		} else {
			error = udp_output_connected(connp, mp, cr, pid);
		}
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	}
	/* An explicit destination is not allowed on a connected endpoint */
	if (udp->udp_state == TS_DATA_XFER) {
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		return (EISCONN);
	}
	error = proto_verify_ip_addr(connp->conn_family,
	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
	if (error != 0) {
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		return (error);
	}
	switch (connp->conn_family) {
	case AF_INET6:
		sin6 = (sin6_t *)msg->msg_name;

		srcid = sin6->__sin6_src_id;

		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			/*
			 * Destination is a non-IPv4-compatible IPv6 address.
			 * Send out an IPv6 format packet.
			 */

			/*
			 * If the local address is a mapped address return
			 * an error.
			 * It would be possible to send an IPv6 packet but the
			 * response would never make it back to the application
			 * since it is bound to a mapped address.
			 */
			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}
			/* Zero destination means loopback */
			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				sin6->sin6_addr = ipv6_loopback;
			ipversion = IPV6_VERSION;
		} else {
			if (connp->conn_ipv6_v6only) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}

			/*
			 * If the local address is not zero or a mapped address
			 * return an error.  It would be possible to send an
			 * IPv4 packet but the response would never make it
			 * back to the application since it is bound to a
			 * non-mapped address.
			 */
			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}

			/* Zero IPv4 destination means loopback */
			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
				V4_PART_OF_V6(sin6->sin6_addr) =
				    htonl(INADDR_LOOPBACK);
			}
			ipversion = IPV4_VERSION;
		}

		/*
		 * We have to allocate an ip_xmit_attr_t before we grab
		 * conn_lock and we need to hold conn_lock once we've checked
		 * conn_same_as_last_v6 to handle concurrent send* calls on a
		 * socket.
		 */
		if (msg->msg_controllen == 0) {
			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (ENOMEM);
			}
		} else {
			/* No ixa is taken here for the ancillary path */
			ixa = NULL;
		}
		mutex_enter(&connp->conn_lock);
		if (udp->udp_delayed_error != 0) {
			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;

			/* The delayed error is consumed whether it matches or not */
			error = udp->udp_delayed_error;
			udp->udp_delayed_error = 0;

			/* Compare IP address, port, and family */

			if (sin6->sin6_port == sin2->sin6_port &&
			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
			    &sin2->sin6_addr) &&
			    sin6->sin6_family == sin2->sin6_family) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				if (ixa != NULL)
					ixa_refrele(ixa);
				return (error);
			}
		}

		if (msg->msg_controllen != 0) {
			mutex_exit(&connp->conn_lock);
			ASSERT(ixa == NULL);
			error = udp_output_ancillary(connp, NULL, sin6, mp,
			    NULL, msg, cr, pid);
		} else if (conn_same_as_last_v6(connp, sin6) &&
		    connp->conn_lastsrcid == srcid &&
		    ipsec_outbound_policy_current(ixa)) {
			/* udp_output_lastdst drops conn_lock */
			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
		} else {
			/* udp_output_newdst drops conn_lock */
			error = udp_output_newdst(connp, mp, NULL, sin6,
			    ipversion, cr, pid, ixa);
		}
		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	case AF_INET:
		sin = (sin_t *)msg->msg_name;

		ipversion = IPV4_VERSION;

		/* Zero destination means loopback */
		if (sin->sin_addr.s_addr == INADDR_ANY)
			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);

		/*
		 * We have to allocate an ip_xmit_attr_t before we grab
		 * conn_lock and we need to hold conn_lock once we've checked
		 * conn_same_as_last_v4 to handle concurrent send* on a socket.
		 */
		if (msg->msg_controllen == 0) {
			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (ENOMEM);
			}
		} else {
			/* No ixa is taken here for the ancillary path */
			ixa = NULL;
		}
		mutex_enter(&connp->conn_lock);
		if (udp->udp_delayed_error != 0) {
			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;

			/* The delayed error is consumed whether it matches or not */
			error = udp->udp_delayed_error;
			udp->udp_delayed_error = 0;

			/* Compare IP address and port */

			if (sin->sin_port == sin2->sin_port &&
			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				if (ixa != NULL)
					ixa_refrele(ixa);
				return (error);
			}
		}
		if (msg->msg_controllen != 0) {
			mutex_exit(&connp->conn_lock);
			ASSERT(ixa == NULL);
			error = udp_output_ancillary(connp, sin, NULL, mp,
			    NULL, msg, cr, pid);
		} else if (conn_same_as_last_v4(connp, sin) &&
		    ipsec_outbound_policy_current(ixa)) {
			/* udp_output_lastdst drops conn_lock */
			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
		} else {
			/* udp_output_newdst drops conn_lock */
			error = udp_output_newdst(connp, mp, sin, NULL,
			    ipversion, cr, pid, ixa);
		}
		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	default:
		return (EINVAL);
	}
}

/*
 * Convert a socket endpoint to a STREAMS endpoint on queue q: the conn_t is
 * attached to the queue pair, the stream head is told the write offset and
 * high-water mark, the helper stream is freed, quiesced_cb is called with
 * the capability, address and option state, and the datagrams it returns
 * plus any queued on the udp_t are passed upstream. Always returns 0.
 */
int
udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
    boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb,
    sock_quiesce_arg_t *arg)
{
	conn_t 	*connp = (conn_t *)proto_handle;
	udp_t	*udp;
	struct T_capability_ack tca;
	struct sockaddr_in6 laddr, faddr;
	socklen_t laddrlen, faddrlen;
	short opts;
	struct stroptions *stropt;
	mblk_t *mp, *stropt_mp;
	int error;

	udp = connp->conn_udp;

	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);

/*
	 * setup the fallback stream that was allocated
	 */
	/* Save what the queue pointers held before they are overwritten */
	connp->conn_dev = (dev_t)RD(q)->q_ptr;
	connp->conn_minor_arena = WR(q)->q_ptr;

	RD(q)->q_ptr = WR(q)->q_ptr = connp;

	WR(q)->q_qinfo = &udp_winit;

	connp->conn_rq = RD(q);
	connp->conn_wq = WR(q);

	/* Notify stream head about options before sending up data */
	stropt_mp->b_datap->db_type = M_SETOPTS;
	stropt_mp->b_wptr += sizeof (*stropt);
	stropt = (struct stroptions *)stropt_mp->b_rptr;
	stropt->so_flags = SO_WROFF | SO_HIWAT;
	stropt->so_wroff = connp->conn_wroff;
	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
	putnext(RD(q), stropt_mp);

	/*
	 * Free the helper stream
	 */
	ip_free_helper_stream(connp);

	if (!issocket)
		udp_use_pure_tpi(udp);

	/*
	 * Collect the information needed to sync with the sonode
	 */
	udp_do_capability_ack(udp, &tca, TC1_INFO);

	laddrlen = faddrlen = sizeof (sin6_t);
	(void) udp_getsockname((sock_lower_handle_t)connp,
	    (struct sockaddr *)&laddr, &laddrlen, CRED());
	error = udp_getpeername((sock_lower_handle_t)connp,
	    (struct sockaddr *)&faddr, &faddrlen, CRED());
	/* No peer (e.g. not connected): report a zero-length address */
	if (error != 0)
		faddrlen = 0;

	/* Socket options handed to the callback as a flag mask */
	opts = 0;
	if (connp->conn_dgram_errind)
		opts |= SO_DGRAM_ERRIND;
	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
		opts |= SO_DONTROUTE;

	mp = (*quiesced_cb)(connp->conn_upper_handle, arg, &tca,
	    (struct sockaddr *)&laddr, laddrlen,
	    (struct sockaddr *)&faddr, faddrlen, opts);

	mutex_enter(&udp->udp_recv_lock);
	/*
	 * Attempts to send data up during fallback will result in it being
	 * queued in udp_t. First push up the datagrams obtained from the
	 * socket, then any packets queued in udp_t.
6576 */ 6577 if (mp != NULL) { 6578 mp->b_next = udp->udp_fallback_queue_head; 6579 udp->udp_fallback_queue_head = mp; 6580 } 6581 while (udp->udp_fallback_queue_head != NULL) { 6582 mp = udp->udp_fallback_queue_head; 6583 udp->udp_fallback_queue_head = mp->b_next; 6584 mutex_exit(&udp->udp_recv_lock); 6585 mp->b_next = NULL; 6586 putnext(RD(q), mp); 6587 mutex_enter(&udp->udp_recv_lock); 6588 } 6589 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6590 /* 6591 * No longer a streams less socket 6592 */ 6593 mutex_enter(&connp->conn_lock); 6594 connp->conn_flags &= ~IPCL_NONSTR; 6595 mutex_exit(&connp->conn_lock); 6596 6597 mutex_exit(&udp->udp_recv_lock); 6598 6599 ASSERT(connp->conn_ref >= 1); 6600 6601 return (0); 6602 } 6603 6604 /* ARGSUSED3 */ 6605 int 6606 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6607 socklen_t *salenp, cred_t *cr) 6608 { 6609 conn_t *connp = (conn_t *)proto_handle; 6610 udp_t *udp = connp->conn_udp; 6611 int error; 6612 6613 /* All Solaris components should pass a cred for this operation. */ 6614 ASSERT(cr != NULL); 6615 6616 mutex_enter(&connp->conn_lock); 6617 if (udp->udp_state != TS_DATA_XFER) 6618 error = ENOTCONN; 6619 else 6620 error = conn_getpeername(connp, sa, salenp); 6621 mutex_exit(&connp->conn_lock); 6622 return (error); 6623 } 6624 6625 /* ARGSUSED3 */ 6626 int 6627 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6628 socklen_t *salenp, cred_t *cr) 6629 { 6630 conn_t *connp = (conn_t *)proto_handle; 6631 int error; 6632 6633 /* All Solaris components should pass a cred for this operation. 
*/ 6634 ASSERT(cr != NULL); 6635 6636 mutex_enter(&connp->conn_lock); 6637 error = conn_getsockname(connp, sa, salenp); 6638 mutex_exit(&connp->conn_lock); 6639 return (error); 6640 } 6641 6642 int 6643 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6644 void *optvalp, socklen_t *optlen, cred_t *cr) 6645 { 6646 conn_t *connp = (conn_t *)proto_handle; 6647 int error; 6648 t_uscalar_t max_optbuf_len; 6649 void *optvalp_buf; 6650 int len; 6651 6652 /* All Solaris components should pass a cred for this operation. */ 6653 ASSERT(cr != NULL); 6654 6655 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6656 udp_opt_obj.odb_opt_des_arr, 6657 udp_opt_obj.odb_opt_arr_cnt, 6658 B_FALSE, B_TRUE, cr); 6659 if (error != 0) { 6660 if (error < 0) 6661 error = proto_tlitosyserr(-error); 6662 return (error); 6663 } 6664 6665 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6666 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6667 if (len == -1) { 6668 kmem_free(optvalp_buf, max_optbuf_len); 6669 return (EINVAL); 6670 } 6671 6672 /* 6673 * update optlen and copy option value 6674 */ 6675 t_uscalar_t size = MIN(len, *optlen); 6676 6677 bcopy(optvalp_buf, optvalp, size); 6678 bcopy(&size, optlen, sizeof (size)); 6679 6680 kmem_free(optvalp_buf, max_optbuf_len); 6681 return (0); 6682 } 6683 6684 int 6685 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6686 const void *optvalp, socklen_t optlen, cred_t *cr) 6687 { 6688 conn_t *connp = (conn_t *)proto_handle; 6689 int error; 6690 6691 /* All Solaris components should pass a cred for this operation. 
*/ 6692 ASSERT(cr != NULL); 6693 6694 error = proto_opt_check(level, option_name, optlen, NULL, 6695 udp_opt_obj.odb_opt_des_arr, 6696 udp_opt_obj.odb_opt_arr_cnt, 6697 B_TRUE, B_FALSE, cr); 6698 6699 if (error != 0) { 6700 if (error < 0) 6701 error = proto_tlitosyserr(-error); 6702 return (error); 6703 } 6704 6705 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6706 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6707 NULL, cr); 6708 6709 ASSERT(error >= 0); 6710 6711 return (error); 6712 } 6713 6714 void 6715 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6716 { 6717 conn_t *connp = (conn_t *)proto_handle; 6718 udp_t *udp = connp->conn_udp; 6719 6720 mutex_enter(&udp->udp_recv_lock); 6721 connp->conn_flow_cntrld = B_FALSE; 6722 mutex_exit(&udp->udp_recv_lock); 6723 } 6724 6725 /* ARGSUSED2 */ 6726 int 6727 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6728 { 6729 conn_t *connp = (conn_t *)proto_handle; 6730 6731 /* All Solaris components should pass a cred for this operation. */ 6732 ASSERT(cr != NULL); 6733 6734 /* shut down the send side */ 6735 if (how != SHUT_RD) 6736 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6737 SOCK_OPCTL_SHUT_SEND, 0); 6738 /* shut down the recv side */ 6739 if (how != SHUT_WR) 6740 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6741 SOCK_OPCTL_SHUT_RECV, 0); 6742 return (0); 6743 } 6744 6745 int 6746 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6747 int mode, int32_t *rvalp, cred_t *cr) 6748 { 6749 conn_t *connp = (conn_t *)proto_handle; 6750 int error; 6751 6752 /* All Solaris components should pass a cred for this operation. */ 6753 ASSERT(cr != NULL); 6754 6755 /* 6756 * If we don't have a helper stream then create one. 6757 * ip_create_helper_stream takes care of locking the conn_t, 6758 * so this check for NULL is just a performance optimization. 
6759 */ 6760 if (connp->conn_helper_info == NULL) { 6761 udp_stack_t *us = connp->conn_udp->udp_us; 6762 6763 ASSERT(us->us_ldi_ident != NULL); 6764 6765 /* 6766 * Create a helper stream for non-STREAMS socket. 6767 */ 6768 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6769 if (error != 0) { 6770 ip0dbg(("tcp_ioctl: create of IP helper stream " 6771 "failed %d\n", error)); 6772 return (error); 6773 } 6774 } 6775 6776 switch (cmd) { 6777 case _SIOCSOCKFALLBACK: 6778 case TI_GETPEERNAME: 6779 case TI_GETMYNAME: 6780 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6781 cmd)); 6782 error = EINVAL; 6783 break; 6784 default: 6785 /* 6786 * Pass on to IP using helper stream 6787 */ 6788 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6789 cmd, arg, mode, cr, rvalp); 6790 break; 6791 } 6792 return (error); 6793 } 6794 6795 /* ARGSUSED */ 6796 int 6797 udp_accept(sock_lower_handle_t lproto_handle, 6798 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6799 cred_t *cr) 6800 { 6801 return (EOPNOTSUPP); 6802 } 6803 6804 /* ARGSUSED */ 6805 int 6806 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6807 { 6808 return (EOPNOTSUPP); 6809 } 6810 6811 sock_downcalls_t sock_udp_downcalls = { 6812 udp_activate, /* sd_activate */ 6813 udp_accept, /* sd_accept */ 6814 udp_bind, /* sd_bind */ 6815 udp_listen, /* sd_listen */ 6816 udp_connect, /* sd_connect */ 6817 udp_getpeername, /* sd_getpeername */ 6818 udp_getsockname, /* sd_getsockname */ 6819 udp_getsockopt, /* sd_getsockopt */ 6820 udp_setsockopt, /* sd_setsockopt */ 6821 udp_send, /* sd_send */ 6822 NULL, /* sd_send_uio */ 6823 NULL, /* sd_recv_uio */ 6824 NULL, /* sd_poll */ 6825 udp_shutdown, /* sd_shutdown */ 6826 udp_clr_flowctrl, /* sd_setflowctrl */ 6827 udp_ioctl, /* sd_ioctl */ 6828 udp_close /* sd_close */ 6829 }; 6830