1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/suntpi.h> 39 #include <sys/xti_inet.h> 40 #include <sys/kmem.h> 41 #include <sys/cred_impl.h> 42 #include <sys/policy.h> 43 #include <sys/priv.h> 44 #include <sys/ucred.h> 45 #include <sys/zone.h> 46 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/sockio.h> 50 #include <sys/vtrace.h> 51 #include <sys/sdt.h> 52 #include <sys/debug.h> 53 #include <sys/isa_defs.h> 54 #include <sys/random.h> 55 #include <netinet/in.h> 56 #include <netinet/ip6.h> 57 #include <netinet/icmp6.h> 58 #include <netinet/udp.h> 59 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip_impl.h> 63 #include <inet/ipsec_impl.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ire.h> 66 #include <inet/ip_if.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_ndp.h> 69 #include <inet/proto_set.h> 70 #include <inet/mib2.h> 71 #include <inet/nd.h> 72 #include <inet/optcom.h> 73 #include <inet/snmpcom.h> 74 #include <inet/kstatcom.h> 75 #include <inet/ipclassifier.h> 76 #include <sys/squeue_impl.h> 77 #include <inet/ipnet.h> 78 #include <sys/ethernet.h> 79 80 #include <sys/tsol/label.h> 81 #include <sys/tsol/tnet.h> 82 #include <rpc/pmap_prot.h> 83 84 #include <inet/udp_impl.h> 85 86 /* 87 * Synchronization notes: 88 * 89 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 90 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 91 * protects the contents of the udp_t. uf_lock protects the address and the 92 * fanout information. 93 * The lock order is conn_lock -> uf_lock. 94 * 95 * The fanout lock uf_lock: 96 * When a UDP endpoint is bound to a local port, it is inserted into 97 * a bind hash list. 
The list consists of an array of udp_fanout_t buckets. 98 * The size of the array is controlled by the udp_bind_fanout_size variable. 99 * This variable can be changed in /etc/system if the default value is 100 * not large enough. Each bind hash bucket is protected by a per bucket 101 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 102 * structure and a few other fields in the udp_t. A UDP endpoint is removed 103 * from the bind hash list only when it is being unbound or being closed. 104 * The per bucket lock also protects a UDP endpoint's state changes. 105 * 106 * Plumbing notes: 107 * UDP is always a device driver. For compatibility with mibopen() code 108 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 109 * dummy module. 110 * 111 * The above implies that we don't support any intermediate module to 112 * reside in between /dev/ip and udp -- in fact, we never supported such 113 * scenario in the past as the inter-layer communication semantics have 114 * always been private. 
115 */ 116 117 /* For /etc/system control */ 118 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 119 120 static void udp_addr_req(queue_t *q, mblk_t *mp); 121 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 122 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 123 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 124 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 125 const in6_addr_t *, in_port_t, uint32_t); 126 static void udp_capability_req(queue_t *q, mblk_t *mp); 127 static int udp_tpi_close(queue_t *q, int flags); 128 static void udp_close_free(conn_t *); 129 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 130 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 131 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 132 int sys_error); 133 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 134 t_scalar_t tlierr, int sys_error); 135 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 136 cred_t *cr); 137 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 138 char *value, caddr_t cp, cred_t *cr); 139 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 140 char *value, caddr_t cp, cred_t *cr); 141 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 142 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 143 ip_recv_attr_t *ira); 144 static void udp_info_req(queue_t *q, mblk_t *mp); 145 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 146 static void udp_lrput(queue_t *, mblk_t *); 147 static void udp_lwput(queue_t *, mblk_t *); 148 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 149 cred_t *credp, boolean_t isv6); 150 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 151 cred_t *credp); 152 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 153 cred_t *credp); 154 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, 
t_scalar_t name); 155 int udp_opt_set(conn_t *connp, uint_t optset_context, 156 int level, int name, uint_t inlen, 157 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 158 void *thisdg_attrs, cred_t *cr); 159 int udp_opt_get(conn_t *connp, int level, int name, 160 uchar_t *ptr); 161 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 162 pid_t pid); 163 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 164 pid_t pid, ip_xmit_attr_t *ixa); 165 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 166 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 167 ip_xmit_attr_t *ixa); 168 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 169 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 170 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 171 cred_t *cr); 172 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 173 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 174 int *); 175 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 176 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 177 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 178 static void udp_ud_err_connected(conn_t *, t_scalar_t); 179 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 180 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 181 boolean_t random); 182 static void udp_wput_other(queue_t *q, mblk_t *mp); 183 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 184 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 185 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 186 187 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 188 static void udp_stack_fini(netstackid_t stackid, void *arg); 189 190 static void *udp_kstat_init(netstackid_t stackid); 191 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 192 static void 
*udp_kstat2_init(netstackid_t, udp_stat_t *); 193 static void udp_kstat2_fini(netstackid_t, kstat_t *); 194 static int udp_kstat_update(kstat_t *kp, int rw); 195 196 197 /* Common routines for TPI and socket module */ 198 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 199 200 /* Common routine for TPI and socket module */ 201 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); 202 static void udp_do_close(conn_t *); 203 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 204 boolean_t); 205 static int udp_do_unbind(conn_t *); 206 207 int udp_getsockname(sock_lower_handle_t, 208 struct sockaddr *, socklen_t *, cred_t *); 209 int udp_getpeername(sock_lower_handle_t, 210 struct sockaddr *, socklen_t *, cred_t *); 211 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 212 cred_t *, pid_t); 213 214 #define UDP_RECV_HIWATER (56 * 1024) 215 #define UDP_RECV_LOWATER 128 216 #define UDP_XMIT_HIWATER (56 * 1024) 217 #define UDP_XMIT_LOWATER 1024 218 219 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) 220 221 /* 222 * Checks if the given destination addr/port is allowed out. 223 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 224 * Called for each connect() and for sendto()/sendmsg() to a different 225 * destination. 226 * For connect(), called in udp_connect(). 227 * For sendto()/sendmsg(), called in udp_output_newdst(). 228 * 229 * This macro assumes that the cl_inet_connect2 hook is not NULL. 230 * Please check this before calling this macro. 
231 * 232 * void 233 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 234 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 235 */ 236 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 237 (err) = 0; \ 238 /* \ 239 * Running in cluster mode - check and register active \ 240 * "connection" information \ 241 */ \ 242 if ((cp)->conn_ipversion == IPV4_VERSION) \ 243 (err) = (*cl_inet_connect2)( \ 244 (cp)->conn_netstack->netstack_stackid, \ 245 IPPROTO_UDP, is_outgoing, AF_INET, \ 246 (uint8_t *)&((cp)->conn_laddr_v4), \ 247 (cp)->conn_lport, \ 248 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 249 (in_port_t)(fport), NULL); \ 250 else \ 251 (err) = (*cl_inet_connect2)( \ 252 (cp)->conn_netstack->netstack_stackid, \ 253 IPPROTO_UDP, is_outgoing, AF_INET6, \ 254 (uint8_t *)&((cp)->conn_laddr_v6), \ 255 (cp)->conn_lport, \ 256 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 257 } 258 259 static struct module_info udp_mod_info = { 260 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 261 }; 262 263 /* 264 * Entry points for UDP as a device. 265 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 266 */ 267 static struct qinit udp_rinitv4 = { 268 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 269 }; 270 271 static struct qinit udp_rinitv6 = { 272 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 273 }; 274 275 static struct qinit udp_winit = { 276 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info 277 }; 278 279 /* UDP entry point during fallback */ 280 struct qinit udp_fallback_sock_winit = { 281 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 282 }; 283 284 /* 285 * UDP needs to handle I_LINK and I_PLINK since ifconfig 286 * likes to use it as a place to hang the various streams. 
287 */ 288 static struct qinit udp_lrinit = { 289 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 290 }; 291 292 static struct qinit udp_lwinit = { 293 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 294 }; 295 296 /* For AF_INET aka /dev/udp */ 297 struct streamtab udpinfov4 = { 298 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 299 }; 300 301 /* For AF_INET6 aka /dev/udp6 */ 302 struct streamtab udpinfov6 = { 303 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 304 }; 305 306 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 307 308 /* Default structure copied into T_INFO_ACK messages */ 309 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 310 T_INFO_ACK, 311 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 312 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 313 T_INVALID, /* CDATA_size. udp does not support connect data. */ 314 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 315 sizeof (sin_t), /* ADDR_size. */ 316 0, /* OPT_size - not initialized here */ 317 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 318 T_CLTS, /* SERV_type. udp supports connection-less. */ 319 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 320 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 321 }; 322 323 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 324 325 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 326 T_INFO_ACK, 327 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 328 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 329 T_INVALID, /* CDATA_size. udp does not support connect data. */ 330 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 331 sizeof (sin6_t), /* ADDR_size. */ 332 0, /* OPT_size - not initialized here */ 333 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 334 T_CLTS, /* SERV_type. udp supports connection-less. */ 335 TS_UNBND, /* CURRENT_state. This is set from udp_state. 
*/ 336 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 337 }; 338 339 /* largest UDP port number */ 340 #define UDP_MAX_PORT 65535 341 342 /* 343 * Table of ND variables supported by udp. These are loaded into us_nd 344 * in udp_open. 345 * All of these are alterable, within the min/max values given, at run time. 346 */ 347 /* BEGIN CSTYLED */ 348 udpparam_t udp_param_arr[] = { 349 /*min max value name */ 350 { 0L, 256, 32, "udp_wroff_extra" }, 351 { 1L, 255, 255, "udp_ipv4_ttl" }, 352 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 353 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 354 { 0, 1, 1, "udp_do_checksum" }, 355 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 356 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 357 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 358 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 359 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 360 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 361 { 0, 1, 0, "udp_pmtu_discovery" }, 362 { 0, 1, 0, "udp_sendto_ignerr" }, 363 }; 364 /* END CSTYLED */ 365 366 /* Setable in /etc/system */ 367 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 368 uint32_t udp_random_anon_port = 1; 369 370 /* 371 * Hook functions to enable cluster networking. 372 * On non-clustered systems these vectors must always be NULL 373 */ 374 375 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 376 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 377 void *args) = NULL; 378 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 379 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 380 void *args) = NULL; 381 382 typedef union T_primitives *t_primp_t; 383 384 /* 385 * Return the next anonymous port in the privileged port range for 386 * bind checking. 387 * 388 * Trusted Extension (TX) notes: TX allows administrator to mark or 389 * reserve ports as Multilevel ports (MLP). 
MLP has special function 390 * on TX systems. Once a port is made MLP, it's not available as 391 * ordinary port. This creates "holes" in the port name space. It 392 * may be necessary to skip the "holes" find a suitable anon port. 393 */ 394 static in_port_t 395 udp_get_next_priv_port(udp_t *udp) 396 { 397 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 398 in_port_t nextport; 399 boolean_t restart = B_FALSE; 400 udp_stack_t *us = udp->udp_us; 401 402 retry: 403 if (next_priv_port < us->us_min_anonpriv_port || 404 next_priv_port >= IPPORT_RESERVED) { 405 next_priv_port = IPPORT_RESERVED - 1; 406 if (restart) 407 return (0); 408 restart = B_TRUE; 409 } 410 411 if (is_system_labeled() && 412 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 413 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 414 next_priv_port = nextport; 415 goto retry; 416 } 417 418 return (next_priv_port--); 419 } 420 421 /* 422 * Hash list removal routine for udp_t structures. 423 */ 424 static void 425 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 426 { 427 udp_t *udpnext; 428 kmutex_t *lockp; 429 udp_stack_t *us = udp->udp_us; 430 conn_t *connp = udp->udp_connp; 431 432 if (udp->udp_ptpbhn == NULL) 433 return; 434 435 /* 436 * Extract the lock pointer in case there are concurrent 437 * hash_remove's for this instance. 
438 */ 439 ASSERT(connp->conn_lport != 0); 440 if (!caller_holds_lock) { 441 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 442 us->us_bind_fanout_size)].uf_lock; 443 ASSERT(lockp != NULL); 444 mutex_enter(lockp); 445 } 446 if (udp->udp_ptpbhn != NULL) { 447 udpnext = udp->udp_bind_hash; 448 if (udpnext != NULL) { 449 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 450 udp->udp_bind_hash = NULL; 451 } 452 *udp->udp_ptpbhn = udpnext; 453 udp->udp_ptpbhn = NULL; 454 } 455 if (!caller_holds_lock) { 456 mutex_exit(lockp); 457 } 458 } 459 460 static void 461 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 462 { 463 conn_t *connp = udp->udp_connp; 464 udp_t **udpp; 465 udp_t *udpnext; 466 conn_t *connext; 467 468 ASSERT(MUTEX_HELD(&uf->uf_lock)); 469 ASSERT(udp->udp_ptpbhn == NULL); 470 udpp = &uf->uf_udp; 471 udpnext = udpp[0]; 472 if (udpnext != NULL) { 473 /* 474 * If the new udp bound to the INADDR_ANY address 475 * and the first one in the list is not bound to 476 * INADDR_ANY we skip all entries until we find the 477 * first one bound to INADDR_ANY. 478 * This makes sure that applications binding to a 479 * specific address get preference over those binding to 480 * INADDR_ANY. 481 */ 482 connext = udpnext->udp_connp; 483 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 484 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 485 while ((udpnext = udpp[0]) != NULL && 486 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 487 udpp = &(udpnext->udp_bind_hash); 488 } 489 if (udpnext != NULL) 490 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 491 } else { 492 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 493 } 494 } 495 udp->udp_bind_hash = udpnext; 496 udp->udp_ptpbhn = udpp; 497 udpp[0] = udp; 498 } 499 500 /* 501 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 502 * passed to udp_wput. 503 * It associates a port number and local address with the stream. 
504 * It calls IP to verify the local IP address, and calls IP to insert 505 * the conn_t in the fanout table. 506 * If everything is ok it then sends the T_BIND_ACK back up. 507 * 508 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 509 * without setting SO_REUSEADDR. This is needed so that they 510 * can be viewed as two independent transport protocols. 511 * However, anonymouns ports are allocated from the same range to avoid 512 * duplicating the us->us_next_port_to_try. 513 */ 514 static void 515 udp_tpi_bind(queue_t *q, mblk_t *mp) 516 { 517 sin_t *sin; 518 sin6_t *sin6; 519 mblk_t *mp1; 520 struct T_bind_req *tbr; 521 conn_t *connp; 522 udp_t *udp; 523 int error; 524 struct sockaddr *sa; 525 cred_t *cr; 526 527 /* 528 * All Solaris components should pass a db_credp 529 * for this TPI message, hence we ASSERT. 530 * But in case there is some other M_PROTO that looks 531 * like a TPI message sent by some other kernel 532 * component, we check and return an error. 533 */ 534 cr = msg_getcred(mp, NULL); 535 ASSERT(cr != NULL); 536 if (cr == NULL) { 537 udp_err_ack(q, mp, TSYSERR, EINVAL); 538 return; 539 } 540 541 connp = Q_TO_CONN(q); 542 udp = connp->conn_udp; 543 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 544 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 545 "udp_bind: bad req, len %u", 546 (uint_t)(mp->b_wptr - mp->b_rptr)); 547 udp_err_ack(q, mp, TPROTO, 0); 548 return; 549 } 550 if (udp->udp_state != TS_UNBND) { 551 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 552 "udp_bind: bad state, %u", udp->udp_state); 553 udp_err_ack(q, mp, TOUTSTATE, 0); 554 return; 555 } 556 /* 557 * Reallocate the message to make sure we have enough room for an 558 * address. 559 */ 560 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 561 if (mp1 == NULL) { 562 udp_err_ack(q, mp, TSYSERR, ENOMEM); 563 return; 564 } 565 566 mp = mp1; 567 568 /* Reset the message type in preparation for shipping it back. 
*/ 569 DB_TYPE(mp) = M_PCPROTO; 570 571 tbr = (struct T_bind_req *)mp->b_rptr; 572 switch (tbr->ADDR_length) { 573 case 0: /* Request for a generic port */ 574 tbr->ADDR_offset = sizeof (struct T_bind_req); 575 if (connp->conn_family == AF_INET) { 576 tbr->ADDR_length = sizeof (sin_t); 577 sin = (sin_t *)&tbr[1]; 578 *sin = sin_null; 579 sin->sin_family = AF_INET; 580 mp->b_wptr = (uchar_t *)&sin[1]; 581 sa = (struct sockaddr *)sin; 582 } else { 583 ASSERT(connp->conn_family == AF_INET6); 584 tbr->ADDR_length = sizeof (sin6_t); 585 sin6 = (sin6_t *)&tbr[1]; 586 *sin6 = sin6_null; 587 sin6->sin6_family = AF_INET6; 588 mp->b_wptr = (uchar_t *)&sin6[1]; 589 sa = (struct sockaddr *)sin6; 590 } 591 break; 592 593 case sizeof (sin_t): /* Complete IPv4 address */ 594 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 595 sizeof (sin_t)); 596 if (sa == NULL || !OK_32PTR((char *)sa)) { 597 udp_err_ack(q, mp, TSYSERR, EINVAL); 598 return; 599 } 600 if (connp->conn_family != AF_INET || 601 sa->sa_family != AF_INET) { 602 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 603 return; 604 } 605 break; 606 607 case sizeof (sin6_t): /* complete IPv6 address */ 608 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 609 sizeof (sin6_t)); 610 if (sa == NULL || !OK_32PTR((char *)sa)) { 611 udp_err_ack(q, mp, TSYSERR, EINVAL); 612 return; 613 } 614 if (connp->conn_family != AF_INET6 || 615 sa->sa_family != AF_INET6) { 616 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 617 return; 618 } 619 break; 620 621 default: /* Invalid request */ 622 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 623 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 624 udp_err_ack(q, mp, TBADADDR, 0); 625 return; 626 } 627 628 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 629 tbr->PRIM_type != O_T_BIND_REQ); 630 631 if (error != 0) { 632 if (error > 0) { 633 udp_err_ack(q, mp, TSYSERR, error); 634 } else { 635 udp_err_ack(q, mp, -error, 0); 636 } 637 } else { 638 tbr->PRIM_type = T_BIND_ACK; 
639 qreply(q, mp); 640 } 641 } 642 643 /* 644 * This routine handles each T_CONN_REQ message passed to udp. It 645 * associates a default destination address with the stream. 646 * 647 * After various error checks are completed, udp_connect() lays 648 * the target address and port into the composite header template. 649 * Then we ask IP for information, including a source address if we didn't 650 * already have one. Finally we send up the T_OK_ACK reply message. 651 */ 652 static void 653 udp_tpi_connect(queue_t *q, mblk_t *mp) 654 { 655 conn_t *connp = Q_TO_CONN(q); 656 int error; 657 socklen_t len; 658 struct sockaddr *sa; 659 struct T_conn_req *tcr; 660 cred_t *cr; 661 pid_t pid; 662 /* 663 * All Solaris components should pass a db_credp 664 * for this TPI message, hence we ASSERT. 665 * But in case there is some other M_PROTO that looks 666 * like a TPI message sent by some other kernel 667 * component, we check and return an error. 668 */ 669 cr = msg_getcred(mp, &pid); 670 ASSERT(cr != NULL); 671 if (cr == NULL) { 672 udp_err_ack(q, mp, TSYSERR, EINVAL); 673 return; 674 } 675 676 tcr = (struct T_conn_req *)mp->b_rptr; 677 678 /* A bit of sanity checking */ 679 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 680 udp_err_ack(q, mp, TPROTO, 0); 681 return; 682 } 683 684 if (tcr->OPT_length != 0) { 685 udp_err_ack(q, mp, TBADOPT, 0); 686 return; 687 } 688 689 /* 690 * Determine packet type based on type of address passed in 691 * the request should contain an IPv4 or IPv6 address. 692 * Make sure that address family matches the type of 693 * family of the address passed down. 
694 */ 695 len = tcr->DEST_length; 696 switch (tcr->DEST_length) { 697 default: 698 udp_err_ack(q, mp, TBADADDR, 0); 699 return; 700 701 case sizeof (sin_t): 702 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 703 sizeof (sin_t)); 704 break; 705 706 case sizeof (sin6_t): 707 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 708 sizeof (sin6_t)); 709 break; 710 } 711 712 error = proto_verify_ip_addr(connp->conn_family, sa, len); 713 if (error != 0) { 714 udp_err_ack(q, mp, TSYSERR, error); 715 return; 716 } 717 718 error = udp_do_connect(connp, sa, len, cr, pid); 719 if (error != 0) { 720 if (error < 0) 721 udp_err_ack(q, mp, -error, 0); 722 else 723 udp_err_ack(q, mp, TSYSERR, error); 724 } else { 725 mblk_t *mp1; 726 /* 727 * We have to send a connection confirmation to 728 * keep TLI happy. 729 */ 730 if (connp->conn_family == AF_INET) { 731 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 732 sizeof (sin_t), NULL, 0); 733 } else { 734 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 735 sizeof (sin6_t), NULL, 0); 736 } 737 if (mp1 == NULL) { 738 udp_err_ack(q, mp, TSYSERR, ENOMEM); 739 return; 740 } 741 742 /* 743 * Send ok_ack for T_CONN_REQ 744 */ 745 mp = mi_tpi_ok_ack_alloc(mp); 746 if (mp == NULL) { 747 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 748 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 749 return; 750 } 751 752 putnext(connp->conn_rq, mp); 753 putnext(connp->conn_rq, mp1); 754 } 755 } 756 757 static int 758 udp_tpi_close(queue_t *q, int flags) 759 { 760 conn_t *connp; 761 762 if (flags & SO_FALLBACK) { 763 /* 764 * stream is being closed while in fallback 765 * simply free the resources that were allocated 766 */ 767 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 768 qprocsoff(q); 769 goto done; 770 } 771 772 connp = Q_TO_CONN(q); 773 udp_do_close(connp); 774 done: 775 q->q_ptr = WR(q)->q_ptr = NULL; 776 return (0); 777 } 778 779 static void 780 udp_close_free(conn_t *connp) 781 { 782 udp_t *udp = connp->conn_udp; 783 784 /* If there are any options associated with the stream, free them. */ 785 if (udp->udp_recv_ipp.ipp_fields != 0) 786 ip_pkt_free(&udp->udp_recv_ipp); 787 788 /* 789 * Clear any fields which the kmem_cache constructor clears. 790 * Only udp_connp needs to be preserved. 791 * TBD: We should make this more efficient to avoid clearing 792 * everything. 
793 */ 794 ASSERT(udp->udp_connp == connp); 795 bzero(udp, sizeof (udp_t)); 796 udp->udp_connp = connp; 797 } 798 799 static int 800 udp_do_disconnect(conn_t *connp) 801 { 802 udp_t *udp; 803 udp_fanout_t *udpf; 804 udp_stack_t *us; 805 int error; 806 807 udp = connp->conn_udp; 808 us = udp->udp_us; 809 mutex_enter(&connp->conn_lock); 810 if (udp->udp_state != TS_DATA_XFER) { 811 mutex_exit(&connp->conn_lock); 812 return (-TOUTSTATE); 813 } 814 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 815 us->us_bind_fanout_size)]; 816 mutex_enter(&udpf->uf_lock); 817 if (connp->conn_mcbc_bind) 818 connp->conn_saddr_v6 = ipv6_all_zeros; 819 else 820 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 821 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 822 connp->conn_faddr_v6 = ipv6_all_zeros; 823 connp->conn_fport = 0; 824 udp->udp_state = TS_IDLE; 825 mutex_exit(&udpf->uf_lock); 826 827 /* Remove any remnants of mapped address binding */ 828 if (connp->conn_family == AF_INET6) 829 connp->conn_ipversion = IPV6_VERSION; 830 831 connp->conn_v6lastdst = ipv6_all_zeros; 832 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 833 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 834 mutex_exit(&connp->conn_lock); 835 if (error != 0) 836 return (error); 837 838 /* 839 * Tell IP to remove the full binding and revert 840 * to the local address binding. 841 */ 842 return (ip_laddr_fanout_insert(connp)); 843 } 844 845 static void 846 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 847 { 848 conn_t *connp = Q_TO_CONN(q); 849 int error; 850 851 /* 852 * Allocate the largest primitive we need to send back 853 * T_error_ack is > than T_ok_ack 854 */ 855 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 856 if (mp == NULL) { 857 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 858 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 859 return; 860 } 861 862 error = udp_do_disconnect(connp); 863 864 if (error != 0) { 865 if (error < 0) { 866 udp_err_ack(q, mp, -error, 0); 867 } else { 868 udp_err_ack(q, mp, TSYSERR, error); 869 } 870 } else { 871 mp = mi_tpi_ok_ack_alloc(mp); 872 ASSERT(mp != NULL); 873 qreply(q, mp); 874 } 875 } 876 877 int 878 udp_disconnect(conn_t *connp) 879 { 880 int error; 881 882 connp->conn_dgram_errind = B_FALSE; 883 error = udp_do_disconnect(connp); 884 if (error < 0) 885 error = proto_tlitosyserr(-error); 886 887 return (error); 888 } 889 890 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 891 static void 892 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 893 { 894 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 895 qreply(q, mp); 896 } 897 898 /* Shorthand to generate and send TPI error acks to our client */ 899 static void 900 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 901 t_scalar_t t_error, int sys_error) 902 { 903 struct T_error_ack *teackp; 904 905 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 906 M_PCPROTO, T_ERROR_ACK)) != NULL) { 907 teackp = (struct T_error_ack *)mp->b_rptr; 908 teackp->ERROR_prim = primitive; 909 teackp->TLI_error = t_error; 910 teackp->UNIX_error = sys_error; 911 qreply(q, mp); 912 } 913 } 914 915 /*ARGSUSED2*/ 916 static int 917 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 918 { 919 int i; 920 udp_t *udp = Q_TO_UDP(q); 921 udp_stack_t *us = udp->udp_us; 922 923 for (i = 0; i < us->us_num_epriv_ports; i++) { 924 if (us->us_epriv_ports[i] != 0) 925 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 926 } 927 return (0); 928 } 929 930 /* ARGSUSED1 */ 931 static int 932 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 933 cred_t *cr) 934 { 935 long new_value; 936 int i; 937 udp_t *udp = Q_TO_UDP(q); 938 udp_stack_t *us = 
udp->udp_us; 939 940 /* 941 * Fail the request if the new value does not lie within the 942 * port number limits. 943 */ 944 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 945 new_value <= 0 || new_value >= 65536) { 946 return (EINVAL); 947 } 948 949 /* Check if the value is already in the list */ 950 for (i = 0; i < us->us_num_epriv_ports; i++) { 951 if (new_value == us->us_epriv_ports[i]) { 952 return (EEXIST); 953 } 954 } 955 /* Find an empty slot */ 956 for (i = 0; i < us->us_num_epriv_ports; i++) { 957 if (us->us_epriv_ports[i] == 0) 958 break; 959 } 960 if (i == us->us_num_epriv_ports) { 961 return (EOVERFLOW); 962 } 963 964 /* Set the new value */ 965 us->us_epriv_ports[i] = (in_port_t)new_value; 966 return (0); 967 } 968 969 /* ARGSUSED1 */ 970 static int 971 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 972 cred_t *cr) 973 { 974 long new_value; 975 int i; 976 udp_t *udp = Q_TO_UDP(q); 977 udp_stack_t *us = udp->udp_us; 978 979 /* 980 * Fail the request if the new value does not lie within the 981 * port number limits. 982 */ 983 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 984 new_value <= 0 || new_value >= 65536) { 985 return (EINVAL); 986 } 987 988 /* Check that the value is already in the list */ 989 for (i = 0; i < us->us_num_epriv_ports; i++) { 990 if (us->us_epriv_ports[i] == new_value) 991 break; 992 } 993 if (i == us->us_num_epriv_ports) { 994 return (ESRCH); 995 } 996 997 /* Clear the value */ 998 us->us_epriv_ports[i] = 0; 999 return (0); 1000 } 1001 1002 /* At minimum we need 4 bytes of UDP header */ 1003 #define ICMP_MIN_UDP_HDR 4 1004 1005 /* 1006 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 1007 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1008 * Assumes that IP has pulled up everything up to and including the ICMP header. 
*/
/* ARGSUSED2 */
static void
udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t		*connp = (conn_t *)arg1;
	icmph_t		*icmph;
	ipha_t		*ipha;
	int		iph_hdr_length;
	udpha_t		*udpha;
	sin_t		sin;
	sin6_t		sin6;
	mblk_t		*mp1;
	int		error = 0;
	udp_t		*udp = connp->conn_udp;

	ipha = (ipha_t *)mp->b_rptr;

	ASSERT(OK_32PTR(mp->b_rptr));

	/* Anything that is not IPv4 is handed to the IPv6 handler. */
	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
		udp_icmp_error_ipv6(connp, mp, ira);
		return;
	}
	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);

	/* Skip past the outer IP and ICMP headers */
	ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
	iph_hdr_length = ira->ira_ip_hdr_length;
	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
	ipha = (ipha_t *)&icmph[1];	/* Inner IP header */

	/* Skip past the inner IP and find the ULP header */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);

	/*
	 * Classify the ICMP message.  Only port/protocol unreachable set
	 * `error'; every other type/code leaves it at 0 and the message is
	 * dropped below.
	 */
	switch (icmph->icmph_type) {
	case ICMP_DEST_UNREACHABLE:
		switch (icmph->icmph_code) {
		case ICMP_FRAGMENTATION_NEEDED: {
			/* Shadows the outer ipha; points at the template. */
			ipha_t		*ipha;
			ip_xmit_attr_t	*ixa;
			/*
			 * IP has already adjusted the path MTU.
			 * But we need to adjust DF for IPv4.
			 */
			if (connp->conn_ipversion != IPV4_VERSION)
				break;

			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL || ixa->ixa_ire == NULL) {
				/*
				 * Some other thread holds conn_ixa. We will
				 * redo this on the next ICMP too big.
				 */
				if (ixa != NULL)
					ixa_refrele(ixa);
				break;
			}
			(void) ip_get_pmtu(ixa);

			/*
			 * Mirror IXAF_PMTU_IPV4_DF into the DF bit of the
			 * cached IPv4 header (conn_ht_iphc), under conn_lock.
			 */
			mutex_enter(&connp->conn_lock);
			ipha = (ipha_t *)connp->conn_ht_iphc;
			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
				ipha->ipha_fragment_offset_and_flags |=
				    IPH_DF_HTONS;
			} else {
				ipha->ipha_fragment_offset_and_flags &=
				    ~IPH_DF_HTONS;
			}
			mutex_exit(&connp->conn_lock);
			ixa_refrele(ixa);
			break;
		}
		case ICMP_PORT_UNREACHABLE:
		case ICMP_PROTOCOL_UNREACHABLE:
			error = ECONNREFUSED;
			break;
		default:
			/* Transient errors */
			break;
		}
		break;
	default:
		/* Transient errors */
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!connp->conn_dgram_errind) {
		freemsg(mp);
		return;
	}

	/*
	 * Build the address the failed datagram was sent to from the inner
	 * IP/UDP headers, in the address family of the endpoint.
	 *
	 * For a non-STREAMS endpoint (IPCL_IS_NONSTR):
	 *  - in TS_DATA_XFER, the error is reported through the
	 *    su_set_error upcall only if that address equals the connected
	 *    peer; conn_lock is dropped before the upcall is made.  A
	 *    non-matching address is silently ignored.
	 *  - in any other state, the error and address are recorded in
	 *    udp_delayed_error/udp_delayed_addr.
	 * For a STREAMS endpoint a T_UDERROR_IND is sent up conn_rq.
	 */
	switch (connp->conn_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = ipha->ipha_dst;
		sin.sin_port = udpha->uha_dst_port;
		if (IPCL_IS_NONSTR(connp)) {
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER) {
				if (sin.sin_port == connp->conn_fport &&
				    sin.sin_addr.s_addr ==
				    connp->conn_faddr_v4) {
					mutex_exit(&connp->conn_lock);
					(*connp->conn_upcalls->su_set_error)
					    (connp->conn_upper_handle, error);
					goto done;
				}
			} else {
				udp->udp_delayed_error = error;
				*((sin_t *)&udp->udp_delayed_addr) = sin;
			}
			mutex_exit(&connp->conn_lock);
		} else {
			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
			    NULL, 0, error);
			if (mp1 != NULL)
				putnext(connp->conn_rq, mp1);
		}
		break;
	case AF_INET6:
		/* AF_INET6 endpoint, IPv4 packet: use a v4-mapped address. */
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
		sin6.sin6_port = udpha->uha_dst_port;
		if (IPCL_IS_NONSTR(connp)) {
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER) {
				if (sin6.sin6_port == connp->conn_fport &&
				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
				    &connp->conn_faddr_v6)) {
					mutex_exit(&connp->conn_lock);
					(*connp->conn_upcalls->su_set_error)
					    (connp->conn_upper_handle, error);
					goto done;
				}
			} else {
				udp->udp_delayed_error = error;
				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
			}
			mutex_exit(&connp->conn_lock);
		} else {
			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
			    NULL, 0, error);
			if (mp1 != NULL)
				putnext(connp->conn_rq, mp1);
		}
		break;
	}
done:
	/* The ICMP message itself is always consumed here. */
	freemsg(mp);
}

/*
 * udp_icmp_error_ipv6 is called by udp_icmp_input to process ICMP for IPv6.
 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
 * Assumes that IP has pulled up all the extension headers as well as the
 * ICMPv6 header.
1178 */ 1179 static void 1180 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1181 { 1182 icmp6_t *icmp6; 1183 ip6_t *ip6h, *outer_ip6h; 1184 uint16_t iph_hdr_length; 1185 uint8_t *nexthdrp; 1186 udpha_t *udpha; 1187 sin6_t sin6; 1188 mblk_t *mp1; 1189 int error = 0; 1190 udp_t *udp = connp->conn_udp; 1191 udp_stack_t *us = udp->udp_us; 1192 1193 outer_ip6h = (ip6_t *)mp->b_rptr; 1194 #ifdef DEBUG 1195 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1196 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1197 else 1198 iph_hdr_length = IPV6_HDR_LEN; 1199 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1200 #endif 1201 /* Skip past the outer IP and ICMP headers */ 1202 iph_hdr_length = ira->ira_ip_hdr_length; 1203 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1204 1205 /* Skip past the inner IP and find the ULP header */ 1206 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1207 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1208 freemsg(mp); 1209 return; 1210 } 1211 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1212 1213 switch (icmp6->icmp6_type) { 1214 case ICMP6_DST_UNREACH: 1215 switch (icmp6->icmp6_code) { 1216 case ICMP6_DST_UNREACH_NOPORT: 1217 error = ECONNREFUSED; 1218 break; 1219 case ICMP6_DST_UNREACH_ADMIN: 1220 case ICMP6_DST_UNREACH_NOROUTE: 1221 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1222 case ICMP6_DST_UNREACH_ADDR: 1223 /* Transient errors */ 1224 break; 1225 default: 1226 break; 1227 } 1228 break; 1229 case ICMP6_PACKET_TOO_BIG: { 1230 struct T_unitdata_ind *tudi; 1231 struct T_opthdr *toh; 1232 size_t udi_size; 1233 mblk_t *newmp; 1234 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1235 sizeof (struct ip6_mtuinfo); 1236 sin6_t *sin6; 1237 struct ip6_mtuinfo *mtuinfo; 1238 1239 /* 1240 * If the application has requested to receive path mtu 1241 * information, send up an empty message containing an 1242 * IPV6_PATHMTU ancillary data item. 
1243 */ 1244 if (!connp->conn_ipv6_recvpathmtu) 1245 break; 1246 1247 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1248 opt_length; 1249 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1250 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1251 break; 1252 } 1253 1254 /* 1255 * newmp->b_cont is left to NULL on purpose. This is an 1256 * empty message containing only ancillary data. 1257 */ 1258 newmp->b_datap->db_type = M_PROTO; 1259 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1260 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1261 tudi->PRIM_type = T_UNITDATA_IND; 1262 tudi->SRC_length = sizeof (sin6_t); 1263 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1264 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1265 tudi->OPT_length = opt_length; 1266 1267 sin6 = (sin6_t *)&tudi[1]; 1268 bzero(sin6, sizeof (sin6_t)); 1269 sin6->sin6_family = AF_INET6; 1270 sin6->sin6_addr = connp->conn_faddr_v6; 1271 1272 toh = (struct T_opthdr *)&sin6[1]; 1273 toh->level = IPPROTO_IPV6; 1274 toh->name = IPV6_PATHMTU; 1275 toh->len = opt_length; 1276 toh->status = 0; 1277 1278 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1279 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1280 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1281 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1282 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1283 /* 1284 * We've consumed everything we need from the original 1285 * message. Free it, then send our empty message. 
1286 */ 1287 freemsg(mp); 1288 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); 1289 return; 1290 } 1291 case ICMP6_TIME_EXCEEDED: 1292 /* Transient errors */ 1293 break; 1294 case ICMP6_PARAM_PROB: 1295 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1296 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1297 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1298 (uchar_t *)nexthdrp) { 1299 error = ECONNREFUSED; 1300 break; 1301 } 1302 break; 1303 } 1304 if (error == 0) { 1305 freemsg(mp); 1306 return; 1307 } 1308 1309 /* 1310 * Deliver T_UDERROR_IND when the application has asked for it. 1311 * The socket layer enables this automatically when connected. 1312 */ 1313 if (!connp->conn_dgram_errind) { 1314 freemsg(mp); 1315 return; 1316 } 1317 1318 sin6 = sin6_null; 1319 sin6.sin6_family = AF_INET6; 1320 sin6.sin6_addr = ip6h->ip6_dst; 1321 sin6.sin6_port = udpha->uha_dst_port; 1322 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1323 1324 if (IPCL_IS_NONSTR(connp)) { 1325 mutex_enter(&connp->conn_lock); 1326 if (udp->udp_state == TS_DATA_XFER) { 1327 if (sin6.sin6_port == connp->conn_fport && 1328 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1329 &connp->conn_faddr_v6)) { 1330 mutex_exit(&connp->conn_lock); 1331 (*connp->conn_upcalls->su_set_error) 1332 (connp->conn_upper_handle, error); 1333 goto done; 1334 } 1335 } else { 1336 udp->udp_delayed_error = error; 1337 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1338 } 1339 mutex_exit(&connp->conn_lock); 1340 } else { 1341 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1342 NULL, 0, error); 1343 if (mp1 != NULL) 1344 putnext(connp->conn_rq, mp1); 1345 } 1346 done: 1347 freemsg(mp); 1348 } 1349 1350 /* 1351 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1352 * The local address is filled in if endpoint is bound. 
The remote address 1353 * is filled in if remote address has been precified ("connected endpoint") 1354 * (The concept of connected CLTS sockets is alien to published TPI 1355 * but we support it anyway). 1356 */ 1357 static void 1358 udp_addr_req(queue_t *q, mblk_t *mp) 1359 { 1360 struct sockaddr *sa; 1361 mblk_t *ackmp; 1362 struct T_addr_ack *taa; 1363 udp_t *udp = Q_TO_UDP(q); 1364 conn_t *connp = udp->udp_connp; 1365 uint_t addrlen; 1366 1367 /* Make it large enough for worst case */ 1368 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1369 2 * sizeof (sin6_t), 1); 1370 if (ackmp == NULL) { 1371 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1372 return; 1373 } 1374 taa = (struct T_addr_ack *)ackmp->b_rptr; 1375 1376 bzero(taa, sizeof (struct T_addr_ack)); 1377 ackmp->b_wptr = (uchar_t *)&taa[1]; 1378 1379 taa->PRIM_type = T_ADDR_ACK; 1380 ackmp->b_datap->db_type = M_PCPROTO; 1381 1382 if (connp->conn_family == AF_INET) 1383 addrlen = sizeof (sin_t); 1384 else 1385 addrlen = sizeof (sin6_t); 1386 1387 mutex_enter(&connp->conn_lock); 1388 /* 1389 * Note: Following code assumes 32 bit alignment of basic 1390 * data structures like sin_t and struct T_addr_ack. 
1391 */ 1392 if (udp->udp_state != TS_UNBND) { 1393 /* 1394 * Fill in local address first 1395 */ 1396 taa->LOCADDR_offset = sizeof (*taa); 1397 taa->LOCADDR_length = addrlen; 1398 sa = (struct sockaddr *)&taa[1]; 1399 (void) conn_getsockname(connp, sa, &addrlen); 1400 ackmp->b_wptr += addrlen; 1401 } 1402 if (udp->udp_state == TS_DATA_XFER) { 1403 /* 1404 * connected, fill remote address too 1405 */ 1406 taa->REMADDR_length = addrlen; 1407 /* assumed 32-bit alignment */ 1408 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1409 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1410 (void) conn_getpeername(connp, sa, &addrlen); 1411 ackmp->b_wptr += addrlen; 1412 } 1413 mutex_exit(&connp->conn_lock); 1414 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1415 qreply(q, ackmp); 1416 } 1417 1418 static void 1419 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1420 { 1421 conn_t *connp = udp->udp_connp; 1422 1423 if (connp->conn_family == AF_INET) { 1424 *tap = udp_g_t_info_ack_ipv4; 1425 } else { 1426 *tap = udp_g_t_info_ack_ipv6; 1427 } 1428 tap->CURRENT_state = udp->udp_state; 1429 tap->OPT_size = udp_max_optsize; 1430 } 1431 1432 static void 1433 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1434 t_uscalar_t cap_bits1) 1435 { 1436 tcap->CAP_bits1 = 0; 1437 1438 if (cap_bits1 & TC1_INFO) { 1439 udp_copy_info(&tcap->INFO_ack, udp); 1440 tcap->CAP_bits1 |= TC1_INFO; 1441 } 1442 } 1443 1444 /* 1445 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1446 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1447 * udp_g_t_info_ack. The current state of the stream is copied from 1448 * udp_state. 
1449 */ 1450 static void 1451 udp_capability_req(queue_t *q, mblk_t *mp) 1452 { 1453 t_uscalar_t cap_bits1; 1454 struct T_capability_ack *tcap; 1455 udp_t *udp = Q_TO_UDP(q); 1456 1457 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1458 1459 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1460 mp->b_datap->db_type, T_CAPABILITY_ACK); 1461 if (!mp) 1462 return; 1463 1464 tcap = (struct T_capability_ack *)mp->b_rptr; 1465 udp_do_capability_ack(udp, tcap, cap_bits1); 1466 1467 qreply(q, mp); 1468 } 1469 1470 /* 1471 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1472 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1473 * The current state of the stream is copied from udp_state. 1474 */ 1475 static void 1476 udp_info_req(queue_t *q, mblk_t *mp) 1477 { 1478 udp_t *udp = Q_TO_UDP(q); 1479 1480 /* Create a T_INFO_ACK message. */ 1481 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1482 T_INFO_ACK); 1483 if (!mp) 1484 return; 1485 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1486 qreply(q, mp); 1487 } 1488 1489 /* For /dev/udp aka AF_INET open */ 1490 static int 1491 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1492 { 1493 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1494 } 1495 1496 /* For /dev/udp6 aka AF_INET6 open */ 1497 static int 1498 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1499 { 1500 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1501 } 1502 1503 /* 1504 * This is the open routine for udp. It allocates a udp_t structure for 1505 * the stream and, on the first open of the module, creates an ND table. 1506 */ 1507 static int 1508 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1509 boolean_t isv6) 1510 { 1511 udp_t *udp; 1512 conn_t *connp; 1513 dev_t conn_dev; 1514 vmem_t *minor_arena; 1515 int err; 1516 1517 /* If the stream is already open, return immediately. 
*/ 1518 if (q->q_ptr != NULL) 1519 return (0); 1520 1521 if (sflag == MODOPEN) 1522 return (EINVAL); 1523 1524 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1525 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1526 minor_arena = ip_minor_arena_la; 1527 } else { 1528 /* 1529 * Either minor numbers in the large arena were exhausted 1530 * or a non socket application is doing the open. 1531 * Try to allocate from the small arena. 1532 */ 1533 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1534 return (EBUSY); 1535 1536 minor_arena = ip_minor_arena_sa; 1537 } 1538 1539 if (flag & SO_FALLBACK) { 1540 /* 1541 * Non streams socket needs a stream to fallback to 1542 */ 1543 RD(q)->q_ptr = (void *)conn_dev; 1544 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1545 WR(q)->q_ptr = (void *)minor_arena; 1546 qprocson(q); 1547 return (0); 1548 } 1549 1550 connp = udp_do_open(credp, isv6, KM_SLEEP, &err); 1551 if (connp == NULL) { 1552 inet_minor_free(minor_arena, conn_dev); 1553 return (err); 1554 } 1555 udp = connp->conn_udp; 1556 1557 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1558 connp->conn_dev = conn_dev; 1559 connp->conn_minor_arena = minor_arena; 1560 1561 /* 1562 * Initialize the udp_t structure for this stream. 1563 */ 1564 q->q_ptr = connp; 1565 WR(q)->q_ptr = connp; 1566 connp->conn_rq = q; 1567 connp->conn_wq = WR(q); 1568 1569 /* 1570 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1571 * need to lock anything. 1572 */ 1573 ASSERT(connp->conn_proto == IPPROTO_UDP); 1574 ASSERT(connp->conn_udp == udp); 1575 ASSERT(udp->udp_connp == connp); 1576 1577 if (flag & SO_SOCKSTR) { 1578 udp->udp_issocket = B_TRUE; 1579 } 1580 1581 WR(q)->q_hiwat = connp->conn_sndbuf; 1582 WR(q)->q_lowat = connp->conn_sndlowat; 1583 1584 qprocson(q); 1585 1586 /* Set the Stream head write offset and high watermark. 
*/ 1587 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1588 (void) proto_set_rx_hiwat(q, connp, 1589 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1590 1591 mutex_enter(&connp->conn_lock); 1592 connp->conn_state_flags &= ~CONN_INCIPIENT; 1593 mutex_exit(&connp->conn_lock); 1594 return (0); 1595 } 1596 1597 /* 1598 * Which UDP options OK to set through T_UNITDATA_REQ... 1599 */ 1600 /* ARGSUSED */ 1601 static boolean_t 1602 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1603 { 1604 return (B_TRUE); 1605 } 1606 1607 /* 1608 * This routine gets default values of certain options whose default 1609 * values are maintained by protcol specific code 1610 */ 1611 int 1612 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1613 { 1614 udp_t *udp = Q_TO_UDP(q); 1615 udp_stack_t *us = udp->udp_us; 1616 int *i1 = (int *)ptr; 1617 1618 switch (level) { 1619 case IPPROTO_IP: 1620 switch (name) { 1621 case IP_MULTICAST_TTL: 1622 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1623 return (sizeof (uchar_t)); 1624 case IP_MULTICAST_LOOP: 1625 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1626 return (sizeof (uchar_t)); 1627 } 1628 break; 1629 case IPPROTO_IPV6: 1630 switch (name) { 1631 case IPV6_MULTICAST_HOPS: 1632 *i1 = IP_DEFAULT_MULTICAST_TTL; 1633 return (sizeof (int)); 1634 case IPV6_MULTICAST_LOOP: 1635 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1636 return (sizeof (int)); 1637 case IPV6_UNICAST_HOPS: 1638 *i1 = us->us_ipv6_hoplimit; 1639 return (sizeof (int)); 1640 } 1641 break; 1642 } 1643 return (-1); 1644 } 1645 1646 /* 1647 * This routine retrieves the current status of socket options. 1648 * It returns the size of the option retrieved, or -1. 
1649 */ 1650 int 1651 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1652 uchar_t *ptr) 1653 { 1654 int *i1 = (int *)ptr; 1655 udp_t *udp = connp->conn_udp; 1656 int len; 1657 conn_opt_arg_t coas; 1658 int retval; 1659 1660 coas.coa_connp = connp; 1661 coas.coa_ixa = connp->conn_ixa; 1662 coas.coa_ipp = &connp->conn_xmit_ipp; 1663 coas.coa_ancillary = B_FALSE; 1664 coas.coa_changed = 0; 1665 1666 /* 1667 * We assume that the optcom framework has checked for the set 1668 * of levels and names that are supported, hence we don't worry 1669 * about rejecting based on that. 1670 * First check for UDP specific handling, then pass to common routine. 1671 */ 1672 switch (level) { 1673 case IPPROTO_IP: 1674 /* 1675 * Only allow IPv4 option processing on IPv4 sockets. 1676 */ 1677 if (connp->conn_family != AF_INET) 1678 return (-1); 1679 1680 switch (name) { 1681 case IP_OPTIONS: 1682 case T_IP_OPTIONS: 1683 mutex_enter(&connp->conn_lock); 1684 if (!(udp->udp_recv_ipp.ipp_fields & 1685 IPPF_IPV4_OPTIONS)) { 1686 mutex_exit(&connp->conn_lock); 1687 return (0); 1688 } 1689 1690 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1691 ASSERT(len != 0); 1692 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1693 mutex_exit(&connp->conn_lock); 1694 return (len); 1695 } 1696 break; 1697 case IPPROTO_UDP: 1698 switch (name) { 1699 case UDP_NAT_T_ENDPOINT: 1700 mutex_enter(&connp->conn_lock); 1701 *i1 = udp->udp_nat_t_endpoint; 1702 mutex_exit(&connp->conn_lock); 1703 return (sizeof (int)); 1704 case UDP_RCVHDR: 1705 mutex_enter(&connp->conn_lock); 1706 *i1 = udp->udp_rcvhdr ? 1 : 0; 1707 mutex_exit(&connp->conn_lock); 1708 return (sizeof (int)); 1709 } 1710 } 1711 mutex_enter(&connp->conn_lock); 1712 retval = conn_opt_get(&coas, level, name, ptr); 1713 mutex_exit(&connp->conn_lock); 1714 return (retval); 1715 } 1716 1717 /* 1718 * This routine retrieves the current status of socket options. 1719 * It returns the size of the option retrieved, or -1. 
1720 */ 1721 int 1722 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1723 { 1724 conn_t *connp = Q_TO_CONN(q); 1725 int err; 1726 1727 err = udp_opt_get(connp, level, name, ptr); 1728 return (err); 1729 } 1730 1731 /* 1732 * This routine sets socket options. 1733 */ 1734 int 1735 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1736 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1737 { 1738 conn_t *connp = coa->coa_connp; 1739 ip_xmit_attr_t *ixa = coa->coa_ixa; 1740 udp_t *udp = connp->conn_udp; 1741 udp_stack_t *us = udp->udp_us; 1742 int *i1 = (int *)invalp; 1743 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1744 int error; 1745 1746 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1747 /* 1748 * First do UDP specific sanity checks and handle UDP specific 1749 * options. Note that some IPPROTO_UDP options are handled 1750 * by conn_opt_set. 1751 */ 1752 switch (level) { 1753 case SOL_SOCKET: 1754 switch (name) { 1755 case SO_SNDBUF: 1756 if (*i1 > us->us_max_buf) { 1757 return (ENOBUFS); 1758 } 1759 break; 1760 case SO_RCVBUF: 1761 if (*i1 > us->us_max_buf) { 1762 return (ENOBUFS); 1763 } 1764 break; 1765 1766 case SCM_UCRED: { 1767 struct ucred_s *ucr; 1768 cred_t *newcr; 1769 ts_label_t *tsl; 1770 1771 /* 1772 * Only sockets that have proper privileges and are 1773 * bound to MLPs will have any other value here, so 1774 * this implicitly tests for privilege to set label. 1775 */ 1776 if (connp->conn_mlp_type == mlptSingle) 1777 break; 1778 1779 ucr = (struct ucred_s *)invalp; 1780 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || 1781 ucr->uc_labeloff < sizeof (*ucr) || 1782 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1783 return (EINVAL); 1784 if (!checkonly) { 1785 /* 1786 * Set ixa_tsl to the new label. 1787 * We assume that crgetzoneid doesn't change 1788 * as part of the SCM_UCRED. 
1789 */ 1790 ASSERT(cr != NULL); 1791 if ((tsl = crgetlabel(cr)) == NULL) 1792 return (EINVAL); 1793 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1794 tsl->tsl_doi, KM_NOSLEEP); 1795 if (newcr == NULL) 1796 return (ENOSR); 1797 ASSERT(newcr->cr_label != NULL); 1798 /* 1799 * Move the hold on the cr_label to ixa_tsl by 1800 * setting cr_label to NULL. Then release newcr. 1801 */ 1802 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1803 ixa->ixa_flags |= IXAF_UCRED_TSL; 1804 newcr->cr_label = NULL; 1805 crfree(newcr); 1806 coa->coa_changed |= COA_HEADER_CHANGED; 1807 coa->coa_changed |= COA_WROFF_CHANGED; 1808 } 1809 /* Fully handled this option. */ 1810 return (0); 1811 } 1812 } 1813 break; 1814 case IPPROTO_UDP: 1815 switch (name) { 1816 case UDP_NAT_T_ENDPOINT: 1817 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1818 return (error); 1819 } 1820 1821 /* 1822 * Use conn_family instead so we can avoid ambiguitites 1823 * with AF_INET6 sockets that may switch from IPv4 1824 * to IPv6. 1825 */ 1826 if (connp->conn_family != AF_INET) { 1827 return (EAFNOSUPPORT); 1828 } 1829 1830 if (!checkonly) { 1831 mutex_enter(&connp->conn_lock); 1832 udp->udp_nat_t_endpoint = onoff; 1833 mutex_exit(&connp->conn_lock); 1834 coa->coa_changed |= COA_HEADER_CHANGED; 1835 coa->coa_changed |= COA_WROFF_CHANGED; 1836 } 1837 /* Fully handled this option. */ 1838 return (0); 1839 case UDP_RCVHDR: 1840 mutex_enter(&connp->conn_lock); 1841 udp->udp_rcvhdr = onoff; 1842 mutex_exit(&connp->conn_lock); 1843 return (0); 1844 } 1845 break; 1846 } 1847 error = conn_opt_set(coa, level, name, inlen, invalp, 1848 checkonly, cr); 1849 return (error); 1850 } 1851 1852 /* 1853 * This routine sets socket options. 
1854 */ 1855 int 1856 udp_opt_set(conn_t *connp, uint_t optset_context, int level, 1857 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 1858 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) 1859 { 1860 udp_t *udp = connp->conn_udp; 1861 int err; 1862 conn_opt_arg_t coas, *coa; 1863 boolean_t checkonly; 1864 udp_stack_t *us = udp->udp_us; 1865 1866 switch (optset_context) { 1867 case SETFN_OPTCOM_CHECKONLY: 1868 checkonly = B_TRUE; 1869 /* 1870 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1871 * inlen != 0 implies value supplied and 1872 * we have to "pretend" to set it. 1873 * inlen == 0 implies that there is no 1874 * value part in T_CHECK request and just validation 1875 * done elsewhere should be enough, we just return here. 1876 */ 1877 if (inlen == 0) { 1878 *outlenp = 0; 1879 return (0); 1880 } 1881 break; 1882 case SETFN_OPTCOM_NEGOTIATE: 1883 checkonly = B_FALSE; 1884 break; 1885 case SETFN_UD_NEGOTIATE: 1886 case SETFN_CONN_NEGOTIATE: 1887 checkonly = B_FALSE; 1888 /* 1889 * Negotiating local and "association-related" options 1890 * through T_UNITDATA_REQ. 1891 * 1892 * Following routine can filter out ones we do not 1893 * want to be "set" this way. 
1894 */ 1895 if (!udp_opt_allow_udr_set(level, name)) { 1896 *outlenp = 0; 1897 return (EINVAL); 1898 } 1899 break; 1900 default: 1901 /* 1902 * We should never get here 1903 */ 1904 *outlenp = 0; 1905 return (EINVAL); 1906 } 1907 1908 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1909 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1910 1911 if (thisdg_attrs != NULL) { 1912 /* Options from T_UNITDATA_REQ */ 1913 coa = (conn_opt_arg_t *)thisdg_attrs; 1914 ASSERT(coa->coa_connp == connp); 1915 ASSERT(coa->coa_ixa != NULL); 1916 ASSERT(coa->coa_ipp != NULL); 1917 ASSERT(coa->coa_ancillary); 1918 } else { 1919 coa = &coas; 1920 coas.coa_connp = connp; 1921 /* Get a reference on conn_ixa to prevent concurrent mods */ 1922 coas.coa_ixa = conn_get_ixa(connp, B_TRUE); 1923 if (coas.coa_ixa == NULL) { 1924 *outlenp = 0; 1925 return (ENOMEM); 1926 } 1927 coas.coa_ipp = &connp->conn_xmit_ipp; 1928 coas.coa_ancillary = B_FALSE; 1929 coas.coa_changed = 0; 1930 } 1931 1932 err = udp_do_opt_set(coa, level, name, inlen, invalp, 1933 cr, checkonly); 1934 if (err != 0) { 1935 errout: 1936 if (!coa->coa_ancillary) 1937 ixa_refrele(coa->coa_ixa); 1938 *outlenp = 0; 1939 return (err); 1940 } 1941 /* Handle DHCPINIT here outside of lock */ 1942 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) { 1943 uint_t ifindex; 1944 ill_t *ill; 1945 1946 ifindex = *(uint_t *)invalp; 1947 if (ifindex == 0) { 1948 ill = NULL; 1949 } else { 1950 ill = ill_lookup_on_ifindex(ifindex, B_FALSE, 1951 coa->coa_ixa->ixa_ipst); 1952 if (ill == NULL) { 1953 err = ENXIO; 1954 goto errout; 1955 } 1956 1957 mutex_enter(&ill->ill_lock); 1958 if (ill->ill_state_flags & ILL_CONDEMNED) { 1959 mutex_exit(&ill->ill_lock); 1960 ill_refrele(ill); 1961 err = ENXIO; 1962 goto errout; 1963 } 1964 if (IS_VNI(ill)) { 1965 mutex_exit(&ill->ill_lock); 1966 ill_refrele(ill); 1967 err = EINVAL; 1968 goto errout; 1969 } 1970 } 1971 mutex_enter(&connp->conn_lock); 1972 1973 if (connp->conn_dhcpinit_ill != NULL) 
{ 1974 /* 1975 * We've locked the conn so conn_cleanup_ill() 1976 * cannot clear conn_dhcpinit_ill -- so it's 1977 * safe to access the ill. 1978 */ 1979 ill_t *oill = connp->conn_dhcpinit_ill; 1980 1981 ASSERT(oill->ill_dhcpinit != 0); 1982 atomic_dec_32(&oill->ill_dhcpinit); 1983 ill_set_inputfn(connp->conn_dhcpinit_ill); 1984 connp->conn_dhcpinit_ill = NULL; 1985 } 1986 1987 if (ill != NULL) { 1988 connp->conn_dhcpinit_ill = ill; 1989 atomic_inc_32(&ill->ill_dhcpinit); 1990 ill_set_inputfn(ill); 1991 mutex_exit(&connp->conn_lock); 1992 mutex_exit(&ill->ill_lock); 1993 ill_refrele(ill); 1994 } else { 1995 mutex_exit(&connp->conn_lock); 1996 } 1997 } 1998 1999 /* 2000 * Common case of OK return with outval same as inval. 2001 */ 2002 if (invalp != outvalp) { 2003 /* don't trust bcopy for identical src/dst */ 2004 (void) bcopy(invalp, outvalp, inlen); 2005 } 2006 *outlenp = inlen; 2007 2008 /* 2009 * If this was not ancillary data, then we rebuild the headers, 2010 * update the IRE/NCE, and IPsec as needed. 2011 * Since the label depends on the destination we go through 2012 * ip_set_destination first. 2013 */ 2014 if (coa->coa_ancillary) { 2015 return (0); 2016 } 2017 2018 if (coa->coa_changed & COA_ROUTE_CHANGED) { 2019 in6_addr_t saddr, faddr, nexthop; 2020 in_port_t fport; 2021 2022 /* 2023 * We clear lastdst to make sure we pick up the change 2024 * next time sending. 2025 * If we are connected we re-cache the information. 2026 * We ignore errors to preserve BSD behavior. 2027 * Note that we don't redo IPsec policy lookup here 2028 * since the final destination (or source) didn't change. 
2029 */ 2030 mutex_enter(&connp->conn_lock); 2031 connp->conn_v6lastdst = ipv6_all_zeros; 2032 2033 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa, 2034 &connp->conn_faddr_v6, &nexthop); 2035 saddr = connp->conn_saddr_v6; 2036 faddr = connp->conn_faddr_v6; 2037 fport = connp->conn_fport; 2038 mutex_exit(&connp->conn_lock); 2039 2040 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) && 2041 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) { 2042 (void) ip_attr_connect(connp, coa->coa_ixa, 2043 &saddr, &faddr, &nexthop, fport, NULL, NULL, 2044 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST); 2045 } 2046 } 2047 2048 ixa_refrele(coa->coa_ixa); 2049 2050 if (coa->coa_changed & COA_HEADER_CHANGED) { 2051 /* 2052 * Rebuild the header template if we are connected. 2053 * Otherwise clear conn_v6lastdst so we rebuild the header 2054 * in the data path. 2055 */ 2056 mutex_enter(&connp->conn_lock); 2057 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 2058 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 2059 err = udp_build_hdr_template(connp, 2060 &connp->conn_saddr_v6, &connp->conn_faddr_v6, 2061 connp->conn_fport, connp->conn_flowinfo); 2062 if (err != 0) { 2063 mutex_exit(&connp->conn_lock); 2064 return (err); 2065 } 2066 } else { 2067 connp->conn_v6lastdst = ipv6_all_zeros; 2068 } 2069 mutex_exit(&connp->conn_lock); 2070 } 2071 if (coa->coa_changed & COA_RCVBUF_CHANGED) { 2072 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2073 connp->conn_rcvbuf); 2074 } 2075 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { 2076 connp->conn_wq->q_hiwat = connp->conn_sndbuf; 2077 } 2078 if (coa->coa_changed & COA_WROFF_CHANGED) { 2079 /* Increase wroff if needed */ 2080 uint_t wroff; 2081 2082 mutex_enter(&connp->conn_lock); 2083 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra; 2084 if (udp->udp_nat_t_endpoint) 2085 wroff += sizeof (uint32_t); 2086 if (wroff > connp->conn_wroff) { 2087 connp->conn_wroff = wroff; 2088 mutex_exit(&connp->conn_lock); 2089 (void) 
proto_set_tx_wroff(connp->conn_rq, connp, wroff); 2090 } else { 2091 mutex_exit(&connp->conn_lock); 2092 } 2093 } 2094 return (err); 2095 } 2096 2097 /* This routine sets socket options. */ 2098 int 2099 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2100 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2101 void *thisdg_attrs, cred_t *cr) 2102 { 2103 conn_t *connp = Q_TO_CONN(q); 2104 int error; 2105 2106 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 2107 outlenp, outvalp, thisdg_attrs, cr); 2108 return (error); 2109 } 2110 2111 /* 2112 * Setup IP and UDP headers. 2113 * Returns NULL on allocation failure, in which case data_mp is freed. 2114 */ 2115 mblk_t * 2116 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, 2117 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport, 2118 uint32_t flowinfo, mblk_t *data_mp, int *errorp) 2119 { 2120 mblk_t *mp; 2121 udpha_t *udpha; 2122 udp_stack_t *us = connp->conn_netstack->netstack_udp; 2123 uint_t data_len; 2124 uint32_t cksum; 2125 udp_t *udp = connp->conn_udp; 2126 boolean_t insert_spi = udp->udp_nat_t_endpoint; 2127 uint_t ulp_hdr_len; 2128 2129 data_len = msgdsize(data_mp); 2130 ulp_hdr_len = UDPH_SIZE; 2131 if (insert_spi) 2132 ulp_hdr_len += sizeof (uint32_t); 2133 2134 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, 2135 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); 2136 if (mp == NULL) { 2137 ASSERT(*errorp != 0); 2138 return (NULL); 2139 } 2140 2141 data_len += ulp_hdr_len; 2142 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; 2143 2144 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); 2145 udpha->uha_src_port = connp->conn_lport; 2146 udpha->uha_dst_port = dstport; 2147 udpha->uha_checksum = 0; 2148 udpha->uha_length = htons(data_len); 2149 2150 /* 2151 * If there was a routing option/header then conn_prepend_hdr 2152 * has massaged it and placed the 
pseudo-header checksum difference 2153 * in the cksum argument. 2154 * 2155 * Setup header length and prepare for ULP checksum done in IP. 2156 * 2157 * We make it easy for IP to include our pseudo header 2158 * by putting our length in uha_checksum. 2159 * The IP source, destination, and length have already been set by 2160 * conn_prepend_hdr. 2161 */ 2162 cksum += data_len; 2163 cksum = (cksum >> 16) + (cksum & 0xFFFF); 2164 ASSERT(cksum < 0x10000); 2165 2166 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2167 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2168 2169 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); 2170 2171 /* IP does the checksum if uha_checksum is non-zero */ 2172 if (us->us_do_checksum) { 2173 if (cksum == 0) 2174 udpha->uha_checksum = 0xffff; 2175 else 2176 udpha->uha_checksum = htons(cksum); 2177 } else { 2178 udpha->uha_checksum = 0; 2179 } 2180 } else { 2181 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2182 2183 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); 2184 if (cksum == 0) 2185 udpha->uha_checksum = 0xffff; 2186 else 2187 udpha->uha_checksum = htons(cksum); 2188 } 2189 2190 /* Insert all-0s SPI now. */ 2191 if (insert_spi) 2192 *((uint32_t *)(udpha + 1)) = 0; 2193 2194 return (mp); 2195 } 2196 2197 static int 2198 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, 2199 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo) 2200 { 2201 udpha_t *udpha; 2202 int error; 2203 2204 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2205 /* 2206 * We clear lastdst to make sure we don't use the lastdst path 2207 * next time sending since we might not have set v6dst yet. 2208 */ 2209 connp->conn_v6lastdst = ipv6_all_zeros; 2210 2211 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst, 2212 flowinfo); 2213 if (error != 0) 2214 return (error); 2215 2216 /* 2217 * Any routing header/option has been massaged. The checksum difference 2218 * is stored in conn_sum. 
2219 */ 2220 udpha = (udpha_t *)connp->conn_ht_ulp; 2221 udpha->uha_src_port = connp->conn_lport; 2222 udpha->uha_dst_port = dstport; 2223 udpha->uha_checksum = 0; 2224 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */ 2225 return (0); 2226 } 2227 2228 /* 2229 * This routine retrieves the value of an ND variable in a udpparam_t 2230 * structure. It is called through nd_getset when a user reads the 2231 * variable. 2232 */ 2233 /* ARGSUSED */ 2234 static int 2235 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2236 { 2237 udpparam_t *udppa = (udpparam_t *)cp; 2238 2239 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 2240 return (0); 2241 } 2242 2243 /* 2244 * Walk through the param array specified registering each element with the 2245 * named dispatch (ND) handler. 2246 */ 2247 static boolean_t 2248 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 2249 { 2250 for (; cnt-- > 0; udppa++) { 2251 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 2252 if (!nd_load(ndp, udppa->udp_param_name, 2253 udp_param_get, udp_param_set, 2254 (caddr_t)udppa)) { 2255 nd_free(ndp); 2256 return (B_FALSE); 2257 } 2258 } 2259 } 2260 if (!nd_load(ndp, "udp_extra_priv_ports", 2261 udp_extra_priv_ports_get, NULL, NULL)) { 2262 nd_free(ndp); 2263 return (B_FALSE); 2264 } 2265 if (!nd_load(ndp, "udp_extra_priv_ports_add", 2266 NULL, udp_extra_priv_ports_add, NULL)) { 2267 nd_free(ndp); 2268 return (B_FALSE); 2269 } 2270 if (!nd_load(ndp, "udp_extra_priv_ports_del", 2271 NULL, udp_extra_priv_ports_del, NULL)) { 2272 nd_free(ndp); 2273 return (B_FALSE); 2274 } 2275 return (B_TRUE); 2276 } 2277 2278 /* This routine sets an ND variable in a udpparam_t structure. */ 2279 /* ARGSUSED */ 2280 static int 2281 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 2282 { 2283 long new_value; 2284 udpparam_t *udppa = (udpparam_t *)cp; 2285 2286 /* 2287 * Fail the request if the new value does not lie within the 2288 * required bounds. 
2289 */ 2290 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2291 new_value < udppa->udp_param_min || 2292 new_value > udppa->udp_param_max) { 2293 return (EINVAL); 2294 } 2295 2296 /* Set the new value */ 2297 udppa->udp_param_value = new_value; 2298 return (0); 2299 } 2300 2301 static mblk_t * 2302 udp_queue_fallback(udp_t *udp, mblk_t *mp) 2303 { 2304 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 2305 if (IPCL_IS_NONSTR(udp->udp_connp)) { 2306 /* 2307 * fallback has started but messages have not been moved yet 2308 */ 2309 if (udp->udp_fallback_queue_head == NULL) { 2310 ASSERT(udp->udp_fallback_queue_tail == NULL); 2311 udp->udp_fallback_queue_head = mp; 2312 udp->udp_fallback_queue_tail = mp; 2313 } else { 2314 ASSERT(udp->udp_fallback_queue_tail != NULL); 2315 udp->udp_fallback_queue_tail->b_next = mp; 2316 udp->udp_fallback_queue_tail = mp; 2317 } 2318 return (NULL); 2319 } else { 2320 /* 2321 * Fallback completed, let the caller putnext() the mblk. 2322 */ 2323 return (mp); 2324 } 2325 } 2326 2327 /* 2328 * Deliver data to ULP. In case we have a socket, and it's falling back to 2329 * TPI, then we'll queue the mp for later processing. 
2330 */ 2331 static void 2332 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2333 { 2334 if (IPCL_IS_NONSTR(connp)) { 2335 udp_t *udp = connp->conn_udp; 2336 int error; 2337 2338 ASSERT(len == msgdsize(mp)); 2339 if ((*connp->conn_upcalls->su_recv) 2340 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2341 mutex_enter(&udp->udp_recv_lock); 2342 if (error == ENOSPC) { 2343 /* 2344 * let's confirm while holding the lock 2345 */ 2346 if ((*connp->conn_upcalls->su_recv) 2347 (connp->conn_upper_handle, NULL, 0, 0, 2348 &error, NULL) < 0) { 2349 ASSERT(error == ENOSPC); 2350 if (error == ENOSPC) { 2351 connp->conn_flow_cntrld = 2352 B_TRUE; 2353 } 2354 } 2355 mutex_exit(&udp->udp_recv_lock); 2356 } else { 2357 ASSERT(error == EOPNOTSUPP); 2358 mp = udp_queue_fallback(udp, mp); 2359 mutex_exit(&udp->udp_recv_lock); 2360 if (mp != NULL) 2361 putnext(connp->conn_rq, mp); 2362 } 2363 } 2364 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2365 } else { 2366 if (is_system_labeled()) { 2367 ASSERT(ira->ira_cred != NULL); 2368 /* 2369 * Provide for protocols above UDP such as RPC 2370 * NOPID leaves db_cpid unchanged. 2371 */ 2372 mblk_setcred(mp, ira->ira_cred, NOPID); 2373 } 2374 2375 putnext(connp->conn_rq, mp); 2376 } 2377 } 2378 2379 /* 2380 * This is the inbound data path. 2381 * IP has already pulled up the IP plus UDP headers and verified alignment 2382 * etc. 
2383 */ 2384 /* ARGSUSED2 */ 2385 static void 2386 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 2387 { 2388 conn_t *connp = (conn_t *)arg1; 2389 struct T_unitdata_ind *tudi; 2390 uchar_t *rptr; /* Pointer to IP header */ 2391 int hdr_length; /* Length of IP+UDP headers */ 2392 int udi_size; /* Size of T_unitdata_ind */ 2393 int pkt_len; 2394 udp_t *udp; 2395 udpha_t *udpha; 2396 ip_pkt_t ipps; 2397 ip6_t *ip6h; 2398 mblk_t *mp1; 2399 uint32_t udp_ipv4_options_len; 2400 crb_t recv_ancillary; 2401 udp_stack_t *us; 2402 2403 ASSERT(connp->conn_flags & IPCL_UDPCONN); 2404 2405 udp = connp->conn_udp; 2406 us = udp->udp_us; 2407 rptr = mp->b_rptr; 2408 2409 ASSERT(DB_TYPE(mp) == M_DATA); 2410 ASSERT(OK_32PTR(rptr)); 2411 ASSERT(ira->ira_pktlen == msgdsize(mp)); 2412 pkt_len = ira->ira_pktlen; 2413 2414 /* 2415 * Get a snapshot of these and allow other threads to change 2416 * them after that. We need the same recv_ancillary when determining 2417 * the size as when adding the ancillary data items. 2418 */ 2419 mutex_enter(&connp->conn_lock); 2420 udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len; 2421 recv_ancillary = connp->conn_recv_ancillary; 2422 mutex_exit(&connp->conn_lock); 2423 2424 hdr_length = ira->ira_ip_hdr_length; 2425 2426 /* 2427 * IP inspected the UDP header thus all of it must be in the mblk. 2428 * UDP length check is performed for IPv6 packets and IPv4 packets 2429 * to check if the size of the packet as specified 2430 * by the UDP header is the same as the length derived from the IP 2431 * header. 
2432 */ 2433 udpha = (udpha_t *)(rptr + hdr_length); 2434 if (pkt_len != ntohs(udpha->uha_length) + hdr_length) 2435 goto tossit; 2436 2437 hdr_length += UDPH_SIZE; 2438 ASSERT(MBLKL(mp) >= hdr_length); /* IP did a pullup */ 2439 2440 /* Initialize regardless of IP version */ 2441 ipps.ipp_fields = 0; 2442 2443 if (((ira->ira_flags & IRAF_IPV4_OPTIONS) || 2444 udp_ipv4_options_len > 0) && 2445 connp->conn_family == AF_INET) { 2446 int err; 2447 2448 /* 2449 * Record/update udp_recv_ipp with the lock 2450 * held. Not needed for AF_INET6 sockets 2451 * since they don't support a getsockopt of IP_OPTIONS. 2452 */ 2453 mutex_enter(&connp->conn_lock); 2454 err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp, 2455 B_TRUE); 2456 if (err != 0) { 2457 /* Allocation failed. Drop packet */ 2458 mutex_exit(&connp->conn_lock); 2459 freemsg(mp); 2460 BUMP_MIB(&us->us_udp_mib, udpInErrors); 2461 return; 2462 } 2463 mutex_exit(&connp->conn_lock); 2464 } 2465 2466 if (recv_ancillary.crb_all != 0) { 2467 /* 2468 * Record packet information in the ip_pkt_t 2469 */ 2470 if (ira->ira_flags & IRAF_IS_IPV4) { 2471 ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION); 2472 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 2473 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 2474 ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr)); 2475 2476 (void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE); 2477 } else { 2478 uint8_t nexthdrp; 2479 2480 ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION); 2481 /* 2482 * IPv6 packets can only be received by applications 2483 * that are prepared to receive IPv6 addresses. 2484 * The IP fanout must ensure this. 
2485 */ 2486 ASSERT(connp->conn_family == AF_INET6); 2487 2488 ip6h = (ip6_t *)rptr; 2489 2490 /* We don't care about the length, but need the ipp */ 2491 hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps, 2492 &nexthdrp); 2493 ASSERT(hdr_length == ira->ira_ip_hdr_length); 2494 /* Restore */ 2495 hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE; 2496 ASSERT(nexthdrp == IPPROTO_UDP); 2497 } 2498 } 2499 2500 /* 2501 * This is the inbound data path. Packets are passed upstream as 2502 * T_UNITDATA_IND messages. 2503 */ 2504 if (connp->conn_family == AF_INET) { 2505 sin_t *sin; 2506 2507 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 2508 2509 /* 2510 * Normally only send up the source address. 2511 * If any ancillary data items are wanted we add those. 2512 */ 2513 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 2514 if (recv_ancillary.crb_all != 0) { 2515 udi_size += conn_recvancillary_size(connp, 2516 recv_ancillary, ira, mp, &ipps); 2517 } 2518 2519 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 2520 mp1 = allocb(udi_size, BPRI_MED); 2521 if (mp1 == NULL) { 2522 freemsg(mp); 2523 BUMP_MIB(&us->us_udp_mib, udpInErrors); 2524 return; 2525 } 2526 mp1->b_cont = mp; 2527 mp1->b_datap->db_type = M_PROTO; 2528 tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2529 mp1->b_wptr = (uchar_t *)tudi + udi_size; 2530 tudi->PRIM_type = T_UNITDATA_IND; 2531 tudi->SRC_length = sizeof (sin_t); 2532 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2533 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2534 sizeof (sin_t); 2535 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 2536 tudi->OPT_length = udi_size; 2537 sin = (sin_t *)&tudi[1]; 2538 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 2539 sin->sin_port = udpha->uha_src_port; 2540 sin->sin_family = connp->conn_family; 2541 *(uint32_t *)&sin->sin_zero[0] = 0; 2542 *(uint32_t *)&sin->sin_zero[4] = 0; 2543 2544 /* 2545 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 2546 * IP_RECVTTL has been set. 2547 */ 2548 if (udi_size != 0) { 2549 conn_recvancillary_add(connp, recv_ancillary, ira, 2550 &ipps, (uchar_t *)&sin[1], udi_size); 2551 } 2552 } else { 2553 sin6_t *sin6; 2554 2555 /* 2556 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 2557 * 2558 * Normally we only send up the address. If receiving of any 2559 * optional receive side information is enabled, we also send 2560 * that up as options. 
2561 */ 2562 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 2563 2564 if (recv_ancillary.crb_all != 0) { 2565 udi_size += conn_recvancillary_size(connp, 2566 recv_ancillary, ira, mp, &ipps); 2567 } 2568 2569 mp1 = allocb(udi_size, BPRI_MED); 2570 if (mp1 == NULL) { 2571 freemsg(mp); 2572 BUMP_MIB(&us->us_udp_mib, udpInErrors); 2573 return; 2574 } 2575 mp1->b_cont = mp; 2576 mp1->b_datap->db_type = M_PROTO; 2577 tudi = (struct T_unitdata_ind *)mp1->b_rptr; 2578 mp1->b_wptr = (uchar_t *)tudi + udi_size; 2579 tudi->PRIM_type = T_UNITDATA_IND; 2580 tudi->SRC_length = sizeof (sin6_t); 2581 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2582 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 2583 sizeof (sin6_t); 2584 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 2585 tudi->OPT_length = udi_size; 2586 sin6 = (sin6_t *)&tudi[1]; 2587 if (ira->ira_flags & IRAF_IS_IPV4) { 2588 in6_addr_t v6dst; 2589 2590 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 2591 &sin6->sin6_addr); 2592 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 2593 &v6dst); 2594 sin6->sin6_flowinfo = 0; 2595 sin6->sin6_scope_id = 0; 2596 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 2597 IPCL_ZONEID(connp), us->us_netstack); 2598 } else { 2599 ip6h = (ip6_t *)rptr; 2600 2601 sin6->sin6_addr = ip6h->ip6_src; 2602 /* No sin6_flowinfo per API */ 2603 sin6->sin6_flowinfo = 0; 2604 /* For link-scope pass up scope id */ 2605 if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 2606 sin6->sin6_scope_id = ira->ira_ruifindex; 2607 else 2608 sin6->sin6_scope_id = 0; 2609 sin6->__sin6_src_id = ip_srcid_find_addr( 2610 &ip6h->ip6_dst, IPCL_ZONEID(connp), 2611 us->us_netstack); 2612 } 2613 sin6->sin6_port = udpha->uha_src_port; 2614 sin6->sin6_family = connp->conn_family; 2615 2616 if (udi_size != 0) { 2617 conn_recvancillary_add(connp, recv_ancillary, ira, 2618 &ipps, (uchar_t *)&sin6[1], udi_size); 2619 } 2620 } 2621 2622 /* Walk past the headers unless IP_RECVHDR was set. 
*/ 2623 if (!udp->udp_rcvhdr) { 2624 mp->b_rptr = rptr + hdr_length; 2625 pkt_len -= hdr_length; 2626 } 2627 2628 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 2629 udp_ulp_recv(connp, mp1, pkt_len, ira); 2630 return; 2631 2632 tossit: 2633 freemsg(mp); 2634 BUMP_MIB(&us->us_udp_mib, udpInErrors); 2635 } 2636 2637 /* 2638 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 2639 * information that can be changing beneath us. 2640 */ 2641 mblk_t * 2642 udp_snmp_get(queue_t *q, mblk_t *mpctl) 2643 { 2644 mblk_t *mpdata; 2645 mblk_t *mp_conn_ctl; 2646 mblk_t *mp_attr_ctl; 2647 mblk_t *mp6_conn_ctl; 2648 mblk_t *mp6_attr_ctl; 2649 mblk_t *mp_conn_tail; 2650 mblk_t *mp_attr_tail; 2651 mblk_t *mp6_conn_tail; 2652 mblk_t *mp6_attr_tail; 2653 struct opthdr *optp; 2654 mib2_udpEntry_t ude; 2655 mib2_udp6Entry_t ude6; 2656 mib2_transportMLPEntry_t mlp; 2657 int state; 2658 zoneid_t zoneid; 2659 int i; 2660 connf_t *connfp; 2661 conn_t *connp = Q_TO_CONN(q); 2662 int v4_conn_idx; 2663 int v6_conn_idx; 2664 boolean_t needattr; 2665 udp_t *udp; 2666 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2667 udp_stack_t *us = connp->conn_netstack->netstack_udp; 2668 mblk_t *mp2ctl; 2669 2670 /* 2671 * make a copy of the original message 2672 */ 2673 mp2ctl = copymsg(mpctl); 2674 2675 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 2676 if (mpctl == NULL || 2677 (mpdata = mpctl->b_cont) == NULL || 2678 (mp_conn_ctl = copymsg(mpctl)) == NULL || 2679 (mp_attr_ctl = copymsg(mpctl)) == NULL || 2680 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 2681 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 2682 freemsg(mp_conn_ctl); 2683 freemsg(mp_attr_ctl); 2684 freemsg(mp6_conn_ctl); 2685 freemsg(mpctl); 2686 freemsg(mp2ctl); 2687 return (0); 2688 } 2689 2690 zoneid = connp->conn_zoneid; 2691 2692 /* fixed length structure for IPv4 and IPv6 counters */ 2693 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 2694 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof 
(mib2_udp6Entry_t)); 2695 /* synchronize 64- and 32-bit counters */ 2696 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 2697 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 2698 2699 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 2700 optp->level = MIB2_UDP; 2701 optp->name = 0; 2702 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 2703 sizeof (us->us_udp_mib)); 2704 optp->len = msgdsize(mpdata); 2705 qreply(q, mpctl); 2706 2707 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 2708 v4_conn_idx = v6_conn_idx = 0; 2709 2710 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 2711 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 2712 connp = NULL; 2713 2714 while ((connp = ipcl_get_next_conn(connfp, connp, 2715 IPCL_UDPCONN))) { 2716 udp = connp->conn_udp; 2717 if (zoneid != connp->conn_zoneid) 2718 continue; 2719 2720 /* 2721 * Note that the port numbers are sent in 2722 * host byte order 2723 */ 2724 2725 if (udp->udp_state == TS_UNBND) 2726 state = MIB2_UDP_unbound; 2727 else if (udp->udp_state == TS_IDLE) 2728 state = MIB2_UDP_idle; 2729 else if (udp->udp_state == TS_DATA_XFER) 2730 state = MIB2_UDP_connected; 2731 else 2732 state = MIB2_UDP_unknown; 2733 2734 needattr = B_FALSE; 2735 bzero(&mlp, sizeof (mlp)); 2736 if (connp->conn_mlp_type != mlptSingle) { 2737 if (connp->conn_mlp_type == mlptShared || 2738 connp->conn_mlp_type == mlptBoth) 2739 mlp.tme_flags |= MIB2_TMEF_SHARED; 2740 if (connp->conn_mlp_type == mlptPrivate || 2741 connp->conn_mlp_type == mlptBoth) 2742 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 2743 needattr = B_TRUE; 2744 } 2745 if (connp->conn_anon_mlp) { 2746 mlp.tme_flags |= MIB2_TMEF_ANONMLP; 2747 needattr = B_TRUE; 2748 } 2749 switch (connp->conn_mac_mode) { 2750 case CONN_MAC_DEFAULT: 2751 break; 2752 case CONN_MAC_AWARE: 2753 mlp.tme_flags |= MIB2_TMEF_MACEXEMPT; 2754 needattr = B_TRUE; 2755 break; 2756 case CONN_MAC_IMPLICIT: 2757 mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT; 
2758 needattr = B_TRUE; 2759 break; 2760 } 2761 mutex_enter(&connp->conn_lock); 2762 if (udp->udp_state == TS_DATA_XFER && 2763 connp->conn_ixa->ixa_tsl != NULL) { 2764 ts_label_t *tsl; 2765 2766 tsl = connp->conn_ixa->ixa_tsl; 2767 mlp.tme_flags |= MIB2_TMEF_IS_LABELED; 2768 mlp.tme_doi = label2doi(tsl); 2769 mlp.tme_label = *label2bslabel(tsl); 2770 needattr = B_TRUE; 2771 } 2772 mutex_exit(&connp->conn_lock); 2773 2774 /* 2775 * Create an IPv4 table entry for IPv4 entries and also 2776 * any IPv6 entries which are bound to in6addr_any 2777 * (i.e. anything a IPv4 peer could connect/send to). 2778 */ 2779 if (connp->conn_ipversion == IPV4_VERSION || 2780 (udp->udp_state <= TS_IDLE && 2781 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) { 2782 ude.udpEntryInfo.ue_state = state; 2783 /* 2784 * If in6addr_any this will set it to 2785 * INADDR_ANY 2786 */ 2787 ude.udpLocalAddress = connp->conn_laddr_v4; 2788 ude.udpLocalPort = ntohs(connp->conn_lport); 2789 if (udp->udp_state == TS_DATA_XFER) { 2790 /* 2791 * Can potentially get here for 2792 * v6 socket if another process 2793 * (say, ping) has just done a 2794 * sendto(), changing the state 2795 * from the TS_IDLE above to 2796 * TS_DATA_XFER by the time we hit 2797 * this part of the code. 2798 */ 2799 ude.udpEntryInfo.ue_RemoteAddress = 2800 connp->conn_faddr_v4; 2801 ude.udpEntryInfo.ue_RemotePort = 2802 ntohs(connp->conn_fport); 2803 } else { 2804 ude.udpEntryInfo.ue_RemoteAddress = 0; 2805 ude.udpEntryInfo.ue_RemotePort = 0; 2806 } 2807 2808 /* 2809 * We make the assumption that all udp_t 2810 * structs will be created within an address 2811 * region no larger than 32-bits. 2812 */ 2813 ude.udpInstance = (uint32_t)(uintptr_t)udp; 2814 ude.udpCreationProcess = 2815 (connp->conn_cpid < 0) ? 
2816 MIB2_UNKNOWN_PROCESS : 2817 connp->conn_cpid; 2818 ude.udpCreationTime = connp->conn_open_time; 2819 2820 (void) snmp_append_data2(mp_conn_ctl->b_cont, 2821 &mp_conn_tail, (char *)&ude, sizeof (ude)); 2822 mlp.tme_connidx = v4_conn_idx++; 2823 if (needattr) 2824 (void) snmp_append_data2( 2825 mp_attr_ctl->b_cont, &mp_attr_tail, 2826 (char *)&mlp, sizeof (mlp)); 2827 } 2828 if (connp->conn_ipversion == IPV6_VERSION) { 2829 ude6.udp6EntryInfo.ue_state = state; 2830 ude6.udp6LocalAddress = connp->conn_laddr_v6; 2831 ude6.udp6LocalPort = ntohs(connp->conn_lport); 2832 mutex_enter(&connp->conn_lock); 2833 if (connp->conn_ixa->ixa_flags & 2834 IXAF_SCOPEID_SET) { 2835 ude6.udp6IfIndex = 2836 connp->conn_ixa->ixa_scopeid; 2837 } else { 2838 ude6.udp6IfIndex = connp->conn_bound_if; 2839 } 2840 mutex_exit(&connp->conn_lock); 2841 if (udp->udp_state == TS_DATA_XFER) { 2842 ude6.udp6EntryInfo.ue_RemoteAddress = 2843 connp->conn_faddr_v6; 2844 ude6.udp6EntryInfo.ue_RemotePort = 2845 ntohs(connp->conn_fport); 2846 } else { 2847 ude6.udp6EntryInfo.ue_RemoteAddress = 2848 sin6_null.sin6_addr; 2849 ude6.udp6EntryInfo.ue_RemotePort = 0; 2850 } 2851 /* 2852 * We make the assumption that all udp_t 2853 * structs will be created within an address 2854 * region no larger than 32-bits. 2855 */ 2856 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 2857 ude6.udp6CreationProcess = 2858 (connp->conn_cpid < 0) ? 
2859 MIB2_UNKNOWN_PROCESS : 2860 connp->conn_cpid; 2861 ude6.udp6CreationTime = connp->conn_open_time; 2862 2863 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 2864 &mp6_conn_tail, (char *)&ude6, 2865 sizeof (ude6)); 2866 mlp.tme_connidx = v6_conn_idx++; 2867 if (needattr) 2868 (void) snmp_append_data2( 2869 mp6_attr_ctl->b_cont, 2870 &mp6_attr_tail, (char *)&mlp, 2871 sizeof (mlp)); 2872 } 2873 } 2874 } 2875 2876 /* IPv4 UDP endpoints */ 2877 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 2878 sizeof (struct T_optmgmt_ack)]; 2879 optp->level = MIB2_UDP; 2880 optp->name = MIB2_UDP_ENTRY; 2881 optp->len = msgdsize(mp_conn_ctl->b_cont); 2882 qreply(q, mp_conn_ctl); 2883 2884 /* table of MLP attributes... */ 2885 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 2886 sizeof (struct T_optmgmt_ack)]; 2887 optp->level = MIB2_UDP; 2888 optp->name = EXPER_XPORT_MLP; 2889 optp->len = msgdsize(mp_attr_ctl->b_cont); 2890 if (optp->len == 0) 2891 freemsg(mp_attr_ctl); 2892 else 2893 qreply(q, mp_attr_ctl); 2894 2895 /* IPv6 UDP endpoints */ 2896 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 2897 sizeof (struct T_optmgmt_ack)]; 2898 optp->level = MIB2_UDP6; 2899 optp->name = MIB2_UDP6_ENTRY; 2900 optp->len = msgdsize(mp6_conn_ctl->b_cont); 2901 qreply(q, mp6_conn_ctl); 2902 2903 /* table of MLP attributes... */ 2904 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 2905 sizeof (struct T_optmgmt_ack)]; 2906 optp->level = MIB2_UDP6; 2907 optp->name = EXPER_XPORT_MLP; 2908 optp->len = msgdsize(mp6_attr_ctl->b_cont); 2909 if (optp->len == 0) 2910 freemsg(mp6_attr_ctl); 2911 else 2912 qreply(q, mp6_attr_ctl); 2913 2914 return (mp2ctl); 2915 } 2916 2917 /* 2918 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 2919 * NOTE: Per MIB-II, UDP has no writable data. 2920 * TODO: If this ever actually tries to set anything, it needs to be 2921 * to do the appropriate locking. 
2922 */ 2923 /* ARGSUSED */ 2924 int 2925 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 2926 uchar_t *ptr, int len) 2927 { 2928 switch (level) { 2929 case MIB2_UDP: 2930 return (0); 2931 default: 2932 return (1); 2933 } 2934 } 2935 2936 /* 2937 * This routine creates a T_UDERROR_IND message and passes it upstream. 2938 * The address and options are copied from the T_UNITDATA_REQ message 2939 * passed in mp. This message is freed. 2940 */ 2941 static void 2942 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2943 { 2944 struct T_unitdata_req *tudr; 2945 mblk_t *mp1; 2946 uchar_t *destaddr; 2947 t_scalar_t destlen; 2948 uchar_t *optaddr; 2949 t_scalar_t optlen; 2950 2951 if ((mp->b_wptr < mp->b_rptr) || 2952 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2953 goto done; 2954 } 2955 tudr = (struct T_unitdata_req *)mp->b_rptr; 2956 destaddr = mp->b_rptr + tudr->DEST_offset; 2957 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2958 destaddr + tudr->DEST_length < mp->b_rptr || 2959 destaddr + tudr->DEST_length > mp->b_wptr) { 2960 goto done; 2961 } 2962 optaddr = mp->b_rptr + tudr->OPT_offset; 2963 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2964 optaddr + tudr->OPT_length < mp->b_rptr || 2965 optaddr + tudr->OPT_length > mp->b_wptr) { 2966 goto done; 2967 } 2968 destlen = tudr->DEST_length; 2969 optlen = tudr->OPT_length; 2970 2971 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2972 (char *)optaddr, optlen, err); 2973 if (mp1 != NULL) 2974 qreply(q, mp1); 2975 2976 done: 2977 freemsg(mp); 2978 } 2979 2980 /* 2981 * This routine removes a port number association from a stream. It 2982 * is called by udp_wput to handle T_UNBIND_REQ messages. 
2983 */ 2984 static void 2985 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2986 { 2987 conn_t *connp = Q_TO_CONN(q); 2988 int error; 2989 2990 error = udp_do_unbind(connp); 2991 if (error) { 2992 if (error < 0) 2993 udp_err_ack(q, mp, -error, 0); 2994 else 2995 udp_err_ack(q, mp, TSYSERR, error); 2996 return; 2997 } 2998 2999 mp = mi_tpi_ok_ack_alloc(mp); 3000 ASSERT(mp != NULL); 3001 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 3002 qreply(q, mp); 3003 } 3004 3005 /* 3006 * Don't let port fall into the privileged range. 3007 * Since the extra privileged ports can be arbitrary we also 3008 * ensure that we exclude those from consideration. 3009 * us->us_epriv_ports is not sorted thus we loop over it until 3010 * there are no changes. 3011 */ 3012 static in_port_t 3013 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 3014 { 3015 int i; 3016 in_port_t nextport; 3017 boolean_t restart = B_FALSE; 3018 udp_stack_t *us = udp->udp_us; 3019 3020 if (random && udp_random_anon_port != 0) { 3021 (void) random_get_pseudo_bytes((uint8_t *)&port, 3022 sizeof (in_port_t)); 3023 /* 3024 * Unless changed by a sys admin, the smallest anon port 3025 * is 32768 and the largest anon port is 65535. It is 3026 * very likely (50%) for the random port to be smaller 3027 * than the smallest anon port. When that happens, 3028 * add port % (anon port range) to the smallest anon 3029 * port to get the random port. It should fall into the 3030 * valid anon port range. 
3031 */ 3032 if (port < us->us_smallest_anon_port) { 3033 port = us->us_smallest_anon_port + 3034 port % (us->us_largest_anon_port - 3035 us->us_smallest_anon_port); 3036 } 3037 } 3038 3039 retry: 3040 if (port < us->us_smallest_anon_port) 3041 port = us->us_smallest_anon_port; 3042 3043 if (port > us->us_largest_anon_port) { 3044 port = us->us_smallest_anon_port; 3045 if (restart) 3046 return (0); 3047 restart = B_TRUE; 3048 } 3049 3050 if (port < us->us_smallest_nonpriv_port) 3051 port = us->us_smallest_nonpriv_port; 3052 3053 for (i = 0; i < us->us_num_epriv_ports; i++) { 3054 if (port == us->us_epriv_ports[i]) { 3055 port++; 3056 /* 3057 * Make sure that the port is in the 3058 * valid range. 3059 */ 3060 goto retry; 3061 } 3062 } 3063 3064 if (is_system_labeled() && 3065 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 3066 port, IPPROTO_UDP, B_TRUE)) != 0) { 3067 port = nextport; 3068 goto retry; 3069 } 3070 3071 return (port); 3072 } 3073 3074 /* 3075 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 3076 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 3077 * the TPI options, otherwise we take them from msg_control. 3078 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 3079 * Always consumes mp; never consumes tudr_mp. 3080 */ 3081 static int 3082 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, 3083 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) 3084 { 3085 udp_t *udp = connp->conn_udp; 3086 udp_stack_t *us = udp->udp_us; 3087 int error; 3088 ip_xmit_attr_t *ixa; 3089 ip_pkt_t *ipp; 3090 in6_addr_t v6src; 3091 in6_addr_t v6dst; 3092 in6_addr_t v6nexthop; 3093 in_port_t dstport; 3094 uint32_t flowinfo; 3095 uint_t srcid; 3096 int is_absreq_failure = 0; 3097 conn_opt_arg_t coas, *coa; 3098 3099 ASSERT(tudr_mp != NULL || msg != NULL); 3100 3101 /* 3102 * Get ixa before checking state to handle a disconnect race. 
3103 * 3104 * We need an exclusive copy of conn_ixa since the ancillary data 3105 * options might modify it. That copy has no pointers hence we 3106 * need to set them up once we've parsed the ancillary data. 3107 */ 3108 ixa = conn_get_ixa_exclusive(connp); 3109 if (ixa == NULL) { 3110 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3111 freemsg(mp); 3112 return (ENOMEM); 3113 } 3114 ASSERT(cr != NULL); 3115 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3116 ixa->ixa_cred = cr; 3117 ixa->ixa_cpid = pid; 3118 if (is_system_labeled()) { 3119 /* We need to restart with a label based on the cred */ 3120 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3121 } 3122 3123 /* In case previous destination was multicast or multirt */ 3124 ip_attr_newdst(ixa); 3125 3126 /* Get a copy of conn_xmit_ipp since the options might change it */ 3127 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); 3128 if (ipp == NULL) { 3129 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3130 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3131 ixa->ixa_cpid = connp->conn_cpid; 3132 ixa_refrele(ixa); 3133 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3134 freemsg(mp); 3135 return (ENOMEM); 3136 } 3137 mutex_enter(&connp->conn_lock); 3138 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); 3139 mutex_exit(&connp->conn_lock); 3140 if (error != 0) { 3141 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3142 freemsg(mp); 3143 goto done; 3144 } 3145 3146 /* 3147 * Parse the options and update ixa and ipp as a result. 3148 * Note that ixa_tsl can be updated if SCM_UCRED. 3149 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. 
3150 */ 3151 3152 coa = &coas; 3153 coa->coa_connp = connp; 3154 coa->coa_ixa = ixa; 3155 coa->coa_ipp = ipp; 3156 coa->coa_ancillary = B_TRUE; 3157 coa->coa_changed = 0; 3158 3159 if (msg != NULL) { 3160 error = process_auxiliary_options(connp, msg->msg_control, 3161 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); 3162 } else { 3163 struct T_unitdata_req *tudr; 3164 3165 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; 3166 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 3167 error = tpi_optcom_buf(connp->conn_wq, tudr_mp, 3168 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, 3169 coa, &is_absreq_failure); 3170 } 3171 if (error != 0) { 3172 /* 3173 * Note: No special action needed in this 3174 * module for "is_absreq_failure" 3175 */ 3176 freemsg(mp); 3177 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3178 goto done; 3179 } 3180 ASSERT(is_absreq_failure == 0); 3181 3182 mutex_enter(&connp->conn_lock); 3183 /* 3184 * If laddr is unspecified then we look at sin6_src_id. 3185 * We will give precedence to a source address set with IPV6_PKTINFO 3186 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 3187 * want ip_attr_connect to select a source (since it can fail) when 3188 * IPV6_PKTINFO is specified. 3189 * If this doesn't result in a source address then we get a source 3190 * from ip_attr_connect() below. 
3191 */ 3192 v6src = connp->conn_saddr_v6; 3193 if (sin != NULL) { 3194 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 3195 dstport = sin->sin_port; 3196 flowinfo = 0; 3197 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3198 ixa->ixa_flags |= IXAF_IS_IPV4; 3199 } else if (sin6 != NULL) { 3200 v6dst = sin6->sin6_addr; 3201 dstport = sin6->sin6_port; 3202 flowinfo = sin6->sin6_flowinfo; 3203 srcid = sin6->__sin6_src_id; 3204 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 3205 ixa->ixa_scopeid = sin6->sin6_scope_id; 3206 ixa->ixa_flags |= IXAF_SCOPEID_SET; 3207 } else { 3208 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3209 } 3210 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 3211 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3212 connp->conn_netstack); 3213 } 3214 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 3215 ixa->ixa_flags |= IXAF_IS_IPV4; 3216 else 3217 ixa->ixa_flags &= ~IXAF_IS_IPV4; 3218 } else { 3219 /* Connected case */ 3220 v6dst = connp->conn_faddr_v6; 3221 dstport = connp->conn_fport; 3222 flowinfo = connp->conn_flowinfo; 3223 } 3224 mutex_exit(&connp->conn_lock); 3225 3226 /* Handle IPV6_PKTINFO setting source address. */ 3227 if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && 3228 (ipp->ipp_fields & IPPF_ADDR)) { 3229 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3230 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3231 v6src = ipp->ipp_addr; 3232 } else { 3233 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3234 v6src = ipp->ipp_addr; 3235 } 3236 } 3237 3238 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop); 3239 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 3240 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 3241 3242 switch (error) { 3243 case 0: 3244 break; 3245 case EADDRNOTAVAIL: 3246 /* 3247 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3248 * Don't have the application see that errno 3249 */ 3250 error = ENETUNREACH; 3251 goto failed; 3252 case ENETDOWN: 3253 /* 3254 * Have !ipif_addr_ready address; drop packet silently 3255 * until we can get applications to not send until we 3256 * are ready. 3257 */ 3258 error = 0; 3259 goto failed; 3260 case EHOSTUNREACH: 3261 case ENETUNREACH: 3262 if (ixa->ixa_ire != NULL) { 3263 /* 3264 * Let conn_ip_output/ire_send_noroute return 3265 * the error and send any local ICMP error. 3266 */ 3267 error = 0; 3268 break; 3269 } 3270 /* FALLTHRU */ 3271 default: 3272 failed: 3273 freemsg(mp); 3274 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3275 goto done; 3276 } 3277 3278 /* 3279 * We might be going to a different destination than last time, 3280 * thus check that TX allows the communication and compute any 3281 * needed label. 3282 * 3283 * TSOL Note: We have an exclusive ipp and ixa for this thread so we 3284 * don't have to worry about concurrent threads. 3285 */ 3286 if (is_system_labeled()) { 3287 /* Using UDP MLP requires SCM_UCRED from user */ 3288 if (connp->conn_mlp_type != mlptSingle && 3289 !((ixa->ixa_flags & IXAF_UCRED_TSL))) { 3290 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3291 error = ECONNREFUSED; 3292 freemsg(mp); 3293 goto done; 3294 } 3295 /* 3296 * Check whether Trusted Solaris policy allows communication 3297 * with this host, and pretend that the destination is 3298 * unreachable if not. 3299 * Compute any needed label and place it in ipp_label_v4/v6. 3300 * 3301 * Later conn_build_hdr_template/conn_prepend_hdr takes 3302 * ipp_label_v4/v6 to form the packet. 3303 * 3304 * Tsol note: We have ipp structure local to this thread so 3305 * no locking is needed. 
3306 */ 3307 error = conn_update_label(connp, ixa, &v6dst, ipp); 3308 if (error != 0) { 3309 freemsg(mp); 3310 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3311 goto done; 3312 } 3313 } 3314 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport, 3315 flowinfo, mp, &error); 3316 if (mp == NULL) { 3317 ASSERT(error != 0); 3318 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3319 goto done; 3320 } 3321 if (ixa->ixa_pktlen > IP_MAXPACKET) { 3322 error = EMSGSIZE; 3323 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3324 freemsg(mp); 3325 goto done; 3326 } 3327 /* We're done. Pass the packet to ip. */ 3328 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3329 3330 error = conn_ip_output(mp, ixa); 3331 /* No udpOutErrors if an error since IP increases its error counter */ 3332 switch (error) { 3333 case 0: 3334 break; 3335 case EWOULDBLOCK: 3336 (void) ixa_check_drain_insert(connp, ixa); 3337 error = 0; 3338 break; 3339 case EADDRNOTAVAIL: 3340 /* 3341 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3342 * Don't have the application see that errno 3343 */ 3344 error = ENETUNREACH; 3345 /* FALLTHRU */ 3346 default: 3347 mutex_enter(&connp->conn_lock); 3348 /* 3349 * Clear the source and v6lastdst so we call ip_attr_connect 3350 * for the next packet and try to pick a better source. 3351 */ 3352 if (connp->conn_mcbc_bind) 3353 connp->conn_saddr_v6 = ipv6_all_zeros; 3354 else 3355 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3356 connp->conn_v6lastdst = ipv6_all_zeros; 3357 mutex_exit(&connp->conn_lock); 3358 break; 3359 } 3360 done: 3361 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3362 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3363 ixa->ixa_cpid = connp->conn_cpid; 3364 ixa_refrele(ixa); 3365 ip_pkt_free(ipp); 3366 kmem_free(ipp, sizeof (*ipp)); 3367 return (error); 3368 } 3369 3370 /* 3371 * Handle sending an M_DATA for a connected socket. 3372 * Handles both IPv4 and IPv6. 
3373 */ 3374 static int 3375 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) 3376 { 3377 udp_t *udp = connp->conn_udp; 3378 udp_stack_t *us = udp->udp_us; 3379 int error; 3380 ip_xmit_attr_t *ixa; 3381 3382 /* 3383 * If no other thread is using conn_ixa this just gets a reference to 3384 * conn_ixa. Otherwise we get a safe copy of conn_ixa. 3385 */ 3386 ixa = conn_get_ixa(connp, B_FALSE); 3387 if (ixa == NULL) { 3388 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3389 freemsg(mp); 3390 return (ENOMEM); 3391 } 3392 3393 ASSERT(cr != NULL); 3394 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3395 ixa->ixa_cred = cr; 3396 ixa->ixa_cpid = pid; 3397 3398 mutex_enter(&connp->conn_lock); 3399 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6, 3400 connp->conn_fport, connp->conn_flowinfo, &error); 3401 3402 if (mp == NULL) { 3403 ASSERT(error != 0); 3404 mutex_exit(&connp->conn_lock); 3405 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3406 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3407 ixa->ixa_cpid = connp->conn_cpid; 3408 ixa_refrele(ixa); 3409 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3410 freemsg(mp); 3411 return (error); 3412 } 3413 3414 /* 3415 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3416 * safe copy, then we need to fill in any pointers in it. 3417 */ 3418 if (ixa->ixa_ire == NULL) { 3419 in6_addr_t faddr, saddr; 3420 in6_addr_t nexthop; 3421 in_port_t fport; 3422 3423 saddr = connp->conn_saddr_v6; 3424 faddr = connp->conn_faddr_v6; 3425 fport = connp->conn_fport; 3426 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop); 3427 mutex_exit(&connp->conn_lock); 3428 3429 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, 3430 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | 3431 IPDF_IPSEC); 3432 switch (error) { 3433 case 0: 3434 break; 3435 case EADDRNOTAVAIL: 3436 /* 3437 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3438 * Don't have the application see that errno 3439 */ 3440 error = ENETUNREACH; 3441 goto failed; 3442 case ENETDOWN: 3443 /* 3444 * Have !ipif_addr_ready address; drop packet silently 3445 * until we can get applications to not send until we 3446 * are ready. 3447 */ 3448 error = 0; 3449 goto failed; 3450 case EHOSTUNREACH: 3451 case ENETUNREACH: 3452 if (ixa->ixa_ire != NULL) { 3453 /* 3454 * Let conn_ip_output/ire_send_noroute return 3455 * the error and send any local ICMP error. 3456 */ 3457 error = 0; 3458 break; 3459 } 3460 /* FALLTHRU */ 3461 default: 3462 failed: 3463 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3464 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3465 ixa->ixa_cpid = connp->conn_cpid; 3466 ixa_refrele(ixa); 3467 freemsg(mp); 3468 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3469 return (error); 3470 } 3471 } else { 3472 /* Done with conn_t */ 3473 mutex_exit(&connp->conn_lock); 3474 } 3475 ASSERT(ixa->ixa_ire != NULL); 3476 3477 /* We're done. Pass the packet to ip. */ 3478 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3479 3480 error = conn_ip_output(mp, ixa); 3481 /* No udpOutErrors if an error since IP increases its error counter */ 3482 switch (error) { 3483 case 0: 3484 break; 3485 case EWOULDBLOCK: 3486 (void) ixa_check_drain_insert(connp, ixa); 3487 error = 0; 3488 break; 3489 case EADDRNOTAVAIL: 3490 /* 3491 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3492 * Don't have the application see that errno 3493 */ 3494 error = ENETUNREACH; 3495 break; 3496 } 3497 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3498 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3499 ixa->ixa_cpid = connp->conn_cpid; 3500 ixa_refrele(ixa); 3501 return (error); 3502 } 3503 3504 /* 3505 * Handle sending an M_DATA to the last destination. 3506 * Handles both IPv4 and IPv6. 3507 * 3508 * NOTE: The caller must hold conn_lock and we drop it here. 
3509 */ 3510 static int 3511 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, 3512 ip_xmit_attr_t *ixa) 3513 { 3514 udp_t *udp = connp->conn_udp; 3515 udp_stack_t *us = udp->udp_us; 3516 int error; 3517 3518 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3519 ASSERT(ixa != NULL); 3520 3521 ASSERT(cr != NULL); 3522 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3523 ixa->ixa_cred = cr; 3524 ixa->ixa_cpid = pid; 3525 3526 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, 3527 connp->conn_lastdstport, connp->conn_lastflowinfo, &error); 3528 3529 if (mp == NULL) { 3530 ASSERT(error != 0); 3531 mutex_exit(&connp->conn_lock); 3532 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3533 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3534 ixa->ixa_cpid = connp->conn_cpid; 3535 ixa_refrele(ixa); 3536 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3537 freemsg(mp); 3538 return (error); 3539 } 3540 3541 /* 3542 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3543 * safe copy, then we need to fill in any pointers in it. 3544 */ 3545 if (ixa->ixa_ire == NULL) { 3546 in6_addr_t lastdst, lastsrc; 3547 in6_addr_t nexthop; 3548 in_port_t lastport; 3549 3550 lastsrc = connp->conn_v6lastsrc; 3551 lastdst = connp->conn_v6lastdst; 3552 lastport = connp->conn_lastdstport; 3553 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); 3554 mutex_exit(&connp->conn_lock); 3555 3556 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, 3557 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | 3558 IPDF_VERIFY_DST | IPDF_IPSEC); 3559 switch (error) { 3560 case 0: 3561 break; 3562 case EADDRNOTAVAIL: 3563 /* 3564 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3565 * Don't have the application see that errno 3566 */ 3567 error = ENETUNREACH; 3568 goto failed; 3569 case ENETDOWN: 3570 /* 3571 * Have !ipif_addr_ready address; drop packet silently 3572 * until we can get applications to not send until we 3573 * are ready. 
3574 */ 3575 error = 0; 3576 goto failed; 3577 case EHOSTUNREACH: 3578 case ENETUNREACH: 3579 if (ixa->ixa_ire != NULL) { 3580 /* 3581 * Let conn_ip_output/ire_send_noroute return 3582 * the error and send any local ICMP error. 3583 */ 3584 error = 0; 3585 break; 3586 } 3587 /* FALLTHRU */ 3588 default: 3589 failed: 3590 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3591 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3592 ixa->ixa_cpid = connp->conn_cpid; 3593 ixa_refrele(ixa); 3594 freemsg(mp); 3595 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3596 return (error); 3597 } 3598 } else { 3599 /* Done with conn_t */ 3600 mutex_exit(&connp->conn_lock); 3601 } 3602 3603 /* We're done. Pass the packet to ip. */ 3604 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3605 3606 error = conn_ip_output(mp, ixa); 3607 /* No udpOutErrors if an error since IP increases its error counter */ 3608 switch (error) { 3609 case 0: 3610 break; 3611 case EWOULDBLOCK: 3612 (void) ixa_check_drain_insert(connp, ixa); 3613 error = 0; 3614 break; 3615 case EADDRNOTAVAIL: 3616 /* 3617 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3618 * Don't have the application see that errno 3619 */ 3620 error = ENETUNREACH; 3621 /* FALLTHRU */ 3622 default: 3623 mutex_enter(&connp->conn_lock); 3624 /* 3625 * Clear the source and v6lastdst so we call ip_attr_connect 3626 * for the next packet and try to pick a better source. 3627 */ 3628 if (connp->conn_mcbc_bind) 3629 connp->conn_saddr_v6 = ipv6_all_zeros; 3630 else 3631 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3632 connp->conn_v6lastdst = ipv6_all_zeros; 3633 mutex_exit(&connp->conn_lock); 3634 break; 3635 } 3636 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 3637 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3638 ixa->ixa_cpid = connp->conn_cpid; 3639 ixa_refrele(ixa); 3640 return (error); 3641 } 3642 3643 3644 /* 3645 * Prepend the header template and then fill in the source and 3646 * flowinfo. 
The caller needs to handle the destination address since 3647 * it's setting is different if rthdr or source route. 3648 * 3649 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET. 3650 * When it returns NULL it sets errorp. 3651 */ 3652 static mblk_t * 3653 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, 3654 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) 3655 { 3656 udp_t *udp = connp->conn_udp; 3657 udp_stack_t *us = udp->udp_us; 3658 boolean_t insert_spi = udp->udp_nat_t_endpoint; 3659 uint_t pktlen; 3660 uint_t alloclen; 3661 uint_t copylen; 3662 uint8_t *iph; 3663 uint_t ip_hdr_length; 3664 udpha_t *udpha; 3665 uint32_t cksum; 3666 ip_pkt_t *ipp; 3667 3668 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3669 3670 /* 3671 * Copy the header template and leave space for an SPI 3672 */ 3673 copylen = connp->conn_ht_iphc_len; 3674 alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0); 3675 pktlen = alloclen + msgdsize(mp); 3676 if (pktlen > IP_MAXPACKET) { 3677 freemsg(mp); 3678 *errorp = EMSGSIZE; 3679 return (NULL); 3680 } 3681 ixa->ixa_pktlen = pktlen; 3682 3683 /* check/fix buffer config, setup pointers into it */ 3684 iph = mp->b_rptr - alloclen; 3685 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { 3686 mblk_t *mp1; 3687 3688 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); 3689 if (mp1 == NULL) { 3690 freemsg(mp); 3691 *errorp = ENOMEM; 3692 return (NULL); 3693 } 3694 mp1->b_wptr = DB_LIM(mp1); 3695 mp1->b_cont = mp; 3696 mp = mp1; 3697 iph = (mp->b_wptr - alloclen); 3698 } 3699 mp->b_rptr = iph; 3700 bcopy(connp->conn_ht_iphc, iph, copylen); 3701 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); 3702 3703 ixa->ixa_ip_hdr_length = ip_hdr_length; 3704 udpha = (udpha_t *)(iph + ip_hdr_length); 3705 3706 /* 3707 * Setup header length and prepare for ULP checksum done in IP. 
3708 * udp_build_hdr_template has already massaged any routing header 3709 * and placed the result in conn_sum. 3710 * 3711 * We make it easy for IP to include our pseudo header 3712 * by putting our length in uha_checksum. 3713 */ 3714 cksum = pktlen - ip_hdr_length; 3715 udpha->uha_length = htons(cksum); 3716 3717 cksum += connp->conn_sum; 3718 cksum = (cksum >> 16) + (cksum & 0xFFFF); 3719 ASSERT(cksum < 0x10000); 3720 3721 ipp = &connp->conn_xmit_ipp; 3722 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3723 ipha_t *ipha = (ipha_t *)iph; 3724 3725 ipha->ipha_length = htons((uint16_t)pktlen); 3726 3727 /* IP does the checksum if uha_checksum is non-zero */ 3728 if (us->us_do_checksum) 3729 udpha->uha_checksum = htons(cksum); 3730 3731 /* if IP_PKTINFO specified an addres it wins over bind() */ 3732 if ((ipp->ipp_fields & IPPF_ADDR) && 3733 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3734 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); 3735 ipha->ipha_src = ipp->ipp_addr_v4; 3736 } else { 3737 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 3738 } 3739 } else { 3740 ip6_t *ip6h = (ip6_t *)iph; 3741 3742 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); 3743 udpha->uha_checksum = htons(cksum); 3744 3745 /* if IP_PKTINFO specified an addres it wins over bind() */ 3746 if ((ipp->ipp_fields & IPPF_ADDR) && 3747 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3748 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); 3749 ip6h->ip6_src = ipp->ipp_addr; 3750 } else { 3751 ip6h->ip6_src = *v6src; 3752 } 3753 ip6h->ip6_vcf = 3754 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 3755 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 3756 if (ipp->ipp_fields & IPPF_TCLASS) { 3757 /* Overrides the class part of flowinfo */ 3758 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 3759 ipp->ipp_tclass); 3760 } 3761 } 3762 3763 /* Insert all-0s SPI now. 
*/ 3764 if (insert_spi) 3765 *((uint32_t *)(udpha + 1)) = 0; 3766 3767 udpha->uha_dst_port = dstport; 3768 return (mp); 3769 } 3770 3771 /* 3772 * Send a T_UDERR_IND in response to an M_DATA 3773 */ 3774 static void 3775 udp_ud_err_connected(conn_t *connp, t_scalar_t error) 3776 { 3777 struct sockaddr_storage ss; 3778 sin_t *sin; 3779 sin6_t *sin6; 3780 struct sockaddr *addr; 3781 socklen_t addrlen; 3782 mblk_t *mp1; 3783 3784 mutex_enter(&connp->conn_lock); 3785 /* Initialize addr and addrlen as if they're passed in */ 3786 if (connp->conn_family == AF_INET) { 3787 sin = (sin_t *)&ss; 3788 *sin = sin_null; 3789 sin->sin_family = AF_INET; 3790 sin->sin_port = connp->conn_fport; 3791 sin->sin_addr.s_addr = connp->conn_faddr_v4; 3792 addr = (struct sockaddr *)sin; 3793 addrlen = sizeof (*sin); 3794 } else { 3795 sin6 = (sin6_t *)&ss; 3796 *sin6 = sin6_null; 3797 sin6->sin6_family = AF_INET6; 3798 sin6->sin6_port = connp->conn_fport; 3799 sin6->sin6_flowinfo = connp->conn_flowinfo; 3800 sin6->sin6_addr = connp->conn_faddr_v6; 3801 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && 3802 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 3803 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; 3804 } else { 3805 sin6->sin6_scope_id = 0; 3806 } 3807 sin6->__sin6_src_id = 0; 3808 addr = (struct sockaddr *)sin6; 3809 addrlen = sizeof (*sin6); 3810 } 3811 mutex_exit(&connp->conn_lock); 3812 3813 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); 3814 if (mp1 != NULL) 3815 putnext(connp->conn_rq, mp1); 3816 } 3817 3818 /* 3819 * This routine handles all messages passed downstream. It either 3820 * consumes the message or passes it downstream; it never queues a 3821 * a message. 3822 * 3823 * Also entry point for sockfs when udp is in "direct sockfs" mode. 
This mode 3824 * is valid when we are directly beneath the stream head, and thus sockfs 3825 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3826 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3827 * connected endpoints. 3828 */ 3829 void 3830 udp_wput(queue_t *q, mblk_t *mp) 3831 { 3832 sin6_t *sin6; 3833 sin_t *sin = NULL; 3834 uint_t srcid; 3835 conn_t *connp = Q_TO_CONN(q); 3836 udp_t *udp = connp->conn_udp; 3837 int error = 0; 3838 struct sockaddr *addr = NULL; 3839 socklen_t addrlen; 3840 udp_stack_t *us = udp->udp_us; 3841 struct T_unitdata_req *tudr; 3842 mblk_t *data_mp; 3843 ushort_t ipversion; 3844 cred_t *cr; 3845 pid_t pid; 3846 3847 /* 3848 * We directly handle several cases here: T_UNITDATA_REQ message 3849 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3850 * socket. 3851 */ 3852 switch (DB_TYPE(mp)) { 3853 case M_DATA: 3854 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3855 /* Not connected; address is required */ 3856 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3857 UDP_DBGSTAT(us, udp_data_notconn); 3858 UDP_STAT(us, udp_out_err_notconn); 3859 freemsg(mp); 3860 return; 3861 } 3862 /* 3863 * All Solaris components should pass a db_credp 3864 * for this message, hence we ASSERT. 3865 * On production kernels we return an error to be robust against 3866 * random streams modules sitting on top of us. 
3867 */ 3868 cr = msg_getcred(mp, &pid); 3869 ASSERT(cr != NULL); 3870 if (cr == NULL) { 3871 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3872 freemsg(mp); 3873 return; 3874 } 3875 ASSERT(udp->udp_issocket); 3876 UDP_DBGSTAT(us, udp_data_conn); 3877 error = udp_output_connected(connp, mp, cr, pid); 3878 if (error != 0) { 3879 UDP_STAT(us, udp_out_err_output); 3880 if (connp->conn_rq != NULL) 3881 udp_ud_err_connected(connp, (t_scalar_t)error); 3882 #ifdef DEBUG 3883 printf("udp_output_connected returned %d\n", error); 3884 #endif 3885 } 3886 return; 3887 3888 case M_PROTO: 3889 case M_PCPROTO: 3890 tudr = (struct T_unitdata_req *)mp->b_rptr; 3891 if (MBLKL(mp) < sizeof (*tudr) || 3892 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3893 udp_wput_other(q, mp); 3894 return; 3895 } 3896 break; 3897 3898 default: 3899 udp_wput_other(q, mp); 3900 return; 3901 } 3902 3903 /* Handle valid T_UNITDATA_REQ here */ 3904 data_mp = mp->b_cont; 3905 if (data_mp == NULL) { 3906 error = EPROTO; 3907 goto ud_error2; 3908 } 3909 mp->b_cont = NULL; 3910 3911 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3912 error = EADDRNOTAVAIL; 3913 goto ud_error2; 3914 } 3915 3916 /* 3917 * All Solaris components should pass a db_credp 3918 * for this TPI message, hence we should ASSERT. 3919 * However, RPC (svc_clts_ksend) does this odd thing where it 3920 * passes the options from a T_UNITDATA_IND unchanged in a 3921 * T_UNITDATA_REQ. While that is the right thing to do for 3922 * some options, SCM_UCRED being the key one, this also makes it 3923 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3924 */ 3925 cr = msg_getcred(mp, &pid); 3926 if (cr == NULL) { 3927 cr = connp->conn_cred; 3928 pid = connp->conn_cpid; 3929 } 3930 3931 /* 3932 * If a port has not been bound to the stream, fail. 3933 * This is not a problem when sockfs is directly 3934 * above us, because it will ensure that the socket 3935 * is first bound before allowing data to be sent. 
3936 */ 3937 if (udp->udp_state == TS_UNBND) { 3938 error = EPROTO; 3939 goto ud_error2; 3940 } 3941 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3942 addrlen = tudr->DEST_length; 3943 3944 switch (connp->conn_family) { 3945 case AF_INET6: 3946 sin6 = (sin6_t *)addr; 3947 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3948 (sin6->sin6_family != AF_INET6)) { 3949 error = EADDRNOTAVAIL; 3950 goto ud_error2; 3951 } 3952 3953 srcid = sin6->__sin6_src_id; 3954 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3955 /* 3956 * Destination is a non-IPv4-compatible IPv6 address. 3957 * Send out an IPv6 format packet. 3958 */ 3959 3960 /* 3961 * If the local address is a mapped address return 3962 * an error. 3963 * It would be possible to send an IPv6 packet but the 3964 * response would never make it back to the application 3965 * since it is bound to a mapped address. 3966 */ 3967 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3968 error = EADDRNOTAVAIL; 3969 goto ud_error2; 3970 } 3971 3972 UDP_DBGSTAT(us, udp_out_ipv6); 3973 3974 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3975 sin6->sin6_addr = ipv6_loopback; 3976 ipversion = IPV6_VERSION; 3977 } else { 3978 if (connp->conn_ipv6_v6only) { 3979 error = EADDRNOTAVAIL; 3980 goto ud_error2; 3981 } 3982 3983 /* 3984 * If the local address is not zero or a mapped address 3985 * return an error. It would be possible to send an 3986 * IPv4 packet but the response would never make it 3987 * back to the application since it is bound to a 3988 * non-mapped address. 
3989 */ 3990 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3991 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3992 error = EADDRNOTAVAIL; 3993 goto ud_error2; 3994 } 3995 UDP_DBGSTAT(us, udp_out_mapped); 3996 3997 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3998 V4_PART_OF_V6(sin6->sin6_addr) = 3999 htonl(INADDR_LOOPBACK); 4000 } 4001 ipversion = IPV4_VERSION; 4002 } 4003 4004 if (tudr->OPT_length != 0) { 4005 /* 4006 * If we are connected then the destination needs to be 4007 * the same as the connected one. 4008 */ 4009 if (udp->udp_state == TS_DATA_XFER && 4010 !conn_same_as_last_v6(connp, sin6)) { 4011 error = EISCONN; 4012 goto ud_error2; 4013 } 4014 UDP_STAT(us, udp_out_opt); 4015 error = udp_output_ancillary(connp, NULL, sin6, 4016 data_mp, mp, NULL, cr, pid); 4017 } else { 4018 ip_xmit_attr_t *ixa; 4019 4020 /* 4021 * We have to allocate an ip_xmit_attr_t before we grab 4022 * conn_lock and we need to hold conn_lock once we've 4023 * checked conn_same_as_last_v6 to handle concurrent 4024 * send* calls on a socket. 
4025 */ 4026 ixa = conn_get_ixa(connp, B_FALSE); 4027 if (ixa == NULL) { 4028 error = ENOMEM; 4029 goto ud_error2; 4030 } 4031 mutex_enter(&connp->conn_lock); 4032 4033 if (conn_same_as_last_v6(connp, sin6) && 4034 connp->conn_lastsrcid == srcid && 4035 ipsec_outbound_policy_current(ixa)) { 4036 UDP_DBGSTAT(us, udp_out_lastdst); 4037 /* udp_output_lastdst drops conn_lock */ 4038 error = udp_output_lastdst(connp, data_mp, cr, 4039 pid, ixa); 4040 } else { 4041 UDP_DBGSTAT(us, udp_out_diffdst); 4042 /* udp_output_newdst drops conn_lock */ 4043 error = udp_output_newdst(connp, data_mp, NULL, 4044 sin6, ipversion, cr, pid, ixa); 4045 } 4046 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 4047 } 4048 if (error == 0) { 4049 freeb(mp); 4050 return; 4051 } 4052 break; 4053 4054 case AF_INET: 4055 sin = (sin_t *)addr; 4056 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 4057 (sin->sin_family != AF_INET)) { 4058 error = EADDRNOTAVAIL; 4059 goto ud_error2; 4060 } 4061 UDP_DBGSTAT(us, udp_out_ipv4); 4062 if (sin->sin_addr.s_addr == INADDR_ANY) 4063 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 4064 ipversion = IPV4_VERSION; 4065 4066 srcid = 0; 4067 if (tudr->OPT_length != 0) { 4068 /* 4069 * If we are connected then the destination needs to be 4070 * the same as the connected one. 4071 */ 4072 if (udp->udp_state == TS_DATA_XFER && 4073 !conn_same_as_last_v4(connp, sin)) { 4074 error = EISCONN; 4075 goto ud_error2; 4076 } 4077 UDP_STAT(us, udp_out_opt); 4078 error = udp_output_ancillary(connp, sin, NULL, 4079 data_mp, mp, NULL, cr, pid); 4080 } else { 4081 ip_xmit_attr_t *ixa; 4082 4083 /* 4084 * We have to allocate an ip_xmit_attr_t before we grab 4085 * conn_lock and we need to hold conn_lock once we've 4086 * checked conn_same_as_last_v4 to handle concurrent 4087 * send* calls on a socket. 
4088 */ 4089 ixa = conn_get_ixa(connp, B_FALSE); 4090 if (ixa == NULL) { 4091 error = ENOMEM; 4092 goto ud_error2; 4093 } 4094 mutex_enter(&connp->conn_lock); 4095 4096 if (conn_same_as_last_v4(connp, sin) && 4097 ipsec_outbound_policy_current(ixa)) { 4098 UDP_DBGSTAT(us, udp_out_lastdst); 4099 /* udp_output_lastdst drops conn_lock */ 4100 error = udp_output_lastdst(connp, data_mp, cr, 4101 pid, ixa); 4102 } else { 4103 UDP_DBGSTAT(us, udp_out_diffdst); 4104 /* udp_output_newdst drops conn_lock */ 4105 error = udp_output_newdst(connp, data_mp, sin, 4106 NULL, ipversion, cr, pid, ixa); 4107 } 4108 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 4109 } 4110 if (error == 0) { 4111 freeb(mp); 4112 return; 4113 } 4114 break; 4115 } 4116 UDP_STAT(us, udp_out_err_output); 4117 ASSERT(mp != NULL); 4118 /* mp is freed by the following routine */ 4119 udp_ud_err(q, mp, (t_scalar_t)error); 4120 return; 4121 4122 ud_error2: 4123 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 4124 freemsg(data_mp); 4125 UDP_STAT(us, udp_out_err_output); 4126 ASSERT(mp != NULL); 4127 /* mp is freed by the following routine */ 4128 udp_ud_err(q, mp, (t_scalar_t)error); 4129 } 4130 4131 /* 4132 * Handle the case of the IP address, port, flow label being different 4133 * for both IPv4 and IPv6. 4134 * 4135 * NOTE: The caller must hold conn_lock and we drop it here. 
4136 */ 4137 static int 4138 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 4139 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 4140 { 4141 uint_t srcid; 4142 uint32_t flowinfo; 4143 udp_t *udp = connp->conn_udp; 4144 int error = 0; 4145 ip_xmit_attr_t *oldixa; 4146 udp_stack_t *us = udp->udp_us; 4147 in6_addr_t v6src; 4148 in6_addr_t v6dst; 4149 in6_addr_t v6nexthop; 4150 in_port_t dstport; 4151 4152 ASSERT(MUTEX_HELD(&connp->conn_lock)); 4153 ASSERT(ixa != NULL); 4154 /* 4155 * We hold conn_lock across all the use and modifications of 4156 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 4157 * stay consistent. 4158 */ 4159 4160 ASSERT(cr != NULL); 4161 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4162 ixa->ixa_cred = cr; 4163 ixa->ixa_cpid = pid; 4164 if (is_system_labeled()) { 4165 /* We need to restart with a label based on the cred */ 4166 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 4167 } 4168 4169 /* 4170 * If we are connected then the destination needs to be the 4171 * same as the connected one, which is not the case here since we 4172 * checked for that above. 4173 */ 4174 if (udp->udp_state == TS_DATA_XFER) { 4175 mutex_exit(&connp->conn_lock); 4176 error = EISCONN; 4177 goto ud_error; 4178 } 4179 4180 /* In case previous destination was multicast or multirt */ 4181 ip_attr_newdst(ixa); 4182 4183 /* 4184 * If laddr is unspecified then we look at sin6_src_id. 4185 * We will give precedence to a source address set with IPV6_PKTINFO 4186 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 4187 * want ip_attr_connect to select a source (since it can fail) when 4188 * IPV6_PKTINFO is specified. 4189 * If this doesn't result in a source address then we get a source 4190 * from ip_attr_connect() below. 
4191 */ 4192 v6src = connp->conn_saddr_v6; 4193 if (sin != NULL) { 4194 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 4195 dstport = sin->sin_port; 4196 flowinfo = 0; 4197 srcid = 0; 4198 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4199 if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) { 4200 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4201 connp->conn_netstack); 4202 } 4203 ixa->ixa_flags |= IXAF_IS_IPV4; 4204 } else { 4205 v6dst = sin6->sin6_addr; 4206 dstport = sin6->sin6_port; 4207 flowinfo = sin6->sin6_flowinfo; 4208 srcid = sin6->__sin6_src_id; 4209 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 4210 ixa->ixa_scopeid = sin6->sin6_scope_id; 4211 ixa->ixa_flags |= IXAF_SCOPEID_SET; 4212 } else { 4213 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4214 } 4215 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 4216 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4217 connp->conn_netstack); 4218 } 4219 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 4220 ixa->ixa_flags |= IXAF_IS_IPV4; 4221 else 4222 ixa->ixa_flags &= ~IXAF_IS_IPV4; 4223 } 4224 /* Handle IPV6_PKTINFO setting source address. */ 4225 if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && 4226 (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR)) { 4227 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 4228 4229 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4230 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4231 v6src = ipp->ipp_addr; 4232 } else { 4233 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4234 v6src = ipp->ipp_addr; 4235 } 4236 } 4237 4238 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 4239 mutex_exit(&connp->conn_lock); 4240 4241 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 4242 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 4243 switch (error) { 4244 case 0: 4245 break; 4246 case EADDRNOTAVAIL: 4247 /* 4248 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
4249 * Don't have the application see that errno 4250 */ 4251 error = ENETUNREACH; 4252 goto failed; 4253 case ENETDOWN: 4254 /* 4255 * Have !ipif_addr_ready address; drop packet silently 4256 * until we can get applications to not send until we 4257 * are ready. 4258 */ 4259 error = 0; 4260 goto failed; 4261 case EHOSTUNREACH: 4262 case ENETUNREACH: 4263 if (ixa->ixa_ire != NULL) { 4264 /* 4265 * Let conn_ip_output/ire_send_noroute return 4266 * the error and send any local ICMP error. 4267 */ 4268 error = 0; 4269 break; 4270 } 4271 /* FALLTHRU */ 4272 failed: 4273 default: 4274 goto ud_error; 4275 } 4276 4277 4278 /* 4279 * Cluster note: we let the cluster hook know that we are sending to a 4280 * new address and/or port. 4281 */ 4282 if (cl_inet_connect2 != NULL) { 4283 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 4284 if (error != 0) { 4285 error = EHOSTUNREACH; 4286 goto ud_error; 4287 } 4288 } 4289 4290 mutex_enter(&connp->conn_lock); 4291 /* 4292 * While we dropped the lock some other thread might have connected 4293 * this socket. If so we bail out with EISCONN to ensure that the 4294 * connecting thread is the one that updates conn_ixa, conn_ht_* 4295 * and conn_*last*. 4296 */ 4297 if (udp->udp_state == TS_DATA_XFER) { 4298 mutex_exit(&connp->conn_lock); 4299 error = EISCONN; 4300 goto ud_error; 4301 } 4302 4303 /* 4304 * We need to rebuild the headers if 4305 * - we are labeling packets (could be different for different 4306 * destinations) 4307 * - we have a source route (or routing header) since we need to 4308 * massage that to get the pseudo-header checksum 4309 * - the IP version is different than the last time 4310 * - a socket option with COA_HEADER_CHANGED has been set which 4311 * set conn_v6lastdst to zero. 4312 * 4313 * Otherwise the prepend function will just update the src, dst, 4314 * dstport, and flow label. 
4315 */ 4316 if (is_system_labeled()) { 4317 /* TX MLP requires SCM_UCRED and don't have that here */ 4318 if (connp->conn_mlp_type != mlptSingle) { 4319 mutex_exit(&connp->conn_lock); 4320 error = ECONNREFUSED; 4321 goto ud_error; 4322 } 4323 /* 4324 * Check whether Trusted Solaris policy allows communication 4325 * with this host, and pretend that the destination is 4326 * unreachable if not. 4327 * Compute any needed label and place it in ipp_label_v4/v6. 4328 * 4329 * Later conn_build_hdr_template/conn_prepend_hdr takes 4330 * ipp_label_v4/v6 to form the packet. 4331 * 4332 * Tsol note: Since we hold conn_lock we know no other 4333 * thread manipulates conn_xmit_ipp. 4334 */ 4335 error = conn_update_label(connp, ixa, &v6dst, 4336 &connp->conn_xmit_ipp); 4337 if (error != 0) { 4338 mutex_exit(&connp->conn_lock); 4339 goto ud_error; 4340 } 4341 /* Rebuild the header template */ 4342 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4343 flowinfo); 4344 if (error != 0) { 4345 mutex_exit(&connp->conn_lock); 4346 goto ud_error; 4347 } 4348 } else if ((connp->conn_xmit_ipp.ipp_fields & 4349 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 4350 ipversion != connp->conn_lastipversion || 4351 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 4352 /* Rebuild the header template */ 4353 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4354 flowinfo); 4355 if (error != 0) { 4356 mutex_exit(&connp->conn_lock); 4357 goto ud_error; 4358 } 4359 } else { 4360 /* Simply update the destination address if no source route */ 4361 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4362 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 4363 4364 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 4365 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 4366 ipha->ipha_fragment_offset_and_flags |= 4367 IPH_DF_HTONS; 4368 } else { 4369 ipha->ipha_fragment_offset_and_flags &= 4370 ~IPH_DF_HTONS; 4371 } 4372 } else { 4373 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 4374 ip6h->ip6_dst = v6dst; 4375 } 4376 
} 4377 4378 /* 4379 * Remember the dst/dstport etc which corresponds to the built header 4380 * template and conn_ixa. 4381 */ 4382 oldixa = conn_replace_ixa(connp, ixa); 4383 connp->conn_v6lastdst = v6dst; 4384 connp->conn_lastipversion = ipversion; 4385 connp->conn_lastdstport = dstport; 4386 connp->conn_lastflowinfo = flowinfo; 4387 connp->conn_lastscopeid = ixa->ixa_scopeid; 4388 connp->conn_lastsrcid = srcid; 4389 /* Also remember a source to use together with lastdst */ 4390 connp->conn_v6lastsrc = v6src; 4391 4392 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 4393 dstport, flowinfo, &error); 4394 4395 /* Done with conn_t */ 4396 mutex_exit(&connp->conn_lock); 4397 ixa_refrele(oldixa); 4398 4399 if (data_mp == NULL) { 4400 ASSERT(error != 0); 4401 goto ud_error; 4402 } 4403 4404 /* We're done. Pass the packet to ip. */ 4405 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 4406 4407 error = conn_ip_output(data_mp, ixa); 4408 /* No udpOutErrors if an error since IP increases its error counter */ 4409 switch (error) { 4410 case 0: 4411 break; 4412 case EWOULDBLOCK: 4413 (void) ixa_check_drain_insert(connp, ixa); 4414 error = 0; 4415 break; 4416 case EADDRNOTAVAIL: 4417 /* 4418 * IXAF_VERIFY_SOURCE tells us to pick a better source. 4419 * Don't have the application see that errno 4420 */ 4421 error = ENETUNREACH; 4422 /* FALLTHRU */ 4423 default: 4424 mutex_enter(&connp->conn_lock); 4425 /* 4426 * Clear the source and v6lastdst so we call ip_attr_connect 4427 * for the next packet and try to pick a better source. 
4428 */ 4429 if (connp->conn_mcbc_bind) 4430 connp->conn_saddr_v6 = ipv6_all_zeros; 4431 else 4432 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 4433 connp->conn_v6lastdst = ipv6_all_zeros; 4434 mutex_exit(&connp->conn_lock); 4435 break; 4436 } 4437 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4438 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4439 ixa->ixa_cpid = connp->conn_cpid; 4440 ixa_refrele(ixa); 4441 return (error); 4442 4443 ud_error: 4444 ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED)); 4445 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4446 ixa->ixa_cpid = connp->conn_cpid; 4447 ixa_refrele(ixa); 4448 4449 freemsg(data_mp); 4450 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 4451 UDP_STAT(us, udp_out_err_output); 4452 return (error); 4453 } 4454 4455 /* ARGSUSED */ 4456 static void 4457 udp_wput_fallback(queue_t *wq, mblk_t *mp) 4458 { 4459 #ifdef DEBUG 4460 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 4461 #endif 4462 freemsg(mp); 4463 } 4464 4465 4466 /* 4467 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
4468 */ 4469 static void 4470 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 4471 { 4472 void *data; 4473 mblk_t *datamp = mp->b_cont; 4474 conn_t *connp = Q_TO_CONN(q); 4475 udp_t *udp = connp->conn_udp; 4476 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 4477 4478 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 4479 cmdp->cb_error = EPROTO; 4480 qreply(q, mp); 4481 return; 4482 } 4483 data = datamp->b_rptr; 4484 4485 mutex_enter(&connp->conn_lock); 4486 switch (cmdp->cb_cmd) { 4487 case TI_GETPEERNAME: 4488 if (udp->udp_state != TS_DATA_XFER) 4489 cmdp->cb_error = ENOTCONN; 4490 else 4491 cmdp->cb_error = conn_getpeername(connp, data, 4492 &cmdp->cb_len); 4493 break; 4494 case TI_GETMYNAME: 4495 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); 4496 break; 4497 default: 4498 cmdp->cb_error = EINVAL; 4499 break; 4500 } 4501 mutex_exit(&connp->conn_lock); 4502 4503 qreply(q, mp); 4504 } 4505 4506 static void 4507 udp_use_pure_tpi(udp_t *udp) 4508 { 4509 conn_t *connp = udp->udp_connp; 4510 4511 mutex_enter(&connp->conn_lock); 4512 udp->udp_issocket = B_FALSE; 4513 mutex_exit(&connp->conn_lock); 4514 UDP_STAT(udp->udp_us, udp_sock_fallback); 4515 } 4516 4517 static void 4518 udp_wput_other(queue_t *q, mblk_t *mp) 4519 { 4520 uchar_t *rptr = mp->b_rptr; 4521 struct iocblk *iocp; 4522 conn_t *connp = Q_TO_CONN(q); 4523 udp_t *udp = connp->conn_udp; 4524 udp_stack_t *us = udp->udp_us; 4525 cred_t *cr; 4526 4527 switch (mp->b_datap->db_type) { 4528 case M_CMD: 4529 udp_wput_cmdblk(q, mp); 4530 return; 4531 4532 case M_PROTO: 4533 case M_PCPROTO: 4534 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4535 /* 4536 * If the message does not contain a PRIM_type, 4537 * throw it away. 
4538 */ 4539 freemsg(mp); 4540 return; 4541 } 4542 switch (((t_primp_t)rptr)->type) { 4543 case T_ADDR_REQ: 4544 udp_addr_req(q, mp); 4545 return; 4546 case O_T_BIND_REQ: 4547 case T_BIND_REQ: 4548 udp_tpi_bind(q, mp); 4549 return; 4550 case T_CONN_REQ: 4551 udp_tpi_connect(q, mp); 4552 return; 4553 case T_CAPABILITY_REQ: 4554 udp_capability_req(q, mp); 4555 return; 4556 case T_INFO_REQ: 4557 udp_info_req(q, mp); 4558 return; 4559 case T_UNITDATA_REQ: 4560 /* 4561 * If a T_UNITDATA_REQ gets here, the address must 4562 * be bad. Valid T_UNITDATA_REQs are handled 4563 * in udp_wput. 4564 */ 4565 udp_ud_err(q, mp, EADDRNOTAVAIL); 4566 return; 4567 case T_UNBIND_REQ: 4568 udp_tpi_unbind(q, mp); 4569 return; 4570 case T_SVR4_OPTMGMT_REQ: 4571 /* 4572 * All Solaris components should pass a db_credp 4573 * for this TPI message, hence we ASSERT. 4574 * But in case there is some other M_PROTO that looks 4575 * like a TPI message sent by some other kernel 4576 * component, we check and return an error. 4577 */ 4578 cr = msg_getcred(mp, NULL); 4579 ASSERT(cr != NULL); 4580 if (cr == NULL) { 4581 udp_err_ack(q, mp, TSYSERR, EINVAL); 4582 return; 4583 } 4584 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 4585 cr)) { 4586 svr4_optcom_req(q, mp, cr, &udp_opt_obj); 4587 } 4588 return; 4589 4590 case T_OPTMGMT_REQ: 4591 /* 4592 * All Solaris components should pass a db_credp 4593 * for this TPI message, hence we ASSERT. 4594 * But in case there is some other M_PROTO that looks 4595 * like a TPI message sent by some other kernel 4596 * component, we check and return an error. 4597 */ 4598 cr = msg_getcred(mp, NULL); 4599 ASSERT(cr != NULL); 4600 if (cr == NULL) { 4601 udp_err_ack(q, mp, TSYSERR, EINVAL); 4602 return; 4603 } 4604 tpi_optcom_req(q, mp, cr, &udp_opt_obj); 4605 return; 4606 4607 case T_DISCON_REQ: 4608 udp_tpi_disconnect(q, mp); 4609 return; 4610 4611 /* The following TPI message is not supported by udp. 
*/ 4612 case O_T_CONN_RES: 4613 case T_CONN_RES: 4614 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4615 return; 4616 4617 /* The following 3 TPI requests are illegal for udp. */ 4618 case T_DATA_REQ: 4619 case T_EXDATA_REQ: 4620 case T_ORDREL_REQ: 4621 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4622 return; 4623 default: 4624 break; 4625 } 4626 break; 4627 case M_FLUSH: 4628 if (*rptr & FLUSHW) 4629 flushq(q, FLUSHDATA); 4630 break; 4631 case M_IOCTL: 4632 iocp = (struct iocblk *)mp->b_rptr; 4633 switch (iocp->ioc_cmd) { 4634 case TI_GETPEERNAME: 4635 if (udp->udp_state != TS_DATA_XFER) { 4636 /* 4637 * If a default destination address has not 4638 * been associated with the stream, then we 4639 * don't know the peer's name. 4640 */ 4641 iocp->ioc_error = ENOTCONN; 4642 iocp->ioc_count = 0; 4643 mp->b_datap->db_type = M_IOCACK; 4644 qreply(q, mp); 4645 return; 4646 } 4647 /* FALLTHRU */ 4648 case TI_GETMYNAME: 4649 /* 4650 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4651 * need to copyin the user's strbuf structure. 4652 * Processing will continue in the M_IOCDATA case 4653 * below. 4654 */ 4655 mi_copyin(q, mp, NULL, 4656 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4657 return; 4658 case ND_SET: 4659 /* nd_getset performs the necessary checking */ 4660 case ND_GET: 4661 if (nd_getset(q, us->us_nd, mp)) { 4662 qreply(q, mp); 4663 return; 4664 } 4665 break; 4666 case _SIOCSOCKFALLBACK: 4667 /* 4668 * Either sockmod is about to be popped and the 4669 * socket would now be treated as a plain stream, 4670 * or a module is about to be pushed so we have 4671 * to follow pure TPI semantics. 
4672 */ 4673 if (!udp->udp_issocket) { 4674 DB_TYPE(mp) = M_IOCNAK; 4675 iocp->ioc_error = EINVAL; 4676 } else { 4677 udp_use_pure_tpi(udp); 4678 4679 DB_TYPE(mp) = M_IOCACK; 4680 iocp->ioc_error = 0; 4681 } 4682 iocp->ioc_count = 0; 4683 iocp->ioc_rval = 0; 4684 qreply(q, mp); 4685 return; 4686 default: 4687 break; 4688 } 4689 break; 4690 case M_IOCDATA: 4691 udp_wput_iocdata(q, mp); 4692 return; 4693 default: 4694 /* Unrecognized messages are passed through without change. */ 4695 break; 4696 } 4697 ip_wput_nondata(q, mp); 4698 } 4699 4700 /* 4701 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 4702 * messages. 4703 */ 4704 static void 4705 udp_wput_iocdata(queue_t *q, mblk_t *mp) 4706 { 4707 mblk_t *mp1; 4708 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4709 STRUCT_HANDLE(strbuf, sb); 4710 uint_t addrlen; 4711 conn_t *connp = Q_TO_CONN(q); 4712 udp_t *udp = connp->conn_udp; 4713 4714 /* Make sure it is one of ours. */ 4715 switch (iocp->ioc_cmd) { 4716 case TI_GETMYNAME: 4717 case TI_GETPEERNAME: 4718 break; 4719 default: 4720 ip_wput_nondata(q, mp); 4721 return; 4722 } 4723 4724 switch (mi_copy_state(q, mp, &mp1)) { 4725 case -1: 4726 return; 4727 case MI_COPY_CASE(MI_COPY_IN, 1): 4728 break; 4729 case MI_COPY_CASE(MI_COPY_OUT, 1): 4730 /* 4731 * The address has been copied out, so now 4732 * copyout the strbuf. 4733 */ 4734 mi_copyout(q, mp); 4735 return; 4736 case MI_COPY_CASE(MI_COPY_OUT, 2): 4737 /* 4738 * The address and strbuf have been copied out. 4739 * We're done, so just acknowledge the original 4740 * M_IOCTL. 4741 */ 4742 mi_copy_done(q, mp, 0); 4743 return; 4744 default: 4745 /* 4746 * Something strange has happened, so acknowledge 4747 * the original M_IOCTL with an EPROTO error. 4748 */ 4749 mi_copy_done(q, mp, EPROTO); 4750 return; 4751 } 4752 4753 /* 4754 * Now we have the strbuf structure for TI_GETMYNAME 4755 * and TI_GETPEERNAME. Next we copyout the requested 4756 * address and then we'll copyout the strbuf. 
4757 */ 4758 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 4759 4760 if (connp->conn_family == AF_INET) 4761 addrlen = sizeof (sin_t); 4762 else 4763 addrlen = sizeof (sin6_t); 4764 4765 if (STRUCT_FGET(sb, maxlen) < addrlen) { 4766 mi_copy_done(q, mp, EINVAL); 4767 return; 4768 } 4769 4770 switch (iocp->ioc_cmd) { 4771 case TI_GETMYNAME: 4772 break; 4773 case TI_GETPEERNAME: 4774 if (udp->udp_state != TS_DATA_XFER) { 4775 mi_copy_done(q, mp, ENOTCONN); 4776 return; 4777 } 4778 break; 4779 } 4780 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 4781 if (!mp1) 4782 return; 4783 4784 STRUCT_FSET(sb, len, addrlen); 4785 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4786 case TI_GETMYNAME: 4787 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, 4788 &addrlen); 4789 break; 4790 case TI_GETPEERNAME: 4791 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, 4792 &addrlen); 4793 break; 4794 } 4795 mp1->b_wptr += addrlen; 4796 /* Copy out the address */ 4797 mi_copyout(q, mp); 4798 } 4799 4800 void 4801 udp_ddi_g_init(void) 4802 { 4803 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 4804 udp_opt_obj.odb_opt_arr_cnt); 4805 4806 /* 4807 * We want to be informed each time a stack is created or 4808 * destroyed in the kernel, so we can maintain the 4809 * set of udp_stack_t's. 4810 */ 4811 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 4812 } 4813 4814 void 4815 udp_ddi_g_destroy(void) 4816 { 4817 netstack_unregister(NS_UDP); 4818 } 4819 4820 #define INET_NAME "ip" 4821 4822 /* 4823 * Initialize the UDP stack instance. 
4824 */ 4825 static void * 4826 udp_stack_init(netstackid_t stackid, netstack_t *ns) 4827 { 4828 udp_stack_t *us; 4829 udpparam_t *pa; 4830 int i; 4831 int error = 0; 4832 major_t major; 4833 4834 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 4835 us->us_netstack = ns; 4836 4837 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 4838 us->us_epriv_ports[0] = 2049; 4839 us->us_epriv_ports[1] = 4045; 4840 4841 /* 4842 * The smallest anonymous port in the priviledged port range which UDP 4843 * looks for free port. Use in the option UDP_ANONPRIVBIND. 4844 */ 4845 us->us_min_anonpriv_port = 512; 4846 4847 us->us_bind_fanout_size = udp_bind_fanout_size; 4848 4849 /* Roundup variable that might have been modified in /etc/system */ 4850 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 4851 /* Not a power of two. Round up to nearest power of two */ 4852 for (i = 0; i < 31; i++) { 4853 if (us->us_bind_fanout_size < (1 << i)) 4854 break; 4855 } 4856 us->us_bind_fanout_size = 1 << i; 4857 } 4858 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 4859 sizeof (udp_fanout_t), KM_SLEEP); 4860 for (i = 0; i < us->us_bind_fanout_size; i++) { 4861 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 4862 NULL); 4863 } 4864 4865 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 4866 4867 us->us_param_arr = pa; 4868 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 4869 4870 (void) udp_param_register(&us->us_nd, 4871 us->us_param_arr, A_CNT(udp_param_arr)); 4872 4873 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 4874 us->us_mibkp = udp_kstat_init(stackid); 4875 4876 major = mod_name_to_major(INET_NAME); 4877 error = ldi_ident_from_major(major, &us->us_ldi_ident); 4878 ASSERT(error == 0); 4879 return (us); 4880 } 4881 4882 /* 4883 * Free the UDP stack instance. 
4884 */ 4885 static void 4886 udp_stack_fini(netstackid_t stackid, void *arg) 4887 { 4888 udp_stack_t *us = (udp_stack_t *)arg; 4889 int i; 4890 4891 for (i = 0; i < us->us_bind_fanout_size; i++) { 4892 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 4893 } 4894 4895 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 4896 sizeof (udp_fanout_t)); 4897 4898 us->us_bind_fanout = NULL; 4899 4900 nd_free(&us->us_nd); 4901 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 4902 us->us_param_arr = NULL; 4903 4904 udp_kstat_fini(stackid, us->us_mibkp); 4905 us->us_mibkp = NULL; 4906 4907 udp_kstat2_fini(stackid, us->us_kstat); 4908 us->us_kstat = NULL; 4909 bzero(&us->us_statistics, sizeof (us->us_statistics)); 4910 4911 ldi_ident_release(us->us_ldi_ident); 4912 kmem_free(us, sizeof (*us)); 4913 } 4914 4915 static void * 4916 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 4917 { 4918 kstat_t *ksp; 4919 4920 udp_stat_t template = { 4921 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 4922 { "udp_out_opt", KSTAT_DATA_UINT64 }, 4923 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 4924 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 4925 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 4926 #ifdef DEBUG 4927 { "udp_data_conn", KSTAT_DATA_UINT64 }, 4928 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 4929 { "udp_out_lastdst", KSTAT_DATA_UINT64 }, 4930 { "udp_out_diffdst", KSTAT_DATA_UINT64 }, 4931 { "udp_out_ipv6", KSTAT_DATA_UINT64 }, 4932 { "udp_out_mapped", KSTAT_DATA_UINT64 }, 4933 { "udp_out_ipv4", KSTAT_DATA_UINT64 }, 4934 #endif 4935 }; 4936 4937 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 4938 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4939 KSTAT_FLAG_VIRTUAL, stackid); 4940 4941 if (ksp == NULL) 4942 return (NULL); 4943 4944 bcopy(&template, us_statisticsp, sizeof (template)); 4945 ksp->ks_data = (void *)us_statisticsp; 4946 ksp->ks_private = (void *)(uintptr_t)stackid; 4947 4948 kstat_install(ksp); 4949 return (ksp); 4950 } 
4951 4952 static void 4953 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 4954 { 4955 if (ksp != NULL) { 4956 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4957 kstat_delete_netstack(ksp, stackid); 4958 } 4959 } 4960 4961 static void * 4962 udp_kstat_init(netstackid_t stackid) 4963 { 4964 kstat_t *ksp; 4965 4966 udp_named_kstat_t template = { 4967 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 4968 { "inErrors", KSTAT_DATA_UINT32, 0 }, 4969 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 4970 { "entrySize", KSTAT_DATA_INT32, 0 }, 4971 { "entry6Size", KSTAT_DATA_INT32, 0 }, 4972 { "outErrors", KSTAT_DATA_UINT32, 0 }, 4973 }; 4974 4975 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 4976 KSTAT_TYPE_NAMED, 4977 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 4978 4979 if (ksp == NULL || ksp->ks_data == NULL) 4980 return (NULL); 4981 4982 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 4983 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 4984 4985 bcopy(&template, ksp->ks_data, sizeof (template)); 4986 ksp->ks_update = udp_kstat_update; 4987 ksp->ks_private = (void *)(uintptr_t)stackid; 4988 4989 kstat_install(ksp); 4990 return (ksp); 4991 } 4992 4993 static void 4994 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4995 { 4996 if (ksp != NULL) { 4997 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4998 kstat_delete_netstack(ksp, stackid); 4999 } 5000 } 5001 5002 static int 5003 udp_kstat_update(kstat_t *kp, int rw) 5004 { 5005 udp_named_kstat_t *udpkp; 5006 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 5007 netstack_t *ns; 5008 udp_stack_t *us; 5009 5010 if ((kp == NULL) || (kp->ks_data == NULL)) 5011 return (EIO); 5012 5013 if (rw == KSTAT_WRITE) 5014 return (EACCES); 5015 5016 ns = netstack_find_by_stackid(stackid); 5017 if (ns == NULL) 5018 return (-1); 5019 us = ns->netstack_udp; 5020 if (us == NULL) { 5021 netstack_rele(ns); 5022 return (-1); 5023 } 5024 udpkp = (udp_named_kstat_t 
*)kp->ks_data; 5025 5026 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 5027 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 5028 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 5029 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 5030 netstack_rele(ns); 5031 return (0); 5032 } 5033 5034 static size_t 5035 udp_set_rcv_hiwat(udp_t *udp, size_t size) 5036 { 5037 udp_stack_t *us = udp->udp_us; 5038 5039 /* We add a bit of extra buffering */ 5040 size += size >> 1; 5041 if (size > us->us_max_buf) 5042 size = us->us_max_buf; 5043 5044 udp->udp_rcv_hiwat = size; 5045 return (size); 5046 } 5047 5048 /* 5049 * For the lower queue so that UDP can be a dummy mux. 5050 * Nobody should be sending 5051 * packets up this stream 5052 */ 5053 static void 5054 udp_lrput(queue_t *q, mblk_t *mp) 5055 { 5056 switch (mp->b_datap->db_type) { 5057 case M_FLUSH: 5058 /* Turn around */ 5059 if (*mp->b_rptr & FLUSHW) { 5060 *mp->b_rptr &= ~FLUSHR; 5061 qreply(q, mp); 5062 return; 5063 } 5064 break; 5065 } 5066 freemsg(mp); 5067 } 5068 5069 /* 5070 * For the lower queue so that UDP can be a dummy mux. 5071 * Nobody should be sending packets down this stream. 5072 */ 5073 /* ARGSUSED */ 5074 void 5075 udp_lwput(queue_t *q, mblk_t *mp) 5076 { 5077 freemsg(mp); 5078 } 5079 5080 /* 5081 * Below routines for UDP socket module. 5082 */ 5083 5084 static conn_t * 5085 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp) 5086 { 5087 udp_t *udp; 5088 conn_t *connp; 5089 zoneid_t zoneid; 5090 netstack_t *ns; 5091 udp_stack_t *us; 5092 int len; 5093 5094 ASSERT(errorp != NULL); 5095 5096 if ((*errorp = secpolicy_basic_net_access(credp)) != 0) 5097 return (NULL); 5098 5099 ns = netstack_find_by_cred(credp); 5100 ASSERT(ns != NULL); 5101 us = ns->netstack_udp; 5102 ASSERT(us != NULL); 5103 5104 /* 5105 * For exclusive stacks we set the zoneid to zero 5106 * to make UDP operate as if in the global zone. 
5107 */ 5108 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 5109 zoneid = GLOBAL_ZONEID; 5110 else 5111 zoneid = crgetzoneid(credp); 5112 5113 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 5114 5115 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 5116 if (connp == NULL) { 5117 netstack_rele(ns); 5118 *errorp = ENOMEM; 5119 return (NULL); 5120 } 5121 udp = connp->conn_udp; 5122 5123 /* 5124 * ipcl_conn_create did a netstack_hold. Undo the hold that was 5125 * done by netstack_find_by_cred() 5126 */ 5127 netstack_rele(ns); 5128 5129 /* 5130 * Since this conn_t/udp_t is not yet visible to anybody else we don't 5131 * need to lock anything. 5132 */ 5133 ASSERT(connp->conn_proto == IPPROTO_UDP); 5134 ASSERT(connp->conn_udp == udp); 5135 ASSERT(udp->udp_connp == connp); 5136 5137 /* Set the initial state of the stream and the privilege status. */ 5138 udp->udp_state = TS_UNBND; 5139 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 5140 if (isv6) { 5141 connp->conn_family = AF_INET6; 5142 connp->conn_ipversion = IPV6_VERSION; 5143 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5144 connp->conn_default_ttl = us->us_ipv6_hoplimit; 5145 len = sizeof (ip6_t) + UDPH_SIZE; 5146 } else { 5147 connp->conn_family = AF_INET; 5148 connp->conn_ipversion = IPV4_VERSION; 5149 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5150 connp->conn_default_ttl = us->us_ipv4_ttl; 5151 len = sizeof (ipha_t) + UDPH_SIZE; 5152 } 5153 5154 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 5155 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 5156 5157 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 5158 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 5159 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 5160 connp->conn_ixa->ixa_zoneid = zoneid; 5161 5162 connp->conn_zoneid = zoneid; 5163 5164 /* 5165 * If the caller has the process-wide flag set, then default to MAC 5166 * exempt mode. 
This allows read-down to unlabeled hosts. 5167 */ 5168 if (getpflags(NET_MAC_AWARE, credp) != 0) 5169 connp->conn_mac_mode = CONN_MAC_AWARE; 5170 5171 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 5172 5173 udp->udp_us = us; 5174 5175 connp->conn_rcvbuf = us->us_recv_hiwat; 5176 connp->conn_sndbuf = us->us_xmit_hiwat; 5177 connp->conn_sndlowat = us->us_xmit_lowat; 5178 connp->conn_rcvlowat = udp_mod_info.mi_lowat; 5179 5180 connp->conn_wroff = len + us->us_wroff_extra; 5181 connp->conn_so_type = SOCK_DGRAM; 5182 5183 connp->conn_recv = udp_input; 5184 connp->conn_recvicmp = udp_icmp_input; 5185 crhold(credp); 5186 connp->conn_cred = credp; 5187 connp->conn_cpid = curproc->p_pid; 5188 connp->conn_open_time = ddi_get_lbolt64(); 5189 /* Cache things in ixa without an extra refhold */ 5190 ASSERT(!(connp->conn_ixa->ixa_free_flags & IXA_FREE_CRED)); 5191 connp->conn_ixa->ixa_cred = connp->conn_cred; 5192 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 5193 if (is_system_labeled()) 5194 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 5195 5196 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 5197 5198 if (us->us_pmtu_discovery) 5199 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 5200 5201 return (connp); 5202 } 5203 5204 sock_lower_handle_t 5205 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 5206 uint_t *smodep, int *errorp, int flags, cred_t *credp) 5207 { 5208 udp_t *udp = NULL; 5209 udp_stack_t *us; 5210 conn_t *connp; 5211 boolean_t isv6; 5212 5213 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 5214 (proto != 0 && proto != IPPROTO_UDP)) { 5215 *errorp = EPROTONOSUPPORT; 5216 return (NULL); 5217 } 5218 5219 if (family == AF_INET6) 5220 isv6 = B_TRUE; 5221 else 5222 isv6 = B_FALSE; 5223 5224 connp = udp_do_open(credp, isv6, flags, errorp); 5225 if (connp == NULL) 5226 return (NULL); 5227 5228 udp = connp->conn_udp; 5229 ASSERT(udp != NULL); 5230 us = udp->udp_us; 5231 ASSERT(us != 
NULL); 5232 5233 udp->udp_issocket = B_TRUE; 5234 connp->conn_flags |= IPCL_NONSTR; 5235 5236 /* 5237 * Set flow control 5238 * Since this conn_t/udp_t is not yet visible to anybody else we don't 5239 * need to lock anything. 5240 */ 5241 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); 5242 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; 5243 5244 connp->conn_flow_cntrld = B_FALSE; 5245 5246 mutex_enter(&connp->conn_lock); 5247 connp->conn_state_flags &= ~CONN_INCIPIENT; 5248 mutex_exit(&connp->conn_lock); 5249 5250 *errorp = 0; 5251 *smodep = SM_ATOMIC; 5252 *sock_downcalls = &sock_udp_downcalls; 5253 return ((sock_lower_handle_t)connp); 5254 } 5255 5256 /* ARGSUSED3 */ 5257 void 5258 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 5259 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 5260 { 5261 conn_t *connp = (conn_t *)proto_handle; 5262 struct sock_proto_props sopp; 5263 5264 /* All Solaris components should pass a cred for this operation. */ 5265 ASSERT(cr != NULL); 5266 5267 connp->conn_upcalls = sock_upcalls; 5268 connp->conn_upper_handle = sock_handle; 5269 5270 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 5271 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 5272 sopp.sopp_wroff = connp->conn_wroff; 5273 sopp.sopp_maxblk = INFPSZ; 5274 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 5275 sopp.sopp_rxlowat = connp->conn_rcvlowat; 5276 sopp.sopp_maxaddrlen = sizeof (sin6_t); 5277 sopp.sopp_maxpsz = 5278 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 5279 UDP_MAXPACKET_IPV6; 5280 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : 5281 udp_mod_info.mi_minpsz; 5282 5283 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 5284 &sopp); 5285 } 5286 5287 static void 5288 udp_do_close(conn_t *connp) 5289 { 5290 udp_t *udp; 5291 5292 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 5293 udp = connp->conn_udp; 5294 5295 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 5296 /* 5297 * Running in cluster mode - register unbind information 5298 */ 5299 if (connp->conn_ipversion == IPV4_VERSION) { 5300 (*cl_inet_unbind)( 5301 connp->conn_netstack->netstack_stackid, 5302 IPPROTO_UDP, AF_INET, 5303 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5304 (in_port_t)connp->conn_lport, NULL); 5305 } else { 5306 (*cl_inet_unbind)( 5307 connp->conn_netstack->netstack_stackid, 5308 IPPROTO_UDP, AF_INET6, 5309 (uint8_t *)&(connp->conn_laddr_v6), 5310 (in_port_t)connp->conn_lport, NULL); 5311 } 5312 } 5313 5314 udp_bind_hash_remove(udp, B_FALSE); 5315 5316 ip_quiesce_conn(connp); 5317 5318 if (!IPCL_IS_NONSTR(connp)) { 5319 ASSERT(connp->conn_wq != NULL); 5320 ASSERT(connp->conn_rq != NULL); 5321 qprocsoff(connp->conn_rq); 5322 } 5323 5324 udp_close_free(connp); 5325 5326 /* 5327 * Now we are truly single threaded on this stream, and can 5328 * delete the things hanging off the connp, and finally the connp. 5329 * We removed this connp from the fanout list, it cannot be 5330 * accessed thru the fanouts, and we already waited for the 5331 * conn_ref to drop to 0. We are already in close, so 5332 * there cannot be any other thread from the top. qprocsoff 5333 * has completed, and service has completed or won't run in 5334 * future. 
5335 */ 5336 ASSERT(connp->conn_ref == 1); 5337 5338 if (!IPCL_IS_NONSTR(connp)) { 5339 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 5340 } else { 5341 ip_free_helper_stream(connp); 5342 } 5343 5344 connp->conn_ref--; 5345 ipcl_conn_destroy(connp); 5346 } 5347 5348 /* ARGSUSED1 */ 5349 int 5350 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 5351 { 5352 conn_t *connp = (conn_t *)proto_handle; 5353 5354 /* All Solaris components should pass a cred for this operation. */ 5355 ASSERT(cr != NULL); 5356 5357 udp_do_close(connp); 5358 return (0); 5359 } 5360 5361 static int 5362 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 5363 boolean_t bind_to_req_port_only) 5364 { 5365 sin_t *sin; 5366 sin6_t *sin6; 5367 udp_t *udp = connp->conn_udp; 5368 int error = 0; 5369 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 5370 in_port_t port; /* Host byte order */ 5371 in_port_t requested_port; /* Host byte order */ 5372 int count; 5373 ipaddr_t v4src; /* Set if AF_INET */ 5374 in6_addr_t v6src; 5375 int loopmax; 5376 udp_fanout_t *udpf; 5377 in_port_t lport; /* Network byte order */ 5378 uint_t scopeid = 0; 5379 zoneid_t zoneid = IPCL_ZONEID(connp); 5380 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5381 boolean_t is_inaddr_any; 5382 mlp_type_t addrtype, mlptype; 5383 udp_stack_t *us = udp->udp_us; 5384 5385 switch (len) { 5386 case sizeof (sin_t): /* Complete IPv4 address */ 5387 sin = (sin_t *)sa; 5388 5389 if (sin == NULL || !OK_32PTR((char *)sin)) 5390 return (EINVAL); 5391 5392 if (connp->conn_family != AF_INET || 5393 sin->sin_family != AF_INET) { 5394 return (EAFNOSUPPORT); 5395 } 5396 v4src = sin->sin_addr.s_addr; 5397 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 5398 if (v4src != INADDR_ANY) { 5399 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 5400 B_TRUE); 5401 } 5402 port = ntohs(sin->sin_port); 5403 break; 5404 5405 case sizeof (sin6_t): /* complete IPv6 address */ 5406 sin6 = (sin6_t *)sa; 
5407 5408 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 5409 return (EINVAL); 5410 5411 if (connp->conn_family != AF_INET6 || 5412 sin6->sin6_family != AF_INET6) { 5413 return (EAFNOSUPPORT); 5414 } 5415 v6src = sin6->sin6_addr; 5416 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5417 if (connp->conn_ipv6_v6only) 5418 return (EADDRNOTAVAIL); 5419 5420 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); 5421 if (v4src != INADDR_ANY) { 5422 laddr_type = ip_laddr_verify_v4(v4src, 5423 zoneid, ipst, B_FALSE); 5424 } 5425 } else { 5426 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5427 if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 5428 scopeid = sin6->sin6_scope_id; 5429 laddr_type = ip_laddr_verify_v6(&v6src, 5430 zoneid, ipst, B_TRUE, scopeid); 5431 } 5432 } 5433 port = ntohs(sin6->sin6_port); 5434 break; 5435 5436 default: /* Invalid request */ 5437 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5438 "udp_bind: bad ADDR_length length %u", len); 5439 return (-TBADADDR); 5440 } 5441 5442 /* Is the local address a valid unicast, multicast, or broadcast? */ 5443 if (laddr_type == IPVL_BAD) 5444 return (EADDRNOTAVAIL); 5445 5446 requested_port = port; 5447 5448 if (requested_port == 0 || !bind_to_req_port_only) 5449 bind_to_req_port_only = B_FALSE; 5450 else /* T_BIND_REQ and requested_port != 0 */ 5451 bind_to_req_port_only = B_TRUE; 5452 5453 if (requested_port == 0) { 5454 /* 5455 * If the application passed in zero for the port number, it 5456 * doesn't care which port number we bind to. Get one in the 5457 * valid range. 5458 */ 5459 if (connp->conn_anon_priv_bind) { 5460 port = udp_get_next_priv_port(udp); 5461 } else { 5462 port = udp_update_next_port(udp, 5463 us->us_next_port_to_try, B_TRUE); 5464 } 5465 } else { 5466 /* 5467 * If the port is in the well-known privileged range, 5468 * make sure the caller was privileged. 
5469 */ 5470 int i; 5471 boolean_t priv = B_FALSE; 5472 5473 if (port < us->us_smallest_nonpriv_port) { 5474 priv = B_TRUE; 5475 } else { 5476 for (i = 0; i < us->us_num_epriv_ports; i++) { 5477 if (port == us->us_epriv_ports[i]) { 5478 priv = B_TRUE; 5479 break; 5480 } 5481 } 5482 } 5483 5484 if (priv) { 5485 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 5486 return (-TACCES); 5487 } 5488 } 5489 5490 if (port == 0) 5491 return (-TNOADDR); 5492 5493 /* 5494 * The state must be TS_UNBND. TPI mandates that users must send 5495 * TPI primitives only 1 at a time and wait for the response before 5496 * sending the next primitive. 5497 */ 5498 mutex_enter(&connp->conn_lock); 5499 if (udp->udp_state != TS_UNBND) { 5500 mutex_exit(&connp->conn_lock); 5501 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5502 "udp_bind: bad state, %u", udp->udp_state); 5503 return (-TOUTSTATE); 5504 } 5505 /* 5506 * Copy the source address into our udp structure. This address 5507 * may still be zero; if so, IP will fill in the correct address 5508 * each time an outbound packet is passed to it. Since the udp is 5509 * not yet in the bind hash list, we don't grab the uf_lock to 5510 * change conn_ipversion 5511 */ 5512 if (connp->conn_family == AF_INET) { 5513 ASSERT(sin != NULL); 5514 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); 5515 } else { 5516 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5517 /* 5518 * no need to hold the uf_lock to set the conn_ipversion 5519 * since we are not yet in the fanout list 5520 */ 5521 connp->conn_ipversion = IPV4_VERSION; 5522 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5523 } else { 5524 connp->conn_ipversion = IPV6_VERSION; 5525 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5526 } 5527 } 5528 5529 /* 5530 * If conn_reuseaddr is not set, then we have to make sure that 5531 * the IP address and port number the application requested 5532 * (or we selected for the application) is not being used by 5533 * another stream. 
If another stream is already using the 5534 * requested IP address and port, the behavior depends on 5535 * "bind_to_req_port_only". If set the bind fails; otherwise we 5536 * search for any an unused port to bind to the stream. 5537 * 5538 * As per the BSD semantics, as modified by the Deering multicast 5539 * changes, if udp_reuseaddr is set, then we allow multiple binds 5540 * to the same port independent of the local IP address. 5541 * 5542 * This is slightly different than in SunOS 4.X which did not 5543 * support IP multicast. Note that the change implemented by the 5544 * Deering multicast code effects all binds - not only binding 5545 * to IP multicast addresses. 5546 * 5547 * Note that when binding to port zero we ignore SO_REUSEADDR in 5548 * order to guarantee a unique port. 5549 */ 5550 5551 count = 0; 5552 if (connp->conn_anon_priv_bind) { 5553 /* 5554 * loopmax = (IPPORT_RESERVED-1) - 5555 * us->us_min_anonpriv_port + 1 5556 */ 5557 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 5558 } else { 5559 loopmax = us->us_largest_anon_port - 5560 us->us_smallest_anon_port + 1; 5561 } 5562 5563 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 5564 5565 for (;;) { 5566 udp_t *udp1; 5567 boolean_t found_exclbind = B_FALSE; 5568 conn_t *connp1; 5569 5570 /* 5571 * Walk through the list of udp streams bound to 5572 * requested port with the same IP address. 5573 */ 5574 lport = htons(port); 5575 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 5576 us->us_bind_fanout_size)]; 5577 mutex_enter(&udpf->uf_lock); 5578 for (udp1 = udpf->uf_udp; udp1 != NULL; 5579 udp1 = udp1->udp_bind_hash) { 5580 connp1 = udp1->udp_connp; 5581 5582 if (lport != connp1->conn_lport) 5583 continue; 5584 5585 /* 5586 * On a labeled system, we must treat bindings to ports 5587 * on shared IP addresses by sockets with MAC exemption 5588 * privilege as being in all zones, as there's 5589 * otherwise no way to identify the right receiver. 
5590 */ 5591 if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) 5592 continue; 5593 5594 /* 5595 * If UDP_EXCLBIND is set for either the bound or 5596 * binding endpoint, the semantics of bind 5597 * is changed according to the following chart. 5598 * 5599 * spec = specified address (v4 or v6) 5600 * unspec = unspecified address (v4 or v6) 5601 * A = specified addresses are different for endpoints 5602 * 5603 * bound bind to allowed? 5604 * ------------------------------------- 5605 * unspec unspec no 5606 * unspec spec no 5607 * spec unspec no 5608 * spec spec yes if A 5609 * 5610 * For labeled systems, SO_MAC_EXEMPT behaves the same 5611 * as UDP_EXCLBIND, except that zoneid is ignored. 5612 */ 5613 if (connp1->conn_exclbind || connp->conn_exclbind || 5614 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 5615 if (V6_OR_V4_INADDR_ANY( 5616 connp1->conn_bound_addr_v6) || 5617 is_inaddr_any || 5618 IN6_ARE_ADDR_EQUAL( 5619 &connp1->conn_bound_addr_v6, 5620 &v6src)) { 5621 found_exclbind = B_TRUE; 5622 break; 5623 } 5624 continue; 5625 } 5626 5627 /* 5628 * Check ipversion to allow IPv4 and IPv6 sockets to 5629 * have disjoint port number spaces. 5630 */ 5631 if (connp->conn_ipversion != connp1->conn_ipversion) { 5632 5633 /* 5634 * On the first time through the loop, if the 5635 * the user intentionally specified a 5636 * particular port number, then ignore any 5637 * bindings of the other protocol that may 5638 * conflict. This allows the user to bind IPv6 5639 * alone and get both v4 and v6, or bind both 5640 * both and get each seperately. On subsequent 5641 * times through the loop, we're checking a 5642 * port that we chose (not the user) and thus 5643 * we do not allow casual duplicate bindings. 5644 */ 5645 if (count == 0 && requested_port != 0) 5646 continue; 5647 } 5648 5649 /* 5650 * No difference depending on SO_REUSEADDR. 
5651 * 5652 * If existing port is bound to a 5653 * non-wildcard IP address and 5654 * the requesting stream is bound to 5655 * a distinct different IP addresses 5656 * (non-wildcard, also), keep going. 5657 */ 5658 if (!is_inaddr_any && 5659 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && 5660 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, 5661 &v6src)) { 5662 continue; 5663 } 5664 break; 5665 } 5666 5667 if (!found_exclbind && 5668 (connp->conn_reuseaddr && requested_port != 0)) { 5669 break; 5670 } 5671 5672 if (udp1 == NULL) { 5673 /* 5674 * No other stream has this IP address 5675 * and port number. We can use it. 5676 */ 5677 break; 5678 } 5679 mutex_exit(&udpf->uf_lock); 5680 if (bind_to_req_port_only) { 5681 /* 5682 * We get here only when requested port 5683 * is bound (and only first of the for() 5684 * loop iteration). 5685 * 5686 * The semantics of this bind request 5687 * require it to fail so we return from 5688 * the routine (and exit the loop). 5689 * 5690 */ 5691 mutex_exit(&connp->conn_lock); 5692 return (-TADDRBUSY); 5693 } 5694 5695 if (connp->conn_anon_priv_bind) { 5696 port = udp_get_next_priv_port(udp); 5697 } else { 5698 if ((count == 0) && (requested_port != 0)) { 5699 /* 5700 * If the application wants us to find 5701 * a port, get one to start with. Set 5702 * requested_port to 0, so that we will 5703 * update us->us_next_port_to_try below. 5704 */ 5705 port = udp_update_next_port(udp, 5706 us->us_next_port_to_try, B_TRUE); 5707 requested_port = 0; 5708 } else { 5709 port = udp_update_next_port(udp, port + 1, 5710 B_FALSE); 5711 } 5712 } 5713 5714 if (port == 0 || ++count >= loopmax) { 5715 /* 5716 * We've tried every possible port number and 5717 * there are none available, so send an error 5718 * to the user. 5719 */ 5720 mutex_exit(&connp->conn_lock); 5721 return (-TNOADDR); 5722 } 5723 } 5724 5725 /* 5726 * Copy the source address into our udp structure. 
This address 5727 * may still be zero; if so, ip_attr_connect will fill in the correct 5728 * address when a packet is about to be sent. 5729 * If we are binding to a broadcast or multicast address then 5730 * we just set the conn_bound_addr since we don't want to use 5731 * that as the source address when sending. 5732 */ 5733 connp->conn_bound_addr_v6 = v6src; 5734 connp->conn_laddr_v6 = v6src; 5735 if (scopeid != 0) { 5736 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 5737 connp->conn_ixa->ixa_scopeid = scopeid; 5738 connp->conn_incoming_ifindex = scopeid; 5739 } else { 5740 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5741 connp->conn_incoming_ifindex = connp->conn_bound_if; 5742 } 5743 5744 switch (laddr_type) { 5745 case IPVL_UNICAST_UP: 5746 case IPVL_UNICAST_DOWN: 5747 connp->conn_saddr_v6 = v6src; 5748 connp->conn_mcbc_bind = B_FALSE; 5749 break; 5750 case IPVL_MCAST: 5751 case IPVL_BCAST: 5752 /* ip_set_destination will pick a source address later */ 5753 connp->conn_saddr_v6 = ipv6_all_zeros; 5754 connp->conn_mcbc_bind = B_TRUE; 5755 break; 5756 } 5757 5758 /* Any errors after this point should use late_error */ 5759 connp->conn_lport = lport; 5760 5761 /* 5762 * Now reset the next anonymous port if the application requested 5763 * an anonymous port, or we handed out the next anonymous port. 5764 */ 5765 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { 5766 us->us_next_port_to_try = port + 1; 5767 } 5768 5769 /* Initialize the T_BIND_ACK. 
*/ 5770 if (connp->conn_family == AF_INET) { 5771 sin->sin_port = connp->conn_lport; 5772 } else { 5773 sin6->sin6_port = connp->conn_lport; 5774 } 5775 udp->udp_state = TS_IDLE; 5776 udp_bind_hash_insert(udpf, udp); 5777 mutex_exit(&udpf->uf_lock); 5778 mutex_exit(&connp->conn_lock); 5779 5780 if (cl_inet_bind) { 5781 /* 5782 * Running in cluster mode - register bind information 5783 */ 5784 if (connp->conn_ipversion == IPV4_VERSION) { 5785 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5786 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, 5787 (in_port_t)connp->conn_lport, NULL); 5788 } else { 5789 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5790 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, 5791 (in_port_t)connp->conn_lport, NULL); 5792 } 5793 } 5794 5795 mutex_enter(&connp->conn_lock); 5796 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 5797 if (is_system_labeled() && (!connp->conn_anon_port || 5798 connp->conn_anon_mlp)) { 5799 uint16_t mlpport; 5800 zone_t *zone; 5801 5802 zone = crgetzone(cr); 5803 connp->conn_mlp_type = 5804 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : 5805 mlptSingle; 5806 addrtype = tsol_mlp_addr_type( 5807 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 5808 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 5809 if (addrtype == mlptSingle) { 5810 error = -TNOADDR; 5811 mutex_exit(&connp->conn_lock); 5812 goto late_error; 5813 } 5814 mlpport = connp->conn_anon_port ? PMAPPORT : port; 5815 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 5816 addrtype); 5817 5818 /* 5819 * It is a coding error to attempt to bind an MLP port 5820 * without first setting SOL_SOCKET/SCM_UCRED. 5821 */ 5822 if (mlptype != mlptSingle && 5823 connp->conn_mlp_type == mlptSingle) { 5824 error = EINVAL; 5825 mutex_exit(&connp->conn_lock); 5826 goto late_error; 5827 } 5828 5829 /* 5830 * It is an access violation to attempt to bind an MLP port 5831 * without NET_BINDMLP privilege. 
5832 */ 5833 if (mlptype != mlptSingle && 5834 secpolicy_net_bindmlp(cr) != 0) { 5835 if (connp->conn_debug) { 5836 (void) strlog(UDP_MOD_ID, 0, 1, 5837 SL_ERROR|SL_TRACE, 5838 "udp_bind: no priv for multilevel port %d", 5839 mlpport); 5840 } 5841 error = -TACCES; 5842 mutex_exit(&connp->conn_lock); 5843 goto late_error; 5844 } 5845 5846 /* 5847 * If we're specifically binding a shared IP address and the 5848 * port is MLP on shared addresses, then check to see if this 5849 * zone actually owns the MLP. Reject if not. 5850 */ 5851 if (mlptype == mlptShared && addrtype == mlptShared) { 5852 /* 5853 * No need to handle exclusive-stack zones since 5854 * ALL_ZONES only applies to the shared stack. 5855 */ 5856 zoneid_t mlpzone; 5857 5858 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 5859 htons(mlpport)); 5860 if (connp->conn_zoneid != mlpzone) { 5861 if (connp->conn_debug) { 5862 (void) strlog(UDP_MOD_ID, 0, 1, 5863 SL_ERROR|SL_TRACE, 5864 "udp_bind: attempt to bind port " 5865 "%d on shared addr in zone %d " 5866 "(should be %d)", 5867 mlpport, connp->conn_zoneid, 5868 mlpzone); 5869 } 5870 error = -TACCES; 5871 mutex_exit(&connp->conn_lock); 5872 goto late_error; 5873 } 5874 } 5875 if (connp->conn_anon_port) { 5876 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, 5877 port, B_TRUE); 5878 if (error != 0) { 5879 if (connp->conn_debug) { 5880 (void) strlog(UDP_MOD_ID, 0, 1, 5881 SL_ERROR|SL_TRACE, 5882 "udp_bind: cannot establish anon " 5883 "MLP for port %d", port); 5884 } 5885 error = -TACCES; 5886 mutex_exit(&connp->conn_lock); 5887 goto late_error; 5888 } 5889 } 5890 connp->conn_mlp_type = mlptype; 5891 } 5892 5893 /* 5894 * We create an initial header template here to make a subsequent 5895 * sendto have a starting point. Since conn_last_dst is zero the 5896 * first sendto will always follow the 'dst changed' code path. 5897 * Note that we defer massaging options and the related checksum 5898 * adjustment until we have a destination address. 
5899 */ 5900 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5901 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5902 if (error != 0) { 5903 mutex_exit(&connp->conn_lock); 5904 goto late_error; 5905 } 5906 /* Just in case */ 5907 connp->conn_faddr_v6 = ipv6_all_zeros; 5908 connp->conn_fport = 0; 5909 connp->conn_v6lastdst = ipv6_all_zeros; 5910 mutex_exit(&connp->conn_lock); 5911 5912 error = ip_laddr_fanout_insert(connp); 5913 if (error != 0) 5914 goto late_error; 5915 5916 /* Bind succeeded */ 5917 return (0); 5918 5919 late_error: 5920 /* We had already picked the port number, and then the bind failed */ 5921 mutex_enter(&connp->conn_lock); 5922 udpf = &us->us_bind_fanout[ 5923 UDP_BIND_HASH(connp->conn_lport, 5924 us->us_bind_fanout_size)]; 5925 mutex_enter(&udpf->uf_lock); 5926 connp->conn_saddr_v6 = ipv6_all_zeros; 5927 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5928 connp->conn_laddr_v6 = ipv6_all_zeros; 5929 if (scopeid != 0) { 5930 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5931 connp->conn_incoming_ifindex = connp->conn_bound_if; 5932 } 5933 udp->udp_state = TS_UNBND; 5934 udp_bind_hash_remove(udp, B_TRUE); 5935 connp->conn_lport = 0; 5936 mutex_exit(&udpf->uf_lock); 5937 connp->conn_anon_port = B_FALSE; 5938 connp->conn_mlp_type = mlptSingle; 5939 5940 connp->conn_v6lastdst = ipv6_all_zeros; 5941 5942 /* Restore the header that was built above - different source address */ 5943 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5944 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5945 mutex_exit(&connp->conn_lock); 5946 return (error); 5947 } 5948 5949 int 5950 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5951 socklen_t len, cred_t *cr) 5952 { 5953 int error; 5954 conn_t *connp; 5955 5956 /* All Solaris components should pass a cred for this operation. 
*/ 5957 ASSERT(cr != NULL); 5958 5959 connp = (conn_t *)proto_handle; 5960 5961 if (sa == NULL) 5962 error = udp_do_unbind(connp); 5963 else 5964 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 5965 5966 if (error < 0) { 5967 if (error == -TOUTSTATE) 5968 error = EINVAL; 5969 else 5970 error = proto_tlitosyserr(-error); 5971 } 5972 5973 return (error); 5974 } 5975 5976 static int 5977 udp_implicit_bind(conn_t *connp, cred_t *cr) 5978 { 5979 sin6_t sin6addr; 5980 sin_t *sin; 5981 sin6_t *sin6; 5982 socklen_t len; 5983 int error; 5984 5985 /* All Solaris components should pass a cred for this operation. */ 5986 ASSERT(cr != NULL); 5987 5988 if (connp->conn_family == AF_INET) { 5989 len = sizeof (struct sockaddr_in); 5990 sin = (sin_t *)&sin6addr; 5991 *sin = sin_null; 5992 sin->sin_family = AF_INET; 5993 sin->sin_addr.s_addr = INADDR_ANY; 5994 } else { 5995 ASSERT(connp->conn_family == AF_INET6); 5996 len = sizeof (sin6_t); 5997 sin6 = (sin6_t *)&sin6addr; 5998 *sin6 = sin6_null; 5999 sin6->sin6_family = AF_INET6; 6000 V6_SET_ZERO(sin6->sin6_addr); 6001 } 6002 6003 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len, 6004 cr, B_FALSE); 6005 return ((error < 0) ? proto_tlitosyserr(-error) : error); 6006 } 6007 6008 /* 6009 * This routine removes a port number association from a stream. It 6010 * is called by udp_unbind and udp_tpi_unbind. 
6011 */ 6012 static int 6013 udp_do_unbind(conn_t *connp) 6014 { 6015 udp_t *udp = connp->conn_udp; 6016 udp_fanout_t *udpf; 6017 udp_stack_t *us = udp->udp_us; 6018 6019 if (cl_inet_unbind != NULL) { 6020 /* 6021 * Running in cluster mode - register unbind information 6022 */ 6023 if (connp->conn_ipversion == IPV4_VERSION) { 6024 (*cl_inet_unbind)( 6025 connp->conn_netstack->netstack_stackid, 6026 IPPROTO_UDP, AF_INET, 6027 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 6028 (in_port_t)connp->conn_lport, NULL); 6029 } else { 6030 (*cl_inet_unbind)( 6031 connp->conn_netstack->netstack_stackid, 6032 IPPROTO_UDP, AF_INET6, 6033 (uint8_t *)&(connp->conn_laddr_v6), 6034 (in_port_t)connp->conn_lport, NULL); 6035 } 6036 } 6037 6038 mutex_enter(&connp->conn_lock); 6039 /* If a bind has not been done, we can't unbind. */ 6040 if (udp->udp_state == TS_UNBND) { 6041 mutex_exit(&connp->conn_lock); 6042 return (-TOUTSTATE); 6043 } 6044 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 6045 us->us_bind_fanout_size)]; 6046 mutex_enter(&udpf->uf_lock); 6047 udp_bind_hash_remove(udp, B_TRUE); 6048 connp->conn_saddr_v6 = ipv6_all_zeros; 6049 connp->conn_bound_addr_v6 = ipv6_all_zeros; 6050 connp->conn_laddr_v6 = ipv6_all_zeros; 6051 connp->conn_mcbc_bind = B_FALSE; 6052 connp->conn_lport = 0; 6053 /* In case we were also connected */ 6054 connp->conn_faddr_v6 = ipv6_all_zeros; 6055 connp->conn_fport = 0; 6056 mutex_exit(&udpf->uf_lock); 6057 6058 connp->conn_v6lastdst = ipv6_all_zeros; 6059 udp->udp_state = TS_UNBND; 6060 6061 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 6062 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 6063 mutex_exit(&connp->conn_lock); 6064 6065 ip_unbind(connp); 6066 6067 return (0); 6068 } 6069 6070 /* 6071 * It associates a default destination address with the stream. 
*/
/*
 * udp_do_connect:
 *	connp	- endpoint to connect; must already be bound.
 *	sa/len	- destination; len selects sin_t vs. sin6_t handling.  Although
 *		  sa is declared const, a zero destination address is
 *		  rewritten in place to loopback.
 *	cr/pid	- credential and process id recorded on the conn and its
 *		  transmit attributes.
 *
 * Return values mix two conventions: 0 on success, positive errno values
 * (EINVAL, EADDRNOTAVAIL, ENOMEM, or whatever conn_connect(),
 * ipcl_conn_insert() or udp_build_hdr_template() return), and negative TLI
 * codes (-TBADADDR, -TOUTSTATE).
 */
static int
udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
    cred_t *cr, pid_t pid)
{
	sin6_t		*sin6;
	sin_t		*sin;
	in6_addr_t 	v6dst;
	ipaddr_t 	v4dst;
	uint16_t 	dstport;
	uint32_t 	flowinfo;
	udp_fanout_t	*udpf;
	udp_t		*udp, *udp1;
	ushort_t	ipversion;
	udp_stack_t	*us;
	int		error;
	conn_t		*connp1;
	ip_xmit_attr_t	*ixa;
	uint_t		scopeid = 0;
	uint_t		srcid = 0;
	in6_addr_t	v6src = connp->conn_saddr_v6;

	udp = connp->conn_udp;
	us = udp->udp_us;

	/*
	 * Address has been verified by the caller
	 */
	switch (len) {
	default:
		/*
		 * Should never happen
		 */
		return (EINVAL);

	case sizeof (sin_t):
		sin = (sin_t *)sa;
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		/* Internally the destination is always kept in v6 form. */
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(connp->conn_ipversion == IPV4_VERSION);
		ipversion = IPV4_VERSION;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)sa;
		v6dst = sin6->sin6_addr;
		dstport = sin6->sin6_port;
		srcid = sin6->__sin6_src_id;
		/*
		 * A source id only matters when no source address has been
		 * set yet; the result lands in the local v6src copy.
		 */
		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
			    connp->conn_netstack);
		}
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			if (connp->conn_ipv6_v6only)
				return (EADDRNOTAVAIL);

			/*
			 * Destination address is mapped IPv6 address.
			 * Source bound address should be unspecified or
			 * IPv6 mapped address as well.
			 */
			if (!IN6_IS_ADDR_UNSPECIFIED(
			    &connp->conn_bound_addr_v6) &&
			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
				return (EADDRNOTAVAIL);
			}
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			ipversion = IPV4_VERSION;
			flowinfo = 0;
		} else {
			ipversion = IPV6_VERSION;
			flowinfo = sin6->sin6_flowinfo;
			/* The scope id is only honored for link-locals. */
			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
				scopeid = sin6->sin6_scope_id;
		}
		break;
	}

	/* A zero destination port is rejected before any state is touched. */
	if (dstport == 0)
		return (-TBADADDR);

	/*
	 * If there is a different thread using conn_ixa then we get a new
	 * copy and cut the old one loose from conn_ixa. Otherwise we use
	 * conn_ixa and prevent any other thread from using/changing it.
	 * Once connect() is done other threads can use conn_ixa since the
	 * refcnt will be back at one.
	 */
	ixa = conn_get_ixa(connp, B_TRUE);
	if (ixa == NULL)
		return (ENOMEM);

	ASSERT(ixa->ixa_refcnt >= 2);
	ASSERT(ixa == connp->conn_ixa);

	mutex_enter(&connp->conn_lock);
	/*
	 * This udp_t must have bound to a port already before doing a connect.
	 * Reject if a connect is in progress (we drop conn_lock during
	 * udp_do_connect).
	 */
	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
		mutex_exit(&connp->conn_lock);
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		/* Every exit after conn_get_ixa() must drop the ixa hold. */
		ixa_refrele(ixa);
		return (-TOUTSTATE);
	}
	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);

	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
	    us->us_bind_fanout_size)];

	/* conn_lock is already held; the fanout lock nests inside it. */
	mutex_enter(&udpf->uf_lock);
	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_faddr_v6 = ipv6_all_zeros;
		connp->conn_fport = 0;
		udp->udp_state = TS_IDLE;
	}

	connp->conn_fport = dstport;
	connp->conn_ipversion = ipversion;
	if (ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			/* Writes through the caller's (const) sockaddr. */
			if (connp->conn_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		connp->conn_faddr_v6 = v6dst;
		connp->conn_flowinfo = 0;
	} else {
		ASSERT(connp->conn_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		connp->conn_faddr_v6 = v6dst;
		connp->conn_flowinfo = flowinfo;
	}
	mutex_exit(&udpf->uf_lock);

	/*
	 * We update our cred/cpid based on the caller of connect
	 */
	if (connp->conn_cred != cr) {
		/* Take the new hold before dropping the old one. */
		crhold(cr);
		crfree(connp->conn_cred);
		connp->conn_cred = cr;
	}
	connp->conn_cpid = pid;
	ASSERT(!(ixa->ixa_free_flags & IXA_FREE_CRED));
	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;
	if (is_system_labeled()) {
		/* We need to restart with a label based on the cred */
		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
	}

	if (scopeid != 0) {
		ixa->ixa_flags |= IXAF_SCOPEID_SET;
		ixa->ixa_scopeid = scopeid;
		connp->conn_incoming_ifindex = scopeid;
	} else {
		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		connp->conn_incoming_ifindex = connp->conn_bound_if;
	}
	/*
	 * conn_connect will drop conn_lock and reacquire it.
	 * To prevent a send* from messing with this udp_t while the lock
	 * is dropped we set udp_state and clear conn_v6lastdst.
	 * That will make all send* fail with EISCONN.
	 */
	connp->conn_v6lastdst = ipv6_all_zeros;
	udp->udp_state = TS_WCON_CREQ;

	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
	mutex_exit(&connp->conn_lock);
	if (error != 0)
		goto connect_failed;

	/*
	 * The addresses have been verified. Time to insert in
	 * the correct fanout list.
	 */
	error = ipcl_conn_insert(connp);
	if (error != 0)
		goto connect_failed;

	mutex_enter(&connp->conn_lock);
	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	if (error != 0) {
		/* connect_failed expects to take conn_lock itself. */
		mutex_exit(&connp->conn_lock);
		goto connect_failed;
	}

	udp->udp_state = TS_DATA_XFER;
	/* Record this as the "last" send even though we haven't sent any */
	connp->conn_v6lastdst = connp->conn_faddr_v6;
	connp->conn_lastipversion = connp->conn_ipversion;
	connp->conn_lastdstport = connp->conn_fport;
	connp->conn_lastflowinfo = connp->conn_flowinfo;
	connp->conn_lastscopeid = scopeid;
	connp->conn_lastsrcid = srcid;
	/* Also remember a source to use together with lastdst */
	connp->conn_v6lastsrc = v6src;
	mutex_exit(&connp->conn_lock);

	/*
	 * We've picked a source address above. Now we can
	 * verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER, skipping ourselves.
	 */
	mutex_enter(&udpf->uf_lock);
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;

		if (udp1 == udp)
			continue;

		connp1 = udp1->udp_connp;
		/* Any single mismatch means this entry does not conflict. */
		if (connp->conn_lport != connp1->conn_lport ||
		    connp->conn_ipversion != connp1->conn_ipversion ||
		    dstport != connp1->conn_fport ||
		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
		    &connp1->conn_laddr_v6) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
			continue;
		/* Duplicate found; connect_failed retakes uf_lock itself. */
		mutex_exit(&udpf->uf_lock);
		error = -TBADADDR;
		goto connect_failed;
	}
	if (cl_inet_connect2 != NULL) {
		/* The macro stores its result in 'error'. */
		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
		if (error != 0) {
			mutex_exit(&udpf->uf_lock);
			/* Whatever the hook reported becomes -TBADADDR. */
			error = -TBADADDR;
			goto connect_failed;
		}
	}
	mutex_exit(&udpf->uf_lock);

	ixa_refrele(ixa);
	return (0);

connect_failed:
	/*
	 * Entered with neither conn_lock nor uf_lock held.  Drops the ixa
	 * hold, puts the endpoint back in TS_IDLE with no foreign address,
	 * restores the bound source/local address and rebuilds the header
	 * template to match.
	 */
	if (ixa != NULL)
		ixa_refrele(ixa);
	mutex_enter(&connp->conn_lock);
	mutex_enter(&udpf->uf_lock);
	udp->udp_state = TS_IDLE;
	connp->conn_faddr_v6 = ipv6_all_zeros;
	connp->conn_fport = 0;
	/* In case the source address was set above */
	if (connp->conn_mcbc_bind)
		connp->conn_saddr_v6 = ipv6_all_zeros;
	else
		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
	mutex_exit(&udpf->uf_lock);

	connp->conn_v6lastdst = ipv6_all_zeros;
	connp->conn_flowinfo = 0;

	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	return (error);
}

static int
udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
    socklen_t len, sock_connid_t *id,
cred_t *cr) 6369 { 6370 conn_t *connp = (conn_t *)proto_handle; 6371 udp_t *udp = connp->conn_udp; 6372 int error; 6373 boolean_t did_bind = B_FALSE; 6374 pid_t pid = curproc->p_pid; 6375 6376 /* All Solaris components should pass a cred for this operation. */ 6377 ASSERT(cr != NULL); 6378 6379 if (sa == NULL) { 6380 /* 6381 * Disconnect 6382 * Make sure we are connected 6383 */ 6384 if (udp->udp_state != TS_DATA_XFER) 6385 return (EINVAL); 6386 6387 error = udp_disconnect(connp); 6388 return (error); 6389 } 6390 6391 error = proto_verify_ip_addr(connp->conn_family, sa, len); 6392 if (error != 0) 6393 goto done; 6394 6395 /* do an implicit bind if necessary */ 6396 if (udp->udp_state == TS_UNBND) { 6397 error = udp_implicit_bind(connp, cr); 6398 /* 6399 * We could be racing with an actual bind, in which case 6400 * we would see EPROTO. We cross our fingers and try 6401 * to connect. 6402 */ 6403 if (!(error == 0 || error == EPROTO)) 6404 goto done; 6405 did_bind = B_TRUE; 6406 } 6407 /* 6408 * set SO_DGRAM_ERRIND 6409 */ 6410 connp->conn_dgram_errind = B_TRUE; 6411 6412 error = udp_do_connect(connp, sa, len, cr, pid); 6413 6414 if (error != 0 && did_bind) { 6415 int unbind_err; 6416 6417 unbind_err = udp_do_unbind(connp); 6418 ASSERT(unbind_err == 0); 6419 } 6420 6421 if (error == 0) { 6422 *id = 0; 6423 (*connp->conn_upcalls->su_connected) 6424 (connp->conn_upper_handle, 0, NULL, -1); 6425 } else if (error < 0) { 6426 error = proto_tlitosyserr(-error); 6427 } 6428 6429 done: 6430 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 6431 /* 6432 * No need to hold locks to set state 6433 * after connect failure socket state is undefined 6434 * We set the state only to imitate old sockfs behavior 6435 */ 6436 udp->udp_state = TS_IDLE; 6437 } 6438 return (error); 6439 } 6440 6441 int 6442 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6443 cred_t *cr) 6444 { 6445 sin6_t *sin6; 6446 sin_t *sin = NULL; 6447 uint_t srcid; 6448 conn_t *connp = 
(conn_t *)proto_handle;
	udp_t	*udp = connp->conn_udp;
	int	error = 0;
	udp_stack_t *us = udp->udp_us;
	ushort_t	ipversion;
	pid_t	pid = curproc->p_pid;
	ip_xmit_attr_t	*ixa;

	ASSERT(DB_TYPE(mp) == M_DATA);

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	/* do an implicit bind if necessary */
	if (udp->udp_state == TS_UNBND) {
		error = udp_implicit_bind(connp, cr);
		/*
		 * We could be racing with an actual bind, in which case
		 * we would see EPROTO. We cross our fingers and try
		 * to send.
		 */
		if (!(error == 0 || error == EPROTO)) {
			freemsg(mp);
			return (error);
		}
	}

	/* Connected? */
	if (msg->msg_name == NULL) {
		if (udp->udp_state != TS_DATA_XFER) {
			/*
			 * NOTE(review): mp is not freed on this return,
			 * unlike the implicit-bind failure above - confirm
			 * who owns mp when an error is returned.
			 */
			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
			return (EDESTADDRREQ);
		}
		/* Ancillary data forces the slower ancillary output path. */
		if (msg->msg_controllen != 0) {
			error = udp_output_ancillary(connp, NULL, NULL, mp,
			    NULL, msg, cr, pid);
		} else {
			error = udp_output_connected(connp, mp, cr, pid);
		}
		/* With us_sendto_ignerr set, output errors are suppressed. */
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	}
	/* A destination was supplied, which a connected endpoint rejects. */
	if (udp->udp_state == TS_DATA_XFER) {
		/* NOTE(review): mp is not freed here either - confirm. */
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		return (EISCONN);
	}
	error = proto_verify_ip_addr(connp->conn_family,
	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
	if (error != 0) {
		/* NOTE(review): mp is not freed here either - confirm. */
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		return (error);
	}
	switch (connp->conn_family) {
	case AF_INET6:
		sin6 = (sin6_t *)msg->msg_name;

		srcid = sin6->__sin6_src_id;

		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			/*
			 * Destination is a non-IPv4-compatible IPv6 address.
			 * Send out an IPv6 format packet.
			 */

			/*
			 * If the local address is a mapped address return
			 * an error.
			 * It would be possible to send an IPv6 packet but the
			 * response would never make it back to the application
			 * since it is bound to a mapped address.
			 */
			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}
			/* An unspecified destination is sent to loopback. */
			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				sin6->sin6_addr = ipv6_loopback;
			ipversion = IPV6_VERSION;
		} else {
			if (connp->conn_ipv6_v6only) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}

			/*
			 * If the local address is not zero or a mapped address
			 * return an error.  It would be possible to send an
			 * IPv4 packet but the response would never make it
			 * back to the application since it is bound to a
			 * non-mapped address.
			 */
			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}

			/* A mapped INADDR_ANY destination means loopback. */
			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
				V4_PART_OF_V6(sin6->sin6_addr) =
				    htonl(INADDR_LOOPBACK);
			}
			ipversion = IPV4_VERSION;
		}

		/*
		 * We have to allocate an ip_xmit_attr_t before we grab
		 * conn_lock and we need to hold conn_lock once we've checked
		 * conn_same_as_last_v6 to handle concurrent send* calls on a
		 * socket.
		 */
		if (msg->msg_controllen == 0) {
			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (ENOMEM);
			}
		} else {
			/* The ancillary path is called without an ixa. */
			ixa = NULL;
		}
		mutex_enter(&connp->conn_lock);
		if (udp->udp_delayed_error != 0) {
			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;

			/*
			 * The pending error is consumed in either case, but
			 * is only returned when it was recorded against this
			 * same destination.
			 */
			error = udp->udp_delayed_error;
			udp->udp_delayed_error = 0;

			/* Compare IP address, port, and family */

			if (sin6->sin6_port == sin2->sin6_port &&
			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
			    &sin2->sin6_addr) &&
			    sin6->sin6_family == sin2->sin6_family) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				if (ixa != NULL)
					ixa_refrele(ixa);
				return (error);
			}
		}

		if (msg->msg_controllen != 0) {
			/* Ancillary output is entered without conn_lock. */
			mutex_exit(&connp->conn_lock);
			ASSERT(ixa == NULL);
			error = udp_output_ancillary(connp, NULL, sin6, mp,
			    NULL, msg, cr, pid);
		} else if (conn_same_as_last_v6(connp, sin6) &&
		    connp->conn_lastsrcid == srcid &&
		    ipsec_outbound_policy_current(ixa)) {
			/* udp_output_lastdst drops conn_lock */
			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
		} else {
			/* udp_output_newdst drops conn_lock */
			error = udp_output_newdst(connp, mp, NULL, sin6,
			    ipversion, cr, pid, ixa);
		}
		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	case AF_INET:
		sin = (sin_t *)msg->msg_name;

		ipversion = IPV4_VERSION;

		/* An INADDR_ANY destination is sent to loopback. */
		if (sin->sin_addr.s_addr == INADDR_ANY)
			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);

		/*
		 * We have to allocate an ip_xmit_attr_t before we grab
		 * conn_lock and we need to hold conn_lock once we've checked
		 * conn_same_as_last_v4 to handle concurrent send* on a socket.
		 */
		if (msg->msg_controllen == 0) {
			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (ENOMEM);
			}
		} else {
			/* The ancillary path is called without an ixa. */
			ixa = NULL;
		}
		mutex_enter(&connp->conn_lock);
		if (udp->udp_delayed_error != 0) {
			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;

			/* Consumed in either case; see the AF_INET6 path. */
			error = udp->udp_delayed_error;
			udp->udp_delayed_error = 0;

			/* Compare IP address and port */

			if (sin->sin_port == sin2->sin_port &&
			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				if (ixa != NULL)
					ixa_refrele(ixa);
				return (error);
			}
		}
		if (msg->msg_controllen != 0) {
			/* Ancillary output is entered without conn_lock. */
			mutex_exit(&connp->conn_lock);
			ASSERT(ixa == NULL);
			error = udp_output_ancillary(connp, sin, NULL, mp,
			    NULL, msg, cr, pid);
		} else if (conn_same_as_last_v4(connp, sin) &&
		    ipsec_outbound_policy_current(ixa)) {
			/* udp_output_lastdst drops conn_lock */
			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
		} else {
			/* udp_output_newdst drops conn_lock */
			error = udp_output_newdst(connp, mp, sin, NULL,
			    ipversion, cr, pid, ixa);
		}
		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	default:
		return (EINVAL);
	}
}

int
udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
    boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb)
{
	conn_t	*connp = (conn_t *)proto_handle;
	udp_t	*udp;
	struct T_capability_ack tca;
	struct sockaddr_in6 laddr, faddr;
	socklen_t laddrlen, faddrlen;
	short opts;
	struct stroptions *stropt;
	mblk_t *stropt_mp;
	int error;

	udp = connp->conn_udp;

	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);

	/*
	 * setup the fallback
stream that was allocated 6694 */ 6695 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6696 connp->conn_minor_arena = WR(q)->q_ptr; 6697 6698 RD(q)->q_ptr = WR(q)->q_ptr = connp; 6699 6700 WR(q)->q_qinfo = &udp_winit; 6701 6702 connp->conn_rq = RD(q); 6703 connp->conn_wq = WR(q); 6704 6705 /* Notify stream head about options before sending up data */ 6706 stropt_mp->b_datap->db_type = M_SETOPTS; 6707 stropt_mp->b_wptr += sizeof (*stropt); 6708 stropt = (struct stroptions *)stropt_mp->b_rptr; 6709 stropt->so_flags = SO_WROFF | SO_HIWAT; 6710 stropt->so_wroff = connp->conn_wroff; 6711 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 6712 putnext(RD(q), stropt_mp); 6713 6714 /* 6715 * Free the helper stream 6716 */ 6717 ip_free_helper_stream(connp); 6718 6719 if (!issocket) 6720 udp_use_pure_tpi(udp); 6721 6722 /* 6723 * Collect the information needed to sync with the sonode 6724 */ 6725 udp_do_capability_ack(udp, &tca, TC1_INFO); 6726 6727 laddrlen = faddrlen = sizeof (sin6_t); 6728 (void) udp_getsockname((sock_lower_handle_t)connp, 6729 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6730 error = udp_getpeername((sock_lower_handle_t)connp, 6731 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6732 if (error != 0) 6733 faddrlen = 0; 6734 6735 opts = 0; 6736 if (connp->conn_dgram_errind) 6737 opts |= SO_DGRAM_ERRIND; 6738 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 6739 opts |= SO_DONTROUTE; 6740 6741 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 6742 (struct sockaddr *)&laddr, laddrlen, 6743 (struct sockaddr *)&faddr, faddrlen, opts); 6744 6745 mutex_enter(&udp->udp_recv_lock); 6746 /* 6747 * Attempts to send data up during fallback will result in it being 6748 * queued in udp_t. Now we push up any queued packets. 
6749 */ 6750 while (udp->udp_fallback_queue_head != NULL) { 6751 mblk_t *mp; 6752 mp = udp->udp_fallback_queue_head; 6753 udp->udp_fallback_queue_head = mp->b_next; 6754 mutex_exit(&udp->udp_recv_lock); 6755 mp->b_next = NULL; 6756 putnext(RD(q), mp); 6757 mutex_enter(&udp->udp_recv_lock); 6758 } 6759 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6760 /* 6761 * No longer a streams less socket 6762 */ 6763 mutex_enter(&connp->conn_lock); 6764 connp->conn_flags &= ~IPCL_NONSTR; 6765 mutex_exit(&connp->conn_lock); 6766 6767 mutex_exit(&udp->udp_recv_lock); 6768 6769 ASSERT(connp->conn_ref >= 1); 6770 6771 return (0); 6772 } 6773 6774 /* ARGSUSED3 */ 6775 int 6776 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6777 socklen_t *salenp, cred_t *cr) 6778 { 6779 conn_t *connp = (conn_t *)proto_handle; 6780 udp_t *udp = connp->conn_udp; 6781 int error; 6782 6783 /* All Solaris components should pass a cred for this operation. */ 6784 ASSERT(cr != NULL); 6785 6786 mutex_enter(&connp->conn_lock); 6787 if (udp->udp_state != TS_DATA_XFER) 6788 error = ENOTCONN; 6789 else 6790 error = conn_getpeername(connp, sa, salenp); 6791 mutex_exit(&connp->conn_lock); 6792 return (error); 6793 } 6794 6795 /* ARGSUSED3 */ 6796 int 6797 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6798 socklen_t *salenp, cred_t *cr) 6799 { 6800 conn_t *connp = (conn_t *)proto_handle; 6801 int error; 6802 6803 /* All Solaris components should pass a cred for this operation. 
*/ 6804 ASSERT(cr != NULL); 6805 6806 mutex_enter(&connp->conn_lock); 6807 error = conn_getsockname(connp, sa, salenp); 6808 mutex_exit(&connp->conn_lock); 6809 return (error); 6810 } 6811 6812 int 6813 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6814 void *optvalp, socklen_t *optlen, cred_t *cr) 6815 { 6816 conn_t *connp = (conn_t *)proto_handle; 6817 int error; 6818 t_uscalar_t max_optbuf_len; 6819 void *optvalp_buf; 6820 int len; 6821 6822 /* All Solaris components should pass a cred for this operation. */ 6823 ASSERT(cr != NULL); 6824 6825 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6826 udp_opt_obj.odb_opt_des_arr, 6827 udp_opt_obj.odb_opt_arr_cnt, 6828 B_FALSE, B_TRUE, cr); 6829 if (error != 0) { 6830 if (error < 0) 6831 error = proto_tlitosyserr(-error); 6832 return (error); 6833 } 6834 6835 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6836 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6837 if (len == -1) { 6838 kmem_free(optvalp_buf, max_optbuf_len); 6839 return (EINVAL); 6840 } 6841 6842 /* 6843 * update optlen and copy option value 6844 */ 6845 t_uscalar_t size = MIN(len, *optlen); 6846 6847 bcopy(optvalp_buf, optvalp, size); 6848 bcopy(&size, optlen, sizeof (size)); 6849 6850 kmem_free(optvalp_buf, max_optbuf_len); 6851 return (0); 6852 } 6853 6854 int 6855 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6856 const void *optvalp, socklen_t optlen, cred_t *cr) 6857 { 6858 conn_t *connp = (conn_t *)proto_handle; 6859 int error; 6860 6861 /* All Solaris components should pass a cred for this operation. 
*/ 6862 ASSERT(cr != NULL); 6863 6864 error = proto_opt_check(level, option_name, optlen, NULL, 6865 udp_opt_obj.odb_opt_des_arr, 6866 udp_opt_obj.odb_opt_arr_cnt, 6867 B_TRUE, B_FALSE, cr); 6868 6869 if (error != 0) { 6870 if (error < 0) 6871 error = proto_tlitosyserr(-error); 6872 return (error); 6873 } 6874 6875 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6876 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6877 NULL, cr); 6878 6879 ASSERT(error >= 0); 6880 6881 return (error); 6882 } 6883 6884 void 6885 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6886 { 6887 conn_t *connp = (conn_t *)proto_handle; 6888 udp_t *udp = connp->conn_udp; 6889 6890 mutex_enter(&udp->udp_recv_lock); 6891 connp->conn_flow_cntrld = B_FALSE; 6892 mutex_exit(&udp->udp_recv_lock); 6893 } 6894 6895 /* ARGSUSED2 */ 6896 int 6897 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6898 { 6899 conn_t *connp = (conn_t *)proto_handle; 6900 6901 /* All Solaris components should pass a cred for this operation. */ 6902 ASSERT(cr != NULL); 6903 6904 /* shut down the send side */ 6905 if (how != SHUT_RD) 6906 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6907 SOCK_OPCTL_SHUT_SEND, 0); 6908 /* shut down the recv side */ 6909 if (how != SHUT_WR) 6910 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6911 SOCK_OPCTL_SHUT_RECV, 0); 6912 return (0); 6913 } 6914 6915 int 6916 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6917 int mode, int32_t *rvalp, cred_t *cr) 6918 { 6919 conn_t *connp = (conn_t *)proto_handle; 6920 int error; 6921 6922 /* All Solaris components should pass a cred for this operation. */ 6923 ASSERT(cr != NULL); 6924 6925 /* 6926 * If we don't have a helper stream then create one. 6927 * ip_create_helper_stream takes care of locking the conn_t, 6928 * so this check for NULL is just a performance optimization. 
6929 */ 6930 if (connp->conn_helper_info == NULL) { 6931 udp_stack_t *us = connp->conn_udp->udp_us; 6932 6933 ASSERT(us->us_ldi_ident != NULL); 6934 6935 /* 6936 * Create a helper stream for non-STREAMS socket. 6937 */ 6938 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6939 if (error != 0) { 6940 ip0dbg(("tcp_ioctl: create of IP helper stream " 6941 "failed %d\n", error)); 6942 return (error); 6943 } 6944 } 6945 6946 switch (cmd) { 6947 case ND_SET: 6948 case ND_GET: 6949 case _SIOCSOCKFALLBACK: 6950 case TI_GETPEERNAME: 6951 case TI_GETMYNAME: 6952 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6953 cmd)); 6954 error = EINVAL; 6955 break; 6956 default: 6957 /* 6958 * Pass on to IP using helper stream 6959 */ 6960 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6961 cmd, arg, mode, cr, rvalp); 6962 break; 6963 } 6964 return (error); 6965 } 6966 6967 /* ARGSUSED */ 6968 int 6969 udp_accept(sock_lower_handle_t lproto_handle, 6970 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6971 cred_t *cr) 6972 { 6973 return (EOPNOTSUPP); 6974 } 6975 6976 /* ARGSUSED */ 6977 int 6978 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6979 { 6980 return (EOPNOTSUPP); 6981 } 6982 6983 sock_downcalls_t sock_udp_downcalls = { 6984 udp_activate, /* sd_activate */ 6985 udp_accept, /* sd_accept */ 6986 udp_bind, /* sd_bind */ 6987 udp_listen, /* sd_listen */ 6988 udp_connect, /* sd_connect */ 6989 udp_getpeername, /* sd_getpeername */ 6990 udp_getsockname, /* sd_getsockname */ 6991 udp_getsockopt, /* sd_getsockopt */ 6992 udp_setsockopt, /* sd_setsockopt */ 6993 udp_send, /* sd_send */ 6994 NULL, /* sd_send_uio */ 6995 NULL, /* sd_recv_uio */ 6996 NULL, /* sd_poll */ 6997 udp_shutdown, /* sd_shutdown */ 6998 udp_clr_flowctrl, /* sd_setflowctrl */ 6999 udp_ioctl, /* sd_ioctl */ 7000 udp_close /* sd_close */ 7001 }; 7002