1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/suntpi.h> 39 #include <sys/xti_inet.h> 40 #include <sys/kmem.h> 41 #include <sys/cred_impl.h> 42 #include <sys/policy.h> 43 #include <sys/priv.h> 44 #include <sys/ucred.h> 45 #include <sys/zone.h> 46 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/sockio.h> 50 #include <sys/vtrace.h> 51 #include <sys/sdt.h> 52 #include <sys/debug.h> 53 #include <sys/isa_defs.h> 54 #include <sys/random.h> 55 #include <netinet/in.h> 56 #include <netinet/ip6.h> 57 #include <netinet/icmp6.h> 58 #include <netinet/udp.h> 59 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip_impl.h> 63 #include <inet/ipsec_impl.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ire.h> 66 #include <inet/ip_if.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_ndp.h> 69 #include <inet/proto_set.h> 70 #include <inet/mib2.h> 71 #include <inet/nd.h> 72 #include <inet/optcom.h> 73 #include <inet/snmpcom.h> 74 #include <inet/kstatcom.h> 75 #include <inet/ipclassifier.h> 76 #include <sys/squeue_impl.h> 77 #include <inet/ipnet.h> 78 #include <sys/ethernet.h> 79 80 #include <sys/tsol/label.h> 81 #include <sys/tsol/tnet.h> 82 #include <rpc/pmap_prot.h> 83 84 #include <inet/udp_impl.h> 85 86 /* 87 * Synchronization notes: 88 * 89 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 90 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 91 * protects the contents of the udp_t. uf_lock protects the address and the 92 * fanout information. 93 * The lock order is conn_lock -> uf_lock. 94 * 95 * The fanout lock uf_lock: 96 * When a UDP endpoint is bound to a local port, it is inserted into 97 * a bind hash list. 
The list consists of an array of udp_fanout_t buckets. 98 * The size of the array is controlled by the udp_bind_fanout_size variable. 99 * This variable can be changed in /etc/system if the default value is 100 * not large enough. Each bind hash bucket is protected by a per bucket 101 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 102 * structure and a few other fields in the udp_t. A UDP endpoint is removed 103 * from the bind hash list only when it is being unbound or being closed. 104 * The per bucket lock also protects a UDP endpoint's state changes. 105 * 106 * Plumbing notes: 107 * UDP is always a device driver. For compatibility with mibopen() code 108 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 109 * dummy module. 110 * 111 * The above implies that we don't support any intermediate module to 112 * reside in between /dev/ip and udp -- in fact, we never supported such 113 * scenario in the past as the inter-layer communication semantics have 114 * always been private. 
115 */ 116 117 /* For /etc/system control */ 118 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 119 120 static void udp_addr_req(queue_t *q, mblk_t *mp); 121 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 122 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 123 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 124 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 125 const in6_addr_t *, in_port_t, uint32_t); 126 static void udp_capability_req(queue_t *q, mblk_t *mp); 127 static int udp_tpi_close(queue_t *q, int flags); 128 static void udp_close_free(conn_t *); 129 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 130 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 131 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 132 int sys_error); 133 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 134 t_scalar_t tlierr, int sys_error); 135 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 136 cred_t *cr); 137 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 138 char *value, caddr_t cp, cred_t *cr); 139 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 140 char *value, caddr_t cp, cred_t *cr); 141 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 142 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 143 ip_recv_attr_t *ira); 144 static void udp_info_req(queue_t *q, mblk_t *mp); 145 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 146 static void udp_lrput(queue_t *, mblk_t *); 147 static void udp_lwput(queue_t *, mblk_t *); 148 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 149 cred_t *credp, boolean_t isv6); 150 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 151 cred_t *credp); 152 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 153 cred_t *credp); 154 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, 
t_scalar_t name); 155 int udp_opt_set(conn_t *connp, uint_t optset_context, 156 int level, int name, uint_t inlen, 157 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 158 void *thisdg_attrs, cred_t *cr); 159 int udp_opt_get(conn_t *connp, int level, int name, 160 uchar_t *ptr); 161 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 162 pid_t pid); 163 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 164 pid_t pid, ip_xmit_attr_t *ixa); 165 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 166 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 167 ip_xmit_attr_t *ixa); 168 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 169 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 170 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 171 cred_t *cr); 172 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 173 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 174 int *); 175 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 176 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 177 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 178 static void udp_ud_err_connected(conn_t *, t_scalar_t); 179 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 180 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 181 boolean_t random); 182 static void udp_wput_other(queue_t *q, mblk_t *mp); 183 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 184 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 185 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 186 187 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 188 static void udp_stack_fini(netstackid_t stackid, void *arg); 189 190 static void *udp_kstat_init(netstackid_t stackid); 191 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 192 static void 
*udp_kstat2_init(netstackid_t, udp_stat_t *); 193 static void udp_kstat2_fini(netstackid_t, kstat_t *); 194 static int udp_kstat_update(kstat_t *kp, int rw); 195 196 197 /* Common routines for TPI and socket module */ 198 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 199 200 /* Common routine for TPI and socket module */ 201 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); 202 static void udp_do_close(conn_t *); 203 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 204 boolean_t); 205 static int udp_do_unbind(conn_t *); 206 207 int udp_getsockname(sock_lower_handle_t, 208 struct sockaddr *, socklen_t *, cred_t *); 209 int udp_getpeername(sock_lower_handle_t, 210 struct sockaddr *, socklen_t *, cred_t *); 211 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 212 cred_t *, pid_t); 213 214 #define UDP_RECV_HIWATER (56 * 1024) 215 #define UDP_RECV_LOWATER 128 216 #define UDP_XMIT_HIWATER (56 * 1024) 217 #define UDP_XMIT_LOWATER 1024 218 219 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) 220 221 /* 222 * Checks if the given destination addr/port is allowed out. 223 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 224 * Called for each connect() and for sendto()/sendmsg() to a different 225 * destination. 226 * For connect(), called in udp_connect(). 227 * For sendto()/sendmsg(), called in udp_output_newdst(). 228 * 229 * This macro assumes that the cl_inet_connect2 hook is not NULL. 230 * Please check this before calling this macro. 
231 * 232 * void 233 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 234 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 235 */ 236 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 237 (err) = 0; \ 238 /* \ 239 * Running in cluster mode - check and register active \ 240 * "connection" information \ 241 */ \ 242 if ((cp)->conn_ipversion == IPV4_VERSION) \ 243 (err) = (*cl_inet_connect2)( \ 244 (cp)->conn_netstack->netstack_stackid, \ 245 IPPROTO_UDP, is_outgoing, AF_INET, \ 246 (uint8_t *)&((cp)->conn_laddr_v4), \ 247 (cp)->conn_lport, \ 248 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 249 (in_port_t)(fport), NULL); \ 250 else \ 251 (err) = (*cl_inet_connect2)( \ 252 (cp)->conn_netstack->netstack_stackid, \ 253 IPPROTO_UDP, is_outgoing, AF_INET6, \ 254 (uint8_t *)&((cp)->conn_laddr_v6), \ 255 (cp)->conn_lport, \ 256 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 257 } 258 259 static struct module_info udp_mod_info = { 260 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 261 }; 262 263 /* 264 * Entry points for UDP as a device. 265 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 266 */ 267 static struct qinit udp_rinitv4 = { 268 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 269 }; 270 271 static struct qinit udp_rinitv6 = { 272 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 273 }; 274 275 static struct qinit udp_winit = { 276 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info 277 }; 278 279 /* UDP entry point during fallback */ 280 struct qinit udp_fallback_sock_winit = { 281 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 282 }; 283 284 /* 285 * UDP needs to handle I_LINK and I_PLINK since ifconfig 286 * likes to use it as a place to hang the various streams. 
287 */ 288 static struct qinit udp_lrinit = { 289 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 290 }; 291 292 static struct qinit udp_lwinit = { 293 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 294 }; 295 296 /* For AF_INET aka /dev/udp */ 297 struct streamtab udpinfov4 = { 298 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 299 }; 300 301 /* For AF_INET6 aka /dev/udp6 */ 302 struct streamtab udpinfov6 = { 303 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 304 }; 305 306 static sin_t sin_null; /* Zero address for quick clears */ 307 static sin6_t sin6_null; /* Zero address for quick clears */ 308 309 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 310 311 /* Default structure copied into T_INFO_ACK messages */ 312 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 313 T_INFO_ACK, 314 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 315 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 316 T_INVALID, /* CDATA_size. udp does not support connect data. */ 317 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 318 sizeof (sin_t), /* ADDR_size. */ 319 0, /* OPT_size - not initialized here */ 320 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 321 T_CLTS, /* SERV_type. udp supports connection-less. */ 322 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 323 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 324 }; 325 326 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 327 328 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 329 T_INFO_ACK, 330 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 331 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 332 T_INVALID, /* CDATA_size. udp does not support connect data. */ 333 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 334 sizeof (sin6_t), /* ADDR_size. */ 335 0, /* OPT_size - not initialized here */ 336 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. 
headers */ 337 T_CLTS, /* SERV_type. udp supports connection-less. */ 338 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 339 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 340 }; 341 342 /* largest UDP port number */ 343 #define UDP_MAX_PORT 65535 344 345 /* 346 * Table of ND variables supported by udp. These are loaded into us_nd 347 * in udp_open. 348 * All of these are alterable, within the min/max values given, at run time. 349 */ 350 /* BEGIN CSTYLED */ 351 udpparam_t udp_param_arr[] = { 352 /*min max value name */ 353 { 0L, 256, 32, "udp_wroff_extra" }, 354 { 1L, 255, 255, "udp_ipv4_ttl" }, 355 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 356 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 357 { 0, 1, 1, "udp_do_checksum" }, 358 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 359 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 360 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 361 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 362 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 363 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 364 { 0, 1, 0, "udp_pmtu_discovery" }, 365 { 0, 1, 0, "udp_sendto_ignerr" }, 366 }; 367 /* END CSTYLED */ 368 369 /* Setable in /etc/system */ 370 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 371 uint32_t udp_random_anon_port = 1; 372 373 /* 374 * Hook functions to enable cluster networking. 
375 * On non-clustered systems these vectors must always be NULL 376 */ 377 378 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 379 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 380 void *args) = NULL; 381 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 382 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 383 void *args) = NULL; 384 385 typedef union T_primitives *t_primp_t; 386 387 /* 388 * Return the next anonymous port in the privileged port range for 389 * bind checking. 390 * 391 * Trusted Extension (TX) notes: TX allows administrator to mark or 392 * reserve ports as Multilevel ports (MLP). MLP has special function 393 * on TX systems. Once a port is made MLP, it's not available as 394 * ordinary port. This creates "holes" in the port name space. It 395 * may be necessary to skip the "holes" find a suitable anon port. 396 */ 397 static in_port_t 398 udp_get_next_priv_port(udp_t *udp) 399 { 400 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 401 in_port_t nextport; 402 boolean_t restart = B_FALSE; 403 udp_stack_t *us = udp->udp_us; 404 405 retry: 406 if (next_priv_port < us->us_min_anonpriv_port || 407 next_priv_port >= IPPORT_RESERVED) { 408 next_priv_port = IPPORT_RESERVED - 1; 409 if (restart) 410 return (0); 411 restart = B_TRUE; 412 } 413 414 if (is_system_labeled() && 415 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 416 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 417 next_priv_port = nextport; 418 goto retry; 419 } 420 421 return (next_priv_port--); 422 } 423 424 /* 425 * Hash list removal routine for udp_t structures. 
426 */ 427 static void 428 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 429 { 430 udp_t *udpnext; 431 kmutex_t *lockp; 432 udp_stack_t *us = udp->udp_us; 433 conn_t *connp = udp->udp_connp; 434 435 if (udp->udp_ptpbhn == NULL) 436 return; 437 438 /* 439 * Extract the lock pointer in case there are concurrent 440 * hash_remove's for this instance. 441 */ 442 ASSERT(connp->conn_lport != 0); 443 if (!caller_holds_lock) { 444 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 445 us->us_bind_fanout_size)].uf_lock; 446 ASSERT(lockp != NULL); 447 mutex_enter(lockp); 448 } 449 if (udp->udp_ptpbhn != NULL) { 450 udpnext = udp->udp_bind_hash; 451 if (udpnext != NULL) { 452 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 453 udp->udp_bind_hash = NULL; 454 } 455 *udp->udp_ptpbhn = udpnext; 456 udp->udp_ptpbhn = NULL; 457 } 458 if (!caller_holds_lock) { 459 mutex_exit(lockp); 460 } 461 } 462 463 static void 464 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 465 { 466 conn_t *connp = udp->udp_connp; 467 udp_t **udpp; 468 udp_t *udpnext; 469 conn_t *connext; 470 471 ASSERT(MUTEX_HELD(&uf->uf_lock)); 472 ASSERT(udp->udp_ptpbhn == NULL); 473 udpp = &uf->uf_udp; 474 udpnext = udpp[0]; 475 if (udpnext != NULL) { 476 /* 477 * If the new udp bound to the INADDR_ANY address 478 * and the first one in the list is not bound to 479 * INADDR_ANY we skip all entries until we find the 480 * first one bound to INADDR_ANY. 481 * This makes sure that applications binding to a 482 * specific address get preference over those binding to 483 * INADDR_ANY. 
484 */ 485 connext = udpnext->udp_connp; 486 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 487 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 488 while ((udpnext = udpp[0]) != NULL && 489 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 490 udpp = &(udpnext->udp_bind_hash); 491 } 492 if (udpnext != NULL) 493 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 494 } else { 495 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 496 } 497 } 498 udp->udp_bind_hash = udpnext; 499 udp->udp_ptpbhn = udpp; 500 udpp[0] = udp; 501 } 502 503 /* 504 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 505 * passed to udp_wput. 506 * It associates a port number and local address with the stream. 507 * It calls IP to verify the local IP address, and calls IP to insert 508 * the conn_t in the fanout table. 509 * If everything is ok it then sends the T_BIND_ACK back up. 510 * 511 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 512 * without setting SO_REUSEADDR. This is needed so that they 513 * can be viewed as two independent transport protocols. 514 * However, anonymouns ports are allocated from the same range to avoid 515 * duplicating the us->us_next_port_to_try. 516 */ 517 static void 518 udp_tpi_bind(queue_t *q, mblk_t *mp) 519 { 520 sin_t *sin; 521 sin6_t *sin6; 522 mblk_t *mp1; 523 struct T_bind_req *tbr; 524 conn_t *connp; 525 udp_t *udp; 526 int error; 527 struct sockaddr *sa; 528 cred_t *cr; 529 530 /* 531 * All Solaris components should pass a db_credp 532 * for this TPI message, hence we ASSERT. 533 * But in case there is some other M_PROTO that looks 534 * like a TPI message sent by some other kernel 535 * component, we check and return an error. 
536 */ 537 cr = msg_getcred(mp, NULL); 538 ASSERT(cr != NULL); 539 if (cr == NULL) { 540 udp_err_ack(q, mp, TSYSERR, EINVAL); 541 return; 542 } 543 544 connp = Q_TO_CONN(q); 545 udp = connp->conn_udp; 546 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 547 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 548 "udp_bind: bad req, len %u", 549 (uint_t)(mp->b_wptr - mp->b_rptr)); 550 udp_err_ack(q, mp, TPROTO, 0); 551 return; 552 } 553 if (udp->udp_state != TS_UNBND) { 554 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 555 "udp_bind: bad state, %u", udp->udp_state); 556 udp_err_ack(q, mp, TOUTSTATE, 0); 557 return; 558 } 559 /* 560 * Reallocate the message to make sure we have enough room for an 561 * address. 562 */ 563 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 564 if (mp1 == NULL) { 565 udp_err_ack(q, mp, TSYSERR, ENOMEM); 566 return; 567 } 568 569 mp = mp1; 570 571 /* Reset the message type in preparation for shipping it back. */ 572 DB_TYPE(mp) = M_PCPROTO; 573 574 tbr = (struct T_bind_req *)mp->b_rptr; 575 switch (tbr->ADDR_length) { 576 case 0: /* Request for a generic port */ 577 tbr->ADDR_offset = sizeof (struct T_bind_req); 578 if (connp->conn_family == AF_INET) { 579 tbr->ADDR_length = sizeof (sin_t); 580 sin = (sin_t *)&tbr[1]; 581 *sin = sin_null; 582 sin->sin_family = AF_INET; 583 mp->b_wptr = (uchar_t *)&sin[1]; 584 sa = (struct sockaddr *)sin; 585 } else { 586 ASSERT(connp->conn_family == AF_INET6); 587 tbr->ADDR_length = sizeof (sin6_t); 588 sin6 = (sin6_t *)&tbr[1]; 589 *sin6 = sin6_null; 590 sin6->sin6_family = AF_INET6; 591 mp->b_wptr = (uchar_t *)&sin6[1]; 592 sa = (struct sockaddr *)sin6; 593 } 594 break; 595 596 case sizeof (sin_t): /* Complete IPv4 address */ 597 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 598 sizeof (sin_t)); 599 if (sa == NULL || !OK_32PTR((char *)sa)) { 600 udp_err_ack(q, mp, TSYSERR, EINVAL); 601 return; 602 } 603 if (connp->conn_family != AF_INET || 604 sa->sa_family != AF_INET) { 605 
udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 606 return; 607 } 608 break; 609 610 case sizeof (sin6_t): /* complete IPv6 address */ 611 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 612 sizeof (sin6_t)); 613 if (sa == NULL || !OK_32PTR((char *)sa)) { 614 udp_err_ack(q, mp, TSYSERR, EINVAL); 615 return; 616 } 617 if (connp->conn_family != AF_INET6 || 618 sa->sa_family != AF_INET6) { 619 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 620 return; 621 } 622 break; 623 624 default: /* Invalid request */ 625 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 626 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 627 udp_err_ack(q, mp, TBADADDR, 0); 628 return; 629 } 630 631 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 632 tbr->PRIM_type != O_T_BIND_REQ); 633 634 if (error != 0) { 635 if (error > 0) { 636 udp_err_ack(q, mp, TSYSERR, error); 637 } else { 638 udp_err_ack(q, mp, -error, 0); 639 } 640 } else { 641 tbr->PRIM_type = T_BIND_ACK; 642 qreply(q, mp); 643 } 644 } 645 646 /* 647 * This routine handles each T_CONN_REQ message passed to udp. It 648 * associates a default destination address with the stream. 649 * 650 * After various error checks are completed, udp_connect() lays 651 * the target address and port into the composite header template. 652 * Then we ask IP for information, including a source address if we didn't 653 * already have one. Finally we send up the T_OK_ACK reply message. 654 */ 655 static void 656 udp_tpi_connect(queue_t *q, mblk_t *mp) 657 { 658 conn_t *connp = Q_TO_CONN(q); 659 int error; 660 socklen_t len; 661 struct sockaddr *sa; 662 struct T_conn_req *tcr; 663 cred_t *cr; 664 pid_t pid; 665 /* 666 * All Solaris components should pass a db_credp 667 * for this TPI message, hence we ASSERT. 668 * But in case there is some other M_PROTO that looks 669 * like a TPI message sent by some other kernel 670 * component, we check and return an error. 
671 */ 672 cr = msg_getcred(mp, &pid); 673 ASSERT(cr != NULL); 674 if (cr == NULL) { 675 udp_err_ack(q, mp, TSYSERR, EINVAL); 676 return; 677 } 678 679 tcr = (struct T_conn_req *)mp->b_rptr; 680 681 /* A bit of sanity checking */ 682 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 683 udp_err_ack(q, mp, TPROTO, 0); 684 return; 685 } 686 687 if (tcr->OPT_length != 0) { 688 udp_err_ack(q, mp, TBADOPT, 0); 689 return; 690 } 691 692 /* 693 * Determine packet type based on type of address passed in 694 * the request should contain an IPv4 or IPv6 address. 695 * Make sure that address family matches the type of 696 * family of the address passed down. 697 */ 698 len = tcr->DEST_length; 699 switch (tcr->DEST_length) { 700 default: 701 udp_err_ack(q, mp, TBADADDR, 0); 702 return; 703 704 case sizeof (sin_t): 705 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 706 sizeof (sin_t)); 707 break; 708 709 case sizeof (sin6_t): 710 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 711 sizeof (sin6_t)); 712 break; 713 } 714 715 error = proto_verify_ip_addr(connp->conn_family, sa, len); 716 if (error != 0) { 717 udp_err_ack(q, mp, TSYSERR, error); 718 return; 719 } 720 721 error = udp_do_connect(connp, sa, len, cr, pid); 722 if (error != 0) { 723 if (error < 0) 724 udp_err_ack(q, mp, -error, 0); 725 else 726 udp_err_ack(q, mp, TSYSERR, error); 727 } else { 728 mblk_t *mp1; 729 /* 730 * We have to send a connection confirmation to 731 * keep TLI happy. 732 */ 733 if (connp->conn_family == AF_INET) { 734 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 735 sizeof (sin_t), NULL, 0); 736 } else { 737 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 738 sizeof (sin6_t), NULL, 0); 739 } 740 if (mp1 == NULL) { 741 udp_err_ack(q, mp, TSYSERR, ENOMEM); 742 return; 743 } 744 745 /* 746 * Send ok_ack for T_CONN_REQ 747 */ 748 mp = mi_tpi_ok_ack_alloc(mp); 749 if (mp == NULL) { 750 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 751 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 752 return; 753 } 754 755 putnext(connp->conn_rq, mp); 756 putnext(connp->conn_rq, mp1); 757 } 758 } 759 760 static int 761 udp_tpi_close(queue_t *q, int flags) 762 { 763 conn_t *connp; 764 765 if (flags & SO_FALLBACK) { 766 /* 767 * stream is being closed while in fallback 768 * simply free the resources that were allocated 769 */ 770 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 771 qprocsoff(q); 772 goto done; 773 } 774 775 connp = Q_TO_CONN(q); 776 udp_do_close(connp); 777 done: 778 q->q_ptr = WR(q)->q_ptr = NULL; 779 return (0); 780 } 781 782 static void 783 udp_close_free(conn_t *connp) 784 { 785 udp_t *udp = connp->conn_udp; 786 787 /* If there are any options associated with the stream, free them. */ 788 if (udp->udp_recv_ipp.ipp_fields != 0) 789 ip_pkt_free(&udp->udp_recv_ipp); 790 791 /* 792 * Clear any fields which the kmem_cache constructor clears. 793 * Only udp_connp needs to be preserved. 794 * TBD: We should make this more efficient to avoid clearing 795 * everything. 
796 */ 797 ASSERT(udp->udp_connp == connp); 798 bzero(udp, sizeof (udp_t)); 799 udp->udp_connp = connp; 800 } 801 802 static int 803 udp_do_disconnect(conn_t *connp) 804 { 805 udp_t *udp; 806 udp_fanout_t *udpf; 807 udp_stack_t *us; 808 int error; 809 810 udp = connp->conn_udp; 811 us = udp->udp_us; 812 mutex_enter(&connp->conn_lock); 813 if (udp->udp_state != TS_DATA_XFER) { 814 mutex_exit(&connp->conn_lock); 815 return (-TOUTSTATE); 816 } 817 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 818 us->us_bind_fanout_size)]; 819 mutex_enter(&udpf->uf_lock); 820 if (connp->conn_mcbc_bind) 821 connp->conn_saddr_v6 = ipv6_all_zeros; 822 else 823 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 824 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 825 connp->conn_faddr_v6 = ipv6_all_zeros; 826 connp->conn_fport = 0; 827 udp->udp_state = TS_IDLE; 828 mutex_exit(&udpf->uf_lock); 829 830 /* Remove any remnants of mapped address binding */ 831 if (connp->conn_family == AF_INET6) 832 connp->conn_ipversion = IPV6_VERSION; 833 834 connp->conn_v6lastdst = ipv6_all_zeros; 835 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 836 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 837 mutex_exit(&connp->conn_lock); 838 if (error != 0) 839 return (error); 840 841 /* 842 * Tell IP to remove the full binding and revert 843 * to the local address binding. 844 */ 845 return (ip_laddr_fanout_insert(connp)); 846 } 847 848 static void 849 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 850 { 851 conn_t *connp = Q_TO_CONN(q); 852 int error; 853 854 /* 855 * Allocate the largest primitive we need to send back 856 * T_error_ack is > than T_ok_ack 857 */ 858 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 859 if (mp == NULL) { 860 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 861 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 862 return; 863 } 864 865 error = udp_do_disconnect(connp); 866 867 if (error != 0) { 868 if (error < 0) { 869 udp_err_ack(q, mp, -error, 0); 870 } else { 871 udp_err_ack(q, mp, TSYSERR, error); 872 } 873 } else { 874 mp = mi_tpi_ok_ack_alloc(mp); 875 ASSERT(mp != NULL); 876 qreply(q, mp); 877 } 878 } 879 880 int 881 udp_disconnect(conn_t *connp) 882 { 883 int error; 884 885 connp->conn_dgram_errind = B_FALSE; 886 error = udp_do_disconnect(connp); 887 if (error < 0) 888 error = proto_tlitosyserr(-error); 889 890 return (error); 891 } 892 893 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 894 static void 895 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 896 { 897 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 898 qreply(q, mp); 899 } 900 901 /* Shorthand to generate and send TPI error acks to our client */ 902 static void 903 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 904 t_scalar_t t_error, int sys_error) 905 { 906 struct T_error_ack *teackp; 907 908 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 909 M_PCPROTO, T_ERROR_ACK)) != NULL) { 910 teackp = (struct T_error_ack *)mp->b_rptr; 911 teackp->ERROR_prim = primitive; 912 teackp->TLI_error = t_error; 913 teackp->UNIX_error = sys_error; 914 qreply(q, mp); 915 } 916 } 917 918 /*ARGSUSED2*/ 919 static int 920 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 921 { 922 int i; 923 udp_t *udp = Q_TO_UDP(q); 924 udp_stack_t *us = udp->udp_us; 925 926 for (i = 0; i < us->us_num_epriv_ports; i++) { 927 if (us->us_epriv_ports[i] != 0) 928 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 929 } 930 return (0); 931 } 932 933 /* ARGSUSED1 */ 934 static int 935 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 936 cred_t *cr) 937 { 938 long new_value; 939 int i; 940 udp_t *udp = Q_TO_UDP(q); 941 udp_stack_t *us = 
udp->udp_us; 942 943 /* 944 * Fail the request if the new value does not lie within the 945 * port number limits. 946 */ 947 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 948 new_value <= 0 || new_value >= 65536) { 949 return (EINVAL); 950 } 951 952 /* Check if the value is already in the list */ 953 for (i = 0; i < us->us_num_epriv_ports; i++) { 954 if (new_value == us->us_epriv_ports[i]) { 955 return (EEXIST); 956 } 957 } 958 /* Find an empty slot */ 959 for (i = 0; i < us->us_num_epriv_ports; i++) { 960 if (us->us_epriv_ports[i] == 0) 961 break; 962 } 963 if (i == us->us_num_epriv_ports) { 964 return (EOVERFLOW); 965 } 966 967 /* Set the new value */ 968 us->us_epriv_ports[i] = (in_port_t)new_value; 969 return (0); 970 } 971 972 /* ARGSUSED1 */ 973 static int 974 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 975 cred_t *cr) 976 { 977 long new_value; 978 int i; 979 udp_t *udp = Q_TO_UDP(q); 980 udp_stack_t *us = udp->udp_us; 981 982 /* 983 * Fail the request if the new value does not lie within the 984 * port number limits. 985 */ 986 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 987 new_value <= 0 || new_value >= 65536) { 988 return (EINVAL); 989 } 990 991 /* Check that the value is already in the list */ 992 for (i = 0; i < us->us_num_epriv_ports; i++) { 993 if (us->us_epriv_ports[i] == new_value) 994 break; 995 } 996 if (i == us->us_num_epriv_ports) { 997 return (ESRCH); 998 } 999 1000 /* Clear the value */ 1001 us->us_epriv_ports[i] = 0; 1002 return (0); 1003 } 1004 1005 /* At minimum we need 4 bytes of UDP header */ 1006 #define ICMP_MIN_UDP_HDR 4 1007 1008 /* 1009 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 1010 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1011 * Assumes that IP has pulled up everything up to and including the ICMP header. 
1012 */ 1013 /* ARGSUSED2 */ 1014 static void 1015 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 1016 { 1017 conn_t *connp = (conn_t *)arg1; 1018 icmph_t *icmph; 1019 ipha_t *ipha; 1020 int iph_hdr_length; 1021 udpha_t *udpha; 1022 sin_t sin; 1023 sin6_t sin6; 1024 mblk_t *mp1; 1025 int error = 0; 1026 udp_t *udp = connp->conn_udp; 1027 1028 ipha = (ipha_t *)mp->b_rptr; 1029 1030 ASSERT(OK_32PTR(mp->b_rptr)); 1031 1032 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1033 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1034 udp_icmp_error_ipv6(connp, mp, ira); 1035 return; 1036 } 1037 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1038 1039 /* Skip past the outer IP and ICMP headers */ 1040 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 1041 iph_hdr_length = ira->ira_ip_hdr_length; 1042 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1043 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 1044 1045 /* Skip past the inner IP and find the ULP header */ 1046 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1047 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1048 1049 switch (icmph->icmph_type) { 1050 case ICMP_DEST_UNREACHABLE: 1051 switch (icmph->icmph_code) { 1052 case ICMP_FRAGMENTATION_NEEDED: { 1053 ipha_t *ipha; 1054 ip_xmit_attr_t *ixa; 1055 /* 1056 * IP has already adjusted the path MTU. 1057 * But we need to adjust DF for IPv4. 1058 */ 1059 if (connp->conn_ipversion != IPV4_VERSION) 1060 break; 1061 1062 ixa = conn_get_ixa(connp, B_FALSE); 1063 if (ixa == NULL || ixa->ixa_ire == NULL) { 1064 /* 1065 * Some other thread holds conn_ixa. We will 1066 * redo this on the next ICMP too big. 
1067 */ 1068 if (ixa != NULL) 1069 ixa_refrele(ixa); 1070 break; 1071 } 1072 (void) ip_get_pmtu(ixa); 1073 1074 mutex_enter(&connp->conn_lock); 1075 ipha = (ipha_t *)connp->conn_ht_iphc; 1076 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 1077 ipha->ipha_fragment_offset_and_flags |= 1078 IPH_DF_HTONS; 1079 } else { 1080 ipha->ipha_fragment_offset_and_flags &= 1081 ~IPH_DF_HTONS; 1082 } 1083 mutex_exit(&connp->conn_lock); 1084 ixa_refrele(ixa); 1085 break; 1086 } 1087 case ICMP_PORT_UNREACHABLE: 1088 case ICMP_PROTOCOL_UNREACHABLE: 1089 error = ECONNREFUSED; 1090 break; 1091 default: 1092 /* Transient errors */ 1093 break; 1094 } 1095 break; 1096 default: 1097 /* Transient errors */ 1098 break; 1099 } 1100 if (error == 0) { 1101 freemsg(mp); 1102 return; 1103 } 1104 1105 /* 1106 * Deliver T_UDERROR_IND when the application has asked for it. 1107 * The socket layer enables this automatically when connected. 1108 */ 1109 if (!connp->conn_dgram_errind) { 1110 freemsg(mp); 1111 return; 1112 } 1113 1114 switch (connp->conn_family) { 1115 case AF_INET: 1116 sin = sin_null; 1117 sin.sin_family = AF_INET; 1118 sin.sin_addr.s_addr = ipha->ipha_dst; 1119 sin.sin_port = udpha->uha_dst_port; 1120 if (IPCL_IS_NONSTR(connp)) { 1121 mutex_enter(&connp->conn_lock); 1122 if (udp->udp_state == TS_DATA_XFER) { 1123 if (sin.sin_port == connp->conn_fport && 1124 sin.sin_addr.s_addr == 1125 connp->conn_faddr_v4) { 1126 mutex_exit(&connp->conn_lock); 1127 (*connp->conn_upcalls->su_set_error) 1128 (connp->conn_upper_handle, error); 1129 goto done; 1130 } 1131 } else { 1132 udp->udp_delayed_error = error; 1133 *((sin_t *)&udp->udp_delayed_addr) = sin; 1134 } 1135 mutex_exit(&connp->conn_lock); 1136 } else { 1137 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1138 NULL, 0, error); 1139 if (mp1 != NULL) 1140 putnext(connp->conn_rq, mp1); 1141 } 1142 break; 1143 case AF_INET6: 1144 sin6 = sin6_null; 1145 sin6.sin6_family = AF_INET6; 1146 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, 
&sin6.sin6_addr); 1147 sin6.sin6_port = udpha->uha_dst_port; 1148 if (IPCL_IS_NONSTR(connp)) { 1149 mutex_enter(&connp->conn_lock); 1150 if (udp->udp_state == TS_DATA_XFER) { 1151 if (sin6.sin6_port == connp->conn_fport && 1152 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1153 &connp->conn_faddr_v6)) { 1154 mutex_exit(&connp->conn_lock); 1155 (*connp->conn_upcalls->su_set_error) 1156 (connp->conn_upper_handle, error); 1157 goto done; 1158 } 1159 } else { 1160 udp->udp_delayed_error = error; 1161 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1162 } 1163 mutex_exit(&connp->conn_lock); 1164 } else { 1165 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1166 NULL, 0, error); 1167 if (mp1 != NULL) 1168 putnext(connp->conn_rq, mp1); 1169 } 1170 break; 1171 } 1172 done: 1173 freemsg(mp); 1174 } 1175 1176 /* 1177 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1178 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1179 * Assumes that IP has pulled up all the extension headers as well as the 1180 * ICMPv6 header. 
1181 */ 1182 static void 1183 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1184 { 1185 icmp6_t *icmp6; 1186 ip6_t *ip6h, *outer_ip6h; 1187 uint16_t iph_hdr_length; 1188 uint8_t *nexthdrp; 1189 udpha_t *udpha; 1190 sin6_t sin6; 1191 mblk_t *mp1; 1192 int error = 0; 1193 udp_t *udp = connp->conn_udp; 1194 udp_stack_t *us = udp->udp_us; 1195 1196 outer_ip6h = (ip6_t *)mp->b_rptr; 1197 #ifdef DEBUG 1198 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1199 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1200 else 1201 iph_hdr_length = IPV6_HDR_LEN; 1202 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1203 #endif 1204 /* Skip past the outer IP and ICMP headers */ 1205 iph_hdr_length = ira->ira_ip_hdr_length; 1206 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1207 1208 /* Skip past the inner IP and find the ULP header */ 1209 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1210 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1211 freemsg(mp); 1212 return; 1213 } 1214 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1215 1216 switch (icmp6->icmp6_type) { 1217 case ICMP6_DST_UNREACH: 1218 switch (icmp6->icmp6_code) { 1219 case ICMP6_DST_UNREACH_NOPORT: 1220 error = ECONNREFUSED; 1221 break; 1222 case ICMP6_DST_UNREACH_ADMIN: 1223 case ICMP6_DST_UNREACH_NOROUTE: 1224 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1225 case ICMP6_DST_UNREACH_ADDR: 1226 /* Transient errors */ 1227 break; 1228 default: 1229 break; 1230 } 1231 break; 1232 case ICMP6_PACKET_TOO_BIG: { 1233 struct T_unitdata_ind *tudi; 1234 struct T_opthdr *toh; 1235 size_t udi_size; 1236 mblk_t *newmp; 1237 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1238 sizeof (struct ip6_mtuinfo); 1239 sin6_t *sin6; 1240 struct ip6_mtuinfo *mtuinfo; 1241 1242 /* 1243 * If the application has requested to receive path mtu 1244 * information, send up an empty message containing an 1245 * IPV6_PATHMTU ancillary data item. 
1246 */ 1247 if (!connp->conn_ipv6_recvpathmtu) 1248 break; 1249 1250 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1251 opt_length; 1252 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1253 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1254 break; 1255 } 1256 1257 /* 1258 * newmp->b_cont is left to NULL on purpose. This is an 1259 * empty message containing only ancillary data. 1260 */ 1261 newmp->b_datap->db_type = M_PROTO; 1262 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1263 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1264 tudi->PRIM_type = T_UNITDATA_IND; 1265 tudi->SRC_length = sizeof (sin6_t); 1266 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1267 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1268 tudi->OPT_length = opt_length; 1269 1270 sin6 = (sin6_t *)&tudi[1]; 1271 bzero(sin6, sizeof (sin6_t)); 1272 sin6->sin6_family = AF_INET6; 1273 sin6->sin6_addr = connp->conn_faddr_v6; 1274 1275 toh = (struct T_opthdr *)&sin6[1]; 1276 toh->level = IPPROTO_IPV6; 1277 toh->name = IPV6_PATHMTU; 1278 toh->len = opt_length; 1279 toh->status = 0; 1280 1281 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1282 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1283 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1284 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1285 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1286 /* 1287 * We've consumed everything we need from the original 1288 * message. Free it, then send our empty message. 
1289 */ 1290 freemsg(mp); 1291 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); 1292 return; 1293 } 1294 case ICMP6_TIME_EXCEEDED: 1295 /* Transient errors */ 1296 break; 1297 case ICMP6_PARAM_PROB: 1298 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1299 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1300 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1301 (uchar_t *)nexthdrp) { 1302 error = ECONNREFUSED; 1303 break; 1304 } 1305 break; 1306 } 1307 if (error == 0) { 1308 freemsg(mp); 1309 return; 1310 } 1311 1312 /* 1313 * Deliver T_UDERROR_IND when the application has asked for it. 1314 * The socket layer enables this automatically when connected. 1315 */ 1316 if (!connp->conn_dgram_errind) { 1317 freemsg(mp); 1318 return; 1319 } 1320 1321 sin6 = sin6_null; 1322 sin6.sin6_family = AF_INET6; 1323 sin6.sin6_addr = ip6h->ip6_dst; 1324 sin6.sin6_port = udpha->uha_dst_port; 1325 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1326 1327 if (IPCL_IS_NONSTR(connp)) { 1328 mutex_enter(&connp->conn_lock); 1329 if (udp->udp_state == TS_DATA_XFER) { 1330 if (sin6.sin6_port == connp->conn_fport && 1331 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1332 &connp->conn_faddr_v6)) { 1333 mutex_exit(&connp->conn_lock); 1334 (*connp->conn_upcalls->su_set_error) 1335 (connp->conn_upper_handle, error); 1336 goto done; 1337 } 1338 } else { 1339 udp->udp_delayed_error = error; 1340 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1341 } 1342 mutex_exit(&connp->conn_lock); 1343 } else { 1344 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1345 NULL, 0, error); 1346 if (mp1 != NULL) 1347 putnext(connp->conn_rq, mp1); 1348 } 1349 done: 1350 freemsg(mp); 1351 } 1352 1353 /* 1354 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1355 * The local address is filled in if endpoint is bound. 
The remote address 1356 * is filled in if remote address has been precified ("connected endpoint") 1357 * (The concept of connected CLTS sockets is alien to published TPI 1358 * but we support it anyway). 1359 */ 1360 static void 1361 udp_addr_req(queue_t *q, mblk_t *mp) 1362 { 1363 struct sockaddr *sa; 1364 mblk_t *ackmp; 1365 struct T_addr_ack *taa; 1366 udp_t *udp = Q_TO_UDP(q); 1367 conn_t *connp = udp->udp_connp; 1368 uint_t addrlen; 1369 1370 /* Make it large enough for worst case */ 1371 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1372 2 * sizeof (sin6_t), 1); 1373 if (ackmp == NULL) { 1374 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1375 return; 1376 } 1377 taa = (struct T_addr_ack *)ackmp->b_rptr; 1378 1379 bzero(taa, sizeof (struct T_addr_ack)); 1380 ackmp->b_wptr = (uchar_t *)&taa[1]; 1381 1382 taa->PRIM_type = T_ADDR_ACK; 1383 ackmp->b_datap->db_type = M_PCPROTO; 1384 1385 if (connp->conn_family == AF_INET) 1386 addrlen = sizeof (sin_t); 1387 else 1388 addrlen = sizeof (sin6_t); 1389 1390 mutex_enter(&connp->conn_lock); 1391 /* 1392 * Note: Following code assumes 32 bit alignment of basic 1393 * data structures like sin_t and struct T_addr_ack. 
1394 */ 1395 if (udp->udp_state != TS_UNBND) { 1396 /* 1397 * Fill in local address first 1398 */ 1399 taa->LOCADDR_offset = sizeof (*taa); 1400 taa->LOCADDR_length = addrlen; 1401 sa = (struct sockaddr *)&taa[1]; 1402 (void) conn_getsockname(connp, sa, &addrlen); 1403 ackmp->b_wptr += addrlen; 1404 } 1405 if (udp->udp_state == TS_DATA_XFER) { 1406 /* 1407 * connected, fill remote address too 1408 */ 1409 taa->REMADDR_length = addrlen; 1410 /* assumed 32-bit alignment */ 1411 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1412 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1413 (void) conn_getpeername(connp, sa, &addrlen); 1414 ackmp->b_wptr += addrlen; 1415 } 1416 mutex_exit(&connp->conn_lock); 1417 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1418 qreply(q, ackmp); 1419 } 1420 1421 static void 1422 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1423 { 1424 conn_t *connp = udp->udp_connp; 1425 1426 if (connp->conn_family == AF_INET) { 1427 *tap = udp_g_t_info_ack_ipv4; 1428 } else { 1429 *tap = udp_g_t_info_ack_ipv6; 1430 } 1431 tap->CURRENT_state = udp->udp_state; 1432 tap->OPT_size = udp_max_optsize; 1433 } 1434 1435 static void 1436 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1437 t_uscalar_t cap_bits1) 1438 { 1439 tcap->CAP_bits1 = 0; 1440 1441 if (cap_bits1 & TC1_INFO) { 1442 udp_copy_info(&tcap->INFO_ack, udp); 1443 tcap->CAP_bits1 |= TC1_INFO; 1444 } 1445 } 1446 1447 /* 1448 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1449 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1450 * udp_g_t_info_ack. The current state of the stream is copied from 1451 * udp_state. 
1452 */ 1453 static void 1454 udp_capability_req(queue_t *q, mblk_t *mp) 1455 { 1456 t_uscalar_t cap_bits1; 1457 struct T_capability_ack *tcap; 1458 udp_t *udp = Q_TO_UDP(q); 1459 1460 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1461 1462 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1463 mp->b_datap->db_type, T_CAPABILITY_ACK); 1464 if (!mp) 1465 return; 1466 1467 tcap = (struct T_capability_ack *)mp->b_rptr; 1468 udp_do_capability_ack(udp, tcap, cap_bits1); 1469 1470 qreply(q, mp); 1471 } 1472 1473 /* 1474 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1475 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1476 * The current state of the stream is copied from udp_state. 1477 */ 1478 static void 1479 udp_info_req(queue_t *q, mblk_t *mp) 1480 { 1481 udp_t *udp = Q_TO_UDP(q); 1482 1483 /* Create a T_INFO_ACK message. */ 1484 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1485 T_INFO_ACK); 1486 if (!mp) 1487 return; 1488 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1489 qreply(q, mp); 1490 } 1491 1492 /* For /dev/udp aka AF_INET open */ 1493 static int 1494 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1495 { 1496 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1497 } 1498 1499 /* For /dev/udp6 aka AF_INET6 open */ 1500 static int 1501 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1502 { 1503 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1504 } 1505 1506 /* 1507 * This is the open routine for udp. It allocates a udp_t structure for 1508 * the stream and, on the first open of the module, creates an ND table. 1509 */ 1510 static int 1511 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1512 boolean_t isv6) 1513 { 1514 udp_t *udp; 1515 conn_t *connp; 1516 dev_t conn_dev; 1517 vmem_t *minor_arena; 1518 int err; 1519 1520 /* If the stream is already open, return immediately. 
*/ 1521 if (q->q_ptr != NULL) 1522 return (0); 1523 1524 if (sflag == MODOPEN) 1525 return (EINVAL); 1526 1527 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1528 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1529 minor_arena = ip_minor_arena_la; 1530 } else { 1531 /* 1532 * Either minor numbers in the large arena were exhausted 1533 * or a non socket application is doing the open. 1534 * Try to allocate from the small arena. 1535 */ 1536 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1537 return (EBUSY); 1538 1539 minor_arena = ip_minor_arena_sa; 1540 } 1541 1542 if (flag & SO_FALLBACK) { 1543 /* 1544 * Non streams socket needs a stream to fallback to 1545 */ 1546 RD(q)->q_ptr = (void *)conn_dev; 1547 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1548 WR(q)->q_ptr = (void *)minor_arena; 1549 qprocson(q); 1550 return (0); 1551 } 1552 1553 connp = udp_do_open(credp, isv6, KM_SLEEP, &err); 1554 if (connp == NULL) { 1555 inet_minor_free(minor_arena, conn_dev); 1556 return (err); 1557 } 1558 udp = connp->conn_udp; 1559 1560 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1561 connp->conn_dev = conn_dev; 1562 connp->conn_minor_arena = minor_arena; 1563 1564 /* 1565 * Initialize the udp_t structure for this stream. 1566 */ 1567 q->q_ptr = connp; 1568 WR(q)->q_ptr = connp; 1569 connp->conn_rq = q; 1570 connp->conn_wq = WR(q); 1571 1572 /* 1573 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1574 * need to lock anything. 1575 */ 1576 ASSERT(connp->conn_proto == IPPROTO_UDP); 1577 ASSERT(connp->conn_udp == udp); 1578 ASSERT(udp->udp_connp == connp); 1579 1580 if (flag & SO_SOCKSTR) { 1581 udp->udp_issocket = B_TRUE; 1582 } 1583 1584 WR(q)->q_hiwat = connp->conn_sndbuf; 1585 WR(q)->q_lowat = connp->conn_sndlowat; 1586 1587 qprocson(q); 1588 1589 /* Set the Stream head write offset and high watermark. 
*/ 1590 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1591 (void) proto_set_rx_hiwat(q, connp, 1592 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1593 1594 mutex_enter(&connp->conn_lock); 1595 connp->conn_state_flags &= ~CONN_INCIPIENT; 1596 mutex_exit(&connp->conn_lock); 1597 return (0); 1598 } 1599 1600 /* 1601 * Which UDP options OK to set through T_UNITDATA_REQ... 1602 */ 1603 /* ARGSUSED */ 1604 static boolean_t 1605 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1606 { 1607 return (B_TRUE); 1608 } 1609 1610 /* 1611 * This routine gets default values of certain options whose default 1612 * values are maintained by protcol specific code 1613 */ 1614 int 1615 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1616 { 1617 udp_t *udp = Q_TO_UDP(q); 1618 udp_stack_t *us = udp->udp_us; 1619 int *i1 = (int *)ptr; 1620 1621 switch (level) { 1622 case IPPROTO_IP: 1623 switch (name) { 1624 case IP_MULTICAST_TTL: 1625 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1626 return (sizeof (uchar_t)); 1627 case IP_MULTICAST_LOOP: 1628 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1629 return (sizeof (uchar_t)); 1630 } 1631 break; 1632 case IPPROTO_IPV6: 1633 switch (name) { 1634 case IPV6_MULTICAST_HOPS: 1635 *i1 = IP_DEFAULT_MULTICAST_TTL; 1636 return (sizeof (int)); 1637 case IPV6_MULTICAST_LOOP: 1638 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1639 return (sizeof (int)); 1640 case IPV6_UNICAST_HOPS: 1641 *i1 = us->us_ipv6_hoplimit; 1642 return (sizeof (int)); 1643 } 1644 break; 1645 } 1646 return (-1); 1647 } 1648 1649 /* 1650 * This routine retrieves the current status of socket options. 1651 * It returns the size of the option retrieved, or -1. 
1652 */ 1653 int 1654 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1655 uchar_t *ptr) 1656 { 1657 int *i1 = (int *)ptr; 1658 udp_t *udp = connp->conn_udp; 1659 int len; 1660 conn_opt_arg_t coas; 1661 int retval; 1662 1663 coas.coa_connp = connp; 1664 coas.coa_ixa = connp->conn_ixa; 1665 coas.coa_ipp = &connp->conn_xmit_ipp; 1666 coas.coa_ancillary = B_FALSE; 1667 coas.coa_changed = 0; 1668 1669 /* 1670 * We assume that the optcom framework has checked for the set 1671 * of levels and names that are supported, hence we don't worry 1672 * about rejecting based on that. 1673 * First check for UDP specific handling, then pass to common routine. 1674 */ 1675 switch (level) { 1676 case IPPROTO_IP: 1677 /* 1678 * Only allow IPv4 option processing on IPv4 sockets. 1679 */ 1680 if (connp->conn_family != AF_INET) 1681 return (-1); 1682 1683 switch (name) { 1684 case IP_OPTIONS: 1685 case T_IP_OPTIONS: 1686 mutex_enter(&connp->conn_lock); 1687 if (!(udp->udp_recv_ipp.ipp_fields & 1688 IPPF_IPV4_OPTIONS)) { 1689 mutex_exit(&connp->conn_lock); 1690 return (0); 1691 } 1692 1693 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1694 ASSERT(len != 0); 1695 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1696 mutex_exit(&connp->conn_lock); 1697 return (len); 1698 } 1699 break; 1700 case IPPROTO_UDP: 1701 switch (name) { 1702 case UDP_NAT_T_ENDPOINT: 1703 mutex_enter(&connp->conn_lock); 1704 *i1 = udp->udp_nat_t_endpoint; 1705 mutex_exit(&connp->conn_lock); 1706 return (sizeof (int)); 1707 case UDP_RCVHDR: 1708 mutex_enter(&connp->conn_lock); 1709 *i1 = udp->udp_rcvhdr ? 1 : 0; 1710 mutex_exit(&connp->conn_lock); 1711 return (sizeof (int)); 1712 } 1713 } 1714 mutex_enter(&connp->conn_lock); 1715 retval = conn_opt_get(&coas, level, name, ptr); 1716 mutex_exit(&connp->conn_lock); 1717 return (retval); 1718 } 1719 1720 /* 1721 * This routine retrieves the current status of socket options. 1722 * It returns the size of the option retrieved, or -1. 
1723 */ 1724 int 1725 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1726 { 1727 conn_t *connp = Q_TO_CONN(q); 1728 int err; 1729 1730 err = udp_opt_get(connp, level, name, ptr); 1731 return (err); 1732 } 1733 1734 /* 1735 * This routine sets socket options. 1736 */ 1737 int 1738 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1739 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1740 { 1741 conn_t *connp = coa->coa_connp; 1742 ip_xmit_attr_t *ixa = coa->coa_ixa; 1743 udp_t *udp = connp->conn_udp; 1744 udp_stack_t *us = udp->udp_us; 1745 int *i1 = (int *)invalp; 1746 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1747 int error; 1748 1749 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1750 /* 1751 * First do UDP specific sanity checks and handle UDP specific 1752 * options. Note that some IPPROTO_UDP options are handled 1753 * by conn_opt_set. 1754 */ 1755 switch (level) { 1756 case SOL_SOCKET: 1757 switch (name) { 1758 case SO_SNDBUF: 1759 if (*i1 > us->us_max_buf) { 1760 return (ENOBUFS); 1761 } 1762 break; 1763 case SO_RCVBUF: 1764 if (*i1 > us->us_max_buf) { 1765 return (ENOBUFS); 1766 } 1767 break; 1768 1769 case SCM_UCRED: { 1770 struct ucred_s *ucr; 1771 cred_t *newcr; 1772 ts_label_t *tsl; 1773 1774 /* 1775 * Only sockets that have proper privileges and are 1776 * bound to MLPs will have any other value here, so 1777 * this implicitly tests for privilege to set label. 1778 */ 1779 if (connp->conn_mlp_type == mlptSingle) 1780 break; 1781 1782 ucr = (struct ucred_s *)invalp; 1783 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || 1784 ucr->uc_labeloff < sizeof (*ucr) || 1785 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1786 return (EINVAL); 1787 if (!checkonly) { 1788 /* 1789 * Set ixa_tsl to the new label. 1790 * We assume that crgetzoneid doesn't change 1791 * as part of the SCM_UCRED. 
1792 */ 1793 ASSERT(cr != NULL); 1794 if ((tsl = crgetlabel(cr)) == NULL) 1795 return (EINVAL); 1796 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1797 tsl->tsl_doi, KM_NOSLEEP); 1798 if (newcr == NULL) 1799 return (ENOSR); 1800 ASSERT(newcr->cr_label != NULL); 1801 /* 1802 * Move the hold on the cr_label to ixa_tsl by 1803 * setting cr_label to NULL. Then release newcr. 1804 */ 1805 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1806 ixa->ixa_flags |= IXAF_UCRED_TSL; 1807 newcr->cr_label = NULL; 1808 crfree(newcr); 1809 coa->coa_changed |= COA_HEADER_CHANGED; 1810 coa->coa_changed |= COA_WROFF_CHANGED; 1811 } 1812 /* Fully handled this option. */ 1813 return (0); 1814 } 1815 } 1816 break; 1817 case IPPROTO_UDP: 1818 switch (name) { 1819 case UDP_NAT_T_ENDPOINT: 1820 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1821 return (error); 1822 } 1823 1824 /* 1825 * Use conn_family instead so we can avoid ambiguitites 1826 * with AF_INET6 sockets that may switch from IPv4 1827 * to IPv6. 1828 */ 1829 if (connp->conn_family != AF_INET) { 1830 return (EAFNOSUPPORT); 1831 } 1832 1833 if (!checkonly) { 1834 mutex_enter(&connp->conn_lock); 1835 udp->udp_nat_t_endpoint = onoff; 1836 mutex_exit(&connp->conn_lock); 1837 coa->coa_changed |= COA_HEADER_CHANGED; 1838 coa->coa_changed |= COA_WROFF_CHANGED; 1839 } 1840 /* Fully handled this option. */ 1841 return (0); 1842 case UDP_RCVHDR: 1843 mutex_enter(&connp->conn_lock); 1844 udp->udp_rcvhdr = onoff; 1845 mutex_exit(&connp->conn_lock); 1846 return (0); 1847 } 1848 break; 1849 } 1850 error = conn_opt_set(coa, level, name, inlen, invalp, 1851 checkonly, cr); 1852 return (error); 1853 } 1854 1855 /* 1856 * This routine sets socket options. 
1857 */ 1858 int 1859 udp_opt_set(conn_t *connp, uint_t optset_context, int level, 1860 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 1861 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr) 1862 { 1863 udp_t *udp = connp->conn_udp; 1864 int err; 1865 conn_opt_arg_t coas, *coa; 1866 boolean_t checkonly; 1867 udp_stack_t *us = udp->udp_us; 1868 1869 switch (optset_context) { 1870 case SETFN_OPTCOM_CHECKONLY: 1871 checkonly = B_TRUE; 1872 /* 1873 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 1874 * inlen != 0 implies value supplied and 1875 * we have to "pretend" to set it. 1876 * inlen == 0 implies that there is no 1877 * value part in T_CHECK request and just validation 1878 * done elsewhere should be enough, we just return here. 1879 */ 1880 if (inlen == 0) { 1881 *outlenp = 0; 1882 return (0); 1883 } 1884 break; 1885 case SETFN_OPTCOM_NEGOTIATE: 1886 checkonly = B_FALSE; 1887 break; 1888 case SETFN_UD_NEGOTIATE: 1889 case SETFN_CONN_NEGOTIATE: 1890 checkonly = B_FALSE; 1891 /* 1892 * Negotiating local and "association-related" options 1893 * through T_UNITDATA_REQ. 1894 * 1895 * Following routine can filter out ones we do not 1896 * want to be "set" this way. 
1897 */ 1898 if (!udp_opt_allow_udr_set(level, name)) { 1899 *outlenp = 0; 1900 return (EINVAL); 1901 } 1902 break; 1903 default: 1904 /* 1905 * We should never get here 1906 */ 1907 *outlenp = 0; 1908 return (EINVAL); 1909 } 1910 1911 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 1912 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 1913 1914 if (thisdg_attrs != NULL) { 1915 /* Options from T_UNITDATA_REQ */ 1916 coa = (conn_opt_arg_t *)thisdg_attrs; 1917 ASSERT(coa->coa_connp == connp); 1918 ASSERT(coa->coa_ixa != NULL); 1919 ASSERT(coa->coa_ipp != NULL); 1920 ASSERT(coa->coa_ancillary); 1921 } else { 1922 coa = &coas; 1923 coas.coa_connp = connp; 1924 /* Get a reference on conn_ixa to prevent concurrent mods */ 1925 coas.coa_ixa = conn_get_ixa(connp, B_TRUE); 1926 if (coas.coa_ixa == NULL) { 1927 *outlenp = 0; 1928 return (ENOMEM); 1929 } 1930 coas.coa_ipp = &connp->conn_xmit_ipp; 1931 coas.coa_ancillary = B_FALSE; 1932 coas.coa_changed = 0; 1933 } 1934 1935 err = udp_do_opt_set(coa, level, name, inlen, invalp, 1936 cr, checkonly); 1937 if (err != 0) { 1938 errout: 1939 if (!coa->coa_ancillary) 1940 ixa_refrele(coa->coa_ixa); 1941 *outlenp = 0; 1942 return (err); 1943 } 1944 /* Handle DHCPINIT here outside of lock */ 1945 if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) { 1946 uint_t ifindex; 1947 ill_t *ill; 1948 1949 ifindex = *(uint_t *)invalp; 1950 if (ifindex == 0) { 1951 ill = NULL; 1952 } else { 1953 ill = ill_lookup_on_ifindex(ifindex, B_FALSE, 1954 coa->coa_ixa->ixa_ipst); 1955 if (ill == NULL) { 1956 err = ENXIO; 1957 goto errout; 1958 } 1959 1960 mutex_enter(&ill->ill_lock); 1961 if (ill->ill_state_flags & ILL_CONDEMNED) { 1962 mutex_exit(&ill->ill_lock); 1963 ill_refrele(ill); 1964 err = ENXIO; 1965 goto errout; 1966 } 1967 if (IS_VNI(ill)) { 1968 mutex_exit(&ill->ill_lock); 1969 ill_refrele(ill); 1970 err = EINVAL; 1971 goto errout; 1972 } 1973 } 1974 mutex_enter(&connp->conn_lock); 1975 1976 if (connp->conn_dhcpinit_ill != NULL) 
{ 1977 /* 1978 * We've locked the conn so conn_cleanup_ill() 1979 * cannot clear conn_dhcpinit_ill -- so it's 1980 * safe to access the ill. 1981 */ 1982 ill_t *oill = connp->conn_dhcpinit_ill; 1983 1984 ASSERT(oill->ill_dhcpinit != 0); 1985 atomic_dec_32(&oill->ill_dhcpinit); 1986 ill_set_inputfn(connp->conn_dhcpinit_ill); 1987 connp->conn_dhcpinit_ill = NULL; 1988 } 1989 1990 if (ill != NULL) { 1991 connp->conn_dhcpinit_ill = ill; 1992 atomic_inc_32(&ill->ill_dhcpinit); 1993 ill_set_inputfn(ill); 1994 mutex_exit(&connp->conn_lock); 1995 mutex_exit(&ill->ill_lock); 1996 ill_refrele(ill); 1997 } else { 1998 mutex_exit(&connp->conn_lock); 1999 } 2000 } 2001 2002 /* 2003 * Common case of OK return with outval same as inval. 2004 */ 2005 if (invalp != outvalp) { 2006 /* don't trust bcopy for identical src/dst */ 2007 (void) bcopy(invalp, outvalp, inlen); 2008 } 2009 *outlenp = inlen; 2010 2011 /* 2012 * If this was not ancillary data, then we rebuild the headers, 2013 * update the IRE/NCE, and IPsec as needed. 2014 * Since the label depends on the destination we go through 2015 * ip_set_destination first. 2016 */ 2017 if (coa->coa_ancillary) { 2018 return (0); 2019 } 2020 2021 if (coa->coa_changed & COA_ROUTE_CHANGED) { 2022 in6_addr_t saddr, faddr, nexthop; 2023 in_port_t fport; 2024 2025 /* 2026 * We clear lastdst to make sure we pick up the change 2027 * next time sending. 2028 * If we are connected we re-cache the information. 2029 * We ignore errors to preserve BSD behavior. 2030 * Note that we don't redo IPsec policy lookup here 2031 * since the final destination (or source) didn't change. 
2032 */ 2033 mutex_enter(&connp->conn_lock); 2034 connp->conn_v6lastdst = ipv6_all_zeros; 2035 2036 ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa, 2037 &connp->conn_faddr_v6, &nexthop); 2038 saddr = connp->conn_saddr_v6; 2039 faddr = connp->conn_faddr_v6; 2040 fport = connp->conn_fport; 2041 mutex_exit(&connp->conn_lock); 2042 2043 if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) && 2044 !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) { 2045 (void) ip_attr_connect(connp, coa->coa_ixa, 2046 &saddr, &faddr, &nexthop, fport, NULL, NULL, 2047 IPDF_ALLOW_MCBC | IPDF_VERIFY_DST); 2048 } 2049 } 2050 2051 ixa_refrele(coa->coa_ixa); 2052 2053 if (coa->coa_changed & COA_HEADER_CHANGED) { 2054 /* 2055 * Rebuild the header template if we are connected. 2056 * Otherwise clear conn_v6lastdst so we rebuild the header 2057 * in the data path. 2058 */ 2059 mutex_enter(&connp->conn_lock); 2060 if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) && 2061 !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) { 2062 err = udp_build_hdr_template(connp, 2063 &connp->conn_saddr_v6, &connp->conn_faddr_v6, 2064 connp->conn_fport, connp->conn_flowinfo); 2065 if (err != 0) { 2066 mutex_exit(&connp->conn_lock); 2067 return (err); 2068 } 2069 } else { 2070 connp->conn_v6lastdst = ipv6_all_zeros; 2071 } 2072 mutex_exit(&connp->conn_lock); 2073 } 2074 if (coa->coa_changed & COA_RCVBUF_CHANGED) { 2075 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2076 connp->conn_rcvbuf); 2077 } 2078 if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) { 2079 connp->conn_wq->q_hiwat = connp->conn_sndbuf; 2080 } 2081 if (coa->coa_changed & COA_WROFF_CHANGED) { 2082 /* Increase wroff if needed */ 2083 uint_t wroff; 2084 2085 mutex_enter(&connp->conn_lock); 2086 wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra; 2087 if (udp->udp_nat_t_endpoint) 2088 wroff += sizeof (uint32_t); 2089 if (wroff > connp->conn_wroff) { 2090 connp->conn_wroff = wroff; 2091 mutex_exit(&connp->conn_lock); 2092 (void) 
proto_set_tx_wroff(connp->conn_rq, connp, wroff); 2093 } else { 2094 mutex_exit(&connp->conn_lock); 2095 } 2096 } 2097 return (err); 2098 } 2099 2100 /* This routine sets socket options. */ 2101 int 2102 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 2103 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 2104 void *thisdg_attrs, cred_t *cr) 2105 { 2106 conn_t *connp = Q_TO_CONN(q); 2107 int error; 2108 2109 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 2110 outlenp, outvalp, thisdg_attrs, cr); 2111 return (error); 2112 } 2113 2114 /* 2115 * Setup IP and UDP headers. 2116 * Returns NULL on allocation failure, in which case data_mp is freed. 2117 */ 2118 mblk_t * 2119 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp, 2120 const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport, 2121 uint32_t flowinfo, mblk_t *data_mp, int *errorp) 2122 { 2123 mblk_t *mp; 2124 udpha_t *udpha; 2125 udp_stack_t *us = connp->conn_netstack->netstack_udp; 2126 uint_t data_len; 2127 uint32_t cksum; 2128 udp_t *udp = connp->conn_udp; 2129 boolean_t insert_spi = udp->udp_nat_t_endpoint; 2130 uint_t ulp_hdr_len; 2131 2132 data_len = msgdsize(data_mp); 2133 ulp_hdr_len = UDPH_SIZE; 2134 if (insert_spi) 2135 ulp_hdr_len += sizeof (uint32_t); 2136 2137 mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo, 2138 ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp); 2139 if (mp == NULL) { 2140 ASSERT(*errorp != 0); 2141 return (NULL); 2142 } 2143 2144 data_len += ulp_hdr_len; 2145 ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length; 2146 2147 udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length); 2148 udpha->uha_src_port = connp->conn_lport; 2149 udpha->uha_dst_port = dstport; 2150 udpha->uha_checksum = 0; 2151 udpha->uha_length = htons(data_len); 2152 2153 /* 2154 * If there was a routing option/header then conn_prepend_hdr 2155 * has massaged it and placed the 
pseudo-header checksum difference 2156 * in the cksum argument. 2157 * 2158 * Setup header length and prepare for ULP checksum done in IP. 2159 * 2160 * We make it easy for IP to include our pseudo header 2161 * by putting our length in uha_checksum. 2162 * The IP source, destination, and length have already been set by 2163 * conn_prepend_hdr. 2164 */ 2165 cksum += data_len; 2166 cksum = (cksum >> 16) + (cksum & 0xFFFF); 2167 ASSERT(cksum < 0x10000); 2168 2169 if (ixa->ixa_flags & IXAF_IS_IPV4) { 2170 ipha_t *ipha = (ipha_t *)mp->b_rptr; 2171 2172 ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen); 2173 2174 /* IP does the checksum if uha_checksum is non-zero */ 2175 if (us->us_do_checksum) { 2176 if (cksum == 0) 2177 udpha->uha_checksum = 0xffff; 2178 else 2179 udpha->uha_checksum = htons(cksum); 2180 } else { 2181 udpha->uha_checksum = 0; 2182 } 2183 } else { 2184 ip6_t *ip6h = (ip6_t *)mp->b_rptr; 2185 2186 ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen); 2187 if (cksum == 0) 2188 udpha->uha_checksum = 0xffff; 2189 else 2190 udpha->uha_checksum = htons(cksum); 2191 } 2192 2193 /* Insert all-0s SPI now. */ 2194 if (insert_spi) 2195 *((uint32_t *)(udpha + 1)) = 0; 2196 2197 return (mp); 2198 } 2199 2200 static int 2201 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src, 2202 const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo) 2203 { 2204 udpha_t *udpha; 2205 int error; 2206 2207 ASSERT(MUTEX_HELD(&connp->conn_lock)); 2208 /* 2209 * We clear lastdst to make sure we don't use the lastdst path 2210 * next time sending since we might not have set v6dst yet. 2211 */ 2212 connp->conn_v6lastdst = ipv6_all_zeros; 2213 2214 error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst, 2215 flowinfo); 2216 if (error != 0) 2217 return (error); 2218 2219 /* 2220 * Any routing header/option has been massaged. The checksum difference 2221 * is stored in conn_sum. 
2222 */ 2223 udpha = (udpha_t *)connp->conn_ht_ulp; 2224 udpha->uha_src_port = connp->conn_lport; 2225 udpha->uha_dst_port = dstport; 2226 udpha->uha_checksum = 0; 2227 udpha->uha_length = htons(UDPH_SIZE); /* Filled in later */ 2228 return (0); 2229 } 2230 2231 /* 2232 * This routine retrieves the value of an ND variable in a udpparam_t 2233 * structure. It is called through nd_getset when a user reads the 2234 * variable. 2235 */ 2236 /* ARGSUSED */ 2237 static int 2238 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2239 { 2240 udpparam_t *udppa = (udpparam_t *)cp; 2241 2242 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 2243 return (0); 2244 } 2245 2246 /* 2247 * Walk through the param array specified registering each element with the 2248 * named dispatch (ND) handler. 2249 */ 2250 static boolean_t 2251 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 2252 { 2253 for (; cnt-- > 0; udppa++) { 2254 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 2255 if (!nd_load(ndp, udppa->udp_param_name, 2256 udp_param_get, udp_param_set, 2257 (caddr_t)udppa)) { 2258 nd_free(ndp); 2259 return (B_FALSE); 2260 } 2261 } 2262 } 2263 if (!nd_load(ndp, "udp_extra_priv_ports", 2264 udp_extra_priv_ports_get, NULL, NULL)) { 2265 nd_free(ndp); 2266 return (B_FALSE); 2267 } 2268 if (!nd_load(ndp, "udp_extra_priv_ports_add", 2269 NULL, udp_extra_priv_ports_add, NULL)) { 2270 nd_free(ndp); 2271 return (B_FALSE); 2272 } 2273 if (!nd_load(ndp, "udp_extra_priv_ports_del", 2274 NULL, udp_extra_priv_ports_del, NULL)) { 2275 nd_free(ndp); 2276 return (B_FALSE); 2277 } 2278 return (B_TRUE); 2279 } 2280 2281 /* This routine sets an ND variable in a udpparam_t structure. */ 2282 /* ARGSUSED */ 2283 static int 2284 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 2285 { 2286 long new_value; 2287 udpparam_t *udppa = (udpparam_t *)cp; 2288 2289 /* 2290 * Fail the request if the new value does not lie within the 2291 * required bounds. 
2292 */ 2293 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2294 new_value < udppa->udp_param_min || 2295 new_value > udppa->udp_param_max) { 2296 return (EINVAL); 2297 } 2298 2299 /* Set the new value */ 2300 udppa->udp_param_value = new_value; 2301 return (0); 2302 } 2303 2304 static mblk_t * 2305 udp_queue_fallback(udp_t *udp, mblk_t *mp) 2306 { 2307 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 2308 if (IPCL_IS_NONSTR(udp->udp_connp)) { 2309 /* 2310 * fallback has started but messages have not been moved yet 2311 */ 2312 if (udp->udp_fallback_queue_head == NULL) { 2313 ASSERT(udp->udp_fallback_queue_tail == NULL); 2314 udp->udp_fallback_queue_head = mp; 2315 udp->udp_fallback_queue_tail = mp; 2316 } else { 2317 ASSERT(udp->udp_fallback_queue_tail != NULL); 2318 udp->udp_fallback_queue_tail->b_next = mp; 2319 udp->udp_fallback_queue_tail = mp; 2320 } 2321 return (NULL); 2322 } else { 2323 /* 2324 * Fallback completed, let the caller putnext() the mblk. 2325 */ 2326 return (mp); 2327 } 2328 } 2329 2330 /* 2331 * Deliver data to ULP. In case we have a socket, and it's falling back to 2332 * TPI, then we'll queue the mp for later processing. 
2333 */ 2334 static void 2335 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2336 { 2337 if (IPCL_IS_NONSTR(connp)) { 2338 udp_t *udp = connp->conn_udp; 2339 int error; 2340 2341 ASSERT(len == msgdsize(mp)); 2342 if ((*connp->conn_upcalls->su_recv) 2343 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2344 mutex_enter(&udp->udp_recv_lock); 2345 if (error == ENOSPC) { 2346 /* 2347 * let's confirm while holding the lock 2348 */ 2349 if ((*connp->conn_upcalls->su_recv) 2350 (connp->conn_upper_handle, NULL, 0, 0, 2351 &error, NULL) < 0) { 2352 ASSERT(error == ENOSPC); 2353 if (error == ENOSPC) { 2354 connp->conn_flow_cntrld = 2355 B_TRUE; 2356 } 2357 } 2358 mutex_exit(&udp->udp_recv_lock); 2359 } else { 2360 ASSERT(error == EOPNOTSUPP); 2361 mp = udp_queue_fallback(udp, mp); 2362 mutex_exit(&udp->udp_recv_lock); 2363 if (mp != NULL) 2364 putnext(connp->conn_rq, mp); 2365 } 2366 } 2367 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2368 } else { 2369 if (is_system_labeled()) { 2370 ASSERT(ira->ira_cred != NULL); 2371 /* 2372 * Provide for protocols above UDP such as RPC 2373 * NOPID leaves db_cpid unchanged. 2374 */ 2375 mblk_setcred(mp, ira->ira_cred, NOPID); 2376 } 2377 2378 putnext(connp->conn_rq, mp); 2379 } 2380 } 2381 2382 /* 2383 * This is the inbound data path. 2384 * IP has already pulled up the IP plus UDP headers and verified alignment 2385 * etc. 
*/
/*
 * Summary of the steps below:
 *  - arg1 is the conn_t; mp is the M_DATA packet starting at the IP header.
 *  - The UDP length field is checked against the length IP reported
 *    (ira_pktlen); on mismatch the packet is freed and udpInErrors bumped.
 *  - For AF_INET conns, received IPv4 options are recorded in udp_recv_ipp
 *    under conn_lock.
 *  - A T_UNITDATA_IND M_PROTO mblk is allocated carrying the source address
 *    (sin_t for AF_INET conns, sin6_t otherwise) followed by any ancillary
 *    data selected by conn_recv_ancillary; the packet is linked via b_cont.
 *  - Unless udp_rcvhdr is set, b_rptr is advanced past the IP+UDP headers.
 *  - udpHCInDatagrams is bumped and the message is handed to udp_ulp_recv().
 * Allocation failures free the packet and bump udpInErrors.
 */
/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t			*connp = (conn_t *)arg1;
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;		/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			udi_size;	/* Size of T_unitdata_ind */
	int			pkt_len;
	udp_t			*udp;
	udpha_t			*udpha;
	ip_pkt_t		ipps;
	ip6_t			*ip6h;
	mblk_t			*mp1;
	uint32_t		udp_ipv4_options_len;
	crb_t			recv_ancillary;
	udp_stack_t		*us;

	ASSERT(connp->conn_flags & IPCL_UDPCONN);

	udp = connp->conn_udp;
	us = udp->udp_us;
	rptr = mp->b_rptr;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(OK_32PTR(rptr));
	ASSERT(ira->ira_pktlen == msgdsize(mp));
	pkt_len = ira->ira_pktlen;

	/*
	 * Get a snapshot of these and allow other threads to change
	 * them after that. We need the same recv_ancillary when determining
	 * the size as when adding the ancillary data items.
	 */
	mutex_enter(&connp->conn_lock);
	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
	recv_ancillary = connp->conn_recv_ancillary;
	mutex_exit(&connp->conn_lock);

	hdr_length = ira->ira_ip_hdr_length;

	/*
	 * IP inspected the UDP header thus all of it must be in the mblk.
	 * UDP length check is performed for IPv6 packets and IPv4 packets
	 * to check if the size of the packet as specified
	 * by the UDP header is the same as the length derived from the IP
	 * header.
	 */
	udpha = (udpha_t *)(rptr + hdr_length);
	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
		goto tossit;

	/* From here on hdr_length covers the IP header plus the UDP header */
	hdr_length += UDPH_SIZE;
	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */

	/* Initialize regardless of IP version */
	ipps.ipp_fields = 0;

	/*
	 * Taken when the packet carries IPv4 options, or when options were
	 * recorded earlier (snapshot length > 0) so the record is refreshed.
	 */
	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
	    udp_ipv4_options_len > 0) &&
	    connp->conn_family == AF_INET) {
		int	err;

		/*
		 * Record/update udp_recv_ipp with the lock
		 * held. Not needed for AF_INET6 sockets
		 * since they don't support a getsockopt of IP_OPTIONS.
		 */
		mutex_enter(&connp->conn_lock);
		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
		    B_TRUE);
		if (err != 0) {
			/* Allocation failed. Drop packet */
			mutex_exit(&connp->conn_lock);
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mutex_exit(&connp->conn_lock);
	}

	if (recv_ancillary.crb_all != 0) {
		/*
		 * Record packet information in the ip_pkt_t
		 */
		if (ira->ira_flags & IRAF_IS_IPV4) {
			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));

			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
		} else {
			uint8_t nexthdrp;

			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
			/*
			 * IPv6 packets can only be received by applications
			 * that are prepared to receive IPv6 addresses.
			 * The IP fanout must ensure this.
			 */
			ASSERT(connp->conn_family == AF_INET6);

			ip6h = (ip6_t *)rptr;

			/* We don't care about the length, but need the ipp */
			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
			    &nexthdrp);
			ASSERT(hdr_length == ira->ira_ip_hdr_length);
			/* Restore */
			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
			ASSERT(nexthdrp == IPPROTO_UDP);
		}
	}

	/*
	 * This is the inbound data path.  Packets are passed upstream as
	 * T_UNITDATA_IND messages.
	 */
	if (connp->conn_family == AF_INET) {
		sin_t *sin;

		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);

		/*
		 * Normally only send up the source address.
		 * If any ancillary data items are wanted we add those.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin_t);
		/* What remains of udi_size is the ancillary (option) area */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
		tudi->OPT_length = udi_size;
		sin = (sin_t *)&tudi[1];
		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
		sin->sin_port = udpha->uha_src_port;
		sin->sin_family = connp->conn_family;
		/* Clear bytes 0-7 of sin_zero with two 32-bit stores */
		*(uint32_t *)&sin->sin_zero[0] = 0;
		*(uint32_t *)&sin->sin_zero[4] = 0;

		/*
		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
		 * IP_RECVTTL has been set.
		 */
		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin[1], udi_size);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		/* What remains of udi_size is the ancillary (option) area */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ira->ira_flags & IRAF_IS_IPV4) {
			in6_addr_t v6dst;

			/* IPv4 packet on an IPv6 socket: use mapped addresses */
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    IPCL_ZONEID(connp), us->us_netstack);
		} else {
			ip6h = (ip6_t *)rptr;

			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope pass up scope id */
			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ira->ira_ruifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
			    us->us_netstack);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = connp->conn_family;

		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin6[1], udi_size);
		}
	}

	/* Walk past the headers unless IP_RECVHDR was set. */
	if (!udp->udp_rcvhdr) {
		mp->b_rptr = rptr + hdr_length;
		pkt_len -= hdr_length;
	}

	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
	udp_ulp_recv(connp, mp1, pkt_len, ira);
	return;

tossit:
	/* UDP length disagreed with the IP-derived length: drop the packet */
	freemsg(mp);
	BUMP_MIB(&us->us_udp_mib, udpInErrors);
}

/*
 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
 * information that can be changing beneath us.
 *
 * mpctl is the request message.  A copy of it is taken up front and
 * returned to the caller; 0 is returned if mpctl is NULL, has no b_cont, or
 * any of the working copies cannot be allocated.
 *
 * Replies are sent with qreply() in this order:
 *  1. the UDP MIB counters (level MIB2_UDP, name 0), using mpctl itself;
 *  2. the IPv4 endpoint table (MIB2_UDP / MIB2_UDP_ENTRY);
 *  3. the IPv4 MLP attribute table (MIB2_UDP / EXPER_XPORT_MLP), freed
 *     instead of sent when empty;
 *  4. the IPv6 endpoint table (MIB2_UDP6 / MIB2_UDP6_ENTRY);
 *  5. the IPv6 MLP attribute table (MIB2_UDP6 / EXPER_XPORT_MLP), freed
 *     instead of sent when empty.
 * Only conns whose zone matches the requesting conn's zone are reported.
 */
mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	mblk_t			*mp_conn_ctl;
	mblk_t			*mp_attr_ctl;
	mblk_t			*mp6_conn_ctl;
	mblk_t			*mp6_attr_ctl;
	mblk_t			*mp_conn_tail;
	mblk_t			*mp_attr_tail;
	mblk_t			*mp6_conn_tail;
	mblk_t			*mp6_attr_tail;
	struct opthdr		*optp;
	mib2_udpEntry_t		ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int			state;
	zoneid_t		zoneid;
	int			i;
	connf_t			*connfp;
	conn_t			*connp = Q_TO_CONN(q);
	int			v4_conn_idx;
	int			v6_conn_idx;
	boolean_t		needattr;
	udp_t			*udp;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
	mblk_t			*mp2ctl;

	/*
	 * make a copy of the original message
	 */
	mp2ctl = copymsg(mpctl);

	/*
	 * The || chain stops at the first failure, so only the copies made
	 * before that point are non-NULL; mp6_attr_ctl is the last one and
	 * therefore never needs freeing on this path.
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		freemsg(mpctl);
		freemsg(mp2ctl);
		return (0);
	}

	/* Remember the requester's zone; connp is reused as a cursor below */
	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
	    sizeof (us->us_udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every bucket of the global conn hash looking for UDP conns */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDPCONN))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/*
			 * Build the MLP attribute entry; needattr records
			 * whether anything non-default was found and the
			 * entry therefore has to be reported.
			 */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}
			if (connp->conn_anon_mlp) {
				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
				needattr = B_TRUE;
			}
			switch (connp->conn_mac_mode) {
			case CONN_MAC_DEFAULT:
				break;
			case CONN_MAC_AWARE:
				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
				needattr = B_TRUE;
				break;
			case CONN_MAC_IMPLICIT:
				mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT;
				needattr = B_TRUE;
				break;
			}
			/* conn_ixa's label is read with conn_lock held */
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER &&
			    connp->conn_ixa->ixa_tsl != NULL) {
				ts_label_t *tsl;

				tsl = connp->conn_ixa->ixa_tsl;
				mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
				mlp.tme_doi = label2doi(tsl);
				mlp.tme_label = *label2bslabel(tsl);
				needattr = B_TRUE;
			}
			mutex_exit(&connp->conn_lock);

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (connp->conn_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress = connp->conn_laddr_v4;
				ude.udpLocalPort = ntohs(connp->conn_lport);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v4;
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude.udpCreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				/* Tie the attribute entry to this table row */
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (connp->conn_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state = state;
				ude6.udp6LocalAddress = connp->conn_laddr_v6;
				ude6.udp6LocalPort = ntohs(connp->conn_lport);
				/* Prefer an explicit scope id over bound_if */
				mutex_enter(&connp->conn_lock);
				if (connp->conn_ixa->ixa_flags &
				    IXAF_SCOPEID_SET) {
					ude6.udp6IfIndex =
					    connp->conn_ixa->ixa_scopeid;
				} else {
					ude6.udp6IfIndex = connp->conn_bound_if;
				}
				mutex_exit(&connp->conn_lock);
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v6;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude6.udp6CreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				/* Tie the attribute entry to this table row */
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	/* Hand back the untouched copy of the original request */
	return (mp2ctl);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to be
 * to do the appropriate locking.
2925 */ 2926 /* ARGSUSED */ 2927 int 2928 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 2929 uchar_t *ptr, int len) 2930 { 2931 switch (level) { 2932 case MIB2_UDP: 2933 return (0); 2934 default: 2935 return (1); 2936 } 2937 } 2938 2939 /* 2940 * This routine creates a T_UDERROR_IND message and passes it upstream. 2941 * The address and options are copied from the T_UNITDATA_REQ message 2942 * passed in mp. This message is freed. 2943 */ 2944 static void 2945 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2946 { 2947 struct T_unitdata_req *tudr; 2948 mblk_t *mp1; 2949 uchar_t *destaddr; 2950 t_scalar_t destlen; 2951 uchar_t *optaddr; 2952 t_scalar_t optlen; 2953 2954 if ((mp->b_wptr < mp->b_rptr) || 2955 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2956 goto done; 2957 } 2958 tudr = (struct T_unitdata_req *)mp->b_rptr; 2959 destaddr = mp->b_rptr + tudr->DEST_offset; 2960 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2961 destaddr + tudr->DEST_length < mp->b_rptr || 2962 destaddr + tudr->DEST_length > mp->b_wptr) { 2963 goto done; 2964 } 2965 optaddr = mp->b_rptr + tudr->OPT_offset; 2966 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2967 optaddr + tudr->OPT_length < mp->b_rptr || 2968 optaddr + tudr->OPT_length > mp->b_wptr) { 2969 goto done; 2970 } 2971 destlen = tudr->DEST_length; 2972 optlen = tudr->OPT_length; 2973 2974 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2975 (char *)optaddr, optlen, err); 2976 if (mp1 != NULL) 2977 qreply(q, mp1); 2978 2979 done: 2980 freemsg(mp); 2981 } 2982 2983 /* 2984 * This routine removes a port number association from a stream. It 2985 * is called by udp_wput to handle T_UNBIND_REQ messages. 
2986 */ 2987 static void 2988 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2989 { 2990 conn_t *connp = Q_TO_CONN(q); 2991 int error; 2992 2993 error = udp_do_unbind(connp); 2994 if (error) { 2995 if (error < 0) 2996 udp_err_ack(q, mp, -error, 0); 2997 else 2998 udp_err_ack(q, mp, TSYSERR, error); 2999 return; 3000 } 3001 3002 mp = mi_tpi_ok_ack_alloc(mp); 3003 ASSERT(mp != NULL); 3004 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 3005 qreply(q, mp); 3006 } 3007 3008 /* 3009 * Don't let port fall into the privileged range. 3010 * Since the extra privileged ports can be arbitrary we also 3011 * ensure that we exclude those from consideration. 3012 * us->us_epriv_ports is not sorted thus we loop over it until 3013 * there are no changes. 3014 */ 3015 static in_port_t 3016 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 3017 { 3018 int i; 3019 in_port_t nextport; 3020 boolean_t restart = B_FALSE; 3021 udp_stack_t *us = udp->udp_us; 3022 3023 if (random && udp_random_anon_port != 0) { 3024 (void) random_get_pseudo_bytes((uint8_t *)&port, 3025 sizeof (in_port_t)); 3026 /* 3027 * Unless changed by a sys admin, the smallest anon port 3028 * is 32768 and the largest anon port is 65535. It is 3029 * very likely (50%) for the random port to be smaller 3030 * than the smallest anon port. When that happens, 3031 * add port % (anon port range) to the smallest anon 3032 * port to get the random port. It should fall into the 3033 * valid anon port range. 
3034 */ 3035 if (port < us->us_smallest_anon_port) { 3036 port = us->us_smallest_anon_port + 3037 port % (us->us_largest_anon_port - 3038 us->us_smallest_anon_port); 3039 } 3040 } 3041 3042 retry: 3043 if (port < us->us_smallest_anon_port) 3044 port = us->us_smallest_anon_port; 3045 3046 if (port > us->us_largest_anon_port) { 3047 port = us->us_smallest_anon_port; 3048 if (restart) 3049 return (0); 3050 restart = B_TRUE; 3051 } 3052 3053 if (port < us->us_smallest_nonpriv_port) 3054 port = us->us_smallest_nonpriv_port; 3055 3056 for (i = 0; i < us->us_num_epriv_ports; i++) { 3057 if (port == us->us_epriv_ports[i]) { 3058 port++; 3059 /* 3060 * Make sure that the port is in the 3061 * valid range. 3062 */ 3063 goto retry; 3064 } 3065 } 3066 3067 if (is_system_labeled() && 3068 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 3069 port, IPPROTO_UDP, B_TRUE)) != 0) { 3070 port = nextport; 3071 goto retry; 3072 } 3073 3074 return (port); 3075 } 3076 3077 /* 3078 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 3079 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 3080 * the TPI options, otherwise we take them from msg_control. 3081 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 3082 * Always consumes mp; never consumes tudr_mp. 3083 */ 3084 static int 3085 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, 3086 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) 3087 { 3088 udp_t *udp = connp->conn_udp; 3089 udp_stack_t *us = udp->udp_us; 3090 int error; 3091 ip_xmit_attr_t *ixa; 3092 ip_pkt_t *ipp; 3093 in6_addr_t v6src; 3094 in6_addr_t v6dst; 3095 in6_addr_t v6nexthop; 3096 in_port_t dstport; 3097 uint32_t flowinfo; 3098 uint_t srcid; 3099 int is_absreq_failure = 0; 3100 conn_opt_arg_t coas, *coa; 3101 3102 ASSERT(tudr_mp != NULL || msg != NULL); 3103 3104 /* 3105 * Get ixa before checking state to handle a disconnect race. 
3106 * 3107 * We need an exclusive copy of conn_ixa since the ancillary data 3108 * options might modify it. That copy has no pointers hence we 3109 * need to set them up once we've parsed the ancillary data. 3110 */ 3111 ixa = conn_get_ixa_exclusive(connp); 3112 if (ixa == NULL) { 3113 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3114 freemsg(mp); 3115 return (ENOMEM); 3116 } 3117 ASSERT(cr != NULL); 3118 ixa->ixa_cred = cr; 3119 ixa->ixa_cpid = pid; 3120 if (is_system_labeled()) { 3121 /* We need to restart with a label based on the cred */ 3122 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3123 } 3124 3125 /* In case previous destination was multicast or multirt */ 3126 ip_attr_newdst(ixa); 3127 3128 /* Get a copy of conn_xmit_ipp since the options might change it */ 3129 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); 3130 if (ipp == NULL) { 3131 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3132 ixa->ixa_cpid = connp->conn_cpid; 3133 ixa_refrele(ixa); 3134 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3135 freemsg(mp); 3136 return (ENOMEM); 3137 } 3138 mutex_enter(&connp->conn_lock); 3139 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); 3140 mutex_exit(&connp->conn_lock); 3141 if (error != 0) { 3142 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3143 freemsg(mp); 3144 goto done; 3145 } 3146 3147 /* 3148 * Parse the options and update ixa and ipp as a result. 3149 * Note that ixa_tsl can be updated if SCM_UCRED. 3150 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. 
3151 */ 3152 3153 coa = &coas; 3154 coa->coa_connp = connp; 3155 coa->coa_ixa = ixa; 3156 coa->coa_ipp = ipp; 3157 coa->coa_ancillary = B_TRUE; 3158 coa->coa_changed = 0; 3159 3160 if (msg != NULL) { 3161 error = process_auxiliary_options(connp, msg->msg_control, 3162 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); 3163 } else { 3164 struct T_unitdata_req *tudr; 3165 3166 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; 3167 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 3168 error = tpi_optcom_buf(connp->conn_wq, tudr_mp, 3169 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, 3170 coa, &is_absreq_failure); 3171 } 3172 if (error != 0) { 3173 /* 3174 * Note: No special action needed in this 3175 * module for "is_absreq_failure" 3176 */ 3177 freemsg(mp); 3178 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3179 goto done; 3180 } 3181 ASSERT(is_absreq_failure == 0); 3182 3183 mutex_enter(&connp->conn_lock); 3184 /* 3185 * If laddr is unspecified then we look at sin6_src_id. 3186 * We will give precedence to a source address set with IPV6_PKTINFO 3187 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 3188 * want ip_attr_connect to select a source (since it can fail) when 3189 * IPV6_PKTINFO is specified. 3190 * If this doesn't result in a source address then we get a source 3191 * from ip_attr_connect() below. 
3192 */ 3193 v6src = connp->conn_saddr_v6; 3194 if (sin != NULL) { 3195 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 3196 dstport = sin->sin_port; 3197 flowinfo = 0; 3198 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3199 ixa->ixa_flags |= IXAF_IS_IPV4; 3200 } else if (sin6 != NULL) { 3201 v6dst = sin6->sin6_addr; 3202 dstport = sin6->sin6_port; 3203 flowinfo = sin6->sin6_flowinfo; 3204 srcid = sin6->__sin6_src_id; 3205 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 3206 ixa->ixa_scopeid = sin6->sin6_scope_id; 3207 ixa->ixa_flags |= IXAF_SCOPEID_SET; 3208 } else { 3209 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3210 } 3211 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 3212 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3213 connp->conn_netstack); 3214 } 3215 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 3216 ixa->ixa_flags |= IXAF_IS_IPV4; 3217 else 3218 ixa->ixa_flags &= ~IXAF_IS_IPV4; 3219 } else { 3220 /* Connected case */ 3221 v6dst = connp->conn_faddr_v6; 3222 dstport = connp->conn_fport; 3223 flowinfo = connp->conn_flowinfo; 3224 } 3225 mutex_exit(&connp->conn_lock); 3226 3227 /* Handle IPV6_PKTINFO setting source address. */ 3228 if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && 3229 (ipp->ipp_fields & IPPF_ADDR)) { 3230 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3231 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3232 v6src = ipp->ipp_addr; 3233 } else { 3234 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3235 v6src = ipp->ipp_addr; 3236 } 3237 } 3238 3239 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop); 3240 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 3241 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 3242 3243 switch (error) { 3244 case 0: 3245 break; 3246 case EADDRNOTAVAIL: 3247 /* 3248 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3249 * Don't have the application see that errno 3250 */ 3251 error = ENETUNREACH; 3252 goto failed; 3253 case ENETDOWN: 3254 /* 3255 * Have !ipif_addr_ready address; drop packet silently 3256 * until we can get applications to not send until we 3257 * are ready. 3258 */ 3259 error = 0; 3260 goto failed; 3261 case EHOSTUNREACH: 3262 case ENETUNREACH: 3263 if (ixa->ixa_ire != NULL) { 3264 /* 3265 * Let conn_ip_output/ire_send_noroute return 3266 * the error and send any local ICMP error. 3267 */ 3268 error = 0; 3269 break; 3270 } 3271 /* FALLTHRU */ 3272 default: 3273 failed: 3274 freemsg(mp); 3275 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3276 goto done; 3277 } 3278 3279 /* 3280 * We might be going to a different destination than last time, 3281 * thus check that TX allows the communication and compute any 3282 * needed label. 3283 * 3284 * TSOL Note: We have an exclusive ipp and ixa for this thread so we 3285 * don't have to worry about concurrent threads. 3286 */ 3287 if (is_system_labeled()) { 3288 /* Using UDP MLP requires SCM_UCRED from user */ 3289 if (connp->conn_mlp_type != mlptSingle && 3290 !((ixa->ixa_flags & IXAF_UCRED_TSL))) { 3291 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3292 error = ECONNREFUSED; 3293 freemsg(mp); 3294 goto done; 3295 } 3296 /* 3297 * Check whether Trusted Solaris policy allows communication 3298 * with this host, and pretend that the destination is 3299 * unreachable if not. 3300 * Compute any needed label and place it in ipp_label_v4/v6. 3301 * 3302 * Later conn_build_hdr_template/conn_prepend_hdr takes 3303 * ipp_label_v4/v6 to form the packet. 3304 * 3305 * Tsol note: We have ipp structure local to this thread so 3306 * no locking is needed. 
3307 */ 3308 error = conn_update_label(connp, ixa, &v6dst, ipp); 3309 if (error != 0) { 3310 freemsg(mp); 3311 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3312 goto done; 3313 } 3314 } 3315 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport, 3316 flowinfo, mp, &error); 3317 if (mp == NULL) { 3318 ASSERT(error != 0); 3319 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3320 goto done; 3321 } 3322 if (ixa->ixa_pktlen > IP_MAXPACKET) { 3323 error = EMSGSIZE; 3324 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3325 freemsg(mp); 3326 goto done; 3327 } 3328 /* We're done. Pass the packet to ip. */ 3329 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3330 3331 error = conn_ip_output(mp, ixa); 3332 /* No udpOutErrors if an error since IP increases its error counter */ 3333 switch (error) { 3334 case 0: 3335 break; 3336 case EWOULDBLOCK: 3337 (void) ixa_check_drain_insert(connp, ixa); 3338 error = 0; 3339 break; 3340 case EADDRNOTAVAIL: 3341 /* 3342 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3343 * Don't have the application see that errno 3344 */ 3345 error = ENETUNREACH; 3346 /* FALLTHRU */ 3347 default: 3348 mutex_enter(&connp->conn_lock); 3349 /* 3350 * Clear the source and v6lastdst so we call ip_attr_connect 3351 * for the next packet and try to pick a better source. 3352 */ 3353 if (connp->conn_mcbc_bind) 3354 connp->conn_saddr_v6 = ipv6_all_zeros; 3355 else 3356 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3357 connp->conn_v6lastdst = ipv6_all_zeros; 3358 mutex_exit(&connp->conn_lock); 3359 break; 3360 } 3361 done: 3362 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3363 ixa->ixa_cpid = connp->conn_cpid; 3364 ixa_refrele(ixa); 3365 ip_pkt_free(ipp); 3366 kmem_free(ipp, sizeof (*ipp)); 3367 return (error); 3368 } 3369 3370 /* 3371 * Handle sending an M_DATA for a connected socket. 3372 * Handles both IPv4 and IPv6. 
3373 */ 3374 static int 3375 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) 3376 { 3377 udp_t *udp = connp->conn_udp; 3378 udp_stack_t *us = udp->udp_us; 3379 int error; 3380 ip_xmit_attr_t *ixa; 3381 3382 /* 3383 * If no other thread is using conn_ixa this just gets a reference to 3384 * conn_ixa. Otherwise we get a safe copy of conn_ixa. 3385 */ 3386 ixa = conn_get_ixa(connp, B_FALSE); 3387 if (ixa == NULL) { 3388 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3389 freemsg(mp); 3390 return (ENOMEM); 3391 } 3392 3393 ASSERT(cr != NULL); 3394 ixa->ixa_cred = cr; 3395 ixa->ixa_cpid = pid; 3396 3397 mutex_enter(&connp->conn_lock); 3398 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6, 3399 connp->conn_fport, connp->conn_flowinfo, &error); 3400 3401 if (mp == NULL) { 3402 ASSERT(error != 0); 3403 mutex_exit(&connp->conn_lock); 3404 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3405 ixa->ixa_cpid = connp->conn_cpid; 3406 ixa_refrele(ixa); 3407 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3408 freemsg(mp); 3409 return (error); 3410 } 3411 3412 /* 3413 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3414 * safe copy, then we need to fill in any pointers in it. 3415 */ 3416 if (ixa->ixa_ire == NULL) { 3417 in6_addr_t faddr, saddr; 3418 in6_addr_t nexthop; 3419 in_port_t fport; 3420 3421 saddr = connp->conn_saddr_v6; 3422 faddr = connp->conn_faddr_v6; 3423 fport = connp->conn_fport; 3424 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop); 3425 mutex_exit(&connp->conn_lock); 3426 3427 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, 3428 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | 3429 IPDF_IPSEC); 3430 switch (error) { 3431 case 0: 3432 break; 3433 case EADDRNOTAVAIL: 3434 /* 3435 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3436 * Don't have the application see that errno 3437 */ 3438 error = ENETUNREACH; 3439 goto failed; 3440 case ENETDOWN: 3441 /* 3442 * Have !ipif_addr_ready address; drop packet silently 3443 * until we can get applications to not send until we 3444 * are ready. 3445 */ 3446 error = 0; 3447 goto failed; 3448 case EHOSTUNREACH: 3449 case ENETUNREACH: 3450 if (ixa->ixa_ire != NULL) { 3451 /* 3452 * Let conn_ip_output/ire_send_noroute return 3453 * the error and send any local ICMP error. 3454 */ 3455 error = 0; 3456 break; 3457 } 3458 /* FALLTHRU */ 3459 default: 3460 failed: 3461 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3462 ixa->ixa_cpid = connp->conn_cpid; 3463 ixa_refrele(ixa); 3464 freemsg(mp); 3465 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3466 return (error); 3467 } 3468 } else { 3469 /* Done with conn_t */ 3470 mutex_exit(&connp->conn_lock); 3471 } 3472 ASSERT(ixa->ixa_ire != NULL); 3473 3474 /* We're done. Pass the packet to ip. */ 3475 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3476 3477 error = conn_ip_output(mp, ixa); 3478 /* No udpOutErrors if an error since IP increases its error counter */ 3479 switch (error) { 3480 case 0: 3481 break; 3482 case EWOULDBLOCK: 3483 (void) ixa_check_drain_insert(connp, ixa); 3484 error = 0; 3485 break; 3486 case EADDRNOTAVAIL: 3487 /* 3488 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3489 * Don't have the application see that errno 3490 */ 3491 error = ENETUNREACH; 3492 break; 3493 } 3494 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3495 ixa->ixa_cpid = connp->conn_cpid; 3496 ixa_refrele(ixa); 3497 return (error); 3498 } 3499 3500 /* 3501 * Handle sending an M_DATA to the last destination. 3502 * Handles both IPv4 and IPv6. 3503 * 3504 * NOTE: The caller must hold conn_lock and we drop it here. 
3505 */ 3506 static int 3507 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, 3508 ip_xmit_attr_t *ixa) 3509 { 3510 udp_t *udp = connp->conn_udp; 3511 udp_stack_t *us = udp->udp_us; 3512 int error; 3513 3514 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3515 ASSERT(ixa != NULL); 3516 3517 ASSERT(cr != NULL); 3518 ixa->ixa_cred = cr; 3519 ixa->ixa_cpid = pid; 3520 3521 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, 3522 connp->conn_lastdstport, connp->conn_lastflowinfo, &error); 3523 3524 if (mp == NULL) { 3525 ASSERT(error != 0); 3526 mutex_exit(&connp->conn_lock); 3527 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3528 ixa->ixa_cpid = connp->conn_cpid; 3529 ixa_refrele(ixa); 3530 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3531 freemsg(mp); 3532 return (error); 3533 } 3534 3535 /* 3536 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3537 * safe copy, then we need to fill in any pointers in it. 3538 */ 3539 if (ixa->ixa_ire == NULL) { 3540 in6_addr_t lastdst, lastsrc; 3541 in6_addr_t nexthop; 3542 in_port_t lastport; 3543 3544 lastsrc = connp->conn_v6lastsrc; 3545 lastdst = connp->conn_v6lastdst; 3546 lastport = connp->conn_lastdstport; 3547 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); 3548 mutex_exit(&connp->conn_lock); 3549 3550 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, 3551 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | 3552 IPDF_VERIFY_DST | IPDF_IPSEC); 3553 switch (error) { 3554 case 0: 3555 break; 3556 case EADDRNOTAVAIL: 3557 /* 3558 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3559 * Don't have the application see that errno 3560 */ 3561 error = ENETUNREACH; 3562 goto failed; 3563 case ENETDOWN: 3564 /* 3565 * Have !ipif_addr_ready address; drop packet silently 3566 * until we can get applications to not send until we 3567 * are ready. 
3568 */ 3569 error = 0; 3570 goto failed; 3571 case EHOSTUNREACH: 3572 case ENETUNREACH: 3573 if (ixa->ixa_ire != NULL) { 3574 /* 3575 * Let conn_ip_output/ire_send_noroute return 3576 * the error and send any local ICMP error. 3577 */ 3578 error = 0; 3579 break; 3580 } 3581 /* FALLTHRU */ 3582 default: 3583 failed: 3584 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3585 ixa->ixa_cpid = connp->conn_cpid; 3586 ixa_refrele(ixa); 3587 freemsg(mp); 3588 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3589 return (error); 3590 } 3591 } else { 3592 /* Done with conn_t */ 3593 mutex_exit(&connp->conn_lock); 3594 } 3595 3596 /* We're done. Pass the packet to ip. */ 3597 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3598 3599 error = conn_ip_output(mp, ixa); 3600 /* No udpOutErrors if an error since IP increases its error counter */ 3601 switch (error) { 3602 case 0: 3603 break; 3604 case EWOULDBLOCK: 3605 (void) ixa_check_drain_insert(connp, ixa); 3606 error = 0; 3607 break; 3608 case EADDRNOTAVAIL: 3609 /* 3610 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3611 * Don't have the application see that errno 3612 */ 3613 error = ENETUNREACH; 3614 /* FALLTHRU */ 3615 default: 3616 mutex_enter(&connp->conn_lock); 3617 /* 3618 * Clear the source and v6lastdst so we call ip_attr_connect 3619 * for the next packet and try to pick a better source. 3620 */ 3621 if (connp->conn_mcbc_bind) 3622 connp->conn_saddr_v6 = ipv6_all_zeros; 3623 else 3624 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3625 connp->conn_v6lastdst = ipv6_all_zeros; 3626 mutex_exit(&connp->conn_lock); 3627 break; 3628 } 3629 ixa->ixa_cred = connp->conn_cred; /* Restore */ 3630 ixa->ixa_cpid = connp->conn_cpid; 3631 ixa_refrele(ixa); 3632 return (error); 3633 } 3634 3635 3636 /* 3637 * Prepend the header template and then fill in the source and 3638 * flowinfo. The caller needs to handle the destination address since 3639 * it's setting is different if rthdr or source route. 
3640 * 3641 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET. 3642 * When it returns NULL it sets errorp. 3643 */ 3644 static mblk_t * 3645 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, 3646 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) 3647 { 3648 udp_t *udp = connp->conn_udp; 3649 udp_stack_t *us = udp->udp_us; 3650 boolean_t insert_spi = udp->udp_nat_t_endpoint; 3651 uint_t pktlen; 3652 uint_t alloclen; 3653 uint_t copylen; 3654 uint8_t *iph; 3655 uint_t ip_hdr_length; 3656 udpha_t *udpha; 3657 uint32_t cksum; 3658 ip_pkt_t *ipp; 3659 3660 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3661 3662 /* 3663 * Copy the header template and leave space for an SPI 3664 */ 3665 copylen = connp->conn_ht_iphc_len; 3666 alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0); 3667 pktlen = alloclen + msgdsize(mp); 3668 if (pktlen > IP_MAXPACKET) { 3669 freemsg(mp); 3670 *errorp = EMSGSIZE; 3671 return (NULL); 3672 } 3673 ixa->ixa_pktlen = pktlen; 3674 3675 /* check/fix buffer config, setup pointers into it */ 3676 iph = mp->b_rptr - alloclen; 3677 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { 3678 mblk_t *mp1; 3679 3680 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); 3681 if (mp1 == NULL) { 3682 freemsg(mp); 3683 *errorp = ENOMEM; 3684 return (NULL); 3685 } 3686 mp1->b_wptr = DB_LIM(mp1); 3687 mp1->b_cont = mp; 3688 mp = mp1; 3689 iph = (mp->b_wptr - alloclen); 3690 } 3691 mp->b_rptr = iph; 3692 bcopy(connp->conn_ht_iphc, iph, copylen); 3693 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); 3694 3695 ixa->ixa_ip_hdr_length = ip_hdr_length; 3696 udpha = (udpha_t *)(iph + ip_hdr_length); 3697 3698 /* 3699 * Setup header length and prepare for ULP checksum done in IP. 3700 * udp_build_hdr_template has already massaged any routing header 3701 * and placed the result in conn_sum. 
3702 * 3703 * We make it easy for IP to include our pseudo header 3704 * by putting our length in uha_checksum. 3705 */ 3706 cksum = pktlen - ip_hdr_length; 3707 udpha->uha_length = htons(cksum); 3708 3709 cksum += connp->conn_sum; 3710 cksum = (cksum >> 16) + (cksum & 0xFFFF); 3711 ASSERT(cksum < 0x10000); 3712 3713 ipp = &connp->conn_xmit_ipp; 3714 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3715 ipha_t *ipha = (ipha_t *)iph; 3716 3717 ipha->ipha_length = htons((uint16_t)pktlen); 3718 3719 /* IP does the checksum if uha_checksum is non-zero */ 3720 if (us->us_do_checksum) 3721 udpha->uha_checksum = htons(cksum); 3722 3723 /* if IP_PKTINFO specified an addres it wins over bind() */ 3724 if ((ipp->ipp_fields & IPPF_ADDR) && 3725 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3726 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); 3727 ipha->ipha_src = ipp->ipp_addr_v4; 3728 } else { 3729 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 3730 } 3731 } else { 3732 ip6_t *ip6h = (ip6_t *)iph; 3733 3734 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); 3735 udpha->uha_checksum = htons(cksum); 3736 3737 /* if IP_PKTINFO specified an addres it wins over bind() */ 3738 if ((ipp->ipp_fields & IPPF_ADDR) && 3739 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3740 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); 3741 ip6h->ip6_src = ipp->ipp_addr; 3742 } else { 3743 ip6h->ip6_src = *v6src; 3744 } 3745 ip6h->ip6_vcf = 3746 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 3747 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 3748 if (ipp->ipp_fields & IPPF_TCLASS) { 3749 /* Overrides the class part of flowinfo */ 3750 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 3751 ipp->ipp_tclass); 3752 } 3753 } 3754 3755 /* Insert all-0s SPI now. 
*/ 3756 if (insert_spi) 3757 *((uint32_t *)(udpha + 1)) = 0; 3758 3759 udpha->uha_dst_port = dstport; 3760 return (mp); 3761 } 3762 3763 /* 3764 * Send a T_UDERR_IND in response to an M_DATA 3765 */ 3766 static void 3767 udp_ud_err_connected(conn_t *connp, t_scalar_t error) 3768 { 3769 struct sockaddr_storage ss; 3770 sin_t *sin; 3771 sin6_t *sin6; 3772 struct sockaddr *addr; 3773 socklen_t addrlen; 3774 mblk_t *mp1; 3775 3776 mutex_enter(&connp->conn_lock); 3777 /* Initialize addr and addrlen as if they're passed in */ 3778 if (connp->conn_family == AF_INET) { 3779 sin = (sin_t *)&ss; 3780 *sin = sin_null; 3781 sin->sin_family = AF_INET; 3782 sin->sin_port = connp->conn_fport; 3783 sin->sin_addr.s_addr = connp->conn_faddr_v4; 3784 addr = (struct sockaddr *)sin; 3785 addrlen = sizeof (*sin); 3786 } else { 3787 sin6 = (sin6_t *)&ss; 3788 *sin6 = sin6_null; 3789 sin6->sin6_family = AF_INET6; 3790 sin6->sin6_port = connp->conn_fport; 3791 sin6->sin6_flowinfo = connp->conn_flowinfo; 3792 sin6->sin6_addr = connp->conn_faddr_v6; 3793 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && 3794 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 3795 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; 3796 } else { 3797 sin6->sin6_scope_id = 0; 3798 } 3799 sin6->__sin6_src_id = 0; 3800 addr = (struct sockaddr *)sin6; 3801 addrlen = sizeof (*sin6); 3802 } 3803 mutex_exit(&connp->conn_lock); 3804 3805 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); 3806 if (mp1 != NULL) 3807 putnext(connp->conn_rq, mp1); 3808 } 3809 3810 /* 3811 * This routine handles all messages passed downstream. It either 3812 * consumes the message or passes it downstream; it never queues a 3813 * a message. 3814 * 3815 * Also entry point for sockfs when udp is in "direct sockfs" mode. 
This mode 3816 * is valid when we are directly beneath the stream head, and thus sockfs 3817 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3818 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3819 * connected endpoints. 3820 */ 3821 void 3822 udp_wput(queue_t *q, mblk_t *mp) 3823 { 3824 sin6_t *sin6; 3825 sin_t *sin = NULL; 3826 uint_t srcid; 3827 conn_t *connp = Q_TO_CONN(q); 3828 udp_t *udp = connp->conn_udp; 3829 int error = 0; 3830 struct sockaddr *addr = NULL; 3831 socklen_t addrlen; 3832 udp_stack_t *us = udp->udp_us; 3833 struct T_unitdata_req *tudr; 3834 mblk_t *data_mp; 3835 ushort_t ipversion; 3836 cred_t *cr; 3837 pid_t pid; 3838 3839 /* 3840 * We directly handle several cases here: T_UNITDATA_REQ message 3841 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3842 * socket. 3843 */ 3844 switch (DB_TYPE(mp)) { 3845 case M_DATA: 3846 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3847 /* Not connected; address is required */ 3848 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3849 UDP_DBGSTAT(us, udp_data_notconn); 3850 UDP_STAT(us, udp_out_err_notconn); 3851 freemsg(mp); 3852 return; 3853 } 3854 /* 3855 * All Solaris components should pass a db_credp 3856 * for this message, hence we ASSERT. 3857 * On production kernels we return an error to be robust against 3858 * random streams modules sitting on top of us. 
3859 */ 3860 cr = msg_getcred(mp, &pid); 3861 ASSERT(cr != NULL); 3862 if (cr == NULL) { 3863 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3864 freemsg(mp); 3865 return; 3866 } 3867 ASSERT(udp->udp_issocket); 3868 UDP_DBGSTAT(us, udp_data_conn); 3869 error = udp_output_connected(connp, mp, cr, pid); 3870 if (error != 0) { 3871 UDP_STAT(us, udp_out_err_output); 3872 if (connp->conn_rq != NULL) 3873 udp_ud_err_connected(connp, (t_scalar_t)error); 3874 #ifdef DEBUG 3875 printf("udp_output_connected returned %d\n", error); 3876 #endif 3877 } 3878 return; 3879 3880 case M_PROTO: 3881 case M_PCPROTO: 3882 tudr = (struct T_unitdata_req *)mp->b_rptr; 3883 if (MBLKL(mp) < sizeof (*tudr) || 3884 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3885 udp_wput_other(q, mp); 3886 return; 3887 } 3888 break; 3889 3890 default: 3891 udp_wput_other(q, mp); 3892 return; 3893 } 3894 3895 /* Handle valid T_UNITDATA_REQ here */ 3896 data_mp = mp->b_cont; 3897 if (data_mp == NULL) { 3898 error = EPROTO; 3899 goto ud_error2; 3900 } 3901 mp->b_cont = NULL; 3902 3903 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3904 error = EADDRNOTAVAIL; 3905 goto ud_error2; 3906 } 3907 3908 /* 3909 * All Solaris components should pass a db_credp 3910 * for this TPI message, hence we should ASSERT. 3911 * However, RPC (svc_clts_ksend) does this odd thing where it 3912 * passes the options from a T_UNITDATA_IND unchanged in a 3913 * T_UNITDATA_REQ. While that is the right thing to do for 3914 * some options, SCM_UCRED being the key one, this also makes it 3915 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3916 */ 3917 cr = msg_getcred(mp, &pid); 3918 if (cr == NULL) { 3919 cr = connp->conn_cred; 3920 pid = connp->conn_cpid; 3921 } 3922 3923 /* 3924 * If a port has not been bound to the stream, fail. 3925 * This is not a problem when sockfs is directly 3926 * above us, because it will ensure that the socket 3927 * is first bound before allowing data to be sent. 
3928 */ 3929 if (udp->udp_state == TS_UNBND) { 3930 error = EPROTO; 3931 goto ud_error2; 3932 } 3933 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3934 addrlen = tudr->DEST_length; 3935 3936 switch (connp->conn_family) { 3937 case AF_INET6: 3938 sin6 = (sin6_t *)addr; 3939 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3940 (sin6->sin6_family != AF_INET6)) { 3941 error = EADDRNOTAVAIL; 3942 goto ud_error2; 3943 } 3944 3945 srcid = sin6->__sin6_src_id; 3946 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3947 /* 3948 * Destination is a non-IPv4-compatible IPv6 address. 3949 * Send out an IPv6 format packet. 3950 */ 3951 3952 /* 3953 * If the local address is a mapped address return 3954 * an error. 3955 * It would be possible to send an IPv6 packet but the 3956 * response would never make it back to the application 3957 * since it is bound to a mapped address. 3958 */ 3959 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3960 error = EADDRNOTAVAIL; 3961 goto ud_error2; 3962 } 3963 3964 UDP_DBGSTAT(us, udp_out_ipv6); 3965 3966 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3967 sin6->sin6_addr = ipv6_loopback; 3968 ipversion = IPV6_VERSION; 3969 } else { 3970 if (connp->conn_ipv6_v6only) { 3971 error = EADDRNOTAVAIL; 3972 goto ud_error2; 3973 } 3974 3975 /* 3976 * If the local address is not zero or a mapped address 3977 * return an error. It would be possible to send an 3978 * IPv4 packet but the response would never make it 3979 * back to the application since it is bound to a 3980 * non-mapped address. 
3981 */ 3982 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3983 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3984 error = EADDRNOTAVAIL; 3985 goto ud_error2; 3986 } 3987 UDP_DBGSTAT(us, udp_out_mapped); 3988 3989 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3990 V4_PART_OF_V6(sin6->sin6_addr) = 3991 htonl(INADDR_LOOPBACK); 3992 } 3993 ipversion = IPV4_VERSION; 3994 } 3995 3996 if (tudr->OPT_length != 0) { 3997 /* 3998 * If we are connected then the destination needs to be 3999 * the same as the connected one. 4000 */ 4001 if (udp->udp_state == TS_DATA_XFER && 4002 !conn_same_as_last_v6(connp, sin6)) { 4003 error = EISCONN; 4004 goto ud_error2; 4005 } 4006 UDP_STAT(us, udp_out_opt); 4007 error = udp_output_ancillary(connp, NULL, sin6, 4008 data_mp, mp, NULL, cr, pid); 4009 } else { 4010 ip_xmit_attr_t *ixa; 4011 4012 /* 4013 * We have to allocate an ip_xmit_attr_t before we grab 4014 * conn_lock and we need to hold conn_lock once we've 4015 * checked conn_same_as_last_v6 to handle concurrent 4016 * send* calls on a socket. 
4017 */ 4018 ixa = conn_get_ixa(connp, B_FALSE); 4019 if (ixa == NULL) { 4020 error = ENOMEM; 4021 goto ud_error2; 4022 } 4023 mutex_enter(&connp->conn_lock); 4024 4025 if (conn_same_as_last_v6(connp, sin6) && 4026 connp->conn_lastsrcid == srcid && 4027 ipsec_outbound_policy_current(ixa)) { 4028 UDP_DBGSTAT(us, udp_out_lastdst); 4029 /* udp_output_lastdst drops conn_lock */ 4030 error = udp_output_lastdst(connp, data_mp, cr, 4031 pid, ixa); 4032 } else { 4033 UDP_DBGSTAT(us, udp_out_diffdst); 4034 /* udp_output_newdst drops conn_lock */ 4035 error = udp_output_newdst(connp, data_mp, NULL, 4036 sin6, ipversion, cr, pid, ixa); 4037 } 4038 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 4039 } 4040 if (error == 0) { 4041 freeb(mp); 4042 return; 4043 } 4044 break; 4045 4046 case AF_INET: 4047 sin = (sin_t *)addr; 4048 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 4049 (sin->sin_family != AF_INET)) { 4050 error = EADDRNOTAVAIL; 4051 goto ud_error2; 4052 } 4053 UDP_DBGSTAT(us, udp_out_ipv4); 4054 if (sin->sin_addr.s_addr == INADDR_ANY) 4055 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 4056 ipversion = IPV4_VERSION; 4057 4058 srcid = 0; 4059 if (tudr->OPT_length != 0) { 4060 /* 4061 * If we are connected then the destination needs to be 4062 * the same as the connected one. 4063 */ 4064 if (udp->udp_state == TS_DATA_XFER && 4065 !conn_same_as_last_v4(connp, sin)) { 4066 error = EISCONN; 4067 goto ud_error2; 4068 } 4069 UDP_STAT(us, udp_out_opt); 4070 error = udp_output_ancillary(connp, sin, NULL, 4071 data_mp, mp, NULL, cr, pid); 4072 } else { 4073 ip_xmit_attr_t *ixa; 4074 4075 /* 4076 * We have to allocate an ip_xmit_attr_t before we grab 4077 * conn_lock and we need to hold conn_lock once we've 4078 * checked conn_same_as_last_v4 to handle concurrent 4079 * send* calls on a socket. 
4080 */ 4081 ixa = conn_get_ixa(connp, B_FALSE); 4082 if (ixa == NULL) { 4083 error = ENOMEM; 4084 goto ud_error2; 4085 } 4086 mutex_enter(&connp->conn_lock); 4087 4088 if (conn_same_as_last_v4(connp, sin) && 4089 ipsec_outbound_policy_current(ixa)) { 4090 UDP_DBGSTAT(us, udp_out_lastdst); 4091 /* udp_output_lastdst drops conn_lock */ 4092 error = udp_output_lastdst(connp, data_mp, cr, 4093 pid, ixa); 4094 } else { 4095 UDP_DBGSTAT(us, udp_out_diffdst); 4096 /* udp_output_newdst drops conn_lock */ 4097 error = udp_output_newdst(connp, data_mp, sin, 4098 NULL, ipversion, cr, pid, ixa); 4099 } 4100 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 4101 } 4102 if (error == 0) { 4103 freeb(mp); 4104 return; 4105 } 4106 break; 4107 } 4108 UDP_STAT(us, udp_out_err_output); 4109 ASSERT(mp != NULL); 4110 /* mp is freed by the following routine */ 4111 udp_ud_err(q, mp, (t_scalar_t)error); 4112 return; 4113 4114 ud_error2: 4115 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 4116 freemsg(data_mp); 4117 UDP_STAT(us, udp_out_err_output); 4118 ASSERT(mp != NULL); 4119 /* mp is freed by the following routine */ 4120 udp_ud_err(q, mp, (t_scalar_t)error); 4121 } 4122 4123 /* 4124 * Handle the case of the IP address, port, flow label being different 4125 * for both IPv4 and IPv6. 4126 * 4127 * NOTE: The caller must hold conn_lock and we drop it here. 
4128 */ 4129 static int 4130 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 4131 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 4132 { 4133 uint_t srcid; 4134 uint32_t flowinfo; 4135 udp_t *udp = connp->conn_udp; 4136 int error = 0; 4137 ip_xmit_attr_t *oldixa; 4138 udp_stack_t *us = udp->udp_us; 4139 in6_addr_t v6src; 4140 in6_addr_t v6dst; 4141 in6_addr_t v6nexthop; 4142 in_port_t dstport; 4143 4144 ASSERT(MUTEX_HELD(&connp->conn_lock)); 4145 ASSERT(ixa != NULL); 4146 /* 4147 * We hold conn_lock across all the use and modifications of 4148 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 4149 * stay consistent. 4150 */ 4151 4152 ASSERT(cr != NULL); 4153 ixa->ixa_cred = cr; 4154 ixa->ixa_cpid = pid; 4155 if (is_system_labeled()) { 4156 /* We need to restart with a label based on the cred */ 4157 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 4158 } 4159 4160 /* 4161 * If we are connected then the destination needs to be the 4162 * same as the connected one, which is not the case here since we 4163 * checked for that above. 4164 */ 4165 if (udp->udp_state == TS_DATA_XFER) { 4166 mutex_exit(&connp->conn_lock); 4167 error = EISCONN; 4168 goto ud_error; 4169 } 4170 4171 /* In case previous destination was multicast or multirt */ 4172 ip_attr_newdst(ixa); 4173 4174 /* 4175 * If laddr is unspecified then we look at sin6_src_id. 4176 * We will give precedence to a source address set with IPV6_PKTINFO 4177 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 4178 * want ip_attr_connect to select a source (since it can fail) when 4179 * IPV6_PKTINFO is specified. 4180 * If this doesn't result in a source address then we get a source 4181 * from ip_attr_connect() below. 
4182 */ 4183 v6src = connp->conn_saddr_v6; 4184 if (sin != NULL) { 4185 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 4186 dstport = sin->sin_port; 4187 flowinfo = 0; 4188 srcid = 0; 4189 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4190 if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) { 4191 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4192 connp->conn_netstack); 4193 } 4194 ixa->ixa_flags |= IXAF_IS_IPV4; 4195 } else { 4196 v6dst = sin6->sin6_addr; 4197 dstport = sin6->sin6_port; 4198 flowinfo = sin6->sin6_flowinfo; 4199 srcid = sin6->__sin6_src_id; 4200 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 4201 ixa->ixa_scopeid = sin6->sin6_scope_id; 4202 ixa->ixa_flags |= IXAF_SCOPEID_SET; 4203 } else { 4204 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4205 } 4206 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 4207 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4208 connp->conn_netstack); 4209 } 4210 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 4211 ixa->ixa_flags |= IXAF_IS_IPV4; 4212 else 4213 ixa->ixa_flags &= ~IXAF_IS_IPV4; 4214 } 4215 /* Handle IPV6_PKTINFO setting source address. */ 4216 if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && 4217 (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR)) { 4218 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 4219 4220 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4221 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4222 v6src = ipp->ipp_addr; 4223 } else { 4224 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4225 v6src = ipp->ipp_addr; 4226 } 4227 } 4228 4229 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 4230 mutex_exit(&connp->conn_lock); 4231 4232 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 4233 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 4234 switch (error) { 4235 case 0: 4236 break; 4237 case EADDRNOTAVAIL: 4238 /* 4239 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
4240 * Don't have the application see that errno 4241 */ 4242 error = ENETUNREACH; 4243 goto failed; 4244 case ENETDOWN: 4245 /* 4246 * Have !ipif_addr_ready address; drop packet silently 4247 * until we can get applications to not send until we 4248 * are ready. 4249 */ 4250 error = 0; 4251 goto failed; 4252 case EHOSTUNREACH: 4253 case ENETUNREACH: 4254 if (ixa->ixa_ire != NULL) { 4255 /* 4256 * Let conn_ip_output/ire_send_noroute return 4257 * the error and send any local ICMP error. 4258 */ 4259 error = 0; 4260 break; 4261 } 4262 /* FALLTHRU */ 4263 failed: 4264 default: 4265 goto ud_error; 4266 } 4267 4268 4269 /* 4270 * Cluster note: we let the cluster hook know that we are sending to a 4271 * new address and/or port. 4272 */ 4273 if (cl_inet_connect2 != NULL) { 4274 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 4275 if (error != 0) { 4276 error = EHOSTUNREACH; 4277 goto ud_error; 4278 } 4279 } 4280 4281 mutex_enter(&connp->conn_lock); 4282 /* 4283 * While we dropped the lock some other thread might have connected 4284 * this socket. If so we bail out with EISCONN to ensure that the 4285 * connecting thread is the one that updates conn_ixa, conn_ht_* 4286 * and conn_*last*. 4287 */ 4288 if (udp->udp_state == TS_DATA_XFER) { 4289 mutex_exit(&connp->conn_lock); 4290 error = EISCONN; 4291 goto ud_error; 4292 } 4293 4294 /* 4295 * We need to rebuild the headers if 4296 * - we are labeling packets (could be different for different 4297 * destinations) 4298 * - we have a source route (or routing header) since we need to 4299 * massage that to get the pseudo-header checksum 4300 * - the IP version is different than the last time 4301 * - a socket option with COA_HEADER_CHANGED has been set which 4302 * set conn_v6lastdst to zero. 4303 * 4304 * Otherwise the prepend function will just update the src, dst, 4305 * dstport, and flow label. 
4306 */ 4307 if (is_system_labeled()) { 4308 /* TX MLP requires SCM_UCRED and don't have that here */ 4309 if (connp->conn_mlp_type != mlptSingle) { 4310 mutex_exit(&connp->conn_lock); 4311 error = ECONNREFUSED; 4312 goto ud_error; 4313 } 4314 /* 4315 * Check whether Trusted Solaris policy allows communication 4316 * with this host, and pretend that the destination is 4317 * unreachable if not. 4318 * Compute any needed label and place it in ipp_label_v4/v6. 4319 * 4320 * Later conn_build_hdr_template/conn_prepend_hdr takes 4321 * ipp_label_v4/v6 to form the packet. 4322 * 4323 * Tsol note: Since we hold conn_lock we know no other 4324 * thread manipulates conn_xmit_ipp. 4325 */ 4326 error = conn_update_label(connp, ixa, &v6dst, 4327 &connp->conn_xmit_ipp); 4328 if (error != 0) { 4329 mutex_exit(&connp->conn_lock); 4330 goto ud_error; 4331 } 4332 /* Rebuild the header template */ 4333 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4334 flowinfo); 4335 if (error != 0) { 4336 mutex_exit(&connp->conn_lock); 4337 goto ud_error; 4338 } 4339 } else if ((connp->conn_xmit_ipp.ipp_fields & 4340 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 4341 ipversion != connp->conn_lastipversion || 4342 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 4343 /* Rebuild the header template */ 4344 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4345 flowinfo); 4346 if (error != 0) { 4347 mutex_exit(&connp->conn_lock); 4348 goto ud_error; 4349 } 4350 } else { 4351 /* Simply update the destination address if no source route */ 4352 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4353 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 4354 4355 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 4356 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 4357 ipha->ipha_fragment_offset_and_flags |= 4358 IPH_DF_HTONS; 4359 } else { 4360 ipha->ipha_fragment_offset_and_flags &= 4361 ~IPH_DF_HTONS; 4362 } 4363 } else { 4364 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 4365 ip6h->ip6_dst = v6dst; 4366 } 4367 
} 4368 4369 /* 4370 * Remember the dst/dstport etc which corresponds to the built header 4371 * template and conn_ixa. 4372 */ 4373 oldixa = conn_replace_ixa(connp, ixa); 4374 connp->conn_v6lastdst = v6dst; 4375 connp->conn_lastipversion = ipversion; 4376 connp->conn_lastdstport = dstport; 4377 connp->conn_lastflowinfo = flowinfo; 4378 connp->conn_lastscopeid = ixa->ixa_scopeid; 4379 connp->conn_lastsrcid = srcid; 4380 /* Also remember a source to use together with lastdst */ 4381 connp->conn_v6lastsrc = v6src; 4382 4383 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 4384 dstport, flowinfo, &error); 4385 4386 /* Done with conn_t */ 4387 mutex_exit(&connp->conn_lock); 4388 ixa_refrele(oldixa); 4389 4390 if (data_mp == NULL) { 4391 ASSERT(error != 0); 4392 goto ud_error; 4393 } 4394 4395 /* We're done. Pass the packet to ip. */ 4396 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 4397 4398 error = conn_ip_output(data_mp, ixa); 4399 /* No udpOutErrors if an error since IP increases its error counter */ 4400 switch (error) { 4401 case 0: 4402 break; 4403 case EWOULDBLOCK: 4404 (void) ixa_check_drain_insert(connp, ixa); 4405 error = 0; 4406 break; 4407 case EADDRNOTAVAIL: 4408 /* 4409 * IXAF_VERIFY_SOURCE tells us to pick a better source. 4410 * Don't have the application see that errno 4411 */ 4412 error = ENETUNREACH; 4413 /* FALLTHRU */ 4414 default: 4415 mutex_enter(&connp->conn_lock); 4416 /* 4417 * Clear the source and v6lastdst so we call ip_attr_connect 4418 * for the next packet and try to pick a better source. 
4419 */ 4420 if (connp->conn_mcbc_bind) 4421 connp->conn_saddr_v6 = ipv6_all_zeros; 4422 else 4423 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 4424 connp->conn_v6lastdst = ipv6_all_zeros; 4425 mutex_exit(&connp->conn_lock); 4426 break; 4427 } 4428 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4429 ixa->ixa_cpid = connp->conn_cpid; 4430 ixa_refrele(ixa); 4431 return (error); 4432 4433 ud_error: 4434 ixa->ixa_cred = connp->conn_cred; /* Restore */ 4435 ixa->ixa_cpid = connp->conn_cpid; 4436 ixa_refrele(ixa); 4437 4438 freemsg(data_mp); 4439 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 4440 UDP_STAT(us, udp_out_err_output); 4441 return (error); 4442 } 4443 4444 /* ARGSUSED */ 4445 static void 4446 udp_wput_fallback(queue_t *wq, mblk_t *mp) 4447 { 4448 #ifdef DEBUG 4449 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 4450 #endif 4451 freemsg(mp); 4452 } 4453 4454 4455 /* 4456 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 4457 */ 4458 static void 4459 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 4460 { 4461 void *data; 4462 mblk_t *datamp = mp->b_cont; 4463 conn_t *connp = Q_TO_CONN(q); 4464 udp_t *udp = connp->conn_udp; 4465 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 4466 4467 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 4468 cmdp->cb_error = EPROTO; 4469 qreply(q, mp); 4470 return; 4471 } 4472 data = datamp->b_rptr; 4473 4474 mutex_enter(&connp->conn_lock); 4475 switch (cmdp->cb_cmd) { 4476 case TI_GETPEERNAME: 4477 if (udp->udp_state != TS_DATA_XFER) 4478 cmdp->cb_error = ENOTCONN; 4479 else 4480 cmdp->cb_error = conn_getpeername(connp, data, 4481 &cmdp->cb_len); 4482 break; 4483 case TI_GETMYNAME: 4484 cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len); 4485 break; 4486 default: 4487 cmdp->cb_error = EINVAL; 4488 break; 4489 } 4490 mutex_exit(&connp->conn_lock); 4491 4492 qreply(q, mp); 4493 } 4494 4495 static void 4496 udp_use_pure_tpi(udp_t *udp) 4497 { 4498 conn_t *connp = udp->udp_connp; 4499 4500 
mutex_enter(&connp->conn_lock); 4501 udp->udp_issocket = B_FALSE; 4502 mutex_exit(&connp->conn_lock); 4503 UDP_STAT(udp->udp_us, udp_sock_fallback); 4504 } 4505 4506 static void 4507 udp_wput_other(queue_t *q, mblk_t *mp) 4508 { 4509 uchar_t *rptr = mp->b_rptr; 4510 struct iocblk *iocp; 4511 conn_t *connp = Q_TO_CONN(q); 4512 udp_t *udp = connp->conn_udp; 4513 udp_stack_t *us = udp->udp_us; 4514 cred_t *cr; 4515 4516 switch (mp->b_datap->db_type) { 4517 case M_CMD: 4518 udp_wput_cmdblk(q, mp); 4519 return; 4520 4521 case M_PROTO: 4522 case M_PCPROTO: 4523 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4524 /* 4525 * If the message does not contain a PRIM_type, 4526 * throw it away. 4527 */ 4528 freemsg(mp); 4529 return; 4530 } 4531 switch (((t_primp_t)rptr)->type) { 4532 case T_ADDR_REQ: 4533 udp_addr_req(q, mp); 4534 return; 4535 case O_T_BIND_REQ: 4536 case T_BIND_REQ: 4537 udp_tpi_bind(q, mp); 4538 return; 4539 case T_CONN_REQ: 4540 udp_tpi_connect(q, mp); 4541 return; 4542 case T_CAPABILITY_REQ: 4543 udp_capability_req(q, mp); 4544 return; 4545 case T_INFO_REQ: 4546 udp_info_req(q, mp); 4547 return; 4548 case T_UNITDATA_REQ: 4549 /* 4550 * If a T_UNITDATA_REQ gets here, the address must 4551 * be bad. Valid T_UNITDATA_REQs are handled 4552 * in udp_wput. 4553 */ 4554 udp_ud_err(q, mp, EADDRNOTAVAIL); 4555 return; 4556 case T_UNBIND_REQ: 4557 udp_tpi_unbind(q, mp); 4558 return; 4559 case T_SVR4_OPTMGMT_REQ: 4560 /* 4561 * All Solaris components should pass a db_credp 4562 * for this TPI message, hence we ASSERT. 4563 * But in case there is some other M_PROTO that looks 4564 * like a TPI message sent by some other kernel 4565 * component, we check and return an error. 
4566 */ 4567 cr = msg_getcred(mp, NULL); 4568 ASSERT(cr != NULL); 4569 if (cr == NULL) { 4570 udp_err_ack(q, mp, TSYSERR, EINVAL); 4571 return; 4572 } 4573 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 4574 cr)) { 4575 svr4_optcom_req(q, mp, cr, &udp_opt_obj); 4576 } 4577 return; 4578 4579 case T_OPTMGMT_REQ: 4580 /* 4581 * All Solaris components should pass a db_credp 4582 * for this TPI message, hence we ASSERT. 4583 * But in case there is some other M_PROTO that looks 4584 * like a TPI message sent by some other kernel 4585 * component, we check and return an error. 4586 */ 4587 cr = msg_getcred(mp, NULL); 4588 ASSERT(cr != NULL); 4589 if (cr == NULL) { 4590 udp_err_ack(q, mp, TSYSERR, EINVAL); 4591 return; 4592 } 4593 tpi_optcom_req(q, mp, cr, &udp_opt_obj); 4594 return; 4595 4596 case T_DISCON_REQ: 4597 udp_tpi_disconnect(q, mp); 4598 return; 4599 4600 /* The following TPI message is not supported by udp. */ 4601 case O_T_CONN_RES: 4602 case T_CONN_RES: 4603 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4604 return; 4605 4606 /* The following 3 TPI requests are illegal for udp. */ 4607 case T_DATA_REQ: 4608 case T_EXDATA_REQ: 4609 case T_ORDREL_REQ: 4610 udp_err_ack(q, mp, TNOTSUPPORT, 0); 4611 return; 4612 default: 4613 break; 4614 } 4615 break; 4616 case M_FLUSH: 4617 if (*rptr & FLUSHW) 4618 flushq(q, FLUSHDATA); 4619 break; 4620 case M_IOCTL: 4621 iocp = (struct iocblk *)mp->b_rptr; 4622 switch (iocp->ioc_cmd) { 4623 case TI_GETPEERNAME: 4624 if (udp->udp_state != TS_DATA_XFER) { 4625 /* 4626 * If a default destination address has not 4627 * been associated with the stream, then we 4628 * don't know the peer's name. 4629 */ 4630 iocp->ioc_error = ENOTCONN; 4631 iocp->ioc_count = 0; 4632 mp->b_datap->db_type = M_IOCACK; 4633 qreply(q, mp); 4634 return; 4635 } 4636 /* FALLTHRU */ 4637 case TI_GETMYNAME: 4638 /* 4639 * For TI_GETPEERNAME and TI_GETMYNAME, we first 4640 * need to copyin the user's strbuf structure. 
4641 * Processing will continue in the M_IOCDATA case 4642 * below. 4643 */ 4644 mi_copyin(q, mp, NULL, 4645 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 4646 return; 4647 case ND_SET: 4648 /* nd_getset performs the necessary checking */ 4649 case ND_GET: 4650 if (nd_getset(q, us->us_nd, mp)) { 4651 qreply(q, mp); 4652 return; 4653 } 4654 break; 4655 case _SIOCSOCKFALLBACK: 4656 /* 4657 * Either sockmod is about to be popped and the 4658 * socket would now be treated as a plain stream, 4659 * or a module is about to be pushed so we have 4660 * to follow pure TPI semantics. 4661 */ 4662 if (!udp->udp_issocket) { 4663 DB_TYPE(mp) = M_IOCNAK; 4664 iocp->ioc_error = EINVAL; 4665 } else { 4666 udp_use_pure_tpi(udp); 4667 4668 DB_TYPE(mp) = M_IOCACK; 4669 iocp->ioc_error = 0; 4670 } 4671 iocp->ioc_count = 0; 4672 iocp->ioc_rval = 0; 4673 qreply(q, mp); 4674 return; 4675 default: 4676 break; 4677 } 4678 break; 4679 case M_IOCDATA: 4680 udp_wput_iocdata(q, mp); 4681 return; 4682 default: 4683 /* Unrecognized messages are passed through without change. */ 4684 break; 4685 } 4686 ip_wput_nondata(q, mp); 4687 } 4688 4689 /* 4690 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 4691 * messages. 4692 */ 4693 static void 4694 udp_wput_iocdata(queue_t *q, mblk_t *mp) 4695 { 4696 mblk_t *mp1; 4697 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 4698 STRUCT_HANDLE(strbuf, sb); 4699 uint_t addrlen; 4700 conn_t *connp = Q_TO_CONN(q); 4701 udp_t *udp = connp->conn_udp; 4702 4703 /* Make sure it is one of ours. */ 4704 switch (iocp->ioc_cmd) { 4705 case TI_GETMYNAME: 4706 case TI_GETPEERNAME: 4707 break; 4708 default: 4709 ip_wput_nondata(q, mp); 4710 return; 4711 } 4712 4713 switch (mi_copy_state(q, mp, &mp1)) { 4714 case -1: 4715 return; 4716 case MI_COPY_CASE(MI_COPY_IN, 1): 4717 break; 4718 case MI_COPY_CASE(MI_COPY_OUT, 1): 4719 /* 4720 * The address has been copied out, so now 4721 * copyout the strbuf. 
4722 */ 4723 mi_copyout(q, mp); 4724 return; 4725 case MI_COPY_CASE(MI_COPY_OUT, 2): 4726 /* 4727 * The address and strbuf have been copied out. 4728 * We're done, so just acknowledge the original 4729 * M_IOCTL. 4730 */ 4731 mi_copy_done(q, mp, 0); 4732 return; 4733 default: 4734 /* 4735 * Something strange has happened, so acknowledge 4736 * the original M_IOCTL with an EPROTO error. 4737 */ 4738 mi_copy_done(q, mp, EPROTO); 4739 return; 4740 } 4741 4742 /* 4743 * Now we have the strbuf structure for TI_GETMYNAME 4744 * and TI_GETPEERNAME. Next we copyout the requested 4745 * address and then we'll copyout the strbuf. 4746 */ 4747 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 4748 4749 if (connp->conn_family == AF_INET) 4750 addrlen = sizeof (sin_t); 4751 else 4752 addrlen = sizeof (sin6_t); 4753 4754 if (STRUCT_FGET(sb, maxlen) < addrlen) { 4755 mi_copy_done(q, mp, EINVAL); 4756 return; 4757 } 4758 4759 switch (iocp->ioc_cmd) { 4760 case TI_GETMYNAME: 4761 break; 4762 case TI_GETPEERNAME: 4763 if (udp->udp_state != TS_DATA_XFER) { 4764 mi_copy_done(q, mp, ENOTCONN); 4765 return; 4766 } 4767 break; 4768 } 4769 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 4770 if (!mp1) 4771 return; 4772 4773 STRUCT_FSET(sb, len, addrlen); 4774 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 4775 case TI_GETMYNAME: 4776 (void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr, 4777 &addrlen); 4778 break; 4779 case TI_GETPEERNAME: 4780 (void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr, 4781 &addrlen); 4782 break; 4783 } 4784 mp1->b_wptr += addrlen; 4785 /* Copy out the address */ 4786 mi_copyout(q, mp); 4787 } 4788 4789 void 4790 udp_ddi_g_init(void) 4791 { 4792 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 4793 udp_opt_obj.odb_opt_arr_cnt); 4794 4795 /* 4796 * We want to be informed each time a stack is created or 4797 * destroyed in the kernel, so we can maintain the 4798 * set of udp_stack_t's. 
4799 */ 4800 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 4801 } 4802 4803 void 4804 udp_ddi_g_destroy(void) 4805 { 4806 netstack_unregister(NS_UDP); 4807 } 4808 4809 #define INET_NAME "ip" 4810 4811 /* 4812 * Initialize the UDP stack instance. 4813 */ 4814 static void * 4815 udp_stack_init(netstackid_t stackid, netstack_t *ns) 4816 { 4817 udp_stack_t *us; 4818 udpparam_t *pa; 4819 int i; 4820 int error = 0; 4821 major_t major; 4822 4823 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 4824 us->us_netstack = ns; 4825 4826 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 4827 us->us_epriv_ports[0] = 2049; 4828 us->us_epriv_ports[1] = 4045; 4829 4830 /* 4831 * The smallest anonymous port in the priviledged port range which UDP 4832 * looks for free port. Use in the option UDP_ANONPRIVBIND. 4833 */ 4834 us->us_min_anonpriv_port = 512; 4835 4836 us->us_bind_fanout_size = udp_bind_fanout_size; 4837 4838 /* Roundup variable that might have been modified in /etc/system */ 4839 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 4840 /* Not a power of two. 
Round up to nearest power of two */ 4841 for (i = 0; i < 31; i++) { 4842 if (us->us_bind_fanout_size < (1 << i)) 4843 break; 4844 } 4845 us->us_bind_fanout_size = 1 << i; 4846 } 4847 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 4848 sizeof (udp_fanout_t), KM_SLEEP); 4849 for (i = 0; i < us->us_bind_fanout_size; i++) { 4850 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 4851 NULL); 4852 } 4853 4854 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 4855 4856 us->us_param_arr = pa; 4857 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 4858 4859 (void) udp_param_register(&us->us_nd, 4860 us->us_param_arr, A_CNT(udp_param_arr)); 4861 4862 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 4863 us->us_mibkp = udp_kstat_init(stackid); 4864 4865 major = mod_name_to_major(INET_NAME); 4866 error = ldi_ident_from_major(major, &us->us_ldi_ident); 4867 ASSERT(error == 0); 4868 return (us); 4869 } 4870 4871 /* 4872 * Free the UDP stack instance. 
4873 */ 4874 static void 4875 udp_stack_fini(netstackid_t stackid, void *arg) 4876 { 4877 udp_stack_t *us = (udp_stack_t *)arg; 4878 int i; 4879 4880 for (i = 0; i < us->us_bind_fanout_size; i++) { 4881 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 4882 } 4883 4884 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 4885 sizeof (udp_fanout_t)); 4886 4887 us->us_bind_fanout = NULL; 4888 4889 nd_free(&us->us_nd); 4890 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 4891 us->us_param_arr = NULL; 4892 4893 udp_kstat_fini(stackid, us->us_mibkp); 4894 us->us_mibkp = NULL; 4895 4896 udp_kstat2_fini(stackid, us->us_kstat); 4897 us->us_kstat = NULL; 4898 bzero(&us->us_statistics, sizeof (us->us_statistics)); 4899 4900 ldi_ident_release(us->us_ldi_ident); 4901 kmem_free(us, sizeof (*us)); 4902 } 4903 4904 static void * 4905 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 4906 { 4907 kstat_t *ksp; 4908 4909 udp_stat_t template = { 4910 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 4911 { "udp_out_opt", KSTAT_DATA_UINT64 }, 4912 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 4913 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 4914 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 4915 #ifdef DEBUG 4916 { "udp_data_conn", KSTAT_DATA_UINT64 }, 4917 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 4918 { "udp_out_lastdst", KSTAT_DATA_UINT64 }, 4919 { "udp_out_diffdst", KSTAT_DATA_UINT64 }, 4920 { "udp_out_ipv6", KSTAT_DATA_UINT64 }, 4921 { "udp_out_mapped", KSTAT_DATA_UINT64 }, 4922 { "udp_out_ipv4", KSTAT_DATA_UINT64 }, 4923 #endif 4924 }; 4925 4926 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 4927 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 4928 KSTAT_FLAG_VIRTUAL, stackid); 4929 4930 if (ksp == NULL) 4931 return (NULL); 4932 4933 bcopy(&template, us_statisticsp, sizeof (template)); 4934 ksp->ks_data = (void *)us_statisticsp; 4935 ksp->ks_private = (void *)(uintptr_t)stackid; 4936 4937 kstat_install(ksp); 4938 return (ksp); 4939 } 
4940 4941 static void 4942 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 4943 { 4944 if (ksp != NULL) { 4945 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4946 kstat_delete_netstack(ksp, stackid); 4947 } 4948 } 4949 4950 static void * 4951 udp_kstat_init(netstackid_t stackid) 4952 { 4953 kstat_t *ksp; 4954 4955 udp_named_kstat_t template = { 4956 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 4957 { "inErrors", KSTAT_DATA_UINT32, 0 }, 4958 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 4959 { "entrySize", KSTAT_DATA_INT32, 0 }, 4960 { "entry6Size", KSTAT_DATA_INT32, 0 }, 4961 { "outErrors", KSTAT_DATA_UINT32, 0 }, 4962 }; 4963 4964 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 4965 KSTAT_TYPE_NAMED, 4966 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 4967 4968 if (ksp == NULL || ksp->ks_data == NULL) 4969 return (NULL); 4970 4971 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 4972 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 4973 4974 bcopy(&template, ksp->ks_data, sizeof (template)); 4975 ksp->ks_update = udp_kstat_update; 4976 ksp->ks_private = (void *)(uintptr_t)stackid; 4977 4978 kstat_install(ksp); 4979 return (ksp); 4980 } 4981 4982 static void 4983 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4984 { 4985 if (ksp != NULL) { 4986 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4987 kstat_delete_netstack(ksp, stackid); 4988 } 4989 } 4990 4991 static int 4992 udp_kstat_update(kstat_t *kp, int rw) 4993 { 4994 udp_named_kstat_t *udpkp; 4995 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 4996 netstack_t *ns; 4997 udp_stack_t *us; 4998 4999 if ((kp == NULL) || (kp->ks_data == NULL)) 5000 return (EIO); 5001 5002 if (rw == KSTAT_WRITE) 5003 return (EACCES); 5004 5005 ns = netstack_find_by_stackid(stackid); 5006 if (ns == NULL) 5007 return (-1); 5008 us = ns->netstack_udp; 5009 if (us == NULL) { 5010 netstack_rele(ns); 5011 return (-1); 5012 } 5013 udpkp = (udp_named_kstat_t 
*)kp->ks_data; 5014 5015 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 5016 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 5017 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 5018 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 5019 netstack_rele(ns); 5020 return (0); 5021 } 5022 5023 static size_t 5024 udp_set_rcv_hiwat(udp_t *udp, size_t size) 5025 { 5026 udp_stack_t *us = udp->udp_us; 5027 5028 /* We add a bit of extra buffering */ 5029 size += size >> 1; 5030 if (size > us->us_max_buf) 5031 size = us->us_max_buf; 5032 5033 udp->udp_rcv_hiwat = size; 5034 return (size); 5035 } 5036 5037 /* 5038 * For the lower queue so that UDP can be a dummy mux. 5039 * Nobody should be sending 5040 * packets up this stream 5041 */ 5042 static void 5043 udp_lrput(queue_t *q, mblk_t *mp) 5044 { 5045 switch (mp->b_datap->db_type) { 5046 case M_FLUSH: 5047 /* Turn around */ 5048 if (*mp->b_rptr & FLUSHW) { 5049 *mp->b_rptr &= ~FLUSHR; 5050 qreply(q, mp); 5051 return; 5052 } 5053 break; 5054 } 5055 freemsg(mp); 5056 } 5057 5058 /* 5059 * For the lower queue so that UDP can be a dummy mux. 5060 * Nobody should be sending packets down this stream. 5061 */ 5062 /* ARGSUSED */ 5063 void 5064 udp_lwput(queue_t *q, mblk_t *mp) 5065 { 5066 freemsg(mp); 5067 } 5068 5069 /* 5070 * Below routines for UDP socket module. 5071 */ 5072 5073 static conn_t * 5074 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp) 5075 { 5076 udp_t *udp; 5077 conn_t *connp; 5078 zoneid_t zoneid; 5079 netstack_t *ns; 5080 udp_stack_t *us; 5081 int len; 5082 5083 ASSERT(errorp != NULL); 5084 5085 if ((*errorp = secpolicy_basic_net_access(credp)) != 0) 5086 return (NULL); 5087 5088 ns = netstack_find_by_cred(credp); 5089 ASSERT(ns != NULL); 5090 us = ns->netstack_udp; 5091 ASSERT(us != NULL); 5092 5093 /* 5094 * For exclusive stacks we set the zoneid to zero 5095 * to make UDP operate as if in the global zone. 
5096 */ 5097 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 5098 zoneid = GLOBAL_ZONEID; 5099 else 5100 zoneid = crgetzoneid(credp); 5101 5102 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 5103 5104 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 5105 if (connp == NULL) { 5106 netstack_rele(ns); 5107 *errorp = ENOMEM; 5108 return (NULL); 5109 } 5110 udp = connp->conn_udp; 5111 5112 /* 5113 * ipcl_conn_create did a netstack_hold. Undo the hold that was 5114 * done by netstack_find_by_cred() 5115 */ 5116 netstack_rele(ns); 5117 5118 /* 5119 * Since this conn_t/udp_t is not yet visible to anybody else we don't 5120 * need to lock anything. 5121 */ 5122 ASSERT(connp->conn_proto == IPPROTO_UDP); 5123 ASSERT(connp->conn_udp == udp); 5124 ASSERT(udp->udp_connp == connp); 5125 5126 /* Set the initial state of the stream and the privilege status. */ 5127 udp->udp_state = TS_UNBND; 5128 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 5129 if (isv6) { 5130 connp->conn_family = AF_INET6; 5131 connp->conn_ipversion = IPV6_VERSION; 5132 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5133 connp->conn_default_ttl = us->us_ipv6_hoplimit; 5134 len = sizeof (ip6_t) + UDPH_SIZE; 5135 } else { 5136 connp->conn_family = AF_INET; 5137 connp->conn_ipversion = IPV4_VERSION; 5138 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5139 connp->conn_default_ttl = us->us_ipv4_ttl; 5140 len = sizeof (ipha_t) + UDPH_SIZE; 5141 } 5142 5143 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 5144 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 5145 5146 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 5147 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 5148 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 5149 connp->conn_ixa->ixa_zoneid = zoneid; 5150 5151 connp->conn_zoneid = zoneid; 5152 5153 /* 5154 * If the caller has the process-wide flag set, then default to MAC 5155 * exempt mode. 
This allows read-down to unlabeled hosts. 5156 */ 5157 if (getpflags(NET_MAC_AWARE, credp) != 0) 5158 connp->conn_mac_mode = CONN_MAC_AWARE; 5159 5160 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 5161 5162 udp->udp_us = us; 5163 5164 connp->conn_rcvbuf = us->us_recv_hiwat; 5165 connp->conn_sndbuf = us->us_xmit_hiwat; 5166 connp->conn_sndlowat = us->us_xmit_lowat; 5167 connp->conn_rcvlowat = udp_mod_info.mi_lowat; 5168 5169 connp->conn_wroff = len + us->us_wroff_extra; 5170 connp->conn_so_type = SOCK_DGRAM; 5171 5172 connp->conn_recv = udp_input; 5173 connp->conn_recvicmp = udp_icmp_input; 5174 crhold(credp); 5175 connp->conn_cred = credp; 5176 connp->conn_cpid = curproc->p_pid; 5177 connp->conn_open_time = ddi_get_lbolt64(); 5178 /* Cache things in ixa without an extra refhold */ 5179 connp->conn_ixa->ixa_cred = connp->conn_cred; 5180 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 5181 if (is_system_labeled()) 5182 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 5183 5184 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 5185 5186 if (us->us_pmtu_discovery) 5187 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 5188 5189 return (connp); 5190 } 5191 5192 sock_lower_handle_t 5193 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 5194 uint_t *smodep, int *errorp, int flags, cred_t *credp) 5195 { 5196 udp_t *udp = NULL; 5197 udp_stack_t *us; 5198 conn_t *connp; 5199 boolean_t isv6; 5200 5201 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 5202 (proto != 0 && proto != IPPROTO_UDP)) { 5203 *errorp = EPROTONOSUPPORT; 5204 return (NULL); 5205 } 5206 5207 if (family == AF_INET6) 5208 isv6 = B_TRUE; 5209 else 5210 isv6 = B_FALSE; 5211 5212 connp = udp_do_open(credp, isv6, flags, errorp); 5213 if (connp == NULL) 5214 return (NULL); 5215 5216 udp = connp->conn_udp; 5217 ASSERT(udp != NULL); 5218 us = udp->udp_us; 5219 ASSERT(us != NULL); 5220 5221 udp->udp_issocket = B_TRUE; 5222 
connp->conn_flags |= IPCL_NONSTR; 5223 5224 /* 5225 * Set flow control 5226 * Since this conn_t/udp_t is not yet visible to anybody else we don't 5227 * need to lock anything. 5228 */ 5229 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); 5230 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; 5231 5232 connp->conn_flow_cntrld = B_FALSE; 5233 5234 mutex_enter(&connp->conn_lock); 5235 connp->conn_state_flags &= ~CONN_INCIPIENT; 5236 mutex_exit(&connp->conn_lock); 5237 5238 *errorp = 0; 5239 *smodep = SM_ATOMIC; 5240 *sock_downcalls = &sock_udp_downcalls; 5241 return ((sock_lower_handle_t)connp); 5242 } 5243 5244 /* ARGSUSED3 */ 5245 void 5246 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 5247 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 5248 { 5249 conn_t *connp = (conn_t *)proto_handle; 5250 struct sock_proto_props sopp; 5251 5252 /* All Solaris components should pass a cred for this operation. */ 5253 ASSERT(cr != NULL); 5254 5255 connp->conn_upcalls = sock_upcalls; 5256 connp->conn_upper_handle = sock_handle; 5257 5258 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 5259 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 5260 sopp.sopp_wroff = connp->conn_wroff; 5261 sopp.sopp_maxblk = INFPSZ; 5262 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 5263 sopp.sopp_rxlowat = connp->conn_rcvlowat; 5264 sopp.sopp_maxaddrlen = sizeof (sin6_t); 5265 sopp.sopp_maxpsz = 5266 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 5267 UDP_MAXPACKET_IPV6; 5268 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : 5269 udp_mod_info.mi_minpsz; 5270 5271 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 5272 &sopp); 5273 } 5274 5275 static void 5276 udp_do_close(conn_t *connp) 5277 { 5278 udp_t *udp; 5279 5280 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 5281 udp = connp->conn_udp; 5282 5283 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 5284 /* 5285 * Running in cluster mode - register unbind information 5286 */ 5287 if (connp->conn_ipversion == IPV4_VERSION) { 5288 (*cl_inet_unbind)( 5289 connp->conn_netstack->netstack_stackid, 5290 IPPROTO_UDP, AF_INET, 5291 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5292 (in_port_t)connp->conn_lport, NULL); 5293 } else { 5294 (*cl_inet_unbind)( 5295 connp->conn_netstack->netstack_stackid, 5296 IPPROTO_UDP, AF_INET6, 5297 (uint8_t *)&(connp->conn_laddr_v6), 5298 (in_port_t)connp->conn_lport, NULL); 5299 } 5300 } 5301 5302 udp_bind_hash_remove(udp, B_FALSE); 5303 5304 ip_quiesce_conn(connp); 5305 5306 if (!IPCL_IS_NONSTR(connp)) { 5307 ASSERT(connp->conn_wq != NULL); 5308 ASSERT(connp->conn_rq != NULL); 5309 qprocsoff(connp->conn_rq); 5310 } 5311 5312 udp_close_free(connp); 5313 5314 /* 5315 * Now we are truly single threaded on this stream, and can 5316 * delete the things hanging off the connp, and finally the connp. 5317 * We removed this connp from the fanout list, it cannot be 5318 * accessed thru the fanouts, and we already waited for the 5319 * conn_ref to drop to 0. We are already in close, so 5320 * there cannot be any other thread from the top. qprocsoff 5321 * has completed, and service has completed or won't run in 5322 * future. 
5323 */ 5324 ASSERT(connp->conn_ref == 1); 5325 5326 if (!IPCL_IS_NONSTR(connp)) { 5327 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 5328 } else { 5329 ip_free_helper_stream(connp); 5330 } 5331 5332 connp->conn_ref--; 5333 ipcl_conn_destroy(connp); 5334 } 5335 5336 /* ARGSUSED1 */ 5337 int 5338 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 5339 { 5340 conn_t *connp = (conn_t *)proto_handle; 5341 5342 /* All Solaris components should pass a cred for this operation. */ 5343 ASSERT(cr != NULL); 5344 5345 udp_do_close(connp); 5346 return (0); 5347 } 5348 5349 static int 5350 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 5351 boolean_t bind_to_req_port_only) 5352 { 5353 sin_t *sin; 5354 sin6_t *sin6; 5355 udp_t *udp = connp->conn_udp; 5356 int error = 0; 5357 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 5358 in_port_t port; /* Host byte order */ 5359 in_port_t requested_port; /* Host byte order */ 5360 int count; 5361 ipaddr_t v4src; /* Set if AF_INET */ 5362 in6_addr_t v6src; 5363 int loopmax; 5364 udp_fanout_t *udpf; 5365 in_port_t lport; /* Network byte order */ 5366 uint_t scopeid = 0; 5367 zoneid_t zoneid = IPCL_ZONEID(connp); 5368 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5369 boolean_t is_inaddr_any; 5370 mlp_type_t addrtype, mlptype; 5371 udp_stack_t *us = udp->udp_us; 5372 5373 switch (len) { 5374 case sizeof (sin_t): /* Complete IPv4 address */ 5375 sin = (sin_t *)sa; 5376 5377 if (sin == NULL || !OK_32PTR((char *)sin)) 5378 return (EINVAL); 5379 5380 if (connp->conn_family != AF_INET || 5381 sin->sin_family != AF_INET) { 5382 return (EAFNOSUPPORT); 5383 } 5384 v4src = sin->sin_addr.s_addr; 5385 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 5386 if (v4src != INADDR_ANY) { 5387 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 5388 B_TRUE); 5389 } 5390 port = ntohs(sin->sin_port); 5391 break; 5392 5393 case sizeof (sin6_t): /* complete IPv6 address */ 5394 sin6 = (sin6_t *)sa; 
5395 5396 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 5397 return (EINVAL); 5398 5399 if (connp->conn_family != AF_INET6 || 5400 sin6->sin6_family != AF_INET6) { 5401 return (EAFNOSUPPORT); 5402 } 5403 v6src = sin6->sin6_addr; 5404 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5405 if (connp->conn_ipv6_v6only) 5406 return (EADDRNOTAVAIL); 5407 5408 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); 5409 if (v4src != INADDR_ANY) { 5410 laddr_type = ip_laddr_verify_v4(v4src, 5411 zoneid, ipst, B_FALSE); 5412 } 5413 } else { 5414 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5415 if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 5416 scopeid = sin6->sin6_scope_id; 5417 laddr_type = ip_laddr_verify_v6(&v6src, 5418 zoneid, ipst, B_TRUE, scopeid); 5419 } 5420 } 5421 port = ntohs(sin6->sin6_port); 5422 break; 5423 5424 default: /* Invalid request */ 5425 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5426 "udp_bind: bad ADDR_length length %u", len); 5427 return (-TBADADDR); 5428 } 5429 5430 /* Is the local address a valid unicast, multicast, or broadcast? */ 5431 if (laddr_type == IPVL_BAD) 5432 return (EADDRNOTAVAIL); 5433 5434 requested_port = port; 5435 5436 if (requested_port == 0 || !bind_to_req_port_only) 5437 bind_to_req_port_only = B_FALSE; 5438 else /* T_BIND_REQ and requested_port != 0 */ 5439 bind_to_req_port_only = B_TRUE; 5440 5441 if (requested_port == 0) { 5442 /* 5443 * If the application passed in zero for the port number, it 5444 * doesn't care which port number we bind to. Get one in the 5445 * valid range. 5446 */ 5447 if (connp->conn_anon_priv_bind) { 5448 port = udp_get_next_priv_port(udp); 5449 } else { 5450 port = udp_update_next_port(udp, 5451 us->us_next_port_to_try, B_TRUE); 5452 } 5453 } else { 5454 /* 5455 * If the port is in the well-known privileged range, 5456 * make sure the caller was privileged. 
5457 */ 5458 int i; 5459 boolean_t priv = B_FALSE; 5460 5461 if (port < us->us_smallest_nonpriv_port) { 5462 priv = B_TRUE; 5463 } else { 5464 for (i = 0; i < us->us_num_epriv_ports; i++) { 5465 if (port == us->us_epriv_ports[i]) { 5466 priv = B_TRUE; 5467 break; 5468 } 5469 } 5470 } 5471 5472 if (priv) { 5473 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 5474 return (-TACCES); 5475 } 5476 } 5477 5478 if (port == 0) 5479 return (-TNOADDR); 5480 5481 /* 5482 * The state must be TS_UNBND. TPI mandates that users must send 5483 * TPI primitives only 1 at a time and wait for the response before 5484 * sending the next primitive. 5485 */ 5486 mutex_enter(&connp->conn_lock); 5487 if (udp->udp_state != TS_UNBND) { 5488 mutex_exit(&connp->conn_lock); 5489 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5490 "udp_bind: bad state, %u", udp->udp_state); 5491 return (-TOUTSTATE); 5492 } 5493 /* 5494 * Copy the source address into our udp structure. This address 5495 * may still be zero; if so, IP will fill in the correct address 5496 * each time an outbound packet is passed to it. Since the udp is 5497 * not yet in the bind hash list, we don't grab the uf_lock to 5498 * change conn_ipversion 5499 */ 5500 if (connp->conn_family == AF_INET) { 5501 ASSERT(sin != NULL); 5502 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); 5503 } else { 5504 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5505 /* 5506 * no need to hold the uf_lock to set the conn_ipversion 5507 * since we are not yet in the fanout list 5508 */ 5509 connp->conn_ipversion = IPV4_VERSION; 5510 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5511 } else { 5512 connp->conn_ipversion = IPV6_VERSION; 5513 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5514 } 5515 } 5516 5517 /* 5518 * If conn_reuseaddr is not set, then we have to make sure that 5519 * the IP address and port number the application requested 5520 * (or we selected for the application) is not being used by 5521 * another stream. 
If another stream is already using the 5522 * requested IP address and port, the behavior depends on 5523 * "bind_to_req_port_only". If set the bind fails; otherwise we 5524 * search for any an unused port to bind to the stream. 5525 * 5526 * As per the BSD semantics, as modified by the Deering multicast 5527 * changes, if udp_reuseaddr is set, then we allow multiple binds 5528 * to the same port independent of the local IP address. 5529 * 5530 * This is slightly different than in SunOS 4.X which did not 5531 * support IP multicast. Note that the change implemented by the 5532 * Deering multicast code effects all binds - not only binding 5533 * to IP multicast addresses. 5534 * 5535 * Note that when binding to port zero we ignore SO_REUSEADDR in 5536 * order to guarantee a unique port. 5537 */ 5538 5539 count = 0; 5540 if (connp->conn_anon_priv_bind) { 5541 /* 5542 * loopmax = (IPPORT_RESERVED-1) - 5543 * us->us_min_anonpriv_port + 1 5544 */ 5545 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 5546 } else { 5547 loopmax = us->us_largest_anon_port - 5548 us->us_smallest_anon_port + 1; 5549 } 5550 5551 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 5552 5553 for (;;) { 5554 udp_t *udp1; 5555 boolean_t found_exclbind = B_FALSE; 5556 conn_t *connp1; 5557 5558 /* 5559 * Walk through the list of udp streams bound to 5560 * requested port with the same IP address. 5561 */ 5562 lport = htons(port); 5563 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 5564 us->us_bind_fanout_size)]; 5565 mutex_enter(&udpf->uf_lock); 5566 for (udp1 = udpf->uf_udp; udp1 != NULL; 5567 udp1 = udp1->udp_bind_hash) { 5568 connp1 = udp1->udp_connp; 5569 5570 if (lport != connp1->conn_lport) 5571 continue; 5572 5573 /* 5574 * On a labeled system, we must treat bindings to ports 5575 * on shared IP addresses by sockets with MAC exemption 5576 * privilege as being in all zones, as there's 5577 * otherwise no way to identify the right receiver. 
5578 */ 5579 if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) 5580 continue; 5581 5582 /* 5583 * If UDP_EXCLBIND is set for either the bound or 5584 * binding endpoint, the semantics of bind 5585 * is changed according to the following chart. 5586 * 5587 * spec = specified address (v4 or v6) 5588 * unspec = unspecified address (v4 or v6) 5589 * A = specified addresses are different for endpoints 5590 * 5591 * bound bind to allowed? 5592 * ------------------------------------- 5593 * unspec unspec no 5594 * unspec spec no 5595 * spec unspec no 5596 * spec spec yes if A 5597 * 5598 * For labeled systems, SO_MAC_EXEMPT behaves the same 5599 * as UDP_EXCLBIND, except that zoneid is ignored. 5600 */ 5601 if (connp1->conn_exclbind || connp->conn_exclbind || 5602 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 5603 if (V6_OR_V4_INADDR_ANY( 5604 connp1->conn_bound_addr_v6) || 5605 is_inaddr_any || 5606 IN6_ARE_ADDR_EQUAL( 5607 &connp1->conn_bound_addr_v6, 5608 &v6src)) { 5609 found_exclbind = B_TRUE; 5610 break; 5611 } 5612 continue; 5613 } 5614 5615 /* 5616 * Check ipversion to allow IPv4 and IPv6 sockets to 5617 * have disjoint port number spaces. 5618 */ 5619 if (connp->conn_ipversion != connp1->conn_ipversion) { 5620 5621 /* 5622 * On the first time through the loop, if the 5623 * the user intentionally specified a 5624 * particular port number, then ignore any 5625 * bindings of the other protocol that may 5626 * conflict. This allows the user to bind IPv6 5627 * alone and get both v4 and v6, or bind both 5628 * both and get each seperately. On subsequent 5629 * times through the loop, we're checking a 5630 * port that we chose (not the user) and thus 5631 * we do not allow casual duplicate bindings. 5632 */ 5633 if (count == 0 && requested_port != 0) 5634 continue; 5635 } 5636 5637 /* 5638 * No difference depending on SO_REUSEADDR. 
5639 * 5640 * If existing port is bound to a 5641 * non-wildcard IP address and 5642 * the requesting stream is bound to 5643 * a distinct different IP addresses 5644 * (non-wildcard, also), keep going. 5645 */ 5646 if (!is_inaddr_any && 5647 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && 5648 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, 5649 &v6src)) { 5650 continue; 5651 } 5652 break; 5653 } 5654 5655 if (!found_exclbind && 5656 (connp->conn_reuseaddr && requested_port != 0)) { 5657 break; 5658 } 5659 5660 if (udp1 == NULL) { 5661 /* 5662 * No other stream has this IP address 5663 * and port number. We can use it. 5664 */ 5665 break; 5666 } 5667 mutex_exit(&udpf->uf_lock); 5668 if (bind_to_req_port_only) { 5669 /* 5670 * We get here only when requested port 5671 * is bound (and only first of the for() 5672 * loop iteration). 5673 * 5674 * The semantics of this bind request 5675 * require it to fail so we return from 5676 * the routine (and exit the loop). 5677 * 5678 */ 5679 mutex_exit(&connp->conn_lock); 5680 return (-TADDRBUSY); 5681 } 5682 5683 if (connp->conn_anon_priv_bind) { 5684 port = udp_get_next_priv_port(udp); 5685 } else { 5686 if ((count == 0) && (requested_port != 0)) { 5687 /* 5688 * If the application wants us to find 5689 * a port, get one to start with. Set 5690 * requested_port to 0, so that we will 5691 * update us->us_next_port_to_try below. 5692 */ 5693 port = udp_update_next_port(udp, 5694 us->us_next_port_to_try, B_TRUE); 5695 requested_port = 0; 5696 } else { 5697 port = udp_update_next_port(udp, port + 1, 5698 B_FALSE); 5699 } 5700 } 5701 5702 if (port == 0 || ++count >= loopmax) { 5703 /* 5704 * We've tried every possible port number and 5705 * there are none available, so send an error 5706 * to the user. 5707 */ 5708 mutex_exit(&connp->conn_lock); 5709 return (-TNOADDR); 5710 } 5711 } 5712 5713 /* 5714 * Copy the source address into our udp structure. 
This address 5715 * may still be zero; if so, ip_attr_connect will fill in the correct 5716 * address when a packet is about to be sent. 5717 * If we are binding to a broadcast or multicast address then 5718 * we just set the conn_bound_addr since we don't want to use 5719 * that as the source address when sending. 5720 */ 5721 connp->conn_bound_addr_v6 = v6src; 5722 connp->conn_laddr_v6 = v6src; 5723 if (scopeid != 0) { 5724 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 5725 connp->conn_ixa->ixa_scopeid = scopeid; 5726 connp->conn_incoming_ifindex = scopeid; 5727 } else { 5728 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5729 connp->conn_incoming_ifindex = connp->conn_bound_if; 5730 } 5731 5732 switch (laddr_type) { 5733 case IPVL_UNICAST_UP: 5734 case IPVL_UNICAST_DOWN: 5735 connp->conn_saddr_v6 = v6src; 5736 connp->conn_mcbc_bind = B_FALSE; 5737 break; 5738 case IPVL_MCAST: 5739 case IPVL_BCAST: 5740 /* ip_set_destination will pick a source address later */ 5741 connp->conn_saddr_v6 = ipv6_all_zeros; 5742 connp->conn_mcbc_bind = B_TRUE; 5743 break; 5744 } 5745 5746 /* Any errors after this point should use late_error */ 5747 connp->conn_lport = lport; 5748 5749 /* 5750 * Now reset the next anonymous port if the application requested 5751 * an anonymous port, or we handed out the next anonymous port. 5752 */ 5753 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { 5754 us->us_next_port_to_try = port + 1; 5755 } 5756 5757 /* Initialize the T_BIND_ACK. 
*/ 5758 if (connp->conn_family == AF_INET) { 5759 sin->sin_port = connp->conn_lport; 5760 } else { 5761 sin6->sin6_port = connp->conn_lport; 5762 } 5763 udp->udp_state = TS_IDLE; 5764 udp_bind_hash_insert(udpf, udp); 5765 mutex_exit(&udpf->uf_lock); 5766 mutex_exit(&connp->conn_lock); 5767 5768 if (cl_inet_bind) { 5769 /* 5770 * Running in cluster mode - register bind information 5771 */ 5772 if (connp->conn_ipversion == IPV4_VERSION) { 5773 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5774 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, 5775 (in_port_t)connp->conn_lport, NULL); 5776 } else { 5777 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5778 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, 5779 (in_port_t)connp->conn_lport, NULL); 5780 } 5781 } 5782 5783 mutex_enter(&connp->conn_lock); 5784 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 5785 if (is_system_labeled() && (!connp->conn_anon_port || 5786 connp->conn_anon_mlp)) { 5787 uint16_t mlpport; 5788 zone_t *zone; 5789 5790 zone = crgetzone(cr); 5791 connp->conn_mlp_type = 5792 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : 5793 mlptSingle; 5794 addrtype = tsol_mlp_addr_type( 5795 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 5796 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 5797 if (addrtype == mlptSingle) { 5798 error = -TNOADDR; 5799 mutex_exit(&connp->conn_lock); 5800 goto late_error; 5801 } 5802 mlpport = connp->conn_anon_port ? PMAPPORT : port; 5803 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 5804 addrtype); 5805 5806 /* 5807 * It is a coding error to attempt to bind an MLP port 5808 * without first setting SOL_SOCKET/SCM_UCRED. 5809 */ 5810 if (mlptype != mlptSingle && 5811 connp->conn_mlp_type == mlptSingle) { 5812 error = EINVAL; 5813 mutex_exit(&connp->conn_lock); 5814 goto late_error; 5815 } 5816 5817 /* 5818 * It is an access violation to attempt to bind an MLP port 5819 * without NET_BINDMLP privilege. 
5820 */ 5821 if (mlptype != mlptSingle && 5822 secpolicy_net_bindmlp(cr) != 0) { 5823 if (connp->conn_debug) { 5824 (void) strlog(UDP_MOD_ID, 0, 1, 5825 SL_ERROR|SL_TRACE, 5826 "udp_bind: no priv for multilevel port %d", 5827 mlpport); 5828 } 5829 error = -TACCES; 5830 mutex_exit(&connp->conn_lock); 5831 goto late_error; 5832 } 5833 5834 /* 5835 * If we're specifically binding a shared IP address and the 5836 * port is MLP on shared addresses, then check to see if this 5837 * zone actually owns the MLP. Reject if not. 5838 */ 5839 if (mlptype == mlptShared && addrtype == mlptShared) { 5840 /* 5841 * No need to handle exclusive-stack zones since 5842 * ALL_ZONES only applies to the shared stack. 5843 */ 5844 zoneid_t mlpzone; 5845 5846 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 5847 htons(mlpport)); 5848 if (connp->conn_zoneid != mlpzone) { 5849 if (connp->conn_debug) { 5850 (void) strlog(UDP_MOD_ID, 0, 1, 5851 SL_ERROR|SL_TRACE, 5852 "udp_bind: attempt to bind port " 5853 "%d on shared addr in zone %d " 5854 "(should be %d)", 5855 mlpport, connp->conn_zoneid, 5856 mlpzone); 5857 } 5858 error = -TACCES; 5859 mutex_exit(&connp->conn_lock); 5860 goto late_error; 5861 } 5862 } 5863 if (connp->conn_anon_port) { 5864 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, 5865 port, B_TRUE); 5866 if (error != 0) { 5867 if (connp->conn_debug) { 5868 (void) strlog(UDP_MOD_ID, 0, 1, 5869 SL_ERROR|SL_TRACE, 5870 "udp_bind: cannot establish anon " 5871 "MLP for port %d", port); 5872 } 5873 error = -TACCES; 5874 mutex_exit(&connp->conn_lock); 5875 goto late_error; 5876 } 5877 } 5878 connp->conn_mlp_type = mlptype; 5879 } 5880 5881 /* 5882 * We create an initial header template here to make a subsequent 5883 * sendto have a starting point. Since conn_last_dst is zero the 5884 * first sendto will always follow the 'dst changed' code path. 5885 * Note that we defer massaging options and the related checksum 5886 * adjustment until we have a destination address. 
5887 */ 5888 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5889 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5890 if (error != 0) { 5891 mutex_exit(&connp->conn_lock); 5892 goto late_error; 5893 } 5894 /* Just in case */ 5895 connp->conn_faddr_v6 = ipv6_all_zeros; 5896 connp->conn_fport = 0; 5897 connp->conn_v6lastdst = ipv6_all_zeros; 5898 mutex_exit(&connp->conn_lock); 5899 5900 error = ip_laddr_fanout_insert(connp); 5901 if (error != 0) 5902 goto late_error; 5903 5904 /* Bind succeeded */ 5905 return (0); 5906 5907 late_error: 5908 /* We had already picked the port number, and then the bind failed */ 5909 mutex_enter(&connp->conn_lock); 5910 udpf = &us->us_bind_fanout[ 5911 UDP_BIND_HASH(connp->conn_lport, 5912 us->us_bind_fanout_size)]; 5913 mutex_enter(&udpf->uf_lock); 5914 connp->conn_saddr_v6 = ipv6_all_zeros; 5915 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5916 connp->conn_laddr_v6 = ipv6_all_zeros; 5917 if (scopeid != 0) { 5918 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5919 connp->conn_incoming_ifindex = connp->conn_bound_if; 5920 } 5921 udp->udp_state = TS_UNBND; 5922 udp_bind_hash_remove(udp, B_TRUE); 5923 connp->conn_lport = 0; 5924 mutex_exit(&udpf->uf_lock); 5925 connp->conn_anon_port = B_FALSE; 5926 connp->conn_mlp_type = mlptSingle; 5927 5928 connp->conn_v6lastdst = ipv6_all_zeros; 5929 5930 /* Restore the header that was built above - different source address */ 5931 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5932 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5933 mutex_exit(&connp->conn_lock); 5934 return (error); 5935 } 5936 5937 int 5938 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5939 socklen_t len, cred_t *cr) 5940 { 5941 int error; 5942 conn_t *connp; 5943 5944 /* All Solaris components should pass a cred for this operation. 
*/ 5945 ASSERT(cr != NULL); 5946 5947 connp = (conn_t *)proto_handle; 5948 5949 if (sa == NULL) 5950 error = udp_do_unbind(connp); 5951 else 5952 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 5953 5954 if (error < 0) { 5955 if (error == -TOUTSTATE) 5956 error = EINVAL; 5957 else 5958 error = proto_tlitosyserr(-error); 5959 } 5960 5961 return (error); 5962 } 5963 5964 static int 5965 udp_implicit_bind(conn_t *connp, cred_t *cr) 5966 { 5967 sin6_t sin6addr; 5968 sin_t *sin; 5969 sin6_t *sin6; 5970 socklen_t len; 5971 int error; 5972 5973 /* All Solaris components should pass a cred for this operation. */ 5974 ASSERT(cr != NULL); 5975 5976 if (connp->conn_family == AF_INET) { 5977 len = sizeof (struct sockaddr_in); 5978 sin = (sin_t *)&sin6addr; 5979 *sin = sin_null; 5980 sin->sin_family = AF_INET; 5981 sin->sin_addr.s_addr = INADDR_ANY; 5982 } else { 5983 ASSERT(connp->conn_family == AF_INET6); 5984 len = sizeof (sin6_t); 5985 sin6 = (sin6_t *)&sin6addr; 5986 *sin6 = sin6_null; 5987 sin6->sin6_family = AF_INET6; 5988 V6_SET_ZERO(sin6->sin6_addr); 5989 } 5990 5991 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len, 5992 cr, B_FALSE); 5993 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5994 } 5995 5996 /* 5997 * This routine removes a port number association from a stream. It 5998 * is called by udp_unbind and udp_tpi_unbind. 
5999 */ 6000 static int 6001 udp_do_unbind(conn_t *connp) 6002 { 6003 udp_t *udp = connp->conn_udp; 6004 udp_fanout_t *udpf; 6005 udp_stack_t *us = udp->udp_us; 6006 6007 if (cl_inet_unbind != NULL) { 6008 /* 6009 * Running in cluster mode - register unbind information 6010 */ 6011 if (connp->conn_ipversion == IPV4_VERSION) { 6012 (*cl_inet_unbind)( 6013 connp->conn_netstack->netstack_stackid, 6014 IPPROTO_UDP, AF_INET, 6015 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 6016 (in_port_t)connp->conn_lport, NULL); 6017 } else { 6018 (*cl_inet_unbind)( 6019 connp->conn_netstack->netstack_stackid, 6020 IPPROTO_UDP, AF_INET6, 6021 (uint8_t *)&(connp->conn_laddr_v6), 6022 (in_port_t)connp->conn_lport, NULL); 6023 } 6024 } 6025 6026 mutex_enter(&connp->conn_lock); 6027 /* If a bind has not been done, we can't unbind. */ 6028 if (udp->udp_state == TS_UNBND) { 6029 mutex_exit(&connp->conn_lock); 6030 return (-TOUTSTATE); 6031 } 6032 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 6033 us->us_bind_fanout_size)]; 6034 mutex_enter(&udpf->uf_lock); 6035 udp_bind_hash_remove(udp, B_TRUE); 6036 connp->conn_saddr_v6 = ipv6_all_zeros; 6037 connp->conn_bound_addr_v6 = ipv6_all_zeros; 6038 connp->conn_laddr_v6 = ipv6_all_zeros; 6039 connp->conn_mcbc_bind = B_FALSE; 6040 connp->conn_lport = 0; 6041 /* In case we were also connected */ 6042 connp->conn_faddr_v6 = ipv6_all_zeros; 6043 connp->conn_fport = 0; 6044 mutex_exit(&udpf->uf_lock); 6045 6046 connp->conn_v6lastdst = ipv6_all_zeros; 6047 udp->udp_state = TS_UNBND; 6048 6049 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 6050 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 6051 mutex_exit(&connp->conn_lock); 6052 6053 ip_unbind(connp); 6054 6055 return (0); 6056 } 6057 6058 /* 6059 * It associates a default destination address with the stream. 
6060 */ 6061 static int 6062 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 6063 cred_t *cr, pid_t pid) 6064 { 6065 sin6_t *sin6; 6066 sin_t *sin; 6067 in6_addr_t v6dst; 6068 ipaddr_t v4dst; 6069 uint16_t dstport; 6070 uint32_t flowinfo; 6071 udp_fanout_t *udpf; 6072 udp_t *udp, *udp1; 6073 ushort_t ipversion; 6074 udp_stack_t *us; 6075 int error; 6076 conn_t *connp1; 6077 ip_xmit_attr_t *ixa; 6078 uint_t scopeid = 0; 6079 uint_t srcid = 0; 6080 in6_addr_t v6src = connp->conn_saddr_v6; 6081 6082 udp = connp->conn_udp; 6083 us = udp->udp_us; 6084 6085 /* 6086 * Address has been verified by the caller 6087 */ 6088 switch (len) { 6089 default: 6090 /* 6091 * Should never happen 6092 */ 6093 return (EINVAL); 6094 6095 case sizeof (sin_t): 6096 sin = (sin_t *)sa; 6097 v4dst = sin->sin_addr.s_addr; 6098 dstport = sin->sin_port; 6099 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 6100 ASSERT(connp->conn_ipversion == IPV4_VERSION); 6101 ipversion = IPV4_VERSION; 6102 break; 6103 6104 case sizeof (sin6_t): 6105 sin6 = (sin6_t *)sa; 6106 v6dst = sin6->sin6_addr; 6107 dstport = sin6->sin6_port; 6108 srcid = sin6->__sin6_src_id; 6109 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 6110 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 6111 connp->conn_netstack); 6112 } 6113 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 6114 if (connp->conn_ipv6_v6only) 6115 return (EADDRNOTAVAIL); 6116 6117 /* 6118 * Destination adress is mapped IPv6 address. 6119 * Source bound address should be unspecified or 6120 * IPv6 mapped address as well. 
6121 */ 6122 if (!IN6_IS_ADDR_UNSPECIFIED( 6123 &connp->conn_bound_addr_v6) && 6124 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { 6125 return (EADDRNOTAVAIL); 6126 } 6127 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 6128 ipversion = IPV4_VERSION; 6129 flowinfo = 0; 6130 } else { 6131 ipversion = IPV6_VERSION; 6132 flowinfo = sin6->sin6_flowinfo; 6133 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 6134 scopeid = sin6->sin6_scope_id; 6135 } 6136 break; 6137 } 6138 6139 if (dstport == 0) 6140 return (-TBADADDR); 6141 6142 /* 6143 * If there is a different thread using conn_ixa then we get a new 6144 * copy and cut the old one loose from conn_ixa. Otherwise we use 6145 * conn_ixa and prevent any other thread from using/changing it. 6146 * Once connect() is done other threads can use conn_ixa since the 6147 * refcnt will be back at one. 6148 */ 6149 ixa = conn_get_ixa(connp, B_TRUE); 6150 if (ixa == NULL) 6151 return (ENOMEM); 6152 6153 ASSERT(ixa->ixa_refcnt >= 2); 6154 ASSERT(ixa == connp->conn_ixa); 6155 6156 mutex_enter(&connp->conn_lock); 6157 /* 6158 * This udp_t must have bound to a port already before doing a connect. 6159 * Reject if a connect is in progress (we drop conn_lock during 6160 * udp_do_connect). 
6161 */ 6162 if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) { 6163 mutex_exit(&connp->conn_lock); 6164 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 6165 "udp_connect: bad state, %u", udp->udp_state); 6166 ixa_refrele(ixa); 6167 return (-TOUTSTATE); 6168 } 6169 ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL); 6170 6171 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 6172 us->us_bind_fanout_size)]; 6173 6174 mutex_enter(&udpf->uf_lock); 6175 if (udp->udp_state == TS_DATA_XFER) { 6176 /* Already connected - clear out state */ 6177 if (connp->conn_mcbc_bind) 6178 connp->conn_saddr_v6 = ipv6_all_zeros; 6179 else 6180 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 6181 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 6182 connp->conn_faddr_v6 = ipv6_all_zeros; 6183 connp->conn_fport = 0; 6184 udp->udp_state = TS_IDLE; 6185 } 6186 6187 connp->conn_fport = dstport; 6188 connp->conn_ipversion = ipversion; 6189 if (ipversion == IPV4_VERSION) { 6190 /* 6191 * Interpret a zero destination to mean loopback. 6192 * Update the T_CONN_REQ (sin/sin6) since it is used to 6193 * generate the T_CONN_CON. 6194 */ 6195 if (v4dst == INADDR_ANY) { 6196 v4dst = htonl(INADDR_LOOPBACK); 6197 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 6198 if (connp->conn_family == AF_INET) { 6199 sin->sin_addr.s_addr = v4dst; 6200 } else { 6201 sin6->sin6_addr = v6dst; 6202 } 6203 } 6204 connp->conn_faddr_v6 = v6dst; 6205 connp->conn_flowinfo = 0; 6206 } else { 6207 ASSERT(connp->conn_ipversion == IPV6_VERSION); 6208 /* 6209 * Interpret a zero destination to mean loopback. 6210 * Update the T_CONN_REQ (sin/sin6) since it is used to 6211 * generate the T_CONN_CON. 
6212 */ 6213 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 6214 v6dst = ipv6_loopback; 6215 sin6->sin6_addr = v6dst; 6216 } 6217 connp->conn_faddr_v6 = v6dst; 6218 connp->conn_flowinfo = flowinfo; 6219 } 6220 mutex_exit(&udpf->uf_lock); 6221 6222 /* 6223 * We update our cred/cpid based on the caller of connect 6224 */ 6225 if (connp->conn_cred != cr) { 6226 crhold(cr); 6227 crfree(connp->conn_cred); 6228 connp->conn_cred = cr; 6229 } 6230 connp->conn_cpid = pid; 6231 ixa->ixa_cred = cr; 6232 ixa->ixa_cpid = pid; 6233 if (is_system_labeled()) { 6234 /* We need to restart with a label based on the cred */ 6235 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 6236 } 6237 6238 if (scopeid != 0) { 6239 ixa->ixa_flags |= IXAF_SCOPEID_SET; 6240 ixa->ixa_scopeid = scopeid; 6241 connp->conn_incoming_ifindex = scopeid; 6242 } else { 6243 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 6244 connp->conn_incoming_ifindex = connp->conn_bound_if; 6245 } 6246 /* 6247 * conn_connect will drop conn_lock and reacquire it. 6248 * To prevent a send* from messing with this udp_t while the lock 6249 * is dropped we set udp_state and clear conn_v6lastdst. 6250 * That will make all send* fail with EISCONN. 6251 */ 6252 connp->conn_v6lastdst = ipv6_all_zeros; 6253 udp->udp_state = TS_WCON_CREQ; 6254 6255 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC); 6256 mutex_exit(&connp->conn_lock); 6257 if (error != 0) 6258 goto connect_failed; 6259 6260 /* 6261 * The addresses have been verified. Time to insert in 6262 * the correct fanout list. 
6263 */ 6264 error = ipcl_conn_insert(connp); 6265 if (error != 0) 6266 goto connect_failed; 6267 6268 mutex_enter(&connp->conn_lock); 6269 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 6270 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 6271 if (error != 0) { 6272 mutex_exit(&connp->conn_lock); 6273 goto connect_failed; 6274 } 6275 6276 udp->udp_state = TS_DATA_XFER; 6277 /* Record this as the "last" send even though we haven't sent any */ 6278 connp->conn_v6lastdst = connp->conn_faddr_v6; 6279 connp->conn_lastipversion = connp->conn_ipversion; 6280 connp->conn_lastdstport = connp->conn_fport; 6281 connp->conn_lastflowinfo = connp->conn_flowinfo; 6282 connp->conn_lastscopeid = scopeid; 6283 connp->conn_lastsrcid = srcid; 6284 /* Also remember a source to use together with lastdst */ 6285 connp->conn_v6lastsrc = v6src; 6286 mutex_exit(&connp->conn_lock); 6287 6288 /* 6289 * We've picked a source address above. Now we can 6290 * verify that the src/port/dst/port is unique for all 6291 * connections in TS_DATA_XFER, skipping ourselves. 
6292 */ 6293 mutex_enter(&udpf->uf_lock); 6294 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 6295 if (udp1->udp_state != TS_DATA_XFER) 6296 continue; 6297 6298 if (udp1 == udp) 6299 continue; 6300 6301 connp1 = udp1->udp_connp; 6302 if (connp->conn_lport != connp1->conn_lport || 6303 connp->conn_ipversion != connp1->conn_ipversion || 6304 dstport != connp1->conn_fport || 6305 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 6306 &connp1->conn_laddr_v6) || 6307 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) || 6308 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) || 6309 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid))) 6310 continue; 6311 mutex_exit(&udpf->uf_lock); 6312 error = -TBADADDR; 6313 goto connect_failed; 6314 } 6315 if (cl_inet_connect2 != NULL) { 6316 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 6317 if (error != 0) { 6318 mutex_exit(&udpf->uf_lock); 6319 error = -TBADADDR; 6320 goto connect_failed; 6321 } 6322 } 6323 mutex_exit(&udpf->uf_lock); 6324 6325 ixa_refrele(ixa); 6326 return (0); 6327 6328 connect_failed: 6329 if (ixa != NULL) 6330 ixa_refrele(ixa); 6331 mutex_enter(&connp->conn_lock); 6332 mutex_enter(&udpf->uf_lock); 6333 udp->udp_state = TS_IDLE; 6334 connp->conn_faddr_v6 = ipv6_all_zeros; 6335 connp->conn_fport = 0; 6336 /* In case the source address was set above */ 6337 if (connp->conn_mcbc_bind) 6338 connp->conn_saddr_v6 = ipv6_all_zeros; 6339 else 6340 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 6341 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 6342 mutex_exit(&udpf->uf_lock); 6343 6344 connp->conn_v6lastdst = ipv6_all_zeros; 6345 connp->conn_flowinfo = 0; 6346 6347 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 6348 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 6349 mutex_exit(&connp->conn_lock); 6350 return (error); 6351 } 6352 6353 static int 6354 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 6355 socklen_t len, sock_connid_t *id, 
cred_t *cr) 6356 { 6357 conn_t *connp = (conn_t *)proto_handle; 6358 udp_t *udp = connp->conn_udp; 6359 int error; 6360 boolean_t did_bind = B_FALSE; 6361 pid_t pid = curproc->p_pid; 6362 6363 /* All Solaris components should pass a cred for this operation. */ 6364 ASSERT(cr != NULL); 6365 6366 if (sa == NULL) { 6367 /* 6368 * Disconnect 6369 * Make sure we are connected 6370 */ 6371 if (udp->udp_state != TS_DATA_XFER) 6372 return (EINVAL); 6373 6374 error = udp_disconnect(connp); 6375 return (error); 6376 } 6377 6378 error = proto_verify_ip_addr(connp->conn_family, sa, len); 6379 if (error != 0) 6380 goto done; 6381 6382 /* do an implicit bind if necessary */ 6383 if (udp->udp_state == TS_UNBND) { 6384 error = udp_implicit_bind(connp, cr); 6385 /* 6386 * We could be racing with an actual bind, in which case 6387 * we would see EPROTO. We cross our fingers and try 6388 * to connect. 6389 */ 6390 if (!(error == 0 || error == EPROTO)) 6391 goto done; 6392 did_bind = B_TRUE; 6393 } 6394 /* 6395 * set SO_DGRAM_ERRIND 6396 */ 6397 connp->conn_dgram_errind = B_TRUE; 6398 6399 error = udp_do_connect(connp, sa, len, cr, pid); 6400 6401 if (error != 0 && did_bind) { 6402 int unbind_err; 6403 6404 unbind_err = udp_do_unbind(connp); 6405 ASSERT(unbind_err == 0); 6406 } 6407 6408 if (error == 0) { 6409 *id = 0; 6410 (*connp->conn_upcalls->su_connected) 6411 (connp->conn_upper_handle, 0, NULL, -1); 6412 } else if (error < 0) { 6413 error = proto_tlitosyserr(-error); 6414 } 6415 6416 done: 6417 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 6418 /* 6419 * No need to hold locks to set state 6420 * after connect failure socket state is undefined 6421 * We set the state only to imitate old sockfs behavior 6422 */ 6423 udp->udp_state = TS_IDLE; 6424 } 6425 return (error); 6426 } 6427 6428 int 6429 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6430 cred_t *cr) 6431 { 6432 sin6_t *sin6; 6433 sin_t *sin = NULL; 6434 uint_t srcid; 6435 conn_t *connp = 
(conn_t *)proto_handle; 6436 udp_t *udp = connp->conn_udp; 6437 int error = 0; 6438 udp_stack_t *us = udp->udp_us; 6439 ushort_t ipversion; 6440 pid_t pid = curproc->p_pid; 6441 ip_xmit_attr_t *ixa; 6442 6443 ASSERT(DB_TYPE(mp) == M_DATA); 6444 6445 /* All Solaris components should pass a cred for this operation. */ 6446 ASSERT(cr != NULL); 6447 6448 /* do an implicit bind if necessary */ 6449 if (udp->udp_state == TS_UNBND) { 6450 error = udp_implicit_bind(connp, cr); 6451 /* 6452 * We could be racing with an actual bind, in which case 6453 * we would see EPROTO. We cross our fingers and try 6454 * to connect. 6455 */ 6456 if (!(error == 0 || error == EPROTO)) { 6457 freemsg(mp); 6458 return (error); 6459 } 6460 } 6461 6462 /* Connected? */ 6463 if (msg->msg_name == NULL) { 6464 if (udp->udp_state != TS_DATA_XFER) { 6465 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6466 return (EDESTADDRREQ); 6467 } 6468 if (msg->msg_controllen != 0) { 6469 error = udp_output_ancillary(connp, NULL, NULL, mp, 6470 NULL, msg, cr, pid); 6471 } else { 6472 error = udp_output_connected(connp, mp, cr, pid); 6473 } 6474 if (us->us_sendto_ignerr) 6475 return (0); 6476 else 6477 return (error); 6478 } 6479 if (udp->udp_state == TS_DATA_XFER) { 6480 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6481 return (EISCONN); 6482 } 6483 error = proto_verify_ip_addr(connp->conn_family, 6484 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6485 if (error != 0) { 6486 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6487 return (error); 6488 } 6489 switch (connp->conn_family) { 6490 case AF_INET6: 6491 sin6 = (sin6_t *)msg->msg_name; 6492 6493 srcid = sin6->__sin6_src_id; 6494 6495 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6496 /* 6497 * Destination is a non-IPv4-compatible IPv6 address. 6498 * Send out an IPv6 format packet. 6499 */ 6500 6501 /* 6502 * If the local address is a mapped address return 6503 * an error. 
6504 * It would be possible to send an IPv6 packet but the 6505 * response would never make it back to the application 6506 * since it is bound to a mapped address. 6507 */ 6508 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 6509 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6510 return (EADDRNOTAVAIL); 6511 } 6512 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6513 sin6->sin6_addr = ipv6_loopback; 6514 ipversion = IPV6_VERSION; 6515 } else { 6516 if (connp->conn_ipv6_v6only) { 6517 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6518 return (EADDRNOTAVAIL); 6519 } 6520 6521 /* 6522 * If the local address is not zero or a mapped address 6523 * return an error. It would be possible to send an 6524 * IPv4 packet but the response would never make it 6525 * back to the application since it is bound to a 6526 * non-mapped address. 6527 */ 6528 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 6529 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 6530 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6531 return (EADDRNOTAVAIL); 6532 } 6533 6534 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 6535 V4_PART_OF_V6(sin6->sin6_addr) = 6536 htonl(INADDR_LOOPBACK); 6537 } 6538 ipversion = IPV4_VERSION; 6539 } 6540 6541 /* 6542 * We have to allocate an ip_xmit_attr_t before we grab 6543 * conn_lock and we need to hold conn_lock once we've check 6544 * conn_same_as_last_v6 to handle concurrent send* calls on a 6545 * socket. 
6546 */ 6547 if (msg->msg_controllen == 0) { 6548 ixa = conn_get_ixa(connp, B_FALSE); 6549 if (ixa == NULL) { 6550 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6551 return (ENOMEM); 6552 } 6553 } else { 6554 ixa = NULL; 6555 } 6556 mutex_enter(&connp->conn_lock); 6557 if (udp->udp_delayed_error != 0) { 6558 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 6559 6560 error = udp->udp_delayed_error; 6561 udp->udp_delayed_error = 0; 6562 6563 /* Compare IP address, port, and family */ 6564 6565 if (sin6->sin6_port == sin2->sin6_port && 6566 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 6567 &sin2->sin6_addr) && 6568 sin6->sin6_family == sin2->sin6_family) { 6569 mutex_exit(&connp->conn_lock); 6570 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6571 if (ixa != NULL) 6572 ixa_refrele(ixa); 6573 return (error); 6574 } 6575 } 6576 6577 if (msg->msg_controllen != 0) { 6578 mutex_exit(&connp->conn_lock); 6579 ASSERT(ixa == NULL); 6580 error = udp_output_ancillary(connp, NULL, sin6, mp, 6581 NULL, msg, cr, pid); 6582 } else if (conn_same_as_last_v6(connp, sin6) && 6583 connp->conn_lastsrcid == srcid && 6584 ipsec_outbound_policy_current(ixa)) { 6585 /* udp_output_lastdst drops conn_lock */ 6586 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6587 } else { 6588 /* udp_output_newdst drops conn_lock */ 6589 error = udp_output_newdst(connp, mp, NULL, sin6, 6590 ipversion, cr, pid, ixa); 6591 } 6592 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6593 if (us->us_sendto_ignerr) 6594 return (0); 6595 else 6596 return (error); 6597 case AF_INET: 6598 sin = (sin_t *)msg->msg_name; 6599 6600 ipversion = IPV4_VERSION; 6601 6602 if (sin->sin_addr.s_addr == INADDR_ANY) 6603 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 6604 6605 /* 6606 * We have to allocate an ip_xmit_attr_t before we grab 6607 * conn_lock and we need to hold conn_lock once we've check 6608 * conn_same_as_last_v6 to handle concurrent send* on a socket. 
6609 */ 6610 if (msg->msg_controllen == 0) { 6611 ixa = conn_get_ixa(connp, B_FALSE); 6612 if (ixa == NULL) { 6613 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6614 return (ENOMEM); 6615 } 6616 } else { 6617 ixa = NULL; 6618 } 6619 mutex_enter(&connp->conn_lock); 6620 if (udp->udp_delayed_error != 0) { 6621 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 6622 6623 error = udp->udp_delayed_error; 6624 udp->udp_delayed_error = 0; 6625 6626 /* Compare IP address and port */ 6627 6628 if (sin->sin_port == sin2->sin_port && 6629 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) { 6630 mutex_exit(&connp->conn_lock); 6631 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6632 if (ixa != NULL) 6633 ixa_refrele(ixa); 6634 return (error); 6635 } 6636 } 6637 if (msg->msg_controllen != 0) { 6638 mutex_exit(&connp->conn_lock); 6639 ASSERT(ixa == NULL); 6640 error = udp_output_ancillary(connp, sin, NULL, mp, 6641 NULL, msg, cr, pid); 6642 } else if (conn_same_as_last_v4(connp, sin) && 6643 ipsec_outbound_policy_current(ixa)) { 6644 /* udp_output_lastdst drops conn_lock */ 6645 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6646 } else { 6647 /* udp_output_newdst drops conn_lock */ 6648 error = udp_output_newdst(connp, mp, sin, NULL, 6649 ipversion, cr, pid, ixa); 6650 } 6651 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6652 if (us->us_sendto_ignerr) 6653 return (0); 6654 else 6655 return (error); 6656 default: 6657 return (EINVAL); 6658 } 6659 } 6660 6661 int 6662 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 6663 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb) 6664 { 6665 conn_t *connp = (conn_t *)proto_handle; 6666 udp_t *udp; 6667 struct T_capability_ack tca; 6668 struct sockaddr_in6 laddr, faddr; 6669 socklen_t laddrlen, faddrlen; 6670 short opts; 6671 struct stroptions *stropt; 6672 mblk_t *stropt_mp; 6673 int error; 6674 6675 udp = connp->conn_udp; 6676 6677 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 6678 6679 /* 6680 * setup the fallback 
stream that was allocated 6681 */ 6682 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6683 connp->conn_minor_arena = WR(q)->q_ptr; 6684 6685 RD(q)->q_ptr = WR(q)->q_ptr = connp; 6686 6687 WR(q)->q_qinfo = &udp_winit; 6688 6689 connp->conn_rq = RD(q); 6690 connp->conn_wq = WR(q); 6691 6692 /* Notify stream head about options before sending up data */ 6693 stropt_mp->b_datap->db_type = M_SETOPTS; 6694 stropt_mp->b_wptr += sizeof (*stropt); 6695 stropt = (struct stroptions *)stropt_mp->b_rptr; 6696 stropt->so_flags = SO_WROFF | SO_HIWAT; 6697 stropt->so_wroff = connp->conn_wroff; 6698 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 6699 putnext(RD(q), stropt_mp); 6700 6701 /* 6702 * Free the helper stream 6703 */ 6704 ip_free_helper_stream(connp); 6705 6706 if (!issocket) 6707 udp_use_pure_tpi(udp); 6708 6709 /* 6710 * Collect the information needed to sync with the sonode 6711 */ 6712 udp_do_capability_ack(udp, &tca, TC1_INFO); 6713 6714 laddrlen = faddrlen = sizeof (sin6_t); 6715 (void) udp_getsockname((sock_lower_handle_t)connp, 6716 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6717 error = udp_getpeername((sock_lower_handle_t)connp, 6718 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6719 if (error != 0) 6720 faddrlen = 0; 6721 6722 opts = 0; 6723 if (connp->conn_dgram_errind) 6724 opts |= SO_DGRAM_ERRIND; 6725 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 6726 opts |= SO_DONTROUTE; 6727 6728 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 6729 (struct sockaddr *)&laddr, laddrlen, 6730 (struct sockaddr *)&faddr, faddrlen, opts); 6731 6732 mutex_enter(&udp->udp_recv_lock); 6733 /* 6734 * Attempts to send data up during fallback will result in it being 6735 * queued in udp_t. Now we push up any queued packets. 
6736 */ 6737 while (udp->udp_fallback_queue_head != NULL) { 6738 mblk_t *mp; 6739 mp = udp->udp_fallback_queue_head; 6740 udp->udp_fallback_queue_head = mp->b_next; 6741 mutex_exit(&udp->udp_recv_lock); 6742 mp->b_next = NULL; 6743 putnext(RD(q), mp); 6744 mutex_enter(&udp->udp_recv_lock); 6745 } 6746 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6747 /* 6748 * No longer a streams less socket 6749 */ 6750 mutex_enter(&connp->conn_lock); 6751 connp->conn_flags &= ~IPCL_NONSTR; 6752 mutex_exit(&connp->conn_lock); 6753 6754 mutex_exit(&udp->udp_recv_lock); 6755 6756 ASSERT(connp->conn_ref >= 1); 6757 6758 return (0); 6759 } 6760 6761 /* ARGSUSED3 */ 6762 int 6763 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6764 socklen_t *salenp, cred_t *cr) 6765 { 6766 conn_t *connp = (conn_t *)proto_handle; 6767 udp_t *udp = connp->conn_udp; 6768 int error; 6769 6770 /* All Solaris components should pass a cred for this operation. */ 6771 ASSERT(cr != NULL); 6772 6773 mutex_enter(&connp->conn_lock); 6774 if (udp->udp_state != TS_DATA_XFER) 6775 error = ENOTCONN; 6776 else 6777 error = conn_getpeername(connp, sa, salenp); 6778 mutex_exit(&connp->conn_lock); 6779 return (error); 6780 } 6781 6782 /* ARGSUSED3 */ 6783 int 6784 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6785 socklen_t *salenp, cred_t *cr) 6786 { 6787 conn_t *connp = (conn_t *)proto_handle; 6788 int error; 6789 6790 /* All Solaris components should pass a cred for this operation. 
*/ 6791 ASSERT(cr != NULL); 6792 6793 mutex_enter(&connp->conn_lock); 6794 error = conn_getsockname(connp, sa, salenp); 6795 mutex_exit(&connp->conn_lock); 6796 return (error); 6797 } 6798 6799 int 6800 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6801 void *optvalp, socklen_t *optlen, cred_t *cr) 6802 { 6803 conn_t *connp = (conn_t *)proto_handle; 6804 int error; 6805 t_uscalar_t max_optbuf_len; 6806 void *optvalp_buf; 6807 int len; 6808 6809 /* All Solaris components should pass a cred for this operation. */ 6810 ASSERT(cr != NULL); 6811 6812 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6813 udp_opt_obj.odb_opt_des_arr, 6814 udp_opt_obj.odb_opt_arr_cnt, 6815 B_FALSE, B_TRUE, cr); 6816 if (error != 0) { 6817 if (error < 0) 6818 error = proto_tlitosyserr(-error); 6819 return (error); 6820 } 6821 6822 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6823 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6824 if (len == -1) { 6825 kmem_free(optvalp_buf, max_optbuf_len); 6826 return (EINVAL); 6827 } 6828 6829 /* 6830 * update optlen and copy option value 6831 */ 6832 t_uscalar_t size = MIN(len, *optlen); 6833 6834 bcopy(optvalp_buf, optvalp, size); 6835 bcopy(&size, optlen, sizeof (size)); 6836 6837 kmem_free(optvalp_buf, max_optbuf_len); 6838 return (0); 6839 } 6840 6841 int 6842 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6843 const void *optvalp, socklen_t optlen, cred_t *cr) 6844 { 6845 conn_t *connp = (conn_t *)proto_handle; 6846 int error; 6847 6848 /* All Solaris components should pass a cred for this operation. 
*/ 6849 ASSERT(cr != NULL); 6850 6851 error = proto_opt_check(level, option_name, optlen, NULL, 6852 udp_opt_obj.odb_opt_des_arr, 6853 udp_opt_obj.odb_opt_arr_cnt, 6854 B_TRUE, B_FALSE, cr); 6855 6856 if (error != 0) { 6857 if (error < 0) 6858 error = proto_tlitosyserr(-error); 6859 return (error); 6860 } 6861 6862 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6863 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6864 NULL, cr); 6865 6866 ASSERT(error >= 0); 6867 6868 return (error); 6869 } 6870 6871 void 6872 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6873 { 6874 conn_t *connp = (conn_t *)proto_handle; 6875 udp_t *udp = connp->conn_udp; 6876 6877 mutex_enter(&udp->udp_recv_lock); 6878 connp->conn_flow_cntrld = B_FALSE; 6879 mutex_exit(&udp->udp_recv_lock); 6880 } 6881 6882 /* ARGSUSED2 */ 6883 int 6884 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6885 { 6886 conn_t *connp = (conn_t *)proto_handle; 6887 6888 /* All Solaris components should pass a cred for this operation. */ 6889 ASSERT(cr != NULL); 6890 6891 /* shut down the send side */ 6892 if (how != SHUT_RD) 6893 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6894 SOCK_OPCTL_SHUT_SEND, 0); 6895 /* shut down the recv side */ 6896 if (how != SHUT_WR) 6897 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6898 SOCK_OPCTL_SHUT_RECV, 0); 6899 return (0); 6900 } 6901 6902 int 6903 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6904 int mode, int32_t *rvalp, cred_t *cr) 6905 { 6906 conn_t *connp = (conn_t *)proto_handle; 6907 int error; 6908 6909 /* All Solaris components should pass a cred for this operation. */ 6910 ASSERT(cr != NULL); 6911 6912 /* 6913 * If we don't have a helper stream then create one. 6914 * ip_create_helper_stream takes care of locking the conn_t, 6915 * so this check for NULL is just a performance optimization. 
6916 */ 6917 if (connp->conn_helper_info == NULL) { 6918 udp_stack_t *us = connp->conn_udp->udp_us; 6919 6920 ASSERT(us->us_ldi_ident != NULL); 6921 6922 /* 6923 * Create a helper stream for non-STREAMS socket. 6924 */ 6925 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6926 if (error != 0) { 6927 ip0dbg(("tcp_ioctl: create of IP helper stream " 6928 "failed %d\n", error)); 6929 return (error); 6930 } 6931 } 6932 6933 switch (cmd) { 6934 case ND_SET: 6935 case ND_GET: 6936 case _SIOCSOCKFALLBACK: 6937 case TI_GETPEERNAME: 6938 case TI_GETMYNAME: 6939 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6940 cmd)); 6941 error = EINVAL; 6942 break; 6943 default: 6944 /* 6945 * Pass on to IP using helper stream 6946 */ 6947 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6948 cmd, arg, mode, cr, rvalp); 6949 break; 6950 } 6951 return (error); 6952 } 6953 6954 /* ARGSUSED */ 6955 int 6956 udp_accept(sock_lower_handle_t lproto_handle, 6957 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6958 cred_t *cr) 6959 { 6960 return (EOPNOTSUPP); 6961 } 6962 6963 /* ARGSUSED */ 6964 int 6965 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6966 { 6967 return (EOPNOTSUPP); 6968 } 6969 6970 sock_downcalls_t sock_udp_downcalls = { 6971 udp_activate, /* sd_activate */ 6972 udp_accept, /* sd_accept */ 6973 udp_bind, /* sd_bind */ 6974 udp_listen, /* sd_listen */ 6975 udp_connect, /* sd_connect */ 6976 udp_getpeername, /* sd_getpeername */ 6977 udp_getsockname, /* sd_getsockname */ 6978 udp_getsockopt, /* sd_getsockopt */ 6979 udp_setsockopt, /* sd_setsockopt */ 6980 udp_send, /* sd_send */ 6981 NULL, /* sd_send_uio */ 6982 NULL, /* sd_recv_uio */ 6983 NULL, /* sd_poll */ 6984 udp_shutdown, /* sd_shutdown */ 6985 udp_clr_flowctrl, /* sd_setflowctrl */ 6986 udp_ioctl, /* sd_ioctl */ 6987 udp_close /* sd_close */ 6988 }; 6989