1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/suntpi.h> 39 #include <sys/xti_inet.h> 40 #include <sys/kmem.h> 41 #include <sys/cred_impl.h> 42 #include <sys/policy.h> 43 #include <sys/priv.h> 44 #include <sys/ucred.h> 45 #include <sys/zone.h> 46 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/sockio.h> 50 #include <sys/vtrace.h> 51 #include <sys/sdt.h> 52 #include <sys/debug.h> 53 #include <sys/isa_defs.h> 54 #include <sys/random.h> 55 #include <netinet/in.h> 56 #include <netinet/ip6.h> 57 #include <netinet/icmp6.h> 58 #include <netinet/udp.h> 59 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip_impl.h> 63 #include <inet/ipsec_impl.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ire.h> 66 #include <inet/ip_if.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_ndp.h> 69 #include <inet/proto_set.h> 70 #include <inet/mib2.h> 71 #include <inet/nd.h> 72 #include <inet/optcom.h> 73 #include <inet/snmpcom.h> 74 #include <inet/kstatcom.h> 75 #include <inet/ipclassifier.h> 76 #include <sys/squeue_impl.h> 77 #include <inet/ipnet.h> 78 #include <sys/ethernet.h> 79 80 #include <sys/tsol/label.h> 81 #include <sys/tsol/tnet.h> 82 #include <rpc/pmap_prot.h> 83 84 #include <inet/udp_impl.h> 85 86 /* 87 * Synchronization notes: 88 * 89 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 90 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 91 * protects the contents of the udp_t. uf_lock protects the address and the 92 * fanout information. 93 * The lock order is conn_lock -> uf_lock. 94 * 95 * The fanout lock uf_lock: 96 * When a UDP endpoint is bound to a local port, it is inserted into 97 * a bind hash list. 
The list consists of an array of udp_fanout_t buckets. 98 * The size of the array is controlled by the udp_bind_fanout_size variable. 99 * This variable can be changed in /etc/system if the default value is 100 * not large enough. Each bind hash bucket is protected by a per bucket 101 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 102 * structure and a few other fields in the udp_t. A UDP endpoint is removed 103 * from the bind hash list only when it is being unbound or being closed. 104 * The per bucket lock also protects a UDP endpoint's state changes. 105 * 106 * Plumbing notes: 107 * UDP is always a device driver. For compatibility with mibopen() code 108 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 109 * dummy module. 110 * 111 * The above implies that we don't support any intermediate module to 112 * reside in between /dev/ip and udp -- in fact, we never supported such 113 * scenario in the past as the inter-layer communication semantics have 114 * always been private. 
115 */ 116 117 /* For /etc/system control */ 118 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 119 120 static void udp_addr_req(queue_t *q, mblk_t *mp); 121 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 122 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 123 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 124 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 125 const in6_addr_t *, in_port_t, uint32_t); 126 static void udp_capability_req(queue_t *q, mblk_t *mp); 127 static int udp_tpi_close(queue_t *q, int flags); 128 static void udp_close_free(conn_t *); 129 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 130 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 131 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 132 int sys_error); 133 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 134 t_scalar_t tlierr, int sys_error); 135 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 136 cred_t *cr); 137 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 138 char *value, caddr_t cp, cred_t *cr); 139 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 140 char *value, caddr_t cp, cred_t *cr); 141 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 142 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 143 ip_recv_attr_t *ira); 144 static void udp_info_req(queue_t *q, mblk_t *mp); 145 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 146 static void udp_lrput(queue_t *, mblk_t *); 147 static void udp_lwput(queue_t *, mblk_t *); 148 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 149 cred_t *credp, boolean_t isv6); 150 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 151 cred_t *credp); 152 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 153 cred_t *credp); 154 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, 
t_scalar_t name); 155 int udp_opt_set(conn_t *connp, uint_t optset_context, 156 int level, int name, uint_t inlen, 157 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 158 void *thisdg_attrs, cred_t *cr); 159 int udp_opt_get(conn_t *connp, int level, int name, 160 uchar_t *ptr); 161 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 162 pid_t pid); 163 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 164 pid_t pid, ip_xmit_attr_t *ixa); 165 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 166 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 167 ip_xmit_attr_t *ixa); 168 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 169 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 170 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 171 cred_t *cr); 172 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 173 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 174 int *); 175 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 176 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 177 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 178 static void udp_ud_err_connected(conn_t *, t_scalar_t); 179 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 180 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 181 boolean_t random); 182 static void udp_wput_other(queue_t *q, mblk_t *mp); 183 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 184 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 185 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 186 187 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 188 static void udp_stack_fini(netstackid_t stackid, void *arg); 189 190 static void *udp_kstat_init(netstackid_t stackid); 191 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 192 static void 
*udp_kstat2_init(netstackid_t, udp_stat_t *); 193 static void udp_kstat2_fini(netstackid_t, kstat_t *); 194 static int udp_kstat_update(kstat_t *kp, int rw); 195 196 197 /* Common routines for TPI and socket module */ 198 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 199 200 /* Common routine for TPI and socket module */ 201 static conn_t *udp_do_open(cred_t *, boolean_t, int, int *); 202 static void udp_do_close(conn_t *); 203 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 204 boolean_t); 205 static int udp_do_unbind(conn_t *); 206 207 int udp_getsockname(sock_lower_handle_t, 208 struct sockaddr *, socklen_t *, cred_t *); 209 int udp_getpeername(sock_lower_handle_t, 210 struct sockaddr *, socklen_t *, cred_t *); 211 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 212 cred_t *, pid_t); 213 214 #define UDP_RECV_HIWATER (56 * 1024) 215 #define UDP_RECV_LOWATER 128 216 #define UDP_XMIT_HIWATER (56 * 1024) 217 #define UDP_XMIT_LOWATER 1024 218 219 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) 220 221 /* 222 * Checks if the given destination addr/port is allowed out. 223 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 224 * Called for each connect() and for sendto()/sendmsg() to a different 225 * destination. 226 * For connect(), called in udp_connect(). 227 * For sendto()/sendmsg(), called in udp_output_newdst(). 228 * 229 * This macro assumes that the cl_inet_connect2 hook is not NULL. 230 * Please check this before calling this macro. 
231 * 232 * void 233 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 234 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 235 */ 236 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 237 (err) = 0; \ 238 /* \ 239 * Running in cluster mode - check and register active \ 240 * "connection" information \ 241 */ \ 242 if ((cp)->conn_ipversion == IPV4_VERSION) \ 243 (err) = (*cl_inet_connect2)( \ 244 (cp)->conn_netstack->netstack_stackid, \ 245 IPPROTO_UDP, is_outgoing, AF_INET, \ 246 (uint8_t *)&((cp)->conn_laddr_v4), \ 247 (cp)->conn_lport, \ 248 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 249 (in_port_t)(fport), NULL); \ 250 else \ 251 (err) = (*cl_inet_connect2)( \ 252 (cp)->conn_netstack->netstack_stackid, \ 253 IPPROTO_UDP, is_outgoing, AF_INET6, \ 254 (uint8_t *)&((cp)->conn_laddr_v6), \ 255 (cp)->conn_lport, \ 256 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 257 } 258 259 static struct module_info udp_mod_info = { 260 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 261 }; 262 263 /* 264 * Entry points for UDP as a device. 265 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 266 */ 267 static struct qinit udp_rinitv4 = { 268 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 269 }; 270 271 static struct qinit udp_rinitv6 = { 272 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 273 }; 274 275 static struct qinit udp_winit = { 276 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info 277 }; 278 279 /* UDP entry point during fallback */ 280 struct qinit udp_fallback_sock_winit = { 281 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 282 }; 283 284 /* 285 * UDP needs to handle I_LINK and I_PLINK since ifconfig 286 * likes to use it as a place to hang the various streams. 
287 */ 288 static struct qinit udp_lrinit = { 289 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 290 }; 291 292 static struct qinit udp_lwinit = { 293 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 294 }; 295 296 /* For AF_INET aka /dev/udp */ 297 struct streamtab udpinfov4 = { 298 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 299 }; 300 301 /* For AF_INET6 aka /dev/udp6 */ 302 struct streamtab udpinfov6 = { 303 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 304 }; 305 306 static sin_t sin_null; /* Zero address for quick clears */ 307 static sin6_t sin6_null; /* Zero address for quick clears */ 308 309 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 310 311 /* Default structure copied into T_INFO_ACK messages */ 312 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 313 T_INFO_ACK, 314 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 315 T_INVALID, /* ETSDU_size. udp does not support expedited data. */ 316 T_INVALID, /* CDATA_size. udp does not support connect data. */ 317 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 318 sizeof (sin_t), /* ADDR_size. */ 319 0, /* OPT_size - not initialized here */ 320 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 321 T_CLTS, /* SERV_type. udp supports connection-less. */ 322 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 323 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 324 }; 325 326 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 327 328 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 329 T_INFO_ACK, 330 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 331 T_INVALID, /* ETSDU_size. udp does not support expedited data. */ 332 T_INVALID, /* CDATA_size. udp does not support connect data. */ 333 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 334 sizeof (sin6_t), /* ADDR_size. */ 335 0, /* OPT_size - not initialized here */ 336 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. 
headers */ 337 T_CLTS, /* SERV_type. udp supports connection-less. */ 338 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 339 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 340 }; 341 342 /* largest UDP port number */ 343 #define UDP_MAX_PORT 65535 344 345 /* 346 * Table of ND variables supported by udp. These are loaded into us_nd 347 * in udp_open. 348 * All of these are alterable, within the min/max values given, at run time. 349 */ 350 /* BEGIN CSTYLED */ 351 udpparam_t udp_param_arr[] = { 352 /*min max value name */ 353 { 0L, 256, 32, "udp_wroff_extra" }, 354 { 1L, 255, 255, "udp_ipv4_ttl" }, 355 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 356 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 357 { 0, 1, 1, "udp_do_checksum" }, 358 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 359 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 360 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 361 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 362 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 363 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 364 { 0, 1, 0, "udp_pmtu_discovery" }, 365 { 0, 1, 0, "udp_sendto_ignerr" }, 366 }; 367 /* END CSTYLED */ 368 369 /* Settable in /etc/system */ 370 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 371 uint32_t udp_random_anon_port = 1; 372 373 /* 374 * Hook functions to enable cluster networking. 
375 * On non-clustered systems these vectors must always be NULL 376 */ 377 378 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 379 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 380 void *args) = NULL; 381 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 382 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 383 void *args) = NULL; 384 385 typedef union T_primitives *t_primp_t; 386 387 /* 388 * Return the next anonymous port in the privileged port range for 389 * bind checking. 390 * 391 * Trusted Extension (TX) notes: TX allows administrator to mark or 392 * reserve ports as Multilevel ports (MLP). MLP has special function 393 * on TX systems. Once a port is made MLP, it's not available as 394 * ordinary port. This creates "holes" in the port name space. It 395 * may be necessary to skip the "holes" find a suitable anon port. 396 */ 397 static in_port_t 398 udp_get_next_priv_port(udp_t *udp) 399 { 400 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 401 in_port_t nextport; 402 boolean_t restart = B_FALSE; 403 udp_stack_t *us = udp->udp_us; 404 405 retry: 406 if (next_priv_port < us->us_min_anonpriv_port || 407 next_priv_port >= IPPORT_RESERVED) { 408 next_priv_port = IPPORT_RESERVED - 1; 409 if (restart) 410 return (0); 411 restart = B_TRUE; 412 } 413 414 if (is_system_labeled() && 415 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 416 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 417 next_priv_port = nextport; 418 goto retry; 419 } 420 421 return (next_priv_port--); 422 } 423 424 /* 425 * Hash list removal routine for udp_t structures. 
426 */ 427 static void 428 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 429 { 430 udp_t *udpnext; 431 kmutex_t *lockp; 432 udp_stack_t *us = udp->udp_us; 433 conn_t *connp = udp->udp_connp; 434 435 if (udp->udp_ptpbhn == NULL) 436 return; 437 438 /* 439 * Extract the lock pointer in case there are concurrent 440 * hash_remove's for this instance. 441 */ 442 ASSERT(connp->conn_lport != 0); 443 if (!caller_holds_lock) { 444 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 445 us->us_bind_fanout_size)].uf_lock; 446 ASSERT(lockp != NULL); 447 mutex_enter(lockp); 448 } 449 if (udp->udp_ptpbhn != NULL) { 450 udpnext = udp->udp_bind_hash; 451 if (udpnext != NULL) { 452 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 453 udp->udp_bind_hash = NULL; 454 } 455 *udp->udp_ptpbhn = udpnext; 456 udp->udp_ptpbhn = NULL; 457 } 458 if (!caller_holds_lock) { 459 mutex_exit(lockp); 460 } 461 } 462 463 static void 464 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 465 { 466 conn_t *connp = udp->udp_connp; 467 udp_t **udpp; 468 udp_t *udpnext; 469 conn_t *connext; 470 471 ASSERT(MUTEX_HELD(&uf->uf_lock)); 472 ASSERT(udp->udp_ptpbhn == NULL); 473 udpp = &uf->uf_udp; 474 udpnext = udpp[0]; 475 if (udpnext != NULL) { 476 /* 477 * If the new udp bound to the INADDR_ANY address 478 * and the first one in the list is not bound to 479 * INADDR_ANY we skip all entries until we find the 480 * first one bound to INADDR_ANY. 481 * This makes sure that applications binding to a 482 * specific address get preference over those binding to 483 * INADDR_ANY. 
484 */ 485 connext = udpnext->udp_connp; 486 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 487 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 488 while ((udpnext = udpp[0]) != NULL && 489 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 490 udpp = &(udpnext->udp_bind_hash); 491 } 492 if (udpnext != NULL) 493 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 494 } else { 495 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 496 } 497 } 498 udp->udp_bind_hash = udpnext; 499 udp->udp_ptpbhn = udpp; 500 udpp[0] = udp; 501 } 502 503 /* 504 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 505 * passed to udp_wput. 506 * It associates a port number and local address with the stream. 507 * It calls IP to verify the local IP address, and calls IP to insert 508 * the conn_t in the fanout table. 509 * If everything is ok it then sends the T_BIND_ACK back up. 510 * 511 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 512 * without setting SO_REUSEADDR. This is needed so that they 513 * can be viewed as two independent transport protocols. 514 * However, anonymouns ports are allocated from the same range to avoid 515 * duplicating the us->us_next_port_to_try. 516 */ 517 static void 518 udp_tpi_bind(queue_t *q, mblk_t *mp) 519 { 520 sin_t *sin; 521 sin6_t *sin6; 522 mblk_t *mp1; 523 struct T_bind_req *tbr; 524 conn_t *connp; 525 udp_t *udp; 526 int error; 527 struct sockaddr *sa; 528 cred_t *cr; 529 530 /* 531 * All Solaris components should pass a db_credp 532 * for this TPI message, hence we ASSERT. 533 * But in case there is some other M_PROTO that looks 534 * like a TPI message sent by some other kernel 535 * component, we check and return an error. 
536 */ 537 cr = msg_getcred(mp, NULL); 538 ASSERT(cr != NULL); 539 if (cr == NULL) { 540 udp_err_ack(q, mp, TSYSERR, EINVAL); 541 return; 542 } 543 544 connp = Q_TO_CONN(q); 545 udp = connp->conn_udp; 546 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 547 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 548 "udp_bind: bad req, len %u", 549 (uint_t)(mp->b_wptr - mp->b_rptr)); 550 udp_err_ack(q, mp, TPROTO, 0); 551 return; 552 } 553 if (udp->udp_state != TS_UNBND) { 554 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 555 "udp_bind: bad state, %u", udp->udp_state); 556 udp_err_ack(q, mp, TOUTSTATE, 0); 557 return; 558 } 559 /* 560 * Reallocate the message to make sure we have enough room for an 561 * address. 562 */ 563 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 564 if (mp1 == NULL) { 565 udp_err_ack(q, mp, TSYSERR, ENOMEM); 566 return; 567 } 568 569 mp = mp1; 570 571 /* Reset the message type in preparation for shipping it back. */ 572 DB_TYPE(mp) = M_PCPROTO; 573 574 tbr = (struct T_bind_req *)mp->b_rptr; 575 switch (tbr->ADDR_length) { 576 case 0: /* Request for a generic port */ 577 tbr->ADDR_offset = sizeof (struct T_bind_req); 578 if (connp->conn_family == AF_INET) { 579 tbr->ADDR_length = sizeof (sin_t); 580 sin = (sin_t *)&tbr[1]; 581 *sin = sin_null; 582 sin->sin_family = AF_INET; 583 mp->b_wptr = (uchar_t *)&sin[1]; 584 sa = (struct sockaddr *)sin; 585 } else { 586 ASSERT(connp->conn_family == AF_INET6); 587 tbr->ADDR_length = sizeof (sin6_t); 588 sin6 = (sin6_t *)&tbr[1]; 589 *sin6 = sin6_null; 590 sin6->sin6_family = AF_INET6; 591 mp->b_wptr = (uchar_t *)&sin6[1]; 592 sa = (struct sockaddr *)sin6; 593 } 594 break; 595 596 case sizeof (sin_t): /* Complete IPv4 address */ 597 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 598 sizeof (sin_t)); 599 if (sa == NULL || !OK_32PTR((char *)sa)) { 600 udp_err_ack(q, mp, TSYSERR, EINVAL); 601 return; 602 } 603 if (connp->conn_family != AF_INET || 604 sa->sa_family != AF_INET) { 605 
udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 606 return; 607 } 608 break; 609 610 case sizeof (sin6_t): /* complete IPv6 address */ 611 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 612 sizeof (sin6_t)); 613 if (sa == NULL || !OK_32PTR((char *)sa)) { 614 udp_err_ack(q, mp, TSYSERR, EINVAL); 615 return; 616 } 617 if (connp->conn_family != AF_INET6 || 618 sa->sa_family != AF_INET6) { 619 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 620 return; 621 } 622 break; 623 624 default: /* Invalid request */ 625 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 626 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 627 udp_err_ack(q, mp, TBADADDR, 0); 628 return; 629 } 630 631 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 632 tbr->PRIM_type != O_T_BIND_REQ); 633 634 if (error != 0) { 635 if (error > 0) { 636 udp_err_ack(q, mp, TSYSERR, error); 637 } else { 638 udp_err_ack(q, mp, -error, 0); 639 } 640 } else { 641 tbr->PRIM_type = T_BIND_ACK; 642 qreply(q, mp); 643 } 644 } 645 646 /* 647 * This routine handles each T_CONN_REQ message passed to udp. It 648 * associates a default destination address with the stream. 649 * 650 * After various error checks are completed, udp_connect() lays 651 * the target address and port into the composite header template. 652 * Then we ask IP for information, including a source address if we didn't 653 * already have one. Finally we send up the T_OK_ACK reply message. 654 */ 655 static void 656 udp_tpi_connect(queue_t *q, mblk_t *mp) 657 { 658 conn_t *connp = Q_TO_CONN(q); 659 int error; 660 socklen_t len; 661 struct sockaddr *sa; 662 struct T_conn_req *tcr; 663 cred_t *cr; 664 pid_t pid; 665 /* 666 * All Solaris components should pass a db_credp 667 * for this TPI message, hence we ASSERT. 668 * But in case there is some other M_PROTO that looks 669 * like a TPI message sent by some other kernel 670 * component, we check and return an error. 
671 */ 672 cr = msg_getcred(mp, &pid); 673 ASSERT(cr != NULL); 674 if (cr == NULL) { 675 udp_err_ack(q, mp, TSYSERR, EINVAL); 676 return; 677 } 678 679 tcr = (struct T_conn_req *)mp->b_rptr; 680 681 /* A bit of sanity checking */ 682 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 683 udp_err_ack(q, mp, TPROTO, 0); 684 return; 685 } 686 687 if (tcr->OPT_length != 0) { 688 udp_err_ack(q, mp, TBADOPT, 0); 689 return; 690 } 691 692 /* 693 * Determine packet type based on type of address passed in 694 * the request should contain an IPv4 or IPv6 address. 695 * Make sure that address family matches the type of 696 * family of the address passed down. 697 */ 698 len = tcr->DEST_length; 699 switch (tcr->DEST_length) { 700 default: 701 udp_err_ack(q, mp, TBADADDR, 0); 702 return; 703 704 case sizeof (sin_t): 705 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 706 sizeof (sin_t)); 707 break; 708 709 case sizeof (sin6_t): 710 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 711 sizeof (sin6_t)); 712 break; 713 } 714 715 error = proto_verify_ip_addr(connp->conn_family, sa, len); 716 if (error != 0) { 717 udp_err_ack(q, mp, TSYSERR, error); 718 return; 719 } 720 721 error = udp_do_connect(connp, sa, len, cr, pid); 722 if (error != 0) { 723 if (error < 0) 724 udp_err_ack(q, mp, -error, 0); 725 else 726 udp_err_ack(q, mp, TSYSERR, error); 727 } else { 728 mblk_t *mp1; 729 /* 730 * We have to send a connection confirmation to 731 * keep TLI happy. 732 */ 733 if (connp->conn_family == AF_INET) { 734 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 735 sizeof (sin_t), NULL, 0); 736 } else { 737 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 738 sizeof (sin6_t), NULL, 0); 739 } 740 if (mp1 == NULL) { 741 udp_err_ack(q, mp, TSYSERR, ENOMEM); 742 return; 743 } 744 745 /* 746 * Send ok_ack for T_CONN_REQ 747 */ 748 mp = mi_tpi_ok_ack_alloc(mp); 749 if (mp == NULL) { 750 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 751 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 752 return; 753 } 754 755 putnext(connp->conn_rq, mp); 756 putnext(connp->conn_rq, mp1); 757 } 758 } 759 760 static int 761 udp_tpi_close(queue_t *q, int flags) 762 { 763 conn_t *connp; 764 765 if (flags & SO_FALLBACK) { 766 /* 767 * stream is being closed while in fallback 768 * simply free the resources that were allocated 769 */ 770 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 771 qprocsoff(q); 772 goto done; 773 } 774 775 connp = Q_TO_CONN(q); 776 udp_do_close(connp); 777 done: 778 q->q_ptr = WR(q)->q_ptr = NULL; 779 return (0); 780 } 781 782 static void 783 udp_close_free(conn_t *connp) 784 { 785 udp_t *udp = connp->conn_udp; 786 787 /* If there are any options associated with the stream, free them. */ 788 if (udp->udp_recv_ipp.ipp_fields != 0) 789 ip_pkt_free(&udp->udp_recv_ipp); 790 791 /* 792 * Clear any fields which the kmem_cache constructor clears. 793 * Only udp_connp needs to be preserved. 794 * TBD: We should make this more efficient to avoid clearing 795 * everything. 
796 */ 797 ASSERT(udp->udp_connp == connp); 798 bzero(udp, sizeof (udp_t)); 799 udp->udp_connp = connp; 800 } 801 802 static int 803 udp_do_disconnect(conn_t *connp) 804 { 805 udp_t *udp; 806 udp_fanout_t *udpf; 807 udp_stack_t *us; 808 int error; 809 810 udp = connp->conn_udp; 811 us = udp->udp_us; 812 mutex_enter(&connp->conn_lock); 813 if (udp->udp_state != TS_DATA_XFER) { 814 mutex_exit(&connp->conn_lock); 815 return (-TOUTSTATE); 816 } 817 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 818 us->us_bind_fanout_size)]; 819 mutex_enter(&udpf->uf_lock); 820 if (connp->conn_mcbc_bind) 821 connp->conn_saddr_v6 = ipv6_all_zeros; 822 else 823 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 824 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 825 connp->conn_faddr_v6 = ipv6_all_zeros; 826 connp->conn_fport = 0; 827 udp->udp_state = TS_IDLE; 828 mutex_exit(&udpf->uf_lock); 829 830 /* Remove any remnants of mapped address binding */ 831 if (connp->conn_family == AF_INET6) 832 connp->conn_ipversion = IPV6_VERSION; 833 834 connp->conn_v6lastdst = ipv6_all_zeros; 835 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 836 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 837 mutex_exit(&connp->conn_lock); 838 if (error != 0) 839 return (error); 840 841 /* 842 * Tell IP to remove the full binding and revert 843 * to the local address binding. 844 */ 845 return (ip_laddr_fanout_insert(connp)); 846 } 847 848 static void 849 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 850 { 851 conn_t *connp = Q_TO_CONN(q); 852 int error; 853 854 /* 855 * Allocate the largest primitive we need to send back 856 * T_error_ack is > than T_ok_ack 857 */ 858 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 859 if (mp == NULL) { 860 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 861 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 862 return; 863 } 864 865 error = udp_do_disconnect(connp); 866 867 if (error != 0) { 868 if (error < 0) { 869 udp_err_ack(q, mp, -error, 0); 870 } else { 871 udp_err_ack(q, mp, TSYSERR, error); 872 } 873 } else { 874 mp = mi_tpi_ok_ack_alloc(mp); 875 ASSERT(mp != NULL); 876 qreply(q, mp); 877 } 878 } 879 880 int 881 udp_disconnect(conn_t *connp) 882 { 883 int error; 884 885 connp->conn_dgram_errind = B_FALSE; 886 error = udp_do_disconnect(connp); 887 if (error < 0) 888 error = proto_tlitosyserr(-error); 889 890 return (error); 891 } 892 893 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 894 static void 895 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 896 { 897 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 898 qreply(q, mp); 899 } 900 901 /* Shorthand to generate and send TPI error acks to our client */ 902 static void 903 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 904 t_scalar_t t_error, int sys_error) 905 { 906 struct T_error_ack *teackp; 907 908 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 909 M_PCPROTO, T_ERROR_ACK)) != NULL) { 910 teackp = (struct T_error_ack *)mp->b_rptr; 911 teackp->ERROR_prim = primitive; 912 teackp->TLI_error = t_error; 913 teackp->UNIX_error = sys_error; 914 qreply(q, mp); 915 } 916 } 917 918 /*ARGSUSED2*/ 919 static int 920 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 921 { 922 int i; 923 udp_t *udp = Q_TO_UDP(q); 924 udp_stack_t *us = udp->udp_us; 925 926 for (i = 0; i < us->us_num_epriv_ports; i++) { 927 if (us->us_epriv_ports[i] != 0) 928 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 929 } 930 return (0); 931 } 932 933 /* ARGSUSED1 */ 934 static int 935 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 936 cred_t *cr) 937 { 938 long new_value; 939 int i; 940 udp_t *udp = Q_TO_UDP(q); 941 udp_stack_t *us = 
udp->udp_us; 942 943 /* 944 * Fail the request if the new value does not lie within the 945 * port number limits. 946 */ 947 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 948 new_value <= 0 || new_value >= 65536) { 949 return (EINVAL); 950 } 951 952 /* Check if the value is already in the list */ 953 for (i = 0; i < us->us_num_epriv_ports; i++) { 954 if (new_value == us->us_epriv_ports[i]) { 955 return (EEXIST); 956 } 957 } 958 /* Find an empty slot */ 959 for (i = 0; i < us->us_num_epriv_ports; i++) { 960 if (us->us_epriv_ports[i] == 0) 961 break; 962 } 963 if (i == us->us_num_epriv_ports) { 964 return (EOVERFLOW); 965 } 966 967 /* Set the new value */ 968 us->us_epriv_ports[i] = (in_port_t)new_value; 969 return (0); 970 } 971 972 /* ARGSUSED1 */ 973 static int 974 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 975 cred_t *cr) 976 { 977 long new_value; 978 int i; 979 udp_t *udp = Q_TO_UDP(q); 980 udp_stack_t *us = udp->udp_us; 981 982 /* 983 * Fail the request if the new value does not lie within the 984 * port number limits. 985 */ 986 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 987 new_value <= 0 || new_value >= 65536) { 988 return (EINVAL); 989 } 990 991 /* Check that the value is already in the list */ 992 for (i = 0; i < us->us_num_epriv_ports; i++) { 993 if (us->us_epriv_ports[i] == new_value) 994 break; 995 } 996 if (i == us->us_num_epriv_ports) { 997 return (ESRCH); 998 } 999 1000 /* Clear the value */ 1001 us->us_epriv_ports[i] = 0; 1002 return (0); 1003 } 1004 1005 /* At minimum we need 4 bytes of UDP header */ 1006 #define ICMP_MIN_UDP_HDR 4 1007 1008 /* 1009 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 1010 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1011 * Assumes that IP has pulled up everything up to and including the ICMP header. 
1012 */ 1013 /* ARGSUSED2 */ 1014 static void 1015 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 1016 { 1017 conn_t *connp = (conn_t *)arg1; 1018 icmph_t *icmph; 1019 ipha_t *ipha; 1020 int iph_hdr_length; 1021 udpha_t *udpha; 1022 sin_t sin; 1023 sin6_t sin6; 1024 mblk_t *mp1; 1025 int error = 0; 1026 udp_t *udp = connp->conn_udp; 1027 1028 ipha = (ipha_t *)mp->b_rptr; 1029 1030 ASSERT(OK_32PTR(mp->b_rptr)); 1031 1032 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1033 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1034 udp_icmp_error_ipv6(connp, mp, ira); 1035 return; 1036 } 1037 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1038 1039 /* Skip past the outer IP and ICMP headers */ 1040 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 1041 iph_hdr_length = ira->ira_ip_hdr_length; 1042 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1043 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 1044 1045 /* Skip past the inner IP and find the ULP header */ 1046 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1047 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1048 1049 switch (icmph->icmph_type) { 1050 case ICMP_DEST_UNREACHABLE: 1051 switch (icmph->icmph_code) { 1052 case ICMP_FRAGMENTATION_NEEDED: { 1053 ipha_t *ipha; 1054 ip_xmit_attr_t *ixa; 1055 /* 1056 * IP has already adjusted the path MTU. 1057 * But we need to adjust DF for IPv4. 1058 */ 1059 if (connp->conn_ipversion != IPV4_VERSION) 1060 break; 1061 1062 ixa = conn_get_ixa(connp, B_FALSE); 1063 if (ixa == NULL || ixa->ixa_ire == NULL) { 1064 /* 1065 * Some other thread holds conn_ixa. We will 1066 * redo this on the next ICMP too big. 
1067 */ 1068 if (ixa != NULL) 1069 ixa_refrele(ixa); 1070 break; 1071 } 1072 (void) ip_get_pmtu(ixa); 1073 1074 mutex_enter(&connp->conn_lock); 1075 ipha = (ipha_t *)connp->conn_ht_iphc; 1076 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 1077 ipha->ipha_fragment_offset_and_flags |= 1078 IPH_DF_HTONS; 1079 } else { 1080 ipha->ipha_fragment_offset_and_flags &= 1081 ~IPH_DF_HTONS; 1082 } 1083 mutex_exit(&connp->conn_lock); 1084 ixa_refrele(ixa); 1085 break; 1086 } 1087 case ICMP_PORT_UNREACHABLE: 1088 case ICMP_PROTOCOL_UNREACHABLE: 1089 error = ECONNREFUSED; 1090 break; 1091 default: 1092 /* Transient errors */ 1093 break; 1094 } 1095 break; 1096 default: 1097 /* Transient errors */ 1098 break; 1099 } 1100 if (error == 0) { 1101 freemsg(mp); 1102 return; 1103 } 1104 1105 /* 1106 * Deliver T_UDERROR_IND when the application has asked for it. 1107 * The socket layer enables this automatically when connected. 1108 */ 1109 if (!connp->conn_dgram_errind) { 1110 freemsg(mp); 1111 return; 1112 } 1113 1114 switch (connp->conn_family) { 1115 case AF_INET: 1116 sin = sin_null; 1117 sin.sin_family = AF_INET; 1118 sin.sin_addr.s_addr = ipha->ipha_dst; 1119 sin.sin_port = udpha->uha_dst_port; 1120 if (IPCL_IS_NONSTR(connp)) { 1121 mutex_enter(&connp->conn_lock); 1122 if (udp->udp_state == TS_DATA_XFER) { 1123 if (sin.sin_port == connp->conn_fport && 1124 sin.sin_addr.s_addr == 1125 connp->conn_faddr_v4) { 1126 mutex_exit(&connp->conn_lock); 1127 (*connp->conn_upcalls->su_set_error) 1128 (connp->conn_upper_handle, error); 1129 goto done; 1130 } 1131 } else { 1132 udp->udp_delayed_error = error; 1133 *((sin_t *)&udp->udp_delayed_addr) = sin; 1134 } 1135 mutex_exit(&connp->conn_lock); 1136 } else { 1137 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1138 NULL, 0, error); 1139 if (mp1 != NULL) 1140 putnext(connp->conn_rq, mp1); 1141 } 1142 break; 1143 case AF_INET6: 1144 sin6 = sin6_null; 1145 sin6.sin6_family = AF_INET6; 1146 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, 
&sin6.sin6_addr); 1147 sin6.sin6_port = udpha->uha_dst_port; 1148 if (IPCL_IS_NONSTR(connp)) { 1149 mutex_enter(&connp->conn_lock); 1150 if (udp->udp_state == TS_DATA_XFER) { 1151 if (sin6.sin6_port == connp->conn_fport && 1152 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1153 &connp->conn_faddr_v6)) { 1154 mutex_exit(&connp->conn_lock); 1155 (*connp->conn_upcalls->su_set_error) 1156 (connp->conn_upper_handle, error); 1157 goto done; 1158 } 1159 } else { 1160 udp->udp_delayed_error = error; 1161 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1162 } 1163 mutex_exit(&connp->conn_lock); 1164 } else { 1165 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1166 NULL, 0, error); 1167 if (mp1 != NULL) 1168 putnext(connp->conn_rq, mp1); 1169 } 1170 break; 1171 } 1172 done: 1173 freemsg(mp); 1174 } 1175 1176 /* 1177 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1178 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1179 * Assumes that IP has pulled up all the extension headers as well as the 1180 * ICMPv6 header. 
1181 */ 1182 static void 1183 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1184 { 1185 icmp6_t *icmp6; 1186 ip6_t *ip6h, *outer_ip6h; 1187 uint16_t iph_hdr_length; 1188 uint8_t *nexthdrp; 1189 udpha_t *udpha; 1190 sin6_t sin6; 1191 mblk_t *mp1; 1192 int error = 0; 1193 udp_t *udp = connp->conn_udp; 1194 udp_stack_t *us = udp->udp_us; 1195 1196 outer_ip6h = (ip6_t *)mp->b_rptr; 1197 #ifdef DEBUG 1198 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1199 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1200 else 1201 iph_hdr_length = IPV6_HDR_LEN; 1202 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1203 #endif 1204 /* Skip past the outer IP and ICMP headers */ 1205 iph_hdr_length = ira->ira_ip_hdr_length; 1206 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1207 1208 /* Skip past the inner IP and find the ULP header */ 1209 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1210 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1211 freemsg(mp); 1212 return; 1213 } 1214 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1215 1216 switch (icmp6->icmp6_type) { 1217 case ICMP6_DST_UNREACH: 1218 switch (icmp6->icmp6_code) { 1219 case ICMP6_DST_UNREACH_NOPORT: 1220 error = ECONNREFUSED; 1221 break; 1222 case ICMP6_DST_UNREACH_ADMIN: 1223 case ICMP6_DST_UNREACH_NOROUTE: 1224 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1225 case ICMP6_DST_UNREACH_ADDR: 1226 /* Transient errors */ 1227 break; 1228 default: 1229 break; 1230 } 1231 break; 1232 case ICMP6_PACKET_TOO_BIG: { 1233 struct T_unitdata_ind *tudi; 1234 struct T_opthdr *toh; 1235 size_t udi_size; 1236 mblk_t *newmp; 1237 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1238 sizeof (struct ip6_mtuinfo); 1239 sin6_t *sin6; 1240 struct ip6_mtuinfo *mtuinfo; 1241 1242 /* 1243 * If the application has requested to receive path mtu 1244 * information, send up an empty message containing an 1245 * IPV6_PATHMTU ancillary data item. 
1246 */ 1247 if (!connp->conn_ipv6_recvpathmtu) 1248 break; 1249 1250 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1251 opt_length; 1252 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1253 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1254 break; 1255 } 1256 1257 /* 1258 * newmp->b_cont is left to NULL on purpose. This is an 1259 * empty message containing only ancillary data. 1260 */ 1261 newmp->b_datap->db_type = M_PROTO; 1262 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1263 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1264 tudi->PRIM_type = T_UNITDATA_IND; 1265 tudi->SRC_length = sizeof (sin6_t); 1266 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1267 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1268 tudi->OPT_length = opt_length; 1269 1270 sin6 = (sin6_t *)&tudi[1]; 1271 bzero(sin6, sizeof (sin6_t)); 1272 sin6->sin6_family = AF_INET6; 1273 sin6->sin6_addr = connp->conn_faddr_v6; 1274 1275 toh = (struct T_opthdr *)&sin6[1]; 1276 toh->level = IPPROTO_IPV6; 1277 toh->name = IPV6_PATHMTU; 1278 toh->len = opt_length; 1279 toh->status = 0; 1280 1281 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1282 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1283 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1284 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1285 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1286 /* 1287 * We've consumed everything we need from the original 1288 * message. Free it, then send our empty message. 
1289 */ 1290 freemsg(mp); 1291 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); 1292 return; 1293 } 1294 case ICMP6_TIME_EXCEEDED: 1295 /* Transient errors */ 1296 break; 1297 case ICMP6_PARAM_PROB: 1298 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1299 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1300 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1301 (uchar_t *)nexthdrp) { 1302 error = ECONNREFUSED; 1303 break; 1304 } 1305 break; 1306 } 1307 if (error == 0) { 1308 freemsg(mp); 1309 return; 1310 } 1311 1312 /* 1313 * Deliver T_UDERROR_IND when the application has asked for it. 1314 * The socket layer enables this automatically when connected. 1315 */ 1316 if (!connp->conn_dgram_errind) { 1317 freemsg(mp); 1318 return; 1319 } 1320 1321 sin6 = sin6_null; 1322 sin6.sin6_family = AF_INET6; 1323 sin6.sin6_addr = ip6h->ip6_dst; 1324 sin6.sin6_port = udpha->uha_dst_port; 1325 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1326 1327 if (IPCL_IS_NONSTR(connp)) { 1328 mutex_enter(&connp->conn_lock); 1329 if (udp->udp_state == TS_DATA_XFER) { 1330 if (sin6.sin6_port == connp->conn_fport && 1331 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1332 &connp->conn_faddr_v6)) { 1333 mutex_exit(&connp->conn_lock); 1334 (*connp->conn_upcalls->su_set_error) 1335 (connp->conn_upper_handle, error); 1336 goto done; 1337 } 1338 } else { 1339 udp->udp_delayed_error = error; 1340 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1341 } 1342 mutex_exit(&connp->conn_lock); 1343 } else { 1344 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1345 NULL, 0, error); 1346 if (mp1 != NULL) 1347 putnext(connp->conn_rq, mp1); 1348 } 1349 done: 1350 freemsg(mp); 1351 } 1352 1353 /* 1354 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1355 * The local address is filled in if endpoint is bound. 
The remote address 1356 * is filled in if remote address has been precified ("connected endpoint") 1357 * (The concept of connected CLTS sockets is alien to published TPI 1358 * but we support it anyway). 1359 */ 1360 static void 1361 udp_addr_req(queue_t *q, mblk_t *mp) 1362 { 1363 struct sockaddr *sa; 1364 mblk_t *ackmp; 1365 struct T_addr_ack *taa; 1366 udp_t *udp = Q_TO_UDP(q); 1367 conn_t *connp = udp->udp_connp; 1368 uint_t addrlen; 1369 1370 /* Make it large enough for worst case */ 1371 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1372 2 * sizeof (sin6_t), 1); 1373 if (ackmp == NULL) { 1374 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1375 return; 1376 } 1377 taa = (struct T_addr_ack *)ackmp->b_rptr; 1378 1379 bzero(taa, sizeof (struct T_addr_ack)); 1380 ackmp->b_wptr = (uchar_t *)&taa[1]; 1381 1382 taa->PRIM_type = T_ADDR_ACK; 1383 ackmp->b_datap->db_type = M_PCPROTO; 1384 1385 if (connp->conn_family == AF_INET) 1386 addrlen = sizeof (sin_t); 1387 else 1388 addrlen = sizeof (sin6_t); 1389 1390 mutex_enter(&connp->conn_lock); 1391 /* 1392 * Note: Following code assumes 32 bit alignment of basic 1393 * data structures like sin_t and struct T_addr_ack. 
1394 */ 1395 if (udp->udp_state != TS_UNBND) { 1396 /* 1397 * Fill in local address first 1398 */ 1399 taa->LOCADDR_offset = sizeof (*taa); 1400 taa->LOCADDR_length = addrlen; 1401 sa = (struct sockaddr *)&taa[1]; 1402 (void) conn_getsockname(connp, sa, &addrlen); 1403 ackmp->b_wptr += addrlen; 1404 } 1405 if (udp->udp_state == TS_DATA_XFER) { 1406 /* 1407 * connected, fill remote address too 1408 */ 1409 taa->REMADDR_length = addrlen; 1410 /* assumed 32-bit alignment */ 1411 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1412 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1413 (void) conn_getpeername(connp, sa, &addrlen); 1414 ackmp->b_wptr += addrlen; 1415 } 1416 mutex_exit(&connp->conn_lock); 1417 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1418 qreply(q, ackmp); 1419 } 1420 1421 static void 1422 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1423 { 1424 conn_t *connp = udp->udp_connp; 1425 1426 if (connp->conn_family == AF_INET) { 1427 *tap = udp_g_t_info_ack_ipv4; 1428 } else { 1429 *tap = udp_g_t_info_ack_ipv6; 1430 } 1431 tap->CURRENT_state = udp->udp_state; 1432 tap->OPT_size = udp_max_optsize; 1433 } 1434 1435 static void 1436 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1437 t_uscalar_t cap_bits1) 1438 { 1439 tcap->CAP_bits1 = 0; 1440 1441 if (cap_bits1 & TC1_INFO) { 1442 udp_copy_info(&tcap->INFO_ack, udp); 1443 tcap->CAP_bits1 |= TC1_INFO; 1444 } 1445 } 1446 1447 /* 1448 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1449 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1450 * udp_g_t_info_ack. The current state of the stream is copied from 1451 * udp_state. 
1452 */ 1453 static void 1454 udp_capability_req(queue_t *q, mblk_t *mp) 1455 { 1456 t_uscalar_t cap_bits1; 1457 struct T_capability_ack *tcap; 1458 udp_t *udp = Q_TO_UDP(q); 1459 1460 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1461 1462 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1463 mp->b_datap->db_type, T_CAPABILITY_ACK); 1464 if (!mp) 1465 return; 1466 1467 tcap = (struct T_capability_ack *)mp->b_rptr; 1468 udp_do_capability_ack(udp, tcap, cap_bits1); 1469 1470 qreply(q, mp); 1471 } 1472 1473 /* 1474 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1475 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1476 * The current state of the stream is copied from udp_state. 1477 */ 1478 static void 1479 udp_info_req(queue_t *q, mblk_t *mp) 1480 { 1481 udp_t *udp = Q_TO_UDP(q); 1482 1483 /* Create a T_INFO_ACK message. */ 1484 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1485 T_INFO_ACK); 1486 if (!mp) 1487 return; 1488 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1489 qreply(q, mp); 1490 } 1491 1492 /* For /dev/udp aka AF_INET open */ 1493 static int 1494 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1495 { 1496 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1497 } 1498 1499 /* For /dev/udp6 aka AF_INET6 open */ 1500 static int 1501 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1502 { 1503 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1504 } 1505 1506 /* 1507 * This is the open routine for udp. It allocates a udp_t structure for 1508 * the stream and, on the first open of the module, creates an ND table. 1509 */ 1510 static int 1511 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1512 boolean_t isv6) 1513 { 1514 udp_t *udp; 1515 conn_t *connp; 1516 dev_t conn_dev; 1517 vmem_t *minor_arena; 1518 int err; 1519 1520 /* If the stream is already open, return immediately. 
*/ 1521 if (q->q_ptr != NULL) 1522 return (0); 1523 1524 if (sflag == MODOPEN) 1525 return (EINVAL); 1526 1527 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1528 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1529 minor_arena = ip_minor_arena_la; 1530 } else { 1531 /* 1532 * Either minor numbers in the large arena were exhausted 1533 * or a non socket application is doing the open. 1534 * Try to allocate from the small arena. 1535 */ 1536 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1537 return (EBUSY); 1538 1539 minor_arena = ip_minor_arena_sa; 1540 } 1541 1542 if (flag & SO_FALLBACK) { 1543 /* 1544 * Non streams socket needs a stream to fallback to 1545 */ 1546 RD(q)->q_ptr = (void *)conn_dev; 1547 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1548 WR(q)->q_ptr = (void *)minor_arena; 1549 qprocson(q); 1550 return (0); 1551 } 1552 1553 connp = udp_do_open(credp, isv6, KM_SLEEP, &err); 1554 if (connp == NULL) { 1555 inet_minor_free(minor_arena, conn_dev); 1556 return (err); 1557 } 1558 udp = connp->conn_udp; 1559 1560 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1561 connp->conn_dev = conn_dev; 1562 connp->conn_minor_arena = minor_arena; 1563 1564 /* 1565 * Initialize the udp_t structure for this stream. 1566 */ 1567 q->q_ptr = connp; 1568 WR(q)->q_ptr = connp; 1569 connp->conn_rq = q; 1570 connp->conn_wq = WR(q); 1571 1572 /* 1573 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1574 * need to lock anything. 1575 */ 1576 ASSERT(connp->conn_proto == IPPROTO_UDP); 1577 ASSERT(connp->conn_udp == udp); 1578 ASSERT(udp->udp_connp == connp); 1579 1580 if (flag & SO_SOCKSTR) { 1581 udp->udp_issocket = B_TRUE; 1582 } 1583 1584 WR(q)->q_hiwat = connp->conn_sndbuf; 1585 WR(q)->q_lowat = connp->conn_sndlowat; 1586 1587 qprocson(q); 1588 1589 /* Set the Stream head write offset and high watermark. 
*/ 1590 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1591 (void) proto_set_rx_hiwat(q, connp, 1592 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1593 1594 mutex_enter(&connp->conn_lock); 1595 connp->conn_state_flags &= ~CONN_INCIPIENT; 1596 mutex_exit(&connp->conn_lock); 1597 return (0); 1598 } 1599 1600 /* 1601 * Which UDP options OK to set through T_UNITDATA_REQ... 1602 */ 1603 /* ARGSUSED */ 1604 static boolean_t 1605 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1606 { 1607 return (B_TRUE); 1608 } 1609 1610 /* 1611 * This routine gets default values of certain options whose default 1612 * values are maintained by protcol specific code 1613 */ 1614 int 1615 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1616 { 1617 udp_t *udp = Q_TO_UDP(q); 1618 udp_stack_t *us = udp->udp_us; 1619 int *i1 = (int *)ptr; 1620 1621 switch (level) { 1622 case IPPROTO_IP: 1623 switch (name) { 1624 case IP_MULTICAST_TTL: 1625 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1626 return (sizeof (uchar_t)); 1627 case IP_MULTICAST_LOOP: 1628 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1629 return (sizeof (uchar_t)); 1630 } 1631 break; 1632 case IPPROTO_IPV6: 1633 switch (name) { 1634 case IPV6_MULTICAST_HOPS: 1635 *i1 = IP_DEFAULT_MULTICAST_TTL; 1636 return (sizeof (int)); 1637 case IPV6_MULTICAST_LOOP: 1638 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1639 return (sizeof (int)); 1640 case IPV6_UNICAST_HOPS: 1641 *i1 = us->us_ipv6_hoplimit; 1642 return (sizeof (int)); 1643 } 1644 break; 1645 } 1646 return (-1); 1647 } 1648 1649 /* 1650 * This routine retrieves the current status of socket options. 1651 * It returns the size of the option retrieved, or -1. 
1652 */ 1653 int 1654 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1655 uchar_t *ptr) 1656 { 1657 int *i1 = (int *)ptr; 1658 udp_t *udp = connp->conn_udp; 1659 int len; 1660 conn_opt_arg_t coas; 1661 int retval; 1662 1663 coas.coa_connp = connp; 1664 coas.coa_ixa = connp->conn_ixa; 1665 coas.coa_ipp = &connp->conn_xmit_ipp; 1666 coas.coa_ancillary = B_FALSE; 1667 coas.coa_changed = 0; 1668 1669 /* 1670 * We assume that the optcom framework has checked for the set 1671 * of levels and names that are supported, hence we don't worry 1672 * about rejecting based on that. 1673 * First check for UDP specific handling, then pass to common routine. 1674 */ 1675 switch (level) { 1676 case IPPROTO_IP: 1677 /* 1678 * Only allow IPv4 option processing on IPv4 sockets. 1679 */ 1680 if (connp->conn_family != AF_INET) 1681 return (-1); 1682 1683 switch (name) { 1684 case IP_OPTIONS: 1685 case T_IP_OPTIONS: 1686 mutex_enter(&connp->conn_lock); 1687 if (!(udp->udp_recv_ipp.ipp_fields & 1688 IPPF_IPV4_OPTIONS)) { 1689 mutex_exit(&connp->conn_lock); 1690 return (0); 1691 } 1692 1693 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1694 ASSERT(len != 0); 1695 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1696 mutex_exit(&connp->conn_lock); 1697 return (len); 1698 } 1699 break; 1700 case IPPROTO_UDP: 1701 switch (name) { 1702 case UDP_NAT_T_ENDPOINT: 1703 mutex_enter(&connp->conn_lock); 1704 *i1 = udp->udp_nat_t_endpoint; 1705 mutex_exit(&connp->conn_lock); 1706 return (sizeof (int)); 1707 case UDP_RCVHDR: 1708 mutex_enter(&connp->conn_lock); 1709 *i1 = udp->udp_rcvhdr ? 1 : 0; 1710 mutex_exit(&connp->conn_lock); 1711 return (sizeof (int)); 1712 } 1713 } 1714 mutex_enter(&connp->conn_lock); 1715 retval = conn_opt_get(&coas, level, name, ptr); 1716 mutex_exit(&connp->conn_lock); 1717 return (retval); 1718 } 1719 1720 /* 1721 * This routine retrieves the current status of socket options. 1722 * It returns the size of the option retrieved, or -1. 
1723 */ 1724 int 1725 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1726 { 1727 conn_t *connp = Q_TO_CONN(q); 1728 int err; 1729 1730 err = udp_opt_get(connp, level, name, ptr); 1731 return (err); 1732 } 1733 1734 /* 1735 * This routine sets socket options. 1736 */ 1737 int 1738 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1739 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1740 { 1741 conn_t *connp = coa->coa_connp; 1742 ip_xmit_attr_t *ixa = coa->coa_ixa; 1743 udp_t *udp = connp->conn_udp; 1744 udp_stack_t *us = udp->udp_us; 1745 int *i1 = (int *)invalp; 1746 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1747 int error; 1748 1749 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1750 /* 1751 * First do UDP specific sanity checks and handle UDP specific 1752 * options. Note that some IPPROTO_UDP options are handled 1753 * by conn_opt_set. 1754 */ 1755 switch (level) { 1756 case SOL_SOCKET: 1757 switch (name) { 1758 case SO_SNDBUF: 1759 if (*i1 > us->us_max_buf) { 1760 return (ENOBUFS); 1761 } 1762 break; 1763 case SO_RCVBUF: 1764 if (*i1 > us->us_max_buf) { 1765 return (ENOBUFS); 1766 } 1767 break; 1768 1769 case SCM_UCRED: { 1770 struct ucred_s *ucr; 1771 cred_t *newcr; 1772 ts_label_t *tsl; 1773 1774 /* 1775 * Only sockets that have proper privileges and are 1776 * bound to MLPs will have any other value here, so 1777 * this implicitly tests for privilege to set label. 1778 */ 1779 if (connp->conn_mlp_type == mlptSingle) 1780 break; 1781 1782 ucr = (struct ucred_s *)invalp; 1783 if (inlen < sizeof (*ucr) + sizeof (bslabel_t) || 1784 ucr->uc_labeloff < sizeof (*ucr) || 1785 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1786 return (EINVAL); 1787 if (!checkonly) { 1788 /* 1789 * Set ixa_tsl to the new label. 1790 * We assume that crgetzoneid doesn't change 1791 * as part of the SCM_UCRED. 
1792 */ 1793 ASSERT(cr != NULL); 1794 if ((tsl = crgetlabel(cr)) == NULL) 1795 return (EINVAL); 1796 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1797 tsl->tsl_doi, KM_NOSLEEP); 1798 if (newcr == NULL) 1799 return (ENOSR); 1800 ASSERT(newcr->cr_label != NULL); 1801 /* 1802 * Move the hold on the cr_label to ixa_tsl by 1803 * setting cr_label to NULL. Then release newcr. 1804 */ 1805 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1806 ixa->ixa_flags |= IXAF_UCRED_TSL; 1807 newcr->cr_label = NULL; 1808 crfree(newcr); 1809 coa->coa_changed |= COA_HEADER_CHANGED; 1810 coa->coa_changed |= COA_WROFF_CHANGED; 1811 } 1812 /* Fully handled this option. */ 1813 return (0); 1814 } 1815 } 1816 break; 1817 case IPPROTO_UDP: 1818 switch (name) { 1819 case UDP_NAT_T_ENDPOINT: 1820 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1821 return (error); 1822 } 1823 1824 /* 1825 * Use conn_family instead so we can avoid ambiguitites 1826 * with AF_INET6 sockets that may switch from IPv4 1827 * to IPv6. 1828 */ 1829 if (connp->conn_family != AF_INET) { 1830 return (EAFNOSUPPORT); 1831 } 1832 1833 if (!checkonly) { 1834 mutex_enter(&connp->conn_lock); 1835 udp->udp_nat_t_endpoint = onoff; 1836 mutex_exit(&connp->conn_lock); 1837 coa->coa_changed |= COA_HEADER_CHANGED; 1838 coa->coa_changed |= COA_WROFF_CHANGED; 1839 } 1840 /* Fully handled this option. */ 1841 return (0); 1842 case UDP_RCVHDR: 1843 mutex_enter(&connp->conn_lock); 1844 udp->udp_rcvhdr = onoff; 1845 mutex_exit(&connp->conn_lock); 1846 return (0); 1847 } 1848 break; 1849 } 1850 error = conn_opt_set(coa, level, name, inlen, invalp, 1851 checkonly, cr); 1852 return (error); 1853 } 1854 1855 /* 1856 * This routine sets socket options. 
 *
 * connp		the endpoint
 * optset_context	one of the SETFN_* values; selects check-only versus
 *			negotiate behavior
 * level, name		the option being set
 * inlen, invalp	length and value supplied by the caller
 * outlenp, outvalp	on success the input value is echoed here; *outlenp
 *			is set to 0 on every failure path before the ixa
 *			reference is dropped
 * thisdg_attrs		non-NULL when the options arrived with a
 *			T_UNITDATA_REQ; it is then the conn_opt_arg_t to use
 * cr			credentials of the caller
 *
 * Returns 0 or an errno value.
 */
int
udp_opt_set(conn_t *connp, uint_t optset_context, int level,
    int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
    uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
{
	udp_t		*udp = connp->conn_udp;
	int		err;
	conn_opt_arg_t	coas, *coa;
	boolean_t	checkonly;
	udp_stack_t	*us = udp->udp_us;

	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		checkonly = B_TRUE;
		/*
		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
		 * inlen != 0 implies value supplied and
		 * 	we have to "pretend" to set it.
		 * inlen == 0 implies that there is no
		 * 	value part in T_CHECK request and just validation
		 * done elsewhere should be enough, we just return here.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			return (0);
		}
		break;
	case SETFN_OPTCOM_NEGOTIATE:
		checkonly = B_FALSE;
		break;
	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		checkonly = B_FALSE;
		/*
		 * Negotiating local and "association-related" options
		 * through T_UNITDATA_REQ.
		 *
		 * Following routine can filter out ones we do not
		 * want to be "set" this way.
		 */
		if (!udp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		/*
		 * We should never get here
		 */
		*outlenp = 0;
		return (EINVAL);
	}

	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	if (thisdg_attrs != NULL) {
		/* Options from T_UNITDATA_REQ */
		coa = (conn_opt_arg_t *)thisdg_attrs;
		ASSERT(coa->coa_connp == connp);
		ASSERT(coa->coa_ixa != NULL);
		ASSERT(coa->coa_ipp != NULL);
		ASSERT(coa->coa_ancillary);
	} else {
		coa = &coas;
		coas.coa_connp = connp;
		/* Get a reference on conn_ixa to prevent concurrent mods */
		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
		if (coas.coa_ixa == NULL) {
			*outlenp = 0;
			return (ENOMEM);
		}
		coas.coa_ipp = &connp->conn_xmit_ipp;
		coas.coa_ancillary = B_FALSE;
		coas.coa_changed = 0;
	}

	err = udp_do_opt_set(coa, level, name, inlen, invalp,
	    cr, checkonly);
	if (err != 0) {
		/*
		 * Shared failure exit, also reached by goto from the
		 * IP_DHCPINIT_IF handling below.  The ixa reference is only
		 * ours to drop when we took it above (non-ancillary case).
		 */
errout:
		if (!coa->coa_ancillary)
			ixa_refrele(coa->coa_ixa);
		*outlenp = 0;
		return (err);
	}
	/*
	 * Handle DHCPINIT here outside of lock
	 *
	 * NOTE(review): this block does not look at checkonly, so a
	 * check-only request for IP_DHCPINIT_IF appears to update
	 * conn_dhcpinit_ill as well -- confirm whether that is intended.
	 */
	if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
		uint_t	ifindex;
		ill_t	*ill;

		ifindex = *(uint_t *)invalp;
		if (ifindex == 0) {
			/* Index 0 only clears a previous setting. */
			ill = NULL;
		} else {
			ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
			    coa->coa_ixa->ixa_ipst);
			if (ill == NULL) {
				err = ENXIO;
				goto errout;
			}

			/*
			 * ill_lock is taken here and stays held until the
			 * ill has been installed on the conn below; every
			 * early exit drops both the lock and the ill
			 * reference.
			 */
			mutex_enter(&ill->ill_lock);
			if (ill->ill_state_flags & ILL_CONDEMNED) {
				mutex_exit(&ill->ill_lock);
				ill_refrele(ill);
				err = ENXIO;
				goto errout;
			}
			if (IS_VNI(ill)) {
				mutex_exit(&ill->ill_lock);
				ill_refrele(ill);
				err = EINVAL;
				goto errout;
			}
		}
		mutex_enter(&connp->conn_lock);

		if (connp->conn_dhcpinit_ill != NULL) {
			/*
			 * We've locked the conn so conn_cleanup_ill()
			 * cannot clear conn_dhcpinit_ill -- so it's
			 * safe to access the ill.
			 */
			ill_t *oill = connp->conn_dhcpinit_ill;

			ASSERT(oill->ill_dhcpinit != 0);
			atomic_dec_32(&oill->ill_dhcpinit);
			ill_set_inputfn(connp->conn_dhcpinit_ill);
			connp->conn_dhcpinit_ill = NULL;
		}

		if (ill != NULL) {
			connp->conn_dhcpinit_ill = ill;
			atomic_inc_32(&ill->ill_dhcpinit);
			ill_set_inputfn(ill);
			mutex_exit(&connp->conn_lock);
			mutex_exit(&ill->ill_lock);
			ill_refrele(ill);
		} else {
			mutex_exit(&connp->conn_lock);
		}
	}

	/*
	 * Common case of OK return with outval same as inval.
	 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;

	/*
	 * If this was not ancillary data, then we rebuild the headers,
	 * update the IRE/NCE, and IPsec as needed.
	 * Since the label depends on the destination we go through
	 * ip_set_destination first.
	 */
	if (coa->coa_ancillary) {
		return (0);
	}

	if (coa->coa_changed & COA_ROUTE_CHANGED) {
		in6_addr_t saddr, faddr, nexthop;
		in_port_t fport;

		/*
		 * We clear lastdst to make sure we pick up the change
		 * next time sending.
		 * If we are connected we re-cache the information.
		 * We ignore errors to preserve BSD behavior.
		 * Note that we don't redo IPsec policy lookup here
		 * since the final destination (or source) didn't change.
		 */
		mutex_enter(&connp->conn_lock);
		connp->conn_v6lastdst = ipv6_all_zeros;

		/* Snapshot the addresses so the lock can be dropped. */
		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
		    &connp->conn_faddr_v6, &nexthop);
		saddr = connp->conn_saddr_v6;
		faddr = connp->conn_faddr_v6;
		fport = connp->conn_fport;
		mutex_exit(&connp->conn_lock);

		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
			(void) ip_attr_connect(connp, coa->coa_ixa,
			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
		}
	}

	/* Done with the transmit attributes; only coa_changed is used below. */
	ixa_refrele(coa->coa_ixa);

	if (coa->coa_changed & COA_HEADER_CHANGED) {
		/*
		 * Rebuild the header template if we are connected.
		 * Otherwise clear conn_v6lastdst so we rebuild the header
		 * in the data path.
		 */
		mutex_enter(&connp->conn_lock);
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
			err = udp_build_hdr_template(connp,
			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
			    connp->conn_fport, connp->conn_flowinfo);
			if (err != 0) {
				/* *outlenp has already been set to inlen. */
				mutex_exit(&connp->conn_lock);
				return (err);
			}
		} else {
			connp->conn_v6lastdst = ipv6_all_zeros;
		}
		mutex_exit(&connp->conn_lock);
	}
	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
		    connp->conn_rcvbuf);
	}
	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
	}
	if (coa->coa_changed & COA_WROFF_CHANGED) {
		/* Increase wroff if needed */
		uint_t wroff;

		mutex_enter(&connp->conn_lock);
		wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
		/* Leave room for the extra 32-bit word of a NAT-T endpoint. */
		if (udp->udp_nat_t_endpoint)
			wroff += sizeof (uint32_t);
		if (wroff > connp->conn_wroff) {
			connp->conn_wroff = wroff;
			mutex_exit(&connp->conn_lock);
			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
		} else {
			mutex_exit(&connp->conn_lock);
		}
	}
	return (err);
}

/*
 * This routine sets socket options.
 * Queue based wrapper: the conn_t is looked up from q and the request is
 * handed to udp_opt_set() unchanged.
 */
int
udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
    uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    void *thisdg_attrs, cred_t *cr)
{
	conn_t	*connp = Q_TO_CONN(q);
	int error;

	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
	    outlenp, outvalp, thisdg_attrs, cr);
	return (error);
}

/*
 * Setup IP and UDP headers.
 * Returns NULL on allocation failure, in which case data_mp is freed.
 *
 * The IP header is produced by conn_prepend_hdr(); this routine fills in
 * the UDP header behind it, records the packet length in ixa_pktlen and,
 * for a NAT-T endpoint, adds a zeroed 32-bit word after the UDP header.
 * On failure *errorp holds the reason.
 */
mblk_t *
udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
    const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
    uint32_t flowinfo, mblk_t *data_mp, int *errorp)
{
	mblk_t		*mp;
	udpha_t		*udpha;
	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
	uint_t		data_len;
	uint32_t	cksum;
	udp_t		*udp = connp->conn_udp;
	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
	uint_t		ulp_hdr_len;

	data_len = msgdsize(data_mp);
	ulp_hdr_len = UDPH_SIZE;
	if (insert_spi)
		ulp_hdr_len += sizeof (uint32_t);

	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
	if (mp == NULL) {
		ASSERT(*errorp != 0);
		return (NULL);
	}

	/* From here on data_len counts the UDP header (and SPI) as well. */
	data_len += ulp_hdr_len;
	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;

	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
	udpha->uha_src_port = connp->conn_lport;
	udpha->uha_dst_port = dstport;
	udpha->uha_checksum = 0;
	udpha->uha_length = htons(data_len);

	/*
	 * If there was a routing option/header then conn_prepend_hdr
	 * has massaged it and placed the pseudo-header checksum difference
	 * in the cksum argument.
	 *
	 * Setup header length and prepare for ULP checksum done in IP.
	 *
	 * We make it easy for IP to include our pseudo header
	 * by putting our length in uha_checksum.
	 * The IP source, destination, and length have already been set by
	 * conn_prepend_hdr.
	 */
	cksum += data_len;
	cksum = (cksum >> 16) + (cksum & 0xFFFF);
	ASSERT(cksum < 0x10000);

	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ipha_t	*ipha = (ipha_t *)mp->b_rptr;

		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);

		/* IP does the checksum if uha_checksum is non-zero */
		if (us->us_do_checksum) {
			/* 0 means "no checksum", so a zero sum goes as 0xffff. */
			if (cksum == 0)
				udpha->uha_checksum = 0xffff;
			else
				udpha->uha_checksum = htons(cksum);
		} else {
			udpha->uha_checksum = 0;
		}
	} else {
		ip6_t *ip6h = (ip6_t *)mp->b_rptr;

		/* For IPv6 the checksum is filled in unconditionally. */
		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
		if (cksum == 0)
			udpha->uha_checksum = 0xffff;
		else
			udpha->uha_checksum = htons(cksum);
	}

	/* Insert all-0s SPI now. */
	if (insert_spi)
		*((uint32_t *)(udpha + 1)) = 0;

	return (mp);
}

/*
 * Build the cached header template for the connection through
 * conn_build_hdr_template() and fill in the UDP part of it.
 * The caller must hold conn_lock.  Returns 0 or the error reported by
 * conn_build_hdr_template().
 */
static int
udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
    const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
{
	udpha_t		*udpha;
	int		error;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	/*
	 * We clear lastdst to make sure we don't use the lastdst path
	 * next time sending since we might not have set v6dst yet.
	 */
	connp->conn_v6lastdst = ipv6_all_zeros;

	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
	    flowinfo);
	if (error != 0)
		return (error);

	/*
	 * Any routing header/option has been massaged. The checksum difference
	 * is stored in conn_sum.
	 */
	udpha = (udpha_t *)connp->conn_ht_ulp;
	udpha->uha_src_port = connp->conn_lport;
	udpha->uha_dst_port = dstport;
	udpha->uha_checksum = 0;
	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
	return (0);
}

/*
 * This routine retrieves the value of an ND variable in a udpparam_t
 * structure.  It is called through nd_getset when a user reads the
 * variable.
 */
/* ARGSUSED */
static int
udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udpparam_t *udppa = (udpparam_t *)cp;

	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
	return (0);
}

/*
 * Walk through the param array specified registering each element with the
 * named dispatch (ND) handler.
 *
 * Entries without a name are skipped.  The three udp_extra_priv_ports
 * variables are registered as well.  On any failure everything loaded so
 * far is released with nd_free() and B_FALSE is returned.
 */
static boolean_t
udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt)
{
	for (; cnt-- > 0; udppa++) {
		if (udppa->udp_param_name && udppa->udp_param_name[0]) {
			if (!nd_load(ndp, udppa->udp_param_name,
			    udp_param_get, udp_param_set,
			    (caddr_t)udppa)) {
				nd_free(ndp);
				return (B_FALSE);
			}
		}
	}
	/* Read-only listing of the extra privileged ports. */
	if (!nd_load(ndp, "udp_extra_priv_ports",
	    udp_extra_priv_ports_get, NULL, NULL)) {
		nd_free(ndp);
		return (B_FALSE);
	}
	/* Write-only variables that add to and delete from that list. */
	if (!nd_load(ndp, "udp_extra_priv_ports_add",
	    NULL, udp_extra_priv_ports_add, NULL)) {
		nd_free(ndp);
		return (B_FALSE);
	}
	if (!nd_load(ndp, "udp_extra_priv_ports_del",
	    NULL, udp_extra_priv_ports_del, NULL)) {
		nd_free(ndp);
		return (B_FALSE);
	}
	return (B_TRUE);
}

/* This routine sets an ND variable in a udpparam_t structure. */
/* ARGSUSED */
static int
udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
{
	long		new_value;
	udpparam_t	*udppa = (udpparam_t *)cp;

	/*
	 * Fail the request if the new value does not lie within the
	 * required bounds.
2292 */ 2293 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2294 new_value < udppa->udp_param_min || 2295 new_value > udppa->udp_param_max) { 2296 return (EINVAL); 2297 } 2298 2299 /* Set the new value */ 2300 udppa->udp_param_value = new_value; 2301 return (0); 2302 } 2303 2304 static mblk_t * 2305 udp_queue_fallback(udp_t *udp, mblk_t *mp) 2306 { 2307 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 2308 if (IPCL_IS_NONSTR(udp->udp_connp)) { 2309 /* 2310 * fallback has started but messages have not been moved yet 2311 */ 2312 if (udp->udp_fallback_queue_head == NULL) { 2313 ASSERT(udp->udp_fallback_queue_tail == NULL); 2314 udp->udp_fallback_queue_head = mp; 2315 udp->udp_fallback_queue_tail = mp; 2316 } else { 2317 ASSERT(udp->udp_fallback_queue_tail != NULL); 2318 udp->udp_fallback_queue_tail->b_next = mp; 2319 udp->udp_fallback_queue_tail = mp; 2320 } 2321 return (NULL); 2322 } else { 2323 /* 2324 * Fallback completed, let the caller putnext() the mblk. 2325 */ 2326 return (mp); 2327 } 2328 } 2329 2330 /* 2331 * Deliver data to ULP. In case we have a socket, and it's falling back to 2332 * TPI, then we'll queue the mp for later processing. 
2333 */ 2334 static void 2335 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2336 { 2337 if (IPCL_IS_NONSTR(connp)) { 2338 udp_t *udp = connp->conn_udp; 2339 int error; 2340 2341 ASSERT(len == msgdsize(mp)); 2342 if ((*connp->conn_upcalls->su_recv) 2343 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2344 mutex_enter(&udp->udp_recv_lock); 2345 if (error == ENOSPC) { 2346 /* 2347 * let's confirm while holding the lock 2348 */ 2349 if ((*connp->conn_upcalls->su_recv) 2350 (connp->conn_upper_handle, NULL, 0, 0, 2351 &error, NULL) < 0) { 2352 ASSERT(error == ENOSPC); 2353 if (error == ENOSPC) { 2354 connp->conn_flow_cntrld = 2355 B_TRUE; 2356 } 2357 } 2358 mutex_exit(&udp->udp_recv_lock); 2359 } else { 2360 ASSERT(error == EOPNOTSUPP); 2361 mp = udp_queue_fallback(udp, mp); 2362 mutex_exit(&udp->udp_recv_lock); 2363 if (mp != NULL) 2364 putnext(connp->conn_rq, mp); 2365 } 2366 } 2367 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2368 } else { 2369 if (is_system_labeled()) { 2370 ASSERT(ira->ira_cred != NULL); 2371 /* 2372 * Provide for protocols above UDP such as RPC 2373 * NOPID leaves db_cpid unchanged. 2374 */ 2375 mblk_setcred(mp, ira->ira_cred, NOPID); 2376 } 2377 2378 putnext(connp->conn_rq, mp); 2379 } 2380 } 2381 2382 /* 2383 * This is the inbound data path. 2384 * IP has already pulled up the IP plus UDP headers and verified alignment 2385 * etc. 
*/
/*
 * arg1 is the conn_t of the receiving endpoint, mp the M_DATA packet that
 * starts at the IP header, and ira the receive attributes (packet length,
 * IP header length, flags).  arg2 is not used.
 *
 * The packet is dropped (freed, udpInErrors bumped) when the length in the
 * UDP header does not match the IP-derived length, when recording the IPv4
 * options fails, or when no mblk can be allocated for the T_UNITDATA_IND.
 * Otherwise an M_PROTO T_UNITDATA_IND carrying the source address (sin_t for
 * AF_INET endpoints, sin6_t otherwise) and any requested ancillary data is
 * linked in front of mp and passed to udp_ulp_recv().
 */
/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t			*connp = (conn_t *)arg1;
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;		/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			udi_size;	/* Size of T_unitdata_ind */
	int			pkt_len;
	udp_t			*udp;
	udpha_t			*udpha;
	ip_pkt_t		ipps;
	ip6_t			*ip6h;
	mblk_t			*mp1;
	uint32_t		udp_ipv4_options_len;
	crb_t			recv_ancillary;
	udp_stack_t		*us;

	ASSERT(connp->conn_flags & IPCL_UDPCONN);

	udp = connp->conn_udp;
	us = udp->udp_us;
	rptr = mp->b_rptr;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(OK_32PTR(rptr));
	ASSERT(ira->ira_pktlen == msgdsize(mp));
	pkt_len = ira->ira_pktlen;

	/*
	 * Get a snapshot of these and allow other threads to change
	 * them after that. We need the same recv_ancillary when determining
	 * the size as when adding the ancillary data items.
	 */
	mutex_enter(&connp->conn_lock);
	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
	recv_ancillary = connp->conn_recv_ancillary;
	mutex_exit(&connp->conn_lock);

	hdr_length = ira->ira_ip_hdr_length;

	/*
	 * IP inspected the UDP header thus all of it must be in the mblk.
	 * UDP length check is performed for IPv6 packets and IPv4 packets
	 * to check if the size of the packet as specified
	 * by the UDP header is the same as the length derived from the IP
	 * header.
	 */
	udpha = (udpha_t *)(rptr + hdr_length);
	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
		goto tossit;

	/* From here on hdr_length covers the IP header plus the UDP header. */
	hdr_length += UDPH_SIZE;
	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */

	/* Initialize regardless of IP version */
	ipps.ipp_fields = 0;

	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
	    udp_ipv4_options_len > 0) &&
	    connp->conn_family == AF_INET) {
		int	err;

		/*
		 * Record/update udp_recv_ipp with the lock
		 * held. Not needed for AF_INET6 sockets
		 * since they don't support a getsockopt of IP_OPTIONS.
		 */
		mutex_enter(&connp->conn_lock);
		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
		    B_TRUE);
		if (err != 0) {
			/* Allocation failed. Drop packet */
			mutex_exit(&connp->conn_lock);
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mutex_exit(&connp->conn_lock);
	}

	if (recv_ancillary.crb_all != 0) {
		/*
		 * Record packet information in the ip_pkt_t
		 */
		if (ira->ira_flags & IRAF_IS_IPV4) {
			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));

			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
		} else {
			uint8_t nexthdrp;

			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
			/*
			 * IPv6 packets can only be received by applications
			 * that are prepared to receive IPv6 addresses.
			 * The IP fanout must ensure this.
			 */
			ASSERT(connp->conn_family == AF_INET6);

			ip6h = (ip6_t *)rptr;

			/* We don't care about the length, but need the ipp */
			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
			    &nexthdrp);
			ASSERT(hdr_length == ira->ira_ip_hdr_length);
			/* Restore */
			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
			ASSERT(nexthdrp == IPPROTO_UDP);
		}
	}

	/*
	 * This is the inbound data path.  Packets are passed upstream as
	 * T_UNITDATA_IND messages.
	 */
	if (connp->conn_family == AF_INET) {
		sin_t *sin;

		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);

		/*
		 * Normally only send up the source address.
		 * If any ancillary data items are wanted we add those.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin_t);
		/* udi_size now becomes the size of the options area only. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
		tudi->OPT_length = udi_size;
		sin = (sin_t *)&tudi[1];
		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
		sin->sin_port = udpha->uha_src_port;
		sin->sin_family = connp->conn_family;
		/* Zero the 8 bytes of sin_zero with two 32-bit stores. */
		*(uint32_t *)&sin->sin_zero[0] = 0;
		*(uint32_t *)&sin->sin_zero[4] = 0;

		/*
		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
		 * IP_RECVTTL has been set.
		 */
		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin[1], udi_size);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		/* udi_size now becomes the size of the options area only. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ira->ira_flags & IRAF_IS_IPV4) {
			in6_addr_t v6dst;

			/* IPv4 packet on an IPv6 socket: use v4-mapped form. */
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    IPCL_ZONEID(connp), us->us_netstack);
		} else {
			ip6h = (ip6_t *)rptr;

			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope pass up scope id */
			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ira->ira_ruifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
			    us->us_netstack);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = connp->conn_family;

		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin6[1], udi_size);
		}
	}

	/* Walk past the headers unless IP_RECVHDR was set. */
	if (!udp->udp_rcvhdr) {
		mp->b_rptr = rptr + hdr_length;
		pkt_len -= hdr_length;
	}

	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
	udp_ulp_recv(connp, mp1, pkt_len, ira);
	return;

tossit:
	freemsg(mp);
	BUMP_MIB(&us->us_udp_mib, udpInErrors);
}

/*
 * Return SNMP stuff in buffer in mpdata.  Apart from briefly taking
 * conn_lock around the conn_ixa reads, we don't hold any lock and report
 * information that can be changing beneath us.
 *
 * mpctl is consumed.  Up to five replies are sent on q with qreply(), in
 * this order: the UDP MIB counters (in mpctl itself), the IPv4 endpoint
 * table, the IPv4 MLP attribute table, the IPv6 endpoint table and the IPv6
 * MLP attribute table.  An MLP attribute table that ended up empty is freed
 * instead of being sent.  Only endpoints in the zone of the requesting conn
 * are reported.
 *
 * Returns the copy of the original request made on entry with copymsg().
 * If mpctl is NULL, has no b_cont, or one of the four working copies cannot
 * be made, everything allocated so far plus mpctl and the copy are freed and
 * 0 is returned.
 */
mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	mblk_t			*mp_conn_ctl;
	mblk_t			*mp_attr_ctl;
	mblk_t			*mp6_conn_ctl;
	mblk_t			*mp6_attr_ctl;
	mblk_t			*mp_conn_tail;
	mblk_t			*mp_attr_tail;
	mblk_t			*mp6_conn_tail;
	mblk_t			*mp6_attr_tail;
	struct opthdr		*optp;
	mib2_udpEntry_t		ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int			state;
	zoneid_t		zoneid;
	int			i;
	connf_t			*connfp;
	conn_t			*connp = Q_TO_CONN(q);
	int			v4_conn_idx;
	int			v6_conn_idx;
	boolean_t		needattr;
	udp_t			*udp;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
	mblk_t			*mp2ctl;

	/*
	 * make a copy of the original message
	 */
	mp2ctl = copymsg(mpctl);

	/*
	 * mp6_attr_ctl is the last copy attempted, so it can never be
	 * non-NULL on the failure path below and needs neither the
	 * initialization nor the freemsg().
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		freemsg(mpctl);
		freemsg(mp2ctl);
		return (0);
	}

	/* Only endpoints in the requester's zone are reported. */
	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	/* First reply: the scalar UDP MIB, sent in the original message. */
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
	    sizeof (us->us_udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every UDP conn in the global hash and build the tables. */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDPCONN))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/*
			 * Collect the MLP/label attributes; needattr records
			 * whether there is anything worth reporting.
			 */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}
			if (connp->conn_anon_mlp) {
				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
				needattr = B_TRUE;
			}
			switch (connp->conn_mac_mode) {
			case CONN_MAC_DEFAULT:
				break;
			case CONN_MAC_AWARE:
				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
				needattr = B_TRUE;
				break;
			case CONN_MAC_IMPLICIT:
				mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT;
				needattr = B_TRUE;
				break;
			}
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER &&
			    connp->conn_ixa->ixa_tsl != NULL) {
				ts_label_t *tsl;

				tsl = connp->conn_ixa->ixa_tsl;
				mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
				mlp.tme_doi = label2doi(tsl);
				mlp.tme_label = *label2bslabel(tsl);
				needattr = B_TRUE;
			}
			mutex_exit(&connp->conn_lock);

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (connp->conn_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress = connp->conn_laddr_v4;
				ude.udpLocalPort = ntohs(connp->conn_lport);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v4;
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude.udpCreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				/* Index of this entry within the IPv4 table. */
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (connp->conn_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state = state;
				ude6.udp6LocalAddress = connp->conn_laddr_v6;
				ude6.udp6LocalPort = ntohs(connp->conn_lport);
				mutex_enter(&connp->conn_lock);
				if (connp->conn_ixa->ixa_flags &
				    IXAF_SCOPEID_SET) {
					ude6.udp6IfIndex =
					    connp->conn_ixa->ixa_scopeid;
				} else {
					ude6.udp6IfIndex = connp->conn_bound_if;
				}
				mutex_exit(&connp->conn_lock);
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v6;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude6.udp6CreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				/* Index of this entry within the IPv6 table. */
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (mp2ctl);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to
 * do the appropriate locking.
2925 */ 2926 /* ARGSUSED */ 2927 int 2928 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 2929 uchar_t *ptr, int len) 2930 { 2931 switch (level) { 2932 case MIB2_UDP: 2933 return (0); 2934 default: 2935 return (1); 2936 } 2937 } 2938 2939 /* 2940 * This routine creates a T_UDERROR_IND message and passes it upstream. 2941 * The address and options are copied from the T_UNITDATA_REQ message 2942 * passed in mp. This message is freed. 2943 */ 2944 static void 2945 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2946 { 2947 struct T_unitdata_req *tudr; 2948 mblk_t *mp1; 2949 uchar_t *destaddr; 2950 t_scalar_t destlen; 2951 uchar_t *optaddr; 2952 t_scalar_t optlen; 2953 2954 if ((mp->b_wptr < mp->b_rptr) || 2955 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2956 goto done; 2957 } 2958 tudr = (struct T_unitdata_req *)mp->b_rptr; 2959 destaddr = mp->b_rptr + tudr->DEST_offset; 2960 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2961 destaddr + tudr->DEST_length < mp->b_rptr || 2962 destaddr + tudr->DEST_length > mp->b_wptr) { 2963 goto done; 2964 } 2965 optaddr = mp->b_rptr + tudr->OPT_offset; 2966 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2967 optaddr + tudr->OPT_length < mp->b_rptr || 2968 optaddr + tudr->OPT_length > mp->b_wptr) { 2969 goto done; 2970 } 2971 destlen = tudr->DEST_length; 2972 optlen = tudr->OPT_length; 2973 2974 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2975 (char *)optaddr, optlen, err); 2976 if (mp1 != NULL) 2977 qreply(q, mp1); 2978 2979 done: 2980 freemsg(mp); 2981 } 2982 2983 /* 2984 * This routine removes a port number association from a stream. It 2985 * is called by udp_wput to handle T_UNBIND_REQ messages. 
2986 */ 2987 static void 2988 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2989 { 2990 conn_t *connp = Q_TO_CONN(q); 2991 int error; 2992 2993 error = udp_do_unbind(connp); 2994 if (error) { 2995 if (error < 0) 2996 udp_err_ack(q, mp, -error, 0); 2997 else 2998 udp_err_ack(q, mp, TSYSERR, error); 2999 return; 3000 } 3001 3002 mp = mi_tpi_ok_ack_alloc(mp); 3003 ASSERT(mp != NULL); 3004 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 3005 qreply(q, mp); 3006 } 3007 3008 /* 3009 * Don't let port fall into the privileged range. 3010 * Since the extra privileged ports can be arbitrary we also 3011 * ensure that we exclude those from consideration. 3012 * us->us_epriv_ports is not sorted thus we loop over it until 3013 * there are no changes. 3014 */ 3015 static in_port_t 3016 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 3017 { 3018 int i; 3019 in_port_t nextport; 3020 boolean_t restart = B_FALSE; 3021 udp_stack_t *us = udp->udp_us; 3022 3023 if (random && udp_random_anon_port != 0) { 3024 (void) random_get_pseudo_bytes((uint8_t *)&port, 3025 sizeof (in_port_t)); 3026 /* 3027 * Unless changed by a sys admin, the smallest anon port 3028 * is 32768 and the largest anon port is 65535. It is 3029 * very likely (50%) for the random port to be smaller 3030 * than the smallest anon port. When that happens, 3031 * add port % (anon port range) to the smallest anon 3032 * port to get the random port. It should fall into the 3033 * valid anon port range. 
3034 */ 3035 if (port < us->us_smallest_anon_port) { 3036 port = us->us_smallest_anon_port + 3037 port % (us->us_largest_anon_port - 3038 us->us_smallest_anon_port); 3039 } 3040 } 3041 3042 retry: 3043 if (port < us->us_smallest_anon_port) 3044 port = us->us_smallest_anon_port; 3045 3046 if (port > us->us_largest_anon_port) { 3047 port = us->us_smallest_anon_port; 3048 if (restart) 3049 return (0); 3050 restart = B_TRUE; 3051 } 3052 3053 if (port < us->us_smallest_nonpriv_port) 3054 port = us->us_smallest_nonpriv_port; 3055 3056 for (i = 0; i < us->us_num_epriv_ports; i++) { 3057 if (port == us->us_epriv_ports[i]) { 3058 port++; 3059 /* 3060 * Make sure that the port is in the 3061 * valid range. 3062 */ 3063 goto retry; 3064 } 3065 } 3066 3067 if (is_system_labeled() && 3068 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 3069 port, IPPROTO_UDP, B_TRUE)) != 0) { 3070 port = nextport; 3071 goto retry; 3072 } 3073 3074 return (port); 3075 } 3076 3077 /* 3078 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 3079 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 3080 * the TPI options, otherwise we take them from msg_control. 3081 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 3082 * Always consumes mp; never consumes tudr_mp. 3083 */ 3084 static int 3085 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, 3086 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) 3087 { 3088 udp_t *udp = connp->conn_udp; 3089 udp_stack_t *us = udp->udp_us; 3090 int error; 3091 ip_xmit_attr_t *ixa; 3092 ip_pkt_t *ipp; 3093 in6_addr_t v6src; 3094 in6_addr_t v6dst; 3095 in6_addr_t v6nexthop; 3096 in_port_t dstport; 3097 uint32_t flowinfo; 3098 uint_t srcid; 3099 int is_absreq_failure = 0; 3100 conn_opt_arg_t coas, *coa; 3101 3102 ASSERT(tudr_mp != NULL || msg != NULL); 3103 3104 /* 3105 * Get ixa before checking state to handle a disconnect race. 
3106 * 3107 * We need an exclusive copy of conn_ixa since the ancillary data 3108 * options might modify it. That copy has no pointers hence we 3109 * need to set them up once we've parsed the ancillary data. 3110 */ 3111 ixa = conn_get_ixa_exclusive(connp); 3112 if (ixa == NULL) { 3113 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3114 freemsg(mp); 3115 return (ENOMEM); 3116 } 3117 ASSERT(cr != NULL); 3118 ixa->ixa_cred = cr; 3119 ixa->ixa_cpid = pid; 3120 if (is_system_labeled()) { 3121 /* We need to restart with a label based on the cred */ 3122 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3123 } 3124 3125 /* In case previous destination was multicast or multirt */ 3126 ip_attr_newdst(ixa); 3127 3128 /* Get a copy of conn_xmit_ipp since the options might change it */ 3129 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); 3130 if (ipp == NULL) { 3131 ixa_refrele(ixa); 3132 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3133 freemsg(mp); 3134 return (ENOMEM); 3135 } 3136 mutex_enter(&connp->conn_lock); 3137 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); 3138 mutex_exit(&connp->conn_lock); 3139 if (error != 0) { 3140 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3141 freemsg(mp); 3142 goto done; 3143 } 3144 3145 /* 3146 * Parse the options and update ixa and ipp as a result. 3147 * Note that ixa_tsl can be updated if SCM_UCRED. 3148 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. 
3149 */ 3150 3151 coa = &coas; 3152 coa->coa_connp = connp; 3153 coa->coa_ixa = ixa; 3154 coa->coa_ipp = ipp; 3155 coa->coa_ancillary = B_TRUE; 3156 coa->coa_changed = 0; 3157 3158 if (msg != NULL) { 3159 error = process_auxiliary_options(connp, msg->msg_control, 3160 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); 3161 } else { 3162 struct T_unitdata_req *tudr; 3163 3164 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; 3165 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 3166 error = tpi_optcom_buf(connp->conn_wq, tudr_mp, 3167 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, 3168 coa, &is_absreq_failure); 3169 } 3170 if (error != 0) { 3171 /* 3172 * Note: No special action needed in this 3173 * module for "is_absreq_failure" 3174 */ 3175 freemsg(mp); 3176 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3177 goto done; 3178 } 3179 ASSERT(is_absreq_failure == 0); 3180 3181 mutex_enter(&connp->conn_lock); 3182 /* 3183 * If laddr is unspecified then we look at sin6_src_id. 3184 * We will give precedence to a source address set with IPV6_PKTINFO 3185 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 3186 * want ip_attr_connect to select a source (since it can fail) when 3187 * IPV6_PKTINFO is specified. 3188 * If this doesn't result in a source address then we get a source 3189 * from ip_attr_connect() below. 
3190 */ 3191 v6src = connp->conn_saddr_v6; 3192 if (sin != NULL) { 3193 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 3194 dstport = sin->sin_port; 3195 flowinfo = 0; 3196 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3197 ixa->ixa_flags |= IXAF_IS_IPV4; 3198 } else if (sin6 != NULL) { 3199 v6dst = sin6->sin6_addr; 3200 dstport = sin6->sin6_port; 3201 flowinfo = sin6->sin6_flowinfo; 3202 srcid = sin6->__sin6_src_id; 3203 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 3204 ixa->ixa_scopeid = sin6->sin6_scope_id; 3205 ixa->ixa_flags |= IXAF_SCOPEID_SET; 3206 } else { 3207 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3208 } 3209 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 3210 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3211 connp->conn_netstack); 3212 } 3213 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 3214 ixa->ixa_flags |= IXAF_IS_IPV4; 3215 else 3216 ixa->ixa_flags &= ~IXAF_IS_IPV4; 3217 } else { 3218 /* Connected case */ 3219 v6dst = connp->conn_faddr_v6; 3220 dstport = connp->conn_fport; 3221 flowinfo = connp->conn_flowinfo; 3222 } 3223 mutex_exit(&connp->conn_lock); 3224 3225 /* Handle IPV6_PKTINFO setting source address. */ 3226 if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && 3227 (ipp->ipp_fields & IPPF_ADDR)) { 3228 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3229 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3230 v6src = ipp->ipp_addr; 3231 } else { 3232 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3233 v6src = ipp->ipp_addr; 3234 } 3235 } 3236 3237 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop); 3238 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 3239 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 3240 3241 switch (error) { 3242 case 0: 3243 break; 3244 case EADDRNOTAVAIL: 3245 /* 3246 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3247 * Don't have the application see that errno 3248 */ 3249 error = ENETUNREACH; 3250 goto failed; 3251 case ENETDOWN: 3252 /* 3253 * Have !ipif_addr_ready address; drop packet silently 3254 * until we can get applications to not send until we 3255 * are ready. 3256 */ 3257 error = 0; 3258 goto failed; 3259 case EHOSTUNREACH: 3260 case ENETUNREACH: 3261 if (ixa->ixa_ire != NULL) { 3262 /* 3263 * Let conn_ip_output/ire_send_noroute return 3264 * the error and send any local ICMP error. 3265 */ 3266 error = 0; 3267 break; 3268 } 3269 /* FALLTHRU */ 3270 default: 3271 failed: 3272 freemsg(mp); 3273 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3274 goto done; 3275 } 3276 3277 /* 3278 * We might be going to a different destination than last time, 3279 * thus check that TX allows the communication and compute any 3280 * needed label. 3281 * 3282 * TSOL Note: We have an exclusive ipp and ixa for this thread so we 3283 * don't have to worry about concurrent threads. 3284 */ 3285 if (is_system_labeled()) { 3286 /* Using UDP MLP requires SCM_UCRED from user */ 3287 if (connp->conn_mlp_type != mlptSingle && 3288 !((ixa->ixa_flags & IXAF_UCRED_TSL))) { 3289 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3290 error = ECONNREFUSED; 3291 freemsg(mp); 3292 goto done; 3293 } 3294 /* 3295 * Check whether Trusted Solaris policy allows communication 3296 * with this host, and pretend that the destination is 3297 * unreachable if not. 3298 * Compute any needed label and place it in ipp_label_v4/v6. 3299 * 3300 * Later conn_build_hdr_template/conn_prepend_hdr takes 3301 * ipp_label_v4/v6 to form the packet. 3302 * 3303 * Tsol note: We have ipp structure local to this thread so 3304 * no locking is needed. 
3305 */ 3306 error = conn_update_label(connp, ixa, &v6dst, ipp); 3307 if (error != 0) { 3308 freemsg(mp); 3309 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3310 goto done; 3311 } 3312 } 3313 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport, 3314 flowinfo, mp, &error); 3315 if (mp == NULL) { 3316 ASSERT(error != 0); 3317 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3318 goto done; 3319 } 3320 if (ixa->ixa_pktlen > IP_MAXPACKET) { 3321 error = EMSGSIZE; 3322 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3323 freemsg(mp); 3324 goto done; 3325 } 3326 /* We're done. Pass the packet to ip. */ 3327 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3328 3329 error = conn_ip_output(mp, ixa); 3330 /* No udpOutErrors if an error since IP increases its error counter */ 3331 switch (error) { 3332 case 0: 3333 break; 3334 case EWOULDBLOCK: 3335 (void) ixa_check_drain_insert(connp, ixa); 3336 error = 0; 3337 break; 3338 case EADDRNOTAVAIL: 3339 /* 3340 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3341 * Don't have the application see that errno 3342 */ 3343 error = ENETUNREACH; 3344 /* FALLTHRU */ 3345 default: 3346 mutex_enter(&connp->conn_lock); 3347 /* 3348 * Clear the source and v6lastdst so we call ip_attr_connect 3349 * for the next packet and try to pick a better source. 3350 */ 3351 if (connp->conn_mcbc_bind) 3352 connp->conn_saddr_v6 = ipv6_all_zeros; 3353 else 3354 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3355 connp->conn_v6lastdst = ipv6_all_zeros; 3356 mutex_exit(&connp->conn_lock); 3357 break; 3358 } 3359 done: 3360 ixa_refrele(ixa); 3361 ip_pkt_free(ipp); 3362 kmem_free(ipp, sizeof (*ipp)); 3363 return (error); 3364 } 3365 3366 /* 3367 * Handle sending an M_DATA for a connected socket. 3368 * Handles both IPv4 and IPv6. 
3369 */ 3370 static int 3371 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) 3372 { 3373 udp_t *udp = connp->conn_udp; 3374 udp_stack_t *us = udp->udp_us; 3375 int error; 3376 ip_xmit_attr_t *ixa; 3377 3378 /* 3379 * If no other thread is using conn_ixa this just gets a reference to 3380 * conn_ixa. Otherwise we get a safe copy of conn_ixa. 3381 */ 3382 ixa = conn_get_ixa(connp, B_FALSE); 3383 if (ixa == NULL) { 3384 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3385 freemsg(mp); 3386 return (ENOMEM); 3387 } 3388 3389 ASSERT(cr != NULL); 3390 ixa->ixa_cred = cr; 3391 ixa->ixa_cpid = pid; 3392 3393 mutex_enter(&connp->conn_lock); 3394 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6, 3395 connp->conn_fport, connp->conn_flowinfo, &error); 3396 3397 if (mp == NULL) { 3398 ASSERT(error != 0); 3399 mutex_exit(&connp->conn_lock); 3400 ixa_refrele(ixa); 3401 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3402 freemsg(mp); 3403 return (error); 3404 } 3405 3406 /* 3407 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3408 * safe copy, then we need to fill in any pointers in it. 3409 */ 3410 if (ixa->ixa_ire == NULL) { 3411 in6_addr_t faddr, saddr; 3412 in6_addr_t nexthop; 3413 in_port_t fport; 3414 3415 saddr = connp->conn_saddr_v6; 3416 faddr = connp->conn_faddr_v6; 3417 fport = connp->conn_fport; 3418 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop); 3419 mutex_exit(&connp->conn_lock); 3420 3421 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, 3422 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | 3423 IPDF_IPSEC); 3424 switch (error) { 3425 case 0: 3426 break; 3427 case EADDRNOTAVAIL: 3428 /* 3429 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3430 * Don't have the application see that errno 3431 */ 3432 error = ENETUNREACH; 3433 goto failed; 3434 case ENETDOWN: 3435 /* 3436 * Have !ipif_addr_ready address; drop packet silently 3437 * until we can get applications to not send until we 3438 * are ready. 3439 */ 3440 error = 0; 3441 goto failed; 3442 case EHOSTUNREACH: 3443 case ENETUNREACH: 3444 if (ixa->ixa_ire != NULL) { 3445 /* 3446 * Let conn_ip_output/ire_send_noroute return 3447 * the error and send any local ICMP error. 3448 */ 3449 error = 0; 3450 break; 3451 } 3452 /* FALLTHRU */ 3453 default: 3454 failed: 3455 ixa_refrele(ixa); 3456 freemsg(mp); 3457 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3458 return (error); 3459 } 3460 } else { 3461 /* Done with conn_t */ 3462 mutex_exit(&connp->conn_lock); 3463 } 3464 ASSERT(ixa->ixa_ire != NULL); 3465 3466 /* We're done. Pass the packet to ip. */ 3467 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3468 3469 error = conn_ip_output(mp, ixa); 3470 /* No udpOutErrors if an error since IP increases its error counter */ 3471 switch (error) { 3472 case 0: 3473 break; 3474 case EWOULDBLOCK: 3475 (void) ixa_check_drain_insert(connp, ixa); 3476 error = 0; 3477 break; 3478 case EADDRNOTAVAIL: 3479 /* 3480 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3481 * Don't have the application see that errno 3482 */ 3483 error = ENETUNREACH; 3484 break; 3485 } 3486 ixa_refrele(ixa); 3487 return (error); 3488 } 3489 3490 /* 3491 * Handle sending an M_DATA to the last destination. 3492 * Handles both IPv4 and IPv6. 3493 * 3494 * NOTE: The caller must hold conn_lock and we drop it here. 
3495 */ 3496 static int 3497 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, 3498 ip_xmit_attr_t *ixa) 3499 { 3500 udp_t *udp = connp->conn_udp; 3501 udp_stack_t *us = udp->udp_us; 3502 int error; 3503 3504 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3505 ASSERT(ixa != NULL); 3506 3507 ASSERT(cr != NULL); 3508 ixa->ixa_cred = cr; 3509 ixa->ixa_cpid = pid; 3510 3511 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, 3512 connp->conn_lastdstport, connp->conn_lastflowinfo, &error); 3513 3514 if (mp == NULL) { 3515 ASSERT(error != 0); 3516 mutex_exit(&connp->conn_lock); 3517 ixa_refrele(ixa); 3518 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3519 freemsg(mp); 3520 return (error); 3521 } 3522 3523 /* 3524 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3525 * safe copy, then we need to fill in any pointers in it. 3526 */ 3527 if (ixa->ixa_ire == NULL) { 3528 in6_addr_t lastdst, lastsrc; 3529 in6_addr_t nexthop; 3530 in_port_t lastport; 3531 3532 lastsrc = connp->conn_v6lastsrc; 3533 lastdst = connp->conn_v6lastdst; 3534 lastport = connp->conn_lastdstport; 3535 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); 3536 mutex_exit(&connp->conn_lock); 3537 3538 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, 3539 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | 3540 IPDF_VERIFY_DST | IPDF_IPSEC); 3541 switch (error) { 3542 case 0: 3543 break; 3544 case EADDRNOTAVAIL: 3545 /* 3546 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3547 * Don't have the application see that errno 3548 */ 3549 error = ENETUNREACH; 3550 goto failed; 3551 case ENETDOWN: 3552 /* 3553 * Have !ipif_addr_ready address; drop packet silently 3554 * until we can get applications to not send until we 3555 * are ready. 
3556 */ 3557 error = 0; 3558 goto failed; 3559 case EHOSTUNREACH: 3560 case ENETUNREACH: 3561 if (ixa->ixa_ire != NULL) { 3562 /* 3563 * Let conn_ip_output/ire_send_noroute return 3564 * the error and send any local ICMP error. 3565 */ 3566 error = 0; 3567 break; 3568 } 3569 /* FALLTHRU */ 3570 default: 3571 failed: 3572 ixa_refrele(ixa); 3573 freemsg(mp); 3574 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3575 return (error); 3576 } 3577 } else { 3578 /* Done with conn_t */ 3579 mutex_exit(&connp->conn_lock); 3580 } 3581 3582 /* We're done. Pass the packet to ip. */ 3583 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3584 3585 error = conn_ip_output(mp, ixa); 3586 /* No udpOutErrors if an error since IP increases its error counter */ 3587 switch (error) { 3588 case 0: 3589 break; 3590 case EWOULDBLOCK: 3591 (void) ixa_check_drain_insert(connp, ixa); 3592 error = 0; 3593 break; 3594 case EADDRNOTAVAIL: 3595 /* 3596 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3597 * Don't have the application see that errno 3598 */ 3599 error = ENETUNREACH; 3600 /* FALLTHRU */ 3601 default: 3602 mutex_enter(&connp->conn_lock); 3603 /* 3604 * Clear the source and v6lastdst so we call ip_attr_connect 3605 * for the next packet and try to pick a better source. 3606 */ 3607 if (connp->conn_mcbc_bind) 3608 connp->conn_saddr_v6 = ipv6_all_zeros; 3609 else 3610 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3611 connp->conn_v6lastdst = ipv6_all_zeros; 3612 mutex_exit(&connp->conn_lock); 3613 break; 3614 } 3615 ixa_refrele(ixa); 3616 return (error); 3617 } 3618 3619 3620 /* 3621 * Prepend the header template and then fill in the source and 3622 * flowinfo. The caller needs to handle the destination address since 3623 * it's setting is different if rthdr or source route. 3624 * 3625 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET. 3626 * When it returns NULL it sets errorp. 
3627 */ 3628 static mblk_t * 3629 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, 3630 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) 3631 { 3632 udp_t *udp = connp->conn_udp; 3633 udp_stack_t *us = udp->udp_us; 3634 boolean_t insert_spi = udp->udp_nat_t_endpoint; 3635 uint_t pktlen; 3636 uint_t alloclen; 3637 uint_t copylen; 3638 uint8_t *iph; 3639 uint_t ip_hdr_length; 3640 udpha_t *udpha; 3641 uint32_t cksum; 3642 ip_pkt_t *ipp; 3643 3644 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3645 3646 /* 3647 * Copy the header template and leave space for an SPI 3648 */ 3649 copylen = connp->conn_ht_iphc_len; 3650 alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0); 3651 pktlen = alloclen + msgdsize(mp); 3652 if (pktlen > IP_MAXPACKET) { 3653 freemsg(mp); 3654 *errorp = EMSGSIZE; 3655 return (NULL); 3656 } 3657 ixa->ixa_pktlen = pktlen; 3658 3659 /* check/fix buffer config, setup pointers into it */ 3660 iph = mp->b_rptr - alloclen; 3661 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { 3662 mblk_t *mp1; 3663 3664 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); 3665 if (mp1 == NULL) { 3666 freemsg(mp); 3667 *errorp = ENOMEM; 3668 return (NULL); 3669 } 3670 mp1->b_wptr = DB_LIM(mp1); 3671 mp1->b_cont = mp; 3672 mp = mp1; 3673 iph = (mp->b_wptr - alloclen); 3674 } 3675 mp->b_rptr = iph; 3676 bcopy(connp->conn_ht_iphc, iph, copylen); 3677 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); 3678 3679 ixa->ixa_ip_hdr_length = ip_hdr_length; 3680 udpha = (udpha_t *)(iph + ip_hdr_length); 3681 3682 /* 3683 * Setup header length and prepare for ULP checksum done in IP. 3684 * udp_build_hdr_template has already massaged any routing header 3685 * and placed the result in conn_sum. 3686 * 3687 * We make it easy for IP to include our pseudo header 3688 * by putting our length in uha_checksum. 
3689 */ 3690 cksum = pktlen - ip_hdr_length; 3691 udpha->uha_length = htons(cksum); 3692 3693 cksum += connp->conn_sum; 3694 cksum = (cksum >> 16) + (cksum & 0xFFFF); 3695 ASSERT(cksum < 0x10000); 3696 3697 ipp = &connp->conn_xmit_ipp; 3698 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3699 ipha_t *ipha = (ipha_t *)iph; 3700 3701 ipha->ipha_length = htons((uint16_t)pktlen); 3702 3703 /* IP does the checksum if uha_checksum is non-zero */ 3704 if (us->us_do_checksum) 3705 udpha->uha_checksum = htons(cksum); 3706 3707 /* if IP_PKTINFO specified an addres it wins over bind() */ 3708 if ((ipp->ipp_fields & IPPF_ADDR) && 3709 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3710 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); 3711 ipha->ipha_src = ipp->ipp_addr_v4; 3712 } else { 3713 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 3714 } 3715 } else { 3716 ip6_t *ip6h = (ip6_t *)iph; 3717 3718 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); 3719 udpha->uha_checksum = htons(cksum); 3720 3721 /* if IP_PKTINFO specified an addres it wins over bind() */ 3722 if ((ipp->ipp_fields & IPPF_ADDR) && 3723 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3724 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); 3725 ip6h->ip6_src = ipp->ipp_addr; 3726 } else { 3727 ip6h->ip6_src = *v6src; 3728 } 3729 ip6h->ip6_vcf = 3730 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 3731 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 3732 if (ipp->ipp_fields & IPPF_TCLASS) { 3733 /* Overrides the class part of flowinfo */ 3734 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 3735 ipp->ipp_tclass); 3736 } 3737 } 3738 3739 /* Insert all-0s SPI now. 
*/ 3740 if (insert_spi) 3741 *((uint32_t *)(udpha + 1)) = 0; 3742 3743 udpha->uha_dst_port = dstport; 3744 return (mp); 3745 } 3746 3747 /* 3748 * Send a T_UDERR_IND in response to an M_DATA 3749 */ 3750 static void 3751 udp_ud_err_connected(conn_t *connp, t_scalar_t error) 3752 { 3753 struct sockaddr_storage ss; 3754 sin_t *sin; 3755 sin6_t *sin6; 3756 struct sockaddr *addr; 3757 socklen_t addrlen; 3758 mblk_t *mp1; 3759 3760 mutex_enter(&connp->conn_lock); 3761 /* Initialize addr and addrlen as if they're passed in */ 3762 if (connp->conn_family == AF_INET) { 3763 sin = (sin_t *)&ss; 3764 *sin = sin_null; 3765 sin->sin_family = AF_INET; 3766 sin->sin_port = connp->conn_fport; 3767 sin->sin_addr.s_addr = connp->conn_faddr_v4; 3768 addr = (struct sockaddr *)sin; 3769 addrlen = sizeof (*sin); 3770 } else { 3771 sin6 = (sin6_t *)&ss; 3772 *sin6 = sin6_null; 3773 sin6->sin6_family = AF_INET6; 3774 sin6->sin6_port = connp->conn_fport; 3775 sin6->sin6_flowinfo = connp->conn_flowinfo; 3776 sin6->sin6_addr = connp->conn_faddr_v6; 3777 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && 3778 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 3779 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; 3780 } else { 3781 sin6->sin6_scope_id = 0; 3782 } 3783 sin6->__sin6_src_id = 0; 3784 addr = (struct sockaddr *)sin6; 3785 addrlen = sizeof (*sin6); 3786 } 3787 mutex_exit(&connp->conn_lock); 3788 3789 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); 3790 if (mp1 != NULL) 3791 putnext(connp->conn_rq, mp1); 3792 } 3793 3794 /* 3795 * This routine handles all messages passed downstream. It either 3796 * consumes the message or passes it downstream; it never queues a 3797 * a message. 3798 * 3799 * Also entry point for sockfs when udp is in "direct sockfs" mode. 
This mode 3800 * is valid when we are directly beneath the stream head, and thus sockfs 3801 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3802 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3803 * connected endpoints. 3804 */ 3805 void 3806 udp_wput(queue_t *q, mblk_t *mp) 3807 { 3808 sin6_t *sin6; 3809 sin_t *sin = NULL; 3810 uint_t srcid; 3811 conn_t *connp = Q_TO_CONN(q); 3812 udp_t *udp = connp->conn_udp; 3813 int error = 0; 3814 struct sockaddr *addr = NULL; 3815 socklen_t addrlen; 3816 udp_stack_t *us = udp->udp_us; 3817 struct T_unitdata_req *tudr; 3818 mblk_t *data_mp; 3819 ushort_t ipversion; 3820 cred_t *cr; 3821 pid_t pid; 3822 3823 /* 3824 * We directly handle several cases here: T_UNITDATA_REQ message 3825 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3826 * socket. 3827 */ 3828 switch (DB_TYPE(mp)) { 3829 case M_DATA: 3830 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3831 /* Not connected; address is required */ 3832 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3833 UDP_DBGSTAT(us, udp_data_notconn); 3834 UDP_STAT(us, udp_out_err_notconn); 3835 freemsg(mp); 3836 return; 3837 } 3838 /* 3839 * All Solaris components should pass a db_credp 3840 * for this message, hence we ASSERT. 3841 * On production kernels we return an error to be robust against 3842 * random streams modules sitting on top of us. 
3843 */ 3844 cr = msg_getcred(mp, &pid); 3845 ASSERT(cr != NULL); 3846 if (cr == NULL) { 3847 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3848 freemsg(mp); 3849 return; 3850 } 3851 ASSERT(udp->udp_issocket); 3852 UDP_DBGSTAT(us, udp_data_conn); 3853 error = udp_output_connected(connp, mp, cr, pid); 3854 if (error != 0) { 3855 UDP_STAT(us, udp_out_err_output); 3856 if (connp->conn_rq != NULL) 3857 udp_ud_err_connected(connp, (t_scalar_t)error); 3858 #ifdef DEBUG 3859 printf("udp_output_connected returned %d\n", error); 3860 #endif 3861 } 3862 return; 3863 3864 case M_PROTO: 3865 case M_PCPROTO: 3866 tudr = (struct T_unitdata_req *)mp->b_rptr; 3867 if (MBLKL(mp) < sizeof (*tudr) || 3868 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3869 udp_wput_other(q, mp); 3870 return; 3871 } 3872 break; 3873 3874 default: 3875 udp_wput_other(q, mp); 3876 return; 3877 } 3878 3879 /* Handle valid T_UNITDATA_REQ here */ 3880 data_mp = mp->b_cont; 3881 if (data_mp == NULL) { 3882 error = EPROTO; 3883 goto ud_error2; 3884 } 3885 mp->b_cont = NULL; 3886 3887 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3888 error = EADDRNOTAVAIL; 3889 goto ud_error2; 3890 } 3891 3892 /* 3893 * All Solaris components should pass a db_credp 3894 * for this TPI message, hence we should ASSERT. 3895 * However, RPC (svc_clts_ksend) does this odd thing where it 3896 * passes the options from a T_UNITDATA_IND unchanged in a 3897 * T_UNITDATA_REQ. While that is the right thing to do for 3898 * some options, SCM_UCRED being the key one, this also makes it 3899 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3900 */ 3901 cr = msg_getcred(mp, &pid); 3902 if (cr == NULL) { 3903 cr = connp->conn_cred; 3904 pid = connp->conn_cpid; 3905 } 3906 3907 /* 3908 * If a port has not been bound to the stream, fail. 3909 * This is not a problem when sockfs is directly 3910 * above us, because it will ensure that the socket 3911 * is first bound before allowing data to be sent. 
3912 */ 3913 if (udp->udp_state == TS_UNBND) { 3914 error = EPROTO; 3915 goto ud_error2; 3916 } 3917 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3918 addrlen = tudr->DEST_length; 3919 3920 switch (connp->conn_family) { 3921 case AF_INET6: 3922 sin6 = (sin6_t *)addr; 3923 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3924 (sin6->sin6_family != AF_INET6)) { 3925 error = EADDRNOTAVAIL; 3926 goto ud_error2; 3927 } 3928 3929 srcid = sin6->__sin6_src_id; 3930 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3931 /* 3932 * Destination is a non-IPv4-compatible IPv6 address. 3933 * Send out an IPv6 format packet. 3934 */ 3935 3936 /* 3937 * If the local address is a mapped address return 3938 * an error. 3939 * It would be possible to send an IPv6 packet but the 3940 * response would never make it back to the application 3941 * since it is bound to a mapped address. 3942 */ 3943 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3944 error = EADDRNOTAVAIL; 3945 goto ud_error2; 3946 } 3947 3948 UDP_DBGSTAT(us, udp_out_ipv6); 3949 3950 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3951 sin6->sin6_addr = ipv6_loopback; 3952 ipversion = IPV6_VERSION; 3953 } else { 3954 if (connp->conn_ipv6_v6only) { 3955 error = EADDRNOTAVAIL; 3956 goto ud_error2; 3957 } 3958 3959 /* 3960 * If the local address is not zero or a mapped address 3961 * return an error. It would be possible to send an 3962 * IPv4 packet but the response would never make it 3963 * back to the application since it is bound to a 3964 * non-mapped address. 
3965 */ 3966 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3967 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3968 error = EADDRNOTAVAIL; 3969 goto ud_error2; 3970 } 3971 UDP_DBGSTAT(us, udp_out_mapped); 3972 3973 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3974 V4_PART_OF_V6(sin6->sin6_addr) = 3975 htonl(INADDR_LOOPBACK); 3976 } 3977 ipversion = IPV4_VERSION; 3978 } 3979 3980 if (tudr->OPT_length != 0) { 3981 /* 3982 * If we are connected then the destination needs to be 3983 * the same as the connected one. 3984 */ 3985 if (udp->udp_state == TS_DATA_XFER && 3986 !conn_same_as_last_v6(connp, sin6)) { 3987 error = EISCONN; 3988 goto ud_error2; 3989 } 3990 UDP_STAT(us, udp_out_opt); 3991 error = udp_output_ancillary(connp, NULL, sin6, 3992 data_mp, mp, NULL, cr, pid); 3993 } else { 3994 ip_xmit_attr_t *ixa; 3995 3996 /* 3997 * We have to allocate an ip_xmit_attr_t before we grab 3998 * conn_lock and we need to hold conn_lock once we've 3999 * checked conn_same_as_last_v6 to handle concurrent 4000 * send* calls on a socket. 
4001 */ 4002 ixa = conn_get_ixa(connp, B_FALSE); 4003 if (ixa == NULL) { 4004 error = ENOMEM; 4005 goto ud_error2; 4006 } 4007 mutex_enter(&connp->conn_lock); 4008 4009 if (conn_same_as_last_v6(connp, sin6) && 4010 connp->conn_lastsrcid == srcid && 4011 ipsec_outbound_policy_current(ixa)) { 4012 UDP_DBGSTAT(us, udp_out_lastdst); 4013 /* udp_output_lastdst drops conn_lock */ 4014 error = udp_output_lastdst(connp, data_mp, cr, 4015 pid, ixa); 4016 } else { 4017 UDP_DBGSTAT(us, udp_out_diffdst); 4018 /* udp_output_newdst drops conn_lock */ 4019 error = udp_output_newdst(connp, data_mp, NULL, 4020 sin6, ipversion, cr, pid, ixa); 4021 } 4022 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 4023 } 4024 if (error == 0) { 4025 freeb(mp); 4026 return; 4027 } 4028 break; 4029 4030 case AF_INET: 4031 sin = (sin_t *)addr; 4032 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 4033 (sin->sin_family != AF_INET)) { 4034 error = EADDRNOTAVAIL; 4035 goto ud_error2; 4036 } 4037 UDP_DBGSTAT(us, udp_out_ipv4); 4038 if (sin->sin_addr.s_addr == INADDR_ANY) 4039 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 4040 ipversion = IPV4_VERSION; 4041 4042 srcid = 0; 4043 if (tudr->OPT_length != 0) { 4044 /* 4045 * If we are connected then the destination needs to be 4046 * the same as the connected one. 4047 */ 4048 if (udp->udp_state == TS_DATA_XFER && 4049 !conn_same_as_last_v4(connp, sin)) { 4050 error = EISCONN; 4051 goto ud_error2; 4052 } 4053 UDP_STAT(us, udp_out_opt); 4054 error = udp_output_ancillary(connp, sin, NULL, 4055 data_mp, mp, NULL, cr, pid); 4056 } else { 4057 ip_xmit_attr_t *ixa; 4058 4059 /* 4060 * We have to allocate an ip_xmit_attr_t before we grab 4061 * conn_lock and we need to hold conn_lock once we've 4062 * checked conn_same_as_last_v4 to handle concurrent 4063 * send* calls on a socket. 
4064 */ 4065 ixa = conn_get_ixa(connp, B_FALSE); 4066 if (ixa == NULL) { 4067 error = ENOMEM; 4068 goto ud_error2; 4069 } 4070 mutex_enter(&connp->conn_lock); 4071 4072 if (conn_same_as_last_v4(connp, sin) && 4073 ipsec_outbound_policy_current(ixa)) { 4074 UDP_DBGSTAT(us, udp_out_lastdst); 4075 /* udp_output_lastdst drops conn_lock */ 4076 error = udp_output_lastdst(connp, data_mp, cr, 4077 pid, ixa); 4078 } else { 4079 UDP_DBGSTAT(us, udp_out_diffdst); 4080 /* udp_output_newdst drops conn_lock */ 4081 error = udp_output_newdst(connp, data_mp, sin, 4082 NULL, ipversion, cr, pid, ixa); 4083 } 4084 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 4085 } 4086 if (error == 0) { 4087 freeb(mp); 4088 return; 4089 } 4090 break; 4091 } 4092 UDP_STAT(us, udp_out_err_output); 4093 ASSERT(mp != NULL); 4094 /* mp is freed by the following routine */ 4095 udp_ud_err(q, mp, (t_scalar_t)error); 4096 return; 4097 4098 ud_error2: 4099 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 4100 freemsg(data_mp); 4101 UDP_STAT(us, udp_out_err_output); 4102 ASSERT(mp != NULL); 4103 /* mp is freed by the following routine */ 4104 udp_ud_err(q, mp, (t_scalar_t)error); 4105 } 4106 4107 /* 4108 * Handle the case of the IP address, port, flow label being different 4109 * for both IPv4 and IPv6. 4110 * 4111 * NOTE: The caller must hold conn_lock and we drop it here. 
4112 */ 4113 static int 4114 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 4115 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 4116 { 4117 uint_t srcid; 4118 uint32_t flowinfo; 4119 udp_t *udp = connp->conn_udp; 4120 int error = 0; 4121 ip_xmit_attr_t *oldixa; 4122 udp_stack_t *us = udp->udp_us; 4123 in6_addr_t v6src; 4124 in6_addr_t v6dst; 4125 in6_addr_t v6nexthop; 4126 in_port_t dstport; 4127 4128 ASSERT(MUTEX_HELD(&connp->conn_lock)); 4129 ASSERT(ixa != NULL); 4130 /* 4131 * We hold conn_lock across all the use and modifications of 4132 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 4133 * stay consistent. 4134 */ 4135 4136 ASSERT(cr != NULL); 4137 ixa->ixa_cred = cr; 4138 ixa->ixa_cpid = pid; 4139 if (is_system_labeled()) { 4140 /* We need to restart with a label based on the cred */ 4141 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 4142 } 4143 4144 /* 4145 * If we are connected then the destination needs to be the 4146 * same as the connected one, which is not the case here since we 4147 * checked for that above. 4148 */ 4149 if (udp->udp_state == TS_DATA_XFER) { 4150 mutex_exit(&connp->conn_lock); 4151 error = EISCONN; 4152 goto ud_error; 4153 } 4154 4155 /* In case previous destination was multicast or multirt */ 4156 ip_attr_newdst(ixa); 4157 4158 /* 4159 * If laddr is unspecified then we look at sin6_src_id. 4160 * We will give precedence to a source address set with IPV6_PKTINFO 4161 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 4162 * want ip_attr_connect to select a source (since it can fail) when 4163 * IPV6_PKTINFO is specified. 4164 * If this doesn't result in a source address then we get a source 4165 * from ip_attr_connect() below. 
4166 */ 4167 v6src = connp->conn_saddr_v6; 4168 if (sin != NULL) { 4169 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 4170 dstport = sin->sin_port; 4171 flowinfo = 0; 4172 srcid = 0; 4173 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4174 if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) { 4175 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4176 connp->conn_netstack); 4177 } 4178 ixa->ixa_flags |= IXAF_IS_IPV4; 4179 } else { 4180 v6dst = sin6->sin6_addr; 4181 dstport = sin6->sin6_port; 4182 flowinfo = sin6->sin6_flowinfo; 4183 srcid = sin6->__sin6_src_id; 4184 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 4185 ixa->ixa_scopeid = sin6->sin6_scope_id; 4186 ixa->ixa_flags |= IXAF_SCOPEID_SET; 4187 } else { 4188 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4189 } 4190 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 4191 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4192 connp->conn_netstack); 4193 } 4194 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 4195 ixa->ixa_flags |= IXAF_IS_IPV4; 4196 else 4197 ixa->ixa_flags &= ~IXAF_IS_IPV4; 4198 } 4199 /* Handle IPV6_PKTINFO setting source address. */ 4200 if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && 4201 (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR)) { 4202 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 4203 4204 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4205 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4206 v6src = ipp->ipp_addr; 4207 } else { 4208 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4209 v6src = ipp->ipp_addr; 4210 } 4211 } 4212 4213 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 4214 mutex_exit(&connp->conn_lock); 4215 4216 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 4217 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 4218 switch (error) { 4219 case 0: 4220 break; 4221 case EADDRNOTAVAIL: 4222 /* 4223 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
4224 * Don't have the application see that errno 4225 */ 4226 error = ENETUNREACH; 4227 goto failed; 4228 case ENETDOWN: 4229 /* 4230 * Have !ipif_addr_ready address; drop packet silently 4231 * until we can get applications to not send until we 4232 * are ready. 4233 */ 4234 error = 0; 4235 goto failed; 4236 case EHOSTUNREACH: 4237 case ENETUNREACH: 4238 if (ixa->ixa_ire != NULL) { 4239 /* 4240 * Let conn_ip_output/ire_send_noroute return 4241 * the error and send any local ICMP error. 4242 */ 4243 error = 0; 4244 break; 4245 } 4246 /* FALLTHRU */ 4247 failed: 4248 default: 4249 goto ud_error; 4250 } 4251 4252 4253 /* 4254 * Cluster note: we let the cluster hook know that we are sending to a 4255 * new address and/or port. 4256 */ 4257 if (cl_inet_connect2 != NULL) { 4258 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 4259 if (error != 0) { 4260 error = EHOSTUNREACH; 4261 goto ud_error; 4262 } 4263 } 4264 4265 mutex_enter(&connp->conn_lock); 4266 /* 4267 * While we dropped the lock some other thread might have connected 4268 * this socket. If so we bail out with EISCONN to ensure that the 4269 * connecting thread is the one that updates conn_ixa, conn_ht_* 4270 * and conn_*last*. 4271 */ 4272 if (udp->udp_state == TS_DATA_XFER) { 4273 mutex_exit(&connp->conn_lock); 4274 error = EISCONN; 4275 goto ud_error; 4276 } 4277 4278 /* 4279 * We need to rebuild the headers if 4280 * - we are labeling packets (could be different for different 4281 * destinations) 4282 * - we have a source route (or routing header) since we need to 4283 * massage that to get the pseudo-header checksum 4284 * - the IP version is different than the last time 4285 * - a socket option with COA_HEADER_CHANGED has been set which 4286 * set conn_v6lastdst to zero. 4287 * 4288 * Otherwise the prepend function will just update the src, dst, 4289 * dstport, and flow label. 
4290 */ 4291 if (is_system_labeled()) { 4292 /* TX MLP requires SCM_UCRED and don't have that here */ 4293 if (connp->conn_mlp_type != mlptSingle) { 4294 mutex_exit(&connp->conn_lock); 4295 error = ECONNREFUSED; 4296 goto ud_error; 4297 } 4298 /* 4299 * Check whether Trusted Solaris policy allows communication 4300 * with this host, and pretend that the destination is 4301 * unreachable if not. 4302 * Compute any needed label and place it in ipp_label_v4/v6. 4303 * 4304 * Later conn_build_hdr_template/conn_prepend_hdr takes 4305 * ipp_label_v4/v6 to form the packet. 4306 * 4307 * Tsol note: Since we hold conn_lock we know no other 4308 * thread manipulates conn_xmit_ipp. 4309 */ 4310 error = conn_update_label(connp, ixa, &v6dst, 4311 &connp->conn_xmit_ipp); 4312 if (error != 0) { 4313 mutex_exit(&connp->conn_lock); 4314 goto ud_error; 4315 } 4316 /* Rebuild the header template */ 4317 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4318 flowinfo); 4319 if (error != 0) { 4320 mutex_exit(&connp->conn_lock); 4321 goto ud_error; 4322 } 4323 } else if ((connp->conn_xmit_ipp.ipp_fields & 4324 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 4325 ipversion != connp->conn_lastipversion || 4326 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 4327 /* Rebuild the header template */ 4328 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4329 flowinfo); 4330 if (error != 0) { 4331 mutex_exit(&connp->conn_lock); 4332 goto ud_error; 4333 } 4334 } else { 4335 /* Simply update the destination address if no source route */ 4336 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4337 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 4338 4339 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 4340 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 4341 ipha->ipha_fragment_offset_and_flags |= 4342 IPH_DF_HTONS; 4343 } else { 4344 ipha->ipha_fragment_offset_and_flags &= 4345 ~IPH_DF_HTONS; 4346 } 4347 } else { 4348 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 4349 ip6h->ip6_dst = v6dst; 4350 } 4351 
} 4352 4353 /* 4354 * Remember the dst/dstport etc which corresponds to the built header 4355 * template and conn_ixa. 4356 */ 4357 oldixa = conn_replace_ixa(connp, ixa); 4358 connp->conn_v6lastdst = v6dst; 4359 connp->conn_lastipversion = ipversion; 4360 connp->conn_lastdstport = dstport; 4361 connp->conn_lastflowinfo = flowinfo; 4362 connp->conn_lastscopeid = ixa->ixa_scopeid; 4363 connp->conn_lastsrcid = srcid; 4364 /* Also remember a source to use together with lastdst */ 4365 connp->conn_v6lastsrc = v6src; 4366 4367 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 4368 dstport, flowinfo, &error); 4369 4370 /* Done with conn_t */ 4371 mutex_exit(&connp->conn_lock); 4372 ixa_refrele(oldixa); 4373 4374 if (data_mp == NULL) { 4375 ASSERT(error != 0); 4376 goto ud_error; 4377 } 4378 4379 /* We're done. Pass the packet to ip. */ 4380 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 4381 4382 error = conn_ip_output(data_mp, ixa); 4383 /* No udpOutErrors if an error since IP increases its error counter */ 4384 switch (error) { 4385 case 0: 4386 break; 4387 case EWOULDBLOCK: 4388 (void) ixa_check_drain_insert(connp, ixa); 4389 error = 0; 4390 break; 4391 case EADDRNOTAVAIL: 4392 /* 4393 * IXAF_VERIFY_SOURCE tells us to pick a better source. 4394 * Don't have the application see that errno 4395 */ 4396 error = ENETUNREACH; 4397 /* FALLTHRU */ 4398 default: 4399 mutex_enter(&connp->conn_lock); 4400 /* 4401 * Clear the source and v6lastdst so we call ip_attr_connect 4402 * for the next packet and try to pick a better source. 
*/
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_v6lastdst = ipv6_all_zeros;
		mutex_exit(&connp->conn_lock);
		break;
	}
	ixa_refrele(ixa);
	return (error);

ud_error:
	if (ixa != NULL)
		ixa_refrele(ixa);

	freemsg(data_mp);
	BUMP_MIB(&us->us_udp_mib, udpOutErrors);
	UDP_STAT(us, udp_out_err_output);
	return (error);
}

/*
 * Put routine that consumes every message handed to it: the message is
 * freed unconditionally.  On DEBUG kernels a console note is printed first.
 * The queue argument is unused.
 */
/* ARGSUSED */
static void
udp_wput_fallback(queue_t *wq, mblk_t *mp)
{
#ifdef DEBUG
	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
#endif
	freemsg(mp);
}


/*
 * Handle special out-of-band ioctl requests (see PSARC/2008/265).
 *
 * mp carries a cmdblk_t at b_rptr; the caller-supplied buffer is the
 * continuation block (mp->b_cont).  The result of the request is stored in
 * cb_error and the same message is sent back with qreply():
 *
 *	- no continuation block, or one shorter than cb_len:	EPROTO
 *	- TI_GETPEERNAME while not in TS_DATA_XFER:		ENOTCONN
 *	- TI_GETPEERNAME otherwise:	result of conn_getpeername()
 *	- TI_GETMYNAME:			result of conn_getsockname()
 *	- any other cb_cmd:					EINVAL
 *
 * conn_getpeername()/conn_getsockname() are passed &cmdp->cb_len, so they
 * may update the length in place.
 */
static void
udp_wput_cmdblk(queue_t *q, mblk_t *mp)
{
	void	*data;
	mblk_t	*datamp = mp->b_cont;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;
	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;

	/* The data block must exist and hold at least cb_len bytes. */
	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
		cmdp->cb_error = EPROTO;
		qreply(q, mp);
		return;
	}
	data = datamp->b_rptr;

	/* State check and address lookup are done under conn_lock. */
	mutex_enter(&connp->conn_lock);
	switch (cmdp->cb_cmd) {
	case TI_GETPEERNAME:
		if (udp->udp_state != TS_DATA_XFER)
			cmdp->cb_error = ENOTCONN;
		else
			cmdp->cb_error = conn_getpeername(connp, data,
			    &cmdp->cb_len);
		break;
	case TI_GETMYNAME:
		cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
		break;
	default:
		cmdp->cb_error = EINVAL;
		break;
	}
	mutex_exit(&connp->conn_lock);

	qreply(q, mp);
}

/*
 * Switch the endpoint to pure TPI behaviour: clear udp_issocket while
 * holding conn_lock, then count the event in the udp_sock_fallback
 * statistic.
 */
static void
udp_use_pure_tpi(udp_t *udp)
{
	conn_t	*connp = udp->udp_connp;

	mutex_enter(&connp->conn_lock);
	udp->udp_issocket = B_FALSE;
	mutex_exit(&connp->conn_lock);
	UDP_STAT(udp->udp_us, udp_sock_fallback);
}

static
void
udp_wput_other(queue_t *q, mblk_t *mp)
{
	/*
	 * Write-side handler that dispatches on the STREAMS message type:
	 *
	 *	M_CMD			-> udp_wput_cmdblk()
	 *	M_PROTO / M_PCPROTO	-> the TPI primitive handlers below
	 *	M_FLUSH			-> flush this queue if FLUSHW is set
	 *	M_IOCTL			-> the ioctls recognized below
	 *	M_IOCDATA		-> udp_wput_iocdata()
	 *
	 * Every case that fully handles the message returns.  Anything that
	 * merely breaks out of the switch is handed to ip_wput_nondata() at
	 * the bottom of the function.
	 */
	uchar_t	*rptr = mp->b_rptr;
	struct iocblk *iocp;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;
	udp_stack_t *us = udp->udp_us;
	cred_t	*cr;

	switch (mp->b_datap->db_type) {
	case M_CMD:
		udp_wput_cmdblk(q, mp);
		return;

	case M_PROTO:
	case M_PCPROTO:
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			/*
			 * If the message does not contain a PRIM_type,
			 * throw it away.
			 */
			freemsg(mp);
			return;
		}
		switch (((t_primp_t)rptr)->type) {
		case T_ADDR_REQ:
			udp_addr_req(q, mp);
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			udp_tpi_bind(q, mp);
			return;
		case T_CONN_REQ:
			udp_tpi_connect(q, mp);
			return;
		case T_CAPABILITY_REQ:
			udp_capability_req(q, mp);
			return;
		case T_INFO_REQ:
			udp_info_req(q, mp);
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, EADDRNOTAVAIL);
			return;
		case T_UNBIND_REQ:
			udp_tpi_unbind(q, mp);
			return;
		case T_SVR4_OPTMGMT_REQ:
			/*
			 * All Solaris components should pass a db_credp
			 * for this TPI message, hence we ASSERT.
			 * But in case there is some other M_PROTO that looks
			 * like a TPI message sent by some other kernel
			 * component, we check and return an error.
			 */
			cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			if (cr == NULL) {
				udp_err_ack(q, mp, TSYSERR, EINVAL);
				return;
			}
			/*
			 * snmpcom_req() gets the first look; only when it
			 * returns false is the request treated as an
			 * ordinary SVR4 option-management request.
			 */
			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
			    cr)) {
				svr4_optcom_req(q, mp, cr, &udp_opt_obj);
			}
			return;

		case T_OPTMGMT_REQ:
			/*
			 * All Solaris components should pass a db_credp
			 * for this TPI message, hence we ASSERT.
			 * But in case there is some other M_PROTO that looks
			 * like a TPI message sent by some other kernel
			 * component, we check and return an error.
			 */
			cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			if (cr == NULL) {
				udp_err_ack(q, mp, TSYSERR, EINVAL);
				return;
			}
			tpi_optcom_req(q, mp, cr, &udp_opt_obj);
			return;

		case T_DISCON_REQ:
			udp_tpi_disconnect(q, mp);
			return;

		/* The following TPI message is not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			return;

		/* The following 3 TPI requests are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			return;
		default:
			/* Unknown primitive: fall out to ip_wput_nondata(). */
			break;
		}
		break;
	case M_FLUSH:
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		/* The M_FLUSH itself still goes on to ip_wput_nondata(). */
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				qreply(q, mp);
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME:
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			return;
		case ND_SET:
			/* nd_getset performs the necessary checking */
		case ND_GET:
			/*
			 * Reply here only when nd_getset() reports that it
			 * handled the request; otherwise pass it on.
			 */
			if (nd_getset(q, us->us_nd, mp)) {
				qreply(q, mp);
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we have
			 * to follow pure TPI semantics.
			 */
			if (!udp->udp_issocket) {
				/* Not a socket to begin with: refuse. */
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp_use_pure_tpi(udp);

				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			qreply(q, mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	ip_wput_nondata(q, mp);
}

/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages.
 *
 * Only TI_GETMYNAME and TI_GETPEERNAME are processed here; M_IOCDATA for
 * any other ioctl is forwarded to ip_wput_nondata().  The function is
 * re-entered once per copy step and uses mi_copy_state() to tell which
 * step has just completed:
 *
 *	MI_COPY_IN  1	strbuf copied in: validate it, allocate the
 *			copyout buffer, fill in the address and start
 *			copying it out
 *	MI_COPY_OUT 1	address copied out: copy out the strbuf
 *	MI_COPY_OUT 2	strbuf copied out: acknowledge the ioctl
 *	anything else	acknowledge the ioctl with EPROTO
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t		*mp1;
	struct iocblk	*iocp = (struct iocblk *)mp->b_rptr;
	STRUCT_HANDLE(strbuf, sb);
	uint_t		addrlen;
	conn_t		*connp = Q_TO_CONN(q);
	udp_t		*udp = connp->conn_udp;

	/* Make sure it is one of ours. */
	switch (iocp->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		ip_wput_nondata(q, mp);
		return;
	}

	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		/* Nothing further is done with mp on this path. */
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME.  Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);

	/* The address size depends only on the endpoint's family. */
	if (connp->conn_family == AF_INET)
		addrlen = sizeof (sin_t);
	else
		addrlen = sizeof (sin6_t);

	/* The caller's buffer must be able to hold a full address. */
	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}

	switch (iocp->ioc_cmd) {
	case TI_GETMYNAME:
		break;
	case TI_GETPEERNAME:
		/* Checked again here, after the copy-in step. */
		if (udp->udp_state != TS_DATA_XFER) {
			mi_copy_done(q, mp, ENOTCONN);
			return;
		}
		break;
	}
	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
	if (!mp1)
		return;

	STRUCT_FSET(sb, len, addrlen);
	/*
	 * Write the address at mp1->b_wptr.  The return values of
	 * conn_getsockname()/conn_getpeername() are deliberately discarded;
	 * addrlen is passed by reference and is what b_wptr is advanced by.
	 */
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
		(void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
		    &addrlen);
		break;
	case TI_GETPEERNAME:
		(void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
		    &addrlen);
		break;
	}
	mp1->b_wptr += addrlen;
	/* Copy out the address */
	mi_copyout(q, mp);
}

/*
 * Global (not per-stack) initialization: compute udp_max_optsize from the
 * UDP option table and register the per-netstack create/destroy callbacks.
 */
void
udp_ddi_g_init(void)
{
	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of udp_stack_t's.
	 */
	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
}

/*
 * Counterpart of udp_ddi_g_init(): drop the NS_UDP netstack registration.
 */
void
udp_ddi_g_destroy(void)
{
	netstack_unregister(NS_UDP);
}

/* Module name passed to mod_name_to_major() in udp_stack_init(). */
#define	INET_NAME	"ip"

/*
 * Initialize the UDP stack instance.
 *
 * Allocates and fills a udp_stack_t for the netstack `ns' with id `stackid':
 * the extra-privileged port list, the bind fanout table and its per-bucket
 * locks, a private copy of the tunable parameter array (registered with
 * udp_param_register()), both kstats, and an LDI identifier.
 *
 * All allocations use KM_SLEEP.  Returns the new udp_stack_t.
 */
static void *
udp_stack_init(netstackid_t stackid, netstack_t *ns)
{
	udp_stack_t	*us;
	udpparam_t	*pa;
	int		i;
	int		error = 0;
	major_t		major;

	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
	us->us_netstack = ns;

	/*
	 * NOTE(review): 2049 and 4045 are presumably the NFS and lock
	 * manager ports -- confirm against the protocol documentation.
	 */
	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
	us->us_epriv_ports[0] = 2049;
	us->us_epriv_ports[1] = 4045;

	/*
	 * The smallest anonymous port in the privileged port range which UDP
	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
	 */
	us->us_min_anonpriv_port = 512;

	us->us_bind_fanout_size = udp_bind_fanout_size;

	/* Roundup variable that might have been modified in /etc/system */
	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
		/* Not a power of two. Round up to nearest power of two */
		for (i = 0; i < 31; i++) {
			if (us->us_bind_fanout_size < (1 << i))
				break;
		}
		us->us_bind_fanout_size = 1 << i;
	}
	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
	    sizeof (udp_fanout_t), KM_SLEEP);
	for (i = 0; i < us->us_bind_fanout_size; i++) {
		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
		    NULL);
	}

	/* Each stack gets its own copy of the udp_param_arr template. */
	pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP);

	us->us_param_arr = pa;
	bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr));

	(void) udp_param_register(&us->us_nd,
	    us->us_param_arr, A_CNT(udp_param_arr));

	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
	us->us_mibkp = udp_kstat_init(stackid);

	/*
	 * The result of ldi_ident_from_major() is only checked by the
	 * ASSERT below.
	 */
	major = mod_name_to_major(INET_NAME);
	error = ldi_ident_from_major(major, &us->us_ldi_ident);
	ASSERT(error == 0);
	return (us);
}

/*
 * Free the UDP stack instance.
 *
 * `arg' is the udp_stack_t returned by udp_stack_init().  Destroys the
 * fanout locks and table, the parameter list and array, both kstats and the
 * LDI identifier, then frees the udp_stack_t itself.
 */
static void
udp_stack_fini(netstackid_t stackid, void *arg)
{
	udp_stack_t *us = (udp_stack_t *)arg;
	int i;

	for (i = 0; i < us->us_bind_fanout_size; i++) {
		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
	}

	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
	    sizeof (udp_fanout_t));

	us->us_bind_fanout = NULL;

	nd_free(&us->us_nd);
	kmem_free(us->us_param_arr, sizeof (udp_param_arr));
	us->us_param_arr = NULL;

	udp_kstat_fini(stackid, us->us_mibkp);
	us->us_mibkp = NULL;

	udp_kstat2_fini(stackid, us->us_kstat);
	us->us_kstat = NULL;
	bzero(&us->us_statistics, sizeof (us->us_statistics));

	ldi_ident_release(us->us_ldi_ident);
	kmem_free(us, sizeof (*us));
}

/*
 * Create and install the "udpstat" named kstat for the given stack.
 *
 * The kstat is created with KSTAT_FLAG_VIRTUAL and its ks_data is pointed at
 * the caller's storage (*us_statisticsp), which is first initialized from
 * the name/type template below.  ks_private records the stack id.
 *
 * Returns the kstat, or NULL if it could not be created (in which case
 * *us_statisticsp is left untouched).
 */
static void *
udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
{
	kstat_t *ksp;

	udp_stat_t template = {
		{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
		{ "udp_out_opt",		KSTAT_DATA_UINT64 },
		{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
		{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
		{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
#ifdef DEBUG
		{ "udp_data_conn",		KSTAT_DATA_UINT64 },
		{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
		{ "udp_out_lastdst",		KSTAT_DATA_UINT64 },
		{ "udp_out_diffdst",		KSTAT_DATA_UINT64 },
		{ "udp_out_ipv6",		KSTAT_DATA_UINT64 },
		{ "udp_out_mapped",		KSTAT_DATA_UINT64 },
		{ "udp_out_ipv4",		KSTAT_DATA_UINT64 },
#endif
	};

	/* The entry count is derived from the template's size. */
	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net",
	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL, stackid);

	if (ksp == NULL)
		return (NULL);

	bcopy(&template, us_statisticsp, sizeof (template));
	ksp->ks_data = (void *)us_statisticsp;
	ksp->ks_private = (void *)(uintptr_t)stackid;

	kstat_install(ksp);
	return (ksp);
}
4921 4922 static void 4923 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 4924 { 4925 if (ksp != NULL) { 4926 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4927 kstat_delete_netstack(ksp, stackid); 4928 } 4929 } 4930 4931 static void * 4932 udp_kstat_init(netstackid_t stackid) 4933 { 4934 kstat_t *ksp; 4935 4936 udp_named_kstat_t template = { 4937 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 4938 { "inErrors", KSTAT_DATA_UINT32, 0 }, 4939 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 4940 { "entrySize", KSTAT_DATA_INT32, 0 }, 4941 { "entry6Size", KSTAT_DATA_INT32, 0 }, 4942 { "outErrors", KSTAT_DATA_UINT32, 0 }, 4943 }; 4944 4945 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 4946 KSTAT_TYPE_NAMED, 4947 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 4948 4949 if (ksp == NULL || ksp->ks_data == NULL) 4950 return (NULL); 4951 4952 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 4953 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 4954 4955 bcopy(&template, ksp->ks_data, sizeof (template)); 4956 ksp->ks_update = udp_kstat_update; 4957 ksp->ks_private = (void *)(uintptr_t)stackid; 4958 4959 kstat_install(ksp); 4960 return (ksp); 4961 } 4962 4963 static void 4964 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4965 { 4966 if (ksp != NULL) { 4967 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4968 kstat_delete_netstack(ksp, stackid); 4969 } 4970 } 4971 4972 static int 4973 udp_kstat_update(kstat_t *kp, int rw) 4974 { 4975 udp_named_kstat_t *udpkp; 4976 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 4977 netstack_t *ns; 4978 udp_stack_t *us; 4979 4980 if ((kp == NULL) || (kp->ks_data == NULL)) 4981 return (EIO); 4982 4983 if (rw == KSTAT_WRITE) 4984 return (EACCES); 4985 4986 ns = netstack_find_by_stackid(stackid); 4987 if (ns == NULL) 4988 return (-1); 4989 us = ns->netstack_udp; 4990 if (us == NULL) { 4991 netstack_rele(ns); 4992 return (-1); 4993 } 4994 udpkp = (udp_named_kstat_t 
*)kp->ks_data; 4995 4996 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 4997 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 4998 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 4999 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 5000 netstack_rele(ns); 5001 return (0); 5002 } 5003 5004 static size_t 5005 udp_set_rcv_hiwat(udp_t *udp, size_t size) 5006 { 5007 udp_stack_t *us = udp->udp_us; 5008 5009 /* We add a bit of extra buffering */ 5010 size += size >> 1; 5011 if (size > us->us_max_buf) 5012 size = us->us_max_buf; 5013 5014 udp->udp_rcv_hiwat = size; 5015 return (size); 5016 } 5017 5018 /* 5019 * For the lower queue so that UDP can be a dummy mux. 5020 * Nobody should be sending 5021 * packets up this stream 5022 */ 5023 static void 5024 udp_lrput(queue_t *q, mblk_t *mp) 5025 { 5026 switch (mp->b_datap->db_type) { 5027 case M_FLUSH: 5028 /* Turn around */ 5029 if (*mp->b_rptr & FLUSHW) { 5030 *mp->b_rptr &= ~FLUSHR; 5031 qreply(q, mp); 5032 return; 5033 } 5034 break; 5035 } 5036 freemsg(mp); 5037 } 5038 5039 /* 5040 * For the lower queue so that UDP can be a dummy mux. 5041 * Nobody should be sending packets down this stream. 5042 */ 5043 /* ARGSUSED */ 5044 void 5045 udp_lwput(queue_t *q, mblk_t *mp) 5046 { 5047 freemsg(mp); 5048 } 5049 5050 /* 5051 * Below routines for UDP socket module. 5052 */ 5053 5054 static conn_t * 5055 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp) 5056 { 5057 udp_t *udp; 5058 conn_t *connp; 5059 zoneid_t zoneid; 5060 netstack_t *ns; 5061 udp_stack_t *us; 5062 int len; 5063 5064 ASSERT(errorp != NULL); 5065 5066 if ((*errorp = secpolicy_basic_net_access(credp)) != 0) 5067 return (NULL); 5068 5069 ns = netstack_find_by_cred(credp); 5070 ASSERT(ns != NULL); 5071 us = ns->netstack_udp; 5072 ASSERT(us != NULL); 5073 5074 /* 5075 * For exclusive stacks we set the zoneid to zero 5076 * to make UDP operate as if in the global zone. 
5077 */ 5078 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 5079 zoneid = GLOBAL_ZONEID; 5080 else 5081 zoneid = crgetzoneid(credp); 5082 5083 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 5084 5085 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 5086 if (connp == NULL) { 5087 netstack_rele(ns); 5088 *errorp = ENOMEM; 5089 return (NULL); 5090 } 5091 udp = connp->conn_udp; 5092 5093 /* 5094 * ipcl_conn_create did a netstack_hold. Undo the hold that was 5095 * done by netstack_find_by_cred() 5096 */ 5097 netstack_rele(ns); 5098 5099 /* 5100 * Since this conn_t/udp_t is not yet visible to anybody else we don't 5101 * need to lock anything. 5102 */ 5103 ASSERT(connp->conn_proto == IPPROTO_UDP); 5104 ASSERT(connp->conn_udp == udp); 5105 ASSERT(udp->udp_connp == connp); 5106 5107 /* Set the initial state of the stream and the privilege status. */ 5108 udp->udp_state = TS_UNBND; 5109 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 5110 if (isv6) { 5111 connp->conn_family = AF_INET6; 5112 connp->conn_ipversion = IPV6_VERSION; 5113 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5114 connp->conn_default_ttl = us->us_ipv6_hoplimit; 5115 len = sizeof (ip6_t) + UDPH_SIZE; 5116 } else { 5117 connp->conn_family = AF_INET; 5118 connp->conn_ipversion = IPV4_VERSION; 5119 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5120 connp->conn_default_ttl = us->us_ipv4_ttl; 5121 len = sizeof (ipha_t) + UDPH_SIZE; 5122 } 5123 5124 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 5125 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 5126 5127 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 5128 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 5129 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 5130 connp->conn_ixa->ixa_zoneid = zoneid; 5131 5132 connp->conn_zoneid = zoneid; 5133 5134 /* 5135 * If the caller has the process-wide flag set, then default to MAC 5136 * exempt mode. 
This allows read-down to unlabeled hosts. 5137 */ 5138 if (getpflags(NET_MAC_AWARE, credp) != 0) 5139 connp->conn_mac_mode = CONN_MAC_AWARE; 5140 5141 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 5142 5143 udp->udp_us = us; 5144 5145 connp->conn_rcvbuf = us->us_recv_hiwat; 5146 connp->conn_sndbuf = us->us_xmit_hiwat; 5147 connp->conn_sndlowat = us->us_xmit_lowat; 5148 connp->conn_rcvlowat = udp_mod_info.mi_lowat; 5149 5150 connp->conn_wroff = len + us->us_wroff_extra; 5151 connp->conn_so_type = SOCK_DGRAM; 5152 5153 connp->conn_recv = udp_input; 5154 connp->conn_recvicmp = udp_icmp_input; 5155 crhold(credp); 5156 connp->conn_cred = credp; 5157 connp->conn_cpid = curproc->p_pid; 5158 connp->conn_open_time = ddi_get_lbolt64(); 5159 /* Cache things in ixa without an extra refhold */ 5160 connp->conn_ixa->ixa_cred = connp->conn_cred; 5161 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 5162 if (is_system_labeled()) 5163 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 5164 5165 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 5166 5167 if (us->us_pmtu_discovery) 5168 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 5169 5170 return (connp); 5171 } 5172 5173 sock_lower_handle_t 5174 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 5175 uint_t *smodep, int *errorp, int flags, cred_t *credp) 5176 { 5177 udp_t *udp = NULL; 5178 udp_stack_t *us; 5179 conn_t *connp; 5180 boolean_t isv6; 5181 5182 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 5183 (proto != 0 && proto != IPPROTO_UDP)) { 5184 *errorp = EPROTONOSUPPORT; 5185 return (NULL); 5186 } 5187 5188 if (family == AF_INET6) 5189 isv6 = B_TRUE; 5190 else 5191 isv6 = B_FALSE; 5192 5193 connp = udp_do_open(credp, isv6, flags, errorp); 5194 if (connp == NULL) 5195 return (NULL); 5196 5197 udp = connp->conn_udp; 5198 ASSERT(udp != NULL); 5199 us = udp->udp_us; 5200 ASSERT(us != NULL); 5201 5202 udp->udp_issocket = B_TRUE; 5203 
connp->conn_flags |= IPCL_NONSTR; 5204 5205 /* 5206 * Set flow control 5207 * Since this conn_t/udp_t is not yet visible to anybody else we don't 5208 * need to lock anything. 5209 */ 5210 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); 5211 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; 5212 5213 connp->conn_flow_cntrld = B_FALSE; 5214 5215 mutex_enter(&connp->conn_lock); 5216 connp->conn_state_flags &= ~CONN_INCIPIENT; 5217 mutex_exit(&connp->conn_lock); 5218 5219 *errorp = 0; 5220 *smodep = SM_ATOMIC; 5221 *sock_downcalls = &sock_udp_downcalls; 5222 return ((sock_lower_handle_t)connp); 5223 } 5224 5225 /* ARGSUSED3 */ 5226 void 5227 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 5228 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 5229 { 5230 conn_t *connp = (conn_t *)proto_handle; 5231 struct sock_proto_props sopp; 5232 5233 /* All Solaris components should pass a cred for this operation. */ 5234 ASSERT(cr != NULL); 5235 5236 connp->conn_upcalls = sock_upcalls; 5237 connp->conn_upper_handle = sock_handle; 5238 5239 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 5240 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 5241 sopp.sopp_wroff = connp->conn_wroff; 5242 sopp.sopp_maxblk = INFPSZ; 5243 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 5244 sopp.sopp_rxlowat = connp->conn_rcvlowat; 5245 sopp.sopp_maxaddrlen = sizeof (sin6_t); 5246 sopp.sopp_maxpsz = 5247 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 5248 UDP_MAXPACKET_IPV6; 5249 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : 5250 udp_mod_info.mi_minpsz; 5251 5252 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 5253 &sopp); 5254 } 5255 5256 static void 5257 udp_do_close(conn_t *connp) 5258 { 5259 udp_t *udp; 5260 5261 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 5262 udp = connp->conn_udp; 5263 5264 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 5265 /* 5266 * Running in cluster mode - register unbind information 5267 */ 5268 if (connp->conn_ipversion == IPV4_VERSION) { 5269 (*cl_inet_unbind)( 5270 connp->conn_netstack->netstack_stackid, 5271 IPPROTO_UDP, AF_INET, 5272 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5273 (in_port_t)connp->conn_lport, NULL); 5274 } else { 5275 (*cl_inet_unbind)( 5276 connp->conn_netstack->netstack_stackid, 5277 IPPROTO_UDP, AF_INET6, 5278 (uint8_t *)&(connp->conn_laddr_v6), 5279 (in_port_t)connp->conn_lport, NULL); 5280 } 5281 } 5282 5283 udp_bind_hash_remove(udp, B_FALSE); 5284 5285 ip_quiesce_conn(connp); 5286 5287 if (!IPCL_IS_NONSTR(connp)) { 5288 ASSERT(connp->conn_wq != NULL); 5289 ASSERT(connp->conn_rq != NULL); 5290 qprocsoff(connp->conn_rq); 5291 } 5292 5293 udp_close_free(connp); 5294 5295 /* 5296 * Now we are truly single threaded on this stream, and can 5297 * delete the things hanging off the connp, and finally the connp. 5298 * We removed this connp from the fanout list, it cannot be 5299 * accessed thru the fanouts, and we already waited for the 5300 * conn_ref to drop to 0. We are already in close, so 5301 * there cannot be any other thread from the top. qprocsoff 5302 * has completed, and service has completed or won't run in 5303 * future. 
5304 */ 5305 ASSERT(connp->conn_ref == 1); 5306 5307 if (!IPCL_IS_NONSTR(connp)) { 5308 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 5309 } else { 5310 ip_free_helper_stream(connp); 5311 } 5312 5313 connp->conn_ref--; 5314 ipcl_conn_destroy(connp); 5315 } 5316 5317 /* ARGSUSED1 */ 5318 int 5319 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 5320 { 5321 conn_t *connp = (conn_t *)proto_handle; 5322 5323 /* All Solaris components should pass a cred for this operation. */ 5324 ASSERT(cr != NULL); 5325 5326 udp_do_close(connp); 5327 return (0); 5328 } 5329 5330 static int 5331 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 5332 boolean_t bind_to_req_port_only) 5333 { 5334 sin_t *sin; 5335 sin6_t *sin6; 5336 udp_t *udp = connp->conn_udp; 5337 int error = 0; 5338 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 5339 in_port_t port; /* Host byte order */ 5340 in_port_t requested_port; /* Host byte order */ 5341 int count; 5342 ipaddr_t v4src; /* Set if AF_INET */ 5343 in6_addr_t v6src; 5344 int loopmax; 5345 udp_fanout_t *udpf; 5346 in_port_t lport; /* Network byte order */ 5347 uint_t scopeid = 0; 5348 zoneid_t zoneid = IPCL_ZONEID(connp); 5349 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5350 boolean_t is_inaddr_any; 5351 mlp_type_t addrtype, mlptype; 5352 udp_stack_t *us = udp->udp_us; 5353 5354 switch (len) { 5355 case sizeof (sin_t): /* Complete IPv4 address */ 5356 sin = (sin_t *)sa; 5357 5358 if (sin == NULL || !OK_32PTR((char *)sin)) 5359 return (EINVAL); 5360 5361 if (connp->conn_family != AF_INET || 5362 sin->sin_family != AF_INET) { 5363 return (EAFNOSUPPORT); 5364 } 5365 v4src = sin->sin_addr.s_addr; 5366 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 5367 if (v4src != INADDR_ANY) { 5368 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 5369 B_TRUE); 5370 } 5371 port = ntohs(sin->sin_port); 5372 break; 5373 5374 case sizeof (sin6_t): /* complete IPv6 address */ 5375 sin6 = (sin6_t *)sa; 
5376 5377 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 5378 return (EINVAL); 5379 5380 if (connp->conn_family != AF_INET6 || 5381 sin6->sin6_family != AF_INET6) { 5382 return (EAFNOSUPPORT); 5383 } 5384 v6src = sin6->sin6_addr; 5385 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5386 if (connp->conn_ipv6_v6only) 5387 return (EADDRNOTAVAIL); 5388 5389 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); 5390 if (v4src != INADDR_ANY) { 5391 laddr_type = ip_laddr_verify_v4(v4src, 5392 zoneid, ipst, B_FALSE); 5393 } 5394 } else { 5395 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5396 if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 5397 scopeid = sin6->sin6_scope_id; 5398 laddr_type = ip_laddr_verify_v6(&v6src, 5399 zoneid, ipst, B_TRUE, scopeid); 5400 } 5401 } 5402 port = ntohs(sin6->sin6_port); 5403 break; 5404 5405 default: /* Invalid request */ 5406 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5407 "udp_bind: bad ADDR_length length %u", len); 5408 return (-TBADADDR); 5409 } 5410 5411 /* Is the local address a valid unicast, multicast, or broadcast? */ 5412 if (laddr_type == IPVL_BAD) 5413 return (EADDRNOTAVAIL); 5414 5415 requested_port = port; 5416 5417 if (requested_port == 0 || !bind_to_req_port_only) 5418 bind_to_req_port_only = B_FALSE; 5419 else /* T_BIND_REQ and requested_port != 0 */ 5420 bind_to_req_port_only = B_TRUE; 5421 5422 if (requested_port == 0) { 5423 /* 5424 * If the application passed in zero for the port number, it 5425 * doesn't care which port number we bind to. Get one in the 5426 * valid range. 5427 */ 5428 if (connp->conn_anon_priv_bind) { 5429 port = udp_get_next_priv_port(udp); 5430 } else { 5431 port = udp_update_next_port(udp, 5432 us->us_next_port_to_try, B_TRUE); 5433 } 5434 } else { 5435 /* 5436 * If the port is in the well-known privileged range, 5437 * make sure the caller was privileged. 
5438 */ 5439 int i; 5440 boolean_t priv = B_FALSE; 5441 5442 if (port < us->us_smallest_nonpriv_port) { 5443 priv = B_TRUE; 5444 } else { 5445 for (i = 0; i < us->us_num_epriv_ports; i++) { 5446 if (port == us->us_epriv_ports[i]) { 5447 priv = B_TRUE; 5448 break; 5449 } 5450 } 5451 } 5452 5453 if (priv) { 5454 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 5455 return (-TACCES); 5456 } 5457 } 5458 5459 if (port == 0) 5460 return (-TNOADDR); 5461 5462 /* 5463 * The state must be TS_UNBND. TPI mandates that users must send 5464 * TPI primitives only 1 at a time and wait for the response before 5465 * sending the next primitive. 5466 */ 5467 mutex_enter(&connp->conn_lock); 5468 if (udp->udp_state != TS_UNBND) { 5469 mutex_exit(&connp->conn_lock); 5470 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5471 "udp_bind: bad state, %u", udp->udp_state); 5472 return (-TOUTSTATE); 5473 } 5474 /* 5475 * Copy the source address into our udp structure. This address 5476 * may still be zero; if so, IP will fill in the correct address 5477 * each time an outbound packet is passed to it. Since the udp is 5478 * not yet in the bind hash list, we don't grab the uf_lock to 5479 * change conn_ipversion 5480 */ 5481 if (connp->conn_family == AF_INET) { 5482 ASSERT(sin != NULL); 5483 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); 5484 } else { 5485 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5486 /* 5487 * no need to hold the uf_lock to set the conn_ipversion 5488 * since we are not yet in the fanout list 5489 */ 5490 connp->conn_ipversion = IPV4_VERSION; 5491 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5492 } else { 5493 connp->conn_ipversion = IPV6_VERSION; 5494 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5495 } 5496 } 5497 5498 /* 5499 * If conn_reuseaddr is not set, then we have to make sure that 5500 * the IP address and port number the application requested 5501 * (or we selected for the application) is not being used by 5502 * another stream. 
If another stream is already using the 5503 * requested IP address and port, the behavior depends on 5504 * "bind_to_req_port_only". If set the bind fails; otherwise we 5505 * search for any an unused port to bind to the stream. 5506 * 5507 * As per the BSD semantics, as modified by the Deering multicast 5508 * changes, if udp_reuseaddr is set, then we allow multiple binds 5509 * to the same port independent of the local IP address. 5510 * 5511 * This is slightly different than in SunOS 4.X which did not 5512 * support IP multicast. Note that the change implemented by the 5513 * Deering multicast code effects all binds - not only binding 5514 * to IP multicast addresses. 5515 * 5516 * Note that when binding to port zero we ignore SO_REUSEADDR in 5517 * order to guarantee a unique port. 5518 */ 5519 5520 count = 0; 5521 if (connp->conn_anon_priv_bind) { 5522 /* 5523 * loopmax = (IPPORT_RESERVED-1) - 5524 * us->us_min_anonpriv_port + 1 5525 */ 5526 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 5527 } else { 5528 loopmax = us->us_largest_anon_port - 5529 us->us_smallest_anon_port + 1; 5530 } 5531 5532 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 5533 5534 for (;;) { 5535 udp_t *udp1; 5536 boolean_t found_exclbind = B_FALSE; 5537 conn_t *connp1; 5538 5539 /* 5540 * Walk through the list of udp streams bound to 5541 * requested port with the same IP address. 5542 */ 5543 lport = htons(port); 5544 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 5545 us->us_bind_fanout_size)]; 5546 mutex_enter(&udpf->uf_lock); 5547 for (udp1 = udpf->uf_udp; udp1 != NULL; 5548 udp1 = udp1->udp_bind_hash) { 5549 connp1 = udp1->udp_connp; 5550 5551 if (lport != connp1->conn_lport) 5552 continue; 5553 5554 /* 5555 * On a labeled system, we must treat bindings to ports 5556 * on shared IP addresses by sockets with MAC exemption 5557 * privilege as being in all zones, as there's 5558 * otherwise no way to identify the right receiver. 
5559 */ 5560 if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) 5561 continue; 5562 5563 /* 5564 * If UDP_EXCLBIND is set for either the bound or 5565 * binding endpoint, the semantics of bind 5566 * is changed according to the following chart. 5567 * 5568 * spec = specified address (v4 or v6) 5569 * unspec = unspecified address (v4 or v6) 5570 * A = specified addresses are different for endpoints 5571 * 5572 * bound bind to allowed? 5573 * ------------------------------------- 5574 * unspec unspec no 5575 * unspec spec no 5576 * spec unspec no 5577 * spec spec yes if A 5578 * 5579 * For labeled systems, SO_MAC_EXEMPT behaves the same 5580 * as UDP_EXCLBIND, except that zoneid is ignored. 5581 */ 5582 if (connp1->conn_exclbind || connp->conn_exclbind || 5583 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 5584 if (V6_OR_V4_INADDR_ANY( 5585 connp1->conn_bound_addr_v6) || 5586 is_inaddr_any || 5587 IN6_ARE_ADDR_EQUAL( 5588 &connp1->conn_bound_addr_v6, 5589 &v6src)) { 5590 found_exclbind = B_TRUE; 5591 break; 5592 } 5593 continue; 5594 } 5595 5596 /* 5597 * Check ipversion to allow IPv4 and IPv6 sockets to 5598 * have disjoint port number spaces. 5599 */ 5600 if (connp->conn_ipversion != connp1->conn_ipversion) { 5601 5602 /* 5603 * On the first time through the loop, if the 5604 * the user intentionally specified a 5605 * particular port number, then ignore any 5606 * bindings of the other protocol that may 5607 * conflict. This allows the user to bind IPv6 5608 * alone and get both v4 and v6, or bind both 5609 * both and get each seperately. On subsequent 5610 * times through the loop, we're checking a 5611 * port that we chose (not the user) and thus 5612 * we do not allow casual duplicate bindings. 5613 */ 5614 if (count == 0 && requested_port != 0) 5615 continue; 5616 } 5617 5618 /* 5619 * No difference depending on SO_REUSEADDR. 
5620 * 5621 * If existing port is bound to a 5622 * non-wildcard IP address and 5623 * the requesting stream is bound to 5624 * a distinct different IP addresses 5625 * (non-wildcard, also), keep going. 5626 */ 5627 if (!is_inaddr_any && 5628 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && 5629 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, 5630 &v6src)) { 5631 continue; 5632 } 5633 break; 5634 } 5635 5636 if (!found_exclbind && 5637 (connp->conn_reuseaddr && requested_port != 0)) { 5638 break; 5639 } 5640 5641 if (udp1 == NULL) { 5642 /* 5643 * No other stream has this IP address 5644 * and port number. We can use it. 5645 */ 5646 break; 5647 } 5648 mutex_exit(&udpf->uf_lock); 5649 if (bind_to_req_port_only) { 5650 /* 5651 * We get here only when requested port 5652 * is bound (and only first of the for() 5653 * loop iteration). 5654 * 5655 * The semantics of this bind request 5656 * require it to fail so we return from 5657 * the routine (and exit the loop). 5658 * 5659 */ 5660 mutex_exit(&connp->conn_lock); 5661 return (-TADDRBUSY); 5662 } 5663 5664 if (connp->conn_anon_priv_bind) { 5665 port = udp_get_next_priv_port(udp); 5666 } else { 5667 if ((count == 0) && (requested_port != 0)) { 5668 /* 5669 * If the application wants us to find 5670 * a port, get one to start with. Set 5671 * requested_port to 0, so that we will 5672 * update us->us_next_port_to_try below. 5673 */ 5674 port = udp_update_next_port(udp, 5675 us->us_next_port_to_try, B_TRUE); 5676 requested_port = 0; 5677 } else { 5678 port = udp_update_next_port(udp, port + 1, 5679 B_FALSE); 5680 } 5681 } 5682 5683 if (port == 0 || ++count >= loopmax) { 5684 /* 5685 * We've tried every possible port number and 5686 * there are none available, so send an error 5687 * to the user. 5688 */ 5689 mutex_exit(&connp->conn_lock); 5690 return (-TNOADDR); 5691 } 5692 } 5693 5694 /* 5695 * Copy the source address into our udp structure. 
This address 5696 * may still be zero; if so, ip_attr_connect will fill in the correct 5697 * address when a packet is about to be sent. 5698 * If we are binding to a broadcast or multicast address then 5699 * we just set the conn_bound_addr since we don't want to use 5700 * that as the source address when sending. 5701 */ 5702 connp->conn_bound_addr_v6 = v6src; 5703 connp->conn_laddr_v6 = v6src; 5704 if (scopeid != 0) { 5705 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 5706 connp->conn_ixa->ixa_scopeid = scopeid; 5707 connp->conn_incoming_ifindex = scopeid; 5708 } else { 5709 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5710 connp->conn_incoming_ifindex = connp->conn_bound_if; 5711 } 5712 5713 switch (laddr_type) { 5714 case IPVL_UNICAST_UP: 5715 case IPVL_UNICAST_DOWN: 5716 connp->conn_saddr_v6 = v6src; 5717 connp->conn_mcbc_bind = B_FALSE; 5718 break; 5719 case IPVL_MCAST: 5720 case IPVL_BCAST: 5721 /* ip_set_destination will pick a source address later */ 5722 connp->conn_saddr_v6 = ipv6_all_zeros; 5723 connp->conn_mcbc_bind = B_TRUE; 5724 break; 5725 } 5726 5727 /* Any errors after this point should use late_error */ 5728 connp->conn_lport = lport; 5729 5730 /* 5731 * Now reset the next anonymous port if the application requested 5732 * an anonymous port, or we handed out the next anonymous port. 5733 */ 5734 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { 5735 us->us_next_port_to_try = port + 1; 5736 } 5737 5738 /* Initialize the T_BIND_ACK. 
*/ 5739 if (connp->conn_family == AF_INET) { 5740 sin->sin_port = connp->conn_lport; 5741 } else { 5742 sin6->sin6_port = connp->conn_lport; 5743 } 5744 udp->udp_state = TS_IDLE; 5745 udp_bind_hash_insert(udpf, udp); 5746 mutex_exit(&udpf->uf_lock); 5747 mutex_exit(&connp->conn_lock); 5748 5749 if (cl_inet_bind) { 5750 /* 5751 * Running in cluster mode - register bind information 5752 */ 5753 if (connp->conn_ipversion == IPV4_VERSION) { 5754 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5755 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, 5756 (in_port_t)connp->conn_lport, NULL); 5757 } else { 5758 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5759 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, 5760 (in_port_t)connp->conn_lport, NULL); 5761 } 5762 } 5763 5764 mutex_enter(&connp->conn_lock); 5765 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 5766 if (is_system_labeled() && (!connp->conn_anon_port || 5767 connp->conn_anon_mlp)) { 5768 uint16_t mlpport; 5769 zone_t *zone; 5770 5771 zone = crgetzone(cr); 5772 connp->conn_mlp_type = 5773 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : 5774 mlptSingle; 5775 addrtype = tsol_mlp_addr_type( 5776 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 5777 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 5778 if (addrtype == mlptSingle) { 5779 error = -TNOADDR; 5780 mutex_exit(&connp->conn_lock); 5781 goto late_error; 5782 } 5783 mlpport = connp->conn_anon_port ? PMAPPORT : port; 5784 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 5785 addrtype); 5786 5787 /* 5788 * It is a coding error to attempt to bind an MLP port 5789 * without first setting SOL_SOCKET/SCM_UCRED. 5790 */ 5791 if (mlptype != mlptSingle && 5792 connp->conn_mlp_type == mlptSingle) { 5793 error = EINVAL; 5794 mutex_exit(&connp->conn_lock); 5795 goto late_error; 5796 } 5797 5798 /* 5799 * It is an access violation to attempt to bind an MLP port 5800 * without NET_BINDMLP privilege. 
5801 */ 5802 if (mlptype != mlptSingle && 5803 secpolicy_net_bindmlp(cr) != 0) { 5804 if (connp->conn_debug) { 5805 (void) strlog(UDP_MOD_ID, 0, 1, 5806 SL_ERROR|SL_TRACE, 5807 "udp_bind: no priv for multilevel port %d", 5808 mlpport); 5809 } 5810 error = -TACCES; 5811 mutex_exit(&connp->conn_lock); 5812 goto late_error; 5813 } 5814 5815 /* 5816 * If we're specifically binding a shared IP address and the 5817 * port is MLP on shared addresses, then check to see if this 5818 * zone actually owns the MLP. Reject if not. 5819 */ 5820 if (mlptype == mlptShared && addrtype == mlptShared) { 5821 /* 5822 * No need to handle exclusive-stack zones since 5823 * ALL_ZONES only applies to the shared stack. 5824 */ 5825 zoneid_t mlpzone; 5826 5827 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 5828 htons(mlpport)); 5829 if (connp->conn_zoneid != mlpzone) { 5830 if (connp->conn_debug) { 5831 (void) strlog(UDP_MOD_ID, 0, 1, 5832 SL_ERROR|SL_TRACE, 5833 "udp_bind: attempt to bind port " 5834 "%d on shared addr in zone %d " 5835 "(should be %d)", 5836 mlpport, connp->conn_zoneid, 5837 mlpzone); 5838 } 5839 error = -TACCES; 5840 mutex_exit(&connp->conn_lock); 5841 goto late_error; 5842 } 5843 } 5844 if (connp->conn_anon_port) { 5845 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, 5846 port, B_TRUE); 5847 if (error != 0) { 5848 if (connp->conn_debug) { 5849 (void) strlog(UDP_MOD_ID, 0, 1, 5850 SL_ERROR|SL_TRACE, 5851 "udp_bind: cannot establish anon " 5852 "MLP for port %d", port); 5853 } 5854 error = -TACCES; 5855 mutex_exit(&connp->conn_lock); 5856 goto late_error; 5857 } 5858 } 5859 connp->conn_mlp_type = mlptype; 5860 } 5861 5862 /* 5863 * We create an initial header template here to make a subsequent 5864 * sendto have a starting point. Since conn_last_dst is zero the 5865 * first sendto will always follow the 'dst changed' code path. 5866 * Note that we defer massaging options and the related checksum 5867 * adjustment until we have a destination address. 
5868 */ 5869 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5870 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5871 if (error != 0) { 5872 mutex_exit(&connp->conn_lock); 5873 goto late_error; 5874 } 5875 /* Just in case */ 5876 connp->conn_faddr_v6 = ipv6_all_zeros; 5877 connp->conn_fport = 0; 5878 connp->conn_v6lastdst = ipv6_all_zeros; 5879 mutex_exit(&connp->conn_lock); 5880 5881 error = ip_laddr_fanout_insert(connp); 5882 if (error != 0) 5883 goto late_error; 5884 5885 /* Bind succeeded */ 5886 return (0); 5887 5888 late_error: 5889 /* We had already picked the port number, and then the bind failed */ 5890 mutex_enter(&connp->conn_lock); 5891 udpf = &us->us_bind_fanout[ 5892 UDP_BIND_HASH(connp->conn_lport, 5893 us->us_bind_fanout_size)]; 5894 mutex_enter(&udpf->uf_lock); 5895 connp->conn_saddr_v6 = ipv6_all_zeros; 5896 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5897 connp->conn_laddr_v6 = ipv6_all_zeros; 5898 if (scopeid != 0) { 5899 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5900 connp->conn_incoming_ifindex = connp->conn_bound_if; 5901 } 5902 udp->udp_state = TS_UNBND; 5903 udp_bind_hash_remove(udp, B_TRUE); 5904 connp->conn_lport = 0; 5905 mutex_exit(&udpf->uf_lock); 5906 connp->conn_anon_port = B_FALSE; 5907 connp->conn_mlp_type = mlptSingle; 5908 5909 connp->conn_v6lastdst = ipv6_all_zeros; 5910 5911 /* Restore the header that was built above - different source address */ 5912 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5913 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5914 mutex_exit(&connp->conn_lock); 5915 return (error); 5916 } 5917 5918 int 5919 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5920 socklen_t len, cred_t *cr) 5921 { 5922 int error; 5923 conn_t *connp; 5924 5925 /* All Solaris components should pass a cred for this operation. 
*/ 5926 ASSERT(cr != NULL); 5927 5928 connp = (conn_t *)proto_handle; 5929 5930 if (sa == NULL) 5931 error = udp_do_unbind(connp); 5932 else 5933 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 5934 5935 if (error < 0) { 5936 if (error == -TOUTSTATE) 5937 error = EINVAL; 5938 else 5939 error = proto_tlitosyserr(-error); 5940 } 5941 5942 return (error); 5943 } 5944 5945 static int 5946 udp_implicit_bind(conn_t *connp, cred_t *cr) 5947 { 5948 sin6_t sin6addr; 5949 sin_t *sin; 5950 sin6_t *sin6; 5951 socklen_t len; 5952 int error; 5953 5954 /* All Solaris components should pass a cred for this operation. */ 5955 ASSERT(cr != NULL); 5956 5957 if (connp->conn_family == AF_INET) { 5958 len = sizeof (struct sockaddr_in); 5959 sin = (sin_t *)&sin6addr; 5960 *sin = sin_null; 5961 sin->sin_family = AF_INET; 5962 sin->sin_addr.s_addr = INADDR_ANY; 5963 } else { 5964 ASSERT(connp->conn_family == AF_INET6); 5965 len = sizeof (sin6_t); 5966 sin6 = (sin6_t *)&sin6addr; 5967 *sin6 = sin6_null; 5968 sin6->sin6_family = AF_INET6; 5969 V6_SET_ZERO(sin6->sin6_addr); 5970 } 5971 5972 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len, 5973 cr, B_FALSE); 5974 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5975 } 5976 5977 /* 5978 * This routine removes a port number association from a stream. It 5979 * is called by udp_unbind and udp_tpi_unbind. 
5980 */ 5981 static int 5982 udp_do_unbind(conn_t *connp) 5983 { 5984 udp_t *udp = connp->conn_udp; 5985 udp_fanout_t *udpf; 5986 udp_stack_t *us = udp->udp_us; 5987 5988 if (cl_inet_unbind != NULL) { 5989 /* 5990 * Running in cluster mode - register unbind information 5991 */ 5992 if (connp->conn_ipversion == IPV4_VERSION) { 5993 (*cl_inet_unbind)( 5994 connp->conn_netstack->netstack_stackid, 5995 IPPROTO_UDP, AF_INET, 5996 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5997 (in_port_t)connp->conn_lport, NULL); 5998 } else { 5999 (*cl_inet_unbind)( 6000 connp->conn_netstack->netstack_stackid, 6001 IPPROTO_UDP, AF_INET6, 6002 (uint8_t *)&(connp->conn_laddr_v6), 6003 (in_port_t)connp->conn_lport, NULL); 6004 } 6005 } 6006 6007 mutex_enter(&connp->conn_lock); 6008 /* If a bind has not been done, we can't unbind. */ 6009 if (udp->udp_state == TS_UNBND) { 6010 mutex_exit(&connp->conn_lock); 6011 return (-TOUTSTATE); 6012 } 6013 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 6014 us->us_bind_fanout_size)]; 6015 mutex_enter(&udpf->uf_lock); 6016 udp_bind_hash_remove(udp, B_TRUE); 6017 connp->conn_saddr_v6 = ipv6_all_zeros; 6018 connp->conn_bound_addr_v6 = ipv6_all_zeros; 6019 connp->conn_laddr_v6 = ipv6_all_zeros; 6020 connp->conn_mcbc_bind = B_FALSE; 6021 connp->conn_lport = 0; 6022 /* In case we were also connected */ 6023 connp->conn_faddr_v6 = ipv6_all_zeros; 6024 connp->conn_fport = 0; 6025 mutex_exit(&udpf->uf_lock); 6026 6027 connp->conn_v6lastdst = ipv6_all_zeros; 6028 udp->udp_state = TS_UNBND; 6029 6030 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 6031 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 6032 mutex_exit(&connp->conn_lock); 6033 6034 ip_unbind(connp); 6035 6036 return (0); 6037 } 6038 6039 /* 6040 * It associates a default destination address with the stream. 
 *
 * sa/len describe the destination; the caller is expected to have validated
 * them already, and any length other than sizeof (sin_t) or sizeof (sin6_t)
 * yields EINVAL.  cr and pid are recorded in the transmit attributes
 * (ixa_cred/ixa_cpid).
 *
 * Returns 0 on success.  Failures detected here are reported either as a
 * positive errno (EINVAL, EADDRNOTAVAIL, ENOMEM) or as a negative TLI error
 * (-TBADADDR, -TOUTSTATE); errors from conn_connect, ipcl_conn_insert and
 * udp_build_hdr_template are passed through unchanged.  Once the state
 * checks have passed, any failure puts the endpoint back into TS_IDLE with
 * no peer address (see connect_failed below).
 */
static int
udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
    cred_t *cr, pid_t pid)
{
	sin6_t		*sin6;
	sin_t		*sin;
	in6_addr_t	v6dst;
	ipaddr_t	v4dst;
	uint16_t	dstport;
	uint32_t	flowinfo;
	udp_fanout_t	*udpf;
	udp_t		*udp, *udp1;
	ushort_t	ipversion;
	udp_stack_t	*us;
	int		error;
	conn_t		*connp1;
	ip_xmit_attr_t	*ixa;
	uint_t		scopeid = 0;
	uint_t		srcid = 0;
	in6_addr_t	v6src = connp->conn_saddr_v6;

	udp = connp->conn_udp;
	us = udp->udp_us;

	/*
	 * Address has been verified by the caller
	 */
	switch (len) {
	default:
		/*
		 * Should never happen
		 */
		return (EINVAL);

	case sizeof (sin_t):
		sin = (sin_t *)sa;
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(connp->conn_ipversion == IPV4_VERSION);
		ipversion = IPV4_VERSION;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)sa;
		v6dst = sin6->sin6_addr;
		dstport = sin6->sin6_port;
		srcid = sin6->__sin6_src_id;
		/*
		 * Only consult the source id when we do not already have a
		 * source address to remember with the destination.
		 */
		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
			    connp->conn_netstack);
		}
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			if (connp->conn_ipv6_v6only)
				return (EADDRNOTAVAIL);

			/*
			 * Destination address is mapped IPv6 address.
			 * Source bound address should be unspecified or
			 * IPv6 mapped address as well.
			 */
			if (!IN6_IS_ADDR_UNSPECIFIED(
			    &connp->conn_bound_addr_v6) &&
			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
				return (EADDRNOTAVAIL);
			}
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			ipversion = IPV4_VERSION;
			flowinfo = 0;
		} else {
			ipversion = IPV6_VERSION;
			flowinfo = sin6->sin6_flowinfo;
			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
				scopeid = sin6->sin6_scope_id;
		}
		break;
	}

	/* A destination port of zero is never a valid peer. */
	if (dstport == 0)
		return (-TBADADDR);

	/*
	 * If there is a different thread using conn_ixa then we get a new
	 * copy and cut the old one loose from conn_ixa. Otherwise we use
	 * conn_ixa and prevent any other thread from using/changing it.
	 * Once connect() is done other threads can use conn_ixa since the
	 * refcnt will be back at one.
	 */
	ixa = conn_get_ixa(connp, B_TRUE);
	if (ixa == NULL)
		return (ENOMEM);

	ASSERT(ixa->ixa_refcnt >= 2);
	ASSERT(ixa == connp->conn_ixa);

	mutex_enter(&connp->conn_lock);
	/*
	 * This udp_t must have bound to a port already before doing a connect.
	 * Reject if a connect is in progress (we drop conn_lock during
	 * udp_do_connect).
	 */
	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
		mutex_exit(&connp->conn_lock);
		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		ixa_refrele(ixa);
		return (-TOUTSTATE);
	}
	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);

	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
	    us->us_bind_fanout_size)];

	mutex_enter(&udpf->uf_lock);
	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_faddr_v6 = ipv6_all_zeros;
		connp->conn_fport = 0;
		udp->udp_state = TS_IDLE;
	}

	connp->conn_fport = dstport;
	connp->conn_ipversion = ipversion;
	if (ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 *
		 * Note that this writes through sin/sin6, which alias the
		 * caller's (const-qualified) sa buffer.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (connp->conn_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		connp->conn_faddr_v6 = v6dst;
		connp->conn_flowinfo = 0;
	} else {
		ASSERT(connp->conn_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		connp->conn_faddr_v6 = v6dst;
		connp->conn_flowinfo = flowinfo;
	}
	mutex_exit(&udpf->uf_lock);

	ixa->ixa_cred = cr;
	ixa->ixa_cpid = pid;
	if (is_system_labeled()) {
		/* We need to restart with a label based on the cred */
		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
	}

	if (scopeid != 0) {
		ixa->ixa_flags |= IXAF_SCOPEID_SET;
		ixa->ixa_scopeid = scopeid;
		connp->conn_incoming_ifindex = scopeid;
	} else {
		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
		connp->conn_incoming_ifindex = connp->conn_bound_if;
	}
	/*
	 * conn_connect will drop conn_lock and reacquire it.
	 * To prevent a send* from messing with this udp_t while the lock
	 * is dropped we set udp_state and clear conn_v6lastdst.
	 * That will make all send* fail with EISCONN.
	 */
	connp->conn_v6lastdst = ipv6_all_zeros;
	udp->udp_state = TS_WCON_CREQ;

	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
	mutex_exit(&connp->conn_lock);
	if (error != 0)
		goto connect_failed;

	/*
	 * The addresses have been verified. Time to insert in
	 * the correct fanout list.
	 */
	error = ipcl_conn_insert(connp);
	if (error != 0)
		goto connect_failed;

	mutex_enter(&connp->conn_lock);
	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		goto connect_failed;
	}

	udp->udp_state = TS_DATA_XFER;
	/* Record this as the "last" send even though we haven't sent any */
	connp->conn_v6lastdst = connp->conn_faddr_v6;
	connp->conn_lastipversion = connp->conn_ipversion;
	connp->conn_lastdstport = connp->conn_fport;
	connp->conn_lastflowinfo = connp->conn_flowinfo;
	connp->conn_lastscopeid = scopeid;
	connp->conn_lastsrcid = srcid;
	/* Also remember a source to use together with lastdst */
	connp->conn_v6lastsrc = v6src;
	mutex_exit(&connp->conn_lock);

	/*
	 * We've picked a source address above. Now we can
	 * verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER, skipping ourselves.
	 */
	mutex_enter(&udpf->uf_lock);
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;

		if (udp1 == udp)
			continue;

		connp1 = udp1->udp_connp;
		/* Any differing field means this entry is not a duplicate. */
		if (connp->conn_lport != connp1->conn_lport ||
		    connp->conn_ipversion != connp1->conn_ipversion ||
		    dstport != connp1->conn_fport ||
		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
		    &connp1->conn_laddr_v6) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
			continue;
		/* Same local/remote pair already connected elsewhere. */
		mutex_exit(&udpf->uf_lock);
		error = -TBADADDR;
		goto connect_failed;
	}
	if (cl_inet_connect2 != NULL) {
		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
		if (error != 0) {
			/* The hook's own error value is replaced here. */
			mutex_exit(&udpf->uf_lock);
			error = -TBADADDR;
			goto connect_failed;
		}
	}
	mutex_exit(&udpf->uf_lock);

	ixa_refrele(ixa);
	return (0);

connect_failed:
	/*
	 * Every goto to this label happens after the conn_get_ixa NULL
	 * check above, so ixa is expected to be non-NULL here.
	 *
	 * NOTE(review): conn_ipversion, conn_incoming_ifindex and the ixa
	 * scopeid flag set earlier in this function are not restored on
	 * this path - confirm that is intended.
	 */
	if (ixa != NULL)
		ixa_refrele(ixa);
	mutex_enter(&connp->conn_lock);
	mutex_enter(&udpf->uf_lock);
	udp->udp_state = TS_IDLE;
	connp->conn_faddr_v6 = ipv6_all_zeros;
	connp->conn_fport = 0;
	/* In case the source address was set above */
	if (connp->conn_mcbc_bind)
		connp->conn_saddr_v6 = ipv6_all_zeros;
	else
		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
	mutex_exit(&udpf->uf_lock);

	connp->conn_v6lastdst = ipv6_all_zeros;
	connp->conn_flowinfo = 0;

	/* Rebuild the header template to match the restored addresses. */
	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
	mutex_exit(&connp->conn_lock);
	return (error);
}

/*
 * Socket connect entry point for UDP.  A NULL sa requests a disconnect,
 * which is only valid while connected (TS_DATA_XFER).  Otherwise the
 * address is validated, an unbound endpoint is implicitly bound, and
 * udp_do_connect does the work.  On success *id is set to 0 and the
 * su_connected upcall is made.  Negative (TLI) errors from udp_do_connect
 * are converted with proto_tlitosyserr before returning.
 */
static int
udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
    socklen_t len, sock_connid_t *id,
cred_t *cr) 6328 { 6329 conn_t *connp = (conn_t *)proto_handle; 6330 udp_t *udp = connp->conn_udp; 6331 int error; 6332 boolean_t did_bind = B_FALSE; 6333 pid_t pid = curproc->p_pid; 6334 6335 /* All Solaris components should pass a cred for this operation. */ 6336 ASSERT(cr != NULL); 6337 6338 if (sa == NULL) { 6339 /* 6340 * Disconnect 6341 * Make sure we are connected 6342 */ 6343 if (udp->udp_state != TS_DATA_XFER) 6344 return (EINVAL); 6345 6346 error = udp_disconnect(connp); 6347 return (error); 6348 } 6349 6350 error = proto_verify_ip_addr(connp->conn_family, sa, len); 6351 if (error != 0) 6352 goto done; 6353 6354 /* do an implicit bind if necessary */ 6355 if (udp->udp_state == TS_UNBND) { 6356 error = udp_implicit_bind(connp, cr); 6357 /* 6358 * We could be racing with an actual bind, in which case 6359 * we would see EPROTO. We cross our fingers and try 6360 * to connect. 6361 */ 6362 if (!(error == 0 || error == EPROTO)) 6363 goto done; 6364 did_bind = B_TRUE; 6365 } 6366 /* 6367 * set SO_DGRAM_ERRIND 6368 */ 6369 connp->conn_dgram_errind = B_TRUE; 6370 6371 error = udp_do_connect(connp, sa, len, cr, pid); 6372 6373 if (error != 0 && did_bind) { 6374 int unbind_err; 6375 6376 unbind_err = udp_do_unbind(connp); 6377 ASSERT(unbind_err == 0); 6378 } 6379 6380 if (error == 0) { 6381 *id = 0; 6382 (*connp->conn_upcalls->su_connected) 6383 (connp->conn_upper_handle, 0, NULL, -1); 6384 } else if (error < 0) { 6385 error = proto_tlitosyserr(-error); 6386 } 6387 6388 done: 6389 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 6390 /* 6391 * No need to hold locks to set state 6392 * after connect failure socket state is undefined 6393 * We set the state only to imitate old sockfs behavior 6394 */ 6395 udp->udp_state = TS_IDLE; 6396 } 6397 return (error); 6398 } 6399 6400 int 6401 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6402 cred_t *cr) 6403 { 6404 sin6_t *sin6; 6405 sin_t *sin = NULL; 6406 uint_t srcid; 6407 conn_t *connp = 
	    (conn_t *)proto_handle;
	udp_t		*udp = connp->conn_udp;
	int		error = 0;
	udp_stack_t	*us = udp->udp_us;
	ushort_t	ipversion;
	pid_t		pid = curproc->p_pid;
	ip_xmit_attr_t	*ixa;

	ASSERT(DB_TYPE(mp) == M_DATA);

	/* All Solaris components should pass a cred for this operation. */
	ASSERT(cr != NULL);

	/* do an implicit bind if necessary */
	if (udp->udp_state == TS_UNBND) {
		error = udp_implicit_bind(connp, cr);
		/*
		 * We could be racing with an actual bind, in which case
		 * we would see EPROTO. We cross our fingers and try
		 * to send.
		 */
		if (!(error == 0 || error == EPROTO)) {
			freemsg(mp);
			return (error);
		}
	}

	/* Connected? */
	if (msg->msg_name == NULL) {
		if (udp->udp_state != TS_DATA_XFER) {
			/*
			 * NOTE(review): unlike the implicit-bind failure
			 * above, this and the other early error returns in
			 * this function do not freemsg(mp).  Confirm against
			 * the caller's contract that mp is not leaked.
			 */
			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
			return (EDESTADDRREQ);
		}
		if (msg->msg_controllen != 0) {
			error = udp_output_ancillary(connp, NULL, NULL, mp,
			    NULL, msg, cr, pid);
		} else {
			error = udp_output_connected(connp, mp, cr, pid);
		}
		/* With us_sendto_ignerr set, output errors are not reported. */
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	}
	/* An explicit destination is not accepted on a connected endpoint. */
	if (udp->udp_state == TS_DATA_XFER) {
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		return (EISCONN);
	}
	error = proto_verify_ip_addr(connp->conn_family,
	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
	if (error != 0) {
		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
		return (error);
	}
	switch (connp->conn_family) {
	case AF_INET6:
		sin6 = (sin6_t *)msg->msg_name;

		srcid = sin6->__sin6_src_id;

		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			/*
			 * Destination is a non-IPv4-compatible IPv6 address.
			 * Send out an IPv6 format packet.
			 */

			/*
			 * If the local address is a mapped address return
			 * an error.
			 * It would be possible to send an IPv6 packet but the
			 * response would never make it back to the application
			 * since it is bound to a mapped address.
			 */
			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}
			/* An unspecified destination is sent to loopback. */
			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
				sin6->sin6_addr = ipv6_loopback;
			ipversion = IPV6_VERSION;
		} else {
			if (connp->conn_ipv6_v6only) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}

			/*
			 * If the local address is not zero or a mapped address
			 * return an error.  It would be possible to send an
			 * IPv4 packet but the response would never make it
			 * back to the application since it is bound to a
			 * non-mapped address.
			 */
			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (EADDRNOTAVAIL);
			}

			/* A zero IPv4 destination is sent to loopback. */
			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
				V4_PART_OF_V6(sin6->sin6_addr) =
				    htonl(INADDR_LOOPBACK);
			}
			ipversion = IPV4_VERSION;
		}

		/*
		 * We have to allocate an ip_xmit_attr_t before we grab
		 * conn_lock and we need to hold conn_lock once we've checked
		 * conn_same_as_last_v6 to handle concurrent send* calls on a
		 * socket.
		 */
		if (msg->msg_controllen == 0) {
			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (ENOMEM);
			}
		} else {
			/* The ancillary-data path below is not handed an ixa. */
			ixa = NULL;
		}
		mutex_enter(&connp->conn_lock);
		/*
		 * A pending delayed error is consumed (cleared) by this
		 * send regardless, but it is only returned to the caller
		 * when this destination matches the recorded one.
		 */
		if (udp->udp_delayed_error != 0) {
			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;

			error = udp->udp_delayed_error;
			udp->udp_delayed_error = 0;

			/* Compare IP address, port, and family */

			if (sin6->sin6_port == sin2->sin6_port &&
			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
			    &sin2->sin6_addr) &&
			    sin6->sin6_family == sin2->sin6_family) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				if (ixa != NULL)
					ixa_refrele(ixa);
				return (error);
			}
		}

		if (msg->msg_controllen != 0) {
			mutex_exit(&connp->conn_lock);
			ASSERT(ixa == NULL);
			error = udp_output_ancillary(connp, NULL, sin6, mp,
			    NULL, msg, cr, pid);
		} else if (conn_same_as_last_v6(connp, sin6) &&
		    connp->conn_lastsrcid == srcid &&
		    ipsec_outbound_policy_current(ixa)) {
			/* udp_output_lastdst drops conn_lock */
			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
		} else {
			/* udp_output_newdst drops conn_lock */
			error = udp_output_newdst(connp, mp, NULL, sin6,
			    ipversion, cr, pid, ixa);
		}
		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
		/* With us_sendto_ignerr set, output errors are not reported. */
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	case AF_INET:
		sin = (sin_t *)msg->msg_name;

		ipversion = IPV4_VERSION;

		/* A zero destination is sent to loopback. */
		if (sin->sin_addr.s_addr == INADDR_ANY)
			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);

		/*
		 * We have to allocate an ip_xmit_attr_t before we grab
		 * conn_lock and we need to hold conn_lock once we've checked
		 * conn_same_as_last_v4 to handle concurrent send* on a socket.
		 */
		if (msg->msg_controllen == 0) {
			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL) {
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				return (ENOMEM);
			}
		} else {
			/* The ancillary-data path below is not handed an ixa. */
			ixa = NULL;
		}
		mutex_enter(&connp->conn_lock);
		/*
		 * A pending delayed error is consumed (cleared) by this
		 * send regardless, but it is only returned to the caller
		 * when this destination matches the recorded one.
		 */
		if (udp->udp_delayed_error != 0) {
			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;

			error = udp->udp_delayed_error;
			udp->udp_delayed_error = 0;

			/* Compare IP address and port */

			if (sin->sin_port == sin2->sin_port &&
			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
				if (ixa != NULL)
					ixa_refrele(ixa);
				return (error);
			}
		}
		if (msg->msg_controllen != 0) {
			mutex_exit(&connp->conn_lock);
			ASSERT(ixa == NULL);
			error = udp_output_ancillary(connp, sin, NULL, mp,
			    NULL, msg, cr, pid);
		} else if (conn_same_as_last_v4(connp, sin) &&
		    ipsec_outbound_policy_current(ixa)) {
			/* udp_output_lastdst drops conn_lock */
			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
		} else {
			/* udp_output_newdst drops conn_lock */
			error = udp_output_newdst(connp, mp, sin, NULL,
			    ipversion, cr, pid, ixa);
		}
		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
		/* With us_sendto_ignerr set, output errors are not reported. */
		if (us->us_sendto_ignerr)
			return (0);
		else
			return (error);
	default:
		/* conn_family is neither AF_INET nor AF_INET6. */
		return (EINVAL);
	}
}

int
udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
    boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb)
{
	conn_t	*connp = (conn_t *)proto_handle;
	udp_t	*udp;
	struct T_capability_ack tca;
	struct sockaddr_in6 laddr, faddr;
	socklen_t laddrlen, faddrlen;
	short opts;
	struct stroptions *stropt;
	mblk_t *stropt_mp;
	int error;

	udp = connp->conn_udp;

	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);

	/*
	 * setup the fallback
stream that was allocated 6653 */ 6654 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6655 connp->conn_minor_arena = WR(q)->q_ptr; 6656 6657 RD(q)->q_ptr = WR(q)->q_ptr = connp; 6658 6659 WR(q)->q_qinfo = &udp_winit; 6660 6661 connp->conn_rq = RD(q); 6662 connp->conn_wq = WR(q); 6663 6664 /* Notify stream head about options before sending up data */ 6665 stropt_mp->b_datap->db_type = M_SETOPTS; 6666 stropt_mp->b_wptr += sizeof (*stropt); 6667 stropt = (struct stroptions *)stropt_mp->b_rptr; 6668 stropt->so_flags = SO_WROFF | SO_HIWAT; 6669 stropt->so_wroff = connp->conn_wroff; 6670 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 6671 putnext(RD(q), stropt_mp); 6672 6673 /* 6674 * Free the helper stream 6675 */ 6676 ip_free_helper_stream(connp); 6677 6678 if (!issocket) 6679 udp_use_pure_tpi(udp); 6680 6681 /* 6682 * Collect the information needed to sync with the sonode 6683 */ 6684 udp_do_capability_ack(udp, &tca, TC1_INFO); 6685 6686 laddrlen = faddrlen = sizeof (sin6_t); 6687 (void) udp_getsockname((sock_lower_handle_t)connp, 6688 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6689 error = udp_getpeername((sock_lower_handle_t)connp, 6690 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6691 if (error != 0) 6692 faddrlen = 0; 6693 6694 opts = 0; 6695 if (connp->conn_dgram_errind) 6696 opts |= SO_DGRAM_ERRIND; 6697 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 6698 opts |= SO_DONTROUTE; 6699 6700 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 6701 (struct sockaddr *)&laddr, laddrlen, 6702 (struct sockaddr *)&faddr, faddrlen, opts); 6703 6704 mutex_enter(&udp->udp_recv_lock); 6705 /* 6706 * Attempts to send data up during fallback will result in it being 6707 * queued in udp_t. Now we push up any queued packets. 
6708 */ 6709 while (udp->udp_fallback_queue_head != NULL) { 6710 mblk_t *mp; 6711 mp = udp->udp_fallback_queue_head; 6712 udp->udp_fallback_queue_head = mp->b_next; 6713 mutex_exit(&udp->udp_recv_lock); 6714 mp->b_next = NULL; 6715 putnext(RD(q), mp); 6716 mutex_enter(&udp->udp_recv_lock); 6717 } 6718 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6719 /* 6720 * No longer a streams less socket 6721 */ 6722 mutex_enter(&connp->conn_lock); 6723 connp->conn_flags &= ~IPCL_NONSTR; 6724 mutex_exit(&connp->conn_lock); 6725 6726 mutex_exit(&udp->udp_recv_lock); 6727 6728 ASSERT(connp->conn_ref >= 1); 6729 6730 return (0); 6731 } 6732 6733 /* ARGSUSED3 */ 6734 int 6735 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6736 socklen_t *salenp, cred_t *cr) 6737 { 6738 conn_t *connp = (conn_t *)proto_handle; 6739 udp_t *udp = connp->conn_udp; 6740 int error; 6741 6742 /* All Solaris components should pass a cred for this operation. */ 6743 ASSERT(cr != NULL); 6744 6745 mutex_enter(&connp->conn_lock); 6746 if (udp->udp_state != TS_DATA_XFER) 6747 error = ENOTCONN; 6748 else 6749 error = conn_getpeername(connp, sa, salenp); 6750 mutex_exit(&connp->conn_lock); 6751 return (error); 6752 } 6753 6754 /* ARGSUSED3 */ 6755 int 6756 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6757 socklen_t *salenp, cred_t *cr) 6758 { 6759 conn_t *connp = (conn_t *)proto_handle; 6760 int error; 6761 6762 /* All Solaris components should pass a cred for this operation. 
*/ 6763 ASSERT(cr != NULL); 6764 6765 mutex_enter(&connp->conn_lock); 6766 error = conn_getsockname(connp, sa, salenp); 6767 mutex_exit(&connp->conn_lock); 6768 return (error); 6769 } 6770 6771 int 6772 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6773 void *optvalp, socklen_t *optlen, cred_t *cr) 6774 { 6775 conn_t *connp = (conn_t *)proto_handle; 6776 int error; 6777 t_uscalar_t max_optbuf_len; 6778 void *optvalp_buf; 6779 int len; 6780 6781 /* All Solaris components should pass a cred for this operation. */ 6782 ASSERT(cr != NULL); 6783 6784 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6785 udp_opt_obj.odb_opt_des_arr, 6786 udp_opt_obj.odb_opt_arr_cnt, 6787 B_FALSE, B_TRUE, cr); 6788 if (error != 0) { 6789 if (error < 0) 6790 error = proto_tlitosyserr(-error); 6791 return (error); 6792 } 6793 6794 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6795 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6796 if (len == -1) { 6797 kmem_free(optvalp_buf, max_optbuf_len); 6798 return (EINVAL); 6799 } 6800 6801 /* 6802 * update optlen and copy option value 6803 */ 6804 t_uscalar_t size = MIN(len, *optlen); 6805 6806 bcopy(optvalp_buf, optvalp, size); 6807 bcopy(&size, optlen, sizeof (size)); 6808 6809 kmem_free(optvalp_buf, max_optbuf_len); 6810 return (0); 6811 } 6812 6813 int 6814 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6815 const void *optvalp, socklen_t optlen, cred_t *cr) 6816 { 6817 conn_t *connp = (conn_t *)proto_handle; 6818 int error; 6819 6820 /* All Solaris components should pass a cred for this operation. 
*/ 6821 ASSERT(cr != NULL); 6822 6823 error = proto_opt_check(level, option_name, optlen, NULL, 6824 udp_opt_obj.odb_opt_des_arr, 6825 udp_opt_obj.odb_opt_arr_cnt, 6826 B_TRUE, B_FALSE, cr); 6827 6828 if (error != 0) { 6829 if (error < 0) 6830 error = proto_tlitosyserr(-error); 6831 return (error); 6832 } 6833 6834 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6835 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6836 NULL, cr); 6837 6838 ASSERT(error >= 0); 6839 6840 return (error); 6841 } 6842 6843 void 6844 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6845 { 6846 conn_t *connp = (conn_t *)proto_handle; 6847 udp_t *udp = connp->conn_udp; 6848 6849 mutex_enter(&udp->udp_recv_lock); 6850 connp->conn_flow_cntrld = B_FALSE; 6851 mutex_exit(&udp->udp_recv_lock); 6852 } 6853 6854 /* ARGSUSED2 */ 6855 int 6856 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6857 { 6858 conn_t *connp = (conn_t *)proto_handle; 6859 6860 /* All Solaris components should pass a cred for this operation. */ 6861 ASSERT(cr != NULL); 6862 6863 /* shut down the send side */ 6864 if (how != SHUT_RD) 6865 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6866 SOCK_OPCTL_SHUT_SEND, 0); 6867 /* shut down the recv side */ 6868 if (how != SHUT_WR) 6869 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6870 SOCK_OPCTL_SHUT_RECV, 0); 6871 return (0); 6872 } 6873 6874 int 6875 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6876 int mode, int32_t *rvalp, cred_t *cr) 6877 { 6878 conn_t *connp = (conn_t *)proto_handle; 6879 int error; 6880 6881 /* All Solaris components should pass a cred for this operation. */ 6882 ASSERT(cr != NULL); 6883 6884 /* 6885 * If we don't have a helper stream then create one. 6886 * ip_create_helper_stream takes care of locking the conn_t, 6887 * so this check for NULL is just a performance optimization. 
6888 */ 6889 if (connp->conn_helper_info == NULL) { 6890 udp_stack_t *us = connp->conn_udp->udp_us; 6891 6892 ASSERT(us->us_ldi_ident != NULL); 6893 6894 /* 6895 * Create a helper stream for non-STREAMS socket. 6896 */ 6897 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6898 if (error != 0) { 6899 ip0dbg(("tcp_ioctl: create of IP helper stream " 6900 "failed %d\n", error)); 6901 return (error); 6902 } 6903 } 6904 6905 switch (cmd) { 6906 case ND_SET: 6907 case ND_GET: 6908 case _SIOCSOCKFALLBACK: 6909 case TI_GETPEERNAME: 6910 case TI_GETMYNAME: 6911 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6912 cmd)); 6913 error = EINVAL; 6914 break; 6915 default: 6916 /* 6917 * Pass on to IP using helper stream 6918 */ 6919 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6920 cmd, arg, mode, cr, rvalp); 6921 break; 6922 } 6923 return (error); 6924 } 6925 6926 /* ARGSUSED */ 6927 int 6928 udp_accept(sock_lower_handle_t lproto_handle, 6929 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6930 cred_t *cr) 6931 { 6932 return (EOPNOTSUPP); 6933 } 6934 6935 /* ARGSUSED */ 6936 int 6937 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6938 { 6939 return (EOPNOTSUPP); 6940 } 6941 6942 sock_downcalls_t sock_udp_downcalls = { 6943 udp_activate, /* sd_activate */ 6944 udp_accept, /* sd_accept */ 6945 udp_bind, /* sd_bind */ 6946 udp_listen, /* sd_listen */ 6947 udp_connect, /* sd_connect */ 6948 udp_getpeername, /* sd_getpeername */ 6949 udp_getsockname, /* sd_getsockname */ 6950 udp_getsockopt, /* sd_getsockopt */ 6951 udp_setsockopt, /* sd_setsockopt */ 6952 udp_send, /* sd_send */ 6953 NULL, /* sd_send_uio */ 6954 NULL, /* sd_recv_uio */ 6955 NULL, /* sd_poll */ 6956 udp_shutdown, /* sd_shutdown */ 6957 udp_clr_flowctrl, /* sd_setflowctrl */ 6958 udp_ioctl, /* sd_ioctl */ 6959 udp_close /* sd_close */ 6960 }; 6961