1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/stropts.h> 30 #include <sys/strlog.h> 31 #include <sys/strsun.h> 32 #define _SUN_TPI_VERSION 2 33 #include <sys/tihdr.h> 34 #include <sys/timod.h> 35 #include <sys/ddi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsubr.h> 38 #include <sys/suntpi.h> 39 #include <sys/xti_inet.h> 40 #include <sys/kmem.h> 41 #include <sys/cred_impl.h> 42 #include <sys/policy.h> 43 #include <sys/priv.h> 44 #include <sys/ucred.h> 45 #include <sys/zone.h> 46 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/sockio.h> 50 #include <sys/vtrace.h> 51 #include <sys/sdt.h> 52 #include <sys/debug.h> 53 #include <sys/isa_defs.h> 54 #include <sys/random.h> 55 #include <netinet/in.h> 56 #include <netinet/ip6.h> 57 #include <netinet/icmp6.h> 58 #include <netinet/udp.h> 59 60 #include <inet/common.h> 61 #include <inet/ip.h> 62 #include <inet/ip_impl.h> 63 #include <inet/ipsec_impl.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_ire.h> 66 #include <inet/ip_if.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_ndp.h> 69 #include <inet/proto_set.h> 70 #include <inet/mib2.h> 71 #include <inet/nd.h> 72 #include <inet/optcom.h> 73 #include <inet/snmpcom.h> 74 #include <inet/kstatcom.h> 75 #include <inet/ipclassifier.h> 76 #include <sys/squeue_impl.h> 77 #include <inet/ipnet.h> 78 #include <sys/ethernet.h> 79 80 #include <sys/tsol/label.h> 81 #include <sys/tsol/tnet.h> 82 #include <rpc/pmap_prot.h> 83 84 #include <inet/udp_impl.h> 85 86 /* 87 * Synchronization notes: 88 * 89 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 90 * locks, the fanout lock (uf_lock) and conn_lock. conn_lock 91 * protects the contents of the udp_t. uf_lock protects the address and the 92 * fanout information. 93 * The lock order is conn_lock -> uf_lock. 94 * 95 * The fanout lock uf_lock: 96 * When a UDP endpoint is bound to a local port, it is inserted into 97 * a bind hash list. 
The list consists of an array of udp_fanout_t buckets. 98 * The size of the array is controlled by the udp_bind_fanout_size variable. 99 * This variable can be changed in /etc/system if the default value is 100 * not large enough. Each bind hash bucket is protected by a per bucket 101 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 102 * structure and a few other fields in the udp_t. A UDP endpoint is removed 103 * from the bind hash list only when it is being unbound or being closed. 104 * The per bucket lock also protects a UDP endpoint's state changes. 105 * 106 * Plumbing notes: 107 * UDP is always a device driver. For compatibility with mibopen() code 108 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 109 * dummy module. 110 * 111 * The above implies that we don't support any intermediate module to 112 * reside in between /dev/ip and udp -- in fact, we never supported such 113 * scenario in the past as the inter-layer communication semantics have 114 * always been private. 
115 */ 116 117 /* For /etc/system control */ 118 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 119 120 static void udp_addr_req(queue_t *q, mblk_t *mp); 121 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 122 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 123 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 124 static int udp_build_hdr_template(conn_t *, const in6_addr_t *, 125 const in6_addr_t *, in_port_t, uint32_t); 126 static void udp_capability_req(queue_t *q, mblk_t *mp); 127 static int udp_tpi_close(queue_t *q, int flags); 128 static void udp_close_free(conn_t *); 129 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 130 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 131 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 132 int sys_error); 133 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 134 t_scalar_t tlierr, int sys_error); 135 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 136 cred_t *cr); 137 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 138 char *value, caddr_t cp, cred_t *cr); 139 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 140 char *value, caddr_t cp, cred_t *cr); 141 static void udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 142 static void udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, 143 ip_recv_attr_t *ira); 144 static void udp_info_req(queue_t *q, mblk_t *mp); 145 static void udp_input(void *, mblk_t *, void *, ip_recv_attr_t *); 146 static void udp_lrput(queue_t *, mblk_t *); 147 static void udp_lwput(queue_t *, mblk_t *); 148 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 149 cred_t *credp, boolean_t isv6); 150 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 151 cred_t *credp); 152 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 153 cred_t *credp); 154 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, 
t_scalar_t name); 155 int udp_opt_set(conn_t *connp, uint_t optset_context, 156 int level, int name, uint_t inlen, 157 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 158 void *thisdg_attrs, cred_t *cr); 159 int udp_opt_get(conn_t *connp, int level, int name, 160 uchar_t *ptr); 161 static int udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, 162 pid_t pid); 163 static int udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, 164 pid_t pid, ip_xmit_attr_t *ixa); 165 static int udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, 166 sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t, 167 ip_xmit_attr_t *ixa); 168 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 169 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 170 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 171 cred_t *cr); 172 static mblk_t *udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *, 173 const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *, 174 int *); 175 static mblk_t *udp_prepend_header_template(conn_t *, ip_xmit_attr_t *, 176 mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *); 177 static void udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err); 178 static void udp_ud_err_connected(conn_t *, t_scalar_t); 179 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 180 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 181 boolean_t random); 182 static void udp_wput_other(queue_t *q, mblk_t *mp); 183 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 184 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 185 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 186 187 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 188 static void udp_stack_fini(netstackid_t stackid, void *arg); 189 190 static void *udp_kstat_init(netstackid_t stackid); 191 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 192 static void 
*udp_kstat2_init(netstackid_t, udp_stat_t *); 193 static void udp_kstat2_fini(netstackid_t, kstat_t *); 194 static int udp_kstat_update(kstat_t *kp, int rw); 195 196 197 /* Common routines for TPI and socket module */ 198 static void udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *); 199 200 /* Common routine for TPI and socket module */ 201 static conn_t *udp_do_open(cred_t *, boolean_t, int); 202 static void udp_do_close(conn_t *); 203 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 204 boolean_t); 205 static int udp_do_unbind(conn_t *); 206 207 int udp_getsockname(sock_lower_handle_t, 208 struct sockaddr *, socklen_t *, cred_t *); 209 int udp_getpeername(sock_lower_handle_t, 210 struct sockaddr *, socklen_t *, cred_t *); 211 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 212 cred_t *, pid_t); 213 214 #define UDP_RECV_HIWATER (56 * 1024) 215 #define UDP_RECV_LOWATER 128 216 #define UDP_XMIT_HIWATER (56 * 1024) 217 #define UDP_XMIT_LOWATER 1024 218 219 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst) 220 221 /* 222 * Checks if the given destination addr/port is allowed out. 223 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 224 * Called for each connect() and for sendto()/sendmsg() to a different 225 * destination. 226 * For connect(), called in udp_connect(). 227 * For sendto()/sendmsg(), called in udp_output_newdst(). 228 * 229 * This macro assumes that the cl_inet_connect2 hook is not NULL. 230 * Please check this before calling this macro. 
231 * 232 * void 233 * CL_INET_UDP_CONNECT(conn_t cp, udp_t *udp, boolean_t is_outgoing, 234 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 235 */ 236 #define CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) { \ 237 (err) = 0; \ 238 /* \ 239 * Running in cluster mode - check and register active \ 240 * "connection" information \ 241 */ \ 242 if ((cp)->conn_ipversion == IPV4_VERSION) \ 243 (err) = (*cl_inet_connect2)( \ 244 (cp)->conn_netstack->netstack_stackid, \ 245 IPPROTO_UDP, is_outgoing, AF_INET, \ 246 (uint8_t *)&((cp)->conn_laddr_v4), \ 247 (cp)->conn_lport, \ 248 (uint8_t *)&(V4_PART_OF_V6(*faddrp)), \ 249 (in_port_t)(fport), NULL); \ 250 else \ 251 (err) = (*cl_inet_connect2)( \ 252 (cp)->conn_netstack->netstack_stackid, \ 253 IPPROTO_UDP, is_outgoing, AF_INET6, \ 254 (uint8_t *)&((cp)->conn_laddr_v6), \ 255 (cp)->conn_lport, \ 256 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 257 } 258 259 static struct module_info udp_mod_info = { 260 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 261 }; 262 263 /* 264 * Entry points for UDP as a device. 265 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 266 */ 267 static struct qinit udp_rinitv4 = { 268 NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL 269 }; 270 271 static struct qinit udp_rinitv6 = { 272 NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL 273 }; 274 275 static struct qinit udp_winit = { 276 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info 277 }; 278 279 /* UDP entry point during fallback */ 280 struct qinit udp_fallback_sock_winit = { 281 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 282 }; 283 284 /* 285 * UDP needs to handle I_LINK and I_PLINK since ifconfig 286 * likes to use it as a place to hang the various streams. 
287 */ 288 static struct qinit udp_lrinit = { 289 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 290 }; 291 292 static struct qinit udp_lwinit = { 293 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info 294 }; 295 296 /* For AF_INET aka /dev/udp */ 297 struct streamtab udpinfov4 = { 298 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 299 }; 300 301 /* For AF_INET6 aka /dev/udp6 */ 302 struct streamtab udpinfov6 = { 303 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 304 }; 305 306 static sin_t sin_null; /* Zero address for quick clears */ 307 static sin6_t sin6_null; /* Zero address for quick clears */ 308 309 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 310 311 /* Default structure copied into T_INFO_ACK messages */ 312 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 313 T_INFO_ACK, 314 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 315 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 316 T_INVALID, /* CDATA_size. udp does not support connect data. */ 317 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 318 sizeof (sin_t), /* ADDR_size. */ 319 0, /* OPT_size - not initialized here */ 320 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 321 T_CLTS, /* SERV_type. udp supports connection-less. */ 322 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 323 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 324 }; 325 326 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 327 328 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 329 T_INFO_ACK, 330 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 331 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 332 T_INVALID, /* CDATA_size. udp does not support connect data. */ 333 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 334 sizeof (sin6_t), /* ADDR_size. */ 335 0, /* OPT_size - not initialized here */ 336 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. 
headers */ 337 T_CLTS, /* SERV_type. udp supports connection-less. */ 338 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 339 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 340 }; 341 342 /* largest UDP port number */ 343 #define UDP_MAX_PORT 65535 344 345 /* 346 * Table of ND variables supported by udp. These are loaded into us_nd 347 * in udp_open. 348 * All of these are alterable, within the min/max values given, at run time. 349 */ 350 /* BEGIN CSTYLED */ 351 udpparam_t udp_param_arr[] = { 352 /*min max value name */ 353 { 0L, 256, 32, "udp_wroff_extra" }, 354 { 1L, 255, 255, "udp_ipv4_ttl" }, 355 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 356 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 357 { 0, 1, 1, "udp_do_checksum" }, 358 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 359 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 360 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 361 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 362 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 363 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 364 { 0, 1, 0, "udp_pmtu_discovery" }, 365 { 0, 1, 0, "udp_sendto_ignerr" }, 366 }; 367 /* END CSTYLED */ 368 369 /* Setable in /etc/system */ 370 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 371 uint32_t udp_random_anon_port = 1; 372 373 /* 374 * Hook functions to enable cluster networking. 
375 * On non-clustered systems these vectors must always be NULL 376 */ 377 378 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 379 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 380 void *args) = NULL; 381 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 382 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 383 void *args) = NULL; 384 385 typedef union T_primitives *t_primp_t; 386 387 /* 388 * Return the next anonymous port in the privileged port range for 389 * bind checking. 390 * 391 * Trusted Extension (TX) notes: TX allows administrator to mark or 392 * reserve ports as Multilevel ports (MLP). MLP has special function 393 * on TX systems. Once a port is made MLP, it's not available as 394 * ordinary port. This creates "holes" in the port name space. It 395 * may be necessary to skip the "holes" find a suitable anon port. 396 */ 397 static in_port_t 398 udp_get_next_priv_port(udp_t *udp) 399 { 400 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 401 in_port_t nextport; 402 boolean_t restart = B_FALSE; 403 udp_stack_t *us = udp->udp_us; 404 405 retry: 406 if (next_priv_port < us->us_min_anonpriv_port || 407 next_priv_port >= IPPORT_RESERVED) { 408 next_priv_port = IPPORT_RESERVED - 1; 409 if (restart) 410 return (0); 411 restart = B_TRUE; 412 } 413 414 if (is_system_labeled() && 415 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 416 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 417 next_priv_port = nextport; 418 goto retry; 419 } 420 421 return (next_priv_port--); 422 } 423 424 /* 425 * Hash list removal routine for udp_t structures. 
426 */ 427 static void 428 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 429 { 430 udp_t *udpnext; 431 kmutex_t *lockp; 432 udp_stack_t *us = udp->udp_us; 433 conn_t *connp = udp->udp_connp; 434 435 if (udp->udp_ptpbhn == NULL) 436 return; 437 438 /* 439 * Extract the lock pointer in case there are concurrent 440 * hash_remove's for this instance. 441 */ 442 ASSERT(connp->conn_lport != 0); 443 if (!caller_holds_lock) { 444 lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 445 us->us_bind_fanout_size)].uf_lock; 446 ASSERT(lockp != NULL); 447 mutex_enter(lockp); 448 } 449 if (udp->udp_ptpbhn != NULL) { 450 udpnext = udp->udp_bind_hash; 451 if (udpnext != NULL) { 452 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 453 udp->udp_bind_hash = NULL; 454 } 455 *udp->udp_ptpbhn = udpnext; 456 udp->udp_ptpbhn = NULL; 457 } 458 if (!caller_holds_lock) { 459 mutex_exit(lockp); 460 } 461 } 462 463 static void 464 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 465 { 466 conn_t *connp = udp->udp_connp; 467 udp_t **udpp; 468 udp_t *udpnext; 469 conn_t *connext; 470 471 ASSERT(MUTEX_HELD(&uf->uf_lock)); 472 ASSERT(udp->udp_ptpbhn == NULL); 473 udpp = &uf->uf_udp; 474 udpnext = udpp[0]; 475 if (udpnext != NULL) { 476 /* 477 * If the new udp bound to the INADDR_ANY address 478 * and the first one in the list is not bound to 479 * INADDR_ANY we skip all entries until we find the 480 * first one bound to INADDR_ANY. 481 * This makes sure that applications binding to a 482 * specific address get preference over those binding to 483 * INADDR_ANY. 
484 */ 485 connext = udpnext->udp_connp; 486 if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) && 487 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 488 while ((udpnext = udpp[0]) != NULL && 489 !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) { 490 udpp = &(udpnext->udp_bind_hash); 491 } 492 if (udpnext != NULL) 493 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 494 } else { 495 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 496 } 497 } 498 udp->udp_bind_hash = udpnext; 499 udp->udp_ptpbhn = udpp; 500 udpp[0] = udp; 501 } 502 503 /* 504 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 505 * passed to udp_wput. 506 * It associates a port number and local address with the stream. 507 * It calls IP to verify the local IP address, and calls IP to insert 508 * the conn_t in the fanout table. 509 * If everything is ok it then sends the T_BIND_ACK back up. 510 * 511 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 512 * without setting SO_REUSEADDR. This is needed so that they 513 * can be viewed as two independent transport protocols. 514 * However, anonymouns ports are allocated from the same range to avoid 515 * duplicating the us->us_next_port_to_try. 516 */ 517 static void 518 udp_tpi_bind(queue_t *q, mblk_t *mp) 519 { 520 sin_t *sin; 521 sin6_t *sin6; 522 mblk_t *mp1; 523 struct T_bind_req *tbr; 524 conn_t *connp; 525 udp_t *udp; 526 int error; 527 struct sockaddr *sa; 528 cred_t *cr; 529 530 /* 531 * All Solaris components should pass a db_credp 532 * for this TPI message, hence we ASSERT. 533 * But in case there is some other M_PROTO that looks 534 * like a TPI message sent by some other kernel 535 * component, we check and return an error. 
536 */ 537 cr = msg_getcred(mp, NULL); 538 ASSERT(cr != NULL); 539 if (cr == NULL) { 540 udp_err_ack(q, mp, TSYSERR, EINVAL); 541 return; 542 } 543 544 connp = Q_TO_CONN(q); 545 udp = connp->conn_udp; 546 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 547 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 548 "udp_bind: bad req, len %u", 549 (uint_t)(mp->b_wptr - mp->b_rptr)); 550 udp_err_ack(q, mp, TPROTO, 0); 551 return; 552 } 553 if (udp->udp_state != TS_UNBND) { 554 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 555 "udp_bind: bad state, %u", udp->udp_state); 556 udp_err_ack(q, mp, TOUTSTATE, 0); 557 return; 558 } 559 /* 560 * Reallocate the message to make sure we have enough room for an 561 * address. 562 */ 563 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1); 564 if (mp1 == NULL) { 565 udp_err_ack(q, mp, TSYSERR, ENOMEM); 566 return; 567 } 568 569 mp = mp1; 570 571 /* Reset the message type in preparation for shipping it back. */ 572 DB_TYPE(mp) = M_PCPROTO; 573 574 tbr = (struct T_bind_req *)mp->b_rptr; 575 switch (tbr->ADDR_length) { 576 case 0: /* Request for a generic port */ 577 tbr->ADDR_offset = sizeof (struct T_bind_req); 578 if (connp->conn_family == AF_INET) { 579 tbr->ADDR_length = sizeof (sin_t); 580 sin = (sin_t *)&tbr[1]; 581 *sin = sin_null; 582 sin->sin_family = AF_INET; 583 mp->b_wptr = (uchar_t *)&sin[1]; 584 sa = (struct sockaddr *)sin; 585 } else { 586 ASSERT(connp->conn_family == AF_INET6); 587 tbr->ADDR_length = sizeof (sin6_t); 588 sin6 = (sin6_t *)&tbr[1]; 589 *sin6 = sin6_null; 590 sin6->sin6_family = AF_INET6; 591 mp->b_wptr = (uchar_t *)&sin6[1]; 592 sa = (struct sockaddr *)sin6; 593 } 594 break; 595 596 case sizeof (sin_t): /* Complete IPv4 address */ 597 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 598 sizeof (sin_t)); 599 if (sa == NULL || !OK_32PTR((char *)sa)) { 600 udp_err_ack(q, mp, TSYSERR, EINVAL); 601 return; 602 } 603 if (connp->conn_family != AF_INET || 604 sa->sa_family != AF_INET) { 605 
udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 606 return; 607 } 608 break; 609 610 case sizeof (sin6_t): /* complete IPv6 address */ 611 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 612 sizeof (sin6_t)); 613 if (sa == NULL || !OK_32PTR((char *)sa)) { 614 udp_err_ack(q, mp, TSYSERR, EINVAL); 615 return; 616 } 617 if (connp->conn_family != AF_INET6 || 618 sa->sa_family != AF_INET6) { 619 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 620 return; 621 } 622 break; 623 624 default: /* Invalid request */ 625 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 626 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 627 udp_err_ack(q, mp, TBADADDR, 0); 628 return; 629 } 630 631 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 632 tbr->PRIM_type != O_T_BIND_REQ); 633 634 if (error != 0) { 635 if (error > 0) { 636 udp_err_ack(q, mp, TSYSERR, error); 637 } else { 638 udp_err_ack(q, mp, -error, 0); 639 } 640 } else { 641 tbr->PRIM_type = T_BIND_ACK; 642 qreply(q, mp); 643 } 644 } 645 646 /* 647 * This routine handles each T_CONN_REQ message passed to udp. It 648 * associates a default destination address with the stream. 649 * 650 * After various error checks are completed, udp_connect() lays 651 * the target address and port into the composite header template. 652 * Then we ask IP for information, including a source address if we didn't 653 * already have one. Finally we send up the T_OK_ACK reply message. 654 */ 655 static void 656 udp_tpi_connect(queue_t *q, mblk_t *mp) 657 { 658 conn_t *connp = Q_TO_CONN(q); 659 int error; 660 socklen_t len; 661 struct sockaddr *sa; 662 struct T_conn_req *tcr; 663 cred_t *cr; 664 pid_t pid; 665 /* 666 * All Solaris components should pass a db_credp 667 * for this TPI message, hence we ASSERT. 668 * But in case there is some other M_PROTO that looks 669 * like a TPI message sent by some other kernel 670 * component, we check and return an error. 
671 */ 672 cr = msg_getcred(mp, &pid); 673 ASSERT(cr != NULL); 674 if (cr == NULL) { 675 udp_err_ack(q, mp, TSYSERR, EINVAL); 676 return; 677 } 678 679 tcr = (struct T_conn_req *)mp->b_rptr; 680 681 /* A bit of sanity checking */ 682 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 683 udp_err_ack(q, mp, TPROTO, 0); 684 return; 685 } 686 687 if (tcr->OPT_length != 0) { 688 udp_err_ack(q, mp, TBADOPT, 0); 689 return; 690 } 691 692 /* 693 * Determine packet type based on type of address passed in 694 * the request should contain an IPv4 or IPv6 address. 695 * Make sure that address family matches the type of 696 * family of the address passed down. 697 */ 698 len = tcr->DEST_length; 699 switch (tcr->DEST_length) { 700 default: 701 udp_err_ack(q, mp, TBADADDR, 0); 702 return; 703 704 case sizeof (sin_t): 705 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 706 sizeof (sin_t)); 707 break; 708 709 case sizeof (sin6_t): 710 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 711 sizeof (sin6_t)); 712 break; 713 } 714 715 error = proto_verify_ip_addr(connp->conn_family, sa, len); 716 if (error != 0) { 717 udp_err_ack(q, mp, TSYSERR, error); 718 return; 719 } 720 721 error = udp_do_connect(connp, sa, len, cr, pid); 722 if (error != 0) { 723 if (error < 0) 724 udp_err_ack(q, mp, -error, 0); 725 else 726 udp_err_ack(q, mp, TSYSERR, error); 727 } else { 728 mblk_t *mp1; 729 /* 730 * We have to send a connection confirmation to 731 * keep TLI happy. 732 */ 733 if (connp->conn_family == AF_INET) { 734 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 735 sizeof (sin_t), NULL, 0); 736 } else { 737 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 738 sizeof (sin6_t), NULL, 0); 739 } 740 if (mp1 == NULL) { 741 udp_err_ack(q, mp, TSYSERR, ENOMEM); 742 return; 743 } 744 745 /* 746 * Send ok_ack for T_CONN_REQ 747 */ 748 mp = mi_tpi_ok_ack_alloc(mp); 749 if (mp == NULL) { 750 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 751 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 752 return; 753 } 754 755 putnext(connp->conn_rq, mp); 756 putnext(connp->conn_rq, mp1); 757 } 758 } 759 760 static int 761 udp_tpi_close(queue_t *q, int flags) 762 { 763 conn_t *connp; 764 765 if (flags & SO_FALLBACK) { 766 /* 767 * stream is being closed while in fallback 768 * simply free the resources that were allocated 769 */ 770 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 771 qprocsoff(q); 772 goto done; 773 } 774 775 connp = Q_TO_CONN(q); 776 udp_do_close(connp); 777 done: 778 q->q_ptr = WR(q)->q_ptr = NULL; 779 return (0); 780 } 781 782 static void 783 udp_close_free(conn_t *connp) 784 { 785 udp_t *udp = connp->conn_udp; 786 787 /* If there are any options associated with the stream, free them. */ 788 if (udp->udp_recv_ipp.ipp_fields != 0) 789 ip_pkt_free(&udp->udp_recv_ipp); 790 791 /* 792 * Clear any fields which the kmem_cache constructor clears. 793 * Only udp_connp needs to be preserved. 794 * TBD: We should make this more efficient to avoid clearing 795 * everything. 
796 */ 797 ASSERT(udp->udp_connp == connp); 798 bzero(udp, sizeof (udp_t)); 799 udp->udp_connp = connp; 800 } 801 802 static int 803 udp_do_disconnect(conn_t *connp) 804 { 805 udp_t *udp; 806 udp_fanout_t *udpf; 807 udp_stack_t *us; 808 int error; 809 810 udp = connp->conn_udp; 811 us = udp->udp_us; 812 mutex_enter(&connp->conn_lock); 813 if (udp->udp_state != TS_DATA_XFER) { 814 mutex_exit(&connp->conn_lock); 815 return (-TOUTSTATE); 816 } 817 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 818 us->us_bind_fanout_size)]; 819 mutex_enter(&udpf->uf_lock); 820 if (connp->conn_mcbc_bind) 821 connp->conn_saddr_v6 = ipv6_all_zeros; 822 else 823 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 824 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 825 connp->conn_faddr_v6 = ipv6_all_zeros; 826 connp->conn_fport = 0; 827 udp->udp_state = TS_IDLE; 828 mutex_exit(&udpf->uf_lock); 829 830 /* Remove any remnants of mapped address binding */ 831 if (connp->conn_family == AF_INET6) 832 connp->conn_ipversion = IPV6_VERSION; 833 834 connp->conn_v6lastdst = ipv6_all_zeros; 835 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 836 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 837 mutex_exit(&connp->conn_lock); 838 if (error != 0) 839 return (error); 840 841 /* 842 * Tell IP to remove the full binding and revert 843 * to the local address binding. 844 */ 845 return (ip_laddr_fanout_insert(connp)); 846 } 847 848 static void 849 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 850 { 851 conn_t *connp = Q_TO_CONN(q); 852 int error; 853 854 /* 855 * Allocate the largest primitive we need to send back 856 * T_error_ack is > than T_ok_ack 857 */ 858 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 859 if (mp == NULL) { 860 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 861 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 862 return; 863 } 864 865 error = udp_do_disconnect(connp); 866 867 if (error != 0) { 868 if (error < 0) { 869 udp_err_ack(q, mp, -error, 0); 870 } else { 871 udp_err_ack(q, mp, TSYSERR, error); 872 } 873 } else { 874 mp = mi_tpi_ok_ack_alloc(mp); 875 ASSERT(mp != NULL); 876 qreply(q, mp); 877 } 878 } 879 880 int 881 udp_disconnect(conn_t *connp) 882 { 883 int error; 884 885 connp->conn_dgram_errind = B_FALSE; 886 error = udp_do_disconnect(connp); 887 if (error < 0) 888 error = proto_tlitosyserr(-error); 889 890 return (error); 891 } 892 893 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 894 static void 895 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 896 { 897 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 898 qreply(q, mp); 899 } 900 901 /* Shorthand to generate and send TPI error acks to our client */ 902 static void 903 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive, 904 t_scalar_t t_error, int sys_error) 905 { 906 struct T_error_ack *teackp; 907 908 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 909 M_PCPROTO, T_ERROR_ACK)) != NULL) { 910 teackp = (struct T_error_ack *)mp->b_rptr; 911 teackp->ERROR_prim = primitive; 912 teackp->TLI_error = t_error; 913 teackp->UNIX_error = sys_error; 914 qreply(q, mp); 915 } 916 } 917 918 /*ARGSUSED2*/ 919 static int 920 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 921 { 922 int i; 923 udp_t *udp = Q_TO_UDP(q); 924 udp_stack_t *us = udp->udp_us; 925 926 for (i = 0; i < us->us_num_epriv_ports; i++) { 927 if (us->us_epriv_ports[i] != 0) 928 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 929 } 930 return (0); 931 } 932 933 /* ARGSUSED1 */ 934 static int 935 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 936 cred_t *cr) 937 { 938 long new_value; 939 int i; 940 udp_t *udp = Q_TO_UDP(q); 941 udp_stack_t *us = 
udp->udp_us; 942 943 /* 944 * Fail the request if the new value does not lie within the 945 * port number limits. 946 */ 947 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 948 new_value <= 0 || new_value >= 65536) { 949 return (EINVAL); 950 } 951 952 /* Check if the value is already in the list */ 953 for (i = 0; i < us->us_num_epriv_ports; i++) { 954 if (new_value == us->us_epriv_ports[i]) { 955 return (EEXIST); 956 } 957 } 958 /* Find an empty slot */ 959 for (i = 0; i < us->us_num_epriv_ports; i++) { 960 if (us->us_epriv_ports[i] == 0) 961 break; 962 } 963 if (i == us->us_num_epriv_ports) { 964 return (EOVERFLOW); 965 } 966 967 /* Set the new value */ 968 us->us_epriv_ports[i] = (in_port_t)new_value; 969 return (0); 970 } 971 972 /* ARGSUSED1 */ 973 static int 974 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 975 cred_t *cr) 976 { 977 long new_value; 978 int i; 979 udp_t *udp = Q_TO_UDP(q); 980 udp_stack_t *us = udp->udp_us; 981 982 /* 983 * Fail the request if the new value does not lie within the 984 * port number limits. 985 */ 986 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 987 new_value <= 0 || new_value >= 65536) { 988 return (EINVAL); 989 } 990 991 /* Check that the value is already in the list */ 992 for (i = 0; i < us->us_num_epriv_ports; i++) { 993 if (us->us_epriv_ports[i] == new_value) 994 break; 995 } 996 if (i == us->us_num_epriv_ports) { 997 return (ESRCH); 998 } 999 1000 /* Clear the value */ 1001 us->us_epriv_ports[i] = 0; 1002 return (0); 1003 } 1004 1005 /* At minimum we need 4 bytes of UDP header */ 1006 #define ICMP_MIN_UDP_HDR 4 1007 1008 /* 1009 * udp_icmp_input is called as conn_recvicmp to process ICMP messages. 1010 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1011 * Assumes that IP has pulled up everything up to and including the ICMP header. 
1012 */ 1013 /* ARGSUSED2 */ 1014 static void 1015 udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira) 1016 { 1017 conn_t *connp = (conn_t *)arg1; 1018 icmph_t *icmph; 1019 ipha_t *ipha; 1020 int iph_hdr_length; 1021 udpha_t *udpha; 1022 sin_t sin; 1023 sin6_t sin6; 1024 mblk_t *mp1; 1025 int error = 0; 1026 udp_t *udp = connp->conn_udp; 1027 1028 ipha = (ipha_t *)mp->b_rptr; 1029 1030 ASSERT(OK_32PTR(mp->b_rptr)); 1031 1032 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1033 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1034 udp_icmp_error_ipv6(connp, mp, ira); 1035 return; 1036 } 1037 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1038 1039 /* Skip past the outer IP and ICMP headers */ 1040 ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length); 1041 iph_hdr_length = ira->ira_ip_hdr_length; 1042 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1043 ipha = (ipha_t *)&icmph[1]; /* Inner IP header */ 1044 1045 /* Skip past the inner IP and find the ULP header */ 1046 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1047 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1048 1049 switch (icmph->icmph_type) { 1050 case ICMP_DEST_UNREACHABLE: 1051 switch (icmph->icmph_code) { 1052 case ICMP_FRAGMENTATION_NEEDED: { 1053 ipha_t *ipha; 1054 ip_xmit_attr_t *ixa; 1055 /* 1056 * IP has already adjusted the path MTU. 1057 * But we need to adjust DF for IPv4. 1058 */ 1059 if (connp->conn_ipversion != IPV4_VERSION) 1060 break; 1061 1062 ixa = conn_get_ixa(connp, B_FALSE); 1063 if (ixa == NULL || ixa->ixa_ire == NULL) { 1064 /* 1065 * Some other thread holds conn_ixa. We will 1066 * redo this on the next ICMP too big. 
1067 */ 1068 if (ixa != NULL) 1069 ixa_refrele(ixa); 1070 break; 1071 } 1072 (void) ip_get_pmtu(ixa); 1073 1074 mutex_enter(&connp->conn_lock); 1075 ipha = (ipha_t *)connp->conn_ht_iphc; 1076 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 1077 ipha->ipha_fragment_offset_and_flags |= 1078 IPH_DF_HTONS; 1079 } else { 1080 ipha->ipha_fragment_offset_and_flags &= 1081 ~IPH_DF_HTONS; 1082 } 1083 mutex_exit(&connp->conn_lock); 1084 ixa_refrele(ixa); 1085 break; 1086 } 1087 case ICMP_PORT_UNREACHABLE: 1088 case ICMP_PROTOCOL_UNREACHABLE: 1089 error = ECONNREFUSED; 1090 break; 1091 default: 1092 /* Transient errors */ 1093 break; 1094 } 1095 break; 1096 default: 1097 /* Transient errors */ 1098 break; 1099 } 1100 if (error == 0) { 1101 freemsg(mp); 1102 return; 1103 } 1104 1105 /* 1106 * Deliver T_UDERROR_IND when the application has asked for it. 1107 * The socket layer enables this automatically when connected. 1108 */ 1109 if (!connp->conn_dgram_errind) { 1110 freemsg(mp); 1111 return; 1112 } 1113 1114 switch (connp->conn_family) { 1115 case AF_INET: 1116 sin = sin_null; 1117 sin.sin_family = AF_INET; 1118 sin.sin_addr.s_addr = ipha->ipha_dst; 1119 sin.sin_port = udpha->uha_dst_port; 1120 if (IPCL_IS_NONSTR(connp)) { 1121 mutex_enter(&connp->conn_lock); 1122 if (udp->udp_state == TS_DATA_XFER) { 1123 if (sin.sin_port == connp->conn_fport && 1124 sin.sin_addr.s_addr == 1125 connp->conn_faddr_v4) { 1126 mutex_exit(&connp->conn_lock); 1127 (*connp->conn_upcalls->su_set_error) 1128 (connp->conn_upper_handle, error); 1129 goto done; 1130 } 1131 } else { 1132 udp->udp_delayed_error = error; 1133 *((sin_t *)&udp->udp_delayed_addr) = sin; 1134 } 1135 mutex_exit(&connp->conn_lock); 1136 } else { 1137 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1138 NULL, 0, error); 1139 if (mp1 != NULL) 1140 putnext(connp->conn_rq, mp1); 1141 } 1142 break; 1143 case AF_INET6: 1144 sin6 = sin6_null; 1145 sin6.sin6_family = AF_INET6; 1146 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, 
&sin6.sin6_addr); 1147 sin6.sin6_port = udpha->uha_dst_port; 1148 if (IPCL_IS_NONSTR(connp)) { 1149 mutex_enter(&connp->conn_lock); 1150 if (udp->udp_state == TS_DATA_XFER) { 1151 if (sin6.sin6_port == connp->conn_fport && 1152 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1153 &connp->conn_faddr_v6)) { 1154 mutex_exit(&connp->conn_lock); 1155 (*connp->conn_upcalls->su_set_error) 1156 (connp->conn_upper_handle, error); 1157 goto done; 1158 } 1159 } else { 1160 udp->udp_delayed_error = error; 1161 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1162 } 1163 mutex_exit(&connp->conn_lock); 1164 } else { 1165 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1166 NULL, 0, error); 1167 if (mp1 != NULL) 1168 putnext(connp->conn_rq, mp1); 1169 } 1170 break; 1171 } 1172 done: 1173 freemsg(mp); 1174 } 1175 1176 /* 1177 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1178 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1179 * Assumes that IP has pulled up all the extension headers as well as the 1180 * ICMPv6 header. 
1181 */ 1182 static void 1183 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira) 1184 { 1185 icmp6_t *icmp6; 1186 ip6_t *ip6h, *outer_ip6h; 1187 uint16_t iph_hdr_length; 1188 uint8_t *nexthdrp; 1189 udpha_t *udpha; 1190 sin6_t sin6; 1191 mblk_t *mp1; 1192 int error = 0; 1193 udp_t *udp = connp->conn_udp; 1194 udp_stack_t *us = udp->udp_us; 1195 1196 outer_ip6h = (ip6_t *)mp->b_rptr; 1197 #ifdef DEBUG 1198 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1199 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1200 else 1201 iph_hdr_length = IPV6_HDR_LEN; 1202 ASSERT(iph_hdr_length == ira->ira_ip_hdr_length); 1203 #endif 1204 /* Skip past the outer IP and ICMP headers */ 1205 iph_hdr_length = ira->ira_ip_hdr_length; 1206 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1207 1208 /* Skip past the inner IP and find the ULP header */ 1209 ip6h = (ip6_t *)&icmp6[1]; /* Inner IP header */ 1210 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1211 freemsg(mp); 1212 return; 1213 } 1214 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1215 1216 switch (icmp6->icmp6_type) { 1217 case ICMP6_DST_UNREACH: 1218 switch (icmp6->icmp6_code) { 1219 case ICMP6_DST_UNREACH_NOPORT: 1220 error = ECONNREFUSED; 1221 break; 1222 case ICMP6_DST_UNREACH_ADMIN: 1223 case ICMP6_DST_UNREACH_NOROUTE: 1224 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1225 case ICMP6_DST_UNREACH_ADDR: 1226 /* Transient errors */ 1227 break; 1228 default: 1229 break; 1230 } 1231 break; 1232 case ICMP6_PACKET_TOO_BIG: { 1233 struct T_unitdata_ind *tudi; 1234 struct T_opthdr *toh; 1235 size_t udi_size; 1236 mblk_t *newmp; 1237 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1238 sizeof (struct ip6_mtuinfo); 1239 sin6_t *sin6; 1240 struct ip6_mtuinfo *mtuinfo; 1241 1242 /* 1243 * If the application has requested to receive path mtu 1244 * information, send up an empty message containing an 1245 * IPV6_PATHMTU ancillary data item. 
1246 */ 1247 if (!connp->conn_ipv6_recvpathmtu) 1248 break; 1249 1250 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1251 opt_length; 1252 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1253 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1254 break; 1255 } 1256 1257 /* 1258 * newmp->b_cont is left to NULL on purpose. This is an 1259 * empty message containing only ancillary data. 1260 */ 1261 newmp->b_datap->db_type = M_PROTO; 1262 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1263 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1264 tudi->PRIM_type = T_UNITDATA_IND; 1265 tudi->SRC_length = sizeof (sin6_t); 1266 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1267 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1268 tudi->OPT_length = opt_length; 1269 1270 sin6 = (sin6_t *)&tudi[1]; 1271 bzero(sin6, sizeof (sin6_t)); 1272 sin6->sin6_family = AF_INET6; 1273 sin6->sin6_addr = connp->conn_faddr_v6; 1274 1275 toh = (struct T_opthdr *)&sin6[1]; 1276 toh->level = IPPROTO_IPV6; 1277 toh->name = IPV6_PATHMTU; 1278 toh->len = opt_length; 1279 toh->status = 0; 1280 1281 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1282 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1283 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1284 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1285 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1286 /* 1287 * We've consumed everything we need from the original 1288 * message. Free it, then send our empty message. 
1289 */ 1290 freemsg(mp); 1291 udp_ulp_recv(connp, newmp, msgdsize(newmp), ira); 1292 return; 1293 } 1294 case ICMP6_TIME_EXCEEDED: 1295 /* Transient errors */ 1296 break; 1297 case ICMP6_PARAM_PROB: 1298 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1299 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1300 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1301 (uchar_t *)nexthdrp) { 1302 error = ECONNREFUSED; 1303 break; 1304 } 1305 break; 1306 } 1307 if (error == 0) { 1308 freemsg(mp); 1309 return; 1310 } 1311 1312 /* 1313 * Deliver T_UDERROR_IND when the application has asked for it. 1314 * The socket layer enables this automatically when connected. 1315 */ 1316 if (!connp->conn_dgram_errind) { 1317 freemsg(mp); 1318 return; 1319 } 1320 1321 sin6 = sin6_null; 1322 sin6.sin6_family = AF_INET6; 1323 sin6.sin6_addr = ip6h->ip6_dst; 1324 sin6.sin6_port = udpha->uha_dst_port; 1325 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1326 1327 if (IPCL_IS_NONSTR(connp)) { 1328 mutex_enter(&connp->conn_lock); 1329 if (udp->udp_state == TS_DATA_XFER) { 1330 if (sin6.sin6_port == connp->conn_fport && 1331 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1332 &connp->conn_faddr_v6)) { 1333 mutex_exit(&connp->conn_lock); 1334 (*connp->conn_upcalls->su_set_error) 1335 (connp->conn_upper_handle, error); 1336 goto done; 1337 } 1338 } else { 1339 udp->udp_delayed_error = error; 1340 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1341 } 1342 mutex_exit(&connp->conn_lock); 1343 } else { 1344 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1345 NULL, 0, error); 1346 if (mp1 != NULL) 1347 putnext(connp->conn_rq, mp1); 1348 } 1349 done: 1350 freemsg(mp); 1351 } 1352 1353 /* 1354 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1355 * The local address is filled in if endpoint is bound. 
The remote address 1356 * is filled in if remote address has been precified ("connected endpoint") 1357 * (The concept of connected CLTS sockets is alien to published TPI 1358 * but we support it anyway). 1359 */ 1360 static void 1361 udp_addr_req(queue_t *q, mblk_t *mp) 1362 { 1363 struct sockaddr *sa; 1364 mblk_t *ackmp; 1365 struct T_addr_ack *taa; 1366 udp_t *udp = Q_TO_UDP(q); 1367 conn_t *connp = udp->udp_connp; 1368 uint_t addrlen; 1369 1370 /* Make it large enough for worst case */ 1371 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1372 2 * sizeof (sin6_t), 1); 1373 if (ackmp == NULL) { 1374 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1375 return; 1376 } 1377 taa = (struct T_addr_ack *)ackmp->b_rptr; 1378 1379 bzero(taa, sizeof (struct T_addr_ack)); 1380 ackmp->b_wptr = (uchar_t *)&taa[1]; 1381 1382 taa->PRIM_type = T_ADDR_ACK; 1383 ackmp->b_datap->db_type = M_PCPROTO; 1384 1385 if (connp->conn_family == AF_INET) 1386 addrlen = sizeof (sin_t); 1387 else 1388 addrlen = sizeof (sin6_t); 1389 1390 mutex_enter(&connp->conn_lock); 1391 /* 1392 * Note: Following code assumes 32 bit alignment of basic 1393 * data structures like sin_t and struct T_addr_ack. 
1394 */ 1395 if (udp->udp_state != TS_UNBND) { 1396 /* 1397 * Fill in local address first 1398 */ 1399 taa->LOCADDR_offset = sizeof (*taa); 1400 taa->LOCADDR_length = addrlen; 1401 sa = (struct sockaddr *)&taa[1]; 1402 (void) conn_getsockname(connp, sa, &addrlen); 1403 ackmp->b_wptr += addrlen; 1404 } 1405 if (udp->udp_state == TS_DATA_XFER) { 1406 /* 1407 * connected, fill remote address too 1408 */ 1409 taa->REMADDR_length = addrlen; 1410 /* assumed 32-bit alignment */ 1411 taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length; 1412 sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset); 1413 (void) conn_getpeername(connp, sa, &addrlen); 1414 ackmp->b_wptr += addrlen; 1415 } 1416 mutex_exit(&connp->conn_lock); 1417 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1418 qreply(q, ackmp); 1419 } 1420 1421 static void 1422 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1423 { 1424 conn_t *connp = udp->udp_connp; 1425 1426 if (connp->conn_family == AF_INET) { 1427 *tap = udp_g_t_info_ack_ipv4; 1428 } else { 1429 *tap = udp_g_t_info_ack_ipv6; 1430 } 1431 tap->CURRENT_state = udp->udp_state; 1432 tap->OPT_size = udp_max_optsize; 1433 } 1434 1435 static void 1436 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1437 t_uscalar_t cap_bits1) 1438 { 1439 tcap->CAP_bits1 = 0; 1440 1441 if (cap_bits1 & TC1_INFO) { 1442 udp_copy_info(&tcap->INFO_ack, udp); 1443 tcap->CAP_bits1 |= TC1_INFO; 1444 } 1445 } 1446 1447 /* 1448 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1449 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1450 * udp_g_t_info_ack. The current state of the stream is copied from 1451 * udp_state. 
1452 */ 1453 static void 1454 udp_capability_req(queue_t *q, mblk_t *mp) 1455 { 1456 t_uscalar_t cap_bits1; 1457 struct T_capability_ack *tcap; 1458 udp_t *udp = Q_TO_UDP(q); 1459 1460 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1461 1462 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1463 mp->b_datap->db_type, T_CAPABILITY_ACK); 1464 if (!mp) 1465 return; 1466 1467 tcap = (struct T_capability_ack *)mp->b_rptr; 1468 udp_do_capability_ack(udp, tcap, cap_bits1); 1469 1470 qreply(q, mp); 1471 } 1472 1473 /* 1474 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1475 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1476 * The current state of the stream is copied from udp_state. 1477 */ 1478 static void 1479 udp_info_req(queue_t *q, mblk_t *mp) 1480 { 1481 udp_t *udp = Q_TO_UDP(q); 1482 1483 /* Create a T_INFO_ACK message. */ 1484 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1485 T_INFO_ACK); 1486 if (!mp) 1487 return; 1488 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1489 qreply(q, mp); 1490 } 1491 1492 /* For /dev/udp aka AF_INET open */ 1493 static int 1494 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1495 { 1496 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1497 } 1498 1499 /* For /dev/udp6 aka AF_INET6 open */ 1500 static int 1501 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1502 { 1503 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1504 } 1505 1506 /* 1507 * This is the open routine for udp. It allocates a udp_t structure for 1508 * the stream and, on the first open of the module, creates an ND table. 1509 */ 1510 static int 1511 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1512 boolean_t isv6) 1513 { 1514 udp_t *udp; 1515 conn_t *connp; 1516 dev_t conn_dev; 1517 vmem_t *minor_arena; 1518 1519 /* If the stream is already open, return immediately. 
*/ 1520 if (q->q_ptr != NULL) 1521 return (0); 1522 1523 if (sflag == MODOPEN) 1524 return (EINVAL); 1525 1526 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1527 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1528 minor_arena = ip_minor_arena_la; 1529 } else { 1530 /* 1531 * Either minor numbers in the large arena were exhausted 1532 * or a non socket application is doing the open. 1533 * Try to allocate from the small arena. 1534 */ 1535 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1536 return (EBUSY); 1537 1538 minor_arena = ip_minor_arena_sa; 1539 } 1540 1541 if (flag & SO_FALLBACK) { 1542 /* 1543 * Non streams socket needs a stream to fallback to 1544 */ 1545 RD(q)->q_ptr = (void *)conn_dev; 1546 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1547 WR(q)->q_ptr = (void *)minor_arena; 1548 qprocson(q); 1549 return (0); 1550 } 1551 1552 connp = udp_do_open(credp, isv6, KM_SLEEP); 1553 if (connp == NULL) { 1554 inet_minor_free(minor_arena, conn_dev); 1555 return (ENOMEM); 1556 } 1557 udp = connp->conn_udp; 1558 1559 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1560 connp->conn_dev = conn_dev; 1561 connp->conn_minor_arena = minor_arena; 1562 1563 /* 1564 * Initialize the udp_t structure for this stream. 1565 */ 1566 q->q_ptr = connp; 1567 WR(q)->q_ptr = connp; 1568 connp->conn_rq = q; 1569 connp->conn_wq = WR(q); 1570 1571 /* 1572 * Since this conn_t/udp_t is not yet visible to anybody else we don't 1573 * need to lock anything. 1574 */ 1575 ASSERT(connp->conn_proto == IPPROTO_UDP); 1576 ASSERT(connp->conn_udp == udp); 1577 ASSERT(udp->udp_connp == connp); 1578 1579 if (flag & SO_SOCKSTR) { 1580 udp->udp_issocket = B_TRUE; 1581 } 1582 1583 WR(q)->q_hiwat = connp->conn_sndbuf; 1584 WR(q)->q_lowat = connp->conn_sndlowat; 1585 1586 qprocson(q); 1587 1588 /* Set the Stream head write offset and high watermark. 
*/ 1589 (void) proto_set_tx_wroff(q, connp, connp->conn_wroff); 1590 (void) proto_set_rx_hiwat(q, connp, 1591 udp_set_rcv_hiwat(udp, connp->conn_rcvbuf)); 1592 1593 mutex_enter(&connp->conn_lock); 1594 connp->conn_state_flags &= ~CONN_INCIPIENT; 1595 mutex_exit(&connp->conn_lock); 1596 return (0); 1597 } 1598 1599 /* 1600 * Which UDP options OK to set through T_UNITDATA_REQ... 1601 */ 1602 /* ARGSUSED */ 1603 static boolean_t 1604 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1605 { 1606 return (B_TRUE); 1607 } 1608 1609 /* 1610 * This routine gets default values of certain options whose default 1611 * values are maintained by protcol specific code 1612 */ 1613 int 1614 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1615 { 1616 udp_t *udp = Q_TO_UDP(q); 1617 udp_stack_t *us = udp->udp_us; 1618 int *i1 = (int *)ptr; 1619 1620 switch (level) { 1621 case IPPROTO_IP: 1622 switch (name) { 1623 case IP_MULTICAST_TTL: 1624 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1625 return (sizeof (uchar_t)); 1626 case IP_MULTICAST_LOOP: 1627 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1628 return (sizeof (uchar_t)); 1629 } 1630 break; 1631 case IPPROTO_IPV6: 1632 switch (name) { 1633 case IPV6_MULTICAST_HOPS: 1634 *i1 = IP_DEFAULT_MULTICAST_TTL; 1635 return (sizeof (int)); 1636 case IPV6_MULTICAST_LOOP: 1637 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1638 return (sizeof (int)); 1639 case IPV6_UNICAST_HOPS: 1640 *i1 = us->us_ipv6_hoplimit; 1641 return (sizeof (int)); 1642 } 1643 break; 1644 } 1645 return (-1); 1646 } 1647 1648 /* 1649 * This routine retrieves the current status of socket options. 1650 * It returns the size of the option retrieved, or -1. 
1651 */ 1652 int 1653 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name, 1654 uchar_t *ptr) 1655 { 1656 int *i1 = (int *)ptr; 1657 udp_t *udp = connp->conn_udp; 1658 int len; 1659 conn_opt_arg_t coas; 1660 int retval; 1661 1662 coas.coa_connp = connp; 1663 coas.coa_ixa = connp->conn_ixa; 1664 coas.coa_ipp = &connp->conn_xmit_ipp; 1665 coas.coa_ancillary = B_FALSE; 1666 coas.coa_changed = 0; 1667 1668 /* 1669 * We assume that the optcom framework has checked for the set 1670 * of levels and names that are supported, hence we don't worry 1671 * about rejecting based on that. 1672 * First check for UDP specific handling, then pass to common routine. 1673 */ 1674 switch (level) { 1675 case IPPROTO_IP: 1676 /* 1677 * Only allow IPv4 option processing on IPv4 sockets. 1678 */ 1679 if (connp->conn_family != AF_INET) 1680 return (-1); 1681 1682 switch (name) { 1683 case IP_OPTIONS: 1684 case T_IP_OPTIONS: 1685 mutex_enter(&connp->conn_lock); 1686 if (!(udp->udp_recv_ipp.ipp_fields & 1687 IPPF_IPV4_OPTIONS)) { 1688 mutex_exit(&connp->conn_lock); 1689 return (0); 1690 } 1691 1692 len = udp->udp_recv_ipp.ipp_ipv4_options_len; 1693 ASSERT(len != 0); 1694 bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len); 1695 mutex_exit(&connp->conn_lock); 1696 return (len); 1697 } 1698 break; 1699 case IPPROTO_UDP: 1700 switch (name) { 1701 case UDP_NAT_T_ENDPOINT: 1702 mutex_enter(&connp->conn_lock); 1703 *i1 = udp->udp_nat_t_endpoint; 1704 mutex_exit(&connp->conn_lock); 1705 return (sizeof (int)); 1706 case UDP_RCVHDR: 1707 mutex_enter(&connp->conn_lock); 1708 *i1 = udp->udp_rcvhdr ? 1 : 0; 1709 mutex_exit(&connp->conn_lock); 1710 return (sizeof (int)); 1711 } 1712 } 1713 mutex_enter(&connp->conn_lock); 1714 retval = conn_opt_get(&coas, level, name, ptr); 1715 mutex_exit(&connp->conn_lock); 1716 return (retval); 1717 } 1718 1719 /* 1720 * This routine retrieves the current status of socket options. 1721 * It returns the size of the option retrieved, or -1. 
1722 */ 1723 int 1724 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1725 { 1726 conn_t *connp = Q_TO_CONN(q); 1727 int err; 1728 1729 err = udp_opt_get(connp, level, name, ptr); 1730 return (err); 1731 } 1732 1733 /* 1734 * This routine sets socket options. 1735 */ 1736 int 1737 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name, 1738 uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly) 1739 { 1740 conn_t *connp = coa->coa_connp; 1741 ip_xmit_attr_t *ixa = coa->coa_ixa; 1742 udp_t *udp = connp->conn_udp; 1743 udp_stack_t *us = udp->udp_us; 1744 int *i1 = (int *)invalp; 1745 boolean_t onoff = (*i1 == 0) ? 0 : 1; 1746 int error; 1747 1748 ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock)); 1749 /* 1750 * First do UDP specific sanity checks and handle UDP specific 1751 * options. Note that some IPPROTO_UDP options are handled 1752 * by conn_opt_set. 1753 */ 1754 switch (level) { 1755 case SOL_SOCKET: 1756 switch (name) { 1757 case SO_SNDBUF: 1758 if (*i1 > us->us_max_buf) { 1759 return (ENOBUFS); 1760 } 1761 break; 1762 case SO_RCVBUF: 1763 if (*i1 > us->us_max_buf) { 1764 return (ENOBUFS); 1765 } 1766 break; 1767 1768 case SCM_UCRED: { 1769 struct ucred_s *ucr; 1770 cred_t *newcr; 1771 ts_label_t *tsl; 1772 1773 /* 1774 * Only sockets that have proper privileges and are 1775 * bound to MLPs will have any other value here, so 1776 * this implicitly tests for privilege to set label. 1777 */ 1778 if (connp->conn_mlp_type == mlptSingle) 1779 break; 1780 1781 ucr = (struct ucred_s *)invalp; 1782 if (inlen != ucredsize || 1783 ucr->uc_labeloff < sizeof (*ucr) || 1784 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 1785 return (EINVAL); 1786 if (!checkonly) { 1787 /* 1788 * Set ixa_tsl to the new label. 1789 * We assume that crgetzoneid doesn't change 1790 * as part of the SCM_UCRED. 
1791 */ 1792 ASSERT(cr != NULL); 1793 if ((tsl = crgetlabel(cr)) == NULL) 1794 return (EINVAL); 1795 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 1796 tsl->tsl_doi, KM_NOSLEEP); 1797 if (newcr == NULL) 1798 return (ENOSR); 1799 ASSERT(newcr->cr_label != NULL); 1800 /* 1801 * Move the hold on the cr_label to ixa_tsl by 1802 * setting cr_label to NULL. Then release newcr. 1803 */ 1804 ip_xmit_attr_replace_tsl(ixa, newcr->cr_label); 1805 ixa->ixa_flags |= IXAF_UCRED_TSL; 1806 newcr->cr_label = NULL; 1807 crfree(newcr); 1808 coa->coa_changed |= COA_HEADER_CHANGED; 1809 coa->coa_changed |= COA_WROFF_CHANGED; 1810 } 1811 /* Fully handled this option. */ 1812 return (0); 1813 } 1814 } 1815 break; 1816 case IPPROTO_UDP: 1817 switch (name) { 1818 case UDP_NAT_T_ENDPOINT: 1819 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 1820 return (error); 1821 } 1822 1823 /* 1824 * Use conn_family instead so we can avoid ambiguitites 1825 * with AF_INET6 sockets that may switch from IPv4 1826 * to IPv6. 1827 */ 1828 if (connp->conn_family != AF_INET) { 1829 return (EAFNOSUPPORT); 1830 } 1831 1832 if (!checkonly) { 1833 mutex_enter(&connp->conn_lock); 1834 udp->udp_nat_t_endpoint = onoff; 1835 mutex_exit(&connp->conn_lock); 1836 coa->coa_changed |= COA_HEADER_CHANGED; 1837 coa->coa_changed |= COA_WROFF_CHANGED; 1838 } 1839 /* Fully handled this option. */ 1840 return (0); 1841 case UDP_RCVHDR: 1842 mutex_enter(&connp->conn_lock); 1843 udp->udp_rcvhdr = onoff; 1844 mutex_exit(&connp->conn_lock); 1845 return (0); 1846 } 1847 break; 1848 } 1849 error = conn_opt_set(coa, level, name, inlen, invalp, 1850 checkonly, cr); 1851 return (error); 1852 } 1853 1854 /* 1855 * This routine sets socket options. 
*/
/*
 * optset_context selects the semantics: SETFN_OPTCOM_CHECKONLY validates
 * only, the other recognized contexts actually set the option.  When
 * thisdg_attrs is non-NULL it is used as the conn_opt_arg_t (ancillary
 * options from T_UNITDATA_REQ) and no reference on conn_ixa is taken;
 * otherwise a reference is obtained with conn_get_ixa() and released again
 * before returning.
 *
 * On success *outlenp is set to inlen and the value is copied to outvalp
 * when the two buffers differ.  On the error paths that go through errout,
 * and on the early validation failures, *outlenp is set to 0.  Returns 0 or
 * an errno value.
 *
 * For non-ancillary sets the coa_changed flags left behind by
 * udp_do_opt_set() drive the follow-up work at the end of the routine
 * (route re-caching, header template rebuild, receive/send buffer
 * watermarks and write offset).
 */
int
udp_opt_set(conn_t *connp, uint_t optset_context, int level,
    int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
    uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
{
	udp_t		*udp = connp->conn_udp;
	int		err;
	conn_opt_arg_t	coas, *coa;
	boolean_t	checkonly;
	udp_stack_t	*us = udp->udp_us;

	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		checkonly = B_TRUE;
		/*
		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
		 * inlen != 0 implies value supplied and
		 * 	we have to "pretend" to set it.
		 * inlen == 0 implies that there is no
		 * 	value part in T_CHECK request and just validation
		 * done elsewhere should be enough, we just return here.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			return (0);
		}
		break;
	case SETFN_OPTCOM_NEGOTIATE:
		checkonly = B_FALSE;
		break;
	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		checkonly = B_FALSE;
		/*
		 * Negotiating local and "association-related" options
		 * through T_UNITDATA_REQ.
		 *
		 * Following routine can filter out ones we do not
		 * want to be "set" this way.
		 */
		if (!udp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		/*
		 * We should never get here
		 */
		*outlenp = 0;
		return (EINVAL);
	}

	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	if (thisdg_attrs != NULL) {
		/* Options from T_UNITDATA_REQ */
		coa = (conn_opt_arg_t *)thisdg_attrs;
		ASSERT(coa->coa_connp == connp);
		ASSERT(coa->coa_ixa != NULL);
		ASSERT(coa->coa_ipp != NULL);
		ASSERT(coa->coa_ancillary);
	} else {
		coa = &coas;
		coas.coa_connp = connp;
		/* Get a reference on conn_ixa to prevent concurrent mods */
		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
		if (coas.coa_ixa == NULL) {
			*outlenp = 0;
			return (ENOMEM);
		}
		coas.coa_ipp = &connp->conn_xmit_ipp;
		coas.coa_ancillary = B_FALSE;
		coas.coa_changed = 0;
	}

	err = udp_do_opt_set(coa, level, name, inlen, invalp,
	    cr, checkonly);
	if (err != 0) {
		/*
		 * Common failure exit, also reached by goto from the DHCPINIT
		 * handling below.  The ixa reference is dropped only when we
		 * took it ourselves, i.e. in the non-ancillary case.
		 */
errout:
		if (!coa->coa_ancillary)
			ixa_refrele(coa->coa_ixa);
		*outlenp = 0;
		return (err);
	}
	/*
	 * Handle DHCPINIT here outside of lock.
	 * NOTE(review): this block does not test checkonly, so it also runs
	 * for SETFN_OPTCOM_CHECKONLY requests -- confirm that is intended.
	 */
	if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
		uint_t	ifindex;
		ill_t	*ill;

		ifindex = *(uint_t *)invalp;
		if (ifindex == 0) {
			/* Index 0 only clears any existing association. */
			ill = NULL;
		} else {
			ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
			    coa->coa_ixa->ixa_ipst);
			if (ill == NULL) {
				err = ENXIO;
				goto errout;
			}

			/*
			 * ill_lock is taken here and stays held until after
			 * conn_dhcpinit_ill has been updated below.
			 */
			mutex_enter(&ill->ill_lock);
			if (ill->ill_state_flags & ILL_CONDEMNED) {
				mutex_exit(&ill->ill_lock);
				ill_refrele(ill);
				err = ENXIO;
				goto errout;
			}
			if (IS_VNI(ill)) {
				mutex_exit(&ill->ill_lock);
				ill_refrele(ill);
				err = EINVAL;
				goto errout;
			}
		}
		mutex_enter(&connp->conn_lock);

		if (connp->conn_dhcpinit_ill != NULL) {
			/*
			 * We've locked the conn so conn_cleanup_ill()
			 * cannot clear conn_dhcpinit_ill -- so it's
			 * safe to access the ill.
			 */
			ill_t *oill = connp->conn_dhcpinit_ill;

			ASSERT(oill->ill_dhcpinit != 0);
			atomic_dec_32(&oill->ill_dhcpinit);
			ill_set_inputfn(connp->conn_dhcpinit_ill);
			connp->conn_dhcpinit_ill = NULL;
		}

		if (ill != NULL) {
			connp->conn_dhcpinit_ill = ill;
			atomic_inc_32(&ill->ill_dhcpinit);
			ill_set_inputfn(ill);
			/* Locks are dropped in reverse order of acquisition. */
			mutex_exit(&connp->conn_lock);
			mutex_exit(&ill->ill_lock);
			ill_refrele(ill);
		} else {
			mutex_exit(&connp->conn_lock);
		}
	}

	/*
	 * Common case of OK return with outval same as inval.
	 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;

	/*
	 * If this was not ancillary data, then we rebuild the headers,
	 * update the IRE/NCE, and IPsec as needed.
	 * Since the label depends on the destination we go through
	 * ip_set_destination first.
	 */
	if (coa->coa_ancillary) {
		return (0);
	}

	if (coa->coa_changed & COA_ROUTE_CHANGED) {
		in6_addr_t saddr, faddr, nexthop;
		in_port_t fport;

		/*
		 * We clear lastdst to make sure we pick up the change
		 * next time sending.
		 * If we are connected we re-cache the information.
		 * We ignore errors to preserve BSD behavior.
		 * Note that we don't redo IPsec policy lookup here
		 * since the final destination (or source) didn't change.
		 */
		mutex_enter(&connp->conn_lock);
		connp->conn_v6lastdst = ipv6_all_zeros;

		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
		    &connp->conn_faddr_v6, &nexthop);
		/* Snapshot the addresses so the lock can be dropped. */
		saddr = connp->conn_saddr_v6;
		faddr = connp->conn_faddr_v6;
		fport = connp->conn_fport;
		mutex_exit(&connp->conn_lock);

		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
			(void) ip_attr_connect(connp, coa->coa_ixa,
			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
		}
	}

	/* Done with the ixa; nothing below this point may touch it. */
	ixa_refrele(coa->coa_ixa);

	if (coa->coa_changed & COA_HEADER_CHANGED) {
		/*
		 * Rebuild the header template if we are connected.
		 * Otherwise clear conn_v6lastdst so we rebuild the header
		 * in the data path.
		 */
		mutex_enter(&connp->conn_lock);
		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
			err = udp_build_hdr_template(connp,
			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
			    connp->conn_fport, connp->conn_flowinfo);
			if (err != 0) {
				/*
				 * Note that *outlenp has already been set to
				 * inlen at this point and is left that way.
				 */
				mutex_exit(&connp->conn_lock);
				return (err);
			}
		} else {
			connp->conn_v6lastdst = ipv6_all_zeros;
		}
		mutex_exit(&connp->conn_lock);
	}
	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
		    connp->conn_rcvbuf);
	}
	/* Only STREAMS endpoints have a write queue to update. */
	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
	}
	if (coa->coa_changed & COA_WROFF_CHANGED) {
		/* Increase wroff if needed */
		uint_t wroff;

		mutex_enter(&connp->conn_lock);
		wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
		/* Leave room for the 32-bit SPI that NAT-T endpoints insert. */
		if (udp->udp_nat_t_endpoint)
			wroff += sizeof (uint32_t);
		if (wroff > connp->conn_wroff) {
			connp->conn_wroff = wroff;
			mutex_exit(&connp->conn_lock);
			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
		} else {
			mutex_exit(&connp->conn_lock);
		}
	}
	return (err);
}

/*
 * This routine sets socket options.  It is the TPI flavor of
 * udp_opt_set(): the conn_t is derived from the queue and all other
 * arguments are passed through unchanged.
 */
int
udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
    uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    void *thisdg_attrs, cred_t *cr)
{
	conn_t	*connp = Q_TO_CONN(q);
	int error;

	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
	    outlenp, outvalp, thisdg_attrs, cr);
	return (error);
}

/*
 * Setup IP and UDP headers.
 * Returns NULL on allocation failure, in which case data_mp is freed.
 *
 * The headers are prepended by conn_prepend_hdr(); this routine then fills
 * in the UDP ports and length, sets ixa_pktlen, seeds uha_checksum for the
 * checksum that IP computes and, for NAT-T endpoints, zeroes the four-byte
 * SPI that follows the UDP header.  On failure *errorp holds the error.
 */
mblk_t *
udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
    const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
    uint32_t flowinfo, mblk_t *data_mp, int *errorp)
{
	mblk_t		*mp;
	udpha_t		*udpha;
	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
	uint_t		data_len;
	uint32_t	cksum;
	udp_t		*udp = connp->conn_udp;
	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
	uint_t		ulp_hdr_len;

	data_len = msgdsize(data_mp);
	/* The ULP header is the UDP header plus, for NAT-T, the SPI word. */
	ulp_hdr_len = UDPH_SIZE;
	if (insert_spi)
		ulp_hdr_len += sizeof (uint32_t);

	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
	if (mp == NULL) {
		ASSERT(*errorp != 0);
		return (NULL);
	}

	/* From here on data_len is the UDP length: ULP header plus payload. */
	data_len += ulp_hdr_len;
	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;

	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
	udpha->uha_src_port = connp->conn_lport;
	udpha->uha_dst_port = dstport;
	udpha->uha_checksum = 0;
	udpha->uha_length = htons(data_len);

	/*
	 * If there was a routing option/header then conn_prepend_hdr
	 * has massaged it and placed the pseudo-header checksum difference
	 * in the cksum argument.
	 *
	 * Setup header length and prepare for ULP checksum done in IP.
	 *
	 * We make it easy for IP to include our pseudo header
	 * by putting our length in uha_checksum.
	 * The IP source, destination, and length have already been set by
	 * conn_prepend_hdr.
	 */
	cksum += data_len;
	/* Fold the carry back in; the result is asserted to fit 16 bits. */
	cksum = (cksum >> 16) + (cksum & 0xFFFF);
	ASSERT(cksum < 0x10000);

	if (ixa->ixa_flags & IXAF_IS_IPV4) {
		ipha_t	*ipha = (ipha_t *)mp->b_rptr;

		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);

		/* IP does the checksum if uha_checksum is non-zero */
		if (us->us_do_checksum) {
			if (cksum == 0)
				udpha->uha_checksum = 0xffff;
			else
				udpha->uha_checksum = htons(cksum);
		} else {
			udpha->uha_checksum = 0;
		}
	} else {
		ip6_t *ip6h = (ip6_t *)mp->b_rptr;

		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
		/* Unlike IPv4, us_do_checksum is not consulted here. */
		if (cksum == 0)
			udpha->uha_checksum = 0xffff;
		else
			udpha->uha_checksum = htons(cksum);
	}

	/* Insert all-0s SPI now. */
	if (insert_spi)
		*((uint32_t *)(udpha + 1)) = 0;

	return (mp);
}

/*
 * Build the cached IP + UDP header template for a connected endpoint
 * using conn_build_hdr_template() and fill in the UDP part (ports, zero
 * checksum, placeholder length).  The caller must hold conn_lock.
 * Returns 0, or the error from conn_build_hdr_template().
 */
static int
udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
    const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
{
	udpha_t		*udpha;
	int		error;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	/*
	 * We clear lastdst to make sure we don't use the lastdst path
	 * next time sending since we might not have set v6dst yet.
	 */
	connp->conn_v6lastdst = ipv6_all_zeros;

	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
	    flowinfo);
	if (error != 0)
		return (error);

	/*
	 * Any routing header/option has been massaged. The checksum difference
	 * is stored in conn_sum.
	 */
	udpha = (udpha_t *)connp->conn_ht_ulp;
	udpha->uha_src_port = connp->conn_lport;
	udpha->uha_dst_port = dstport;
	udpha->uha_checksum = 0;
	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
	return (0);
}

/*
 * This routine retrieves the value of an ND variable in a udpparam_t
 * structure.  It is called through nd_getset when a user reads the
 * variable.  cp points at the udpparam_t; its value is printed into mp.
 */
/* ARGSUSED */
static int
udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udpparam_t *udppa = (udpparam_t *)cp;

	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
	return (0);
}

/*
 * Walk through the param array specified registering each element with the
 * named dispatch (ND) handler.  Entries with a NULL or empty name are
 * skipped.  The three udp_extra_priv_ports entries are registered as well.
 * If any registration fails, everything is released with nd_free() and
 * B_FALSE is returned; otherwise B_TRUE.
 */
static boolean_t
udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt)
{
	for (; cnt-- > 0; udppa++) {
		if (udppa->udp_param_name && udppa->udp_param_name[0]) {
			if (!nd_load(ndp, udppa->udp_param_name,
			    udp_param_get, udp_param_set,
			    (caddr_t)udppa)) {
				nd_free(ndp);
				return (B_FALSE);
			}
		}
	}
	/* Read-only listing of the extra privileged ports. */
	if (!nd_load(ndp, "udp_extra_priv_ports",
	    udp_extra_priv_ports_get, NULL, NULL)) {
		nd_free(ndp);
		return (B_FALSE);
	}
	/* Write-only entries to add and delete extra privileged ports. */
	if (!nd_load(ndp, "udp_extra_priv_ports_add",
	    NULL, udp_extra_priv_ports_add, NULL)) {
		nd_free(ndp);
		return (B_FALSE);
	}
	if (!nd_load(ndp, "udp_extra_priv_ports_del",
	    NULL, udp_extra_priv_ports_del, NULL)) {
		nd_free(ndp);
		return (B_FALSE);
	}
	return (B_TRUE);
}

/* This routine sets an ND variable in a udpparam_t structure. */
/* ARGSUSED */
static int
udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
{
	long		new_value;
	udpparam_t	*udppa = (udpparam_t *)cp;

	/*
	 * Fail the request if the new value does not lie within the
	 * required bounds.
2291 */ 2292 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2293 new_value < udppa->udp_param_min || 2294 new_value > udppa->udp_param_max) { 2295 return (EINVAL); 2296 } 2297 2298 /* Set the new value */ 2299 udppa->udp_param_value = new_value; 2300 return (0); 2301 } 2302 2303 static mblk_t * 2304 udp_queue_fallback(udp_t *udp, mblk_t *mp) 2305 { 2306 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 2307 if (IPCL_IS_NONSTR(udp->udp_connp)) { 2308 /* 2309 * fallback has started but messages have not been moved yet 2310 */ 2311 if (udp->udp_fallback_queue_head == NULL) { 2312 ASSERT(udp->udp_fallback_queue_tail == NULL); 2313 udp->udp_fallback_queue_head = mp; 2314 udp->udp_fallback_queue_tail = mp; 2315 } else { 2316 ASSERT(udp->udp_fallback_queue_tail != NULL); 2317 udp->udp_fallback_queue_tail->b_next = mp; 2318 udp->udp_fallback_queue_tail = mp; 2319 } 2320 return (NULL); 2321 } else { 2322 /* 2323 * Fallback completed, let the caller putnext() the mblk. 2324 */ 2325 return (mp); 2326 } 2327 } 2328 2329 /* 2330 * Deliver data to ULP. In case we have a socket, and it's falling back to 2331 * TPI, then we'll queue the mp for later processing. 
2332 */ 2333 static void 2334 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira) 2335 { 2336 if (IPCL_IS_NONSTR(connp)) { 2337 udp_t *udp = connp->conn_udp; 2338 int error; 2339 2340 ASSERT(len == msgdsize(mp)); 2341 if ((*connp->conn_upcalls->su_recv) 2342 (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) { 2343 mutex_enter(&udp->udp_recv_lock); 2344 if (error == ENOSPC) { 2345 /* 2346 * let's confirm while holding the lock 2347 */ 2348 if ((*connp->conn_upcalls->su_recv) 2349 (connp->conn_upper_handle, NULL, 0, 0, 2350 &error, NULL) < 0) { 2351 ASSERT(error == ENOSPC); 2352 if (error == ENOSPC) { 2353 connp->conn_flow_cntrld = 2354 B_TRUE; 2355 } 2356 } 2357 mutex_exit(&udp->udp_recv_lock); 2358 } else { 2359 ASSERT(error == EOPNOTSUPP); 2360 mp = udp_queue_fallback(udp, mp); 2361 mutex_exit(&udp->udp_recv_lock); 2362 if (mp != NULL) 2363 putnext(connp->conn_rq, mp); 2364 } 2365 } 2366 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 2367 } else { 2368 if (is_system_labeled()) { 2369 ASSERT(ira->ira_cred != NULL); 2370 /* 2371 * Provide for protocols above UDP such as RPC 2372 * NOPID leaves db_cpid unchanged. 2373 */ 2374 mblk_setcred(mp, ira->ira_cred, NOPID); 2375 } 2376 2377 putnext(connp->conn_rq, mp); 2378 } 2379 } 2380 2381 /* 2382 * This is the inbound data path. 2383 * IP has already pulled up the IP plus UDP headers and verified alignment 2384 * etc. 
*/
/*
 * arg1 is the conn_t the packet was matched to; arg2 is unused.
 *
 * The datagram is wrapped in a freshly allocated M_PROTO T_UNITDATA_IND
 * block carrying the source address (sin_t for AF_INET conns, sin6_t
 * otherwise) plus any ancillary data the conn has asked for, and is then
 * passed to udp_ulp_recv().
 *
 * The packet is freed and udpInErrors bumped when the UDP length does not
 * match the IP-derived length, when ip_find_hdr_v4() fails while recording
 * the received IPv4 options, or when the T_UNITDATA_IND block cannot be
 * allocated.
 */
/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t *connp = (conn_t *)arg1;
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;		/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			udi_size;	/* Size of T_unitdata_ind */
	int			pkt_len;
	udp_t			*udp;
	udpha_t			*udpha;
	ip_pkt_t		ipps;
	ip6_t			*ip6h;
	mblk_t			*mp1;
	uint32_t		udp_ipv4_options_len;
	crb_t			recv_ancillary;
	udp_stack_t		*us;

	ASSERT(connp->conn_flags & IPCL_UDPCONN);

	udp = connp->conn_udp;
	us = udp->udp_us;
	rptr = mp->b_rptr;

	ASSERT(DB_TYPE(mp) == M_DATA);
	ASSERT(OK_32PTR(rptr));
	ASSERT(ira->ira_pktlen == msgdsize(mp));
	pkt_len = ira->ira_pktlen;

	/*
	 * Get a snapshot of these and allow other threads to change
	 * them after that. We need the same recv_ancillary when determining
	 * the size as when adding the ancillary data items.
	 */
	mutex_enter(&connp->conn_lock);
	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
	recv_ancillary = connp->conn_recv_ancillary;
	mutex_exit(&connp->conn_lock);

	hdr_length = ira->ira_ip_hdr_length;

	/*
	 * IP inspected the UDP header thus all of it must be in the mblk.
	 * UDP length check is performed for IPv6 packets and IPv4 packets
	 * to check if the size of the packet as specified
	 * by the UDP header is the same as the length derived from the IP
	 * header.
	 */
	udpha = (udpha_t *)(rptr + hdr_length);
	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
		goto tossit;

	hdr_length += UDPH_SIZE;
	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */

	/* Initialize regardless of IP version */
	ipps.ipp_fields = 0;

	/*
	 * Also taken when only the previously recorded options were
	 * non-empty, so that udp_recv_ipp is refreshed from this packet.
	 */
	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
	    udp_ipv4_options_len > 0) &&
	    connp->conn_family == AF_INET) {
		int	err;

		/*
		 * Record/update udp_recv_ipp with the lock
		 * held. Not needed for AF_INET6 sockets
		 * since they don't support a getsockopt of IP_OPTIONS.
		 */
		mutex_enter(&connp->conn_lock);
		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
		    B_TRUE);
		if (err != 0) {
			/* Allocation failed. Drop packet */
			mutex_exit(&connp->conn_lock);
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mutex_exit(&connp->conn_lock);
	}

	if (recv_ancillary.crb_all != 0) {
		/*
		 * Record packet information in the ip_pkt_t
		 */
		if (ira->ira_flags & IRAF_IS_IPV4) {
			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));

			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
		} else {
			uint8_t nexthdrp;

			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
			/*
			 * IPv6 packets can only be received by applications
			 * that are prepared to receive IPv6 addresses.
			 * The IP fanout must ensure this.
			 */
			ASSERT(connp->conn_family == AF_INET6);

			ip6h = (ip6_t *)rptr;

			/* We don't care about the length, but need the ipp */
			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
			    &nexthdrp);
			ASSERT(hdr_length == ira->ira_ip_hdr_length);
			/* Restore */
			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
			ASSERT(nexthdrp == IPPROTO_UDP);
		}
	}

	/*
	 * This is the inbound data path.  Packets are passed upstream as
	 * T_UNITDATA_IND messages.
	 */
	if (connp->conn_family == AF_INET) {
		sin_t	*sin;

		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);

		/*
		 * Normally only send up the source address.
		 * If any ancillary data items are wanted we add those.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin_t);
		/* udi_size now holds just the length of the options area. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
		tudi->OPT_length = udi_size;
		sin = (sin_t *)&tudi[1];
		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
		sin->sin_port =	udpha->uha_src_port;
		sin->sin_family = connp->conn_family;
		/* Zero the 8 bytes of sin_zero with two 32-bit stores. */
		*(uint32_t *)&sin->sin_zero[0] = 0;
		*(uint32_t *)&sin->sin_zero[4] = 0;

		/*
		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
		 * IP_RECVTTL has been set.
		 */
		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin[1], udi_size);
		}
	} else {
		sin6_t	*sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		if (recv_ancillary.crb_all != 0) {
			udi_size += conn_recvancillary_size(connp,
			    recv_ancillary, ira, mp, &ipps);
		}

		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp1->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		/* udi_size now holds just the length of the options area. */
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ira->ira_flags & IRAF_IS_IPV4) {
			in6_addr_t v6dst;

			/* IPv4 packet: report a v4-mapped source address. */
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    IPCL_ZONEID(connp), us->us_netstack);
		} else {
			ip6h = (ip6_t *)rptr;

			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope pass up scope id */
			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ira->ira_ruifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
			    us->us_netstack);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = connp->conn_family;

		if (udi_size != 0) {
			conn_recvancillary_add(connp, recv_ancillary, ira,
			    &ipps, (uchar_t *)&sin6[1], udi_size);
		}
	}

	/* Walk past the headers unless IP_RECVHDR was set. */
	if (!udp->udp_rcvhdr) {
		mp->b_rptr = rptr + hdr_length;
		pkt_len -= hdr_length;
	}

	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
	udp_ulp_recv(connp, mp1, pkt_len, ira);
	return;

tossit:
	freemsg(mp);
	BUMP_MIB(&us->us_udp_mib, udpInErrors);
}

/*
 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
 * information that can be changing beneath us.
 *
 * Replies sent on q, in order: the UDP MIB counters (using mpctl itself),
 * the IPv4 endpoint table, the IPv4 MLP attribute table, the IPv6 endpoint
 * table and the IPv6 MLP attribute table.  An attribute table that ends up
 * empty is freed instead of being sent.  Only conns in the same zone as the
 * requesting conn are reported.
 *
 * Returns mp2ctl, the copy of the original control message made with
 * copymsg() before mpctl is modified.  Returns 0 (after freeing mpctl and
 * any copies already made) when mpctl is NULL, has no b_cont, or one of the
 * working copies cannot be allocated.
 */
mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	mblk_t			*mp_conn_ctl;
	mblk_t			*mp_attr_ctl;
	mblk_t			*mp6_conn_ctl;
	mblk_t			*mp6_attr_ctl;
	mblk_t			*mp_conn_tail;
	mblk_t			*mp_attr_tail;
	mblk_t			*mp6_conn_tail;
	mblk_t			*mp6_attr_tail;
	struct opthdr		*optp;
	mib2_udpEntry_t		ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int			state;
	zoneid_t		zoneid;
	int			i;
	connf_t			*connfp;
	conn_t			*connp = Q_TO_CONN(q);
	int			v4_conn_idx;
	int			v6_conn_idx;
	boolean_t		needattr;
	udp_t			*udp;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
	mblk_t			*mp2ctl;

	/*
	 * make a copy of the original message
	 */
	/*
	 * NOTE(review): this runs before the mpctl == NULL test below, so
	 * copymsg() is presumably NULL-tolerant -- confirm.
	 */
	mp2ctl = copymsg(mpctl);

	/*
	 * mp6_attr_ctl needs no initialization or freemsg() below: it is the
	 * last copy attempted, so on any failure it is either unassigned or
	 * NULL.
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		freemsg(mpctl);
		freemsg(mp2ctl);
		return (0);
	}

	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	/* First reply: the fixed UDP MIB counters, sent in mpctl itself. */
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
	    sizeof (us->us_udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every UDP conn in the global hash, one bucket at a time. */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDPCONN))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/*
			 * Build the MLP attribute entry; needattr records
			 * whether anything worth reporting was found.
			 */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}
			if (connp->conn_anon_mlp) {
				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
				needattr = B_TRUE;
			}
			switch (connp->conn_mac_mode) {
			case CONN_MAC_DEFAULT:
				break;
			case CONN_MAC_AWARE:
				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
				needattr = B_TRUE;
				break;
			case CONN_MAC_IMPLICIT:
				mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT;
				needattr = B_TRUE;
				break;
			}
			/* conn_ixa is examined under conn_lock. */
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER &&
			    connp->conn_ixa->ixa_tsl != NULL) {
				ts_label_t *tsl;

				tsl = connp->conn_ixa->ixa_tsl;
				mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
				mlp.tme_doi = label2doi(tsl);
				mlp.tme_label = *label2bslabel(tsl);
				needattr = B_TRUE;
			}
			mutex_exit(&connp->conn_lock);

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (connp->conn_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress = connp->conn_laddr_v4;
				ude.udpLocalPort = ntohs(connp->conn_lport);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v4;
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude.udpCreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				/* Index of this conn within the IPv4 table. */
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (connp->conn_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state  = state;
				ude6.udp6LocalAddress = connp->conn_laddr_v6;
				ude6.udp6LocalPort = ntohs(connp->conn_lport);
				/* Prefer an explicit scope id over bound_if. */
				mutex_enter(&connp->conn_lock);
				if (connp->conn_ixa->ixa_flags &
				    IXAF_SCOPEID_SET) {
					ude6.udp6IfIndex =
					    connp->conn_ixa->ixa_scopeid;
				} else {
					ude6.udp6IfIndex = connp->conn_bound_if;
				}
				mutex_exit(&connp->conn_lock);
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    connp->conn_faddr_v6;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(connp->conn_fport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (connp->conn_cpid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    connp->conn_cpid;
				ude6.udp6CreationTime = connp->conn_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				/* Index of this conn within the IPv6 table. */
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (mp2ctl);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to be
 * to do the appropriate locking.
2924 */ 2925 /* ARGSUSED */ 2926 int 2927 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 2928 uchar_t *ptr, int len) 2929 { 2930 switch (level) { 2931 case MIB2_UDP: 2932 return (0); 2933 default: 2934 return (1); 2935 } 2936 } 2937 2938 /* 2939 * This routine creates a T_UDERROR_IND message and passes it upstream. 2940 * The address and options are copied from the T_UNITDATA_REQ message 2941 * passed in mp. This message is freed. 2942 */ 2943 static void 2944 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err) 2945 { 2946 struct T_unitdata_req *tudr; 2947 mblk_t *mp1; 2948 uchar_t *destaddr; 2949 t_scalar_t destlen; 2950 uchar_t *optaddr; 2951 t_scalar_t optlen; 2952 2953 if ((mp->b_wptr < mp->b_rptr) || 2954 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 2955 goto done; 2956 } 2957 tudr = (struct T_unitdata_req *)mp->b_rptr; 2958 destaddr = mp->b_rptr + tudr->DEST_offset; 2959 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 2960 destaddr + tudr->DEST_length < mp->b_rptr || 2961 destaddr + tudr->DEST_length > mp->b_wptr) { 2962 goto done; 2963 } 2964 optaddr = mp->b_rptr + tudr->OPT_offset; 2965 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 2966 optaddr + tudr->OPT_length < mp->b_rptr || 2967 optaddr + tudr->OPT_length > mp->b_wptr) { 2968 goto done; 2969 } 2970 destlen = tudr->DEST_length; 2971 optlen = tudr->OPT_length; 2972 2973 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 2974 (char *)optaddr, optlen, err); 2975 if (mp1 != NULL) 2976 qreply(q, mp1); 2977 2978 done: 2979 freemsg(mp); 2980 } 2981 2982 /* 2983 * This routine removes a port number association from a stream. It 2984 * is called by udp_wput to handle T_UNBIND_REQ messages. 
2985 */ 2986 static void 2987 udp_tpi_unbind(queue_t *q, mblk_t *mp) 2988 { 2989 conn_t *connp = Q_TO_CONN(q); 2990 int error; 2991 2992 error = udp_do_unbind(connp); 2993 if (error) { 2994 if (error < 0) 2995 udp_err_ack(q, mp, -error, 0); 2996 else 2997 udp_err_ack(q, mp, TSYSERR, error); 2998 return; 2999 } 3000 3001 mp = mi_tpi_ok_ack_alloc(mp); 3002 ASSERT(mp != NULL); 3003 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 3004 qreply(q, mp); 3005 } 3006 3007 /* 3008 * Don't let port fall into the privileged range. 3009 * Since the extra privileged ports can be arbitrary we also 3010 * ensure that we exclude those from consideration. 3011 * us->us_epriv_ports is not sorted thus we loop over it until 3012 * there are no changes. 3013 */ 3014 static in_port_t 3015 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 3016 { 3017 int i; 3018 in_port_t nextport; 3019 boolean_t restart = B_FALSE; 3020 udp_stack_t *us = udp->udp_us; 3021 3022 if (random && udp_random_anon_port != 0) { 3023 (void) random_get_pseudo_bytes((uint8_t *)&port, 3024 sizeof (in_port_t)); 3025 /* 3026 * Unless changed by a sys admin, the smallest anon port 3027 * is 32768 and the largest anon port is 65535. It is 3028 * very likely (50%) for the random port to be smaller 3029 * than the smallest anon port. When that happens, 3030 * add port % (anon port range) to the smallest anon 3031 * port to get the random port. It should fall into the 3032 * valid anon port range. 
3033 */ 3034 if (port < us->us_smallest_anon_port) { 3035 port = us->us_smallest_anon_port + 3036 port % (us->us_largest_anon_port - 3037 us->us_smallest_anon_port); 3038 } 3039 } 3040 3041 retry: 3042 if (port < us->us_smallest_anon_port) 3043 port = us->us_smallest_anon_port; 3044 3045 if (port > us->us_largest_anon_port) { 3046 port = us->us_smallest_anon_port; 3047 if (restart) 3048 return (0); 3049 restart = B_TRUE; 3050 } 3051 3052 if (port < us->us_smallest_nonpriv_port) 3053 port = us->us_smallest_nonpriv_port; 3054 3055 for (i = 0; i < us->us_num_epriv_ports; i++) { 3056 if (port == us->us_epriv_ports[i]) { 3057 port++; 3058 /* 3059 * Make sure that the port is in the 3060 * valid range. 3061 */ 3062 goto retry; 3063 } 3064 } 3065 3066 if (is_system_labeled() && 3067 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 3068 port, IPPROTO_UDP, B_TRUE)) != 0) { 3069 port = nextport; 3070 goto retry; 3071 } 3072 3073 return (port); 3074 } 3075 3076 /* 3077 * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6 3078 * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from 3079 * the TPI options, otherwise we take them from msg_control. 3080 * If both sin and sin6 is set it is a connected socket and we use conn_faddr. 3081 * Always consumes mp; never consumes tudr_mp. 3082 */ 3083 static int 3084 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp, 3085 mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid) 3086 { 3087 udp_t *udp = connp->conn_udp; 3088 udp_stack_t *us = udp->udp_us; 3089 int error; 3090 ip_xmit_attr_t *ixa; 3091 ip_pkt_t *ipp; 3092 in6_addr_t v6src; 3093 in6_addr_t v6dst; 3094 in6_addr_t v6nexthop; 3095 in_port_t dstport; 3096 uint32_t flowinfo; 3097 uint_t srcid; 3098 int is_absreq_failure = 0; 3099 conn_opt_arg_t coas, *coa; 3100 3101 ASSERT(tudr_mp != NULL || msg != NULL); 3102 3103 /* 3104 * Get ixa before checking state to handle a disconnect race. 
3105 * 3106 * We need an exclusive copy of conn_ixa since the ancillary data 3107 * options might modify it. That copy has no pointers hence we 3108 * need to set them up once we've parsed the ancillary data. 3109 */ 3110 ixa = conn_get_ixa_exclusive(connp); 3111 if (ixa == NULL) { 3112 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3113 freemsg(mp); 3114 return (ENOMEM); 3115 } 3116 ASSERT(cr != NULL); 3117 ixa->ixa_cred = cr; 3118 ixa->ixa_cpid = pid; 3119 if (is_system_labeled()) { 3120 /* We need to restart with a label based on the cred */ 3121 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 3122 } 3123 3124 /* In case previous destination was multicast or multirt */ 3125 ip_attr_newdst(ixa); 3126 3127 /* Get a copy of conn_xmit_ipp since the options might change it */ 3128 ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP); 3129 if (ipp == NULL) { 3130 ixa_refrele(ixa); 3131 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3132 freemsg(mp); 3133 return (ENOMEM); 3134 } 3135 mutex_enter(&connp->conn_lock); 3136 error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP); 3137 mutex_exit(&connp->conn_lock); 3138 if (error != 0) { 3139 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3140 freemsg(mp); 3141 goto done; 3142 } 3143 3144 /* 3145 * Parse the options and update ixa and ipp as a result. 3146 * Note that ixa_tsl can be updated if SCM_UCRED. 3147 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl. 
3148 */ 3149 3150 coa = &coas; 3151 coa->coa_connp = connp; 3152 coa->coa_ixa = ixa; 3153 coa->coa_ipp = ipp; 3154 coa->coa_ancillary = B_TRUE; 3155 coa->coa_changed = 0; 3156 3157 if (msg != NULL) { 3158 error = process_auxiliary_options(connp, msg->msg_control, 3159 msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr); 3160 } else { 3161 struct T_unitdata_req *tudr; 3162 3163 tudr = (struct T_unitdata_req *)tudr_mp->b_rptr; 3164 ASSERT(tudr->PRIM_type == T_UNITDATA_REQ); 3165 error = tpi_optcom_buf(connp->conn_wq, tudr_mp, 3166 &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj, 3167 coa, &is_absreq_failure); 3168 } 3169 if (error != 0) { 3170 /* 3171 * Note: No special action needed in this 3172 * module for "is_absreq_failure" 3173 */ 3174 freemsg(mp); 3175 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3176 goto done; 3177 } 3178 ASSERT(is_absreq_failure == 0); 3179 3180 mutex_enter(&connp->conn_lock); 3181 /* 3182 * If laddr is unspecified then we look at sin6_src_id. 3183 * We will give precedence to a source address set with IPV6_PKTINFO 3184 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 3185 * want ip_attr_connect to select a source (since it can fail) when 3186 * IPV6_PKTINFO is specified. 3187 * If this doesn't result in a source address then we get a source 3188 * from ip_attr_connect() below. 
3189 */ 3190 v6src = connp->conn_saddr_v6; 3191 if (sin != NULL) { 3192 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 3193 dstport = sin->sin_port; 3194 flowinfo = 0; 3195 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3196 ixa->ixa_flags |= IXAF_IS_IPV4; 3197 } else if (sin6 != NULL) { 3198 v6dst = sin6->sin6_addr; 3199 dstport = sin6->sin6_port; 3200 flowinfo = sin6->sin6_flowinfo; 3201 srcid = sin6->__sin6_src_id; 3202 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 3203 ixa->ixa_scopeid = sin6->sin6_scope_id; 3204 ixa->ixa_flags |= IXAF_SCOPEID_SET; 3205 } else { 3206 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 3207 } 3208 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 3209 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 3210 connp->conn_netstack); 3211 } 3212 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 3213 ixa->ixa_flags |= IXAF_IS_IPV4; 3214 else 3215 ixa->ixa_flags &= ~IXAF_IS_IPV4; 3216 } else { 3217 /* Connected case */ 3218 v6dst = connp->conn_faddr_v6; 3219 dstport = connp->conn_fport; 3220 flowinfo = connp->conn_flowinfo; 3221 } 3222 mutex_exit(&connp->conn_lock); 3223 3224 /* Handle IPV6_PKTINFO setting source address. */ 3225 if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && 3226 (ipp->ipp_fields & IPPF_ADDR)) { 3227 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3228 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3229 v6src = ipp->ipp_addr; 3230 } else { 3231 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 3232 v6src = ipp->ipp_addr; 3233 } 3234 } 3235 3236 ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop); 3237 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 3238 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 3239 3240 switch (error) { 3241 case 0: 3242 break; 3243 case EADDRNOTAVAIL: 3244 /* 3245 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3246 * Don't have the application see that errno 3247 */ 3248 error = ENETUNREACH; 3249 goto failed; 3250 case ENETDOWN: 3251 /* 3252 * Have !ipif_addr_ready address; drop packet silently 3253 * until we can get applications to not send until we 3254 * are ready. 3255 */ 3256 error = 0; 3257 goto failed; 3258 case EHOSTUNREACH: 3259 case ENETUNREACH: 3260 if (ixa->ixa_ire != NULL) { 3261 /* 3262 * Let conn_ip_output/ire_send_noroute return 3263 * the error and send any local ICMP error. 3264 */ 3265 error = 0; 3266 break; 3267 } 3268 /* FALLTHRU */ 3269 default: 3270 failed: 3271 freemsg(mp); 3272 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3273 goto done; 3274 } 3275 3276 /* 3277 * We might be going to a different destination than last time, 3278 * thus check that TX allows the communication and compute any 3279 * needed label. 3280 * 3281 * TSOL Note: We have an exclusive ipp and ixa for this thread so we 3282 * don't have to worry about concurrent threads. 3283 */ 3284 if (is_system_labeled()) { 3285 /* Using UDP MLP requires SCM_UCRED from user */ 3286 if (connp->conn_mlp_type != mlptSingle && 3287 !((ixa->ixa_flags & IXAF_UCRED_TSL))) { 3288 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3289 error = ECONNREFUSED; 3290 freemsg(mp); 3291 goto done; 3292 } 3293 /* 3294 * Check whether Trusted Solaris policy allows communication 3295 * with this host, and pretend that the destination is 3296 * unreachable if not. 3297 * Compute any needed label and place it in ipp_label_v4/v6. 3298 * 3299 * Later conn_build_hdr_template/conn_prepend_hdr takes 3300 * ipp_label_v4/v6 to form the packet. 3301 * 3302 * Tsol note: We have ipp structure local to this thread so 3303 * no locking is needed. 
3304 */ 3305 error = conn_update_label(connp, ixa, &v6dst, ipp); 3306 if (error != 0) { 3307 freemsg(mp); 3308 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3309 goto done; 3310 } 3311 } 3312 mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport, 3313 flowinfo, mp, &error); 3314 if (mp == NULL) { 3315 ASSERT(error != 0); 3316 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3317 goto done; 3318 } 3319 if (ixa->ixa_pktlen > IP_MAXPACKET) { 3320 error = EMSGSIZE; 3321 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3322 freemsg(mp); 3323 goto done; 3324 } 3325 /* We're done. Pass the packet to ip. */ 3326 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3327 3328 error = conn_ip_output(mp, ixa); 3329 /* No udpOutErrors if an error since IP increases its error counter */ 3330 switch (error) { 3331 case 0: 3332 break; 3333 case EWOULDBLOCK: 3334 (void) ixa_check_drain_insert(connp, ixa); 3335 error = 0; 3336 break; 3337 case EADDRNOTAVAIL: 3338 /* 3339 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3340 * Don't have the application see that errno 3341 */ 3342 error = ENETUNREACH; 3343 /* FALLTHRU */ 3344 default: 3345 mutex_enter(&connp->conn_lock); 3346 /* 3347 * Clear the source and v6lastdst so we call ip_attr_connect 3348 * for the next packet and try to pick a better source. 3349 */ 3350 if (connp->conn_mcbc_bind) 3351 connp->conn_saddr_v6 = ipv6_all_zeros; 3352 else 3353 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3354 connp->conn_v6lastdst = ipv6_all_zeros; 3355 mutex_exit(&connp->conn_lock); 3356 break; 3357 } 3358 done: 3359 ixa_refrele(ixa); 3360 ip_pkt_free(ipp); 3361 kmem_free(ipp, sizeof (*ipp)); 3362 return (error); 3363 } 3364 3365 /* 3366 * Handle sending an M_DATA for a connected socket. 3367 * Handles both IPv4 and IPv6. 
3368 */ 3369 static int 3370 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid) 3371 { 3372 udp_t *udp = connp->conn_udp; 3373 udp_stack_t *us = udp->udp_us; 3374 int error; 3375 ip_xmit_attr_t *ixa; 3376 3377 /* 3378 * If no other thread is using conn_ixa this just gets a reference to 3379 * conn_ixa. Otherwise we get a safe copy of conn_ixa. 3380 */ 3381 ixa = conn_get_ixa(connp, B_FALSE); 3382 if (ixa == NULL) { 3383 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3384 freemsg(mp); 3385 return (ENOMEM); 3386 } 3387 3388 ASSERT(cr != NULL); 3389 ixa->ixa_cred = cr; 3390 ixa->ixa_cpid = pid; 3391 3392 mutex_enter(&connp->conn_lock); 3393 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6, 3394 connp->conn_fport, connp->conn_flowinfo, &error); 3395 3396 if (mp == NULL) { 3397 ASSERT(error != 0); 3398 mutex_exit(&connp->conn_lock); 3399 ixa_refrele(ixa); 3400 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3401 freemsg(mp); 3402 return (error); 3403 } 3404 3405 /* 3406 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3407 * safe copy, then we need to fill in any pointers in it. 3408 */ 3409 if (ixa->ixa_ire == NULL) { 3410 in6_addr_t faddr, saddr; 3411 in6_addr_t nexthop; 3412 in_port_t fport; 3413 3414 saddr = connp->conn_saddr_v6; 3415 faddr = connp->conn_faddr_v6; 3416 fport = connp->conn_fport; 3417 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop); 3418 mutex_exit(&connp->conn_lock); 3419 3420 error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop, 3421 fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | 3422 IPDF_IPSEC); 3423 switch (error) { 3424 case 0: 3425 break; 3426 case EADDRNOTAVAIL: 3427 /* 3428 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
3429 * Don't have the application see that errno 3430 */ 3431 error = ENETUNREACH; 3432 goto failed; 3433 case ENETDOWN: 3434 /* 3435 * Have !ipif_addr_ready address; drop packet silently 3436 * until we can get applications to not send until we 3437 * are ready. 3438 */ 3439 error = 0; 3440 goto failed; 3441 case EHOSTUNREACH: 3442 case ENETUNREACH: 3443 if (ixa->ixa_ire != NULL) { 3444 /* 3445 * Let conn_ip_output/ire_send_noroute return 3446 * the error and send any local ICMP error. 3447 */ 3448 error = 0; 3449 break; 3450 } 3451 /* FALLTHRU */ 3452 default: 3453 failed: 3454 ixa_refrele(ixa); 3455 freemsg(mp); 3456 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3457 return (error); 3458 } 3459 } else { 3460 /* Done with conn_t */ 3461 mutex_exit(&connp->conn_lock); 3462 } 3463 ASSERT(ixa->ixa_ire != NULL); 3464 3465 /* We're done. Pass the packet to ip. */ 3466 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3467 3468 error = conn_ip_output(mp, ixa); 3469 /* No udpOutErrors if an error since IP increases its error counter */ 3470 switch (error) { 3471 case 0: 3472 break; 3473 case EWOULDBLOCK: 3474 (void) ixa_check_drain_insert(connp, ixa); 3475 error = 0; 3476 break; 3477 case EADDRNOTAVAIL: 3478 /* 3479 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3480 * Don't have the application see that errno 3481 */ 3482 error = ENETUNREACH; 3483 break; 3484 } 3485 ixa_refrele(ixa); 3486 return (error); 3487 } 3488 3489 /* 3490 * Handle sending an M_DATA to the last destination. 3491 * Handles both IPv4 and IPv6. 3492 * 3493 * NOTE: The caller must hold conn_lock and we drop it here. 
3494 */ 3495 static int 3496 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid, 3497 ip_xmit_attr_t *ixa) 3498 { 3499 udp_t *udp = connp->conn_udp; 3500 udp_stack_t *us = udp->udp_us; 3501 int error; 3502 3503 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3504 ASSERT(ixa != NULL); 3505 3506 ASSERT(cr != NULL); 3507 ixa->ixa_cred = cr; 3508 ixa->ixa_cpid = pid; 3509 3510 mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc, 3511 connp->conn_lastdstport, connp->conn_lastflowinfo, &error); 3512 3513 if (mp == NULL) { 3514 ASSERT(error != 0); 3515 mutex_exit(&connp->conn_lock); 3516 ixa_refrele(ixa); 3517 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3518 freemsg(mp); 3519 return (error); 3520 } 3521 3522 /* 3523 * In case we got a safe copy of conn_ixa, or if opt_set made us a new 3524 * safe copy, then we need to fill in any pointers in it. 3525 */ 3526 if (ixa->ixa_ire == NULL) { 3527 in6_addr_t lastdst, lastsrc; 3528 in6_addr_t nexthop; 3529 in_port_t lastport; 3530 3531 lastsrc = connp->conn_v6lastsrc; 3532 lastdst = connp->conn_v6lastdst; 3533 lastport = connp->conn_lastdstport; 3534 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop); 3535 mutex_exit(&connp->conn_lock); 3536 3537 error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst, 3538 &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC | 3539 IPDF_VERIFY_DST | IPDF_IPSEC); 3540 switch (error) { 3541 case 0: 3542 break; 3543 case EADDRNOTAVAIL: 3544 /* 3545 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3546 * Don't have the application see that errno 3547 */ 3548 error = ENETUNREACH; 3549 goto failed; 3550 case ENETDOWN: 3551 /* 3552 * Have !ipif_addr_ready address; drop packet silently 3553 * until we can get applications to not send until we 3554 * are ready. 
3555 */ 3556 error = 0; 3557 goto failed; 3558 case EHOSTUNREACH: 3559 case ENETUNREACH: 3560 if (ixa->ixa_ire != NULL) { 3561 /* 3562 * Let conn_ip_output/ire_send_noroute return 3563 * the error and send any local ICMP error. 3564 */ 3565 error = 0; 3566 break; 3567 } 3568 /* FALLTHRU */ 3569 default: 3570 failed: 3571 ixa_refrele(ixa); 3572 freemsg(mp); 3573 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3574 return (error); 3575 } 3576 } else { 3577 /* Done with conn_t */ 3578 mutex_exit(&connp->conn_lock); 3579 } 3580 3581 /* We're done. Pass the packet to ip. */ 3582 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 3583 3584 error = conn_ip_output(mp, ixa); 3585 /* No udpOutErrors if an error since IP increases its error counter */ 3586 switch (error) { 3587 case 0: 3588 break; 3589 case EWOULDBLOCK: 3590 (void) ixa_check_drain_insert(connp, ixa); 3591 error = 0; 3592 break; 3593 case EADDRNOTAVAIL: 3594 /* 3595 * IXAF_VERIFY_SOURCE tells us to pick a better source. 3596 * Don't have the application see that errno 3597 */ 3598 error = ENETUNREACH; 3599 /* FALLTHRU */ 3600 default: 3601 mutex_enter(&connp->conn_lock); 3602 /* 3603 * Clear the source and v6lastdst so we call ip_attr_connect 3604 * for the next packet and try to pick a better source. 3605 */ 3606 if (connp->conn_mcbc_bind) 3607 connp->conn_saddr_v6 = ipv6_all_zeros; 3608 else 3609 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 3610 connp->conn_v6lastdst = ipv6_all_zeros; 3611 mutex_exit(&connp->conn_lock); 3612 break; 3613 } 3614 ixa_refrele(ixa); 3615 return (error); 3616 } 3617 3618 3619 /* 3620 * Prepend the header template and then fill in the source and 3621 * flowinfo. The caller needs to handle the destination address since 3622 * it's setting is different if rthdr or source route. 3623 * 3624 * Returns NULL is allocation failed or if the packet would exceed IP_MAXPACKET. 3625 * When it returns NULL it sets errorp. 
3626 */ 3627 static mblk_t * 3628 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp, 3629 const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp) 3630 { 3631 udp_t *udp = connp->conn_udp; 3632 udp_stack_t *us = udp->udp_us; 3633 boolean_t insert_spi = udp->udp_nat_t_endpoint; 3634 uint_t pktlen; 3635 uint_t alloclen; 3636 uint_t copylen; 3637 uint8_t *iph; 3638 uint_t ip_hdr_length; 3639 udpha_t *udpha; 3640 uint32_t cksum; 3641 ip_pkt_t *ipp; 3642 3643 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3644 3645 /* 3646 * Copy the header template and leave space for an SPI 3647 */ 3648 copylen = connp->conn_ht_iphc_len; 3649 alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0); 3650 pktlen = alloclen + msgdsize(mp); 3651 if (pktlen > IP_MAXPACKET) { 3652 freemsg(mp); 3653 *errorp = EMSGSIZE; 3654 return (NULL); 3655 } 3656 ixa->ixa_pktlen = pktlen; 3657 3658 /* check/fix buffer config, setup pointers into it */ 3659 iph = mp->b_rptr - alloclen; 3660 if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) { 3661 mblk_t *mp1; 3662 3663 mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED); 3664 if (mp1 == NULL) { 3665 freemsg(mp); 3666 *errorp = ENOMEM; 3667 return (NULL); 3668 } 3669 mp1->b_wptr = DB_LIM(mp1); 3670 mp1->b_cont = mp; 3671 mp = mp1; 3672 iph = (mp->b_wptr - alloclen); 3673 } 3674 mp->b_rptr = iph; 3675 bcopy(connp->conn_ht_iphc, iph, copylen); 3676 ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc); 3677 3678 ixa->ixa_ip_hdr_length = ip_hdr_length; 3679 udpha = (udpha_t *)(iph + ip_hdr_length); 3680 3681 /* 3682 * Setup header length and prepare for ULP checksum done in IP. 3683 * udp_build_hdr_template has already massaged any routing header 3684 * and placed the result in conn_sum. 3685 * 3686 * We make it easy for IP to include our pseudo header 3687 * by putting our length in uha_checksum. 
3688 */ 3689 cksum = pktlen - ip_hdr_length; 3690 udpha->uha_length = htons(cksum); 3691 3692 cksum += connp->conn_sum; 3693 cksum = (cksum >> 16) + (cksum & 0xFFFF); 3694 ASSERT(cksum < 0x10000); 3695 3696 ipp = &connp->conn_xmit_ipp; 3697 if (ixa->ixa_flags & IXAF_IS_IPV4) { 3698 ipha_t *ipha = (ipha_t *)iph; 3699 3700 ipha->ipha_length = htons((uint16_t)pktlen); 3701 3702 /* IP does the checksum if uha_checksum is non-zero */ 3703 if (us->us_do_checksum) 3704 udpha->uha_checksum = htons(cksum); 3705 3706 /* if IP_PKTINFO specified an addres it wins over bind() */ 3707 if ((ipp->ipp_fields & IPPF_ADDR) && 3708 IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3709 ASSERT(ipp->ipp_addr_v4 != INADDR_ANY); 3710 ipha->ipha_src = ipp->ipp_addr_v4; 3711 } else { 3712 IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src); 3713 } 3714 } else { 3715 ip6_t *ip6h = (ip6_t *)iph; 3716 3717 ip6h->ip6_plen = htons((uint16_t)(pktlen - IPV6_HDR_LEN)); 3718 udpha->uha_checksum = htons(cksum); 3719 3720 /* if IP_PKTINFO specified an addres it wins over bind() */ 3721 if ((ipp->ipp_fields & IPPF_ADDR) && 3722 !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) { 3723 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr)); 3724 ip6h->ip6_src = ipp->ipp_addr; 3725 } else { 3726 ip6h->ip6_src = *v6src; 3727 } 3728 ip6h->ip6_vcf = 3729 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 3730 (flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 3731 if (ipp->ipp_fields & IPPF_TCLASS) { 3732 /* Overrides the class part of flowinfo */ 3733 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 3734 ipp->ipp_tclass); 3735 } 3736 } 3737 3738 /* Insert all-0s SPI now. 
*/ 3739 if (insert_spi) 3740 *((uint32_t *)(udpha + 1)) = 0; 3741 3742 udpha->uha_dst_port = dstport; 3743 return (mp); 3744 } 3745 3746 /* 3747 * Send a T_UDERR_IND in response to an M_DATA 3748 */ 3749 static void 3750 udp_ud_err_connected(conn_t *connp, t_scalar_t error) 3751 { 3752 struct sockaddr_storage ss; 3753 sin_t *sin; 3754 sin6_t *sin6; 3755 struct sockaddr *addr; 3756 socklen_t addrlen; 3757 mblk_t *mp1; 3758 3759 mutex_enter(&connp->conn_lock); 3760 /* Initialize addr and addrlen as if they're passed in */ 3761 if (connp->conn_family == AF_INET) { 3762 sin = (sin_t *)&ss; 3763 *sin = sin_null; 3764 sin->sin_family = AF_INET; 3765 sin->sin_port = connp->conn_fport; 3766 sin->sin_addr.s_addr = connp->conn_faddr_v4; 3767 addr = (struct sockaddr *)sin; 3768 addrlen = sizeof (*sin); 3769 } else { 3770 sin6 = (sin6_t *)&ss; 3771 *sin6 = sin6_null; 3772 sin6->sin6_family = AF_INET6; 3773 sin6->sin6_port = connp->conn_fport; 3774 sin6->sin6_flowinfo = connp->conn_flowinfo; 3775 sin6->sin6_addr = connp->conn_faddr_v6; 3776 if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) && 3777 (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) { 3778 sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid; 3779 } else { 3780 sin6->sin6_scope_id = 0; 3781 } 3782 sin6->__sin6_src_id = 0; 3783 addr = (struct sockaddr *)sin6; 3784 addrlen = sizeof (*sin6); 3785 } 3786 mutex_exit(&connp->conn_lock); 3787 3788 mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error); 3789 if (mp1 != NULL) 3790 putnext(connp->conn_rq, mp1); 3791 } 3792 3793 /* 3794 * This routine handles all messages passed downstream. It either 3795 * consumes the message or passes it downstream; it never queues a 3796 * a message. 3797 * 3798 * Also entry point for sockfs when udp is in "direct sockfs" mode. 
This mode 3799 * is valid when we are directly beneath the stream head, and thus sockfs 3800 * is able to bypass STREAMS and directly call us, passing along the sockaddr 3801 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 3802 * connected endpoints. 3803 */ 3804 void 3805 udp_wput(queue_t *q, mblk_t *mp) 3806 { 3807 sin6_t *sin6; 3808 sin_t *sin = NULL; 3809 uint_t srcid; 3810 conn_t *connp = Q_TO_CONN(q); 3811 udp_t *udp = connp->conn_udp; 3812 int error = 0; 3813 struct sockaddr *addr = NULL; 3814 socklen_t addrlen; 3815 udp_stack_t *us = udp->udp_us; 3816 struct T_unitdata_req *tudr; 3817 mblk_t *data_mp; 3818 ushort_t ipversion; 3819 cred_t *cr; 3820 pid_t pid; 3821 3822 /* 3823 * We directly handle several cases here: T_UNITDATA_REQ message 3824 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 3825 * socket. 3826 */ 3827 switch (DB_TYPE(mp)) { 3828 case M_DATA: 3829 if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) { 3830 /* Not connected; address is required */ 3831 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3832 UDP_DBGSTAT(us, udp_data_notconn); 3833 UDP_STAT(us, udp_out_err_notconn); 3834 freemsg(mp); 3835 return; 3836 } 3837 /* 3838 * All Solaris components should pass a db_credp 3839 * for this message, hence we ASSERT. 3840 * On production kernels we return an error to be robust against 3841 * random streams modules sitting on top of us. 
3842 */ 3843 cr = msg_getcred(mp, &pid); 3844 ASSERT(cr != NULL); 3845 if (cr == NULL) { 3846 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 3847 freemsg(mp); 3848 return; 3849 } 3850 ASSERT(udp->udp_issocket); 3851 UDP_DBGSTAT(us, udp_data_conn); 3852 error = udp_output_connected(connp, mp, cr, pid); 3853 if (error != 0) { 3854 UDP_STAT(us, udp_out_err_output); 3855 if (connp->conn_rq != NULL) 3856 udp_ud_err_connected(connp, (t_scalar_t)error); 3857 #ifdef DEBUG 3858 printf("udp_output_connected returned %d\n", error); 3859 #endif 3860 } 3861 return; 3862 3863 case M_PROTO: 3864 case M_PCPROTO: 3865 tudr = (struct T_unitdata_req *)mp->b_rptr; 3866 if (MBLKL(mp) < sizeof (*tudr) || 3867 ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) { 3868 udp_wput_other(q, mp); 3869 return; 3870 } 3871 break; 3872 3873 default: 3874 udp_wput_other(q, mp); 3875 return; 3876 } 3877 3878 /* Handle valid T_UNITDATA_REQ here */ 3879 data_mp = mp->b_cont; 3880 if (data_mp == NULL) { 3881 error = EPROTO; 3882 goto ud_error2; 3883 } 3884 mp->b_cont = NULL; 3885 3886 if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) { 3887 error = EADDRNOTAVAIL; 3888 goto ud_error2; 3889 } 3890 3891 /* 3892 * All Solaris components should pass a db_credp 3893 * for this TPI message, hence we should ASSERT. 3894 * However, RPC (svc_clts_ksend) does this odd thing where it 3895 * passes the options from a T_UNITDATA_IND unchanged in a 3896 * T_UNITDATA_REQ. While that is the right thing to do for 3897 * some options, SCM_UCRED being the key one, this also makes it 3898 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 3899 */ 3900 cr = msg_getcred(mp, &pid); 3901 if (cr == NULL) { 3902 cr = connp->conn_cred; 3903 pid = connp->conn_cpid; 3904 } 3905 3906 /* 3907 * If a port has not been bound to the stream, fail. 3908 * This is not a problem when sockfs is directly 3909 * above us, because it will ensure that the socket 3910 * is first bound before allowing data to be sent. 
3911 */ 3912 if (udp->udp_state == TS_UNBND) { 3913 error = EPROTO; 3914 goto ud_error2; 3915 } 3916 addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset]; 3917 addrlen = tudr->DEST_length; 3918 3919 switch (connp->conn_family) { 3920 case AF_INET6: 3921 sin6 = (sin6_t *)addr; 3922 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 3923 (sin6->sin6_family != AF_INET6)) { 3924 error = EADDRNOTAVAIL; 3925 goto ud_error2; 3926 } 3927 3928 srcid = sin6->__sin6_src_id; 3929 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 3930 /* 3931 * Destination is a non-IPv4-compatible IPv6 address. 3932 * Send out an IPv6 format packet. 3933 */ 3934 3935 /* 3936 * If the local address is a mapped address return 3937 * an error. 3938 * It would be possible to send an IPv6 packet but the 3939 * response would never make it back to the application 3940 * since it is bound to a mapped address. 3941 */ 3942 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 3943 error = EADDRNOTAVAIL; 3944 goto ud_error2; 3945 } 3946 3947 UDP_DBGSTAT(us, udp_out_ipv6); 3948 3949 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 3950 sin6->sin6_addr = ipv6_loopback; 3951 ipversion = IPV6_VERSION; 3952 } else { 3953 if (connp->conn_ipv6_v6only) { 3954 error = EADDRNOTAVAIL; 3955 goto ud_error2; 3956 } 3957 3958 /* 3959 * If the local address is not zero or a mapped address 3960 * return an error. It would be possible to send an 3961 * IPv4 packet but the response would never make it 3962 * back to the application since it is bound to a 3963 * non-mapped address. 
3964 */ 3965 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 3966 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 3967 error = EADDRNOTAVAIL; 3968 goto ud_error2; 3969 } 3970 UDP_DBGSTAT(us, udp_out_mapped); 3971 3972 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 3973 V4_PART_OF_V6(sin6->sin6_addr) = 3974 htonl(INADDR_LOOPBACK); 3975 } 3976 ipversion = IPV4_VERSION; 3977 } 3978 3979 if (tudr->OPT_length != 0) { 3980 /* 3981 * If we are connected then the destination needs to be 3982 * the same as the connected one. 3983 */ 3984 if (udp->udp_state == TS_DATA_XFER && 3985 !conn_same_as_last_v6(connp, sin6)) { 3986 error = EISCONN; 3987 goto ud_error2; 3988 } 3989 UDP_STAT(us, udp_out_opt); 3990 error = udp_output_ancillary(connp, NULL, sin6, 3991 data_mp, mp, NULL, cr, pid); 3992 } else { 3993 ip_xmit_attr_t *ixa; 3994 3995 /* 3996 * We have to allocate an ip_xmit_attr_t before we grab 3997 * conn_lock and we need to hold conn_lock once we've 3998 * checked conn_same_as_last_v6 to handle concurrent 3999 * send* calls on a socket. 
4000 */ 4001 ixa = conn_get_ixa(connp, B_FALSE); 4002 if (ixa == NULL) { 4003 error = ENOMEM; 4004 goto ud_error2; 4005 } 4006 mutex_enter(&connp->conn_lock); 4007 4008 if (conn_same_as_last_v6(connp, sin6) && 4009 connp->conn_lastsrcid == srcid && 4010 ipsec_outbound_policy_current(ixa)) { 4011 UDP_DBGSTAT(us, udp_out_lastdst); 4012 /* udp_output_lastdst drops conn_lock */ 4013 error = udp_output_lastdst(connp, data_mp, cr, 4014 pid, ixa); 4015 } else { 4016 UDP_DBGSTAT(us, udp_out_diffdst); 4017 /* udp_output_newdst drops conn_lock */ 4018 error = udp_output_newdst(connp, data_mp, NULL, 4019 sin6, ipversion, cr, pid, ixa); 4020 } 4021 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 4022 } 4023 if (error == 0) { 4024 freeb(mp); 4025 return; 4026 } 4027 break; 4028 4029 case AF_INET: 4030 sin = (sin_t *)addr; 4031 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 4032 (sin->sin_family != AF_INET)) { 4033 error = EADDRNOTAVAIL; 4034 goto ud_error2; 4035 } 4036 UDP_DBGSTAT(us, udp_out_ipv4); 4037 if (sin->sin_addr.s_addr == INADDR_ANY) 4038 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 4039 ipversion = IPV4_VERSION; 4040 4041 srcid = 0; 4042 if (tudr->OPT_length != 0) { 4043 /* 4044 * If we are connected then the destination needs to be 4045 * the same as the connected one. 4046 */ 4047 if (udp->udp_state == TS_DATA_XFER && 4048 !conn_same_as_last_v4(connp, sin)) { 4049 error = EISCONN; 4050 goto ud_error2; 4051 } 4052 UDP_STAT(us, udp_out_opt); 4053 error = udp_output_ancillary(connp, sin, NULL, 4054 data_mp, mp, NULL, cr, pid); 4055 } else { 4056 ip_xmit_attr_t *ixa; 4057 4058 /* 4059 * We have to allocate an ip_xmit_attr_t before we grab 4060 * conn_lock and we need to hold conn_lock once we've 4061 * checked conn_same_as_last_v4 to handle concurrent 4062 * send* calls on a socket. 
4063 */ 4064 ixa = conn_get_ixa(connp, B_FALSE); 4065 if (ixa == NULL) { 4066 error = ENOMEM; 4067 goto ud_error2; 4068 } 4069 mutex_enter(&connp->conn_lock); 4070 4071 if (conn_same_as_last_v4(connp, sin) && 4072 ipsec_outbound_policy_current(ixa)) { 4073 UDP_DBGSTAT(us, udp_out_lastdst); 4074 /* udp_output_lastdst drops conn_lock */ 4075 error = udp_output_lastdst(connp, data_mp, cr, 4076 pid, ixa); 4077 } else { 4078 UDP_DBGSTAT(us, udp_out_diffdst); 4079 /* udp_output_newdst drops conn_lock */ 4080 error = udp_output_newdst(connp, data_mp, sin, 4081 NULL, ipversion, cr, pid, ixa); 4082 } 4083 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 4084 } 4085 if (error == 0) { 4086 freeb(mp); 4087 return; 4088 } 4089 break; 4090 } 4091 UDP_STAT(us, udp_out_err_output); 4092 ASSERT(mp != NULL); 4093 /* mp is freed by the following routine */ 4094 udp_ud_err(q, mp, (t_scalar_t)error); 4095 return; 4096 4097 ud_error2: 4098 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 4099 freemsg(data_mp); 4100 UDP_STAT(us, udp_out_err_output); 4101 ASSERT(mp != NULL); 4102 /* mp is freed by the following routine */ 4103 udp_ud_err(q, mp, (t_scalar_t)error); 4104 } 4105 4106 /* 4107 * Handle the case of the IP address, port, flow label being different 4108 * for both IPv4 and IPv6. 4109 * 4110 * NOTE: The caller must hold conn_lock and we drop it here. 
4111 */ 4112 static int 4113 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6, 4114 ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa) 4115 { 4116 uint_t srcid; 4117 uint32_t flowinfo; 4118 udp_t *udp = connp->conn_udp; 4119 int error = 0; 4120 ip_xmit_attr_t *oldixa; 4121 udp_stack_t *us = udp->udp_us; 4122 in6_addr_t v6src; 4123 in6_addr_t v6dst; 4124 in6_addr_t v6nexthop; 4125 in_port_t dstport; 4126 4127 ASSERT(MUTEX_HELD(&connp->conn_lock)); 4128 ASSERT(ixa != NULL); 4129 /* 4130 * We hold conn_lock across all the use and modifications of 4131 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they 4132 * stay consistent. 4133 */ 4134 4135 ASSERT(cr != NULL); 4136 ixa->ixa_cred = cr; 4137 ixa->ixa_cpid = pid; 4138 if (is_system_labeled()) { 4139 /* We need to restart with a label based on the cred */ 4140 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 4141 } 4142 4143 /* 4144 * If we are connected then the destination needs to be the 4145 * same as the connected one, which is not the case here since we 4146 * checked for that above. 4147 */ 4148 if (udp->udp_state == TS_DATA_XFER) { 4149 mutex_exit(&connp->conn_lock); 4150 error = EISCONN; 4151 goto ud_error; 4152 } 4153 4154 /* In case previous destination was multicast or multirt */ 4155 ip_attr_newdst(ixa); 4156 4157 /* 4158 * If laddr is unspecified then we look at sin6_src_id. 4159 * We will give precedence to a source address set with IPV6_PKTINFO 4160 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't 4161 * want ip_attr_connect to select a source (since it can fail) when 4162 * IPV6_PKTINFO is specified. 4163 * If this doesn't result in a source address then we get a source 4164 * from ip_attr_connect() below. 
4165 */ 4166 v6src = connp->conn_saddr_v6; 4167 if (sin != NULL) { 4168 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst); 4169 dstport = sin->sin_port; 4170 flowinfo = 0; 4171 srcid = 0; 4172 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4173 if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) { 4174 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4175 connp->conn_netstack); 4176 } 4177 ixa->ixa_flags |= IXAF_IS_IPV4; 4178 } else { 4179 v6dst = sin6->sin6_addr; 4180 dstport = sin6->sin6_port; 4181 flowinfo = sin6->sin6_flowinfo; 4182 srcid = sin6->__sin6_src_id; 4183 if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) { 4184 ixa->ixa_scopeid = sin6->sin6_scope_id; 4185 ixa->ixa_flags |= IXAF_SCOPEID_SET; 4186 } else { 4187 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 4188 } 4189 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 4190 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 4191 connp->conn_netstack); 4192 } 4193 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) 4194 ixa->ixa_flags |= IXAF_IS_IPV4; 4195 else 4196 ixa->ixa_flags &= ~IXAF_IS_IPV4; 4197 } 4198 /* Handle IPV6_PKTINFO setting source address. */ 4199 if (IN6_IS_ADDR_UNSPECIFIED(&v6src) && 4200 (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR)) { 4201 ip_pkt_t *ipp = &connp->conn_xmit_ipp; 4202 4203 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4204 if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4205 v6src = ipp->ipp_addr; 4206 } else { 4207 if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) 4208 v6src = ipp->ipp_addr; 4209 } 4210 } 4211 4212 ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop); 4213 mutex_exit(&connp->conn_lock); 4214 4215 error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport, 4216 &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC); 4217 switch (error) { 4218 case 0: 4219 break; 4220 case EADDRNOTAVAIL: 4221 /* 4222 * IXAF_VERIFY_SOURCE tells us to pick a better source. 
4223 * Don't have the application see that errno 4224 */ 4225 error = ENETUNREACH; 4226 goto failed; 4227 case ENETDOWN: 4228 /* 4229 * Have !ipif_addr_ready address; drop packet silently 4230 * until we can get applications to not send until we 4231 * are ready. 4232 */ 4233 error = 0; 4234 goto failed; 4235 case EHOSTUNREACH: 4236 case ENETUNREACH: 4237 if (ixa->ixa_ire != NULL) { 4238 /* 4239 * Let conn_ip_output/ire_send_noroute return 4240 * the error and send any local ICMP error. 4241 */ 4242 error = 0; 4243 break; 4244 } 4245 /* FALLTHRU */ 4246 failed: 4247 default: 4248 goto ud_error; 4249 } 4250 4251 4252 /* 4253 * Cluster note: we let the cluster hook know that we are sending to a 4254 * new address and/or port. 4255 */ 4256 if (cl_inet_connect2 != NULL) { 4257 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 4258 if (error != 0) { 4259 error = EHOSTUNREACH; 4260 goto ud_error; 4261 } 4262 } 4263 4264 mutex_enter(&connp->conn_lock); 4265 /* 4266 * While we dropped the lock some other thread might have connected 4267 * this socket. If so we bail out with EISCONN to ensure that the 4268 * connecting thread is the one that updates conn_ixa, conn_ht_* 4269 * and conn_*last*. 4270 */ 4271 if (udp->udp_state == TS_DATA_XFER) { 4272 mutex_exit(&connp->conn_lock); 4273 error = EISCONN; 4274 goto ud_error; 4275 } 4276 4277 /* 4278 * We need to rebuild the headers if 4279 * - we are labeling packets (could be different for different 4280 * destinations) 4281 * - we have a source route (or routing header) since we need to 4282 * massage that to get the pseudo-header checksum 4283 * - the IP version is different than the last time 4284 * - a socket option with COA_HEADER_CHANGED has been set which 4285 * set conn_v6lastdst to zero. 4286 * 4287 * Otherwise the prepend function will just update the src, dst, 4288 * dstport, and flow label. 
4289 */ 4290 if (is_system_labeled()) { 4291 /* TX MLP requires SCM_UCRED and don't have that here */ 4292 if (connp->conn_mlp_type != mlptSingle) { 4293 mutex_exit(&connp->conn_lock); 4294 error = ECONNREFUSED; 4295 goto ud_error; 4296 } 4297 /* 4298 * Check whether Trusted Solaris policy allows communication 4299 * with this host, and pretend that the destination is 4300 * unreachable if not. 4301 * Compute any needed label and place it in ipp_label_v4/v6. 4302 * 4303 * Later conn_build_hdr_template/conn_prepend_hdr takes 4304 * ipp_label_v4/v6 to form the packet. 4305 * 4306 * Tsol note: Since we hold conn_lock we know no other 4307 * thread manipulates conn_xmit_ipp. 4308 */ 4309 error = conn_update_label(connp, ixa, &v6dst, 4310 &connp->conn_xmit_ipp); 4311 if (error != 0) { 4312 mutex_exit(&connp->conn_lock); 4313 goto ud_error; 4314 } 4315 /* Rebuild the header template */ 4316 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4317 flowinfo); 4318 if (error != 0) { 4319 mutex_exit(&connp->conn_lock); 4320 goto ud_error; 4321 } 4322 } else if ((connp->conn_xmit_ipp.ipp_fields & 4323 (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) || 4324 ipversion != connp->conn_lastipversion || 4325 IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) { 4326 /* Rebuild the header template */ 4327 error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport, 4328 flowinfo); 4329 if (error != 0) { 4330 mutex_exit(&connp->conn_lock); 4331 goto ud_error; 4332 } 4333 } else { 4334 /* Simply update the destination address if no source route */ 4335 if (ixa->ixa_flags & IXAF_IS_IPV4) { 4336 ipha_t *ipha = (ipha_t *)connp->conn_ht_iphc; 4337 4338 IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst); 4339 if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) { 4340 ipha->ipha_fragment_offset_and_flags |= 4341 IPH_DF_HTONS; 4342 } else { 4343 ipha->ipha_fragment_offset_and_flags &= 4344 ~IPH_DF_HTONS; 4345 } 4346 } else { 4347 ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc; 4348 ip6h->ip6_dst = v6dst; 4349 } 4350 
} 4351 4352 /* 4353 * Remember the dst/dstport etc which corresponds to the built header 4354 * template and conn_ixa. 4355 */ 4356 oldixa = conn_replace_ixa(connp, ixa); 4357 connp->conn_v6lastdst = v6dst; 4358 connp->conn_lastipversion = ipversion; 4359 connp->conn_lastdstport = dstport; 4360 connp->conn_lastflowinfo = flowinfo; 4361 connp->conn_lastscopeid = ixa->ixa_scopeid; 4362 connp->conn_lastsrcid = srcid; 4363 /* Also remember a source to use together with lastdst */ 4364 connp->conn_v6lastsrc = v6src; 4365 4366 data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src, 4367 dstport, flowinfo, &error); 4368 4369 /* Done with conn_t */ 4370 mutex_exit(&connp->conn_lock); 4371 ixa_refrele(oldixa); 4372 4373 if (data_mp == NULL) { 4374 ASSERT(error != 0); 4375 goto ud_error; 4376 } 4377 4378 /* We're done. Pass the packet to ip. */ 4379 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 4380 4381 error = conn_ip_output(data_mp, ixa); 4382 /* No udpOutErrors if an error since IP increases its error counter */ 4383 switch (error) { 4384 case 0: 4385 break; 4386 case EWOULDBLOCK: 4387 (void) ixa_check_drain_insert(connp, ixa); 4388 error = 0; 4389 break; 4390 case EADDRNOTAVAIL: 4391 /* 4392 * IXAF_VERIFY_SOURCE tells us to pick a better source. 4393 * Don't have the application see that errno 4394 */ 4395 error = ENETUNREACH; 4396 /* FALLTHRU */ 4397 default: 4398 mutex_enter(&connp->conn_lock); 4399 /* 4400 * Clear the source and v6lastdst so we call ip_attr_connect 4401 * for the next packet and try to pick a better source. 
		 */
		if (connp->conn_mcbc_bind)
			connp->conn_saddr_v6 = ipv6_all_zeros;
		else
			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
		connp->conn_v6lastdst = ipv6_all_zeros;
		mutex_exit(&connp->conn_lock);
		break;
	}
	ixa_refrele(ixa);
	return (error);

	/*
	 * Common failure exit: drop the ixa reference if one is held, free
	 * the outgoing message and account for the failure in both the MIB
	 * (udpOutErrors) and the udp_out_err_output statistic.
	 */
ud_error:
	if (ixa != NULL)
		ixa_refrele(ixa);

	freemsg(data_mp);
	BUMP_MIB(&us->us_udp_mib, udpOutErrors);
	UDP_STAT(us, udp_out_err_output);
	return (error);
}

/*
 * Write-side fallback handler: the message is simply freed.  On DEBUG
 * kernels a console note is emitted first so that unexpected traffic on
 * this path is noticed.
 */
/* ARGSUSED */
static void
udp_wput_fallback(queue_t *wq, mblk_t *mp)
{
#ifdef DEBUG
	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
#endif
	freemsg(mp);
}


/*
 * Handle special out-of-band ioctl requests (see PSARC/2008/265).
 *
 * mp carries a cmdblk_t; the buffer for the answer is the continuation
 * block (mp->b_cont).  The request is rejected with EPROTO when there is
 * no continuation block or it holds fewer than cb_len bytes.  Otherwise
 * TI_GETPEERNAME / TI_GETMYNAME are answered under conn_lock, any other
 * command yields EINVAL.  The outcome is stored in cb_error and the same
 * message is always sent back upstream with qreply().
 */
static void
udp_wput_cmdblk(queue_t *q, mblk_t *mp)
{
	void	*data;
	mblk_t	*datamp = mp->b_cont;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;
	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;

	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
		cmdp->cb_error = EPROTO;
		qreply(q, mp);
		return;
	}
	data = datamp->b_rptr;

	mutex_enter(&connp->conn_lock);
	switch (cmdp->cb_cmd) {
	case TI_GETPEERNAME:
		/* A peer name only exists once the endpoint is connected. */
		if (udp->udp_state != TS_DATA_XFER)
			cmdp->cb_error = ENOTCONN;
		else
			cmdp->cb_error = conn_getpeername(connp, data,
			    &cmdp->cb_len);
		break;
	case TI_GETMYNAME:
		cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
		break;
	default:
		cmdp->cb_error = EINVAL;
		break;
	}
	mutex_exit(&connp->conn_lock);

	qreply(q, mp);
}

/*
 * Switch the endpoint from socket mode to plain TPI mode: udp_issocket is
 * cleared under conn_lock and the udp_sock_fallback statistic is bumped.
 */
static void
udp_use_pure_tpi(udp_t *udp)
{
	conn_t	*connp = udp->udp_connp;

	mutex_enter(&connp->conn_lock);
	udp->udp_issocket = B_FALSE;
	mutex_exit(&connp->conn_lock);
	UDP_STAT(udp->udp_us, udp_sock_fallback);
}

static
void
udp_wput_other(queue_t *q, mblk_t *mp)
{
	/*
	 * Dispatcher for write-side messages, keyed on the message type
	 * (db_type): M_CMD, TPI primitives (M_PROTO/M_PCPROTO), M_FLUSH,
	 * M_IOCTL and M_IOCDATA.  Every case that fully consumes the
	 * message returns directly; a case that ends in "break" hands the
	 * message to ip_wput_nondata() at the bottom of the function.
	 */
	uchar_t		*rptr = mp->b_rptr;
	struct iocblk	*iocp;
	conn_t		*connp = Q_TO_CONN(q);
	udp_t		*udp = connp->conn_udp;
	udp_stack_t	*us = udp->udp_us;
	cred_t		*cr;

	switch (mp->b_datap->db_type) {
	case M_CMD:
		udp_wput_cmdblk(q, mp);
		return;

	case M_PROTO:
	case M_PCPROTO:
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			/*
			 * If the message does not contain a PRIM_type,
			 * throw it away.
			 */
			freemsg(mp);
			return;
		}
		switch (((t_primp_t)rptr)->type) {
		case T_ADDR_REQ:
			udp_addr_req(q, mp);
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			udp_tpi_bind(q, mp);
			return;
		case T_CONN_REQ:
			udp_tpi_connect(q, mp);
			return;
		case T_CAPABILITY_REQ:
			udp_capability_req(q, mp);
			return;
		case T_INFO_REQ:
			udp_info_req(q, mp);
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, EADDRNOTAVAIL);
			return;
		case T_UNBIND_REQ:
			udp_tpi_unbind(q, mp);
			return;
		case T_SVR4_OPTMGMT_REQ:
			/*
			 * All Solaris components should pass a db_credp
			 * for this TPI message, hence we ASSERT.
			 * But in case there is some other M_PROTO that looks
			 * like a TPI message sent by some other kernel
			 * component, we check and return an error.
			 */
			cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			if (cr == NULL) {
				udp_err_ack(q, mp, TSYSERR, EINVAL);
				return;
			}
			/*
			 * Give the SNMP code the first look; only when it
			 * declines the request is it processed as a regular
			 * option management request.
			 */
			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
			    cr)) {
				svr4_optcom_req(q, mp, cr, &udp_opt_obj);
			}
			return;

		case T_OPTMGMT_REQ:
			/*
			 * All Solaris components should pass a db_credp
			 * for this TPI message, hence we ASSERT.
			 * But in case there is some other M_PROTO that looks
			 * like a TPI message sent by some other kernel
			 * component, we check and return an error.
			 */
			cr = msg_getcred(mp, NULL);
			ASSERT(cr != NULL);
			if (cr == NULL) {
				udp_err_ack(q, mp, TSYSERR, EINVAL);
				return;
			}
			tpi_optcom_req(q, mp, cr, &udp_opt_obj);
			return;

		case T_DISCON_REQ:
			udp_tpi_disconnect(q, mp);
			return;

		/* The following TPI message is not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			return;

		/* The following 3 TPI requests are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			return;
		default:
			break;
		}
		break;
	case M_FLUSH:
		/* Flush our write queue, then let the message continue down. */
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				qreply(q, mp);
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME:
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			return;
		case ND_SET:
			/* nd_getset performs the necessary checking */
		case ND_GET:
			/*
			 * Reply here only when nd_getset() reports that it
			 * handled the request; otherwise pass it on.
			 */
			if (nd_getset(q, us->us_nd, mp)) {
				qreply(q, mp);
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we have
			 * to follow pure TPI semantics.
			 */
			if (!udp->udp_issocket) {
				/* Already in TPI mode: nothing to fall back. */
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp_use_pure_tpi(udp);

				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			qreply(q, mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	ip_wput_nondata(q, mp);
}

/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages.
 *
 * Only TI_GETMYNAME and TI_GETPEERNAME are handled here; any other
 * command is forwarded to ip_wput_nondata().  The function is re-entered
 * once per copy step and uses mi_copy_state() to tell which step has just
 * finished:
 *   copy-in 1  - the user's strbuf arrived: validate it, build the
 *                address and start copying it out;
 *   copy-out 1 - the address went out: now copy out the updated strbuf;
 *   copy-out 2 - everything went out: acknowledge the ioctl.
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t		*mp1;
	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
	STRUCT_HANDLE(strbuf, sb);
	uint_t		addrlen;
	conn_t		*connp = Q_TO_CONN(q);
	udp_t		*udp = connp->conn_udp;

	/* Make sure it is one of ours. */
	switch (iocp->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		ip_wput_nondata(q, mp);
		return;
	}

	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME.  Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);

	/* The reply is a sin_t for AF_INET endpoints, a sin6_t otherwise. */
	if (connp->conn_family == AF_INET)
		addrlen = sizeof (sin_t);
	else
		addrlen = sizeof (sin6_t);

	/* The caller's buffer must be able to hold the whole address. */
	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}

	switch (iocp->ioc_cmd) {
	case TI_GETMYNAME:
		break;
	case TI_GETPEERNAME:
		/* Re-check the state; it was last tested at M_IOCTL time. */
		if (udp->udp_state != TS_DATA_XFER) {
			mi_copy_done(q, mp, ENOTCONN);
			return;
		}
		break;
	}
	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
	if (!mp1)
		return;

	STRUCT_FSET(sb, len, addrlen);
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
		(void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
		    &addrlen);
		break;
	case TI_GETPEERNAME:
		(void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
		    &addrlen);
		break;
	}
	mp1->b_wptr += addrlen;
	/* Copy out the address */
	mi_copyout(q, mp);
}

/*
 * Global (per-module) initialization: compute the largest option size
 * from the UDP option table and register the per-netstack create and
 * destroy callbacks.
 */
void
udp_ddi_g_init(void)
{
	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of udp_stack_t's.
	 */
	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
}

/*
 * Global teardown: undo the netstack registration done by
 * udp_ddi_g_init().
 */
void
udp_ddi_g_destroy(void)
{
	netstack_unregister(NS_UDP);
}

/* Driver name used by udp_stack_init() to look up the major number. */
#define	INET_NAME	"ip"

/*
 * Initialize the UDP stack instance.
 *
 * Allocates and zero-fills a udp_stack_t for the given netstack, sets the
 * default extra privileged ports, sizes and allocates the bind fanout
 * table (one mutex per bucket), takes a private copy of the tunable
 * parameter array, registers it, creates both kstats and obtains an LDI
 * identity.  All allocations use KM_SLEEP.  Returns the new udp_stack_t.
 */
static void *
udp_stack_init(netstackid_t stackid, netstack_t *ns)
{
	udp_stack_t	*us;
	udpparam_t	*pa;
	int		i;
	int		error = 0;
	major_t		major;

	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
	us->us_netstack = ns;

	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
	us->us_epriv_ports[0] = 2049;
	us->us_epriv_ports[1] = 4045;

	/*
	 * The smallest anonymous port in the privileged port range which UDP
	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
	 */
	us->us_min_anonpriv_port = 512;

	us->us_bind_fanout_size = udp_bind_fanout_size;

	/* Roundup variable that might have been modified in /etc/system */
	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
		/* Not a power of two. Round up to nearest power of two */
		/*
		 * NOTE(review): if the loop runs to completion i is 31 and
		 * "1 << i" shifts into the sign bit of an int - confirm the
		 * tunable can never be that large.
		 */
		for (i = 0; i < 31; i++) {
			if (us->us_bind_fanout_size < (1 << i))
				break;
		}
		us->us_bind_fanout_size = 1 << i;
	}
	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
	    sizeof (udp_fanout_t), KM_SLEEP);
	for (i = 0; i < us->us_bind_fanout_size; i++) {
		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
		    NULL);
	}

	/* Each stack gets its own writable copy of the parameter table. */
	pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP);

	us->us_param_arr = pa;
	bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr));

	(void) udp_param_register(&us->us_nd,
	    us->us_param_arr, A_CNT(udp_param_arr));

	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
	us->us_mibkp = udp_kstat_init(stackid);

	/* The result is only checked on DEBUG kernels (ASSERT). */
	major = mod_name_to_major(INET_NAME);
	error = ldi_ident_from_major(major, &us->us_ldi_ident);
	ASSERT(error == 0);
	return (us);
}

/*
 * Free the UDP stack instance.
 *
 * Releases everything udp_stack_init() set up: the fanout mutexes and
 * table, the parameter registration and array, both kstats, the LDI
 * identity and finally the udp_stack_t itself.
 */
static void
udp_stack_fini(netstackid_t stackid, void *arg)
{
	udp_stack_t *us = (udp_stack_t *)arg;
	int i;

	for (i = 0; i < us->us_bind_fanout_size; i++) {
		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
	}

	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
	    sizeof (udp_fanout_t));

	us->us_bind_fanout = NULL;

	nd_free(&us->us_nd);
	kmem_free(us->us_param_arr, sizeof (udp_param_arr));
	us->us_param_arr = NULL;

	udp_kstat_fini(stackid, us->us_mibkp);
	us->us_mibkp = NULL;

	udp_kstat2_fini(stackid, us->us_kstat);
	us->us_kstat = NULL;
	bzero(&us->us_statistics, sizeof (us->us_statistics));

	ldi_ident_release(us->us_ldi_ident);
	kmem_free(us, sizeof (*us));
}

/*
 * Create and install the "udpstat" named kstat for one stack.
 *
 * The kstat is virtual: its data lives in the caller-supplied
 * us_statisticsp, which is initialized here from a template of counter
 * names (the DEBUG-only counters exist only on DEBUG kernels).  The stack
 * id is remembered in ks_private.  Returns the kstat, or NULL when it
 * could not be created; in that case us_statisticsp is left untouched.
 */
static void *
udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
{
	kstat_t *ksp;

	udp_stat_t template = {
		{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
		{ "udp_out_opt",		KSTAT_DATA_UINT64 },
		{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
		{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
		{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
#ifdef DEBUG
		{ "udp_data_conn",		KSTAT_DATA_UINT64 },
		{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
		{ "udp_out_lastdst",		KSTAT_DATA_UINT64 },
		{ "udp_out_diffdst",		KSTAT_DATA_UINT64 },
		{ "udp_out_ipv6",		KSTAT_DATA_UINT64 },
		{ "udp_out_mapped",		KSTAT_DATA_UINT64 },
		{ "udp_out_ipv4",		KSTAT_DATA_UINT64 },
#endif
	};

	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net",
	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL, stackid);

	if (ksp == NULL)
		return (NULL);

	bcopy(&template, us_statisticsp, sizeof (template));
	ksp->ks_data = (void *)us_statisticsp;
	ksp->ks_private = (void *)(uintptr_t)stackid;

	kstat_install(ksp);
	return (ksp);
}
4920 4921 static void 4922 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 4923 { 4924 if (ksp != NULL) { 4925 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4926 kstat_delete_netstack(ksp, stackid); 4927 } 4928 } 4929 4930 static void * 4931 udp_kstat_init(netstackid_t stackid) 4932 { 4933 kstat_t *ksp; 4934 4935 udp_named_kstat_t template = { 4936 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 4937 { "inErrors", KSTAT_DATA_UINT32, 0 }, 4938 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 4939 { "entrySize", KSTAT_DATA_INT32, 0 }, 4940 { "entry6Size", KSTAT_DATA_INT32, 0 }, 4941 { "outErrors", KSTAT_DATA_UINT32, 0 }, 4942 }; 4943 4944 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 4945 KSTAT_TYPE_NAMED, 4946 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 4947 4948 if (ksp == NULL || ksp->ks_data == NULL) 4949 return (NULL); 4950 4951 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 4952 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 4953 4954 bcopy(&template, ksp->ks_data, sizeof (template)); 4955 ksp->ks_update = udp_kstat_update; 4956 ksp->ks_private = (void *)(uintptr_t)stackid; 4957 4958 kstat_install(ksp); 4959 return (ksp); 4960 } 4961 4962 static void 4963 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 4964 { 4965 if (ksp != NULL) { 4966 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 4967 kstat_delete_netstack(ksp, stackid); 4968 } 4969 } 4970 4971 static int 4972 udp_kstat_update(kstat_t *kp, int rw) 4973 { 4974 udp_named_kstat_t *udpkp; 4975 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 4976 netstack_t *ns; 4977 udp_stack_t *us; 4978 4979 if ((kp == NULL) || (kp->ks_data == NULL)) 4980 return (EIO); 4981 4982 if (rw == KSTAT_WRITE) 4983 return (EACCES); 4984 4985 ns = netstack_find_by_stackid(stackid); 4986 if (ns == NULL) 4987 return (-1); 4988 us = ns->netstack_udp; 4989 if (us == NULL) { 4990 netstack_rele(ns); 4991 return (-1); 4992 } 4993 udpkp = (udp_named_kstat_t 
*)kp->ks_data; 4994 4995 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 4996 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 4997 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 4998 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 4999 netstack_rele(ns); 5000 return (0); 5001 } 5002 5003 static size_t 5004 udp_set_rcv_hiwat(udp_t *udp, size_t size) 5005 { 5006 udp_stack_t *us = udp->udp_us; 5007 5008 /* We add a bit of extra buffering */ 5009 size += size >> 1; 5010 if (size > us->us_max_buf) 5011 size = us->us_max_buf; 5012 5013 udp->udp_rcv_hiwat = size; 5014 return (size); 5015 } 5016 5017 /* 5018 * For the lower queue so that UDP can be a dummy mux. 5019 * Nobody should be sending 5020 * packets up this stream 5021 */ 5022 static void 5023 udp_lrput(queue_t *q, mblk_t *mp) 5024 { 5025 switch (mp->b_datap->db_type) { 5026 case M_FLUSH: 5027 /* Turn around */ 5028 if (*mp->b_rptr & FLUSHW) { 5029 *mp->b_rptr &= ~FLUSHR; 5030 qreply(q, mp); 5031 return; 5032 } 5033 break; 5034 } 5035 freemsg(mp); 5036 } 5037 5038 /* 5039 * For the lower queue so that UDP can be a dummy mux. 5040 * Nobody should be sending packets down this stream. 5041 */ 5042 /* ARGSUSED */ 5043 void 5044 udp_lwput(queue_t *q, mblk_t *mp) 5045 { 5046 freemsg(mp); 5047 } 5048 5049 /* 5050 * Below routines for UDP socket module. 5051 */ 5052 5053 static conn_t * 5054 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 5055 { 5056 udp_t *udp; 5057 conn_t *connp; 5058 zoneid_t zoneid; 5059 netstack_t *ns; 5060 udp_stack_t *us; 5061 int len; 5062 5063 ns = netstack_find_by_cred(credp); 5064 ASSERT(ns != NULL); 5065 us = ns->netstack_udp; 5066 ASSERT(us != NULL); 5067 5068 /* 5069 * For exclusive stacks we set the zoneid to zero 5070 * to make UDP operate as if in the global zone. 
5071 */ 5072 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 5073 zoneid = GLOBAL_ZONEID; 5074 else 5075 zoneid = crgetzoneid(credp); 5076 5077 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 5078 5079 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 5080 if (connp == NULL) { 5081 netstack_rele(ns); 5082 return (NULL); 5083 } 5084 udp = connp->conn_udp; 5085 5086 /* 5087 * ipcl_conn_create did a netstack_hold. Undo the hold that was 5088 * done by netstack_find_by_cred() 5089 */ 5090 netstack_rele(ns); 5091 5092 /* 5093 * Since this conn_t/udp_t is not yet visible to anybody else we don't 5094 * need to lock anything. 5095 */ 5096 ASSERT(connp->conn_proto == IPPROTO_UDP); 5097 ASSERT(connp->conn_udp == udp); 5098 ASSERT(udp->udp_connp == connp); 5099 5100 /* Set the initial state of the stream and the privilege status. */ 5101 udp->udp_state = TS_UNBND; 5102 connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE; 5103 if (isv6) { 5104 connp->conn_family = AF_INET6; 5105 connp->conn_ipversion = IPV6_VERSION; 5106 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5107 connp->conn_default_ttl = us->us_ipv6_hoplimit; 5108 len = sizeof (ip6_t) + UDPH_SIZE; 5109 } else { 5110 connp->conn_family = AF_INET; 5111 connp->conn_ipversion = IPV4_VERSION; 5112 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5113 connp->conn_default_ttl = us->us_ipv4_ttl; 5114 len = sizeof (ipha_t) + UDPH_SIZE; 5115 } 5116 5117 ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto); 5118 connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl; 5119 5120 connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 5121 connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM; 5122 /* conn_allzones can not be set this early, hence no IPCL_ZONEID */ 5123 connp->conn_ixa->ixa_zoneid = zoneid; 5124 5125 connp->conn_zoneid = zoneid; 5126 5127 /* 5128 * If the caller has the process-wide flag set, then default to MAC 5129 * exempt mode. This allows read-down to unlabeled hosts. 
5130 */ 5131 if (getpflags(NET_MAC_AWARE, credp) != 0) 5132 connp->conn_mac_mode = CONN_MAC_AWARE; 5133 5134 connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID); 5135 5136 udp->udp_us = us; 5137 5138 connp->conn_rcvbuf = us->us_recv_hiwat; 5139 connp->conn_sndbuf = us->us_xmit_hiwat; 5140 connp->conn_sndlowat = us->us_xmit_lowat; 5141 connp->conn_rcvlowat = udp_mod_info.mi_lowat; 5142 5143 connp->conn_wroff = len + us->us_wroff_extra; 5144 connp->conn_so_type = SOCK_DGRAM; 5145 5146 connp->conn_recv = udp_input; 5147 connp->conn_recvicmp = udp_icmp_input; 5148 crhold(credp); 5149 connp->conn_cred = credp; 5150 connp->conn_cpid = curproc->p_pid; 5151 connp->conn_open_time = ddi_get_lbolt64(); 5152 /* Cache things in ixa without an extra refhold */ 5153 connp->conn_ixa->ixa_cred = connp->conn_cred; 5154 connp->conn_ixa->ixa_cpid = connp->conn_cpid; 5155 if (is_system_labeled()) 5156 connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred); 5157 5158 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 5159 5160 if (us->us_pmtu_discovery) 5161 connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY; 5162 5163 return (connp); 5164 } 5165 5166 sock_lower_handle_t 5167 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 5168 uint_t *smodep, int *errorp, int flags, cred_t *credp) 5169 { 5170 udp_t *udp = NULL; 5171 udp_stack_t *us; 5172 conn_t *connp; 5173 boolean_t isv6; 5174 5175 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 5176 (proto != 0 && proto != IPPROTO_UDP)) { 5177 *errorp = EPROTONOSUPPORT; 5178 return (NULL); 5179 } 5180 5181 if (family == AF_INET6) 5182 isv6 = B_TRUE; 5183 else 5184 isv6 = B_FALSE; 5185 5186 connp = udp_do_open(credp, isv6, flags); 5187 if (connp == NULL) { 5188 *errorp = ENOMEM; 5189 return (NULL); 5190 } 5191 5192 udp = connp->conn_udp; 5193 ASSERT(udp != NULL); 5194 us = udp->udp_us; 5195 ASSERT(us != NULL); 5196 5197 udp->udp_issocket = B_TRUE; 5198 connp->conn_flags |= IPCL_NONSTR; 
5199 5200 /* 5201 * Set flow control 5202 * Since this conn_t/udp_t is not yet visible to anybody else we don't 5203 * need to lock anything. 5204 */ 5205 (void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf); 5206 udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf; 5207 5208 connp->conn_flow_cntrld = B_FALSE; 5209 5210 mutex_enter(&connp->conn_lock); 5211 connp->conn_state_flags &= ~CONN_INCIPIENT; 5212 mutex_exit(&connp->conn_lock); 5213 5214 *errorp = 0; 5215 *smodep = SM_ATOMIC; 5216 *sock_downcalls = &sock_udp_downcalls; 5217 return ((sock_lower_handle_t)connp); 5218 } 5219 5220 /* ARGSUSED3 */ 5221 void 5222 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 5223 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 5224 { 5225 conn_t *connp = (conn_t *)proto_handle; 5226 struct sock_proto_props sopp; 5227 5228 /* All Solaris components should pass a cred for this operation. */ 5229 ASSERT(cr != NULL); 5230 5231 connp->conn_upcalls = sock_upcalls; 5232 connp->conn_upper_handle = sock_handle; 5233 5234 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | 5235 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 5236 sopp.sopp_wroff = connp->conn_wroff; 5237 sopp.sopp_maxblk = INFPSZ; 5238 sopp.sopp_rxhiwat = connp->conn_rcvbuf; 5239 sopp.sopp_rxlowat = connp->conn_rcvlowat; 5240 sopp.sopp_maxaddrlen = sizeof (sin6_t); 5241 sopp.sopp_maxpsz = 5242 (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 5243 UDP_MAXPACKET_IPV6; 5244 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 
0 : 5245 udp_mod_info.mi_minpsz; 5246 5247 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 5248 &sopp); 5249 } 5250 5251 static void 5252 udp_do_close(conn_t *connp) 5253 { 5254 udp_t *udp; 5255 5256 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 5257 udp = connp->conn_udp; 5258 5259 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 5260 /* 5261 * Running in cluster mode - register unbind information 5262 */ 5263 if (connp->conn_ipversion == IPV4_VERSION) { 5264 (*cl_inet_unbind)( 5265 connp->conn_netstack->netstack_stackid, 5266 IPPROTO_UDP, AF_INET, 5267 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5268 (in_port_t)connp->conn_lport, NULL); 5269 } else { 5270 (*cl_inet_unbind)( 5271 connp->conn_netstack->netstack_stackid, 5272 IPPROTO_UDP, AF_INET6, 5273 (uint8_t *)&(connp->conn_laddr_v6), 5274 (in_port_t)connp->conn_lport, NULL); 5275 } 5276 } 5277 5278 udp_bind_hash_remove(udp, B_FALSE); 5279 5280 ip_quiesce_conn(connp); 5281 5282 if (!IPCL_IS_NONSTR(connp)) { 5283 ASSERT(connp->conn_wq != NULL); 5284 ASSERT(connp->conn_rq != NULL); 5285 qprocsoff(connp->conn_rq); 5286 } 5287 5288 udp_close_free(connp); 5289 5290 /* 5291 * Now we are truly single threaded on this stream, and can 5292 * delete the things hanging off the connp, and finally the connp. 5293 * We removed this connp from the fanout list, it cannot be 5294 * accessed thru the fanouts, and we already waited for the 5295 * conn_ref to drop to 0. We are already in close, so 5296 * there cannot be any other thread from the top. qprocsoff 5297 * has completed, and service has completed or won't run in 5298 * future. 
5299 */ 5300 ASSERT(connp->conn_ref == 1); 5301 5302 if (!IPCL_IS_NONSTR(connp)) { 5303 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 5304 } else { 5305 ip_free_helper_stream(connp); 5306 } 5307 5308 connp->conn_ref--; 5309 ipcl_conn_destroy(connp); 5310 } 5311 5312 /* ARGSUSED1 */ 5313 int 5314 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 5315 { 5316 conn_t *connp = (conn_t *)proto_handle; 5317 5318 /* All Solaris components should pass a cred for this operation. */ 5319 ASSERT(cr != NULL); 5320 5321 udp_do_close(connp); 5322 return (0); 5323 } 5324 5325 static int 5326 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 5327 boolean_t bind_to_req_port_only) 5328 { 5329 sin_t *sin; 5330 sin6_t *sin6; 5331 udp_t *udp = connp->conn_udp; 5332 int error = 0; 5333 ip_laddr_t laddr_type = IPVL_UNICAST_UP; /* INADDR_ANY */ 5334 in_port_t port; /* Host byte order */ 5335 in_port_t requested_port; /* Host byte order */ 5336 int count; 5337 ipaddr_t v4src; /* Set if AF_INET */ 5338 in6_addr_t v6src; 5339 int loopmax; 5340 udp_fanout_t *udpf; 5341 in_port_t lport; /* Network byte order */ 5342 uint_t scopeid = 0; 5343 zoneid_t zoneid = IPCL_ZONEID(connp); 5344 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5345 boolean_t is_inaddr_any; 5346 mlp_type_t addrtype, mlptype; 5347 udp_stack_t *us = udp->udp_us; 5348 5349 switch (len) { 5350 case sizeof (sin_t): /* Complete IPv4 address */ 5351 sin = (sin_t *)sa; 5352 5353 if (sin == NULL || !OK_32PTR((char *)sin)) 5354 return (EINVAL); 5355 5356 if (connp->conn_family != AF_INET || 5357 sin->sin_family != AF_INET) { 5358 return (EAFNOSUPPORT); 5359 } 5360 v4src = sin->sin_addr.s_addr; 5361 IN6_IPADDR_TO_V4MAPPED(v4src, &v6src); 5362 if (v4src != INADDR_ANY) { 5363 laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst, 5364 B_TRUE); 5365 } 5366 port = ntohs(sin->sin_port); 5367 break; 5368 5369 case sizeof (sin6_t): /* complete IPv6 address */ 5370 sin6 = (sin6_t *)sa; 
5371 5372 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 5373 return (EINVAL); 5374 5375 if (connp->conn_family != AF_INET6 || 5376 sin6->sin6_family != AF_INET6) { 5377 return (EAFNOSUPPORT); 5378 } 5379 v6src = sin6->sin6_addr; 5380 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5381 if (connp->conn_ipv6_v6only) 5382 return (EADDRNOTAVAIL); 5383 5384 IN6_V4MAPPED_TO_IPADDR(&v6src, v4src); 5385 if (v4src != INADDR_ANY) { 5386 laddr_type = ip_laddr_verify_v4(v4src, 5387 zoneid, ipst, B_FALSE); 5388 } 5389 } else { 5390 if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 5391 if (IN6_IS_ADDR_LINKSCOPE(&v6src)) 5392 scopeid = sin6->sin6_scope_id; 5393 laddr_type = ip_laddr_verify_v6(&v6src, 5394 zoneid, ipst, B_TRUE, scopeid); 5395 } 5396 } 5397 port = ntohs(sin6->sin6_port); 5398 break; 5399 5400 default: /* Invalid request */ 5401 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5402 "udp_bind: bad ADDR_length length %u", len); 5403 return (-TBADADDR); 5404 } 5405 5406 /* Is the local address a valid unicast, multicast, or broadcast? */ 5407 if (laddr_type == IPVL_BAD) 5408 return (EADDRNOTAVAIL); 5409 5410 requested_port = port; 5411 5412 if (requested_port == 0 || !bind_to_req_port_only) 5413 bind_to_req_port_only = B_FALSE; 5414 else /* T_BIND_REQ and requested_port != 0 */ 5415 bind_to_req_port_only = B_TRUE; 5416 5417 if (requested_port == 0) { 5418 /* 5419 * If the application passed in zero for the port number, it 5420 * doesn't care which port number we bind to. Get one in the 5421 * valid range. 5422 */ 5423 if (connp->conn_anon_priv_bind) { 5424 port = udp_get_next_priv_port(udp); 5425 } else { 5426 port = udp_update_next_port(udp, 5427 us->us_next_port_to_try, B_TRUE); 5428 } 5429 } else { 5430 /* 5431 * If the port is in the well-known privileged range, 5432 * make sure the caller was privileged. 
5433 */ 5434 int i; 5435 boolean_t priv = B_FALSE; 5436 5437 if (port < us->us_smallest_nonpriv_port) { 5438 priv = B_TRUE; 5439 } else { 5440 for (i = 0; i < us->us_num_epriv_ports; i++) { 5441 if (port == us->us_epriv_ports[i]) { 5442 priv = B_TRUE; 5443 break; 5444 } 5445 } 5446 } 5447 5448 if (priv) { 5449 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 5450 return (-TACCES); 5451 } 5452 } 5453 5454 if (port == 0) 5455 return (-TNOADDR); 5456 5457 /* 5458 * The state must be TS_UNBND. TPI mandates that users must send 5459 * TPI primitives only 1 at a time and wait for the response before 5460 * sending the next primitive. 5461 */ 5462 mutex_enter(&connp->conn_lock); 5463 if (udp->udp_state != TS_UNBND) { 5464 mutex_exit(&connp->conn_lock); 5465 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 5466 "udp_bind: bad state, %u", udp->udp_state); 5467 return (-TOUTSTATE); 5468 } 5469 /* 5470 * Copy the source address into our udp structure. This address 5471 * may still be zero; if so, IP will fill in the correct address 5472 * each time an outbound packet is passed to it. Since the udp is 5473 * not yet in the bind hash list, we don't grab the uf_lock to 5474 * change conn_ipversion 5475 */ 5476 if (connp->conn_family == AF_INET) { 5477 ASSERT(sin != NULL); 5478 ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4); 5479 } else { 5480 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 5481 /* 5482 * no need to hold the uf_lock to set the conn_ipversion 5483 * since we are not yet in the fanout list 5484 */ 5485 connp->conn_ipversion = IPV4_VERSION; 5486 connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4; 5487 } else { 5488 connp->conn_ipversion = IPV6_VERSION; 5489 connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4; 5490 } 5491 } 5492 5493 /* 5494 * If conn_reuseaddr is not set, then we have to make sure that 5495 * the IP address and port number the application requested 5496 * (or we selected for the application) is not being used by 5497 * another stream. 
If another stream is already using the 5498 * requested IP address and port, the behavior depends on 5499 * "bind_to_req_port_only". If set the bind fails; otherwise we 5500 * search for any an unused port to bind to the stream. 5501 * 5502 * As per the BSD semantics, as modified by the Deering multicast 5503 * changes, if udp_reuseaddr is set, then we allow multiple binds 5504 * to the same port independent of the local IP address. 5505 * 5506 * This is slightly different than in SunOS 4.X which did not 5507 * support IP multicast. Note that the change implemented by the 5508 * Deering multicast code effects all binds - not only binding 5509 * to IP multicast addresses. 5510 * 5511 * Note that when binding to port zero we ignore SO_REUSEADDR in 5512 * order to guarantee a unique port. 5513 */ 5514 5515 count = 0; 5516 if (connp->conn_anon_priv_bind) { 5517 /* 5518 * loopmax = (IPPORT_RESERVED-1) - 5519 * us->us_min_anonpriv_port + 1 5520 */ 5521 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 5522 } else { 5523 loopmax = us->us_largest_anon_port - 5524 us->us_smallest_anon_port + 1; 5525 } 5526 5527 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 5528 5529 for (;;) { 5530 udp_t *udp1; 5531 boolean_t found_exclbind = B_FALSE; 5532 conn_t *connp1; 5533 5534 /* 5535 * Walk through the list of udp streams bound to 5536 * requested port with the same IP address. 5537 */ 5538 lport = htons(port); 5539 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 5540 us->us_bind_fanout_size)]; 5541 mutex_enter(&udpf->uf_lock); 5542 for (udp1 = udpf->uf_udp; udp1 != NULL; 5543 udp1 = udp1->udp_bind_hash) { 5544 connp1 = udp1->udp_connp; 5545 5546 if (lport != connp1->conn_lport) 5547 continue; 5548 5549 /* 5550 * On a labeled system, we must treat bindings to ports 5551 * on shared IP addresses by sockets with MAC exemption 5552 * privilege as being in all zones, as there's 5553 * otherwise no way to identify the right receiver. 
5554 */ 5555 if (!IPCL_BIND_ZONE_MATCH(connp1, connp)) 5556 continue; 5557 5558 /* 5559 * If UDP_EXCLBIND is set for either the bound or 5560 * binding endpoint, the semantics of bind 5561 * is changed according to the following chart. 5562 * 5563 * spec = specified address (v4 or v6) 5564 * unspec = unspecified address (v4 or v6) 5565 * A = specified addresses are different for endpoints 5566 * 5567 * bound bind to allowed? 5568 * ------------------------------------- 5569 * unspec unspec no 5570 * unspec spec no 5571 * spec unspec no 5572 * spec spec yes if A 5573 * 5574 * For labeled systems, SO_MAC_EXEMPT behaves the same 5575 * as UDP_EXCLBIND, except that zoneid is ignored. 5576 */ 5577 if (connp1->conn_exclbind || connp->conn_exclbind || 5578 IPCL_CONNS_MAC(udp1->udp_connp, connp)) { 5579 if (V6_OR_V4_INADDR_ANY( 5580 connp1->conn_bound_addr_v6) || 5581 is_inaddr_any || 5582 IN6_ARE_ADDR_EQUAL( 5583 &connp1->conn_bound_addr_v6, 5584 &v6src)) { 5585 found_exclbind = B_TRUE; 5586 break; 5587 } 5588 continue; 5589 } 5590 5591 /* 5592 * Check ipversion to allow IPv4 and IPv6 sockets to 5593 * have disjoint port number spaces. 5594 */ 5595 if (connp->conn_ipversion != connp1->conn_ipversion) { 5596 5597 /* 5598 * On the first time through the loop, if the 5599 * the user intentionally specified a 5600 * particular port number, then ignore any 5601 * bindings of the other protocol that may 5602 * conflict. This allows the user to bind IPv6 5603 * alone and get both v4 and v6, or bind both 5604 * both and get each seperately. On subsequent 5605 * times through the loop, we're checking a 5606 * port that we chose (not the user) and thus 5607 * we do not allow casual duplicate bindings. 5608 */ 5609 if (count == 0 && requested_port != 0) 5610 continue; 5611 } 5612 5613 /* 5614 * No difference depending on SO_REUSEADDR. 
5615 * 5616 * If existing port is bound to a 5617 * non-wildcard IP address and 5618 * the requesting stream is bound to 5619 * a distinct different IP addresses 5620 * (non-wildcard, also), keep going. 5621 */ 5622 if (!is_inaddr_any && 5623 !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) && 5624 !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6, 5625 &v6src)) { 5626 continue; 5627 } 5628 break; 5629 } 5630 5631 if (!found_exclbind && 5632 (connp->conn_reuseaddr && requested_port != 0)) { 5633 break; 5634 } 5635 5636 if (udp1 == NULL) { 5637 /* 5638 * No other stream has this IP address 5639 * and port number. We can use it. 5640 */ 5641 break; 5642 } 5643 mutex_exit(&udpf->uf_lock); 5644 if (bind_to_req_port_only) { 5645 /* 5646 * We get here only when requested port 5647 * is bound (and only first of the for() 5648 * loop iteration). 5649 * 5650 * The semantics of this bind request 5651 * require it to fail so we return from 5652 * the routine (and exit the loop). 5653 * 5654 */ 5655 mutex_exit(&connp->conn_lock); 5656 return (-TADDRBUSY); 5657 } 5658 5659 if (connp->conn_anon_priv_bind) { 5660 port = udp_get_next_priv_port(udp); 5661 } else { 5662 if ((count == 0) && (requested_port != 0)) { 5663 /* 5664 * If the application wants us to find 5665 * a port, get one to start with. Set 5666 * requested_port to 0, so that we will 5667 * update us->us_next_port_to_try below. 5668 */ 5669 port = udp_update_next_port(udp, 5670 us->us_next_port_to_try, B_TRUE); 5671 requested_port = 0; 5672 } else { 5673 port = udp_update_next_port(udp, port + 1, 5674 B_FALSE); 5675 } 5676 } 5677 5678 if (port == 0 || ++count >= loopmax) { 5679 /* 5680 * We've tried every possible port number and 5681 * there are none available, so send an error 5682 * to the user. 5683 */ 5684 mutex_exit(&connp->conn_lock); 5685 return (-TNOADDR); 5686 } 5687 } 5688 5689 /* 5690 * Copy the source address into our udp structure. 
This address 5691 * may still be zero; if so, ip_attr_connect will fill in the correct 5692 * address when a packet is about to be sent. 5693 * If we are binding to a broadcast or multicast address then 5694 * we just set the conn_bound_addr since we don't want to use 5695 * that as the source address when sending. 5696 */ 5697 connp->conn_bound_addr_v6 = v6src; 5698 connp->conn_laddr_v6 = v6src; 5699 if (scopeid != 0) { 5700 connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET; 5701 connp->conn_ixa->ixa_scopeid = scopeid; 5702 connp->conn_incoming_ifindex = scopeid; 5703 } else { 5704 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5705 connp->conn_incoming_ifindex = connp->conn_bound_if; 5706 } 5707 5708 switch (laddr_type) { 5709 case IPVL_UNICAST_UP: 5710 case IPVL_UNICAST_DOWN: 5711 connp->conn_saddr_v6 = v6src; 5712 connp->conn_mcbc_bind = B_FALSE; 5713 break; 5714 case IPVL_MCAST: 5715 case IPVL_BCAST: 5716 /* ip_set_destination will pick a source address later */ 5717 connp->conn_saddr_v6 = ipv6_all_zeros; 5718 connp->conn_mcbc_bind = B_TRUE; 5719 break; 5720 } 5721 5722 /* Any errors after this point should use late_error */ 5723 connp->conn_lport = lport; 5724 5725 /* 5726 * Now reset the next anonymous port if the application requested 5727 * an anonymous port, or we handed out the next anonymous port. 5728 */ 5729 if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) { 5730 us->us_next_port_to_try = port + 1; 5731 } 5732 5733 /* Initialize the T_BIND_ACK. 
*/ 5734 if (connp->conn_family == AF_INET) { 5735 sin->sin_port = connp->conn_lport; 5736 } else { 5737 sin6->sin6_port = connp->conn_lport; 5738 } 5739 udp->udp_state = TS_IDLE; 5740 udp_bind_hash_insert(udpf, udp); 5741 mutex_exit(&udpf->uf_lock); 5742 mutex_exit(&connp->conn_lock); 5743 5744 if (cl_inet_bind) { 5745 /* 5746 * Running in cluster mode - register bind information 5747 */ 5748 if (connp->conn_ipversion == IPV4_VERSION) { 5749 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5750 IPPROTO_UDP, AF_INET, (uint8_t *)&v4src, 5751 (in_port_t)connp->conn_lport, NULL); 5752 } else { 5753 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 5754 IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src, 5755 (in_port_t)connp->conn_lport, NULL); 5756 } 5757 } 5758 5759 mutex_enter(&connp->conn_lock); 5760 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 5761 if (is_system_labeled() && (!connp->conn_anon_port || 5762 connp->conn_anon_mlp)) { 5763 uint16_t mlpport; 5764 zone_t *zone; 5765 5766 zone = crgetzone(cr); 5767 connp->conn_mlp_type = 5768 connp->conn_recv_ancillary.crb_recvucred ? mlptBoth : 5769 mlptSingle; 5770 addrtype = tsol_mlp_addr_type( 5771 connp->conn_allzones ? ALL_ZONES : zone->zone_id, 5772 IPV6_VERSION, &v6src, us->us_netstack->netstack_ip); 5773 if (addrtype == mlptSingle) { 5774 error = -TNOADDR; 5775 mutex_exit(&connp->conn_lock); 5776 goto late_error; 5777 } 5778 mlpport = connp->conn_anon_port ? PMAPPORT : port; 5779 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 5780 addrtype); 5781 5782 /* 5783 * It is a coding error to attempt to bind an MLP port 5784 * without first setting SOL_SOCKET/SCM_UCRED. 5785 */ 5786 if (mlptype != mlptSingle && 5787 connp->conn_mlp_type == mlptSingle) { 5788 error = EINVAL; 5789 mutex_exit(&connp->conn_lock); 5790 goto late_error; 5791 } 5792 5793 /* 5794 * It is an access violation to attempt to bind an MLP port 5795 * without NET_BINDMLP privilege. 
5796 */ 5797 if (mlptype != mlptSingle && 5798 secpolicy_net_bindmlp(cr) != 0) { 5799 if (connp->conn_debug) { 5800 (void) strlog(UDP_MOD_ID, 0, 1, 5801 SL_ERROR|SL_TRACE, 5802 "udp_bind: no priv for multilevel port %d", 5803 mlpport); 5804 } 5805 error = -TACCES; 5806 mutex_exit(&connp->conn_lock); 5807 goto late_error; 5808 } 5809 5810 /* 5811 * If we're specifically binding a shared IP address and the 5812 * port is MLP on shared addresses, then check to see if this 5813 * zone actually owns the MLP. Reject if not. 5814 */ 5815 if (mlptype == mlptShared && addrtype == mlptShared) { 5816 /* 5817 * No need to handle exclusive-stack zones since 5818 * ALL_ZONES only applies to the shared stack. 5819 */ 5820 zoneid_t mlpzone; 5821 5822 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 5823 htons(mlpport)); 5824 if (connp->conn_zoneid != mlpzone) { 5825 if (connp->conn_debug) { 5826 (void) strlog(UDP_MOD_ID, 0, 1, 5827 SL_ERROR|SL_TRACE, 5828 "udp_bind: attempt to bind port " 5829 "%d on shared addr in zone %d " 5830 "(should be %d)", 5831 mlpport, connp->conn_zoneid, 5832 mlpzone); 5833 } 5834 error = -TACCES; 5835 mutex_exit(&connp->conn_lock); 5836 goto late_error; 5837 } 5838 } 5839 if (connp->conn_anon_port) { 5840 error = tsol_mlp_anon(zone, mlptype, connp->conn_proto, 5841 port, B_TRUE); 5842 if (error != 0) { 5843 if (connp->conn_debug) { 5844 (void) strlog(UDP_MOD_ID, 0, 1, 5845 SL_ERROR|SL_TRACE, 5846 "udp_bind: cannot establish anon " 5847 "MLP for port %d", port); 5848 } 5849 error = -TACCES; 5850 mutex_exit(&connp->conn_lock); 5851 goto late_error; 5852 } 5853 } 5854 connp->conn_mlp_type = mlptype; 5855 } 5856 5857 /* 5858 * We create an initial header template here to make a subsequent 5859 * sendto have a starting point. Since conn_last_dst is zero the 5860 * first sendto will always follow the 'dst changed' code path. 5861 * Note that we defer massaging options and the related checksum 5862 * adjustment until we have a destination address. 
5863 */ 5864 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5865 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5866 if (error != 0) { 5867 mutex_exit(&connp->conn_lock); 5868 goto late_error; 5869 } 5870 /* Just in case */ 5871 connp->conn_faddr_v6 = ipv6_all_zeros; 5872 connp->conn_fport = 0; 5873 connp->conn_v6lastdst = ipv6_all_zeros; 5874 mutex_exit(&connp->conn_lock); 5875 5876 error = ip_laddr_fanout_insert(connp); 5877 if (error != 0) 5878 goto late_error; 5879 5880 /* Bind succeeded */ 5881 return (0); 5882 5883 late_error: 5884 /* We had already picked the port number, and then the bind failed */ 5885 mutex_enter(&connp->conn_lock); 5886 udpf = &us->us_bind_fanout[ 5887 UDP_BIND_HASH(connp->conn_lport, 5888 us->us_bind_fanout_size)]; 5889 mutex_enter(&udpf->uf_lock); 5890 connp->conn_saddr_v6 = ipv6_all_zeros; 5891 connp->conn_bound_addr_v6 = ipv6_all_zeros; 5892 connp->conn_laddr_v6 = ipv6_all_zeros; 5893 if (scopeid != 0) { 5894 connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 5895 connp->conn_incoming_ifindex = connp->conn_bound_if; 5896 } 5897 udp->udp_state = TS_UNBND; 5898 udp_bind_hash_remove(udp, B_TRUE); 5899 connp->conn_lport = 0; 5900 mutex_exit(&udpf->uf_lock); 5901 connp->conn_anon_port = B_FALSE; 5902 connp->conn_mlp_type = mlptSingle; 5903 5904 connp->conn_v6lastdst = ipv6_all_zeros; 5905 5906 /* Restore the header that was built above - different source address */ 5907 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 5908 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 5909 mutex_exit(&connp->conn_lock); 5910 return (error); 5911 } 5912 5913 int 5914 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 5915 socklen_t len, cred_t *cr) 5916 { 5917 int error; 5918 conn_t *connp; 5919 5920 /* All Solaris components should pass a cred for this operation. 
*/ 5921 ASSERT(cr != NULL); 5922 5923 connp = (conn_t *)proto_handle; 5924 5925 if (sa == NULL) 5926 error = udp_do_unbind(connp); 5927 else 5928 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 5929 5930 if (error < 0) { 5931 if (error == -TOUTSTATE) 5932 error = EINVAL; 5933 else 5934 error = proto_tlitosyserr(-error); 5935 } 5936 5937 return (error); 5938 } 5939 5940 static int 5941 udp_implicit_bind(conn_t *connp, cred_t *cr) 5942 { 5943 sin6_t sin6addr; 5944 sin_t *sin; 5945 sin6_t *sin6; 5946 socklen_t len; 5947 int error; 5948 5949 /* All Solaris components should pass a cred for this operation. */ 5950 ASSERT(cr != NULL); 5951 5952 if (connp->conn_family == AF_INET) { 5953 len = sizeof (struct sockaddr_in); 5954 sin = (sin_t *)&sin6addr; 5955 *sin = sin_null; 5956 sin->sin_family = AF_INET; 5957 sin->sin_addr.s_addr = INADDR_ANY; 5958 } else { 5959 ASSERT(connp->conn_family == AF_INET6); 5960 len = sizeof (sin6_t); 5961 sin6 = (sin6_t *)&sin6addr; 5962 *sin6 = sin6_null; 5963 sin6->sin6_family = AF_INET6; 5964 V6_SET_ZERO(sin6->sin6_addr); 5965 } 5966 5967 error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len, 5968 cr, B_FALSE); 5969 return ((error < 0) ? proto_tlitosyserr(-error) : error); 5970 } 5971 5972 /* 5973 * This routine removes a port number association from a stream. It 5974 * is called by udp_unbind and udp_tpi_unbind. 
5975 */ 5976 static int 5977 udp_do_unbind(conn_t *connp) 5978 { 5979 udp_t *udp = connp->conn_udp; 5980 udp_fanout_t *udpf; 5981 udp_stack_t *us = udp->udp_us; 5982 5983 if (cl_inet_unbind != NULL) { 5984 /* 5985 * Running in cluster mode - register unbind information 5986 */ 5987 if (connp->conn_ipversion == IPV4_VERSION) { 5988 (*cl_inet_unbind)( 5989 connp->conn_netstack->netstack_stackid, 5990 IPPROTO_UDP, AF_INET, 5991 (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)), 5992 (in_port_t)connp->conn_lport, NULL); 5993 } else { 5994 (*cl_inet_unbind)( 5995 connp->conn_netstack->netstack_stackid, 5996 IPPROTO_UDP, AF_INET6, 5997 (uint8_t *)&(connp->conn_laddr_v6), 5998 (in_port_t)connp->conn_lport, NULL); 5999 } 6000 } 6001 6002 mutex_enter(&connp->conn_lock); 6003 /* If a bind has not been done, we can't unbind. */ 6004 if (udp->udp_state == TS_UNBND) { 6005 mutex_exit(&connp->conn_lock); 6006 return (-TOUTSTATE); 6007 } 6008 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 6009 us->us_bind_fanout_size)]; 6010 mutex_enter(&udpf->uf_lock); 6011 udp_bind_hash_remove(udp, B_TRUE); 6012 connp->conn_saddr_v6 = ipv6_all_zeros; 6013 connp->conn_bound_addr_v6 = ipv6_all_zeros; 6014 connp->conn_laddr_v6 = ipv6_all_zeros; 6015 connp->conn_mcbc_bind = B_FALSE; 6016 connp->conn_lport = 0; 6017 /* In case we were also connected */ 6018 connp->conn_faddr_v6 = ipv6_all_zeros; 6019 connp->conn_fport = 0; 6020 mutex_exit(&udpf->uf_lock); 6021 6022 connp->conn_v6lastdst = ipv6_all_zeros; 6023 udp->udp_state = TS_UNBND; 6024 6025 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 6026 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 6027 mutex_exit(&connp->conn_lock); 6028 6029 ip_unbind(connp); 6030 6031 return (0); 6032 } 6033 6034 /* 6035 * It associates a default destination address with the stream. 
6036 */ 6037 static int 6038 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 6039 cred_t *cr, pid_t pid) 6040 { 6041 sin6_t *sin6; 6042 sin_t *sin; 6043 in6_addr_t v6dst; 6044 ipaddr_t v4dst; 6045 uint16_t dstport; 6046 uint32_t flowinfo; 6047 udp_fanout_t *udpf; 6048 udp_t *udp, *udp1; 6049 ushort_t ipversion; 6050 udp_stack_t *us; 6051 int error; 6052 conn_t *connp1; 6053 ip_xmit_attr_t *ixa; 6054 uint_t scopeid = 0; 6055 uint_t srcid = 0; 6056 in6_addr_t v6src = connp->conn_saddr_v6; 6057 6058 udp = connp->conn_udp; 6059 us = udp->udp_us; 6060 6061 /* 6062 * Address has been verified by the caller 6063 */ 6064 switch (len) { 6065 default: 6066 /* 6067 * Should never happen 6068 */ 6069 return (EINVAL); 6070 6071 case sizeof (sin_t): 6072 sin = (sin_t *)sa; 6073 v4dst = sin->sin_addr.s_addr; 6074 dstport = sin->sin_port; 6075 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 6076 ASSERT(connp->conn_ipversion == IPV4_VERSION); 6077 ipversion = IPV4_VERSION; 6078 break; 6079 6080 case sizeof (sin6_t): 6081 sin6 = (sin6_t *)sa; 6082 v6dst = sin6->sin6_addr; 6083 dstport = sin6->sin6_port; 6084 srcid = sin6->__sin6_src_id; 6085 if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) { 6086 ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp), 6087 connp->conn_netstack); 6088 } 6089 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 6090 if (connp->conn_ipv6_v6only) 6091 return (EADDRNOTAVAIL); 6092 6093 /* 6094 * Destination adress is mapped IPv6 address. 6095 * Source bound address should be unspecified or 6096 * IPv6 mapped address as well. 
6097 */ 6098 if (!IN6_IS_ADDR_UNSPECIFIED( 6099 &connp->conn_bound_addr_v6) && 6100 !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) { 6101 return (EADDRNOTAVAIL); 6102 } 6103 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 6104 ipversion = IPV4_VERSION; 6105 flowinfo = 0; 6106 } else { 6107 ipversion = IPV6_VERSION; 6108 flowinfo = sin6->sin6_flowinfo; 6109 if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) 6110 scopeid = sin6->sin6_scope_id; 6111 } 6112 break; 6113 } 6114 6115 if (dstport == 0) 6116 return (-TBADADDR); 6117 6118 /* 6119 * If there is a different thread using conn_ixa then we get a new 6120 * copy and cut the old one loose from conn_ixa. Otherwise we use 6121 * conn_ixa and prevent any other thread from using/changing it. 6122 * Once connect() is done other threads can use conn_ixa since the 6123 * refcnt will be back at one. 6124 */ 6125 ixa = conn_get_ixa(connp, B_TRUE); 6126 if (ixa == NULL) 6127 return (ENOMEM); 6128 6129 ASSERT(ixa->ixa_refcnt >= 2); 6130 ASSERT(ixa == connp->conn_ixa); 6131 6132 mutex_enter(&connp->conn_lock); 6133 /* 6134 * This udp_t must have bound to a port already before doing a connect. 6135 * Reject if a connect is in progress (we drop conn_lock during 6136 * udp_do_connect). 
6137 */ 6138 if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) { 6139 mutex_exit(&connp->conn_lock); 6140 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 6141 "udp_connect: bad state, %u", udp->udp_state); 6142 ixa_refrele(ixa); 6143 return (-TOUTSTATE); 6144 } 6145 ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL); 6146 6147 udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport, 6148 us->us_bind_fanout_size)]; 6149 6150 mutex_enter(&udpf->uf_lock); 6151 if (udp->udp_state == TS_DATA_XFER) { 6152 /* Already connected - clear out state */ 6153 if (connp->conn_mcbc_bind) 6154 connp->conn_saddr_v6 = ipv6_all_zeros; 6155 else 6156 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 6157 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 6158 connp->conn_faddr_v6 = ipv6_all_zeros; 6159 connp->conn_fport = 0; 6160 udp->udp_state = TS_IDLE; 6161 } 6162 6163 connp->conn_fport = dstport; 6164 connp->conn_ipversion = ipversion; 6165 if (ipversion == IPV4_VERSION) { 6166 /* 6167 * Interpret a zero destination to mean loopback. 6168 * Update the T_CONN_REQ (sin/sin6) since it is used to 6169 * generate the T_CONN_CON. 6170 */ 6171 if (v4dst == INADDR_ANY) { 6172 v4dst = htonl(INADDR_LOOPBACK); 6173 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 6174 if (connp->conn_family == AF_INET) { 6175 sin->sin_addr.s_addr = v4dst; 6176 } else { 6177 sin6->sin6_addr = v6dst; 6178 } 6179 } 6180 connp->conn_faddr_v6 = v6dst; 6181 connp->conn_flowinfo = 0; 6182 } else { 6183 ASSERT(connp->conn_ipversion == IPV6_VERSION); 6184 /* 6185 * Interpret a zero destination to mean loopback. 6186 * Update the T_CONN_REQ (sin/sin6) since it is used to 6187 * generate the T_CONN_CON. 
6188 */ 6189 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 6190 v6dst = ipv6_loopback; 6191 sin6->sin6_addr = v6dst; 6192 } 6193 connp->conn_faddr_v6 = v6dst; 6194 connp->conn_flowinfo = flowinfo; 6195 } 6196 mutex_exit(&udpf->uf_lock); 6197 6198 ixa->ixa_cred = cr; 6199 ixa->ixa_cpid = pid; 6200 if (is_system_labeled()) { 6201 /* We need to restart with a label based on the cred */ 6202 ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred); 6203 } 6204 6205 if (scopeid != 0) { 6206 ixa->ixa_flags |= IXAF_SCOPEID_SET; 6207 ixa->ixa_scopeid = scopeid; 6208 connp->conn_incoming_ifindex = scopeid; 6209 } else { 6210 ixa->ixa_flags &= ~IXAF_SCOPEID_SET; 6211 connp->conn_incoming_ifindex = connp->conn_bound_if; 6212 } 6213 /* 6214 * conn_connect will drop conn_lock and reacquire it. 6215 * To prevent a send* from messing with this udp_t while the lock 6216 * is dropped we set udp_state and clear conn_v6lastdst. 6217 * That will make all send* fail with EISCONN. 6218 */ 6219 connp->conn_v6lastdst = ipv6_all_zeros; 6220 udp->udp_state = TS_WCON_CREQ; 6221 6222 error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC); 6223 mutex_exit(&connp->conn_lock); 6224 if (error != 0) 6225 goto connect_failed; 6226 6227 /* 6228 * The addresses have been verified. Time to insert in 6229 * the correct fanout list. 
6230 */ 6231 error = ipcl_conn_insert(connp); 6232 if (error != 0) 6233 goto connect_failed; 6234 6235 mutex_enter(&connp->conn_lock); 6236 error = udp_build_hdr_template(connp, &connp->conn_saddr_v6, 6237 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 6238 if (error != 0) { 6239 mutex_exit(&connp->conn_lock); 6240 goto connect_failed; 6241 } 6242 6243 udp->udp_state = TS_DATA_XFER; 6244 /* Record this as the "last" send even though we haven't sent any */ 6245 connp->conn_v6lastdst = connp->conn_faddr_v6; 6246 connp->conn_lastipversion = connp->conn_ipversion; 6247 connp->conn_lastdstport = connp->conn_fport; 6248 connp->conn_lastflowinfo = connp->conn_flowinfo; 6249 connp->conn_lastscopeid = scopeid; 6250 connp->conn_lastsrcid = srcid; 6251 /* Also remember a source to use together with lastdst */ 6252 connp->conn_v6lastsrc = v6src; 6253 mutex_exit(&connp->conn_lock); 6254 6255 /* 6256 * We've picked a source address above. Now we can 6257 * verify that the src/port/dst/port is unique for all 6258 * connections in TS_DATA_XFER, skipping ourselves. 
6259 */ 6260 mutex_enter(&udpf->uf_lock); 6261 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 6262 if (udp1->udp_state != TS_DATA_XFER) 6263 continue; 6264 6265 if (udp1 == udp) 6266 continue; 6267 6268 connp1 = udp1->udp_connp; 6269 if (connp->conn_lport != connp1->conn_lport || 6270 connp->conn_ipversion != connp1->conn_ipversion || 6271 dstport != connp1->conn_fport || 6272 !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6, 6273 &connp1->conn_laddr_v6) || 6274 !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) || 6275 !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) || 6276 IPCL_ZONE_MATCH(connp1, connp->conn_zoneid))) 6277 continue; 6278 mutex_exit(&udpf->uf_lock); 6279 error = -TBADADDR; 6280 goto connect_failed; 6281 } 6282 if (cl_inet_connect2 != NULL) { 6283 CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error); 6284 if (error != 0) { 6285 mutex_exit(&udpf->uf_lock); 6286 error = -TBADADDR; 6287 goto connect_failed; 6288 } 6289 } 6290 mutex_exit(&udpf->uf_lock); 6291 6292 ixa_refrele(ixa); 6293 return (0); 6294 6295 connect_failed: 6296 if (ixa != NULL) 6297 ixa_refrele(ixa); 6298 mutex_enter(&connp->conn_lock); 6299 mutex_enter(&udpf->uf_lock); 6300 udp->udp_state = TS_IDLE; 6301 connp->conn_faddr_v6 = ipv6_all_zeros; 6302 connp->conn_fport = 0; 6303 /* In case the source address was set above */ 6304 if (connp->conn_mcbc_bind) 6305 connp->conn_saddr_v6 = ipv6_all_zeros; 6306 else 6307 connp->conn_saddr_v6 = connp->conn_bound_addr_v6; 6308 connp->conn_laddr_v6 = connp->conn_bound_addr_v6; 6309 mutex_exit(&udpf->uf_lock); 6310 6311 connp->conn_v6lastdst = ipv6_all_zeros; 6312 connp->conn_flowinfo = 0; 6313 6314 (void) udp_build_hdr_template(connp, &connp->conn_saddr_v6, 6315 &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo); 6316 mutex_exit(&connp->conn_lock); 6317 return (error); 6318 } 6319 6320 static int 6321 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 6322 socklen_t len, sock_connid_t *id, 
cred_t *cr) 6323 { 6324 conn_t *connp = (conn_t *)proto_handle; 6325 udp_t *udp = connp->conn_udp; 6326 int error; 6327 boolean_t did_bind = B_FALSE; 6328 pid_t pid = curproc->p_pid; 6329 6330 /* All Solaris components should pass a cred for this operation. */ 6331 ASSERT(cr != NULL); 6332 6333 if (sa == NULL) { 6334 /* 6335 * Disconnect 6336 * Make sure we are connected 6337 */ 6338 if (udp->udp_state != TS_DATA_XFER) 6339 return (EINVAL); 6340 6341 error = udp_disconnect(connp); 6342 return (error); 6343 } 6344 6345 error = proto_verify_ip_addr(connp->conn_family, sa, len); 6346 if (error != 0) 6347 goto done; 6348 6349 /* do an implicit bind if necessary */ 6350 if (udp->udp_state == TS_UNBND) { 6351 error = udp_implicit_bind(connp, cr); 6352 /* 6353 * We could be racing with an actual bind, in which case 6354 * we would see EPROTO. We cross our fingers and try 6355 * to connect. 6356 */ 6357 if (!(error == 0 || error == EPROTO)) 6358 goto done; 6359 did_bind = B_TRUE; 6360 } 6361 /* 6362 * set SO_DGRAM_ERRIND 6363 */ 6364 connp->conn_dgram_errind = B_TRUE; 6365 6366 error = udp_do_connect(connp, sa, len, cr, pid); 6367 6368 if (error != 0 && did_bind) { 6369 int unbind_err; 6370 6371 unbind_err = udp_do_unbind(connp); 6372 ASSERT(unbind_err == 0); 6373 } 6374 6375 if (error == 0) { 6376 *id = 0; 6377 (*connp->conn_upcalls->su_connected) 6378 (connp->conn_upper_handle, 0, NULL, -1); 6379 } else if (error < 0) { 6380 error = proto_tlitosyserr(-error); 6381 } 6382 6383 done: 6384 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 6385 /* 6386 * No need to hold locks to set state 6387 * after connect failure socket state is undefined 6388 * We set the state only to imitate old sockfs behavior 6389 */ 6390 udp->udp_state = TS_IDLE; 6391 } 6392 return (error); 6393 } 6394 6395 int 6396 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 6397 cred_t *cr) 6398 { 6399 sin6_t *sin6; 6400 sin_t *sin = NULL; 6401 uint_t srcid; 6402 conn_t *connp = 
(conn_t *)proto_handle; 6403 udp_t *udp = connp->conn_udp; 6404 int error = 0; 6405 udp_stack_t *us = udp->udp_us; 6406 ushort_t ipversion; 6407 pid_t pid = curproc->p_pid; 6408 ip_xmit_attr_t *ixa; 6409 6410 ASSERT(DB_TYPE(mp) == M_DATA); 6411 6412 /* All Solaris components should pass a cred for this operation. */ 6413 ASSERT(cr != NULL); 6414 6415 /* do an implicit bind if necessary */ 6416 if (udp->udp_state == TS_UNBND) { 6417 error = udp_implicit_bind(connp, cr); 6418 /* 6419 * We could be racing with an actual bind, in which case 6420 * we would see EPROTO. We cross our fingers and try 6421 * to connect. 6422 */ 6423 if (!(error == 0 || error == EPROTO)) { 6424 freemsg(mp); 6425 return (error); 6426 } 6427 } 6428 6429 /* Connected? */ 6430 if (msg->msg_name == NULL) { 6431 if (udp->udp_state != TS_DATA_XFER) { 6432 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6433 return (EDESTADDRREQ); 6434 } 6435 if (msg->msg_controllen != 0) { 6436 error = udp_output_ancillary(connp, NULL, NULL, mp, 6437 NULL, msg, cr, pid); 6438 } else { 6439 error = udp_output_connected(connp, mp, cr, pid); 6440 } 6441 if (us->us_sendto_ignerr) 6442 return (0); 6443 else 6444 return (error); 6445 } 6446 if (udp->udp_state == TS_DATA_XFER) { 6447 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6448 return (EISCONN); 6449 } 6450 error = proto_verify_ip_addr(connp->conn_family, 6451 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 6452 if (error != 0) { 6453 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6454 return (error); 6455 } 6456 switch (connp->conn_family) { 6457 case AF_INET6: 6458 sin6 = (sin6_t *)msg->msg_name; 6459 6460 srcid = sin6->__sin6_src_id; 6461 6462 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6463 /* 6464 * Destination is a non-IPv4-compatible IPv6 address. 6465 * Send out an IPv6 format packet. 6466 */ 6467 6468 /* 6469 * If the local address is a mapped address return 6470 * an error. 
6471 * It would be possible to send an IPv6 packet but the 6472 * response would never make it back to the application 6473 * since it is bound to a mapped address. 6474 */ 6475 if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) { 6476 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6477 return (EADDRNOTAVAIL); 6478 } 6479 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6480 sin6->sin6_addr = ipv6_loopback; 6481 ipversion = IPV6_VERSION; 6482 } else { 6483 if (connp->conn_ipv6_v6only) { 6484 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6485 return (EADDRNOTAVAIL); 6486 } 6487 6488 /* 6489 * If the local address is not zero or a mapped address 6490 * return an error. It would be possible to send an 6491 * IPv4 packet but the response would never make it 6492 * back to the application since it is bound to a 6493 * non-mapped address. 6494 */ 6495 if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) && 6496 !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) { 6497 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6498 return (EADDRNOTAVAIL); 6499 } 6500 6501 if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) { 6502 V4_PART_OF_V6(sin6->sin6_addr) = 6503 htonl(INADDR_LOOPBACK); 6504 } 6505 ipversion = IPV4_VERSION; 6506 } 6507 6508 /* 6509 * We have to allocate an ip_xmit_attr_t before we grab 6510 * conn_lock and we need to hold conn_lock once we've check 6511 * conn_same_as_last_v6 to handle concurrent send* calls on a 6512 * socket. 
6513 */ 6514 if (msg->msg_controllen == 0) { 6515 ixa = conn_get_ixa(connp, B_FALSE); 6516 if (ixa == NULL) { 6517 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6518 return (ENOMEM); 6519 } 6520 } else { 6521 ixa = NULL; 6522 } 6523 mutex_enter(&connp->conn_lock); 6524 if (udp->udp_delayed_error != 0) { 6525 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 6526 6527 error = udp->udp_delayed_error; 6528 udp->udp_delayed_error = 0; 6529 6530 /* Compare IP address, port, and family */ 6531 6532 if (sin6->sin6_port == sin2->sin6_port && 6533 IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr, 6534 &sin2->sin6_addr) && 6535 sin6->sin6_family == sin2->sin6_family) { 6536 mutex_exit(&connp->conn_lock); 6537 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6538 if (ixa != NULL) 6539 ixa_refrele(ixa); 6540 return (error); 6541 } 6542 } 6543 6544 if (msg->msg_controllen != 0) { 6545 mutex_exit(&connp->conn_lock); 6546 ASSERT(ixa == NULL); 6547 error = udp_output_ancillary(connp, NULL, sin6, mp, 6548 NULL, msg, cr, pid); 6549 } else if (conn_same_as_last_v6(connp, sin6) && 6550 connp->conn_lastsrcid == srcid && 6551 ipsec_outbound_policy_current(ixa)) { 6552 /* udp_output_lastdst drops conn_lock */ 6553 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6554 } else { 6555 /* udp_output_newdst drops conn_lock */ 6556 error = udp_output_newdst(connp, mp, NULL, sin6, 6557 ipversion, cr, pid, ixa); 6558 } 6559 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6560 if (us->us_sendto_ignerr) 6561 return (0); 6562 else 6563 return (error); 6564 case AF_INET: 6565 sin = (sin_t *)msg->msg_name; 6566 6567 ipversion = IPV4_VERSION; 6568 6569 if (sin->sin_addr.s_addr == INADDR_ANY) 6570 sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK); 6571 6572 /* 6573 * We have to allocate an ip_xmit_attr_t before we grab 6574 * conn_lock and we need to hold conn_lock once we've check 6575 * conn_same_as_last_v6 to handle concurrent send* on a socket. 
6576 */ 6577 if (msg->msg_controllen == 0) { 6578 ixa = conn_get_ixa(connp, B_FALSE); 6579 if (ixa == NULL) { 6580 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6581 return (ENOMEM); 6582 } 6583 } else { 6584 ixa = NULL; 6585 } 6586 mutex_enter(&connp->conn_lock); 6587 if (udp->udp_delayed_error != 0) { 6588 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 6589 6590 error = udp->udp_delayed_error; 6591 udp->udp_delayed_error = 0; 6592 6593 /* Compare IP address and port */ 6594 6595 if (sin->sin_port == sin2->sin_port && 6596 sin->sin_addr.s_addr == sin2->sin_addr.s_addr) { 6597 mutex_exit(&connp->conn_lock); 6598 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6599 if (ixa != NULL) 6600 ixa_refrele(ixa); 6601 return (error); 6602 } 6603 } 6604 if (msg->msg_controllen != 0) { 6605 mutex_exit(&connp->conn_lock); 6606 ASSERT(ixa == NULL); 6607 error = udp_output_ancillary(connp, sin, NULL, mp, 6608 NULL, msg, cr, pid); 6609 } else if (conn_same_as_last_v4(connp, sin) && 6610 ipsec_outbound_policy_current(ixa)) { 6611 /* udp_output_lastdst drops conn_lock */ 6612 error = udp_output_lastdst(connp, mp, cr, pid, ixa); 6613 } else { 6614 /* udp_output_newdst drops conn_lock */ 6615 error = udp_output_newdst(connp, mp, sin, NULL, 6616 ipversion, cr, pid, ixa); 6617 } 6618 ASSERT(MUTEX_NOT_HELD(&connp->conn_lock)); 6619 if (us->us_sendto_ignerr) 6620 return (0); 6621 else 6622 return (error); 6623 default: 6624 return (EINVAL); 6625 } 6626 } 6627 6628 int 6629 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 6630 boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb) 6631 { 6632 conn_t *connp = (conn_t *)proto_handle; 6633 udp_t *udp; 6634 struct T_capability_ack tca; 6635 struct sockaddr_in6 laddr, faddr; 6636 socklen_t laddrlen, faddrlen; 6637 short opts; 6638 struct stroptions *stropt; 6639 mblk_t *stropt_mp; 6640 int error; 6641 6642 udp = connp->conn_udp; 6643 6644 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 6645 6646 /* 6647 * setup the fallback 
stream that was allocated 6648 */ 6649 connp->conn_dev = (dev_t)RD(q)->q_ptr; 6650 connp->conn_minor_arena = WR(q)->q_ptr; 6651 6652 RD(q)->q_ptr = WR(q)->q_ptr = connp; 6653 6654 WR(q)->q_qinfo = &udp_winit; 6655 6656 connp->conn_rq = RD(q); 6657 connp->conn_wq = WR(q); 6658 6659 /* Notify stream head about options before sending up data */ 6660 stropt_mp->b_datap->db_type = M_SETOPTS; 6661 stropt_mp->b_wptr += sizeof (*stropt); 6662 stropt = (struct stroptions *)stropt_mp->b_rptr; 6663 stropt->so_flags = SO_WROFF | SO_HIWAT; 6664 stropt->so_wroff = connp->conn_wroff; 6665 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 6666 putnext(RD(q), stropt_mp); 6667 6668 /* 6669 * Free the helper stream 6670 */ 6671 ip_free_helper_stream(connp); 6672 6673 if (!issocket) 6674 udp_use_pure_tpi(udp); 6675 6676 /* 6677 * Collect the information needed to sync with the sonode 6678 */ 6679 udp_do_capability_ack(udp, &tca, TC1_INFO); 6680 6681 laddrlen = faddrlen = sizeof (sin6_t); 6682 (void) udp_getsockname((sock_lower_handle_t)connp, 6683 (struct sockaddr *)&laddr, &laddrlen, CRED()); 6684 error = udp_getpeername((sock_lower_handle_t)connp, 6685 (struct sockaddr *)&faddr, &faddrlen, CRED()); 6686 if (error != 0) 6687 faddrlen = 0; 6688 6689 opts = 0; 6690 if (connp->conn_dgram_errind) 6691 opts |= SO_DGRAM_ERRIND; 6692 if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE) 6693 opts |= SO_DONTROUTE; 6694 6695 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 6696 (struct sockaddr *)&laddr, laddrlen, 6697 (struct sockaddr *)&faddr, faddrlen, opts); 6698 6699 mutex_enter(&udp->udp_recv_lock); 6700 /* 6701 * Attempts to send data up during fallback will result in it being 6702 * queued in udp_t. Now we push up any queued packets. 
6703 */ 6704 while (udp->udp_fallback_queue_head != NULL) { 6705 mblk_t *mp; 6706 mp = udp->udp_fallback_queue_head; 6707 udp->udp_fallback_queue_head = mp->b_next; 6708 mutex_exit(&udp->udp_recv_lock); 6709 mp->b_next = NULL; 6710 putnext(RD(q), mp); 6711 mutex_enter(&udp->udp_recv_lock); 6712 } 6713 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 6714 /* 6715 * No longer a streams less socket 6716 */ 6717 mutex_enter(&connp->conn_lock); 6718 connp->conn_flags &= ~IPCL_NONSTR; 6719 mutex_exit(&connp->conn_lock); 6720 6721 mutex_exit(&udp->udp_recv_lock); 6722 6723 ASSERT(connp->conn_ref >= 1); 6724 6725 return (0); 6726 } 6727 6728 /* ARGSUSED3 */ 6729 int 6730 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6731 socklen_t *salenp, cred_t *cr) 6732 { 6733 conn_t *connp = (conn_t *)proto_handle; 6734 udp_t *udp = connp->conn_udp; 6735 int error; 6736 6737 /* All Solaris components should pass a cred for this operation. */ 6738 ASSERT(cr != NULL); 6739 6740 mutex_enter(&connp->conn_lock); 6741 if (udp->udp_state != TS_DATA_XFER) 6742 error = ENOTCONN; 6743 else 6744 error = conn_getpeername(connp, sa, salenp); 6745 mutex_exit(&connp->conn_lock); 6746 return (error); 6747 } 6748 6749 /* ARGSUSED3 */ 6750 int 6751 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 6752 socklen_t *salenp, cred_t *cr) 6753 { 6754 conn_t *connp = (conn_t *)proto_handle; 6755 int error; 6756 6757 /* All Solaris components should pass a cred for this operation. 
*/ 6758 ASSERT(cr != NULL); 6759 6760 mutex_enter(&connp->conn_lock); 6761 error = conn_getsockname(connp, sa, salenp); 6762 mutex_exit(&connp->conn_lock); 6763 return (error); 6764 } 6765 6766 int 6767 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6768 void *optvalp, socklen_t *optlen, cred_t *cr) 6769 { 6770 conn_t *connp = (conn_t *)proto_handle; 6771 int error; 6772 t_uscalar_t max_optbuf_len; 6773 void *optvalp_buf; 6774 int len; 6775 6776 /* All Solaris components should pass a cred for this operation. */ 6777 ASSERT(cr != NULL); 6778 6779 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 6780 udp_opt_obj.odb_opt_des_arr, 6781 udp_opt_obj.odb_opt_arr_cnt, 6782 B_FALSE, B_TRUE, cr); 6783 if (error != 0) { 6784 if (error < 0) 6785 error = proto_tlitosyserr(-error); 6786 return (error); 6787 } 6788 6789 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 6790 len = udp_opt_get(connp, level, option_name, optvalp_buf); 6791 if (len == -1) { 6792 kmem_free(optvalp_buf, max_optbuf_len); 6793 return (EINVAL); 6794 } 6795 6796 /* 6797 * update optlen and copy option value 6798 */ 6799 t_uscalar_t size = MIN(len, *optlen); 6800 6801 bcopy(optvalp_buf, optvalp, size); 6802 bcopy(&size, optlen, sizeof (size)); 6803 6804 kmem_free(optvalp_buf, max_optbuf_len); 6805 return (0); 6806 } 6807 6808 int 6809 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 6810 const void *optvalp, socklen_t optlen, cred_t *cr) 6811 { 6812 conn_t *connp = (conn_t *)proto_handle; 6813 int error; 6814 6815 /* All Solaris components should pass a cred for this operation. 
*/ 6816 ASSERT(cr != NULL); 6817 6818 error = proto_opt_check(level, option_name, optlen, NULL, 6819 udp_opt_obj.odb_opt_des_arr, 6820 udp_opt_obj.odb_opt_arr_cnt, 6821 B_TRUE, B_FALSE, cr); 6822 6823 if (error != 0) { 6824 if (error < 0) 6825 error = proto_tlitosyserr(-error); 6826 return (error); 6827 } 6828 6829 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 6830 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 6831 NULL, cr); 6832 6833 ASSERT(error >= 0); 6834 6835 return (error); 6836 } 6837 6838 void 6839 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 6840 { 6841 conn_t *connp = (conn_t *)proto_handle; 6842 udp_t *udp = connp->conn_udp; 6843 6844 mutex_enter(&udp->udp_recv_lock); 6845 connp->conn_flow_cntrld = B_FALSE; 6846 mutex_exit(&udp->udp_recv_lock); 6847 } 6848 6849 /* ARGSUSED2 */ 6850 int 6851 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 6852 { 6853 conn_t *connp = (conn_t *)proto_handle; 6854 6855 /* All Solaris components should pass a cred for this operation. */ 6856 ASSERT(cr != NULL); 6857 6858 /* shut down the send side */ 6859 if (how != SHUT_RD) 6860 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6861 SOCK_OPCTL_SHUT_SEND, 0); 6862 /* shut down the recv side */ 6863 if (how != SHUT_WR) 6864 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 6865 SOCK_OPCTL_SHUT_RECV, 0); 6866 return (0); 6867 } 6868 6869 int 6870 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 6871 int mode, int32_t *rvalp, cred_t *cr) 6872 { 6873 conn_t *connp = (conn_t *)proto_handle; 6874 int error; 6875 6876 /* All Solaris components should pass a cred for this operation. */ 6877 ASSERT(cr != NULL); 6878 6879 /* 6880 * If we don't have a helper stream then create one. 6881 * ip_create_helper_stream takes care of locking the conn_t, 6882 * so this check for NULL is just a performance optimization. 
6883 */ 6884 if (connp->conn_helper_info == NULL) { 6885 udp_stack_t *us = connp->conn_udp->udp_us; 6886 6887 ASSERT(us->us_ldi_ident != NULL); 6888 6889 /* 6890 * Create a helper stream for non-STREAMS socket. 6891 */ 6892 error = ip_create_helper_stream(connp, us->us_ldi_ident); 6893 if (error != 0) { 6894 ip0dbg(("tcp_ioctl: create of IP helper stream " 6895 "failed %d\n", error)); 6896 return (error); 6897 } 6898 } 6899 6900 switch (cmd) { 6901 case ND_SET: 6902 case ND_GET: 6903 case _SIOCSOCKFALLBACK: 6904 case TI_GETPEERNAME: 6905 case TI_GETMYNAME: 6906 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 6907 cmd)); 6908 error = EINVAL; 6909 break; 6910 default: 6911 /* 6912 * Pass on to IP using helper stream 6913 */ 6914 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 6915 cmd, arg, mode, cr, rvalp); 6916 break; 6917 } 6918 return (error); 6919 } 6920 6921 /* ARGSUSED */ 6922 int 6923 udp_accept(sock_lower_handle_t lproto_handle, 6924 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 6925 cred_t *cr) 6926 { 6927 return (EOPNOTSUPP); 6928 } 6929 6930 /* ARGSUSED */ 6931 int 6932 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 6933 { 6934 return (EOPNOTSUPP); 6935 } 6936 6937 sock_downcalls_t sock_udp_downcalls = { 6938 udp_activate, /* sd_activate */ 6939 udp_accept, /* sd_accept */ 6940 udp_bind, /* sd_bind */ 6941 udp_listen, /* sd_listen */ 6942 udp_connect, /* sd_connect */ 6943 udp_getpeername, /* sd_getpeername */ 6944 udp_getsockname, /* sd_getsockname */ 6945 udp_getsockopt, /* sd_getsockopt */ 6946 udp_setsockopt, /* sd_setsockopt */ 6947 udp_send, /* sd_send */ 6948 NULL, /* sd_send_uio */ 6949 NULL, /* sd_recv_uio */ 6950 NULL, /* sd_poll */ 6951 udp_shutdown, /* sd_shutdown */ 6952 udp_clr_flowctrl, /* sd_setflowctrl */ 6953 udp_ioctl, /* sd_ioctl */ 6954 udp_close /* sd_close */ 6955 }; 6956