1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/kmem.h> 44 #include <sys/policy.h> 45 #include <sys/ucred.h> 46 #include <sys/zone.h> 47 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/proto_set.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 #include <sys/ethernet.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 101 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 102 * We also use conn_lock when updating things that affect the IP classifier 103 * lookup. 104 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 105 * 106 * The fanout lock uf_lock: 107 * When a UDP endpoint is bound to a local port, it is inserted into 108 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 109 * The size of the array is controlled by the udp_bind_fanout_size variable. 110 * This variable can be changed in /etc/system if the default value is 111 * not large enough. Each bind hash bucket is protected by a per bucket 112 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 113 * structure and a few other fields in the udp_t. A UDP endpoint is removed 114 * from the bind hash list only when it is being unbound or being closed. 115 * The per bucket lock also protects a UDP endpoint's state changes. 116 * 117 * The udp_rwlock: 118 * This protects most of the other fields in the udp_t. The exact list of 119 * fields which are protected by each of the above locks is documented in 120 * the udp_t structure definition. 121 * 122 * Plumbing notes: 123 * UDP is always a device driver. For compatibility with mibopen() code 124 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 125 * dummy module. 126 * 127 * The above implies that we don't support any intermediate module to 128 * reside in between /dev/ip and udp -- in fact, we never supported such 129 * scenario in the past as the inter-layer communication semantics have 130 * always been private. 
131 */ 132 133 /* For /etc/system control */ 134 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 135 136 /* Option processing attrs; handed to udp_unitdata_opt_process() through its udpattrs argument */ 137 typedef struct udpattrs_s { 138 union { 139 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 140 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 141 } udpattr_ippu; 142 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 143 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 144 mblk_t *udpattr_mb; 145 boolean_t udpattr_credset; 146 } udpattrs_t; 147 148 static void udp_addr_req(queue_t *q, mblk_t *mp); 149 static void udp_tpi_bind(queue_t *q, mblk_t *mp); 150 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 151 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 152 static int udp_build_hdrs(udp_t *udp); 153 static void udp_capability_req(queue_t *q, mblk_t *mp); 154 static int udp_tpi_close(queue_t *q, int flags); 155 static void udp_tpi_connect(queue_t *q, mblk_t *mp); 156 static void udp_tpi_disconnect(queue_t *q, mblk_t *mp); 157 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 158 int sys_error); 159 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 160 t_scalar_t tlierr, int unixerr); 161 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 162 cred_t *cr); 163 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 164 char *value, caddr_t cp, cred_t *cr); 165 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 166 char *value, caddr_t cp, cred_t *cr); 167 static void udp_icmp_error(conn_t *, mblk_t *); 168 static void udp_icmp_error_ipv6(conn_t *, mblk_t *); 169 static void udp_info_req(queue_t *q, mblk_t *mp); 170 static void udp_input(void *, mblk_t *, void *); 171 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 172 t_scalar_t addr_length); 173 static void udp_lrput(queue_t *, mblk_t *); 174 static void udp_lwput(queue_t *, mblk_t *); 175 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 176 cred_t *credp, 
boolean_t isv6); 177 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 178 cred_t *credp); 179 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 180 cred_t *credp); 181 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 182 int *errorp, udpattrs_t *udpattrs); 183 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 184 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 185 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 186 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 187 cred_t *cr); 188 static int udp_rinfop(queue_t *q, infod_t *dp); 189 static int udp_rrw(queue_t *q, struiod_t *dp); 190 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 191 ipha_t *ipha); 192 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 193 t_scalar_t destlen, t_scalar_t err); 194 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 195 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 196 boolean_t random); 197 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 198 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 199 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 200 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 201 static void udp_wput_other(queue_t *q, mblk_t *mp); 202 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 203 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 204 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 205 206 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 207 static void udp_stack_fini(netstackid_t stackid, void *arg); 208 209 static void *udp_kstat_init(netstackid_t stackid); 210 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 211 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 212 static void udp_kstat2_fini(netstackid_t, kstat_t *); 213 static int 
udp_kstat_update(kstat_t *kp, int rw); 214 215 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 216 uint_t pkt_len); 217 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 218 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 219 220 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 221 cred_t *, pid_t); 222 static void udp_ulp_recv(conn_t *, mblk_t *); 223 224 /* Common routines shared by the TPI and socket module entry points */ 225 static conn_t *udp_do_open(cred_t *, boolean_t, int); 226 static void udp_do_close(conn_t *); 227 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 228 boolean_t); 229 static int udp_do_unbind(conn_t *); 230 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 231 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 232 233 int udp_getsockname(sock_lower_handle_t, 234 struct sockaddr *, socklen_t *, cred_t *); 235 int udp_getpeername(sock_lower_handle_t, 236 struct sockaddr *, socklen_t *, cred_t *); 237 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t, 238 cred_t *cr); 239 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 240 241 #define UDP_RECV_HIWATER (56 * 1024) 242 #define UDP_RECV_LOWATER 128 243 #define UDP_XMIT_HIWATER (56 * 1024) 244 #define UDP_XMIT_LOWATER 1024 245 246 /* 247 * The following is defined in tcp.c 248 */ 249 extern int (*cl_inet_connect2)(netstackid_t stack_id, 250 uint8_t protocol, boolean_t is_outgoing, 251 sa_family_t addr_family, 252 uint8_t *laddrp, in_port_t lport, 253 uint8_t *faddrp, in_port_t fport, void *args); 254 255 /* 256 * Checks if the given destination addr/port is allowed out. 257 * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster. 258 * Called for each connect() and for sendto()/sendmsg() to a different 259 * destination. 260 * For connect(), called in udp_connect(). 261 * For sendto()/sendmsg(), called in udp_output_v{4,6}(). 
262 * 263 * This macro assumes that the cl_inet_connect2 hook is not NULL. 264 * Please check this before calling this macro. * The macro expands to a bare brace-enclosed block and evaluates its udp * and err arguments more than once, so pass side-effect-free expressions. 265 * 266 * void 267 * CL_INET_UDP_CONNECT(conn_t *cp, udp_t *udp, boolean_t is_outgoing, 268 * in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err); 269 */ 270 #define CL_INET_UDP_CONNECT(cp, udp, is_outgoing, faddrp, fport, err) { \ 271 (err) = 0; \ 272 /* \ 273 * Running in cluster mode - check and register active \ 274 * "connection" information \ 275 */ \ 276 if ((udp)->udp_ipversion == IPV4_VERSION) \ 277 (err) = (*cl_inet_connect2)( \ 278 (cp)->conn_netstack->netstack_stackid, \ 279 IPPROTO_UDP, is_outgoing, AF_INET, \ 280 (uint8_t *)&((udp)->udp_v6src._S6_un._S6_u32[3]), \ 281 (udp)->udp_port, \ 282 (uint8_t *)&((faddrp)->_S6_un._S6_u32[3]), \ 283 (in_port_t)(fport), NULL); \ 284 else \ 285 (err) = (*cl_inet_connect2)( \ 286 (cp)->conn_netstack->netstack_stackid, \ 287 IPPROTO_UDP, is_outgoing, AF_INET6, \ 288 (uint8_t *)&((udp)->udp_v6src), (udp)->udp_port, \ 289 (uint8_t *)(faddrp), (in_port_t)(fport), NULL); \ 290 } 291 292 static struct module_info udp_mod_info = { 293 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 294 }; 295 296 /* 297 * Entry points for UDP as a device. 298 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 
299 */ 300 static struct qinit udp_rinitv4 = { 301 NULL, NULL, udp_openv4, udp_tpi_close, NULL, 302 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 303 }; 304 305 static struct qinit udp_rinitv6 = { 306 NULL, NULL, udp_openv6, udp_tpi_close, NULL, 307 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 308 }; 309 310 static struct qinit udp_winit = { 311 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, 312 &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE 313 }; 314 315 /* UDP entry point during fallback */ 316 struct qinit udp_fallback_sock_winit = { 317 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 318 }; 319 320 /* 321 * UDP needs to handle I_LINK and I_PLINK since ifconfig 322 * likes to use it as a place to hang the various streams. 323 */ 324 static struct qinit udp_lrinit = { 325 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, 326 &udp_mod_info 327 }; 328 329 static struct qinit udp_lwinit = { 330 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, 331 &udp_mod_info 332 }; 333 334 /* For AF_INET aka /dev/udp */ 335 struct streamtab udpinfov4 = { 336 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 337 }; 338 339 /* For AF_INET6 aka /dev/udp6 */ 340 struct streamtab udpinfov6 = { 341 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 342 }; 343 344 static sin_t sin_null; /* Zero address for quick clears */ 345 static sin6_t sin6_null; /* Zero address for quick clears */ 346 347 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 348 349 /* Default structure copied into T_INFO_ACK messages */ 350 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 351 T_INFO_ACK, 352 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 353 T_INVALID, /* ETSDU_size. udp does not support expedited data. */ 354 T_INVALID, /* CDATA_size. udp does not support connect data. */ 355 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 356 sizeof (sin_t), /* ADDR_size. 
*/ 357 0, /* OPT_size - not initialized here */ 358 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 359 T_CLTS, /* SERV_type. udp supports connection-less. */ 360 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 361 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 362 }; 363 364 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 365 366 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 367 T_INFO_ACK, 368 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 369 T_INVALID, /* ETSDU_size. udp does not support expedited data. */ 370 T_INVALID, /* CDATA_size. udp does not support connect data. */ 371 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 372 sizeof (sin6_t), /* ADDR_size. */ 373 0, /* OPT_size - not initialized here */ 374 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 375 T_CLTS, /* SERV_type. udp supports connection-less. */ 376 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 377 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 378 }; 379 380 /* largest UDP port number */ 381 #define UDP_MAX_PORT 65535 382 383 /* 384 * Table of ND variables supported by udp. These are loaded into us_nd 385 * in udp_open. 386 * All of these are alterable, within the min/max values given, at run time. 
387 */ 388 /* BEGIN CSTYLED */ 389 udpparam_t udp_param_arr[] = { 390 /*min max value name */ 391 { 0L, 256, 32, "udp_wroff_extra" }, 392 { 1L, 255, 255, "udp_ipv4_ttl" }, 393 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 394 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 395 { 0, 1, 1, "udp_do_checksum" }, 396 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 397 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 398 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 399 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 400 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 401 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 402 }; 403 /* END CSTYLED */ 404 405 /* Settable in /etc/system */ 406 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 407 uint32_t udp_random_anon_port = 1; 408 409 /* 410 * Hook functions to enable cluster networking. 411 * On non-clustered systems these vectors must always be NULL 412 */ 413 414 void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol, 415 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 416 void *args) = NULL; 417 void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol, 418 sa_family_t addr_family, uint8_t *laddrp, in_port_t lport, 419 void *args) = NULL; 420 421 typedef union T_primitives *t_primp_t; 422 423 /* 424 * Return the next anonymous port in the privileged port range for 425 * bind checking. 426 * 427 * Trusted Extension (TX) notes: TX allows administrator to mark or 428 * reserve ports as Multilevel ports (MLP). MLP has special function 429 * on TX systems. Once a port is made MLP, it's not available as 430 * ordinary port. This creates "holes" in the port name space. It 431 * may be necessary to skip the "holes" to find a suitable anon port. 
432 */ 433 static in_port_t 434 udp_get_next_priv_port(udp_t *udp) 435 { 436 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 437 in_port_t nextport; 438 boolean_t restart = B_FALSE; 439 udp_stack_t *us = udp->udp_us; 440 441 retry: 442 if (next_priv_port < us->us_min_anonpriv_port || 443 next_priv_port >= IPPORT_RESERVED) { 444 next_priv_port = IPPORT_RESERVED - 1; 445 if (restart) 446 return (0); 447 restart = B_TRUE; 448 } 449 450 if (is_system_labeled() && 451 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 452 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 453 next_priv_port = nextport; 454 goto retry; 455 } 456 457 return (next_priv_port--); 458 } 459 460 /* 461 * Hash list removal routine for udp_t structures. 462 */ 463 static void 464 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 465 { 466 udp_t *udpnext; 467 kmutex_t *lockp; 468 udp_stack_t *us = udp->udp_us; 469 470 if (udp->udp_ptpbhn == NULL) 471 return; 472 473 /* 474 * Extract the lock pointer in case there are concurrent 475 * hash_remove's for this instance. 
476 */ 477 ASSERT(udp->udp_port != 0); 478 if (!caller_holds_lock) { 479 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 480 us->us_bind_fanout_size)].uf_lock; 481 ASSERT(lockp != NULL); 482 mutex_enter(lockp); 483 } 484 if (udp->udp_ptpbhn != NULL) { 485 udpnext = udp->udp_bind_hash; 486 if (udpnext != NULL) { 487 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 488 udp->udp_bind_hash = NULL; 489 } 490 *udp->udp_ptpbhn = udpnext; 491 udp->udp_ptpbhn = NULL; 492 } 493 if (!caller_holds_lock) { 494 mutex_exit(lockp); 495 } 496 } 497 498 static void 499 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 500 { 501 udp_t **udpp; 502 udp_t *udpnext; 503 504 ASSERT(MUTEX_HELD(&uf->uf_lock)); 505 ASSERT(udp->udp_ptpbhn == NULL); 506 udpp = &uf->uf_udp; 507 udpnext = udpp[0]; 508 if (udpnext != NULL) { 509 /* 510 * If the new udp bound to the INADDR_ANY address 511 * and the first one in the list is not bound to 512 * INADDR_ANY we skip all entries until we find the 513 * first one bound to INADDR_ANY. 514 * This makes sure that applications binding to a 515 * specific address get preference over those binding to 516 * INADDR_ANY. 517 */ 518 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 519 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 520 while ((udpnext = udpp[0]) != NULL && 521 !V6_OR_V4_INADDR_ANY( 522 udpnext->udp_bound_v6src)) { 523 udpp = &(udpnext->udp_bind_hash); 524 } 525 if (udpnext != NULL) 526 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 527 } else { 528 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 529 } 530 } 531 udp->udp_bind_hash = udpnext; 532 udp->udp_ptpbhn = udpp; 533 udpp[0] = udp; 534 } 535 536 /* 537 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 538 * passed to udp_wput. 539 * It associates a port number and local address with the stream. 540 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 541 * protocol type (IPPROTO_UDP) placed in the message following the address. 
542 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 543 * (Called as writer.) 544 * 545 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 546 * without setting SO_REUSEADDR. This is needed so that they 547 * can be viewed as two independent transport protocols. 548 * However, anonymouns ports are allocated from the same range to avoid 549 * duplicating the us->us_next_port_to_try. 550 */ 551 static void 552 udp_tpi_bind(queue_t *q, mblk_t *mp) 553 { 554 sin_t *sin; 555 sin6_t *sin6; 556 mblk_t *mp1; 557 struct T_bind_req *tbr; 558 conn_t *connp; 559 udp_t *udp; 560 int error; 561 struct sockaddr *sa; 562 cred_t *cr; 563 564 /* 565 * All Solaris components should pass a db_credp 566 * for this TPI message, hence we ASSERT. 567 * But in case there is some other M_PROTO that looks 568 * like a TPI message sent by some other kernel 569 * component, we check and return an error. 570 */ 571 cr = msg_getcred(mp, NULL); 572 ASSERT(cr != NULL); 573 if (cr == NULL) { 574 udp_err_ack(q, mp, TSYSERR, EINVAL); 575 return; 576 } 577 578 connp = Q_TO_CONN(q); 579 udp = connp->conn_udp; 580 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 581 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 582 "udp_bind: bad req, len %u", 583 (uint_t)(mp->b_wptr - mp->b_rptr)); 584 udp_err_ack(q, mp, TPROTO, 0); 585 return; 586 } 587 if (udp->udp_state != TS_UNBND) { 588 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 589 "udp_bind: bad state, %u", udp->udp_state); 590 udp_err_ack(q, mp, TOUTSTATE, 0); 591 return; 592 } 593 /* 594 * Reallocate the message to make sure we have enough room for an 595 * address and the protocol type. 596 */ 597 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 598 if (!mp1) { 599 udp_err_ack(q, mp, TSYSERR, ENOMEM); 600 return; 601 } 602 603 mp = mp1; 604 605 /* Reset the message type in preparation for shipping it back. 
*/ 606 DB_TYPE(mp) = M_PCPROTO; 607 608 tbr = (struct T_bind_req *)mp->b_rptr; 609 switch (tbr->ADDR_length) { 610 case 0: /* Request for a generic port */ 611 tbr->ADDR_offset = sizeof (struct T_bind_req); 612 if (udp->udp_family == AF_INET) { 613 tbr->ADDR_length = sizeof (sin_t); 614 sin = (sin_t *)&tbr[1]; 615 *sin = sin_null; 616 sin->sin_family = AF_INET; 617 mp->b_wptr = (uchar_t *)&sin[1]; 618 sa = (struct sockaddr *)sin; 619 } else { 620 ASSERT(udp->udp_family == AF_INET6); 621 tbr->ADDR_length = sizeof (sin6_t); 622 sin6 = (sin6_t *)&tbr[1]; 623 *sin6 = sin6_null; 624 sin6->sin6_family = AF_INET6; 625 mp->b_wptr = (uchar_t *)&sin6[1]; 626 sa = (struct sockaddr *)sin6; 627 } 628 break; 629 630 case sizeof (sin_t): /* Complete IPv4 address */ 631 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 632 sizeof (sin_t)); 633 if (sa == NULL || !OK_32PTR((char *)sa)) { 634 udp_err_ack(q, mp, TSYSERR, EINVAL); 635 return; 636 } 637 if (udp->udp_family != AF_INET || 638 sa->sa_family != AF_INET) { 639 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 640 return; 641 } 642 break; 643 644 case sizeof (sin6_t): /* complete IPv6 address */ 645 sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset, 646 sizeof (sin6_t)); 647 if (sa == NULL || !OK_32PTR((char *)sa)) { 648 udp_err_ack(q, mp, TSYSERR, EINVAL); 649 return; 650 } 651 if (udp->udp_family != AF_INET6 || 652 sa->sa_family != AF_INET6) { 653 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 654 return; 655 } 656 break; 657 658 default: /* Invalid request */ 659 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 660 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 661 udp_err_ack(q, mp, TBADADDR, 0); 662 return; 663 } 664 665 error = udp_do_bind(connp, sa, tbr->ADDR_length, cr, 666 tbr->PRIM_type != O_T_BIND_REQ); 667 668 if (error != 0) { 669 if (error > 0) { 670 udp_err_ack(q, mp, TSYSERR, error); 671 } else { 672 udp_err_ack(q, mp, -error, 0); 673 } 674 } else { 675 tbr->PRIM_type = T_BIND_ACK; 676 
qreply(q, mp); 677 } 678 } 679 680 /* 681 * This routine handles each T_CONN_REQ message passed to udp. It 682 * associates a default destination address with the stream. 683 * 684 * This routine sends down a T_BIND_REQ to IP with the following mblks: 685 * T_BIND_REQ - specifying local and remote address/port 686 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 687 * T_OK_ACK - for the T_CONN_REQ 688 * T_CONN_CON - to keep the TPI user happy 689 * 690 * The connect completes in udp_do_connect. 691 * When a T_BIND_ACK is received information is extracted from the IRE 692 * and the two appended messages are sent to the TPI user. 693 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 694 * convert it to an error ack for the appropriate primitive. 695 */ 696 static void 697 udp_tpi_connect(queue_t *q, mblk_t *mp) 698 { 699 mblk_t *mp1; 700 udp_t *udp; 701 conn_t *connp = Q_TO_CONN(q); 702 int error; 703 socklen_t len; 704 struct sockaddr *sa; 705 struct T_conn_req *tcr; 706 cred_t *cr; 707 708 /* 709 * All Solaris components should pass a db_credp 710 * for this TPI message, hence we ASSERT. 711 * But in case there is some other M_PROTO that looks 712 * like a TPI message sent by some other kernel 713 * component, we check and return an error. 714 */ 715 cr = msg_getcred(mp, NULL); 716 ASSERT(cr != NULL); 717 if (cr == NULL) { 718 udp_err_ack(q, mp, TSYSERR, EINVAL); 719 return; 720 } 721 722 udp = connp->conn_udp; 723 tcr = (struct T_conn_req *)mp->b_rptr; 724 725 /* A bit of sanity checking */ 726 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 727 udp_err_ack(q, mp, TPROTO, 0); 728 return; 729 } 730 731 if (tcr->OPT_length != 0) { 732 udp_err_ack(q, mp, TBADOPT, 0); 733 return; 734 } 735 736 /* 737 * Determine packet type based on type of address passed in 738 * the request should contain an IPv4 or IPv6 address. 
739 * Make sure that address family matches the type of 740 * family of the the address passed down 741 */ 742 len = tcr->DEST_length; 743 switch (tcr->DEST_length) { 744 default: 745 udp_err_ack(q, mp, TBADADDR, 0); 746 return; 747 748 case sizeof (sin_t): 749 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 750 sizeof (sin_t)); 751 break; 752 753 case sizeof (sin6_t): 754 sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset, 755 sizeof (sin6_t)); 756 break; 757 } 758 759 error = proto_verify_ip_addr(udp->udp_family, sa, len); 760 if (error != 0) { 761 udp_err_ack(q, mp, TSYSERR, error); 762 return; 763 } 764 765 /* 766 * We have to send a connection confirmation to 767 * keep TLI happy. 768 */ 769 if (udp->udp_family == AF_INET) { 770 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 771 sizeof (sin_t), NULL, 0); 772 } else { 773 mp1 = mi_tpi_conn_con(NULL, (char *)sa, 774 sizeof (sin6_t), NULL, 0); 775 } 776 if (mp1 == NULL) { 777 udp_err_ack(q, mp, TSYSERR, ENOMEM); 778 return; 779 } 780 781 /* 782 * Allocate the largest primitive we need to send back 783 * T_error_ack is > than T_ok_ack 784 */ 785 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 786 if (mp == NULL) { 787 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 788 freemsg(mp1); 789 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 790 return; 791 } 792 793 error = udp_do_connect(connp, sa, len, cr); 794 if (error != 0) { 795 freeb(mp1); 796 if (error < 0) 797 udp_err_ack(q, mp, -error, 0); 798 else 799 udp_err_ack(q, mp, TSYSERR, error); 800 } else { 801 mp = mi_tpi_ok_ack_alloc(mp); 802 ASSERT(mp != NULL); 803 putnext(connp->conn_rq, mp); 804 putnext(connp->conn_rq, mp1); 805 } 806 } 807 808 static int 809 udp_tpi_close(queue_t *q, int flags) 810 { 811 conn_t *connp; 812 813 if (flags & SO_FALLBACK) { 814 /* 815 * stream is being closed while in fallback 816 * simply free the resources that were allocated 817 */ 818 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 819 qprocsoff(q); 820 goto done; 821 } 822 823 connp = Q_TO_CONN(q); 824 udp_do_close(connp); 825 done: 826 q->q_ptr = WR(q)->q_ptr = NULL; 827 return (0); 828 } 829 830 /* 831 * Called in the close path to quiesce the conn 832 */ 833 void 834 udp_quiesce_conn(conn_t *connp) 835 { 836 udp_t *udp = connp->conn_udp; 837 838 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 839 /* 840 * Running in cluster mode - register unbind information 841 */ 842 if (udp->udp_ipversion == IPV4_VERSION) { 843 (*cl_inet_unbind)( 844 connp->conn_netstack->netstack_stackid, 845 IPPROTO_UDP, AF_INET, 846 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 847 (in_port_t)udp->udp_port, NULL); 848 } else { 849 (*cl_inet_unbind)( 850 connp->conn_netstack->netstack_stackid, 851 IPPROTO_UDP, AF_INET6, 852 (uint8_t *)(&(udp->udp_v6src)), 853 (in_port_t)udp->udp_port, NULL); 854 } 855 } 856 857 udp_bind_hash_remove(udp, B_FALSE); 858 859 } 860 861 void 862 udp_close_free(conn_t *connp) 863 { 864 udp_t *udp = connp->conn_udp; 865 866 /* If there are any options associated with the stream, free them. 
*/ 867 if (udp->udp_ip_snd_options != NULL) { 868 mi_free((char *)udp->udp_ip_snd_options); 869 udp->udp_ip_snd_options = NULL; 870 udp->udp_ip_snd_options_len = 0; 871 } 872 873 if (udp->udp_ip_rcv_options != NULL) { 874 mi_free((char *)udp->udp_ip_rcv_options); 875 udp->udp_ip_rcv_options = NULL; 876 udp->udp_ip_rcv_options_len = 0; 877 } 878 879 /* Free memory associated with sticky options */ 880 if (udp->udp_sticky_hdrs_len != 0) { 881 kmem_free(udp->udp_sticky_hdrs, 882 udp->udp_sticky_hdrs_len); 883 udp->udp_sticky_hdrs = NULL; 884 udp->udp_sticky_hdrs_len = 0; 885 } 886 887 ip6_pkt_free(&udp->udp_sticky_ipp); 888 889 /* 890 * Clear any fields which the kmem_cache constructor clears. 891 * Only udp_connp needs to be preserved. 892 * TBD: We should make this more efficient to avoid clearing 893 * everything. 894 */ 895 ASSERT(udp->udp_connp == connp); 896 bzero(udp, sizeof (udp_t)); 897 udp->udp_connp = connp; 898 } 899 900 static int 901 udp_do_disconnect(conn_t *connp) 902 { 903 udp_t *udp; 904 mblk_t *ire_mp; 905 udp_fanout_t *udpf; 906 udp_stack_t *us; 907 int error; 908 909 udp = connp->conn_udp; 910 us = udp->udp_us; 911 rw_enter(&udp->udp_rwlock, RW_WRITER); 912 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 913 rw_exit(&udp->udp_rwlock); 914 return (-TOUTSTATE); 915 } 916 udp->udp_pending_op = T_DISCON_REQ; 917 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 918 us->us_bind_fanout_size)]; 919 mutex_enter(&udpf->uf_lock); 920 udp->udp_v6src = udp->udp_bound_v6src; 921 udp->udp_state = TS_IDLE; 922 mutex_exit(&udpf->uf_lock); 923 924 if (udp->udp_family == AF_INET6) { 925 /* Rebuild the header template */ 926 error = udp_build_hdrs(udp); 927 if (error != 0) { 928 udp->udp_pending_op = -1; 929 rw_exit(&udp->udp_rwlock); 930 return (error); 931 } 932 } 933 934 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 935 if (ire_mp == NULL) { 936 mutex_enter(&udpf->uf_lock); 937 udp->udp_pending_op = -1; 938 mutex_exit(&udpf->uf_lock); 939 
rw_exit(&udp->udp_rwlock); 940 return (ENOMEM); 941 } 942 943 rw_exit(&udp->udp_rwlock); 944 945 if (udp->udp_family == AF_INET6) { 946 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 947 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 948 } else { 949 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 950 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 951 } 952 953 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 954 } 955 956 957 static void 958 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 959 { 960 conn_t *connp = Q_TO_CONN(q); 961 int error; 962 963 /* 964 * Allocate the largest primitive we need to send back 965 * T_error_ack is > than T_ok_ack 966 */ 967 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 968 if (mp == NULL) { 969 /* Unable to reuse the T_DISCON_REQ for the ack. */ 970 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 971 return; 972 } 973 974 error = udp_do_disconnect(connp); 975 976 if (error != 0) { 977 if (error < 0) { 978 udp_err_ack(q, mp, -error, 0); 979 } else { 980 udp_err_ack(q, mp, TSYSERR, error); 981 } 982 } else { 983 mp = mi_tpi_ok_ack_alloc(mp); 984 ASSERT(mp != NULL); 985 qreply(q, mp); 986 } 987 } 988 989 int 990 udp_disconnect(conn_t *connp) 991 { 992 int error; 993 udp_t *udp = connp->conn_udp; 994 995 udp->udp_dgram_errind = B_FALSE; 996 997 error = udp_do_disconnect(connp); 998 999 if (error < 0) 1000 error = proto_tlitosyserr(-error); 1001 1002 return (error); 1003 } 1004 1005 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 1006 static void 1007 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1008 { 1009 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1010 qreply(q, mp); 1011 } 1012 1013 /* Shorthand to generate and send TPI error acks to our client */ 1014 static void 1015 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1016 int sys_error) 1017 { 1018 struct T_error_ack *teackp; 1019 1020 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1021 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1022 teackp = (struct T_error_ack *)mp->b_rptr; 1023 teackp->ERROR_prim = primitive; 1024 teackp->TLI_error = t_error; 1025 teackp->UNIX_error = sys_error; 1026 qreply(q, mp); 1027 } 1028 } 1029 1030 /*ARGSUSED*/ 1031 static int 1032 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1033 { 1034 int i; 1035 udp_t *udp = Q_TO_UDP(q); 1036 udp_stack_t *us = udp->udp_us; 1037 1038 for (i = 0; i < us->us_num_epriv_ports; i++) { 1039 if (us->us_epriv_ports[i] != 0) 1040 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1041 } 1042 return (0); 1043 } 1044 1045 /* ARGSUSED */ 1046 static int 1047 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1048 cred_t *cr) 1049 { 1050 long new_value; 1051 int i; 1052 udp_t *udp = Q_TO_UDP(q); 1053 udp_stack_t *us = udp->udp_us; 1054 1055 /* 1056 * Fail the request if the new value does not lie within the 1057 * port number limits. 
1058 */ 1059 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1060 new_value <= 0 || new_value >= 65536) { 1061 return (EINVAL); 1062 } 1063 1064 /* Check if the value is already in the list */ 1065 for (i = 0; i < us->us_num_epriv_ports; i++) { 1066 if (new_value == us->us_epriv_ports[i]) { 1067 return (EEXIST); 1068 } 1069 } 1070 /* Find an empty slot */ 1071 for (i = 0; i < us->us_num_epriv_ports; i++) { 1072 if (us->us_epriv_ports[i] == 0) 1073 break; 1074 } 1075 if (i == us->us_num_epriv_ports) { 1076 return (EOVERFLOW); 1077 } 1078 1079 /* Set the new value */ 1080 us->us_epriv_ports[i] = (in_port_t)new_value; 1081 return (0); 1082 } 1083 1084 /* ARGSUSED */ 1085 static int 1086 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1087 cred_t *cr) 1088 { 1089 long new_value; 1090 int i; 1091 udp_t *udp = Q_TO_UDP(q); 1092 udp_stack_t *us = udp->udp_us; 1093 1094 /* 1095 * Fail the request if the new value does not lie within the 1096 * port number limits. 1097 */ 1098 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1099 new_value <= 0 || new_value >= 65536) { 1100 return (EINVAL); 1101 } 1102 1103 /* Check that the value is already in the list */ 1104 for (i = 0; i < us->us_num_epriv_ports; i++) { 1105 if (us->us_epriv_ports[i] == new_value) 1106 break; 1107 } 1108 if (i == us->us_num_epriv_ports) { 1109 return (ESRCH); 1110 } 1111 1112 /* Clear the value */ 1113 us->us_epriv_ports[i] = 0; 1114 return (0); 1115 } 1116 1117 /* At minimum we need 4 bytes of UDP header */ 1118 #define ICMP_MIN_UDP_HDR 4 1119 1120 /* 1121 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1122 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1123 * Assumes that IP has pulled up everything up to and including the ICMP header. 
1124 */ 1125 static void 1126 udp_icmp_error(conn_t *connp, mblk_t *mp) 1127 { 1128 icmph_t *icmph; 1129 ipha_t *ipha; 1130 int iph_hdr_length; 1131 udpha_t *udpha; 1132 sin_t sin; 1133 sin6_t sin6; 1134 mblk_t *mp1; 1135 int error = 0; 1136 udp_t *udp = connp->conn_udp; 1137 1138 mp1 = NULL; 1139 ipha = (ipha_t *)mp->b_rptr; 1140 1141 ASSERT(OK_32PTR(mp->b_rptr)); 1142 1143 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1144 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1145 udp_icmp_error_ipv6(connp, mp); 1146 return; 1147 } 1148 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1149 1150 /* Skip past the outer IP and ICMP headers */ 1151 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1152 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1153 ipha = (ipha_t *)&icmph[1]; 1154 1155 /* Skip past the inner IP and find the ULP header */ 1156 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1157 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1158 1159 switch (icmph->icmph_type) { 1160 case ICMP_DEST_UNREACHABLE: 1161 switch (icmph->icmph_code) { 1162 case ICMP_FRAGMENTATION_NEEDED: 1163 /* 1164 * IP has already adjusted the path MTU. 1165 */ 1166 break; 1167 case ICMP_PORT_UNREACHABLE: 1168 case ICMP_PROTOCOL_UNREACHABLE: 1169 error = ECONNREFUSED; 1170 break; 1171 default: 1172 /* Transient errors */ 1173 break; 1174 } 1175 break; 1176 default: 1177 /* Transient errors */ 1178 break; 1179 } 1180 if (error == 0) { 1181 freemsg(mp); 1182 return; 1183 } 1184 1185 /* 1186 * Deliver T_UDERROR_IND when the application has asked for it. 1187 * The socket layer enables this automatically when connected. 
1188 */ 1189 if (!udp->udp_dgram_errind) { 1190 freemsg(mp); 1191 return; 1192 } 1193 1194 1195 switch (udp->udp_family) { 1196 case AF_INET: 1197 sin = sin_null; 1198 sin.sin_family = AF_INET; 1199 sin.sin_addr.s_addr = ipha->ipha_dst; 1200 sin.sin_port = udpha->uha_dst_port; 1201 if (IPCL_IS_NONSTR(connp)) { 1202 rw_enter(&udp->udp_rwlock, RW_WRITER); 1203 if (udp->udp_state == TS_DATA_XFER) { 1204 if (sin.sin_port == udp->udp_dstport && 1205 sin.sin_addr.s_addr == 1206 V4_PART_OF_V6(udp->udp_v6dst)) { 1207 rw_exit(&udp->udp_rwlock); 1208 (*connp->conn_upcalls->su_set_error) 1209 (connp->conn_upper_handle, error); 1210 goto done; 1211 } 1212 } else { 1213 udp->udp_delayed_error = error; 1214 *((sin_t *)&udp->udp_delayed_addr) = sin; 1215 } 1216 rw_exit(&udp->udp_rwlock); 1217 } else { 1218 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1219 NULL, 0, error); 1220 } 1221 break; 1222 case AF_INET6: 1223 sin6 = sin6_null; 1224 sin6.sin6_family = AF_INET6; 1225 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1226 sin6.sin6_port = udpha->uha_dst_port; 1227 if (IPCL_IS_NONSTR(connp)) { 1228 rw_enter(&udp->udp_rwlock, RW_WRITER); 1229 if (udp->udp_state == TS_DATA_XFER) { 1230 if (sin6.sin6_port == udp->udp_dstport && 1231 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1232 &udp->udp_v6dst)) { 1233 rw_exit(&udp->udp_rwlock); 1234 (*connp->conn_upcalls->su_set_error) 1235 (connp->conn_upper_handle, error); 1236 goto done; 1237 } 1238 } else { 1239 udp->udp_delayed_error = error; 1240 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1241 } 1242 rw_exit(&udp->udp_rwlock); 1243 } else { 1244 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1245 NULL, 0, error); 1246 } 1247 break; 1248 } 1249 if (mp1 != NULL) 1250 putnext(connp->conn_rq, mp1); 1251 done: 1252 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1253 freemsg(mp); 1254 } 1255 1256 /* 1257 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 
1258 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1259 * Assumes that IP has pulled up all the extension headers as well as the 1260 * ICMPv6 header. 1261 */ 1262 static void 1263 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1264 { 1265 icmp6_t *icmp6; 1266 ip6_t *ip6h, *outer_ip6h; 1267 uint16_t iph_hdr_length; 1268 uint8_t *nexthdrp; 1269 udpha_t *udpha; 1270 sin6_t sin6; 1271 mblk_t *mp1; 1272 int error = 0; 1273 udp_t *udp = connp->conn_udp; 1274 udp_stack_t *us = udp->udp_us; 1275 1276 outer_ip6h = (ip6_t *)mp->b_rptr; 1277 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1278 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1279 else 1280 iph_hdr_length = IPV6_HDR_LEN; 1281 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1282 ip6h = (ip6_t *)&icmp6[1]; 1283 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1284 freemsg(mp); 1285 return; 1286 } 1287 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1288 1289 switch (icmp6->icmp6_type) { 1290 case ICMP6_DST_UNREACH: 1291 switch (icmp6->icmp6_code) { 1292 case ICMP6_DST_UNREACH_NOPORT: 1293 error = ECONNREFUSED; 1294 break; 1295 case ICMP6_DST_UNREACH_ADMIN: 1296 case ICMP6_DST_UNREACH_NOROUTE: 1297 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1298 case ICMP6_DST_UNREACH_ADDR: 1299 /* Transient errors */ 1300 break; 1301 default: 1302 break; 1303 } 1304 break; 1305 case ICMP6_PACKET_TOO_BIG: { 1306 struct T_unitdata_ind *tudi; 1307 struct T_opthdr *toh; 1308 size_t udi_size; 1309 mblk_t *newmp; 1310 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1311 sizeof (struct ip6_mtuinfo); 1312 sin6_t *sin6; 1313 struct ip6_mtuinfo *mtuinfo; 1314 1315 /* 1316 * If the application has requested to receive path mtu 1317 * information, send up an empty message containing an 1318 * IPV6_PATHMTU ancillary data item. 
1319 */ 1320 if (!udp->udp_ipv6_recvpathmtu) 1321 break; 1322 1323 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1324 opt_length; 1325 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1326 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1327 break; 1328 } 1329 1330 /* 1331 * newmp->b_cont is left to NULL on purpose. This is an 1332 * empty message containing only ancillary data. 1333 */ 1334 newmp->b_datap->db_type = M_PROTO; 1335 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1336 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1337 tudi->PRIM_type = T_UNITDATA_IND; 1338 tudi->SRC_length = sizeof (sin6_t); 1339 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1340 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1341 tudi->OPT_length = opt_length; 1342 1343 sin6 = (sin6_t *)&tudi[1]; 1344 bzero(sin6, sizeof (sin6_t)); 1345 sin6->sin6_family = AF_INET6; 1346 sin6->sin6_addr = udp->udp_v6dst; 1347 1348 toh = (struct T_opthdr *)&sin6[1]; 1349 toh->level = IPPROTO_IPV6; 1350 toh->name = IPV6_PATHMTU; 1351 toh->len = opt_length; 1352 toh->status = 0; 1353 1354 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1355 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1356 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1357 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1358 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1359 /* 1360 * We've consumed everything we need from the original 1361 * message. Free it, then send our empty message. 
1362 */ 1363 freemsg(mp); 1364 udp_ulp_recv(connp, newmp); 1365 1366 return; 1367 } 1368 case ICMP6_TIME_EXCEEDED: 1369 /* Transient errors */ 1370 break; 1371 case ICMP6_PARAM_PROB: 1372 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1373 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1374 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1375 (uchar_t *)nexthdrp) { 1376 error = ECONNREFUSED; 1377 break; 1378 } 1379 break; 1380 } 1381 if (error == 0) { 1382 freemsg(mp); 1383 return; 1384 } 1385 1386 /* 1387 * Deliver T_UDERROR_IND when the application has asked for it. 1388 * The socket layer enables this automatically when connected. 1389 */ 1390 if (!udp->udp_dgram_errind) { 1391 freemsg(mp); 1392 return; 1393 } 1394 1395 sin6 = sin6_null; 1396 sin6.sin6_family = AF_INET6; 1397 sin6.sin6_addr = ip6h->ip6_dst; 1398 sin6.sin6_port = udpha->uha_dst_port; 1399 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 1400 1401 if (IPCL_IS_NONSTR(connp)) { 1402 rw_enter(&udp->udp_rwlock, RW_WRITER); 1403 if (udp->udp_state == TS_DATA_XFER) { 1404 if (sin6.sin6_port == udp->udp_dstport && 1405 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1406 &udp->udp_v6dst)) { 1407 rw_exit(&udp->udp_rwlock); 1408 (*connp->conn_upcalls->su_set_error) 1409 (connp->conn_upper_handle, error); 1410 goto done; 1411 } 1412 } else { 1413 udp->udp_delayed_error = error; 1414 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1415 } 1416 rw_exit(&udp->udp_rwlock); 1417 } else { 1418 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1419 NULL, 0, error); 1420 if (mp1 != NULL) 1421 putnext(connp->conn_rq, mp1); 1422 } 1423 done: 1424 ASSERT(!RW_ISWRITER(&udp->udp_rwlock)); 1425 freemsg(mp); 1426 } 1427 1428 /* 1429 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 1430 * The local address is filled in if endpoint is bound. 
The remote address 1431 * is filled in if remote address has been precified ("connected endpoint") 1432 * (The concept of connected CLTS sockets is alien to published TPI 1433 * but we support it anyway). 1434 */ 1435 static void 1436 udp_addr_req(queue_t *q, mblk_t *mp) 1437 { 1438 sin_t *sin; 1439 sin6_t *sin6; 1440 mblk_t *ackmp; 1441 struct T_addr_ack *taa; 1442 udp_t *udp = Q_TO_UDP(q); 1443 1444 /* Make it large enough for worst case */ 1445 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 1446 2 * sizeof (sin6_t), 1); 1447 if (ackmp == NULL) { 1448 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1449 return; 1450 } 1451 taa = (struct T_addr_ack *)ackmp->b_rptr; 1452 1453 bzero(taa, sizeof (struct T_addr_ack)); 1454 ackmp->b_wptr = (uchar_t *)&taa[1]; 1455 1456 taa->PRIM_type = T_ADDR_ACK; 1457 ackmp->b_datap->db_type = M_PCPROTO; 1458 rw_enter(&udp->udp_rwlock, RW_READER); 1459 /* 1460 * Note: Following code assumes 32 bit alignment of basic 1461 * data structures like sin_t and struct T_addr_ack. 1462 */ 1463 if (udp->udp_state != TS_UNBND) { 1464 /* 1465 * Fill in local address first 1466 */ 1467 taa->LOCADDR_offset = sizeof (*taa); 1468 if (udp->udp_family == AF_INET) { 1469 taa->LOCADDR_length = sizeof (sin_t); 1470 sin = (sin_t *)&taa[1]; 1471 /* Fill zeroes and then initialize non-zero fields */ 1472 *sin = sin_null; 1473 sin->sin_family = AF_INET; 1474 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1475 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1476 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1477 sin->sin_addr.s_addr); 1478 } else { 1479 /* 1480 * INADDR_ANY 1481 * udp_v6src is not set, we might be bound to 1482 * broadcast/multicast. 
Use udp_bound_v6src as 1483 * local address instead (that could 1484 * also still be INADDR_ANY) 1485 */ 1486 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1487 sin->sin_addr.s_addr); 1488 } 1489 sin->sin_port = udp->udp_port; 1490 ackmp->b_wptr = (uchar_t *)&sin[1]; 1491 if (udp->udp_state == TS_DATA_XFER) { 1492 /* 1493 * connected, fill remote address too 1494 */ 1495 taa->REMADDR_length = sizeof (sin_t); 1496 /* assumed 32-bit alignment */ 1497 taa->REMADDR_offset = taa->LOCADDR_offset + 1498 taa->LOCADDR_length; 1499 1500 sin = (sin_t *)(ackmp->b_rptr + 1501 taa->REMADDR_offset); 1502 /* initialize */ 1503 *sin = sin_null; 1504 sin->sin_family = AF_INET; 1505 sin->sin_addr.s_addr = 1506 V4_PART_OF_V6(udp->udp_v6dst); 1507 sin->sin_port = udp->udp_dstport; 1508 ackmp->b_wptr = (uchar_t *)&sin[1]; 1509 } 1510 } else { 1511 taa->LOCADDR_length = sizeof (sin6_t); 1512 sin6 = (sin6_t *)&taa[1]; 1513 /* Fill zeroes and then initialize non-zero fields */ 1514 *sin6 = sin6_null; 1515 sin6->sin6_family = AF_INET6; 1516 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1517 sin6->sin6_addr = udp->udp_v6src; 1518 } else { 1519 /* 1520 * UNSPECIFIED 1521 * udp_v6src is not set, we might be bound to 1522 * broadcast/multicast. 
Use udp_bound_v6src as 1523 * local address instead (that could 1524 * also still be UNSPECIFIED) 1525 */ 1526 sin6->sin6_addr = 1527 udp->udp_bound_v6src; 1528 } 1529 sin6->sin6_port = udp->udp_port; 1530 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1531 if (udp->udp_state == TS_DATA_XFER) { 1532 /* 1533 * connected, fill remote address too 1534 */ 1535 taa->REMADDR_length = sizeof (sin6_t); 1536 /* assumed 32-bit alignment */ 1537 taa->REMADDR_offset = taa->LOCADDR_offset + 1538 taa->LOCADDR_length; 1539 1540 sin6 = (sin6_t *)(ackmp->b_rptr + 1541 taa->REMADDR_offset); 1542 /* initialize */ 1543 *sin6 = sin6_null; 1544 sin6->sin6_family = AF_INET6; 1545 sin6->sin6_addr = udp->udp_v6dst; 1546 sin6->sin6_port = udp->udp_dstport; 1547 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1548 } 1549 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1550 } 1551 } 1552 rw_exit(&udp->udp_rwlock); 1553 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1554 qreply(q, ackmp); 1555 } 1556 1557 static void 1558 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1559 { 1560 if (udp->udp_family == AF_INET) { 1561 *tap = udp_g_t_info_ack_ipv4; 1562 } else { 1563 *tap = udp_g_t_info_ack_ipv6; 1564 } 1565 tap->CURRENT_state = udp->udp_state; 1566 tap->OPT_size = udp_max_optsize; 1567 } 1568 1569 static void 1570 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1571 t_uscalar_t cap_bits1) 1572 { 1573 tcap->CAP_bits1 = 0; 1574 1575 if (cap_bits1 & TC1_INFO) { 1576 udp_copy_info(&tcap->INFO_ack, udp); 1577 tcap->CAP_bits1 |= TC1_INFO; 1578 } 1579 } 1580 1581 /* 1582 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1583 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 1584 * udp_g_t_info_ack. The current state of the stream is copied from 1585 * udp_state. 
1586 */ 1587 static void 1588 udp_capability_req(queue_t *q, mblk_t *mp) 1589 { 1590 t_uscalar_t cap_bits1; 1591 struct T_capability_ack *tcap; 1592 udp_t *udp = Q_TO_UDP(q); 1593 1594 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 1595 1596 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 1597 mp->b_datap->db_type, T_CAPABILITY_ACK); 1598 if (!mp) 1599 return; 1600 1601 tcap = (struct T_capability_ack *)mp->b_rptr; 1602 udp_do_capability_ack(udp, tcap, cap_bits1); 1603 1604 qreply(q, mp); 1605 } 1606 1607 /* 1608 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 1609 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 1610 * The current state of the stream is copied from udp_state. 1611 */ 1612 static void 1613 udp_info_req(queue_t *q, mblk_t *mp) 1614 { 1615 udp_t *udp = Q_TO_UDP(q); 1616 1617 /* Create a T_INFO_ACK message. */ 1618 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 1619 T_INFO_ACK); 1620 if (!mp) 1621 return; 1622 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 1623 qreply(q, mp); 1624 } 1625 1626 /* 1627 * IP recognizes seven kinds of bind requests: 1628 * 1629 * - A zero-length address binds only to the protocol number. 1630 * 1631 * - A 4-byte address is treated as a request to 1632 * validate that the address is a valid local IPv4 1633 * address, appropriate for an application to bind to. 1634 * IP does the verification, but does not make any note 1635 * of the address at this time. 1636 * 1637 * - A 16-byte address contains is treated as a request 1638 * to validate a local IPv6 address, as the 4-byte 1639 * address case above. 1640 * 1641 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 1642 * use it for the inbound fanout of packets. 1643 * 1644 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 1645 * use it for the inbound fanout of packets. 
1646 * 1647 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 1648 * information consisting of local and remote addresses 1649 * and ports. In this case, the addresses are both 1650 * validated as appropriate for this operation, and, if 1651 * so, the information is retained for use in the 1652 * inbound fanout. 1653 * 1654 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 1655 * fanout information, like the 12-byte case above. 1656 * 1657 * IP will also fill in the IRE request mblk with information 1658 * regarding our peer. In all cases, we notify IP of our protocol 1659 * type by appending a single protocol byte to the bind request. 1660 */ 1661 static mblk_t * 1662 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 1663 { 1664 char *cp; 1665 mblk_t *mp; 1666 struct T_bind_req *tbr; 1667 ipa_conn_t *ac; 1668 ipa6_conn_t *ac6; 1669 sin_t *sin; 1670 sin6_t *sin6; 1671 1672 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1673 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 1674 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1675 if (!mp) 1676 return (mp); 1677 mp->b_datap->db_type = M_PROTO; 1678 tbr = (struct T_bind_req *)mp->b_rptr; 1679 tbr->PRIM_type = bind_prim; 1680 tbr->ADDR_offset = sizeof (*tbr); 1681 tbr->CONIND_number = 0; 1682 tbr->ADDR_length = addr_length; 1683 cp = (char *)&tbr[1]; 1684 switch (addr_length) { 1685 case sizeof (ipa_conn_t): 1686 ASSERT(udp->udp_family == AF_INET); 1687 /* Append a request for an IRE */ 1688 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1689 if (!mp->b_cont) { 1690 freemsg(mp); 1691 return (NULL); 1692 } 1693 mp->b_cont->b_wptr += sizeof (ire_t); 1694 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1695 1696 /* cp known to be 32 bit aligned */ 1697 ac = (ipa_conn_t *)cp; 1698 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 1699 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 1700 ac->ac_fport = udp->udp_dstport; 1701 ac->ac_lport = udp->udp_port; 1702 
break; 1703 1704 case sizeof (ipa6_conn_t): 1705 ASSERT(udp->udp_family == AF_INET6); 1706 /* Append a request for an IRE */ 1707 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1708 if (!mp->b_cont) { 1709 freemsg(mp); 1710 return (NULL); 1711 } 1712 mp->b_cont->b_wptr += sizeof (ire_t); 1713 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1714 1715 /* cp known to be 32 bit aligned */ 1716 ac6 = (ipa6_conn_t *)cp; 1717 ac6->ac6_laddr = udp->udp_v6src; 1718 ac6->ac6_faddr = udp->udp_v6dst; 1719 ac6->ac6_fport = udp->udp_dstport; 1720 ac6->ac6_lport = udp->udp_port; 1721 break; 1722 1723 case sizeof (sin_t): 1724 ASSERT(udp->udp_family == AF_INET); 1725 /* Append a request for an IRE */ 1726 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1727 if (!mp->b_cont) { 1728 freemsg(mp); 1729 return (NULL); 1730 } 1731 mp->b_cont->b_wptr += sizeof (ire_t); 1732 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1733 1734 sin = (sin_t *)cp; 1735 *sin = sin_null; 1736 sin->sin_family = AF_INET; 1737 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 1738 sin->sin_port = udp->udp_port; 1739 break; 1740 1741 case sizeof (sin6_t): 1742 ASSERT(udp->udp_family == AF_INET6); 1743 /* Append a request for an IRE */ 1744 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1745 if (!mp->b_cont) { 1746 freemsg(mp); 1747 return (NULL); 1748 } 1749 mp->b_cont->b_wptr += sizeof (ire_t); 1750 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1751 1752 sin6 = (sin6_t *)cp; 1753 *sin6 = sin6_null; 1754 sin6->sin6_family = AF_INET6; 1755 sin6->sin6_addr = udp->udp_bound_v6src; 1756 sin6->sin6_port = udp->udp_port; 1757 break; 1758 } 1759 /* Add protocol number to end */ 1760 cp[addr_length] = (char)IPPROTO_UDP; 1761 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1762 return (mp); 1763 } 1764 1765 /* For /dev/udp aka AF_INET open */ 1766 static int 1767 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1768 { 1769 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1770 } 
1771 1772 /* For /dev/udp6 aka AF_INET6 open */ 1773 static int 1774 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1775 { 1776 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1777 } 1778 1779 /* 1780 * This is the open routine for udp. It allocates a udp_t structure for 1781 * the stream and, on the first open of the module, creates an ND table. 1782 */ 1783 /*ARGSUSED2*/ 1784 static int 1785 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1786 boolean_t isv6) 1787 { 1788 int error; 1789 udp_t *udp; 1790 conn_t *connp; 1791 dev_t conn_dev; 1792 udp_stack_t *us; 1793 vmem_t *minor_arena; 1794 1795 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1796 1797 /* If the stream is already open, return immediately. */ 1798 if (q->q_ptr != NULL) 1799 return (0); 1800 1801 if (sflag == MODOPEN) 1802 return (EINVAL); 1803 1804 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1805 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1806 minor_arena = ip_minor_arena_la; 1807 } else { 1808 /* 1809 * Either minor numbers in the large arena were exhausted 1810 * or a non socket application is doing the open. 1811 * Try to allocate from the small arena. 
1812 */ 1813 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1814 return (EBUSY); 1815 1816 minor_arena = ip_minor_arena_sa; 1817 } 1818 1819 if (flag & SO_FALLBACK) { 1820 /* 1821 * Non streams socket needs a stream to fallback to 1822 */ 1823 RD(q)->q_ptr = (void *)conn_dev; 1824 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1825 WR(q)->q_ptr = (void *)minor_arena; 1826 qprocson(q); 1827 return (0); 1828 } 1829 1830 connp = udp_do_open(credp, isv6, KM_SLEEP); 1831 if (connp == NULL) { 1832 inet_minor_free(minor_arena, conn_dev); 1833 return (ENOMEM); 1834 } 1835 udp = connp->conn_udp; 1836 us = udp->udp_us; 1837 1838 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1839 connp->conn_dev = conn_dev; 1840 connp->conn_minor_arena = minor_arena; 1841 1842 /* 1843 * Initialize the udp_t structure for this stream. 1844 */ 1845 q->q_ptr = connp; 1846 WR(q)->q_ptr = connp; 1847 connp->conn_rq = q; 1848 connp->conn_wq = WR(q); 1849 1850 rw_enter(&udp->udp_rwlock, RW_WRITER); 1851 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1852 ASSERT(connp->conn_udp == udp); 1853 ASSERT(udp->udp_connp == connp); 1854 1855 if (flag & SO_SOCKSTR) { 1856 connp->conn_flags |= IPCL_SOCKET; 1857 udp->udp_issocket = B_TRUE; 1858 udp->udp_direct_sockfs = B_TRUE; 1859 } 1860 1861 q->q_hiwat = us->us_recv_hiwat; 1862 WR(q)->q_hiwat = us->us_xmit_hiwat; 1863 WR(q)->q_lowat = us->us_xmit_lowat; 1864 1865 qprocson(q); 1866 1867 if (udp->udp_family == AF_INET6) { 1868 /* Build initial header template for transmit */ 1869 if ((error = udp_build_hdrs(udp)) != 0) { 1870 rw_exit(&udp->udp_rwlock); 1871 qprocsoff(q); 1872 inet_minor_free(minor_arena, conn_dev); 1873 ipcl_conn_destroy(connp); 1874 return (error); 1875 } 1876 } 1877 rw_exit(&udp->udp_rwlock); 1878 1879 /* Set the Stream head write offset and high watermark. */ 1880 (void) proto_set_tx_wroff(q, connp, 1881 udp->udp_max_hdr_len + us->us_wroff_extra); 1882 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? 
*/ 1883 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1884 1885 mutex_enter(&connp->conn_lock); 1886 connp->conn_state_flags &= ~CONN_INCIPIENT; 1887 mutex_exit(&connp->conn_lock); 1888 return (0); 1889 } 1890 1891 /* 1892 * Which UDP options OK to set through T_UNITDATA_REQ... 1893 */ 1894 /* ARGSUSED */ 1895 static boolean_t 1896 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1897 { 1898 return (B_TRUE); 1899 } 1900 1901 /* 1902 * This routine gets default values of certain options whose default 1903 * values are maintained by protcol specific code 1904 */ 1905 /* ARGSUSED */ 1906 int 1907 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1908 { 1909 udp_t *udp = Q_TO_UDP(q); 1910 udp_stack_t *us = udp->udp_us; 1911 int *i1 = (int *)ptr; 1912 1913 switch (level) { 1914 case IPPROTO_IP: 1915 switch (name) { 1916 case IP_MULTICAST_TTL: 1917 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1918 return (sizeof (uchar_t)); 1919 case IP_MULTICAST_LOOP: 1920 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1921 return (sizeof (uchar_t)); 1922 } 1923 break; 1924 case IPPROTO_IPV6: 1925 switch (name) { 1926 case IPV6_MULTICAST_HOPS: 1927 *i1 = IP_DEFAULT_MULTICAST_TTL; 1928 return (sizeof (int)); 1929 case IPV6_MULTICAST_LOOP: 1930 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1931 return (sizeof (int)); 1932 case IPV6_UNICAST_HOPS: 1933 *i1 = us->us_ipv6_hoplimit; 1934 return (sizeof (int)); 1935 } 1936 break; 1937 } 1938 return (-1); 1939 } 1940 1941 /* 1942 * This routine retrieves the current status of socket options. 1943 * It returns the size of the option retrieved. 
1944 */ 1945 static int 1946 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1947 { 1948 udp_t *udp = connp->conn_udp; 1949 udp_stack_t *us = udp->udp_us; 1950 int *i1 = (int *)ptr; 1951 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 1952 int len; 1953 1954 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 1955 switch (level) { 1956 case SOL_SOCKET: 1957 switch (name) { 1958 case SO_DEBUG: 1959 *i1 = udp->udp_debug; 1960 break; /* goto sizeof (int) option return */ 1961 case SO_REUSEADDR: 1962 *i1 = udp->udp_reuseaddr; 1963 break; /* goto sizeof (int) option return */ 1964 case SO_TYPE: 1965 *i1 = SOCK_DGRAM; 1966 break; /* goto sizeof (int) option return */ 1967 1968 /* 1969 * The following three items are available here, 1970 * but are only meaningful to IP. 1971 */ 1972 case SO_DONTROUTE: 1973 *i1 = udp->udp_dontroute; 1974 break; /* goto sizeof (int) option return */ 1975 case SO_USELOOPBACK: 1976 *i1 = udp->udp_useloopback; 1977 break; /* goto sizeof (int) option return */ 1978 case SO_BROADCAST: 1979 *i1 = udp->udp_broadcast; 1980 break; /* goto sizeof (int) option return */ 1981 1982 case SO_SNDBUF: 1983 *i1 = udp->udp_xmit_hiwat; 1984 break; /* goto sizeof (int) option return */ 1985 case SO_RCVBUF: 1986 *i1 = udp->udp_rcv_disply_hiwat; 1987 break; /* goto sizeof (int) option return */ 1988 case SO_DGRAM_ERRIND: 1989 *i1 = udp->udp_dgram_errind; 1990 break; /* goto sizeof (int) option return */ 1991 case SO_RECVUCRED: 1992 *i1 = udp->udp_recvucred; 1993 break; /* goto sizeof (int) option return */ 1994 case SO_TIMESTAMP: 1995 *i1 = udp->udp_timestamp; 1996 break; /* goto sizeof (int) option return */ 1997 case SO_ANON_MLP: 1998 *i1 = connp->conn_anon_mlp; 1999 break; /* goto sizeof (int) option return */ 2000 case SO_MAC_EXEMPT: 2001 *i1 = connp->conn_mac_exempt; 2002 break; /* goto sizeof (int) option return */ 2003 case SO_ALLZONES: 2004 *i1 = connp->conn_allzones; 2005 break; /* goto sizeof (int) option return */ 2006 case SO_EXCLBIND: 2007 *i1 = 
udp->udp_exclbind ? SO_EXCLBIND : 0; 2008 break; 2009 case SO_PROTOTYPE: 2010 *i1 = IPPROTO_UDP; 2011 break; 2012 case SO_DOMAIN: 2013 *i1 = udp->udp_family; 2014 break; 2015 default: 2016 return (-1); 2017 } 2018 break; 2019 case IPPROTO_IP: 2020 if (udp->udp_family != AF_INET) 2021 return (-1); 2022 switch (name) { 2023 case IP_OPTIONS: 2024 case T_IP_OPTIONS: 2025 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2026 if (len > 0) { 2027 bcopy(udp->udp_ip_rcv_options + 2028 udp->udp_label_len, ptr, len); 2029 } 2030 return (len); 2031 case IP_TOS: 2032 case T_IP_TOS: 2033 *i1 = (int)udp->udp_type_of_service; 2034 break; /* goto sizeof (int) option return */ 2035 case IP_TTL: 2036 *i1 = (int)udp->udp_ttl; 2037 break; /* goto sizeof (int) option return */ 2038 case IP_DHCPINIT_IF: 2039 return (-EINVAL); 2040 case IP_NEXTHOP: 2041 case IP_RECVPKTINFO: 2042 /* 2043 * This also handles IP_PKTINFO. 2044 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2045 * Differentiation is based on the size of the argument 2046 * passed in. 2047 * This option is handled in IP which will return an 2048 * error for IP_PKTINFO as it's not supported as a 2049 * sticky option. 
2050 */ 2051 return (-EINVAL); 2052 case IP_MULTICAST_IF: 2053 /* 0 address if not set */ 2054 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2055 return (sizeof (ipaddr_t)); 2056 case IP_MULTICAST_TTL: 2057 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2058 return (sizeof (uchar_t)); 2059 case IP_MULTICAST_LOOP: 2060 *ptr = connp->conn_multicast_loop; 2061 return (sizeof (uint8_t)); 2062 case IP_RECVOPTS: 2063 *i1 = udp->udp_recvopts; 2064 break; /* goto sizeof (int) option return */ 2065 case IP_RECVDSTADDR: 2066 *i1 = udp->udp_recvdstaddr; 2067 break; /* goto sizeof (int) option return */ 2068 case IP_RECVIF: 2069 *i1 = udp->udp_recvif; 2070 break; /* goto sizeof (int) option return */ 2071 case IP_RECVSLLA: 2072 *i1 = udp->udp_recvslla; 2073 break; /* goto sizeof (int) option return */ 2074 case IP_RECVTTL: 2075 *i1 = udp->udp_recvttl; 2076 break; /* goto sizeof (int) option return */ 2077 case IP_ADD_MEMBERSHIP: 2078 case IP_DROP_MEMBERSHIP: 2079 case IP_BLOCK_SOURCE: 2080 case IP_UNBLOCK_SOURCE: 2081 case IP_ADD_SOURCE_MEMBERSHIP: 2082 case IP_DROP_SOURCE_MEMBERSHIP: 2083 case MCAST_JOIN_GROUP: 2084 case MCAST_LEAVE_GROUP: 2085 case MCAST_BLOCK_SOURCE: 2086 case MCAST_UNBLOCK_SOURCE: 2087 case MCAST_JOIN_SOURCE_GROUP: 2088 case MCAST_LEAVE_SOURCE_GROUP: 2089 /* cannot "get" the value for these */ 2090 return (-1); 2091 case IP_BOUND_IF: 2092 /* Zero if not set */ 2093 *i1 = udp->udp_bound_if; 2094 break; /* goto sizeof (int) option return */ 2095 case IP_UNSPEC_SRC: 2096 *i1 = udp->udp_unspec_source; 2097 break; /* goto sizeof (int) option return */ 2098 case IP_BROADCAST_TTL: 2099 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2100 return (sizeof (uchar_t)); 2101 default: 2102 return (-1); 2103 } 2104 break; 2105 case IPPROTO_IPV6: 2106 if (udp->udp_family != AF_INET6) 2107 return (-1); 2108 switch (name) { 2109 case IPV6_UNICAST_HOPS: 2110 *i1 = (unsigned int)udp->udp_ttl; 2111 break; /* goto sizeof (int) option return */ 2112 case IPV6_MULTICAST_IF: 2113 /* 0 
index if not set */ 2114 *i1 = udp->udp_multicast_if_index; 2115 break; /* goto sizeof (int) option return */ 2116 case IPV6_MULTICAST_HOPS: 2117 *i1 = udp->udp_multicast_ttl; 2118 break; /* goto sizeof (int) option return */ 2119 case IPV6_MULTICAST_LOOP: 2120 *i1 = connp->conn_multicast_loop; 2121 break; /* goto sizeof (int) option return */ 2122 case IPV6_JOIN_GROUP: 2123 case IPV6_LEAVE_GROUP: 2124 case MCAST_JOIN_GROUP: 2125 case MCAST_LEAVE_GROUP: 2126 case MCAST_BLOCK_SOURCE: 2127 case MCAST_UNBLOCK_SOURCE: 2128 case MCAST_JOIN_SOURCE_GROUP: 2129 case MCAST_LEAVE_SOURCE_GROUP: 2130 /* cannot "get" the value for these */ 2131 return (-1); 2132 case IPV6_BOUND_IF: 2133 /* Zero if not set */ 2134 *i1 = udp->udp_bound_if; 2135 break; /* goto sizeof (int) option return */ 2136 case IPV6_UNSPEC_SRC: 2137 *i1 = udp->udp_unspec_source; 2138 break; /* goto sizeof (int) option return */ 2139 case IPV6_RECVPKTINFO: 2140 *i1 = udp->udp_ip_recvpktinfo; 2141 break; /* goto sizeof (int) option return */ 2142 case IPV6_RECVTCLASS: 2143 *i1 = udp->udp_ipv6_recvtclass; 2144 break; /* goto sizeof (int) option return */ 2145 case IPV6_RECVPATHMTU: 2146 *i1 = udp->udp_ipv6_recvpathmtu; 2147 break; /* goto sizeof (int) option return */ 2148 case IPV6_RECVHOPLIMIT: 2149 *i1 = udp->udp_ipv6_recvhoplimit; 2150 break; /* goto sizeof (int) option return */ 2151 case IPV6_RECVHOPOPTS: 2152 *i1 = udp->udp_ipv6_recvhopopts; 2153 break; /* goto sizeof (int) option return */ 2154 case IPV6_RECVDSTOPTS: 2155 *i1 = udp->udp_ipv6_recvdstopts; 2156 break; /* goto sizeof (int) option return */ 2157 case _OLD_IPV6_RECVDSTOPTS: 2158 *i1 = udp->udp_old_ipv6_recvdstopts; 2159 break; /* goto sizeof (int) option return */ 2160 case IPV6_RECVRTHDRDSTOPTS: 2161 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2162 break; /* goto sizeof (int) option return */ 2163 case IPV6_RECVRTHDR: 2164 *i1 = udp->udp_ipv6_recvrthdr; 2165 break; /* goto sizeof (int) option return */ 2166 case IPV6_PKTINFO: { 2167 /* XXX assumes 
that caller has room for max size! */ 2168 struct in6_pktinfo *pkti; 2169 2170 pkti = (struct in6_pktinfo *)ptr; 2171 if (ipp->ipp_fields & IPPF_IFINDEX) 2172 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2173 else 2174 pkti->ipi6_ifindex = 0; 2175 if (ipp->ipp_fields & IPPF_ADDR) 2176 pkti->ipi6_addr = ipp->ipp_addr; 2177 else 2178 pkti->ipi6_addr = ipv6_all_zeros; 2179 return (sizeof (struct in6_pktinfo)); 2180 } 2181 case IPV6_TCLASS: 2182 if (ipp->ipp_fields & IPPF_TCLASS) 2183 *i1 = ipp->ipp_tclass; 2184 else 2185 *i1 = IPV6_FLOW_TCLASS( 2186 IPV6_DEFAULT_VERS_AND_FLOW); 2187 break; /* goto sizeof (int) option return */ 2188 case IPV6_NEXTHOP: { 2189 sin6_t *sin6 = (sin6_t *)ptr; 2190 2191 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2192 return (0); 2193 *sin6 = sin6_null; 2194 sin6->sin6_family = AF_INET6; 2195 sin6->sin6_addr = ipp->ipp_nexthop; 2196 return (sizeof (sin6_t)); 2197 } 2198 case IPV6_HOPOPTS: 2199 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2200 return (0); 2201 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2202 return (0); 2203 /* 2204 * The cipso/label option is added by kernel. 2205 * User is not usually aware of this option. 2206 * We copy out the hbh opt after the label option. 
*/
			bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
			    ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
			if (udp->udp_label_len_v6 > 0) {
				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
				ptr[1] = (ipp->ipp_hopoptslen -
				    udp->udp_label_len_v6 + 7) / 8 - 1;
			}
			return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
		case IPV6_RTHDRDSTOPTS:
			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
				return (0);
			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
			return (ipp->ipp_rtdstoptslen);
		case IPV6_RTHDR:
			if (!(ipp->ipp_fields & IPPF_RTHDR))
				return (0);
			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
			return (ipp->ipp_rthdrlen);
		case IPV6_DSTOPTS:
			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
				return (0);
			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
			return (ipp->ipp_dstoptslen);
		case IPV6_PATHMTU:
			return (ip_fill_mtuinfo(&udp->udp_v6dst,
			    udp->udp_dstport, (struct ip6_mtuinfo *)ptr,
			    us->us_netstack));
		default:
			return (-1);
		}
		break;
	case IPPROTO_UDP:
		switch (name) {
		case UDP_ANONPRIVBIND:
			*i1 = udp->udp_anon_priv_bind;
			break;
		case UDP_EXCLBIND:
			*i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
			break;
		case UDP_RCVHDR:
			*i1 = udp->udp_rcvhdr ? 1 : 0;
			break;
		case UDP_NAT_T_ENDPOINT:
			*i1 = udp->udp_nat_t_endpoint;
			break;
		default:
			return (-1);
		}
		break;
	default:
		return (-1);
	}
	return (sizeof (int));
}

/*
 * Queue-based wrapper around udp_opt_get().
 *
 * Derives the udp_t and conn_t from the queue 'q', holds udp_rwlock as a
 * reader for the duration of the udp_opt_get() call, and hands back
 * whatever udp_opt_get() returned without interpreting it.  In the code
 * above that is the number of bytes stored at 'ptr' on success, or a
 * negative value when the option cannot be retrieved at this level.
 */
int
udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
	udp_t	*udp;
	int	err;

	udp = Q_TO_UDP(q);

	/* Reader access is sufficient: the get path only reads udp state. */
	rw_enter(&udp->udp_rwlock, RW_READER);
	err = udp_opt_get(Q_TO_CONN(q), level, name, ptr);
	rw_exit(&udp->udp_rwlock);
	return (err);
}

/*
 * This routine sets socket options.
2279 */ 2280 /* ARGSUSED */ 2281 static int 2282 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2283 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2284 void *thisdg_attrs, boolean_t checkonly) 2285 { 2286 udpattrs_t *attrs = thisdg_attrs; 2287 int *i1 = (int *)invalp; 2288 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2289 udp_t *udp = connp->conn_udp; 2290 udp_stack_t *us = udp->udp_us; 2291 int error; 2292 uint_t newlen; 2293 size_t sth_wroff; 2294 2295 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2296 /* 2297 * For fixed length options, no sanity check 2298 * of passed in length is done. It is assumed *_optcom_req() 2299 * routines do the right thing. 2300 */ 2301 switch (level) { 2302 case SOL_SOCKET: 2303 switch (name) { 2304 case SO_REUSEADDR: 2305 if (!checkonly) { 2306 udp->udp_reuseaddr = onoff; 2307 PASS_OPT_TO_IP(connp); 2308 } 2309 break; 2310 case SO_DEBUG: 2311 if (!checkonly) 2312 udp->udp_debug = onoff; 2313 break; 2314 /* 2315 * The following three items are available here, 2316 * but are only meaningful to IP. 
2317 */ 2318 case SO_DONTROUTE: 2319 if (!checkonly) { 2320 udp->udp_dontroute = onoff; 2321 PASS_OPT_TO_IP(connp); 2322 } 2323 break; 2324 case SO_USELOOPBACK: 2325 if (!checkonly) { 2326 udp->udp_useloopback = onoff; 2327 PASS_OPT_TO_IP(connp); 2328 } 2329 break; 2330 case SO_BROADCAST: 2331 if (!checkonly) { 2332 udp->udp_broadcast = onoff; 2333 PASS_OPT_TO_IP(connp); 2334 } 2335 break; 2336 2337 case SO_SNDBUF: 2338 if (*i1 > us->us_max_buf) { 2339 *outlenp = 0; 2340 return (ENOBUFS); 2341 } 2342 if (!checkonly) { 2343 udp->udp_xmit_hiwat = *i1; 2344 connp->conn_wq->q_hiwat = *i1; 2345 } 2346 break; 2347 case SO_RCVBUF: 2348 if (*i1 > us->us_max_buf) { 2349 *outlenp = 0; 2350 return (ENOBUFS); 2351 } 2352 if (!checkonly) { 2353 int size; 2354 2355 udp->udp_rcv_disply_hiwat = *i1; 2356 size = udp_set_rcv_hiwat(udp, *i1); 2357 rw_exit(&udp->udp_rwlock); 2358 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2359 size); 2360 rw_enter(&udp->udp_rwlock, RW_WRITER); 2361 } 2362 break; 2363 case SO_DGRAM_ERRIND: 2364 if (!checkonly) 2365 udp->udp_dgram_errind = onoff; 2366 break; 2367 case SO_RECVUCRED: 2368 if (!checkonly) 2369 udp->udp_recvucred = onoff; 2370 break; 2371 case SO_ALLZONES: 2372 /* 2373 * "soft" error (negative) 2374 * option not handled at this level 2375 * Do not modify *outlenp. 
2376 */ 2377 return (-EINVAL); 2378 case SO_TIMESTAMP: 2379 if (!checkonly) 2380 udp->udp_timestamp = onoff; 2381 break; 2382 case SO_ANON_MLP: 2383 if (!checkonly) { 2384 connp->conn_anon_mlp = onoff; 2385 PASS_OPT_TO_IP(connp); 2386 } 2387 break; 2388 case SO_MAC_EXEMPT: 2389 if (secpolicy_net_mac_aware(cr) != 0 || 2390 udp->udp_state != TS_UNBND) 2391 return (EACCES); 2392 if (!checkonly) { 2393 connp->conn_mac_exempt = onoff; 2394 PASS_OPT_TO_IP(connp); 2395 } 2396 break; 2397 case SCM_UCRED: { 2398 struct ucred_s *ucr; 2399 cred_t *cr, *newcr; 2400 ts_label_t *tsl; 2401 2402 /* 2403 * Only sockets that have proper privileges and are 2404 * bound to MLPs will have any other value here, so 2405 * this implicitly tests for privilege to set label. 2406 */ 2407 if (connp->conn_mlp_type == mlptSingle) 2408 break; 2409 ucr = (struct ucred_s *)invalp; 2410 if (inlen != ucredsize || 2411 ucr->uc_labeloff < sizeof (*ucr) || 2412 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2413 return (EINVAL); 2414 if (!checkonly) { 2415 mblk_t *mb; 2416 pid_t cpid; 2417 2418 if (attrs == NULL || 2419 (mb = attrs->udpattr_mb) == NULL) 2420 return (EINVAL); 2421 if ((cr = msg_getcred(mb, &cpid)) == NULL) 2422 cr = udp->udp_connp->conn_cred; 2423 ASSERT(cr != NULL); 2424 if ((tsl = crgetlabel(cr)) == NULL) 2425 return (EINVAL); 2426 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2427 tsl->tsl_doi, KM_NOSLEEP); 2428 if (newcr == NULL) 2429 return (ENOSR); 2430 mblk_setcred(mb, newcr, cpid); 2431 attrs->udpattr_credset = B_TRUE; 2432 crfree(newcr); 2433 } 2434 break; 2435 } 2436 case SO_EXCLBIND: 2437 if (!checkonly) 2438 udp->udp_exclbind = onoff; 2439 break; 2440 case SO_RCVTIMEO: 2441 case SO_SNDTIMEO: 2442 /* 2443 * Pass these two options in order for third part 2444 * protocol usage. Here just return directly. 
2445 */ 2446 return (0); 2447 default: 2448 *outlenp = 0; 2449 return (EINVAL); 2450 } 2451 break; 2452 case IPPROTO_IP: 2453 if (udp->udp_family != AF_INET) { 2454 *outlenp = 0; 2455 return (ENOPROTOOPT); 2456 } 2457 switch (name) { 2458 case IP_OPTIONS: 2459 case T_IP_OPTIONS: 2460 /* Save options for use by IP. */ 2461 newlen = inlen + udp->udp_label_len; 2462 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2463 *outlenp = 0; 2464 return (EINVAL); 2465 } 2466 if (checkonly) 2467 break; 2468 2469 /* 2470 * Update the stored options taking into account 2471 * any CIPSO option which we should not overwrite. 2472 */ 2473 if (!tsol_option_set(&udp->udp_ip_snd_options, 2474 &udp->udp_ip_snd_options_len, 2475 udp->udp_label_len, invalp, inlen)) { 2476 *outlenp = 0; 2477 return (ENOMEM); 2478 } 2479 2480 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2481 UDPH_SIZE + udp->udp_ip_snd_options_len; 2482 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2483 rw_exit(&udp->udp_rwlock); 2484 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2485 sth_wroff); 2486 rw_enter(&udp->udp_rwlock, RW_WRITER); 2487 break; 2488 2489 case IP_TTL: 2490 if (!checkonly) { 2491 udp->udp_ttl = (uchar_t)*i1; 2492 } 2493 break; 2494 case IP_TOS: 2495 case T_IP_TOS: 2496 if (!checkonly) { 2497 udp->udp_type_of_service = (uchar_t)*i1; 2498 } 2499 break; 2500 case IP_MULTICAST_IF: { 2501 /* 2502 * TODO should check OPTMGMT reply and undo this if 2503 * there is an error. 
2504 */ 2505 struct in_addr *inap = (struct in_addr *)invalp; 2506 if (!checkonly) { 2507 udp->udp_multicast_if_addr = 2508 inap->s_addr; 2509 PASS_OPT_TO_IP(connp); 2510 } 2511 break; 2512 } 2513 case IP_MULTICAST_TTL: 2514 if (!checkonly) 2515 udp->udp_multicast_ttl = *invalp; 2516 break; 2517 case IP_MULTICAST_LOOP: 2518 if (!checkonly) { 2519 connp->conn_multicast_loop = *invalp; 2520 PASS_OPT_TO_IP(connp); 2521 } 2522 break; 2523 case IP_RECVOPTS: 2524 if (!checkonly) 2525 udp->udp_recvopts = onoff; 2526 break; 2527 case IP_RECVDSTADDR: 2528 if (!checkonly) 2529 udp->udp_recvdstaddr = onoff; 2530 break; 2531 case IP_RECVIF: 2532 if (!checkonly) { 2533 udp->udp_recvif = onoff; 2534 PASS_OPT_TO_IP(connp); 2535 } 2536 break; 2537 case IP_RECVSLLA: 2538 if (!checkonly) { 2539 udp->udp_recvslla = onoff; 2540 PASS_OPT_TO_IP(connp); 2541 } 2542 break; 2543 case IP_RECVTTL: 2544 if (!checkonly) 2545 udp->udp_recvttl = onoff; 2546 break; 2547 case IP_PKTINFO: { 2548 /* 2549 * This also handles IP_RECVPKTINFO. 2550 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2551 * Differentiation is based on the size of the 2552 * argument passed in. 2553 */ 2554 struct in_pktinfo *pktinfop; 2555 ip4_pkt_t *attr_pktinfop; 2556 2557 if (checkonly) 2558 break; 2559 2560 if (inlen == sizeof (int)) { 2561 /* 2562 * This is IP_RECVPKTINFO option. 2563 * Keep a local copy of whether this option is 2564 * set or not and pass it down to IP for 2565 * processing. 2566 */ 2567 2568 udp->udp_ip_recvpktinfo = onoff; 2569 return (-EINVAL); 2570 } 2571 2572 if (attrs == NULL || 2573 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2574 /* 2575 * sticky option or no buffer to return 2576 * the results. 
2577 */ 2578 return (EINVAL); 2579 } 2580 2581 if (inlen != sizeof (struct in_pktinfo)) 2582 return (EINVAL); 2583 2584 pktinfop = (struct in_pktinfo *)invalp; 2585 2586 /* 2587 * At least one of the values should be specified 2588 */ 2589 if (pktinfop->ipi_ifindex == 0 && 2590 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2591 return (EINVAL); 2592 } 2593 2594 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2595 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2596 2597 break; 2598 } 2599 case IP_ADD_MEMBERSHIP: 2600 case IP_DROP_MEMBERSHIP: 2601 case IP_BLOCK_SOURCE: 2602 case IP_UNBLOCK_SOURCE: 2603 case IP_ADD_SOURCE_MEMBERSHIP: 2604 case IP_DROP_SOURCE_MEMBERSHIP: 2605 case MCAST_JOIN_GROUP: 2606 case MCAST_LEAVE_GROUP: 2607 case MCAST_BLOCK_SOURCE: 2608 case MCAST_UNBLOCK_SOURCE: 2609 case MCAST_JOIN_SOURCE_GROUP: 2610 case MCAST_LEAVE_SOURCE_GROUP: 2611 case IP_SEC_OPT: 2612 case IP_NEXTHOP: 2613 case IP_DHCPINIT_IF: 2614 /* 2615 * "soft" error (negative) 2616 * option not handled at this level 2617 * Do not modify *outlenp. 
2618 */ 2619 return (-EINVAL); 2620 case IP_BOUND_IF: 2621 if (!checkonly) { 2622 udp->udp_bound_if = *i1; 2623 PASS_OPT_TO_IP(connp); 2624 } 2625 break; 2626 case IP_UNSPEC_SRC: 2627 if (!checkonly) { 2628 udp->udp_unspec_source = onoff; 2629 PASS_OPT_TO_IP(connp); 2630 } 2631 break; 2632 case IP_BROADCAST_TTL: 2633 if (!checkonly) 2634 connp->conn_broadcast_ttl = *invalp; 2635 break; 2636 default: 2637 *outlenp = 0; 2638 return (EINVAL); 2639 } 2640 break; 2641 case IPPROTO_IPV6: { 2642 ip6_pkt_t *ipp; 2643 boolean_t sticky; 2644 2645 if (udp->udp_family != AF_INET6) { 2646 *outlenp = 0; 2647 return (ENOPROTOOPT); 2648 } 2649 /* 2650 * Deal with both sticky options and ancillary data 2651 */ 2652 sticky = B_FALSE; 2653 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2654 NULL) { 2655 /* sticky options, or none */ 2656 ipp = &udp->udp_sticky_ipp; 2657 sticky = B_TRUE; 2658 } 2659 2660 switch (name) { 2661 case IPV6_MULTICAST_IF: 2662 if (!checkonly) { 2663 udp->udp_multicast_if_index = *i1; 2664 PASS_OPT_TO_IP(connp); 2665 } 2666 break; 2667 case IPV6_UNICAST_HOPS: 2668 /* -1 means use default */ 2669 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2670 *outlenp = 0; 2671 return (EINVAL); 2672 } 2673 if (!checkonly) { 2674 if (*i1 == -1) { 2675 udp->udp_ttl = ipp->ipp_unicast_hops = 2676 us->us_ipv6_hoplimit; 2677 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2678 /* Pass modified value to IP. 
*/ 2679 *i1 = udp->udp_ttl; 2680 } else { 2681 udp->udp_ttl = ipp->ipp_unicast_hops = 2682 (uint8_t)*i1; 2683 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2684 } 2685 /* Rebuild the header template */ 2686 error = udp_build_hdrs(udp); 2687 if (error != 0) { 2688 *outlenp = 0; 2689 return (error); 2690 } 2691 } 2692 break; 2693 case IPV6_MULTICAST_HOPS: 2694 /* -1 means use default */ 2695 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2696 *outlenp = 0; 2697 return (EINVAL); 2698 } 2699 if (!checkonly) { 2700 if (*i1 == -1) { 2701 udp->udp_multicast_ttl = 2702 ipp->ipp_multicast_hops = 2703 IP_DEFAULT_MULTICAST_TTL; 2704 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2705 /* Pass modified value to IP. */ 2706 *i1 = udp->udp_multicast_ttl; 2707 } else { 2708 udp->udp_multicast_ttl = 2709 ipp->ipp_multicast_hops = 2710 (uint8_t)*i1; 2711 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2712 } 2713 } 2714 break; 2715 case IPV6_MULTICAST_LOOP: 2716 if (*i1 != 0 && *i1 != 1) { 2717 *outlenp = 0; 2718 return (EINVAL); 2719 } 2720 if (!checkonly) { 2721 connp->conn_multicast_loop = *i1; 2722 PASS_OPT_TO_IP(connp); 2723 } 2724 break; 2725 case IPV6_JOIN_GROUP: 2726 case IPV6_LEAVE_GROUP: 2727 case MCAST_JOIN_GROUP: 2728 case MCAST_LEAVE_GROUP: 2729 case MCAST_BLOCK_SOURCE: 2730 case MCAST_UNBLOCK_SOURCE: 2731 case MCAST_JOIN_SOURCE_GROUP: 2732 case MCAST_LEAVE_SOURCE_GROUP: 2733 /* 2734 * "soft" error (negative) 2735 * option not handled at this level 2736 * Note: Do not modify *outlenp 2737 */ 2738 return (-EINVAL); 2739 case IPV6_BOUND_IF: 2740 if (!checkonly) { 2741 udp->udp_bound_if = *i1; 2742 PASS_OPT_TO_IP(connp); 2743 } 2744 break; 2745 case IPV6_UNSPEC_SRC: 2746 if (!checkonly) { 2747 udp->udp_unspec_source = onoff; 2748 PASS_OPT_TO_IP(connp); 2749 } 2750 break; 2751 /* 2752 * Set boolean switches for ancillary data delivery 2753 */ 2754 case IPV6_RECVPKTINFO: 2755 if (!checkonly) { 2756 udp->udp_ip_recvpktinfo = onoff; 2757 PASS_OPT_TO_IP(connp); 2758 } 2759 break; 2760 case IPV6_RECVTCLASS: 
2761 if (!checkonly) { 2762 udp->udp_ipv6_recvtclass = onoff; 2763 PASS_OPT_TO_IP(connp); 2764 } 2765 break; 2766 case IPV6_RECVPATHMTU: 2767 if (!checkonly) { 2768 udp->udp_ipv6_recvpathmtu = onoff; 2769 PASS_OPT_TO_IP(connp); 2770 } 2771 break; 2772 case IPV6_RECVHOPLIMIT: 2773 if (!checkonly) { 2774 udp->udp_ipv6_recvhoplimit = onoff; 2775 PASS_OPT_TO_IP(connp); 2776 } 2777 break; 2778 case IPV6_RECVHOPOPTS: 2779 if (!checkonly) { 2780 udp->udp_ipv6_recvhopopts = onoff; 2781 PASS_OPT_TO_IP(connp); 2782 } 2783 break; 2784 case IPV6_RECVDSTOPTS: 2785 if (!checkonly) { 2786 udp->udp_ipv6_recvdstopts = onoff; 2787 PASS_OPT_TO_IP(connp); 2788 } 2789 break; 2790 case _OLD_IPV6_RECVDSTOPTS: 2791 if (!checkonly) 2792 udp->udp_old_ipv6_recvdstopts = onoff; 2793 break; 2794 case IPV6_RECVRTHDRDSTOPTS: 2795 if (!checkonly) { 2796 udp->udp_ipv6_recvrthdrdstopts = onoff; 2797 PASS_OPT_TO_IP(connp); 2798 } 2799 break; 2800 case IPV6_RECVRTHDR: 2801 if (!checkonly) { 2802 udp->udp_ipv6_recvrthdr = onoff; 2803 PASS_OPT_TO_IP(connp); 2804 } 2805 break; 2806 /* 2807 * Set sticky options or ancillary data. 2808 * If sticky options, (re)build any extension headers 2809 * that might be needed as a result. 2810 */ 2811 case IPV6_PKTINFO: 2812 /* 2813 * The source address and ifindex are verified 2814 * in ip_opt_set(). For ancillary data the 2815 * source address is checked in ip_wput_v6. 
2816 */ 2817 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2818 return (EINVAL); 2819 if (checkonly) 2820 break; 2821 2822 if (inlen == 0) { 2823 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2824 ipp->ipp_sticky_ignored |= 2825 (IPPF_IFINDEX|IPPF_ADDR); 2826 } else { 2827 struct in6_pktinfo *pkti; 2828 2829 pkti = (struct in6_pktinfo *)invalp; 2830 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2831 ipp->ipp_addr = pkti->ipi6_addr; 2832 if (ipp->ipp_ifindex != 0) 2833 ipp->ipp_fields |= IPPF_IFINDEX; 2834 else 2835 ipp->ipp_fields &= ~IPPF_IFINDEX; 2836 if (!IN6_IS_ADDR_UNSPECIFIED( 2837 &ipp->ipp_addr)) 2838 ipp->ipp_fields |= IPPF_ADDR; 2839 else 2840 ipp->ipp_fields &= ~IPPF_ADDR; 2841 } 2842 if (sticky) { 2843 error = udp_build_hdrs(udp); 2844 if (error != 0) 2845 return (error); 2846 PASS_OPT_TO_IP(connp); 2847 } 2848 break; 2849 case IPV6_HOPLIMIT: 2850 if (sticky) 2851 return (EINVAL); 2852 if (inlen != 0 && inlen != sizeof (int)) 2853 return (EINVAL); 2854 if (checkonly) 2855 break; 2856 2857 if (inlen == 0) { 2858 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2859 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2860 } else { 2861 if (*i1 > 255 || *i1 < -1) 2862 return (EINVAL); 2863 if (*i1 == -1) 2864 ipp->ipp_hoplimit = 2865 us->us_ipv6_hoplimit; 2866 else 2867 ipp->ipp_hoplimit = *i1; 2868 ipp->ipp_fields |= IPPF_HOPLIMIT; 2869 } 2870 break; 2871 case IPV6_TCLASS: 2872 if (inlen != 0 && inlen != sizeof (int)) 2873 return (EINVAL); 2874 if (checkonly) 2875 break; 2876 2877 if (inlen == 0) { 2878 ipp->ipp_fields &= ~IPPF_TCLASS; 2879 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2880 } else { 2881 if (*i1 > 255 || *i1 < -1) 2882 return (EINVAL); 2883 if (*i1 == -1) 2884 ipp->ipp_tclass = 0; 2885 else 2886 ipp->ipp_tclass = *i1; 2887 ipp->ipp_fields |= IPPF_TCLASS; 2888 } 2889 if (sticky) { 2890 error = udp_build_hdrs(udp); 2891 if (error != 0) 2892 return (error); 2893 } 2894 break; 2895 case IPV6_NEXTHOP: 2896 /* 2897 * IP will verify that the nexthop is reachable 2898 * 
and fail for sticky options. 2899 */ 2900 if (inlen != 0 && inlen != sizeof (sin6_t)) 2901 return (EINVAL); 2902 if (checkonly) 2903 break; 2904 2905 if (inlen == 0) { 2906 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2907 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2908 } else { 2909 sin6_t *sin6 = (sin6_t *)invalp; 2910 2911 if (sin6->sin6_family != AF_INET6) { 2912 return (EAFNOSUPPORT); 2913 } 2914 if (IN6_IS_ADDR_V4MAPPED( 2915 &sin6->sin6_addr)) 2916 return (EADDRNOTAVAIL); 2917 ipp->ipp_nexthop = sin6->sin6_addr; 2918 if (!IN6_IS_ADDR_UNSPECIFIED( 2919 &ipp->ipp_nexthop)) 2920 ipp->ipp_fields |= IPPF_NEXTHOP; 2921 else 2922 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2923 } 2924 if (sticky) { 2925 error = udp_build_hdrs(udp); 2926 if (error != 0) 2927 return (error); 2928 PASS_OPT_TO_IP(connp); 2929 } 2930 break; 2931 case IPV6_HOPOPTS: { 2932 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2933 /* 2934 * Sanity checks - minimum size, size a multiple of 2935 * eight bytes, and matching size passed in. 2936 */ 2937 if (inlen != 0 && 2938 inlen != (8 * (hopts->ip6h_len + 1))) 2939 return (EINVAL); 2940 2941 if (checkonly) 2942 break; 2943 2944 error = optcom_pkt_set(invalp, inlen, sticky, 2945 (uchar_t **)&ipp->ipp_hopopts, 2946 &ipp->ipp_hopoptslen, 2947 sticky ? udp->udp_label_len_v6 : 0); 2948 if (error != 0) 2949 return (error); 2950 if (ipp->ipp_hopoptslen == 0) { 2951 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2952 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2953 } else { 2954 ipp->ipp_fields |= IPPF_HOPOPTS; 2955 } 2956 if (sticky) { 2957 error = udp_build_hdrs(udp); 2958 if (error != 0) 2959 return (error); 2960 } 2961 break; 2962 } 2963 case IPV6_RTHDRDSTOPTS: { 2964 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2965 2966 /* 2967 * Sanity checks - minimum size, size a multiple of 2968 * eight bytes, and matching size passed in. 
2969 */ 2970 if (inlen != 0 && 2971 inlen != (8 * (dopts->ip6d_len + 1))) 2972 return (EINVAL); 2973 2974 if (checkonly) 2975 break; 2976 2977 if (inlen == 0) { 2978 if (sticky && 2979 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2980 kmem_free(ipp->ipp_rtdstopts, 2981 ipp->ipp_rtdstoptslen); 2982 ipp->ipp_rtdstopts = NULL; 2983 ipp->ipp_rtdstoptslen = 0; 2984 } 2985 2986 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2987 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2988 } else { 2989 error = optcom_pkt_set(invalp, inlen, sticky, 2990 (uchar_t **)&ipp->ipp_rtdstopts, 2991 &ipp->ipp_rtdstoptslen, 0); 2992 if (error != 0) 2993 return (error); 2994 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2995 } 2996 if (sticky) { 2997 error = udp_build_hdrs(udp); 2998 if (error != 0) 2999 return (error); 3000 } 3001 break; 3002 } 3003 case IPV6_DSTOPTS: { 3004 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3005 3006 /* 3007 * Sanity checks - minimum size, size a multiple of 3008 * eight bytes, and matching size passed in. 3009 */ 3010 if (inlen != 0 && 3011 inlen != (8 * (dopts->ip6d_len + 1))) 3012 return (EINVAL); 3013 3014 if (checkonly) 3015 break; 3016 3017 if (inlen == 0) { 3018 if (sticky && 3019 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3020 kmem_free(ipp->ipp_dstopts, 3021 ipp->ipp_dstoptslen); 3022 ipp->ipp_dstopts = NULL; 3023 ipp->ipp_dstoptslen = 0; 3024 } 3025 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3026 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3027 } else { 3028 error = optcom_pkt_set(invalp, inlen, sticky, 3029 (uchar_t **)&ipp->ipp_dstopts, 3030 &ipp->ipp_dstoptslen, 0); 3031 if (error != 0) 3032 return (error); 3033 ipp->ipp_fields |= IPPF_DSTOPTS; 3034 } 3035 if (sticky) { 3036 error = udp_build_hdrs(udp); 3037 if (error != 0) 3038 return (error); 3039 } 3040 break; 3041 } 3042 case IPV6_RTHDR: { 3043 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3044 3045 /* 3046 * Sanity checks - minimum size, size a multiple of 3047 * eight bytes, and matching size passed in. 
3048 */ 3049 if (inlen != 0 && 3050 inlen != (8 * (rt->ip6r_len + 1))) 3051 return (EINVAL); 3052 3053 if (checkonly) 3054 break; 3055 3056 if (inlen == 0) { 3057 if (sticky && 3058 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3059 kmem_free(ipp->ipp_rthdr, 3060 ipp->ipp_rthdrlen); 3061 ipp->ipp_rthdr = NULL; 3062 ipp->ipp_rthdrlen = 0; 3063 } 3064 ipp->ipp_fields &= ~IPPF_RTHDR; 3065 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3066 } else { 3067 error = optcom_pkt_set(invalp, inlen, sticky, 3068 (uchar_t **)&ipp->ipp_rthdr, 3069 &ipp->ipp_rthdrlen, 0); 3070 if (error != 0) 3071 return (error); 3072 ipp->ipp_fields |= IPPF_RTHDR; 3073 } 3074 if (sticky) { 3075 error = udp_build_hdrs(udp); 3076 if (error != 0) 3077 return (error); 3078 } 3079 break; 3080 } 3081 3082 case IPV6_DONTFRAG: 3083 if (checkonly) 3084 break; 3085 3086 if (onoff) { 3087 ipp->ipp_fields |= IPPF_DONTFRAG; 3088 } else { 3089 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3090 } 3091 break; 3092 3093 case IPV6_USE_MIN_MTU: 3094 if (inlen != sizeof (int)) 3095 return (EINVAL); 3096 3097 if (*i1 < -1 || *i1 > 1) 3098 return (EINVAL); 3099 3100 if (checkonly) 3101 break; 3102 3103 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3104 ipp->ipp_use_min_mtu = *i1; 3105 break; 3106 3107 case IPV6_SEC_OPT: 3108 case IPV6_SRC_PREFERENCES: 3109 case IPV6_V6ONLY: 3110 /* Handled at the IP level */ 3111 return (-EINVAL); 3112 default: 3113 *outlenp = 0; 3114 return (EINVAL); 3115 } 3116 break; 3117 } /* end IPPROTO_IPV6 */ 3118 case IPPROTO_UDP: 3119 switch (name) { 3120 case UDP_ANONPRIVBIND: 3121 if ((error = secpolicy_net_privaddr(cr, 0, 3122 IPPROTO_UDP)) != 0) { 3123 *outlenp = 0; 3124 return (error); 3125 } 3126 if (!checkonly) { 3127 udp->udp_anon_priv_bind = onoff; 3128 } 3129 break; 3130 case UDP_EXCLBIND: 3131 if (!checkonly) 3132 udp->udp_exclbind = onoff; 3133 break; 3134 case UDP_RCVHDR: 3135 if (!checkonly) 3136 udp->udp_rcvhdr = onoff; 3137 break; 3138 case UDP_NAT_T_ENDPOINT: 3139 if ((error = secpolicy_ip_config(cr, 
B_FALSE)) != 0) { 3140 *outlenp = 0; 3141 return (error); 3142 } 3143 3144 /* 3145 * Use udp_family instead so we can avoid ambiguitites 3146 * with AF_INET6 sockets that may switch from IPv4 3147 * to IPv6. 3148 */ 3149 if (udp->udp_family != AF_INET) { 3150 *outlenp = 0; 3151 return (EAFNOSUPPORT); 3152 } 3153 3154 if (!checkonly) { 3155 int size; 3156 3157 udp->udp_nat_t_endpoint = onoff; 3158 3159 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3160 UDPH_SIZE + udp->udp_ip_snd_options_len; 3161 3162 /* Also, adjust wroff */ 3163 if (onoff) { 3164 udp->udp_max_hdr_len += 3165 sizeof (uint32_t); 3166 } 3167 size = udp->udp_max_hdr_len + 3168 us->us_wroff_extra; 3169 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3170 size); 3171 } 3172 break; 3173 default: 3174 *outlenp = 0; 3175 return (EINVAL); 3176 } 3177 break; 3178 default: 3179 *outlenp = 0; 3180 return (EINVAL); 3181 } 3182 /* 3183 * Common case of OK return with outval same as inval. 3184 */ 3185 if (invalp != outvalp) { 3186 /* don't trust bcopy for identical src/dst */ 3187 (void) bcopy(invalp, outvalp, inlen); 3188 } 3189 *outlenp = inlen; 3190 return (0); 3191 } 3192 3193 int 3194 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3195 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3196 void *thisdg_attrs, cred_t *cr) 3197 { 3198 int error; 3199 boolean_t checkonly; 3200 3201 error = 0; 3202 switch (optset_context) { 3203 case SETFN_OPTCOM_CHECKONLY: 3204 checkonly = B_TRUE; 3205 /* 3206 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3207 * inlen != 0 implies value supplied and 3208 * we have to "pretend" to set it. 3209 * inlen == 0 implies that there is no 3210 * value part in T_CHECK request and just validation 3211 * done elsewhere should be enough, we just return here. 
3212 */ 3213 if (inlen == 0) { 3214 *outlenp = 0; 3215 goto done; 3216 } 3217 break; 3218 case SETFN_OPTCOM_NEGOTIATE: 3219 checkonly = B_FALSE; 3220 break; 3221 case SETFN_UD_NEGOTIATE: 3222 case SETFN_CONN_NEGOTIATE: 3223 checkonly = B_FALSE; 3224 /* 3225 * Negotiating local and "association-related" options 3226 * through T_UNITDATA_REQ. 3227 * 3228 * Following routine can filter out ones we do not 3229 * want to be "set" this way. 3230 */ 3231 if (!udp_opt_allow_udr_set(level, name)) { 3232 *outlenp = 0; 3233 error = EINVAL; 3234 goto done; 3235 } 3236 break; 3237 default: 3238 /* 3239 * We should never get here 3240 */ 3241 *outlenp = 0; 3242 error = EINVAL; 3243 goto done; 3244 } 3245 3246 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3247 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3248 3249 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3250 outvalp, cr, thisdg_attrs, checkonly); 3251 done: 3252 return (error); 3253 } 3254 3255 /* ARGSUSED */ 3256 int 3257 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3258 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3259 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3260 { 3261 conn_t *connp = Q_TO_CONN(q); 3262 int error; 3263 udp_t *udp = connp->conn_udp; 3264 3265 rw_enter(&udp->udp_rwlock, RW_WRITER); 3266 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3267 outlenp, outvalp, thisdg_attrs, cr); 3268 rw_exit(&udp->udp_rwlock); 3269 return (error); 3270 } 3271 3272 /* 3273 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3274 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3275 * headers, and the udp header. 3276 * Returns failure if can't allocate memory. 
3277 */ 3278 static int 3279 udp_build_hdrs(udp_t *udp) 3280 { 3281 udp_stack_t *us = udp->udp_us; 3282 uchar_t *hdrs; 3283 uint_t hdrs_len; 3284 ip6_t *ip6h; 3285 ip6i_t *ip6i; 3286 udpha_t *udpha; 3287 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3288 size_t sth_wroff; 3289 conn_t *connp = udp->udp_connp; 3290 3291 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3292 ASSERT(connp != NULL); 3293 3294 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3295 ASSERT(hdrs_len != 0); 3296 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3297 /* Need to reallocate */ 3298 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3299 if (hdrs == NULL) 3300 return (ENOMEM); 3301 3302 if (udp->udp_sticky_hdrs_len != 0) { 3303 kmem_free(udp->udp_sticky_hdrs, 3304 udp->udp_sticky_hdrs_len); 3305 } 3306 udp->udp_sticky_hdrs = hdrs; 3307 udp->udp_sticky_hdrs_len = hdrs_len; 3308 } 3309 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3310 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3311 3312 /* Set header fields not in ipp */ 3313 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3314 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3315 ip6h = (ip6_t *)&ip6i[1]; 3316 } else { 3317 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3318 } 3319 3320 if (!(ipp->ipp_fields & IPPF_ADDR)) 3321 ip6h->ip6_src = udp->udp_v6src; 3322 3323 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3324 udpha->uha_src_port = udp->udp_port; 3325 3326 /* Try to get everything in a single mblk */ 3327 if (hdrs_len > udp->udp_max_hdr_len) { 3328 udp->udp_max_hdr_len = hdrs_len; 3329 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3330 rw_exit(&udp->udp_rwlock); 3331 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3332 udp->udp_connp, sth_wroff); 3333 rw_enter(&udp->udp_rwlock, RW_WRITER); 3334 } 3335 return (0); 3336 } 3337 3338 /* 3339 * This routine retrieves the value of an ND variable in a udpparam_t 3340 * structure. It is called through nd_getset when a user reads the 3341 * variable. 
3342 */ 3343 /* ARGSUSED */ 3344 static int 3345 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3346 { 3347 udpparam_t *udppa = (udpparam_t *)cp; 3348 3349 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3350 return (0); 3351 } 3352 3353 /* 3354 * Walk through the param array specified registering each element with the 3355 * named dispatch (ND) handler. 3356 */ 3357 static boolean_t 3358 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3359 { 3360 for (; cnt-- > 0; udppa++) { 3361 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3362 if (!nd_load(ndp, udppa->udp_param_name, 3363 udp_param_get, udp_param_set, 3364 (caddr_t)udppa)) { 3365 nd_free(ndp); 3366 return (B_FALSE); 3367 } 3368 } 3369 } 3370 if (!nd_load(ndp, "udp_extra_priv_ports", 3371 udp_extra_priv_ports_get, NULL, NULL)) { 3372 nd_free(ndp); 3373 return (B_FALSE); 3374 } 3375 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3376 NULL, udp_extra_priv_ports_add, NULL)) { 3377 nd_free(ndp); 3378 return (B_FALSE); 3379 } 3380 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3381 NULL, udp_extra_priv_ports_del, NULL)) { 3382 nd_free(ndp); 3383 return (B_FALSE); 3384 } 3385 return (B_TRUE); 3386 } 3387 3388 /* This routine sets an ND variable in a udpparam_t structure. */ 3389 /* ARGSUSED */ 3390 static int 3391 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3392 { 3393 long new_value; 3394 udpparam_t *udppa = (udpparam_t *)cp; 3395 3396 /* 3397 * Fail the request if the new value does not lie within the 3398 * required bounds. 3399 */ 3400 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3401 new_value < udppa->udp_param_min || 3402 new_value > udppa->udp_param_max) { 3403 return (EINVAL); 3404 } 3405 3406 /* Set the new value */ 3407 udppa->udp_param_value = new_value; 3408 return (0); 3409 } 3410 3411 /* 3412 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3413 * T_opthdr) and return the number of bytes copied. 
'dbuf' may be NULL to 3414 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3415 * then it's assumed to be allocated to be large enough. 3416 * 3417 * Returns zero if trimming of the security option causes all options to go 3418 * away. 3419 */ 3420 static size_t 3421 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3422 { 3423 struct T_opthdr *toh; 3424 size_t hol = ipp->ipp_hopoptslen; 3425 ip6_hbh_t *dstopt = NULL; 3426 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3427 size_t tlen, olen, plen; 3428 boolean_t deleting; 3429 const struct ip6_opt *sopt, *lastpad; 3430 struct ip6_opt *dopt; 3431 3432 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3433 toh->level = IPPROTO_IPV6; 3434 toh->name = IPV6_HOPOPTS; 3435 toh->status = 0; 3436 dstopt = (ip6_hbh_t *)(toh + 1); 3437 } 3438 3439 /* 3440 * If labeling is enabled, then skip the label option 3441 * but get other options if there are any. 3442 */ 3443 if (is_system_labeled()) { 3444 dopt = NULL; 3445 if (dstopt != NULL) { 3446 /* will fill in ip6h_len later */ 3447 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3448 dopt = (struct ip6_opt *)(dstopt + 1); 3449 } 3450 sopt = (const struct ip6_opt *)(srcopt + 1); 3451 hol -= sizeof (*srcopt); 3452 tlen = sizeof (*dstopt); 3453 lastpad = NULL; 3454 deleting = B_FALSE; 3455 /* 3456 * This loop finds the first (lastpad pointer) of any number of 3457 * pads that preceeds the security option, then treats the 3458 * security option as though it were a pad, and then finds the 3459 * next non-pad option (or end of list). 3460 * 3461 * It then treats the entire block as one big pad. To preserve 3462 * alignment of any options that follow, or just the end of the 3463 * list, it computes a minimal new padding size that keeps the 3464 * same alignment for the next option. 3465 * 3466 * If it encounters just a sequence of pads with no security 3467 * option, those are copied as-is rather than collapsed. 
3468 * 3469 * Note that to handle the end of list case, the code makes one 3470 * loop with 'hol' set to zero. 3471 */ 3472 for (;;) { 3473 if (hol > 0) { 3474 if (sopt->ip6o_type == IP6OPT_PAD1) { 3475 if (lastpad == NULL) 3476 lastpad = sopt; 3477 sopt = (const struct ip6_opt *) 3478 &sopt->ip6o_len; 3479 hol--; 3480 continue; 3481 } 3482 olen = sopt->ip6o_len + sizeof (*sopt); 3483 if (olen > hol) 3484 olen = hol; 3485 if (sopt->ip6o_type == IP6OPT_PADN || 3486 sopt->ip6o_type == ip6opt_ls) { 3487 if (sopt->ip6o_type == ip6opt_ls) 3488 deleting = B_TRUE; 3489 if (lastpad == NULL) 3490 lastpad = sopt; 3491 sopt = (const struct ip6_opt *) 3492 ((const char *)sopt + olen); 3493 hol -= olen; 3494 continue; 3495 } 3496 } else { 3497 /* if nothing was copied at all, then delete */ 3498 if (tlen == sizeof (*dstopt)) 3499 return (0); 3500 /* last pass; pick up any trailing padding */ 3501 olen = 0; 3502 } 3503 if (deleting) { 3504 /* 3505 * compute aligning effect of deleted material 3506 * to reproduce with pad. 
3507 */ 3508 plen = ((const char *)sopt - 3509 (const char *)lastpad) & 7; 3510 tlen += plen; 3511 if (dopt != NULL) { 3512 if (plen == 1) { 3513 dopt->ip6o_type = IP6OPT_PAD1; 3514 } else if (plen > 1) { 3515 plen -= sizeof (*dopt); 3516 dopt->ip6o_type = IP6OPT_PADN; 3517 dopt->ip6o_len = plen; 3518 if (plen > 0) 3519 bzero(dopt + 1, plen); 3520 } 3521 dopt = (struct ip6_opt *) 3522 ((char *)dopt + plen); 3523 } 3524 deleting = B_FALSE; 3525 lastpad = NULL; 3526 } 3527 /* if there's uncopied padding, then copy that now */ 3528 if (lastpad != NULL) { 3529 olen += (const char *)sopt - 3530 (const char *)lastpad; 3531 sopt = lastpad; 3532 lastpad = NULL; 3533 } 3534 if (dopt != NULL && olen > 0) { 3535 bcopy(sopt, dopt, olen); 3536 dopt = (struct ip6_opt *)((char *)dopt + olen); 3537 } 3538 if (hol == 0) 3539 break; 3540 tlen += olen; 3541 sopt = (const struct ip6_opt *) 3542 ((const char *)sopt + olen); 3543 hol -= olen; 3544 } 3545 /* go back and patch up the length value, rounded upward */ 3546 if (dstopt != NULL) 3547 dstopt->ip6h_len = (tlen - 1) >> 3; 3548 } else { 3549 tlen = hol; 3550 if (dstopt != NULL) 3551 bcopy(srcopt, dstopt, hol); 3552 } 3553 3554 tlen += sizeof (*toh); 3555 if (toh != NULL) 3556 toh->len = tlen; 3557 3558 return (tlen); 3559 } 3560 3561 /* 3562 * Update udp_rcv_opt_len from the packet. 3563 * Called when options received, and when no options received but 3564 * udp_ip_recv_opt_len has previously recorded options. 
3565 */ 3566 static void 3567 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3568 { 3569 /* Save the options if any */ 3570 if (opt_len > 0) { 3571 if (opt_len > udp->udp_ip_rcv_options_len) { 3572 /* Need to allocate larger buffer */ 3573 if (udp->udp_ip_rcv_options_len != 0) 3574 mi_free((char *)udp->udp_ip_rcv_options); 3575 udp->udp_ip_rcv_options_len = 0; 3576 udp->udp_ip_rcv_options = 3577 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3578 if (udp->udp_ip_rcv_options != NULL) 3579 udp->udp_ip_rcv_options_len = opt_len; 3580 } 3581 if (udp->udp_ip_rcv_options_len != 0) { 3582 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3583 /* Adjust length if we are resusing the space */ 3584 udp->udp_ip_rcv_options_len = opt_len; 3585 } 3586 } else if (udp->udp_ip_rcv_options_len != 0) { 3587 /* Clear out previously recorded options */ 3588 mi_free((char *)udp->udp_ip_rcv_options); 3589 udp->udp_ip_rcv_options = NULL; 3590 udp->udp_ip_rcv_options_len = 0; 3591 } 3592 } 3593 3594 static mblk_t * 3595 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3596 { 3597 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3598 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3599 /* 3600 * fallback has started but messages have not been moved yet 3601 */ 3602 if (udp->udp_fallback_queue_head == NULL) { 3603 ASSERT(udp->udp_fallback_queue_tail == NULL); 3604 udp->udp_fallback_queue_head = mp; 3605 udp->udp_fallback_queue_tail = mp; 3606 } else { 3607 ASSERT(udp->udp_fallback_queue_tail != NULL); 3608 udp->udp_fallback_queue_tail->b_next = mp; 3609 udp->udp_fallback_queue_tail = mp; 3610 } 3611 return (NULL); 3612 } else { 3613 /* 3614 * Fallback completed, let the caller putnext() the mblk. 3615 */ 3616 return (mp); 3617 } 3618 } 3619 3620 /* 3621 * Deliver data to ULP. In case we have a socket, and it's falling back to 3622 * TPI, then we'll queue the mp for later processing. 
3623 */ 3624 static void 3625 udp_ulp_recv(conn_t *connp, mblk_t *mp) 3626 { 3627 if (IPCL_IS_NONSTR(connp)) { 3628 udp_t *udp = connp->conn_udp; 3629 int error; 3630 3631 if ((*connp->conn_upcalls->su_recv) 3632 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 3633 NULL) < 0) { 3634 mutex_enter(&udp->udp_recv_lock); 3635 if (error == ENOSPC) { 3636 /* 3637 * let's confirm while holding the lock 3638 */ 3639 if ((*connp->conn_upcalls->su_recv) 3640 (connp->conn_upper_handle, NULL, 0, 0, 3641 &error, NULL) < 0) { 3642 ASSERT(error == ENOSPC); 3643 if (error == ENOSPC) { 3644 connp->conn_flow_cntrld = 3645 B_TRUE; 3646 } 3647 } 3648 mutex_exit(&udp->udp_recv_lock); 3649 } else { 3650 ASSERT(error == EOPNOTSUPP); 3651 mp = udp_queue_fallback(udp, mp); 3652 mutex_exit(&udp->udp_recv_lock); 3653 if (mp != NULL) 3654 putnext(connp->conn_rq, mp); 3655 } 3656 } 3657 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 3658 } else { 3659 putnext(connp->conn_rq, mp); 3660 } 3661 } 3662 3663 /* ARGSUSED2 */ 3664 static void 3665 udp_input(void *arg1, mblk_t *mp, void *arg2) 3666 { 3667 conn_t *connp = (conn_t *)arg1; 3668 struct T_unitdata_ind *tudi; 3669 uchar_t *rptr; /* Pointer to IP header */ 3670 int hdr_length; /* Length of IP+UDP headers */ 3671 int opt_len; 3672 int udi_size; /* Size of T_unitdata_ind */ 3673 int mp_len; 3674 udp_t *udp; 3675 udpha_t *udpha; 3676 int ipversion; 3677 ip6_pkt_t ipp; 3678 ip6_t *ip6h; 3679 ip6i_t *ip6i; 3680 mblk_t *mp1; 3681 mblk_t *options_mp = NULL; 3682 ip_pktinfo_t *pinfo = NULL; 3683 cred_t *cr = NULL; 3684 pid_t cpid; 3685 uint32_t udp_ip_rcv_options_len; 3686 udp_bits_t udp_bits; 3687 cred_t *rcr = connp->conn_cred; 3688 udp_stack_t *us; 3689 3690 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3691 3692 udp = connp->conn_udp; 3693 us = udp->udp_us; 3694 rptr = mp->b_rptr; 3695 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3696 ASSERT(OK_32PTR(rptr)); 3697 3698 /* 3699 * IP should have prepended the options data in an M_CTL 
3700 * Check M_CTL "type" to make sure are not here bcos of 3701 * a valid ICMP message 3702 */ 3703 if (DB_TYPE(mp) == M_CTL) { 3704 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3705 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3706 IN_PKTINFO) { 3707 /* 3708 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3709 * has been prepended to the packet by IP. We need to 3710 * extract the mblk and adjust the rptr 3711 */ 3712 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3713 options_mp = mp; 3714 mp = mp->b_cont; 3715 rptr = mp->b_rptr; 3716 UDP_STAT(us, udp_in_pktinfo); 3717 } else { 3718 /* 3719 * ICMP messages. 3720 */ 3721 udp_icmp_error(connp, mp); 3722 return; 3723 } 3724 } 3725 3726 mp_len = msgdsize(mp); 3727 /* 3728 * This is the inbound data path. 3729 * First, we check to make sure the IP version number is correct, 3730 * and then pull the IP and UDP headers into the first mblk. 3731 */ 3732 3733 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3734 ipp.ipp_fields = 0; 3735 3736 ipversion = IPH_HDR_VERSION(rptr); 3737 3738 rw_enter(&udp->udp_rwlock, RW_READER); 3739 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3740 udp_bits = udp->udp_bits; 3741 rw_exit(&udp->udp_rwlock); 3742 3743 switch (ipversion) { 3744 case IPV4_VERSION: 3745 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3746 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3747 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3748 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3749 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3750 udp->udp_family == AF_INET) { 3751 /* 3752 * Record/update udp_ip_rcv_options with the lock 3753 * held. Not needed for AF_INET6 sockets 3754 * since they don't support a getsockopt of IP_OPTIONS. 3755 */ 3756 rw_enter(&udp->udp_rwlock, RW_WRITER); 3757 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3758 opt_len); 3759 rw_exit(&udp->udp_rwlock); 3760 } 3761 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. 
*/ 3762 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3763 udp->udp_ip_recvpktinfo) { 3764 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3765 ipp.ipp_fields |= IPPF_IFINDEX; 3766 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3767 } 3768 } 3769 break; 3770 case IPV6_VERSION: 3771 /* 3772 * IPv6 packets can only be received by applications 3773 * that are prepared to receive IPv6 addresses. 3774 * The IP fanout must ensure this. 3775 */ 3776 ASSERT(udp->udp_family == AF_INET6); 3777 3778 ip6h = (ip6_t *)rptr; 3779 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3780 3781 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3782 uint8_t nexthdrp; 3783 /* Look for ifindex information */ 3784 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3785 ip6i = (ip6i_t *)ip6h; 3786 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3787 goto tossit; 3788 3789 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3790 ASSERT(ip6i->ip6i_ifindex != 0); 3791 ipp.ipp_fields |= IPPF_IFINDEX; 3792 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3793 } 3794 rptr = (uchar_t *)&ip6i[1]; 3795 mp->b_rptr = rptr; 3796 if (rptr == mp->b_wptr) { 3797 mp1 = mp->b_cont; 3798 freeb(mp); 3799 mp = mp1; 3800 rptr = mp->b_rptr; 3801 } 3802 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3803 goto tossit; 3804 ip6h = (ip6_t *)rptr; 3805 mp_len = msgdsize(mp); 3806 } 3807 /* 3808 * Find any potentially interesting extension headers 3809 * as well as the length of the IPv6 + extension 3810 * headers. 3811 */ 3812 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3813 UDPH_SIZE; 3814 ASSERT(nexthdrp == IPPROTO_UDP); 3815 } else { 3816 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3817 ip6i = NULL; 3818 } 3819 break; 3820 default: 3821 ASSERT(0); 3822 } 3823 3824 /* 3825 * IP inspected the UDP header thus all of it must be in the mblk. 3826 * UDP length check is performed for IPv6 packets and IPv4 packets 3827 * to check if the size of the packet as specified 3828 * by the header is the same as the physical size of the packet. 3829 * FIXME? Didn't IP already check this? 
3830 */ 3831 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3832 if ((MBLKL(mp) < hdr_length) || 3833 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3834 goto tossit; 3835 } 3836 3837 3838 /* Walk past the headers unless UDP_RCVHDR was set. */ 3839 if (!udp_bits.udpb_rcvhdr) { 3840 mp->b_rptr = rptr + hdr_length; 3841 mp_len -= hdr_length; 3842 } 3843 3844 /* 3845 * This is the inbound data path. Packets are passed upstream as 3846 * T_UNITDATA_IND messages with full IP headers still attached. 3847 */ 3848 if (udp->udp_family == AF_INET) { 3849 sin_t *sin; 3850 3851 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3852 3853 /* 3854 * Normally only send up the source address. 3855 * If IP_RECVDSTADDR is set we include the destination IP 3856 * address as an option. With IP_RECVOPTS we include all 3857 * the IP options. 3858 */ 3859 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3860 if (udp_bits.udpb_recvdstaddr) { 3861 udi_size += sizeof (struct T_opthdr) + 3862 sizeof (struct in_addr); 3863 UDP_STAT(us, udp_in_recvdstaddr); 3864 } 3865 3866 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3867 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3868 udi_size += sizeof (struct T_opthdr) + 3869 sizeof (struct in_pktinfo); 3870 UDP_STAT(us, udp_ip_rcvpktinfo); 3871 } 3872 3873 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3874 udi_size += sizeof (struct T_opthdr) + opt_len; 3875 UDP_STAT(us, udp_in_recvopts); 3876 } 3877 3878 /* 3879 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3880 * space accordingly 3881 */ 3882 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3883 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3884 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3885 UDP_STAT(us, udp_in_recvif); 3886 } 3887 3888 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3889 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3890 udi_size += sizeof (struct T_opthdr) + 3891 sizeof (struct sockaddr_dl); 3892 UDP_STAT(us, 
udp_in_recvslla); 3893 } 3894 3895 if ((udp_bits.udpb_recvucred) && 3896 (cr = msg_getcred(mp, &cpid)) != NULL) { 3897 udi_size += sizeof (struct T_opthdr) + ucredsize; 3898 UDP_STAT(us, udp_in_recvucred); 3899 } 3900 3901 /* 3902 * If SO_TIMESTAMP is set allocate the appropriate sized 3903 * buffer. Since gethrestime() expects a pointer aligned 3904 * argument, we allocate space necessary for extra 3905 * alignment (even though it might not be used). 3906 */ 3907 if (udp_bits.udpb_timestamp) { 3908 udi_size += sizeof (struct T_opthdr) + 3909 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3910 UDP_STAT(us, udp_in_timestamp); 3911 } 3912 3913 /* 3914 * If IP_RECVTTL is set allocate the appropriate sized buffer 3915 */ 3916 if (udp_bits.udpb_recvttl) { 3917 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3918 UDP_STAT(us, udp_in_recvttl); 3919 } 3920 3921 /* Allocate a message block for the T_UNITDATA_IND structure. */ 3922 mp1 = allocb(udi_size, BPRI_MED); 3923 if (mp1 == NULL) { 3924 freemsg(mp); 3925 if (options_mp != NULL) 3926 freeb(options_mp); 3927 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3928 return; 3929 } 3930 mp1->b_cont = mp; 3931 mp = mp1; 3932 mp->b_datap->db_type = M_PROTO; 3933 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3934 mp->b_wptr = (uchar_t *)tudi + udi_size; 3935 tudi->PRIM_type = T_UNITDATA_IND; 3936 tudi->SRC_length = sizeof (sin_t); 3937 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3938 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3939 sizeof (sin_t); 3940 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3941 tudi->OPT_length = udi_size; 3942 sin = (sin_t *)&tudi[1]; 3943 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3944 sin->sin_port = udpha->uha_src_port; 3945 sin->sin_family = udp->udp_family; 3946 *(uint32_t *)&sin->sin_zero[0] = 0; 3947 *(uint32_t *)&sin->sin_zero[4] = 0; 3948 3949 /* 3950 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 3951 * IP_RECVTTL has been set. 
3952 */ 3953 if (udi_size != 0) { 3954 /* 3955 * Copy in destination address before options to avoid 3956 * any padding issues. 3957 */ 3958 char *dstopt; 3959 3960 dstopt = (char *)&sin[1]; 3961 if (udp_bits.udpb_recvdstaddr) { 3962 struct T_opthdr *toh; 3963 ipaddr_t *dstptr; 3964 3965 toh = (struct T_opthdr *)dstopt; 3966 toh->level = IPPROTO_IP; 3967 toh->name = IP_RECVDSTADDR; 3968 toh->len = sizeof (struct T_opthdr) + 3969 sizeof (ipaddr_t); 3970 toh->status = 0; 3971 dstopt += sizeof (struct T_opthdr); 3972 dstptr = (ipaddr_t *)dstopt; 3973 *dstptr = ((ipha_t *)rptr)->ipha_dst; 3974 dstopt += sizeof (ipaddr_t); 3975 udi_size -= toh->len; 3976 } 3977 3978 if (udp_bits.udpb_recvopts && opt_len > 0) { 3979 struct T_opthdr *toh; 3980 3981 toh = (struct T_opthdr *)dstopt; 3982 toh->level = IPPROTO_IP; 3983 toh->name = IP_RECVOPTS; 3984 toh->len = sizeof (struct T_opthdr) + opt_len; 3985 toh->status = 0; 3986 dstopt += sizeof (struct T_opthdr); 3987 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 3988 opt_len); 3989 dstopt += opt_len; 3990 udi_size -= toh->len; 3991 } 3992 3993 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 3994 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3995 struct T_opthdr *toh; 3996 struct in_pktinfo *pktinfop; 3997 3998 toh = (struct T_opthdr *)dstopt; 3999 toh->level = IPPROTO_IP; 4000 toh->name = IP_PKTINFO; 4001 toh->len = sizeof (struct T_opthdr) + 4002 sizeof (*pktinfop); 4003 toh->status = 0; 4004 dstopt += sizeof (struct T_opthdr); 4005 pktinfop = (struct in_pktinfo *)dstopt; 4006 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4007 pktinfop->ipi_spec_dst = 4008 pinfo->ip_pkt_match_addr; 4009 pktinfop->ipi_addr.s_addr = 4010 ((ipha_t *)rptr)->ipha_dst; 4011 4012 dstopt += sizeof (struct in_pktinfo); 4013 udi_size -= toh->len; 4014 } 4015 4016 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4017 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4018 4019 struct T_opthdr *toh; 4020 struct sockaddr_dl *dstptr; 4021 4022 toh = (struct T_opthdr 
*)dstopt; 4023 toh->level = IPPROTO_IP; 4024 toh->name = IP_RECVSLLA; 4025 toh->len = sizeof (struct T_opthdr) + 4026 sizeof (struct sockaddr_dl); 4027 toh->status = 0; 4028 dstopt += sizeof (struct T_opthdr); 4029 dstptr = (struct sockaddr_dl *)dstopt; 4030 bcopy(&pinfo->ip_pkt_slla, dstptr, 4031 sizeof (struct sockaddr_dl)); 4032 dstopt += sizeof (struct sockaddr_dl); 4033 udi_size -= toh->len; 4034 } 4035 4036 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4037 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4038 4039 struct T_opthdr *toh; 4040 uint_t *dstptr; 4041 4042 toh = (struct T_opthdr *)dstopt; 4043 toh->level = IPPROTO_IP; 4044 toh->name = IP_RECVIF; 4045 toh->len = sizeof (struct T_opthdr) + 4046 sizeof (uint_t); 4047 toh->status = 0; 4048 dstopt += sizeof (struct T_opthdr); 4049 dstptr = (uint_t *)dstopt; 4050 *dstptr = pinfo->ip_pkt_ifindex; 4051 dstopt += sizeof (uint_t); 4052 udi_size -= toh->len; 4053 } 4054 4055 if (cr != NULL) { 4056 struct T_opthdr *toh; 4057 4058 toh = (struct T_opthdr *)dstopt; 4059 toh->level = SOL_SOCKET; 4060 toh->name = SCM_UCRED; 4061 toh->len = sizeof (struct T_opthdr) + ucredsize; 4062 toh->status = 0; 4063 dstopt += sizeof (struct T_opthdr); 4064 (void) cred2ucred(cr, cpid, dstopt, rcr); 4065 dstopt += ucredsize; 4066 udi_size -= toh->len; 4067 } 4068 4069 if (udp_bits.udpb_timestamp) { 4070 struct T_opthdr *toh; 4071 4072 toh = (struct T_opthdr *)dstopt; 4073 toh->level = SOL_SOCKET; 4074 toh->name = SCM_TIMESTAMP; 4075 toh->len = sizeof (struct T_opthdr) + 4076 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4077 toh->status = 0; 4078 dstopt += sizeof (struct T_opthdr); 4079 /* Align for gethrestime() */ 4080 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4081 sizeof (intptr_t)); 4082 gethrestime((timestruc_t *)dstopt); 4083 dstopt = (char *)toh + toh->len; 4084 udi_size -= toh->len; 4085 } 4086 4087 /* 4088 * CAUTION: 4089 * Due to aligment issues 4090 * Processing of IP_RECVTTL option 4091 * should always be the last. 
Adding 4092 * any option processing after this will 4093 * cause alignment panic. 4094 */ 4095 if (udp_bits.udpb_recvttl) { 4096 struct T_opthdr *toh; 4097 uint8_t *dstptr; 4098 4099 toh = (struct T_opthdr *)dstopt; 4100 toh->level = IPPROTO_IP; 4101 toh->name = IP_RECVTTL; 4102 toh->len = sizeof (struct T_opthdr) + 4103 sizeof (uint8_t); 4104 toh->status = 0; 4105 dstopt += sizeof (struct T_opthdr); 4106 dstptr = (uint8_t *)dstopt; 4107 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4108 dstopt += sizeof (uint8_t); 4109 udi_size -= toh->len; 4110 } 4111 4112 /* Consumed all of allocated space */ 4113 ASSERT(udi_size == 0); 4114 } 4115 } else { 4116 sin6_t *sin6; 4117 4118 /* 4119 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4120 * 4121 * Normally we only send up the address. If receiving of any 4122 * optional receive side information is enabled, we also send 4123 * that up as options. 4124 */ 4125 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4126 4127 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4128 IPPF_RTHDR|IPPF_IFINDEX)) { 4129 if ((udp_bits.udpb_ipv6_recvhopopts) && 4130 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4131 size_t hlen; 4132 4133 UDP_STAT(us, udp_in_recvhopopts); 4134 hlen = copy_hop_opts(&ipp, NULL); 4135 if (hlen == 0) 4136 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4137 udi_size += hlen; 4138 } 4139 if (((udp_bits.udpb_ipv6_recvdstopts) || 4140 udp_bits.udpb_old_ipv6_recvdstopts) && 4141 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4142 udi_size += sizeof (struct T_opthdr) + 4143 ipp.ipp_dstoptslen; 4144 UDP_STAT(us, udp_in_recvdstopts); 4145 } 4146 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4147 udp_bits.udpb_ipv6_recvrthdr && 4148 (ipp.ipp_fields & IPPF_RTHDR)) || 4149 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4150 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4151 udi_size += sizeof (struct T_opthdr) + 4152 ipp.ipp_rtdstoptslen; 4153 UDP_STAT(us, udp_in_recvrtdstopts); 4154 } 4155 if ((udp_bits.udpb_ipv6_recvrthdr) && 4156 (ipp.ipp_fields & 
IPPF_RTHDR)) { 4157 udi_size += sizeof (struct T_opthdr) + 4158 ipp.ipp_rthdrlen; 4159 UDP_STAT(us, udp_in_recvrthdr); 4160 } 4161 if ((udp_bits.udpb_ip_recvpktinfo) && 4162 (ipp.ipp_fields & IPPF_IFINDEX)) { 4163 udi_size += sizeof (struct T_opthdr) + 4164 sizeof (struct in6_pktinfo); 4165 UDP_STAT(us, udp_in_recvpktinfo); 4166 } 4167 4168 } 4169 if ((udp_bits.udpb_recvucred) && 4170 (cr = msg_getcred(mp, &cpid)) != NULL) { 4171 udi_size += sizeof (struct T_opthdr) + ucredsize; 4172 UDP_STAT(us, udp_in_recvucred); 4173 } 4174 4175 /* 4176 * If SO_TIMESTAMP is set allocate the appropriate sized 4177 * buffer. Since gethrestime() expects a pointer aligned 4178 * argument, we allocate space necessary for extra 4179 * alignment (even though it might not be used). 4180 */ 4181 if (udp_bits.udpb_timestamp) { 4182 udi_size += sizeof (struct T_opthdr) + 4183 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4184 UDP_STAT(us, udp_in_timestamp); 4185 } 4186 4187 if (udp_bits.udpb_ipv6_recvhoplimit) { 4188 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4189 UDP_STAT(us, udp_in_recvhoplimit); 4190 } 4191 4192 if (udp_bits.udpb_ipv6_recvtclass) { 4193 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4194 UDP_STAT(us, udp_in_recvtclass); 4195 } 4196 4197 mp1 = allocb(udi_size, BPRI_MED); 4198 if (mp1 == NULL) { 4199 freemsg(mp); 4200 if (options_mp != NULL) 4201 freeb(options_mp); 4202 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4203 return; 4204 } 4205 mp1->b_cont = mp; 4206 mp = mp1; 4207 mp->b_datap->db_type = M_PROTO; 4208 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4209 mp->b_wptr = (uchar_t *)tudi + udi_size; 4210 tudi->PRIM_type = T_UNITDATA_IND; 4211 tudi->SRC_length = sizeof (sin6_t); 4212 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4213 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4214 sizeof (sin6_t); 4215 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4216 tudi->OPT_length = udi_size; 4217 sin6 = (sin6_t *)&tudi[1]; 4218 if (ipversion 
== IPV4_VERSION) { 4219 in6_addr_t v6dst; 4220 4221 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4222 &sin6->sin6_addr); 4223 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4224 &v6dst); 4225 sin6->sin6_flowinfo = 0; 4226 sin6->sin6_scope_id = 0; 4227 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4228 connp->conn_zoneid, us->us_netstack); 4229 } else { 4230 sin6->sin6_addr = ip6h->ip6_src; 4231 /* No sin6_flowinfo per API */ 4232 sin6->sin6_flowinfo = 0; 4233 /* For link-scope source pass up scope id */ 4234 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4235 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4236 sin6->sin6_scope_id = ipp.ipp_ifindex; 4237 else 4238 sin6->sin6_scope_id = 0; 4239 sin6->__sin6_src_id = ip_srcid_find_addr( 4240 &ip6h->ip6_dst, connp->conn_zoneid, 4241 us->us_netstack); 4242 } 4243 sin6->sin6_port = udpha->uha_src_port; 4244 sin6->sin6_family = udp->udp_family; 4245 4246 if (udi_size != 0) { 4247 uchar_t *dstopt; 4248 4249 dstopt = (uchar_t *)&sin6[1]; 4250 if ((udp_bits.udpb_ip_recvpktinfo) && 4251 (ipp.ipp_fields & IPPF_IFINDEX)) { 4252 struct T_opthdr *toh; 4253 struct in6_pktinfo *pkti; 4254 4255 toh = (struct T_opthdr *)dstopt; 4256 toh->level = IPPROTO_IPV6; 4257 toh->name = IPV6_PKTINFO; 4258 toh->len = sizeof (struct T_opthdr) + 4259 sizeof (*pkti); 4260 toh->status = 0; 4261 dstopt += sizeof (struct T_opthdr); 4262 pkti = (struct in6_pktinfo *)dstopt; 4263 if (ipversion == IPV6_VERSION) 4264 pkti->ipi6_addr = ip6h->ip6_dst; 4265 else 4266 IN6_IPADDR_TO_V4MAPPED( 4267 ((ipha_t *)rptr)->ipha_dst, 4268 &pkti->ipi6_addr); 4269 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4270 dstopt += sizeof (*pkti); 4271 udi_size -= toh->len; 4272 } 4273 if (udp_bits.udpb_ipv6_recvhoplimit) { 4274 struct T_opthdr *toh; 4275 4276 toh = (struct T_opthdr *)dstopt; 4277 toh->level = IPPROTO_IPV6; 4278 toh->name = IPV6_HOPLIMIT; 4279 toh->len = sizeof (struct T_opthdr) + 4280 sizeof (uint_t); 4281 toh->status = 0; 4282 dstopt += sizeof (struct T_opthdr); 4283 if 
(ipversion == IPV6_VERSION) 4284 *(uint_t *)dstopt = ip6h->ip6_hops; 4285 else 4286 *(uint_t *)dstopt = 4287 ((ipha_t *)rptr)->ipha_ttl; 4288 dstopt += sizeof (uint_t); 4289 udi_size -= toh->len; 4290 } 4291 if (udp_bits.udpb_ipv6_recvtclass) { 4292 struct T_opthdr *toh; 4293 4294 toh = (struct T_opthdr *)dstopt; 4295 toh->level = IPPROTO_IPV6; 4296 toh->name = IPV6_TCLASS; 4297 toh->len = sizeof (struct T_opthdr) + 4298 sizeof (uint_t); 4299 toh->status = 0; 4300 dstopt += sizeof (struct T_opthdr); 4301 if (ipversion == IPV6_VERSION) { 4302 *(uint_t *)dstopt = 4303 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4304 } else { 4305 ipha_t *ipha = (ipha_t *)rptr; 4306 *(uint_t *)dstopt = 4307 ipha->ipha_type_of_service; 4308 } 4309 dstopt += sizeof (uint_t); 4310 udi_size -= toh->len; 4311 } 4312 if ((udp_bits.udpb_ipv6_recvhopopts) && 4313 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4314 size_t hlen; 4315 4316 hlen = copy_hop_opts(&ipp, dstopt); 4317 dstopt += hlen; 4318 udi_size -= hlen; 4319 } 4320 if ((udp_bits.udpb_ipv6_recvdstopts) && 4321 (udp_bits.udpb_ipv6_recvrthdr) && 4322 (ipp.ipp_fields & IPPF_RTHDR) && 4323 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4324 struct T_opthdr *toh; 4325 4326 toh = (struct T_opthdr *)dstopt; 4327 toh->level = IPPROTO_IPV6; 4328 toh->name = IPV6_DSTOPTS; 4329 toh->len = sizeof (struct T_opthdr) + 4330 ipp.ipp_rtdstoptslen; 4331 toh->status = 0; 4332 dstopt += sizeof (struct T_opthdr); 4333 bcopy(ipp.ipp_rtdstopts, dstopt, 4334 ipp.ipp_rtdstoptslen); 4335 dstopt += ipp.ipp_rtdstoptslen; 4336 udi_size -= toh->len; 4337 } 4338 if ((udp_bits.udpb_ipv6_recvrthdr) && 4339 (ipp.ipp_fields & IPPF_RTHDR)) { 4340 struct T_opthdr *toh; 4341 4342 toh = (struct T_opthdr *)dstopt; 4343 toh->level = IPPROTO_IPV6; 4344 toh->name = IPV6_RTHDR; 4345 toh->len = sizeof (struct T_opthdr) + 4346 ipp.ipp_rthdrlen; 4347 toh->status = 0; 4348 dstopt += sizeof (struct T_opthdr); 4349 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4350 dstopt += ipp.ipp_rthdrlen; 4351 udi_size -= 
toh->len; 4352 } 4353 if ((udp_bits.udpb_ipv6_recvdstopts) && 4354 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4355 struct T_opthdr *toh; 4356 4357 toh = (struct T_opthdr *)dstopt; 4358 toh->level = IPPROTO_IPV6; 4359 toh->name = IPV6_DSTOPTS; 4360 toh->len = sizeof (struct T_opthdr) + 4361 ipp.ipp_dstoptslen; 4362 toh->status = 0; 4363 dstopt += sizeof (struct T_opthdr); 4364 bcopy(ipp.ipp_dstopts, dstopt, 4365 ipp.ipp_dstoptslen); 4366 dstopt += ipp.ipp_dstoptslen; 4367 udi_size -= toh->len; 4368 } 4369 if (cr != NULL) { 4370 struct T_opthdr *toh; 4371 4372 toh = (struct T_opthdr *)dstopt; 4373 toh->level = SOL_SOCKET; 4374 toh->name = SCM_UCRED; 4375 toh->len = sizeof (struct T_opthdr) + ucredsize; 4376 toh->status = 0; 4377 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4378 dstopt += toh->len; 4379 udi_size -= toh->len; 4380 } 4381 if (udp_bits.udpb_timestamp) { 4382 struct T_opthdr *toh; 4383 4384 toh = (struct T_opthdr *)dstopt; 4385 toh->level = SOL_SOCKET; 4386 toh->name = SCM_TIMESTAMP; 4387 toh->len = sizeof (struct T_opthdr) + 4388 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4389 toh->status = 0; 4390 dstopt += sizeof (struct T_opthdr); 4391 /* Align for gethrestime() */ 4392 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4393 sizeof (intptr_t)); 4394 gethrestime((timestruc_t *)dstopt); 4395 dstopt = (uchar_t *)toh + toh->len; 4396 udi_size -= toh->len; 4397 } 4398 4399 /* Consumed all of allocated space */ 4400 ASSERT(udi_size == 0); 4401 } 4402 #undef sin6 4403 /* No IP_RECVDSTADDR for IPv6. */ 4404 } 4405 4406 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4407 if (options_mp != NULL) 4408 freeb(options_mp); 4409 4410 udp_ulp_recv(connp, mp); 4411 4412 return; 4413 4414 tossit: 4415 freemsg(mp); 4416 if (options_mp != NULL) 4417 freeb(options_mp); 4418 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4419 } 4420 4421 /* 4422 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4423 * information that can be changing beneath us. 
4424 */ 4425 mblk_t * 4426 udp_snmp_get(queue_t *q, mblk_t *mpctl) 4427 { 4428 mblk_t *mpdata; 4429 mblk_t *mp_conn_ctl; 4430 mblk_t *mp_attr_ctl; 4431 mblk_t *mp6_conn_ctl; 4432 mblk_t *mp6_attr_ctl; 4433 mblk_t *mp_conn_tail; 4434 mblk_t *mp_attr_tail; 4435 mblk_t *mp6_conn_tail; 4436 mblk_t *mp6_attr_tail; 4437 struct opthdr *optp; 4438 mib2_udpEntry_t ude; 4439 mib2_udp6Entry_t ude6; 4440 mib2_transportMLPEntry_t mlp; 4441 int state; 4442 zoneid_t zoneid; 4443 int i; 4444 connf_t *connfp; 4445 conn_t *connp = Q_TO_CONN(q); 4446 int v4_conn_idx; 4447 int v6_conn_idx; 4448 boolean_t needattr; 4449 udp_t *udp; 4450 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4451 udp_stack_t *us = connp->conn_netstack->netstack_udp; 4452 mblk_t *mp2ctl; 4453 4454 /* 4455 * make a copy of the original message 4456 */ 4457 mp2ctl = copymsg(mpctl); 4458 4459 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 4460 if (mpctl == NULL || 4461 (mpdata = mpctl->b_cont) == NULL || 4462 (mp_conn_ctl = copymsg(mpctl)) == NULL || 4463 (mp_attr_ctl = copymsg(mpctl)) == NULL || 4464 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 4465 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 4466 freemsg(mp_conn_ctl); 4467 freemsg(mp_attr_ctl); 4468 freemsg(mp6_conn_ctl); 4469 freemsg(mpctl); 4470 freemsg(mp2ctl); 4471 return (0); 4472 } 4473 4474 zoneid = connp->conn_zoneid; 4475 4476 /* fixed length structure for IPv4 and IPv6 counters */ 4477 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 4478 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 4479 /* synchronize 64- and 32-bit counters */ 4480 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 4481 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 4482 4483 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4484 optp->level = MIB2_UDP; 4485 optp->name = 0; 4486 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 4487 sizeof (us->us_udp_mib)); 4488 optp->len = 
msgdsize(mpdata); 4489 qreply(q, mpctl); 4490 4491 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 4492 v4_conn_idx = v6_conn_idx = 0; 4493 4494 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4495 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4496 connp = NULL; 4497 4498 while ((connp = ipcl_get_next_conn(connfp, connp, 4499 IPCL_UDPCONN))) { 4500 udp = connp->conn_udp; 4501 if (zoneid != connp->conn_zoneid) 4502 continue; 4503 4504 /* 4505 * Note that the port numbers are sent in 4506 * host byte order 4507 */ 4508 4509 if (udp->udp_state == TS_UNBND) 4510 state = MIB2_UDP_unbound; 4511 else if (udp->udp_state == TS_IDLE) 4512 state = MIB2_UDP_idle; 4513 else if (udp->udp_state == TS_DATA_XFER) 4514 state = MIB2_UDP_connected; 4515 else 4516 state = MIB2_UDP_unknown; 4517 4518 needattr = B_FALSE; 4519 bzero(&mlp, sizeof (mlp)); 4520 if (connp->conn_mlp_type != mlptSingle) { 4521 if (connp->conn_mlp_type == mlptShared || 4522 connp->conn_mlp_type == mlptBoth) 4523 mlp.tme_flags |= MIB2_TMEF_SHARED; 4524 if (connp->conn_mlp_type == mlptPrivate || 4525 connp->conn_mlp_type == mlptBoth) 4526 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 4527 needattr = B_TRUE; 4528 } 4529 4530 /* 4531 * Create an IPv4 table entry for IPv4 entries and also 4532 * any IPv6 entries which are bound to in6addr_any 4533 * (i.e. anything a IPv4 peer could connect/send to). 
4534 */ 4535 if (udp->udp_ipversion == IPV4_VERSION || 4536 (udp->udp_state <= TS_IDLE && 4537 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 4538 ude.udpEntryInfo.ue_state = state; 4539 /* 4540 * If in6addr_any this will set it to 4541 * INADDR_ANY 4542 */ 4543 ude.udpLocalAddress = 4544 V4_PART_OF_V6(udp->udp_v6src); 4545 ude.udpLocalPort = ntohs(udp->udp_port); 4546 if (udp->udp_state == TS_DATA_XFER) { 4547 /* 4548 * Can potentially get here for 4549 * v6 socket if another process 4550 * (say, ping) has just done a 4551 * sendto(), changing the state 4552 * from the TS_IDLE above to 4553 * TS_DATA_XFER by the time we hit 4554 * this part of the code. 4555 */ 4556 ude.udpEntryInfo.ue_RemoteAddress = 4557 V4_PART_OF_V6(udp->udp_v6dst); 4558 ude.udpEntryInfo.ue_RemotePort = 4559 ntohs(udp->udp_dstport); 4560 } else { 4561 ude.udpEntryInfo.ue_RemoteAddress = 0; 4562 ude.udpEntryInfo.ue_RemotePort = 0; 4563 } 4564 4565 /* 4566 * We make the assumption that all udp_t 4567 * structs will be created within an address 4568 * region no larger than 32-bits. 4569 */ 4570 ude.udpInstance = (uint32_t)(uintptr_t)udp; 4571 ude.udpCreationProcess = 4572 (udp->udp_open_pid < 0) ? 
4573 MIB2_UNKNOWN_PROCESS : 4574 udp->udp_open_pid; 4575 ude.udpCreationTime = udp->udp_open_time; 4576 4577 (void) snmp_append_data2(mp_conn_ctl->b_cont, 4578 &mp_conn_tail, (char *)&ude, sizeof (ude)); 4579 mlp.tme_connidx = v4_conn_idx++; 4580 if (needattr) 4581 (void) snmp_append_data2( 4582 mp_attr_ctl->b_cont, &mp_attr_tail, 4583 (char *)&mlp, sizeof (mlp)); 4584 } 4585 if (udp->udp_ipversion == IPV6_VERSION) { 4586 ude6.udp6EntryInfo.ue_state = state; 4587 ude6.udp6LocalAddress = udp->udp_v6src; 4588 ude6.udp6LocalPort = ntohs(udp->udp_port); 4589 ude6.udp6IfIndex = udp->udp_bound_if; 4590 if (udp->udp_state == TS_DATA_XFER) { 4591 ude6.udp6EntryInfo.ue_RemoteAddress = 4592 udp->udp_v6dst; 4593 ude6.udp6EntryInfo.ue_RemotePort = 4594 ntohs(udp->udp_dstport); 4595 } else { 4596 ude6.udp6EntryInfo.ue_RemoteAddress = 4597 sin6_null.sin6_addr; 4598 ude6.udp6EntryInfo.ue_RemotePort = 0; 4599 } 4600 /* 4601 * We make the assumption that all udp_t 4602 * structs will be created within an address 4603 * region no larger than 32-bits. 4604 */ 4605 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 4606 ude6.udp6CreationProcess = 4607 (udp->udp_open_pid < 0) ? 4608 MIB2_UNKNOWN_PROCESS : 4609 udp->udp_open_pid; 4610 ude6.udp6CreationTime = udp->udp_open_time; 4611 4612 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 4613 &mp6_conn_tail, (char *)&ude6, 4614 sizeof (ude6)); 4615 mlp.tme_connidx = v6_conn_idx++; 4616 if (needattr) 4617 (void) snmp_append_data2( 4618 mp6_attr_ctl->b_cont, 4619 &mp6_attr_tail, (char *)&mlp, 4620 sizeof (mlp)); 4621 } 4622 } 4623 } 4624 4625 /* IPv4 UDP endpoints */ 4626 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 4627 sizeof (struct T_optmgmt_ack)]; 4628 optp->level = MIB2_UDP; 4629 optp->name = MIB2_UDP_ENTRY; 4630 optp->len = msgdsize(mp_conn_ctl->b_cont); 4631 qreply(q, mp_conn_ctl); 4632 4633 /* table of MLP attributes... 
*/ 4634 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 4635 sizeof (struct T_optmgmt_ack)]; 4636 optp->level = MIB2_UDP; 4637 optp->name = EXPER_XPORT_MLP; 4638 optp->len = msgdsize(mp_attr_ctl->b_cont); 4639 if (optp->len == 0) 4640 freemsg(mp_attr_ctl); 4641 else 4642 qreply(q, mp_attr_ctl); 4643 4644 /* IPv6 UDP endpoints */ 4645 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 4646 sizeof (struct T_optmgmt_ack)]; 4647 optp->level = MIB2_UDP6; 4648 optp->name = MIB2_UDP6_ENTRY; 4649 optp->len = msgdsize(mp6_conn_ctl->b_cont); 4650 qreply(q, mp6_conn_ctl); 4651 4652 /* table of MLP attributes... */ 4653 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 4654 sizeof (struct T_optmgmt_ack)]; 4655 optp->level = MIB2_UDP6; 4656 optp->name = EXPER_XPORT_MLP; 4657 optp->len = msgdsize(mp6_attr_ctl->b_cont); 4658 if (optp->len == 0) 4659 freemsg(mp6_attr_ctl); 4660 else 4661 qreply(q, mp6_attr_ctl); 4662 4663 return (mp2ctl); 4664 } 4665 4666 /* 4667 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 4668 * NOTE: Per MIB-II, UDP has no writable data. 4669 * TODO: If this ever actually tries to set anything, it needs to be 4670 * to do the appropriate locking. 4671 */ 4672 /* ARGSUSED */ 4673 int 4674 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4675 uchar_t *ptr, int len) 4676 { 4677 switch (level) { 4678 case MIB2_UDP: 4679 return (0); 4680 default: 4681 return (1); 4682 } 4683 } 4684 4685 /* 4686 * This routine creates a T_UDERROR_IND message and passes it upstream. 4687 * The address and options are copied from the T_UNITDATA_REQ message 4688 * passed in mp. This message is freed. 
4689 */ 4690 static void 4691 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4692 t_scalar_t err) 4693 { 4694 struct T_unitdata_req *tudr; 4695 mblk_t *mp1; 4696 uchar_t *optaddr; 4697 t_scalar_t optlen; 4698 4699 if (DB_TYPE(mp) == M_DATA) { 4700 ASSERT(destaddr != NULL && destlen != 0); 4701 optaddr = NULL; 4702 optlen = 0; 4703 } else { 4704 if ((mp->b_wptr < mp->b_rptr) || 4705 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4706 goto done; 4707 } 4708 tudr = (struct T_unitdata_req *)mp->b_rptr; 4709 destaddr = mp->b_rptr + tudr->DEST_offset; 4710 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4711 destaddr + tudr->DEST_length < mp->b_rptr || 4712 destaddr + tudr->DEST_length > mp->b_wptr) { 4713 goto done; 4714 } 4715 optaddr = mp->b_rptr + tudr->OPT_offset; 4716 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4717 optaddr + tudr->OPT_length < mp->b_rptr || 4718 optaddr + tudr->OPT_length > mp->b_wptr) { 4719 goto done; 4720 } 4721 destlen = tudr->DEST_length; 4722 optlen = tudr->OPT_length; 4723 } 4724 4725 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4726 (char *)optaddr, optlen, err); 4727 if (mp1 != NULL) 4728 qreply(q, mp1); 4729 4730 done: 4731 freemsg(mp); 4732 } 4733 4734 /* 4735 * This routine removes a port number association from a stream. It 4736 * is called by udp_wput to handle T_UNBIND_REQ messages. 4737 */ 4738 static void 4739 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4740 { 4741 conn_t *connp = Q_TO_CONN(q); 4742 int error; 4743 4744 error = udp_do_unbind(connp); 4745 if (error) { 4746 if (error < 0) 4747 udp_err_ack(q, mp, -error, 0); 4748 else 4749 udp_err_ack(q, mp, TSYSERR, error); 4750 return; 4751 } 4752 4753 mp = mi_tpi_ok_ack_alloc(mp); 4754 ASSERT(mp != NULL); 4755 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4756 qreply(q, mp); 4757 } 4758 4759 /* 4760 * Don't let port fall into the privileged range. 
4761 * Since the extra privileged ports can be arbitrary we also 4762 * ensure that we exclude those from consideration. 4763 * us->us_epriv_ports is not sorted thus we loop over it until 4764 * there are no changes. 4765 */ 4766 static in_port_t 4767 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4768 { 4769 int i; 4770 in_port_t nextport; 4771 boolean_t restart = B_FALSE; 4772 udp_stack_t *us = udp->udp_us; 4773 4774 if (random && udp_random_anon_port != 0) { 4775 (void) random_get_pseudo_bytes((uint8_t *)&port, 4776 sizeof (in_port_t)); 4777 /* 4778 * Unless changed by a sys admin, the smallest anon port 4779 * is 32768 and the largest anon port is 65535. It is 4780 * very likely (50%) for the random port to be smaller 4781 * than the smallest anon port. When that happens, 4782 * add port % (anon port range) to the smallest anon 4783 * port to get the random port. It should fall into the 4784 * valid anon port range. 4785 */ 4786 if (port < us->us_smallest_anon_port) { 4787 port = us->us_smallest_anon_port + 4788 port % (us->us_largest_anon_port - 4789 us->us_smallest_anon_port); 4790 } 4791 } 4792 4793 retry: 4794 if (port < us->us_smallest_anon_port) 4795 port = us->us_smallest_anon_port; 4796 4797 if (port > us->us_largest_anon_port) { 4798 port = us->us_smallest_anon_port; 4799 if (restart) 4800 return (0); 4801 restart = B_TRUE; 4802 } 4803 4804 if (port < us->us_smallest_nonpriv_port) 4805 port = us->us_smallest_nonpriv_port; 4806 4807 for (i = 0; i < us->us_num_epriv_ports; i++) { 4808 if (port == us->us_epriv_ports[i]) { 4809 port++; 4810 /* 4811 * Make sure that the port is in the 4812 * valid range. 
4813 */ 4814 goto retry; 4815 } 4816 } 4817 4818 if (is_system_labeled() && 4819 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4820 port, IPPROTO_UDP, B_TRUE)) != 0) { 4821 port = nextport; 4822 goto retry; 4823 } 4824 4825 return (port); 4826 } 4827 4828 static int 4829 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst, 4830 boolean_t *update_lastdst) 4831 { 4832 int err; 4833 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4834 udp_t *udp = Q_TO_UDP(wq); 4835 udp_stack_t *us = udp->udp_us; 4836 cred_t *cr; 4837 4838 /* 4839 * All Solaris components should pass a db_credp 4840 * for this message, hence we ASSERT. 4841 * On production kernels we return an error to be robust against 4842 * random streams modules sitting on top of us. 4843 */ 4844 cr = msg_getcred(mp, NULL); 4845 ASSERT(cr != NULL); 4846 if (cr == NULL) 4847 return (EINVAL); 4848 4849 /* Note that we use the cred/label from the message to handle MLP */ 4850 err = tsol_compute_label(cr, dst, 4851 opt_storage, udp->udp_connp->conn_mac_exempt, 4852 us->us_netstack->netstack_ip); 4853 if (err == 0) { 4854 err = tsol_update_options(&udp->udp_ip_snd_options, 4855 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 4856 opt_storage); 4857 } 4858 if (err != 0) { 4859 DTRACE_PROBE4( 4860 tx__ip__log__info__updatelabel__udp, 4861 char *, "queue(1) failed to update options(2) on mp(3)", 4862 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4863 } else { 4864 *update_lastdst = B_TRUE; 4865 } 4866 return (err); 4867 } 4868 4869 static mblk_t * 4870 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 4871 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 4872 cred_t *cr, pid_t pid) 4873 { 4874 udp_t *udp = connp->conn_udp; 4875 mblk_t *mp1 = mp; 4876 mblk_t *mp2; 4877 ipha_t *ipha; 4878 int ip_hdr_length; 4879 uint32_t ip_len; 4880 udpha_t *udpha; 4881 boolean_t lock_held = B_FALSE; 4882 in_port_t uha_src_port; 4883 udpattrs_t attrs; 4884 uchar_t 
ip_snd_opt[IP_MAX_OPT_LENGTH]; 4885 uint32_t ip_snd_opt_len = 0; 4886 ip4_pkt_t pktinfo; 4887 ip4_pkt_t *pktinfop = &pktinfo; 4888 ip_opt_info_t optinfo; 4889 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4890 udp_stack_t *us = udp->udp_us; 4891 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 4892 queue_t *q = connp->conn_wq; 4893 ire_t *ire; 4894 in6_addr_t v6dst; 4895 boolean_t update_lastdst = B_FALSE; 4896 4897 *error = 0; 4898 pktinfop->ip4_ill_index = 0; 4899 pktinfop->ip4_addr = INADDR_ANY; 4900 optinfo.ip_opt_flags = 0; 4901 optinfo.ip_opt_ill_index = 0; 4902 4903 if (v4dst == INADDR_ANY) 4904 v4dst = htonl(INADDR_LOOPBACK); 4905 4906 /* 4907 * If options passed in, feed it for verification and handling 4908 */ 4909 attrs.udpattr_credset = B_FALSE; 4910 if (IPCL_IS_NONSTR(connp)) { 4911 if (msg->msg_controllen != 0) { 4912 attrs.udpattr_ipp4 = pktinfop; 4913 attrs.udpattr_mb = mp; 4914 4915 rw_enter(&udp->udp_rwlock, RW_WRITER); 4916 *error = process_auxiliary_options(connp, 4917 msg->msg_control, msg->msg_controllen, 4918 &attrs, &udp_opt_obj, udp_opt_set, cr); 4919 rw_exit(&udp->udp_rwlock); 4920 if (*error) 4921 goto done; 4922 } 4923 } else { 4924 if (DB_TYPE(mp) != M_DATA) { 4925 mp1 = mp->b_cont; 4926 if (((struct T_unitdata_req *) 4927 mp->b_rptr)->OPT_length != 0) { 4928 attrs.udpattr_ipp4 = pktinfop; 4929 attrs.udpattr_mb = mp; 4930 if (udp_unitdata_opt_process(q, mp, error, 4931 &attrs) < 0) 4932 goto done; 4933 /* 4934 * Note: success in processing options. 4935 * mp option buffer represented by 4936 * OPT_length/offset now potentially modified 4937 * and contain option setting results 4938 */ 4939 ASSERT(*error == 0); 4940 } 4941 } 4942 } 4943 4944 /* mp1 points to the M_DATA mblk carrying the packet */ 4945 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 4946 4947 /* 4948 * Determine whether we need to mark the mblk with the user's 4949 * credentials. 4950 * If labeled then sockfs would have already done this. 
4951 */ 4952 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 4953 4954 ire = connp->conn_ire_cache; 4955 if (CLASSD(v4dst) || (ire == NULL) || (ire->ire_addr != v4dst) || 4956 (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { 4957 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 4958 mblk_setcred(mp, cr, pid); 4959 } 4960 4961 rw_enter(&udp->udp_rwlock, RW_READER); 4962 lock_held = B_TRUE; 4963 4964 /* 4965 * Cluster and TSOL note: 4966 * udp.udp_v6lastdst is shared by Cluster and TSOL 4967 * udp.udp_lastdstport is used by Cluster 4968 * 4969 * Both Cluster and TSOL need to update the dest addr and/or port. 4970 * Updating is done after both Cluster and TSOL checks, protected 4971 * by conn_lock. 4972 */ 4973 mutex_enter(&connp->conn_lock); 4974 4975 if (cl_inet_connect2 != NULL && 4976 (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 4977 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 4978 udp->udp_lastdstport != port)) { 4979 mutex_exit(&connp->conn_lock); 4980 *error = 0; 4981 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 4982 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, port, *error); 4983 if (*error != 0) { 4984 *error = EHOSTUNREACH; 4985 goto done; 4986 } 4987 update_lastdst = B_TRUE; 4988 mutex_enter(&connp->conn_lock); 4989 } 4990 4991 /* 4992 * Check if our saved options are valid; update if not. 4993 * TSOL Note: Since we are not in WRITER mode, UDP packets 4994 * to different destination may require different labels, 4995 * or worse, UDP packets to same IP address may require 4996 * different labels due to use of shared all-zones address. 4997 * We use conn_lock to ensure that lastdst, ip_snd_options, 4998 * and ip_snd_options_len are consistent for the current 4999 * destination and are updated atomically. 
5000 */ 5001 if (is_system_labeled()) { 5002 /* Using UDP MLP requires SCM_UCRED from user */ 5003 if (connp->conn_mlp_type != mlptSingle && 5004 !attrs.udpattr_credset) { 5005 mutex_exit(&connp->conn_lock); 5006 DTRACE_PROBE4( 5007 tx__ip__log__info__output__udp, 5008 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5009 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5010 *error = ECONNREFUSED; 5011 goto done; 5012 } 5013 /* 5014 * update label option for this UDP socket if 5015 * - the destination has changed, or 5016 * - the UDP socket is MLP 5017 */ 5018 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5019 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5020 connp->conn_mlp_type != mlptSingle) && 5021 (*error = udp_update_label(q, mp, v4dst, &update_lastdst)) 5022 != 0) { 5023 mutex_exit(&connp->conn_lock); 5024 goto done; 5025 } 5026 } 5027 if (update_lastdst) { 5028 IN6_IPADDR_TO_V4MAPPED(v4dst, &udp->udp_v6lastdst); 5029 udp->udp_lastdstport = port; 5030 } 5031 if (udp->udp_ip_snd_options_len > 0) { 5032 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5033 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5034 } 5035 mutex_exit(&connp->conn_lock); 5036 5037 /* Add an IP header */ 5038 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5039 (insert_spi ? sizeof (uint32_t) : 0); 5040 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5041 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5042 !OK_32PTR(ipha)) { 5043 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5044 if (mp2 == NULL) { 5045 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5046 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5047 *error = ENOMEM; 5048 goto done; 5049 } 5050 mp2->b_wptr = DB_LIM(mp2); 5051 mp2->b_cont = mp1; 5052 mp1 = mp2; 5053 if (DB_TYPE(mp) != M_DATA) 5054 mp->b_cont = mp1; 5055 else 5056 mp = mp1; 5057 5058 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5059 } 5060 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? 
sizeof (uint32_t) : 0)); 5061 #ifdef _BIG_ENDIAN 5062 /* Set version, header length, and tos */ 5063 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5064 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5065 udp->udp_type_of_service); 5066 /* Set ttl and protocol */ 5067 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5068 #else 5069 /* Set version, header length, and tos */ 5070 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5071 ((udp->udp_type_of_service << 8) | 5072 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5073 /* Set ttl and protocol */ 5074 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5075 #endif 5076 if (pktinfop->ip4_addr != INADDR_ANY) { 5077 ipha->ipha_src = pktinfop->ip4_addr; 5078 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5079 } else { 5080 /* 5081 * Copy our address into the packet. If this is zero, 5082 * first look at __sin6_src_id for a hint. If we leave the 5083 * source as INADDR_ANY then ip will fill in the real source 5084 * address. 
5085 */ 5086 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5087 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5088 in6_addr_t v6src; 5089 5090 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5091 us->us_netstack); 5092 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5093 } 5094 } 5095 uha_src_port = udp->udp_port; 5096 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5097 rw_exit(&udp->udp_rwlock); 5098 lock_held = B_FALSE; 5099 } 5100 5101 if (pktinfop->ip4_ill_index != 0) { 5102 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5103 } 5104 5105 ipha->ipha_fragment_offset_and_flags = 0; 5106 ipha->ipha_ident = 0; 5107 5108 mp1->b_rptr = (uchar_t *)ipha; 5109 5110 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5111 (uintptr_t)UINT_MAX); 5112 5113 /* Determine length of packet */ 5114 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5115 if ((mp2 = mp1->b_cont) != NULL) { 5116 do { 5117 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5118 ip_len += (uint32_t)MBLKL(mp2); 5119 } while ((mp2 = mp2->b_cont) != NULL); 5120 } 5121 /* 5122 * If the size of the packet is greater than the maximum allowed by 5123 * ip, return an error. Passing this down could cause panics because 5124 * the size will have wrapped and be inconsistent with the msg size. 5125 */ 5126 if (ip_len > IP_MAXPACKET) { 5127 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5128 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5129 *error = EMSGSIZE; 5130 goto done; 5131 } 5132 ipha->ipha_length = htons((uint16_t)ip_len); 5133 ip_len -= ip_hdr_length; 5134 ip_len = htons((uint16_t)ip_len); 5135 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5136 5137 /* Insert all-0s SPI now. */ 5138 if (insert_spi) 5139 *((uint32_t *)(udpha + 1)) = 0; 5140 5141 /* 5142 * Copy in the destination address 5143 */ 5144 ipha->ipha_dst = v4dst; 5145 5146 /* 5147 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 
5148 */ 5149 if (CLASSD(v4dst)) 5150 ipha->ipha_ttl = udp->udp_multicast_ttl; 5151 5152 udpha->uha_dst_port = port; 5153 udpha->uha_src_port = uha_src_port; 5154 5155 if (ip_snd_opt_len > 0) { 5156 uint32_t cksum; 5157 5158 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5159 lock_held = B_FALSE; 5160 rw_exit(&udp->udp_rwlock); 5161 /* 5162 * Massage source route putting first source route in ipha_dst. 5163 * Ignore the destination in T_unitdata_req. 5164 * Create a checksum adjustment for a source route, if any. 5165 */ 5166 cksum = ip_massage_options(ipha, us->us_netstack); 5167 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5168 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5169 (ipha->ipha_dst & 0xFFFF); 5170 if ((int)cksum < 0) 5171 cksum--; 5172 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5173 /* 5174 * IP does the checksum if uha_checksum is non-zero, 5175 * We make it easy for IP to include our pseudo header 5176 * by putting our length in uha_checksum. 5177 */ 5178 cksum += ip_len; 5179 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5180 /* There might be a carry. */ 5181 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5182 #ifdef _LITTLE_ENDIAN 5183 if (us->us_do_checksum) 5184 ip_len = (cksum << 16) | ip_len; 5185 #else 5186 if (us->us_do_checksum) 5187 ip_len = (ip_len << 16) | cksum; 5188 else 5189 ip_len <<= 16; 5190 #endif 5191 } else { 5192 /* 5193 * IP does the checksum if uha_checksum is non-zero, 5194 * We make it easy for IP to include our pseudo header 5195 * by putting our length in uha_checksum. 
5196 */ 5197 if (us->us_do_checksum) 5198 ip_len |= (ip_len << 16); 5199 #ifndef _LITTLE_ENDIAN 5200 else 5201 ip_len <<= 16; 5202 #endif 5203 } 5204 ASSERT(!lock_held); 5205 /* Set UDP length and checksum */ 5206 *((uint32_t *)&udpha->uha_length) = ip_len; 5207 5208 if (DB_TYPE(mp) != M_DATA) { 5209 cred_t *cr; 5210 pid_t cpid; 5211 5212 /* Move any cred from the T_UNITDATA_REQ to the packet */ 5213 cr = msg_extractcred(mp, &cpid); 5214 if (cr != NULL) { 5215 if (mp1->b_datap->db_credp != NULL) 5216 crfree(mp1->b_datap->db_credp); 5217 mp1->b_datap->db_credp = cr; 5218 mp1->b_datap->db_cpid = cpid; 5219 } 5220 ASSERT(mp != mp1); 5221 freeb(mp); 5222 } 5223 5224 /* mp has been consumed and we'll return success */ 5225 ASSERT(*error == 0); 5226 mp = NULL; 5227 5228 /* We're done. Pass the packet to ip. */ 5229 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5230 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5231 "udp_wput_end: q %p (%S)", q, "end"); 5232 5233 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5234 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5235 connp->conn_dontroute || 5236 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5237 optinfo.ip_opt_ill_index != 0 || 5238 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5239 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5240 ipst->ips_ip_g_mrouter != NULL) { 5241 UDP_STAT(us, udp_ip_send); 5242 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5243 &optinfo); 5244 } else { 5245 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5246 } 5247 5248 done: 5249 if (lock_held) 5250 rw_exit(&udp->udp_rwlock); 5251 if (*error != 0) { 5252 ASSERT(mp != NULL); 5253 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5254 } 5255 return (mp); 5256 } 5257 5258 static void 5259 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5260 { 5261 conn_t *connp = udp->udp_connp; 5262 ipaddr_t src, dst; 5263 ire_t *ire; 5264 ipif_t *ipif = NULL; 5265 mblk_t *ire_fp_mp; 5266 boolean_t retry_caching; 5267 udp_stack_t 
*us = udp->udp_us; 5268 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5269 5270 dst = ipha->ipha_dst; 5271 src = ipha->ipha_src; 5272 ASSERT(ipha->ipha_ident == 0); 5273 5274 if (CLASSD(dst)) { 5275 int err; 5276 5277 ipif = conn_get_held_ipif(connp, 5278 &connp->conn_multicast_ipif, &err); 5279 5280 if (ipif == NULL || ipif->ipif_isv6 || 5281 (ipif->ipif_ill->ill_phyint->phyint_flags & 5282 PHYI_LOOPBACK)) { 5283 if (ipif != NULL) 5284 ipif_refrele(ipif); 5285 UDP_STAT(us, udp_ip_send); 5286 ip_output(connp, mp, q, IP_WPUT); 5287 return; 5288 } 5289 } 5290 5291 retry_caching = B_FALSE; 5292 mutex_enter(&connp->conn_lock); 5293 ire = connp->conn_ire_cache; 5294 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5295 5296 if (ire == NULL || ire->ire_addr != dst || 5297 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5298 retry_caching = B_TRUE; 5299 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5300 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5301 5302 ASSERT(ipif != NULL); 5303 if (!IS_ON_SAME_LAN(stq_ill, ipif->ipif_ill)) 5304 retry_caching = B_TRUE; 5305 } 5306 5307 if (!retry_caching) { 5308 ASSERT(ire != NULL); 5309 IRE_REFHOLD(ire); 5310 mutex_exit(&connp->conn_lock); 5311 } else { 5312 boolean_t cached = B_FALSE; 5313 5314 connp->conn_ire_cache = NULL; 5315 mutex_exit(&connp->conn_lock); 5316 5317 /* Release the old ire */ 5318 if (ire != NULL) { 5319 IRE_REFRELE_NOTR(ire); 5320 ire = NULL; 5321 } 5322 5323 if (CLASSD(dst)) { 5324 ASSERT(ipif != NULL); 5325 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5326 connp->conn_zoneid, msg_getlabel(mp), 5327 MATCH_IRE_ILL, ipst); 5328 } else { 5329 ASSERT(ipif == NULL); 5330 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5331 msg_getlabel(mp), ipst); 5332 } 5333 5334 if (ire == NULL) { 5335 if (ipif != NULL) 5336 ipif_refrele(ipif); 5337 UDP_STAT(us, udp_ire_null); 5338 ip_output(connp, mp, q, IP_WPUT); 5339 return; 5340 } 5341 IRE_REFHOLD_NOTR(ire); 5342 5343 mutex_enter(&connp->conn_lock); 5344 if 
(CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5345 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5346 irb_t *irb = ire->ire_bucket; 5347 5348 /* 5349 * IRE's created for non-connection oriented transports 5350 * are normally initialized with IRE_MARK_TEMPORARY set 5351 * in the ire_marks. These IRE's are preferentially 5352 * reaped when the hash chain length in the cache 5353 * bucket exceeds the maximum value specified in 5354 * ip[6]_ire_max_bucket_cnt. This can severely affect 5355 * UDP performance if IRE cache entries that we need 5356 * to reuse are continually removed. To remedy this, 5357 * when we cache the IRE in the conn_t, we remove the 5358 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5359 * set. 5360 */ 5361 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5362 rw_enter(&irb->irb_lock, RW_WRITER); 5363 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5364 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5365 irb->irb_tmp_ire_cnt--; 5366 } 5367 rw_exit(&irb->irb_lock); 5368 } 5369 connp->conn_ire_cache = ire; 5370 cached = B_TRUE; 5371 } 5372 mutex_exit(&connp->conn_lock); 5373 5374 /* 5375 * We can continue to use the ire but since it was not 5376 * cached, we should drop the extra reference. 5377 */ 5378 if (!cached) 5379 IRE_REFRELE_NOTR(ire); 5380 } 5381 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5382 ASSERT(!CLASSD(dst) || ipif != NULL); 5383 5384 /* 5385 * Check if we can take the fast-path. 
5386 * Note that "incomplete" ire's (where the link-layer for next hop 5387 * is not resolved, or where the fast-path header in nce_fp_mp is not 5388 * available yet) are sent down the legacy (slow) path 5389 */ 5390 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 5391 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 5392 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 5393 ((ire->ire_nce == NULL) || 5394 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 5395 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 5396 if (ipif != NULL) 5397 ipif_refrele(ipif); 5398 UDP_STAT(us, udp_ip_ire_send); 5399 IRE_REFRELE(ire); 5400 ip_output(connp, mp, q, IP_WPUT); 5401 return; 5402 } 5403 5404 if (src == INADDR_ANY && !connp->conn_unspec_src) { 5405 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 5406 ipha->ipha_src = ipif->ipif_src_addr; 5407 else 5408 ipha->ipha_src = ire->ire_src_addr; 5409 } 5410 5411 if (ipif != NULL) 5412 ipif_refrele(ipif); 5413 5414 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 5415 } 5416 5417 static void 5418 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 5419 { 5420 ipaddr_t src, dst; 5421 ill_t *ill; 5422 mblk_t *ire_fp_mp; 5423 uint_t ire_fp_mp_len; 5424 uint16_t *up; 5425 uint32_t cksum, hcksum_txflags; 5426 queue_t *dev_q; 5427 udp_t *udp = connp->conn_udp; 5428 ipha_t *ipha = (ipha_t *)mp->b_rptr; 5429 udp_stack_t *us = udp->udp_us; 5430 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5431 boolean_t ll_multicast = B_FALSE; 5432 boolean_t direct_send; 5433 5434 dev_q = ire->ire_stq->q_next; 5435 ASSERT(dev_q != NULL); 5436 5437 ill = ire_to_ill(ire); 5438 ASSERT(ill != NULL); 5439 5440 /* 5441 * For the direct send case, if resetting of conn_direct_blocked 5442 * was missed, it is still ok because the putq() would enable 5443 * the queue and write service will drain it out. 
5444 */ 5445 direct_send = ILL_DIRECT_CAPABLE(ill); 5446 5447 /* is queue flow controlled? */ 5448 if ((!direct_send) && (q->q_first != NULL || connp->conn_draining || 5449 DEV_Q_FLOW_BLOCKED(dev_q))) { 5450 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 5451 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5452 if (ipst->ips_ip_output_queue) { 5453 DTRACE_PROBE1(udp__xmit__putq, conn_t *, connp); 5454 (void) putq(connp->conn_wq, mp); 5455 } else { 5456 freemsg(mp); 5457 } 5458 ire_refrele(ire); 5459 return; 5460 } 5461 5462 ire_fp_mp = ire->ire_nce->nce_fp_mp; 5463 ire_fp_mp_len = MBLKL(ire_fp_mp); 5464 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 5465 5466 dst = ipha->ipha_dst; 5467 src = ipha->ipha_src; 5468 5469 5470 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 5471 5472 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 5473 #ifndef _BIG_ENDIAN 5474 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 5475 #endif 5476 5477 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 5478 ASSERT(ill->ill_hcksum_capab != NULL); 5479 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 5480 } else { 5481 hcksum_txflags = 0; 5482 } 5483 5484 /* pseudo-header checksum (do it in parts for IP header checksum) */ 5485 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 5486 5487 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 5488 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 5489 if (*up != 0) { 5490 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 5491 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 5492 ntohs(ipha->ipha_length), cksum); 5493 5494 /* Software checksum? 
*/ 5495 if (DB_CKSUMFLAGS(mp) == 0) { 5496 UDP_STAT(us, udp_out_sw_cksum); 5497 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 5498 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 5499 } 5500 } 5501 5502 if (!CLASSD(dst)) { 5503 ipha->ipha_fragment_offset_and_flags |= 5504 (uint32_t)htons(ire->ire_frag_flag); 5505 } 5506 5507 /* Calculate IP header checksum if hardware isn't capable */ 5508 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 5509 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 5510 ((uint16_t *)ipha)[4]); 5511 } 5512 5513 if (CLASSD(dst)) { 5514 if (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL) { 5515 ip_multicast_loopback(q, ill, mp, 5516 connp->conn_multicast_loop ? 0 : 5517 IP_FF_NO_MCAST_LOOP, zoneid); 5518 } 5519 5520 /* If multicast TTL is 0 then we are done */ 5521 if (ipha->ipha_ttl == 0) { 5522 freemsg(mp); 5523 ire_refrele(ire); 5524 return; 5525 } 5526 ll_multicast = B_TRUE; 5527 } 5528 5529 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 5530 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 5531 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 5532 5533 UPDATE_OB_PKT_COUNT(ire); 5534 ire->ire_last_used_time = lbolt; 5535 5536 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 5537 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 5538 ntohs(ipha->ipha_length)); 5539 5540 DTRACE_PROBE4(ip4__physical__out__start, 5541 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 5542 FW_HOOKS(ipst->ips_ip4_physical_out_event, 5543 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 5544 ll_multicast, ipst); 5545 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 5546 if (ipst->ips_ipobs_enabled && mp != NULL) { 5547 zoneid_t szone; 5548 5549 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 5550 ipst, ALL_ZONES); 5551 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 5552 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst); 5553 } 5554 5555 if (mp == NULL) 5556 goto bail; 5557 5558 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 5559 void_ip_t *, ipha, 
__dtrace_ipsr_ill_t *, ill, 5560 ipha_t *, ipha, ip6_t *, NULL, int, 0); 5561 5562 if (direct_send) { 5563 uintptr_t cookie; 5564 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 5565 5566 cookie = idd->idd_tx_df(idd->idd_tx_dh, mp, 5567 (uintptr_t)connp, 0); 5568 if (cookie != NULL) { 5569 idl_tx_list_t *idl_txl; 5570 5571 /* 5572 * Flow controlled. 5573 */ 5574 DTRACE_PROBE2(non__null__cookie, uintptr_t, 5575 cookie, conn_t *, connp); 5576 idl_txl = &ipst->ips_idl_tx_list[IDLHASHINDEX(cookie)]; 5577 mutex_enter(&idl_txl->txl_lock); 5578 /* 5579 * Check again after holding txl_lock to see if Tx 5580 * ring is still blocked and only then insert the 5581 * connp into the drain list. 5582 */ 5583 if (connp->conn_direct_blocked || 5584 (idd->idd_tx_fctl_df(idd->idd_tx_fctl_dh, 5585 cookie) == 0)) { 5586 mutex_exit(&idl_txl->txl_lock); 5587 goto bail; 5588 } 5589 if (idl_txl->txl_cookie != NULL && 5590 idl_txl->txl_cookie != cookie) { 5591 DTRACE_PROBE2(udp__xmit__collision, 5592 uintptr_t, cookie, 5593 uintptr_t, idl_txl->txl_cookie); 5594 UDP_STAT(us, udp_cookie_coll); 5595 } else { 5596 connp->conn_direct_blocked = B_TRUE; 5597 idl_txl->txl_cookie = cookie; 5598 conn_drain_insert(connp, idl_txl); 5599 DTRACE_PROBE1(udp__xmit__insert, 5600 conn_t *, connp); 5601 } 5602 mutex_exit(&idl_txl->txl_lock); 5603 } 5604 } else { 5605 DTRACE_PROBE1(udp__xmit__putnext, mblk_t *, mp); 5606 putnext(ire->ire_stq, mp); 5607 } 5608 bail: 5609 IRE_REFRELE(ire); 5610 } 5611 5612 static boolean_t 5613 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst, 5614 boolean_t *update_lastdst) 5615 { 5616 udp_t *udp = Q_TO_UDP(wq); 5617 int err; 5618 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 5619 udp_stack_t *us = udp->udp_us; 5620 cred_t *cr; 5621 5622 /* 5623 * All Solaris components should pass a db_credp 5624 * for this message, hence we ASSERT. 5625 * On production kernels we return an error to be robust against 5626 * random streams modules sitting on top of us. 
5627 */ 5628 cr = msg_getcred(mp, NULL); 5629 ASSERT(cr != NULL); 5630 if (cr == NULL) 5631 return (EINVAL); 5632 5633 /* Note that we use the cred/label from the message to handle MLP */ 5634 err = tsol_compute_label_v6(cr, 5635 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 5636 us->us_netstack->netstack_ip); 5637 if (err == 0) { 5638 err = tsol_update_sticky(&udp->udp_sticky_ipp, 5639 &udp->udp_label_len_v6, opt_storage); 5640 } 5641 if (err != 0) { 5642 DTRACE_PROBE4( 5643 tx__ip__log__drop__updatelabel__udp6, 5644 char *, "queue(1) failed to update options(2) on mp(3)", 5645 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5646 } else { 5647 *update_lastdst = B_TRUE; 5648 } 5649 return (err); 5650 } 5651 5652 static int 5653 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5654 pid_t pid) 5655 { 5656 udp_t *udp = connp->conn_udp; 5657 udp_stack_t *us = udp->udp_us; 5658 ipaddr_t v4dst; 5659 in_port_t dstport; 5660 boolean_t mapped_addr; 5661 struct sockaddr_storage ss; 5662 sin_t *sin; 5663 sin6_t *sin6; 5664 struct sockaddr *addr; 5665 socklen_t addrlen; 5666 int error; 5667 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5668 5669 /* M_DATA for connected socket */ 5670 5671 ASSERT(udp->udp_issocket); 5672 UDP_DBGSTAT(us, udp_data_conn); 5673 5674 mutex_enter(&connp->conn_lock); 5675 if (udp->udp_state != TS_DATA_XFER) { 5676 mutex_exit(&connp->conn_lock); 5677 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5678 UDP_STAT(us, udp_out_err_notconn); 5679 freemsg(mp); 5680 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5681 "udp_wput_end: connp %p (%S)", connp, 5682 "not-connected; address required"); 5683 return (EDESTADDRREQ); 5684 } 5685 5686 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5687 if (mapped_addr) 5688 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5689 5690 /* Initialize addr and addrlen as if they're passed in */ 5691 if (udp->udp_family == AF_INET) { 5692 sin = (sin_t *)&ss; 5693 sin->sin_family = AF_INET; 5694 
dstport = sin->sin_port = udp->udp_dstport; 5695 ASSERT(mapped_addr); 5696 sin->sin_addr.s_addr = v4dst; 5697 addr = (struct sockaddr *)sin; 5698 addrlen = sizeof (*sin); 5699 } else { 5700 sin6 = (sin6_t *)&ss; 5701 sin6->sin6_family = AF_INET6; 5702 dstport = sin6->sin6_port = udp->udp_dstport; 5703 sin6->sin6_flowinfo = udp->udp_flowinfo; 5704 sin6->sin6_addr = udp->udp_v6dst; 5705 sin6->sin6_scope_id = 0; 5706 sin6->__sin6_src_id = 0; 5707 addr = (struct sockaddr *)sin6; 5708 addrlen = sizeof (*sin6); 5709 } 5710 mutex_exit(&connp->conn_lock); 5711 5712 if (mapped_addr) { 5713 /* 5714 * Handle both AF_INET and AF_INET6; the latter 5715 * for IPV4 mapped destination addresses. Note 5716 * here that both addr and addrlen point to the 5717 * corresponding struct depending on the address 5718 * family of the socket. 5719 */ 5720 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5721 insert_spi, msg, cr, pid); 5722 } else { 5723 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5724 } 5725 if (error == 0) { 5726 ASSERT(mp == NULL); 5727 return (0); 5728 } 5729 5730 UDP_STAT(us, udp_out_err_output); 5731 ASSERT(mp != NULL); 5732 if (IPCL_IS_NONSTR(connp)) { 5733 freemsg(mp); 5734 return (error); 5735 } else { 5736 /* mp is freed by the following routine */ 5737 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5738 (t_scalar_t)addrlen, (t_scalar_t)error); 5739 return (0); 5740 } 5741 } 5742 5743 /* ARGSUSED */ 5744 static int 5745 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5746 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5747 { 5748 5749 udp_t *udp = connp->conn_udp; 5750 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5751 int error = 0; 5752 sin6_t *sin6; 5753 sin_t *sin; 5754 uint_t srcid; 5755 uint16_t port; 5756 ipaddr_t v4dst; 5757 5758 5759 ASSERT(addr != NULL); 5760 5761 switch (udp->udp_family) { 5762 case AF_INET6: 5763 sin6 = (sin6_t *)addr; 5764 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 
5765 /* 5766 * Destination is a non-IPv4-compatible IPv6 address. 5767 * Send out an IPv6 format packet. 5768 */ 5769 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5770 pid); 5771 if (error != 0) 5772 goto ud_error; 5773 5774 return (0); 5775 } 5776 /* 5777 * If the local address is not zero or a mapped address 5778 * return an error. It would be possible to send an IPv4 5779 * packet but the response would never make it back to the 5780 * application since it is bound to a non-mapped address. 5781 */ 5782 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5783 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5784 error = EADDRNOTAVAIL; 5785 goto ud_error; 5786 } 5787 /* Send IPv4 packet without modifying udp_ipversion */ 5788 /* Extract port and ipaddr */ 5789 port = sin6->sin6_port; 5790 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5791 srcid = sin6->__sin6_src_id; 5792 break; 5793 5794 case AF_INET: 5795 sin = (sin_t *)addr; 5796 /* Extract port and ipaddr */ 5797 port = sin->sin_port; 5798 v4dst = sin->sin_addr.s_addr; 5799 srcid = 0; 5800 break; 5801 } 5802 5803 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5804 msg, cr, pid); 5805 5806 if (error == 0) { 5807 ASSERT(mp == NULL); 5808 return (0); 5809 } 5810 5811 ud_error: 5812 ASSERT(mp != NULL); 5813 5814 return (error); 5815 } 5816 5817 /* 5818 * This routine handles all messages passed downstream. It either 5819 * consumes the message or passes it downstream; it never queues a 5820 * a message. 5821 * 5822 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 5823 * is valid when we are directly beneath the stream head, and thus sockfs 5824 * is able to bypass STREAMS and directly call us, passing along the sockaddr 5825 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 5826 * connected endpoints. 
5827 */ 5828 void 5829 udp_wput(queue_t *q, mblk_t *mp) 5830 { 5831 conn_t *connp = Q_TO_CONN(q); 5832 udp_t *udp = connp->conn_udp; 5833 int error = 0; 5834 struct sockaddr *addr; 5835 socklen_t addrlen; 5836 udp_stack_t *us = udp->udp_us; 5837 5838 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 5839 "udp_wput_start: queue %p mp %p", q, mp); 5840 5841 /* 5842 * We directly handle several cases here: T_UNITDATA_REQ message 5843 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 5844 * socket. 5845 */ 5846 switch (DB_TYPE(mp)) { 5847 case M_DATA: 5848 /* 5849 * Quick check for error cases. Checks will be done again 5850 * under the lock later on 5851 */ 5852 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 5853 /* Not connected; address is required */ 5854 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5855 UDP_STAT(us, udp_out_err_notconn); 5856 freemsg(mp); 5857 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5858 "udp_wput_end: connp %p (%S)", connp, 5859 "not-connected; address required"); 5860 return; 5861 } 5862 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 5863 return; 5864 5865 case M_PROTO: 5866 case M_PCPROTO: { 5867 struct T_unitdata_req *tudr; 5868 5869 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 5870 tudr = (struct T_unitdata_req *)mp->b_rptr; 5871 5872 /* Handle valid T_UNITDATA_REQ here */ 5873 if (MBLKL(mp) >= sizeof (*tudr) && 5874 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 5875 if (mp->b_cont == NULL) { 5876 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5877 "udp_wput_end: q %p (%S)", q, "badaddr"); 5878 error = EPROTO; 5879 goto ud_error; 5880 } 5881 5882 if (!MBLKIN(mp, 0, tudr->DEST_offset + 5883 tudr->DEST_length)) { 5884 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5885 "udp_wput_end: q %p (%S)", q, "badaddr"); 5886 error = EADDRNOTAVAIL; 5887 goto ud_error; 5888 } 5889 /* 5890 * If a port has not been bound to the stream, fail. 
5891 * This is not a problem when sockfs is directly 5892 * above us, because it will ensure that the socket 5893 * is first bound before allowing data to be sent. 5894 */ 5895 if (udp->udp_state == TS_UNBND) { 5896 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5897 "udp_wput_end: q %p (%S)", q, "outstate"); 5898 error = EPROTO; 5899 goto ud_error; 5900 } 5901 addr = (struct sockaddr *) 5902 &mp->b_rptr[tudr->DEST_offset]; 5903 addrlen = tudr->DEST_length; 5904 if (tudr->OPT_length != 0) 5905 UDP_STAT(us, udp_out_opt); 5906 break; 5907 } 5908 /* FALLTHRU */ 5909 } 5910 default: 5911 udp_wput_other(q, mp); 5912 return; 5913 } 5914 ASSERT(addr != NULL); 5915 5916 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 5917 -1); 5918 if (error != 0) { 5919 ud_error: 5920 UDP_STAT(us, udp_out_err_output); 5921 ASSERT(mp != NULL); 5922 /* mp is freed by the following routine */ 5923 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 5924 (t_scalar_t)error); 5925 } 5926 } 5927 5928 /* ARGSUSED */ 5929 static void 5930 udp_wput_fallback(queue_t *wq, mblk_t *mp) 5931 { 5932 #ifdef DEBUG 5933 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 5934 #endif 5935 freemsg(mp); 5936 } 5937 5938 5939 /* 5940 * udp_output_v6(): 5941 * Assumes that udp_wput did some sanity checking on the destination 5942 * address. 
5943 */ 5944 static mblk_t * 5945 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 5946 struct nmsghdr *msg, cred_t *cr, pid_t pid) 5947 { 5948 ip6_t *ip6h; 5949 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 5950 mblk_t *mp1 = mp; 5951 mblk_t *mp2; 5952 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 5953 size_t ip_len; 5954 udpha_t *udph; 5955 udp_t *udp = connp->conn_udp; 5956 udp_stack_t *us = udp->udp_us; 5957 queue_t *q = connp->conn_wq; 5958 ip6_pkt_t ipp_s; /* For ancillary data options */ 5959 ip6_pkt_t *ipp = &ipp_s; 5960 ip6_pkt_t *tipp; /* temporary ipp */ 5961 uint32_t csum = 0; 5962 uint_t ignore = 0; 5963 uint_t option_exists = 0, is_sticky = 0; 5964 uint8_t *cp; 5965 uint8_t *nxthdr_ptr; 5966 in6_addr_t ip6_dst; 5967 in_port_t port; 5968 udpattrs_t attrs; 5969 boolean_t opt_present; 5970 ip6_hbh_t *hopoptsptr = NULL; 5971 uint_t hopoptslen = 0; 5972 boolean_t is_ancillary = B_FALSE; 5973 size_t sth_wroff = 0; 5974 ire_t *ire; 5975 boolean_t update_lastdst = B_FALSE; 5976 5977 *error = 0; 5978 5979 /* 5980 * If the local address is a mapped address return 5981 * an error. 5982 * It would be possible to send an IPv6 packet but the 5983 * response would never make it back to the application 5984 * since it is bound to a mapped address. 
5985 */ 5986 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 5987 *error = EADDRNOTAVAIL; 5988 goto done; 5989 } 5990 5991 ipp->ipp_fields = 0; 5992 ipp->ipp_sticky_ignored = 0; 5993 5994 /* 5995 * If TPI options passed in, feed it for verification and handling 5996 */ 5997 attrs.udpattr_credset = B_FALSE; 5998 opt_present = B_FALSE; 5999 if (IPCL_IS_NONSTR(connp)) { 6000 if (msg->msg_controllen != 0) { 6001 attrs.udpattr_ipp6 = ipp; 6002 attrs.udpattr_mb = mp; 6003 6004 rw_enter(&udp->udp_rwlock, RW_WRITER); 6005 *error = process_auxiliary_options(connp, 6006 msg->msg_control, msg->msg_controllen, 6007 &attrs, &udp_opt_obj, udp_opt_set, cr); 6008 rw_exit(&udp->udp_rwlock); 6009 if (*error) 6010 goto done; 6011 ASSERT(*error == 0); 6012 opt_present = B_TRUE; 6013 } 6014 } else { 6015 if (DB_TYPE(mp) != M_DATA) { 6016 mp1 = mp->b_cont; 6017 if (((struct T_unitdata_req *) 6018 mp->b_rptr)->OPT_length != 0) { 6019 attrs.udpattr_ipp6 = ipp; 6020 attrs.udpattr_mb = mp; 6021 if (udp_unitdata_opt_process(q, mp, error, 6022 &attrs) < 0) { 6023 goto done; 6024 } 6025 ASSERT(*error == 0); 6026 opt_present = B_TRUE; 6027 } 6028 } 6029 } 6030 6031 /* 6032 * Determine whether we need to mark the mblk with the user's 6033 * credentials. 6034 * If labeled then sockfs would have already done this. 
6035 */ 6036 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 6037 ire = connp->conn_ire_cache; 6038 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || (ire == NULL) || 6039 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6040 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6041 if (cr != NULL && msg_getcred(mp, NULL) == NULL) 6042 mblk_setcred(mp, cr, pid); 6043 } 6044 6045 rw_enter(&udp->udp_rwlock, RW_READER); 6046 ignore = ipp->ipp_sticky_ignored; 6047 6048 /* mp1 points to the M_DATA mblk carrying the packet */ 6049 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6050 6051 if (sin6->sin6_scope_id != 0 && 6052 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6053 /* 6054 * IPPF_SCOPE_ID is special. It's neither a sticky 6055 * option nor ancillary data. It needs to be 6056 * explicitly set in options_exists. 6057 */ 6058 option_exists |= IPPF_SCOPE_ID; 6059 } 6060 6061 /* 6062 * Compute the destination address 6063 */ 6064 ip6_dst = sin6->sin6_addr; 6065 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6066 ip6_dst = ipv6_loopback; 6067 6068 port = sin6->sin6_port; 6069 6070 /* 6071 * Cluster and TSOL notes, Cluster check: 6072 * see comments in udp_output_v4(). 6073 */ 6074 mutex_enter(&connp->conn_lock); 6075 6076 if (cl_inet_connect2 != NULL && 6077 (!IN6_ARE_ADDR_EQUAL(&ip6_dst, &udp->udp_v6lastdst) || 6078 port != udp->udp_lastdstport)) { 6079 mutex_exit(&connp->conn_lock); 6080 *error = 0; 6081 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &ip6_dst, port, *error); 6082 if (*error != 0) { 6083 *error = EHOSTUNREACH; 6084 rw_exit(&udp->udp_rwlock); 6085 goto done; 6086 } 6087 update_lastdst = B_TRUE; 6088 mutex_enter(&connp->conn_lock); 6089 } 6090 6091 /* 6092 * If we're not going to the same destination as last time, then 6093 * recompute the label required. This is done in a separate routine to 6094 * avoid blowing up our stack here. 
6095 * 6096 * TSOL Note: Since we are not in WRITER mode, UDP packets 6097 * to different destination may require different labels, 6098 * or worse, UDP packets to same IP address may require 6099 * different labels due to use of shared all-zones address. 6100 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6101 * and sticky ipp_hopoptslen are consistent for the current 6102 * destination and are updated atomically. 6103 */ 6104 if (is_system_labeled()) { 6105 /* Using UDP MLP requires SCM_UCRED from user */ 6106 if (connp->conn_mlp_type != mlptSingle && 6107 !attrs.udpattr_credset) { 6108 DTRACE_PROBE4( 6109 tx__ip__log__info__output__udp6, 6110 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6111 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6112 *error = ECONNREFUSED; 6113 rw_exit(&udp->udp_rwlock); 6114 mutex_exit(&connp->conn_lock); 6115 goto done; 6116 } 6117 /* 6118 * update label option for this UDP socket if 6119 * - the destination has changed, or 6120 * - the UDP socket is MLP 6121 */ 6122 if ((opt_present || 6123 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6124 connp->conn_mlp_type != mlptSingle) && 6125 (*error = udp_update_label_v6(q, mp, &ip6_dst, 6126 &update_lastdst)) != 0) { 6127 rw_exit(&udp->udp_rwlock); 6128 mutex_exit(&connp->conn_lock); 6129 goto done; 6130 } 6131 } 6132 6133 if (update_lastdst) { 6134 udp->udp_v6lastdst = ip6_dst; 6135 udp->udp_lastdstport = port; 6136 } 6137 6138 /* 6139 * If there's a security label here, then we ignore any options the 6140 * user may try to set. We keep the peer's label as a hidden sticky 6141 * option. We make a private copy of this label before releasing the 6142 * lock so that label is kept consistent with the destination addr. 
6143 */ 6144 if (udp->udp_label_len_v6 > 0) { 6145 ignore &= ~IPPF_HOPOPTS; 6146 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6147 } 6148 6149 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6150 /* No sticky options nor ancillary data. */ 6151 mutex_exit(&connp->conn_lock); 6152 goto no_options; 6153 } 6154 6155 /* 6156 * Go through the options figuring out where each is going to 6157 * come from and build two masks. The first mask indicates if 6158 * the option exists at all. The second mask indicates if the 6159 * option is sticky or ancillary. 6160 */ 6161 if (!(ignore & IPPF_HOPOPTS)) { 6162 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6163 option_exists |= IPPF_HOPOPTS; 6164 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6165 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6166 option_exists |= IPPF_HOPOPTS; 6167 is_sticky |= IPPF_HOPOPTS; 6168 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6169 hopoptsptr = kmem_alloc( 6170 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6171 if (hopoptsptr == NULL) { 6172 *error = ENOMEM; 6173 mutex_exit(&connp->conn_lock); 6174 goto done; 6175 } 6176 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6177 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6178 hopoptslen); 6179 udp_ip_hdr_len += hopoptslen; 6180 } 6181 } 6182 mutex_exit(&connp->conn_lock); 6183 6184 if (!(ignore & IPPF_RTHDR)) { 6185 if (ipp->ipp_fields & IPPF_RTHDR) { 6186 option_exists |= IPPF_RTHDR; 6187 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6188 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6189 option_exists |= IPPF_RTHDR; 6190 is_sticky |= IPPF_RTHDR; 6191 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6192 } 6193 } 6194 6195 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6196 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6197 option_exists |= IPPF_RTDSTOPTS; 6198 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6199 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6200 option_exists |= IPPF_RTDSTOPTS; 6201 
is_sticky |= IPPF_RTDSTOPTS; 6202 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6203 } 6204 } 6205 6206 if (!(ignore & IPPF_DSTOPTS)) { 6207 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6208 option_exists |= IPPF_DSTOPTS; 6209 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6210 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6211 option_exists |= IPPF_DSTOPTS; 6212 is_sticky |= IPPF_DSTOPTS; 6213 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6214 } 6215 } 6216 6217 if (!(ignore & IPPF_IFINDEX)) { 6218 if (ipp->ipp_fields & IPPF_IFINDEX) { 6219 option_exists |= IPPF_IFINDEX; 6220 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6221 option_exists |= IPPF_IFINDEX; 6222 is_sticky |= IPPF_IFINDEX; 6223 } 6224 } 6225 6226 if (!(ignore & IPPF_ADDR)) { 6227 if (ipp->ipp_fields & IPPF_ADDR) { 6228 option_exists |= IPPF_ADDR; 6229 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6230 option_exists |= IPPF_ADDR; 6231 is_sticky |= IPPF_ADDR; 6232 } 6233 } 6234 6235 if (!(ignore & IPPF_DONTFRAG)) { 6236 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6237 option_exists |= IPPF_DONTFRAG; 6238 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6239 option_exists |= IPPF_DONTFRAG; 6240 is_sticky |= IPPF_DONTFRAG; 6241 } 6242 } 6243 6244 if (!(ignore & IPPF_USE_MIN_MTU)) { 6245 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6246 option_exists |= IPPF_USE_MIN_MTU; 6247 } else if (udp->udp_sticky_ipp.ipp_fields & 6248 IPPF_USE_MIN_MTU) { 6249 option_exists |= IPPF_USE_MIN_MTU; 6250 is_sticky |= IPPF_USE_MIN_MTU; 6251 } 6252 } 6253 6254 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6255 option_exists |= IPPF_HOPLIMIT; 6256 /* IPV6_HOPLIMIT can never be sticky */ 6257 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6258 6259 if (!(ignore & IPPF_UNICAST_HOPS) && 6260 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6261 option_exists |= IPPF_UNICAST_HOPS; 6262 is_sticky |= IPPF_UNICAST_HOPS; 6263 } 6264 6265 if 
(!(ignore & IPPF_MULTICAST_HOPS) && 6266 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6267 option_exists |= IPPF_MULTICAST_HOPS; 6268 is_sticky |= IPPF_MULTICAST_HOPS; 6269 } 6270 6271 if (!(ignore & IPPF_TCLASS)) { 6272 if (ipp->ipp_fields & IPPF_TCLASS) { 6273 option_exists |= IPPF_TCLASS; 6274 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6275 option_exists |= IPPF_TCLASS; 6276 is_sticky |= IPPF_TCLASS; 6277 } 6278 } 6279 6280 if (!(ignore & IPPF_NEXTHOP) && 6281 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6282 option_exists |= IPPF_NEXTHOP; 6283 is_sticky |= IPPF_NEXTHOP; 6284 } 6285 6286 no_options: 6287 6288 /* 6289 * If any options carried in the ip6i_t were specified, we 6290 * need to account for the ip6i_t in the data we'll be sending 6291 * down. 6292 */ 6293 if (option_exists & IPPF_HAS_IP6I) 6294 udp_ip_hdr_len += sizeof (ip6i_t); 6295 6296 /* check/fix buffer config, setup pointers into it */ 6297 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6298 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6299 !OK_32PTR(ip6h)) { 6300 6301 /* Try to get everything in a single mblk next time */ 6302 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6303 udp->udp_max_hdr_len = udp_ip_hdr_len; 6304 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6305 } 6306 6307 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6308 if (mp2 == NULL) { 6309 *error = ENOMEM; 6310 rw_exit(&udp->udp_rwlock); 6311 goto done; 6312 } 6313 mp2->b_wptr = DB_LIM(mp2); 6314 mp2->b_cont = mp1; 6315 mp1 = mp2; 6316 if (DB_TYPE(mp) != M_DATA) 6317 mp->b_cont = mp1; 6318 else 6319 mp = mp1; 6320 6321 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6322 } 6323 mp1->b_rptr = (unsigned char *)ip6h; 6324 ip6i = (ip6i_t *)ip6h; 6325 6326 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 6327 if (option_exists & IPPF_HAS_IP6I) { 6328 ip6h = (ip6_t *)&ip6i[1]; 6329 ip6i->ip6i_flags = 0; 6330 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6331 6332 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6333 if (option_exists & IPPF_SCOPE_ID) { 6334 ip6i->ip6i_flags |= IP6I_IFINDEX; 6335 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6336 } else if (option_exists & IPPF_IFINDEX) { 6337 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6338 ASSERT(tipp->ipp_ifindex != 0); 6339 ip6i->ip6i_flags |= IP6I_IFINDEX; 6340 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6341 } 6342 6343 if (option_exists & IPPF_ADDR) { 6344 /* 6345 * Enable per-packet source address verification if 6346 * IPV6_PKTINFO specified the source address. 6347 * ip6_src is set in the transport's _wput function. 6348 */ 6349 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6350 } 6351 6352 if (option_exists & IPPF_DONTFRAG) { 6353 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6354 } 6355 6356 if (option_exists & IPPF_USE_MIN_MTU) { 6357 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6358 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6359 } 6360 6361 if (option_exists & IPPF_NEXTHOP) { 6362 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6363 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6364 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6365 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6366 } 6367 6368 /* 6369 * tell IP this is an ip6i_t private header 6370 */ 6371 ip6i->ip6i_nxt = IPPROTO_RAW; 6372 } 6373 6374 /* Initialize IPv6 header */ 6375 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6376 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6377 6378 /* Set the hoplimit of the outgoing packet. */ 6379 if (option_exists & IPPF_HOPLIMIT) { 6380 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 6381 ip6h->ip6_hops = ipp->ipp_hoplimit; 6382 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6383 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6384 ip6h->ip6_hops = udp->udp_multicast_ttl; 6385 if (option_exists & IPPF_MULTICAST_HOPS) 6386 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6387 } else { 6388 ip6h->ip6_hops = udp->udp_ttl; 6389 if (option_exists & IPPF_UNICAST_HOPS) 6390 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6391 } 6392 6393 if (option_exists & IPPF_ADDR) { 6394 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6395 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6396 ip6h->ip6_src = tipp->ipp_addr; 6397 } else { 6398 /* 6399 * The source address was not set using IPV6_PKTINFO. 6400 * First look at the bound source. 6401 * If unspecified fallback to __sin6_src_id. 6402 */ 6403 ip6h->ip6_src = udp->udp_v6src; 6404 if (sin6->__sin6_src_id != 0 && 6405 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6406 ip_srcid_find_id(sin6->__sin6_src_id, 6407 &ip6h->ip6_src, connp->conn_zoneid, 6408 us->us_netstack); 6409 } 6410 } 6411 6412 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6413 cp = (uint8_t *)&ip6h[1]; 6414 6415 /* 6416 * Here's where we have to start stringing together 6417 * any extension headers in the right order: 6418 * Hop-by-hop, destination, routing, and final destination opts. 
6419 */ 6420 if (option_exists & IPPF_HOPOPTS) { 6421 /* Hop-by-hop options */ 6422 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6423 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6424 if (hopoptslen == 0) { 6425 hopoptsptr = tipp->ipp_hopopts; 6426 hopoptslen = tipp->ipp_hopoptslen; 6427 is_ancillary = B_TRUE; 6428 } 6429 6430 *nxthdr_ptr = IPPROTO_HOPOPTS; 6431 nxthdr_ptr = &hbh->ip6h_nxt; 6432 6433 bcopy(hopoptsptr, cp, hopoptslen); 6434 cp += hopoptslen; 6435 6436 if (hopoptsptr != NULL && !is_ancillary) { 6437 kmem_free(hopoptsptr, hopoptslen); 6438 hopoptsptr = NULL; 6439 hopoptslen = 0; 6440 } 6441 } 6442 /* 6443 * En-route destination options 6444 * Only do them if there's a routing header as well 6445 */ 6446 if (option_exists & IPPF_RTDSTOPTS) { 6447 ip6_dest_t *dst = (ip6_dest_t *)cp; 6448 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6449 6450 *nxthdr_ptr = IPPROTO_DSTOPTS; 6451 nxthdr_ptr = &dst->ip6d_nxt; 6452 6453 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6454 cp += tipp->ipp_rtdstoptslen; 6455 } 6456 /* 6457 * Routing header next 6458 */ 6459 if (option_exists & IPPF_RTHDR) { 6460 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6461 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6462 6463 *nxthdr_ptr = IPPROTO_ROUTING; 6464 nxthdr_ptr = &rt->ip6r_nxt; 6465 6466 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6467 cp += tipp->ipp_rthdrlen; 6468 } 6469 /* 6470 * Do ultimate destination options 6471 */ 6472 if (option_exists & IPPF_DSTOPTS) { 6473 ip6_dest_t *dest = (ip6_dest_t *)cp; 6474 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6475 6476 *nxthdr_ptr = IPPROTO_DSTOPTS; 6477 nxthdr_ptr = &dest->ip6d_nxt; 6478 6479 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6480 cp += tipp->ipp_dstoptslen; 6481 } 6482 /* 6483 * Now set the last header pointer to the proto passed in 6484 */ 6485 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6486 *nxthdr_ptr = IPPROTO_UDP; 6487 6488 /* Update UDP header */ 6489 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 6490 udph->uha_dst_port = sin6->sin6_port; 6491 udph->uha_src_port = udp->udp_port; 6492 6493 /* 6494 * Copy in the destination address 6495 */ 6496 ip6h->ip6_dst = ip6_dst; 6497 6498 ip6h->ip6_vcf = 6499 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6500 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6501 6502 if (option_exists & IPPF_TCLASS) { 6503 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6504 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6505 tipp->ipp_tclass); 6506 } 6507 rw_exit(&udp->udp_rwlock); 6508 6509 if (option_exists & IPPF_RTHDR) { 6510 ip6_rthdr_t *rth; 6511 6512 /* 6513 * Perform any processing needed for source routing. 6514 * We know that all extension headers will be in the same mblk 6515 * as the IPv6 header. 6516 */ 6517 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6518 if (rth != NULL && rth->ip6r_segleft != 0) { 6519 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6520 /* 6521 * Drop packet - only support Type 0 routing. 6522 * Notify the application as well. 6523 */ 6524 *error = EPROTO; 6525 goto done; 6526 } 6527 6528 /* 6529 * rth->ip6r_len is twice the number of 6530 * addresses in the header. Thus it must be even. 6531 */ 6532 if (rth->ip6r_len & 0x1) { 6533 *error = EPROTO; 6534 goto done; 6535 } 6536 /* 6537 * Shuffle the routing header and ip6_dst 6538 * addresses, and get the checksum difference 6539 * between the first hop (in ip6_dst) and 6540 * the destination (in the last routing hdr entry). 6541 */ 6542 csum = ip_massage_options_v6(ip6h, rth, 6543 us->us_netstack); 6544 /* 6545 * Verify that the first hop isn't a mapped address. 6546 * Routers along the path need to do this verification 6547 * for subsequent hops. 
6548 */ 6549 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6550 *error = EADDRNOTAVAIL; 6551 goto done; 6552 } 6553 6554 cp += (rth->ip6r_len + 1)*8; 6555 } 6556 } 6557 6558 /* count up length of UDP packet */ 6559 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6560 if ((mp2 = mp1->b_cont) != NULL) { 6561 do { 6562 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6563 ip_len += (uint32_t)MBLKL(mp2); 6564 } while ((mp2 = mp2->b_cont) != NULL); 6565 } 6566 6567 /* 6568 * If the size of the packet is greater than the maximum allowed by 6569 * ip, return an error. Passing this down could cause panics because 6570 * the size will have wrapped and be inconsistent with the msg size. 6571 */ 6572 if (ip_len > IP_MAXPACKET) { 6573 *error = EMSGSIZE; 6574 goto done; 6575 } 6576 6577 /* Store the UDP length. Subtract length of extension hdrs */ 6578 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6579 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6580 6581 /* 6582 * We make it easy for IP to include our pseudo header 6583 * by putting our length in uh_checksum, modified (if 6584 * we have a routing header) by the checksum difference 6585 * between the ultimate destination and first hop addresses. 6586 * Note: UDP over IPv6 must always checksum the packet. 
6587 */ 6588 csum += udph->uha_length; 6589 csum = (csum & 0xFFFF) + (csum >> 16); 6590 udph->uha_checksum = (uint16_t)csum; 6591 6592 #ifdef _LITTLE_ENDIAN 6593 ip_len = htons(ip_len); 6594 #endif 6595 ip6h->ip6_plen = ip_len; 6596 6597 if (DB_TYPE(mp) != M_DATA) { 6598 cred_t *cr; 6599 pid_t cpid; 6600 6601 /* Move any cred from the T_UNITDATA_REQ to the packet */ 6602 cr = msg_extractcred(mp, &cpid); 6603 if (cr != NULL) { 6604 if (mp1->b_datap->db_credp != NULL) 6605 crfree(mp1->b_datap->db_credp); 6606 mp1->b_datap->db_credp = cr; 6607 mp1->b_datap->db_cpid = cpid; 6608 } 6609 6610 ASSERT(mp != mp1); 6611 freeb(mp); 6612 } 6613 6614 /* mp has been consumed and we'll return success */ 6615 ASSERT(*error == 0); 6616 mp = NULL; 6617 6618 /* We're done. Pass the packet to IP */ 6619 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6620 ip_output_v6(connp, mp1, q, IP_WPUT); 6621 6622 done: 6623 if (sth_wroff != 0) { 6624 (void) proto_set_tx_wroff(RD(q), connp, 6625 udp->udp_max_hdr_len + us->us_wroff_extra); 6626 } 6627 if (hopoptsptr != NULL && !is_ancillary) { 6628 kmem_free(hopoptsptr, hopoptslen); 6629 hopoptsptr = NULL; 6630 } 6631 if (*error != 0) { 6632 ASSERT(mp != NULL); 6633 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6634 } 6635 return (mp); 6636 } 6637 6638 6639 static int 6640 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6641 { 6642 sin_t *sin = (sin_t *)sa; 6643 sin6_t *sin6 = (sin6_t *)sa; 6644 6645 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6646 6647 if (udp->udp_state != TS_DATA_XFER) 6648 return (ENOTCONN); 6649 6650 switch (udp->udp_family) { 6651 case AF_INET: 6652 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6653 6654 if (*salenp < sizeof (sin_t)) 6655 return (EINVAL); 6656 6657 *salenp = sizeof (sin_t); 6658 *sin = sin_null; 6659 sin->sin_family = AF_INET; 6660 sin->sin_port = udp->udp_dstport; 6661 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6662 break; 6663 6664 case AF_INET6: 6665 if (*salenp < sizeof (sin6_t)) 6666 
return (EINVAL); 6667 6668 *salenp = sizeof (sin6_t); 6669 *sin6 = sin6_null; 6670 sin6->sin6_family = AF_INET6; 6671 sin6->sin6_port = udp->udp_dstport; 6672 sin6->sin6_addr = udp->udp_v6dst; 6673 sin6->sin6_flowinfo = udp->udp_flowinfo; 6674 break; 6675 } 6676 6677 return (0); 6678 } 6679 6680 static int 6681 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6682 { 6683 sin_t *sin = (sin_t *)sa; 6684 sin6_t *sin6 = (sin6_t *)sa; 6685 6686 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6687 6688 switch (udp->udp_family) { 6689 case AF_INET: 6690 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6691 6692 if (*salenp < sizeof (sin_t)) 6693 return (EINVAL); 6694 6695 *salenp = sizeof (sin_t); 6696 *sin = sin_null; 6697 sin->sin_family = AF_INET; 6698 sin->sin_port = udp->udp_port; 6699 6700 /* 6701 * If udp_v6src is unspecified, we might be bound to broadcast 6702 * / multicast. Use udp_bound_v6src as local address instead 6703 * (that could also still be unspecified). 6704 */ 6705 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6706 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6707 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6708 } else { 6709 sin->sin_addr.s_addr = 6710 V4_PART_OF_V6(udp->udp_bound_v6src); 6711 } 6712 break; 6713 6714 case AF_INET6: 6715 if (*salenp < sizeof (sin6_t)) 6716 return (EINVAL); 6717 6718 *salenp = sizeof (sin6_t); 6719 *sin6 = sin6_null; 6720 sin6->sin6_family = AF_INET6; 6721 sin6->sin6_port = udp->udp_port; 6722 sin6->sin6_flowinfo = udp->udp_flowinfo; 6723 6724 /* 6725 * If udp_v6src is unspecified, we might be bound to broadcast 6726 * / multicast. Use udp_bound_v6src as local address instead 6727 * (that could also still be unspecified). 6728 */ 6729 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6730 sin6->sin6_addr = udp->udp_v6src; 6731 else 6732 sin6->sin6_addr = udp->udp_bound_v6src; 6733 break; 6734 } 6735 6736 return (0); 6737 } 6738 6739 /* 6740 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
6741 */ 6742 static void 6743 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6744 { 6745 void *data; 6746 mblk_t *datamp = mp->b_cont; 6747 udp_t *udp = Q_TO_UDP(q); 6748 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6749 6750 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6751 cmdp->cb_error = EPROTO; 6752 qreply(q, mp); 6753 return; 6754 } 6755 data = datamp->b_rptr; 6756 6757 rw_enter(&udp->udp_rwlock, RW_READER); 6758 switch (cmdp->cb_cmd) { 6759 case TI_GETPEERNAME: 6760 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6761 break; 6762 case TI_GETMYNAME: 6763 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6764 break; 6765 default: 6766 cmdp->cb_error = EINVAL; 6767 break; 6768 } 6769 rw_exit(&udp->udp_rwlock); 6770 6771 qreply(q, mp); 6772 } 6773 6774 static void 6775 udp_disable_direct_sockfs(udp_t *udp) 6776 { 6777 udp->udp_issocket = B_FALSE; 6778 if (udp->udp_direct_sockfs) { 6779 /* 6780 * Disable read-side synchronous stream interface and 6781 * drain any queued data. 
6782 */ 6783 udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE); 6784 ASSERT(!udp->udp_direct_sockfs); 6785 UDP_STAT(udp->udp_us, udp_sock_fallback); 6786 } 6787 } 6788 6789 static void 6790 udp_wput_other(queue_t *q, mblk_t *mp) 6791 { 6792 uchar_t *rptr = mp->b_rptr; 6793 struct datab *db; 6794 struct iocblk *iocp; 6795 cred_t *cr; 6796 conn_t *connp = Q_TO_CONN(q); 6797 udp_t *udp = connp->conn_udp; 6798 udp_stack_t *us; 6799 6800 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6801 "udp_wput_other_start: q %p", q); 6802 6803 us = udp->udp_us; 6804 db = mp->b_datap; 6805 6806 switch (db->db_type) { 6807 case M_CMD: 6808 udp_wput_cmdblk(q, mp); 6809 return; 6810 6811 case M_PROTO: 6812 case M_PCPROTO: 6813 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6814 freemsg(mp); 6815 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6816 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6817 return; 6818 } 6819 switch (((t_primp_t)rptr)->type) { 6820 case T_ADDR_REQ: 6821 udp_addr_req(q, mp); 6822 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6823 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6824 return; 6825 case O_T_BIND_REQ: 6826 case T_BIND_REQ: 6827 udp_tpi_bind(q, mp); 6828 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6829 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 6830 return; 6831 case T_CONN_REQ: 6832 udp_tpi_connect(q, mp); 6833 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6834 "udp_wput_other_end: q %p (%S)", q, "connreq"); 6835 return; 6836 case T_CAPABILITY_REQ: 6837 udp_capability_req(q, mp); 6838 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6839 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 6840 return; 6841 case T_INFO_REQ: 6842 udp_info_req(q, mp); 6843 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6844 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 6845 return; 6846 case T_UNITDATA_REQ: 6847 /* 6848 * If a T_UNITDATA_REQ gets here, the address must 6849 * be bad. Valid T_UNITDATA_REQs are handled 6850 * in udp_wput. 
6851 */ 6852 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 6853 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6854 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 6855 return; 6856 case T_UNBIND_REQ: 6857 udp_tpi_unbind(q, mp); 6858 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6859 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 6860 return; 6861 case T_SVR4_OPTMGMT_REQ: 6862 /* 6863 * All Solaris components should pass a db_credp 6864 * for this TPI message, hence we ASSERT. 6865 * But in case there is some other M_PROTO that looks 6866 * like a TPI message sent by some other kernel 6867 * component, we check and return an error. 6868 */ 6869 cr = msg_getcred(mp, NULL); 6870 ASSERT(cr != NULL); 6871 if (cr == NULL) { 6872 udp_err_ack(q, mp, TSYSERR, EINVAL); 6873 return; 6874 } 6875 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 6876 cr)) { 6877 (void) svr4_optcom_req(q, 6878 mp, cr, &udp_opt_obj, B_TRUE); 6879 } 6880 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6881 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6882 return; 6883 6884 case T_OPTMGMT_REQ: 6885 /* 6886 * All Solaris components should pass a db_credp 6887 * for this TPI message, hence we ASSERT. 6888 * But in case there is some other M_PROTO that looks 6889 * like a TPI message sent by some other kernel 6890 * component, we check and return an error. 6891 */ 6892 cr = msg_getcred(mp, NULL); 6893 ASSERT(cr != NULL); 6894 if (cr == NULL) { 6895 udp_err_ack(q, mp, TSYSERR, EINVAL); 6896 return; 6897 } 6898 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 6899 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6900 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6901 return; 6902 6903 case T_DISCON_REQ: 6904 udp_tpi_disconnect(q, mp); 6905 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6906 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 6907 return; 6908 6909 /* The following TPI message is not supported by udp. 
*/ 6910 case O_T_CONN_RES: 6911 case T_CONN_RES: 6912 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6913 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6914 "udp_wput_other_end: q %p (%S)", q, 6915 "connres/disconreq"); 6916 return; 6917 6918 /* The following 3 TPI messages are illegal for udp. */ 6919 case T_DATA_REQ: 6920 case T_EXDATA_REQ: 6921 case T_ORDREL_REQ: 6922 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6923 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6924 "udp_wput_other_end: q %p (%S)", q, 6925 "data/exdata/ordrel"); 6926 return; 6927 default: 6928 break; 6929 } 6930 break; 6931 case M_FLUSH: 6932 if (*rptr & FLUSHW) 6933 flushq(q, FLUSHDATA); 6934 break; 6935 case M_IOCTL: 6936 iocp = (struct iocblk *)mp->b_rptr; 6937 switch (iocp->ioc_cmd) { 6938 case TI_GETPEERNAME: 6939 if (udp->udp_state != TS_DATA_XFER) { 6940 /* 6941 * If a default destination address has not 6942 * been associated with the stream, then we 6943 * don't know the peer's name. 6944 */ 6945 iocp->ioc_error = ENOTCONN; 6946 iocp->ioc_count = 0; 6947 mp->b_datap->db_type = M_IOCACK; 6948 qreply(q, mp); 6949 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6950 "udp_wput_other_end: q %p (%S)", q, 6951 "getpeername"); 6952 return; 6953 } 6954 /* FALLTHRU */ 6955 case TI_GETMYNAME: { 6956 /* 6957 * For TI_GETPEERNAME and TI_GETMYNAME, we first 6958 * need to copyin the user's strbuf structure. 6959 * Processing will continue in the M_IOCDATA case 6960 * below. 
6961 */ 6962 mi_copyin(q, mp, NULL, 6963 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 6964 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6965 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 6966 return; 6967 } 6968 case ND_SET: 6969 /* nd_getset performs the necessary checking */ 6970 case ND_GET: 6971 if (nd_getset(q, us->us_nd, mp)) { 6972 qreply(q, mp); 6973 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6974 "udp_wput_other_end: q %p (%S)", q, "get"); 6975 return; 6976 } 6977 break; 6978 case _SIOCSOCKFALLBACK: 6979 /* 6980 * Either sockmod is about to be popped and the 6981 * socket would now be treated as a plain stream, 6982 * or a module is about to be pushed so we could 6983 * no longer use read-side synchronous stream. 6984 * Drain any queued data and disable direct sockfs 6985 * interface from now on. 6986 */ 6987 if (!udp->udp_issocket) { 6988 DB_TYPE(mp) = M_IOCNAK; 6989 iocp->ioc_error = EINVAL; 6990 } else { 6991 udp_disable_direct_sockfs(udp); 6992 6993 DB_TYPE(mp) = M_IOCACK; 6994 iocp->ioc_error = 0; 6995 } 6996 iocp->ioc_count = 0; 6997 iocp->ioc_rval = 0; 6998 qreply(q, mp); 6999 return; 7000 default: 7001 break; 7002 } 7003 break; 7004 case M_IOCDATA: 7005 udp_wput_iocdata(q, mp); 7006 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7007 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7008 return; 7009 default: 7010 /* Unrecognized messages are passed through without change. */ 7011 break; 7012 } 7013 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7014 "udp_wput_other_end: q %p (%S)", q, "end"); 7015 ip_output(connp, mp, q, IP_WPUT); 7016 } 7017 7018 /* 7019 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7020 * messages. 7021 */ 7022 static void 7023 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7024 { 7025 mblk_t *mp1; 7026 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7027 STRUCT_HANDLE(strbuf, sb); 7028 udp_t *udp = Q_TO_UDP(q); 7029 int error; 7030 uint_t addrlen; 7031 7032 /* Make sure it is one of ours. 
*/ 7033 switch (iocp->ioc_cmd) { 7034 case TI_GETMYNAME: 7035 case TI_GETPEERNAME: 7036 break; 7037 default: 7038 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7039 return; 7040 } 7041 7042 switch (mi_copy_state(q, mp, &mp1)) { 7043 case -1: 7044 return; 7045 case MI_COPY_CASE(MI_COPY_IN, 1): 7046 break; 7047 case MI_COPY_CASE(MI_COPY_OUT, 1): 7048 /* 7049 * The address has been copied out, so now 7050 * copyout the strbuf. 7051 */ 7052 mi_copyout(q, mp); 7053 return; 7054 case MI_COPY_CASE(MI_COPY_OUT, 2): 7055 /* 7056 * The address and strbuf have been copied out. 7057 * We're done, so just acknowledge the original 7058 * M_IOCTL. 7059 */ 7060 mi_copy_done(q, mp, 0); 7061 return; 7062 default: 7063 /* 7064 * Something strange has happened, so acknowledge 7065 * the original M_IOCTL with an EPROTO error. 7066 */ 7067 mi_copy_done(q, mp, EPROTO); 7068 return; 7069 } 7070 7071 /* 7072 * Now we have the strbuf structure for TI_GETMYNAME 7073 * and TI_GETPEERNAME. Next we copyout the requested 7074 * address and then we'll copyout the strbuf. 7075 */ 7076 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7077 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); 7078 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7079 mi_copy_done(q, mp, EINVAL); 7080 return; 7081 } 7082 7083 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7084 7085 if (mp1 == NULL) 7086 return; 7087 7088 rw_enter(&udp->udp_rwlock, RW_READER); 7089 switch (iocp->ioc_cmd) { 7090 case TI_GETMYNAME: 7091 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); 7092 break; 7093 case TI_GETPEERNAME: 7094 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7095 break; 7096 } 7097 rw_exit(&udp->udp_rwlock); 7098 7099 if (error != 0) { 7100 mi_copy_done(q, mp, error); 7101 } else { 7102 mp1->b_wptr += addrlen; 7103 STRUCT_FSET(sb, len, addrlen); 7104 7105 /* Copy out the address */ 7106 mi_copyout(q, mp); 7107 } 7108 } 7109 7110 static int 7111 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7112 udpattrs_t *udpattrs) 7113 { 7114 struct T_unitdata_req *udreqp; 7115 int is_absreq_failure; 7116 cred_t *cr; 7117 7118 ASSERT(((t_primp_t)mp->b_rptr)->type); 7119 7120 /* 7121 * All Solaris components should pass a db_credp 7122 * for this TPI message, hence we should ASSERT. 7123 * However, RPC (svc_clts_ksend) does this odd thing where it 7124 * passes the options from a T_UNITDATA_IND unchanged in a 7125 * T_UNITDATA_REQ. While that is the right thing to do for 7126 * some options, SCM_UCRED being the key one, this also makes it 7127 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here. 
7128 */ 7129 cr = msg_getcred(mp, NULL); 7130 if (cr == NULL) { 7131 cr = Q_TO_CONN(q)->conn_cred; 7132 } 7133 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7134 7135 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7136 udreqp->OPT_offset, cr, &udp_opt_obj, 7137 udpattrs, &is_absreq_failure); 7138 7139 if (*errorp != 0) { 7140 /* 7141 * Note: No special action needed in this 7142 * module for "is_absreq_failure" 7143 */ 7144 return (-1); /* failure */ 7145 } 7146 ASSERT(is_absreq_failure == 0); 7147 return (0); /* success */ 7148 } 7149 7150 void 7151 udp_ddi_g_init(void) 7152 { 7153 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7154 udp_opt_obj.odb_opt_arr_cnt); 7155 7156 /* 7157 * We want to be informed each time a stack is created or 7158 * destroyed in the kernel, so we can maintain the 7159 * set of udp_stack_t's. 7160 */ 7161 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7162 } 7163 7164 void 7165 udp_ddi_g_destroy(void) 7166 { 7167 netstack_unregister(NS_UDP); 7168 } 7169 7170 #define INET_NAME "ip" 7171 7172 /* 7173 * Initialize the UDP stack instance. 7174 */ 7175 static void * 7176 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7177 { 7178 udp_stack_t *us; 7179 udpparam_t *pa; 7180 int i; 7181 int error = 0; 7182 major_t major; 7183 7184 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7185 us->us_netstack = ns; 7186 7187 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7188 us->us_epriv_ports[0] = 2049; 7189 us->us_epriv_ports[1] = 4045; 7190 7191 /* 7192 * The smallest anonymous port in the priviledged port range which UDP 7193 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7194 */ 7195 us->us_min_anonpriv_port = 512; 7196 7197 us->us_bind_fanout_size = udp_bind_fanout_size; 7198 7199 /* Roundup variable that might have been modified in /etc/system */ 7200 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7201 /* Not a power of two. 
Round up to nearest power of two */ 7202 for (i = 0; i < 31; i++) { 7203 if (us->us_bind_fanout_size < (1 << i)) 7204 break; 7205 } 7206 us->us_bind_fanout_size = 1 << i; 7207 } 7208 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7209 sizeof (udp_fanout_t), KM_SLEEP); 7210 for (i = 0; i < us->us_bind_fanout_size; i++) { 7211 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7212 NULL); 7213 } 7214 7215 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7216 7217 us->us_param_arr = pa; 7218 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7219 7220 (void) udp_param_register(&us->us_nd, 7221 us->us_param_arr, A_CNT(udp_param_arr)); 7222 7223 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7224 us->us_mibkp = udp_kstat_init(stackid); 7225 7226 major = mod_name_to_major(INET_NAME); 7227 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7228 ASSERT(error == 0); 7229 return (us); 7230 } 7231 7232 /* 7233 * Free the UDP stack instance. 
7234 */ 7235 static void 7236 udp_stack_fini(netstackid_t stackid, void *arg) 7237 { 7238 udp_stack_t *us = (udp_stack_t *)arg; 7239 int i; 7240 7241 for (i = 0; i < us->us_bind_fanout_size; i++) { 7242 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7243 } 7244 7245 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7246 sizeof (udp_fanout_t)); 7247 7248 us->us_bind_fanout = NULL; 7249 7250 nd_free(&us->us_nd); 7251 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7252 us->us_param_arr = NULL; 7253 7254 udp_kstat_fini(stackid, us->us_mibkp); 7255 us->us_mibkp = NULL; 7256 7257 udp_kstat2_fini(stackid, us->us_kstat); 7258 us->us_kstat = NULL; 7259 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7260 7261 ldi_ident_release(us->us_ldi_ident); 7262 kmem_free(us, sizeof (*us)); 7263 } 7264 7265 static void * 7266 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7267 { 7268 kstat_t *ksp; 7269 7270 udp_stat_t template = { 7271 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7272 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7273 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7274 { "udp_drain", KSTAT_DATA_UINT64 }, 7275 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7276 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7277 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7278 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7279 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7280 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7281 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7282 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7283 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7284 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7285 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7286 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7287 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7288 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7289 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7290 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7291 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7292 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 
7293 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7294 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7295 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7296 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7297 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7298 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7299 #ifdef DEBUG 7300 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7301 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7302 #endif 7303 }; 7304 7305 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7306 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7307 KSTAT_FLAG_VIRTUAL, stackid); 7308 7309 if (ksp == NULL) 7310 return (NULL); 7311 7312 bcopy(&template, us_statisticsp, sizeof (template)); 7313 ksp->ks_data = (void *)us_statisticsp; 7314 ksp->ks_private = (void *)(uintptr_t)stackid; 7315 7316 kstat_install(ksp); 7317 return (ksp); 7318 } 7319 7320 static void 7321 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7322 { 7323 if (ksp != NULL) { 7324 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7325 kstat_delete_netstack(ksp, stackid); 7326 } 7327 } 7328 7329 static void * 7330 udp_kstat_init(netstackid_t stackid) 7331 { 7332 kstat_t *ksp; 7333 7334 udp_named_kstat_t template = { 7335 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7336 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7337 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7338 { "entrySize", KSTAT_DATA_INT32, 0 }, 7339 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7340 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7341 }; 7342 7343 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7344 KSTAT_TYPE_NAMED, 7345 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7346 7347 if (ksp == NULL || ksp->ks_data == NULL) 7348 return (NULL); 7349 7350 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7351 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7352 7353 bcopy(&template, ksp->ks_data, sizeof (template)); 7354 ksp->ks_update = udp_kstat_update; 7355 ksp->ks_private = (void 
*)(uintptr_t)stackid; 7356 7357 kstat_install(ksp); 7358 return (ksp); 7359 } 7360 7361 static void 7362 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7363 { 7364 if (ksp != NULL) { 7365 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7366 kstat_delete_netstack(ksp, stackid); 7367 } 7368 } 7369 7370 static int 7371 udp_kstat_update(kstat_t *kp, int rw) 7372 { 7373 udp_named_kstat_t *udpkp; 7374 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7375 netstack_t *ns; 7376 udp_stack_t *us; 7377 7378 if ((kp == NULL) || (kp->ks_data == NULL)) 7379 return (EIO); 7380 7381 if (rw == KSTAT_WRITE) 7382 return (EACCES); 7383 7384 ns = netstack_find_by_stackid(stackid); 7385 if (ns == NULL) 7386 return (-1); 7387 us = ns->netstack_udp; 7388 if (us == NULL) { 7389 netstack_rele(ns); 7390 return (-1); 7391 } 7392 udpkp = (udp_named_kstat_t *)kp->ks_data; 7393 7394 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7395 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7396 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7397 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7398 netstack_rele(ns); 7399 return (0); 7400 } 7401 7402 /* 7403 * Read-side synchronous stream info entry point, called as a 7404 * result of handling certain STREAMS ioctl operations. 
7405 */ 7406 static int 7407 udp_rinfop(queue_t *q, infod_t *dp) 7408 { 7409 mblk_t *mp; 7410 uint_t cmd = dp->d_cmd; 7411 int res = 0; 7412 int error = 0; 7413 udp_t *udp = Q_TO_UDP(q); 7414 struct stdata *stp = STREAM(q); 7415 7416 mutex_enter(&udp->udp_drain_lock); 7417 /* If shutdown on read has happened, return nothing */ 7418 mutex_enter(&stp->sd_lock); 7419 if (stp->sd_flag & STREOF) { 7420 mutex_exit(&stp->sd_lock); 7421 goto done; 7422 } 7423 mutex_exit(&stp->sd_lock); 7424 7425 if ((mp = udp->udp_rcv_list_head) == NULL) 7426 goto done; 7427 7428 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7429 7430 if (cmd & INFOD_COUNT) { 7431 /* 7432 * Return the number of messages. 7433 */ 7434 dp->d_count += udp->udp_rcv_msgcnt; 7435 res |= INFOD_COUNT; 7436 } 7437 if (cmd & INFOD_BYTES) { 7438 /* 7439 * Return size of all data messages. 7440 */ 7441 dp->d_bytes += udp->udp_rcv_cnt; 7442 res |= INFOD_BYTES; 7443 } 7444 if (cmd & INFOD_FIRSTBYTES) { 7445 /* 7446 * Return size of first data message. 7447 */ 7448 dp->d_bytes = msgdsize(mp); 7449 res |= INFOD_FIRSTBYTES; 7450 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7451 } 7452 if (cmd & INFOD_COPYOUT) { 7453 mblk_t *mp1 = mp->b_cont; 7454 int n; 7455 /* 7456 * Return data contents of first message. 7457 */ 7458 ASSERT(DB_TYPE(mp1) == M_DATA); 7459 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7460 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7461 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7462 UIO_READ, dp->d_uiop)) != 0) { 7463 goto done; 7464 } 7465 mp1 = mp1->b_cont; 7466 } 7467 res |= INFOD_COPYOUT; 7468 dp->d_cmd &= ~INFOD_COPYOUT; 7469 } 7470 done: 7471 mutex_exit(&udp->udp_drain_lock); 7472 7473 dp->d_res |= res; 7474 7475 return (error); 7476 } 7477 7478 /* 7479 * Read-side synchronous stream entry point. This is called as a result 7480 * of recv/read operation done at sockfs, and is guaranteed to execute 7481 * outside of the interrupt thread context. 
It returns a single datagram 7482 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7483 */ 7484 static int 7485 udp_rrw(queue_t *q, struiod_t *dp) 7486 { 7487 mblk_t *mp; 7488 udp_t *udp = Q_TO_UDP(q); 7489 udp_stack_t *us = udp->udp_us; 7490 7491 /* 7492 * Dequeue datagram from the head of the list and return 7493 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7494 * set/cleared depending on whether or not there's data 7495 * remaining in the list. 7496 */ 7497 mutex_enter(&udp->udp_drain_lock); 7498 if (!udp->udp_direct_sockfs) { 7499 mutex_exit(&udp->udp_drain_lock); 7500 UDP_STAT(us, udp_rrw_busy); 7501 return (EBUSY); 7502 } 7503 if ((mp = udp->udp_rcv_list_head) != NULL) { 7504 uint_t size = msgdsize(mp); 7505 7506 /* Last datagram in the list? */ 7507 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7508 udp->udp_rcv_list_tail = NULL; 7509 mp->b_next = NULL; 7510 7511 udp->udp_rcv_cnt -= size; 7512 udp->udp_rcv_msgcnt--; 7513 UDP_STAT(us, udp_rrw_msgcnt); 7514 7515 /* No longer flow-controlling? */ 7516 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7517 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7518 udp->udp_drain_qfull = B_FALSE; 7519 } 7520 if (udp->udp_rcv_list_head == NULL) { 7521 /* 7522 * Either we just dequeued the last datagram or 7523 * we get here from sockfs and have nothing to 7524 * return; in this case clear RSLEEP. 7525 */ 7526 ASSERT(udp->udp_rcv_cnt == 0); 7527 ASSERT(udp->udp_rcv_msgcnt == 0); 7528 ASSERT(udp->udp_rcv_list_tail == NULL); 7529 STR_WAKEUP_CLEAR(STREAM(q)); 7530 } else { 7531 /* 7532 * More data follows; we need udp_rrw() to be 7533 * called in future to pick up the rest. 
7534 */ 7535 STR_WAKEUP_SET(STREAM(q)); 7536 } 7537 mutex_exit(&udp->udp_drain_lock); 7538 dp->d_mp = mp; 7539 return (0); 7540 } 7541 7542 /* 7543 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7544 * list; this is typically executed within the interrupt thread context 7545 * and so we do things as quickly as possible. 7546 */ 7547 static void 7548 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7549 { 7550 ASSERT(q == RD(q)); 7551 ASSERT(pkt_len == msgdsize(mp)); 7552 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7553 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7554 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7555 7556 mutex_enter(&udp->udp_drain_lock); 7557 /* 7558 * Wake up and signal the receiving app; it is okay to do this 7559 * before enqueueing the mp because we are holding the drain lock. 7560 * One of the advantages of synchronous stream is the ability for 7561 * us to find out when the application performs a read on the 7562 * socket by way of udp_rrw() entry point being called. We need 7563 * to generate SIGPOLL/SIGIO for each received data in the case 7564 * of asynchronous socket just as in the strrput() case. However, 7565 * we only wake the application up when necessary, i.e. during the 7566 * first enqueue. When udp_rrw() is called, we send up a single 7567 * datagram upstream and call STR_WAKEUP_SET() again when there 7568 * are still data remaining in our receive queue. 7569 */ 7570 STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head); 7571 if (udp->udp_rcv_list_head == NULL) 7572 udp->udp_rcv_list_head = mp; 7573 else 7574 udp->udp_rcv_list_tail->b_next = mp; 7575 udp->udp_rcv_list_tail = mp; 7576 udp->udp_rcv_cnt += pkt_len; 7577 udp->udp_rcv_msgcnt++; 7578 7579 /* Need to flow-control? 
*/ 7580 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7581 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7582 udp->udp_drain_qfull = B_TRUE; 7583 7584 mutex_exit(&udp->udp_drain_lock); 7585 } 7586 7587 /* 7588 * Drain the contents of receive list to the module upstream; we do 7589 * this during close or when we fallback to the slow mode due to 7590 * sockmod being popped or a module being pushed on top of us. 7591 */ 7592 static void 7593 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7594 { 7595 mblk_t *mp; 7596 udp_stack_t *us = udp->udp_us; 7597 7598 mutex_enter(&udp->udp_drain_lock); 7599 /* 7600 * There is no race with a concurrent udp_input() sending 7601 * up packets using putnext() after we have cleared the 7602 * udp_direct_sockfs flag but before we have completed 7603 * sending up the packets in udp_rcv_list, since we are 7604 * either a writer or we have quiesced the conn. 7605 */ 7606 udp->udp_direct_sockfs = B_FALSE; 7607 mutex_exit(&udp->udp_drain_lock); 7608 7609 if (udp->udp_rcv_list_head != NULL) 7610 UDP_STAT(us, udp_drain); 7611 7612 /* 7613 * Send up everything via putnext(); note here that we 7614 * don't need the udp_drain_lock to protect us since 7615 * nothing can enter udp_rrw() and that we currently 7616 * have exclusive access to this udp. 
7617 */ 7618 while ((mp = udp->udp_rcv_list_head) != NULL) { 7619 udp->udp_rcv_list_head = mp->b_next; 7620 mp->b_next = NULL; 7621 udp->udp_rcv_cnt -= msgdsize(mp); 7622 udp->udp_rcv_msgcnt--; 7623 if (closing) { 7624 freemsg(mp); 7625 } else { 7626 ASSERT(q == RD(q)); 7627 putnext(q, mp); 7628 } 7629 } 7630 ASSERT(udp->udp_rcv_cnt == 0); 7631 ASSERT(udp->udp_rcv_msgcnt == 0); 7632 ASSERT(udp->udp_rcv_list_head == NULL); 7633 udp->udp_rcv_list_tail = NULL; 7634 udp->udp_drain_qfull = B_FALSE; 7635 } 7636 7637 static size_t 7638 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7639 { 7640 udp_stack_t *us = udp->udp_us; 7641 7642 /* We add a bit of extra buffering */ 7643 size += size >> 1; 7644 if (size > us->us_max_buf) 7645 size = us->us_max_buf; 7646 7647 udp->udp_rcv_hiwat = size; 7648 return (size); 7649 } 7650 7651 /* 7652 * For the lower queue so that UDP can be a dummy mux. 7653 * Nobody should be sending 7654 * packets up this stream 7655 */ 7656 static void 7657 udp_lrput(queue_t *q, mblk_t *mp) 7658 { 7659 mblk_t *mp1; 7660 7661 switch (mp->b_datap->db_type) { 7662 case M_FLUSH: 7663 /* Turn around */ 7664 if (*mp->b_rptr & FLUSHW) { 7665 *mp->b_rptr &= ~FLUSHR; 7666 qreply(q, mp); 7667 return; 7668 } 7669 break; 7670 } 7671 /* Could receive messages that passed through ar_rput */ 7672 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7673 mp1->b_prev = mp1->b_next = NULL; 7674 freemsg(mp); 7675 } 7676 7677 /* 7678 * For the lower queue so that UDP can be a dummy mux. 7679 * Nobody should be sending packets down this stream. 7680 */ 7681 /* ARGSUSED */ 7682 void 7683 udp_lwput(queue_t *q, mblk_t *mp) 7684 { 7685 freemsg(mp); 7686 } 7687 7688 /* 7689 * Below routines for UDP socket module. 
7690 */ 7691 7692 static conn_t * 7693 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7694 { 7695 udp_t *udp; 7696 conn_t *connp; 7697 zoneid_t zoneid; 7698 netstack_t *ns; 7699 udp_stack_t *us; 7700 7701 ns = netstack_find_by_cred(credp); 7702 ASSERT(ns != NULL); 7703 us = ns->netstack_udp; 7704 ASSERT(us != NULL); 7705 7706 /* 7707 * For exclusive stacks we set the zoneid to zero 7708 * to make UDP operate as if in the global zone. 7709 */ 7710 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7711 zoneid = GLOBAL_ZONEID; 7712 else 7713 zoneid = crgetzoneid(credp); 7714 7715 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7716 7717 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7718 if (connp == NULL) { 7719 netstack_rele(ns); 7720 return (NULL); 7721 } 7722 udp = connp->conn_udp; 7723 7724 /* 7725 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7726 * done by netstack_find_by_cred() 7727 */ 7728 netstack_rele(ns); 7729 7730 rw_enter(&udp->udp_rwlock, RW_WRITER); 7731 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7732 ASSERT(connp->conn_udp == udp); 7733 ASSERT(udp->udp_connp == connp); 7734 7735 /* Set the initial state of the stream and the privilege status. 
*/ 7736 udp->udp_state = TS_UNBND; 7737 if (isv6) { 7738 udp->udp_family = AF_INET6; 7739 udp->udp_ipversion = IPV6_VERSION; 7740 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7741 udp->udp_ttl = us->us_ipv6_hoplimit; 7742 connp->conn_af_isv6 = B_TRUE; 7743 connp->conn_flags |= IPCL_ISV6; 7744 } else { 7745 udp->udp_family = AF_INET; 7746 udp->udp_ipversion = IPV4_VERSION; 7747 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7748 udp->udp_ttl = us->us_ipv4_ttl; 7749 connp->conn_af_isv6 = B_FALSE; 7750 connp->conn_flags &= ~IPCL_ISV6; 7751 } 7752 7753 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7754 udp->udp_pending_op = -1; 7755 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7756 connp->conn_zoneid = zoneid; 7757 7758 udp->udp_open_time = lbolt64; 7759 udp->udp_open_pid = curproc->p_pid; 7760 7761 /* 7762 * If the caller has the process-wide flag set, then default to MAC 7763 * exempt mode. This allows read-down to unlabeled hosts. 7764 */ 7765 if (getpflags(NET_MAC_AWARE, credp) != 0) 7766 connp->conn_mac_exempt = B_TRUE; 7767 7768 connp->conn_ulp_labeled = is_system_labeled(); 7769 7770 udp->udp_us = us; 7771 7772 connp->conn_recv = udp_input; 7773 crhold(credp); 7774 connp->conn_cred = credp; 7775 7776 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7777 7778 rw_exit(&udp->udp_rwlock); 7779 7780 return (connp); 7781 } 7782 7783 /* ARGSUSED */ 7784 sock_lower_handle_t 7785 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7786 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7787 { 7788 udp_t *udp = NULL; 7789 udp_stack_t *us; 7790 conn_t *connp; 7791 boolean_t isv6; 7792 7793 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7794 (proto != 0 && proto != IPPROTO_UDP)) { 7795 *errorp = EPROTONOSUPPORT; 7796 return (NULL); 7797 } 7798 7799 if (family == AF_INET6) 7800 isv6 = B_TRUE; 7801 else 7802 isv6 = B_FALSE; 7803 7804 connp = udp_do_open(credp, isv6, flags); 7805 if (connp == 
NULL) { 7806 *errorp = ENOMEM; 7807 return (NULL); 7808 } 7809 7810 udp = connp->conn_udp; 7811 ASSERT(udp != NULL); 7812 us = udp->udp_us; 7813 ASSERT(us != NULL); 7814 7815 udp->udp_issocket = B_TRUE; 7816 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7817 7818 /* Set flow control */ 7819 rw_enter(&udp->udp_rwlock, RW_WRITER); 7820 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7821 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7822 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 7823 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 7824 udp->udp_xmit_lowat = us->us_xmit_lowat; 7825 7826 if (udp->udp_family == AF_INET6) { 7827 /* Build initial header template for transmit */ 7828 if ((*errorp = udp_build_hdrs(udp)) != 0) { 7829 rw_exit(&udp->udp_rwlock); 7830 ipcl_conn_destroy(connp); 7831 return (NULL); 7832 } 7833 } 7834 rw_exit(&udp->udp_rwlock); 7835 7836 connp->conn_flow_cntrld = B_FALSE; 7837 7838 ASSERT(us->us_ldi_ident != NULL); 7839 7840 if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 7841 ip1dbg(("udp_create: create of IP helper stream failed\n")); 7842 udp_do_close(connp); 7843 return (NULL); 7844 } 7845 7846 /* Set the send flow control */ 7847 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 7848 connp->conn_wq->q_lowat = us->us_xmit_lowat; 7849 7850 mutex_enter(&connp->conn_lock); 7851 connp->conn_state_flags &= ~CONN_INCIPIENT; 7852 mutex_exit(&connp->conn_lock); 7853 7854 *errorp = 0; 7855 *smodep = SM_ATOMIC; 7856 *sock_downcalls = &sock_udp_downcalls; 7857 return ((sock_lower_handle_t)connp); 7858 } 7859 7860 /* ARGSUSED */ 7861 void 7862 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 7863 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 7864 { 7865 conn_t *connp = (conn_t *)proto_handle; 7866 udp_t *udp = connp->conn_udp; 7867 udp_stack_t *us = udp->udp_us; 7868 struct sock_proto_props sopp; 7869 7870 /* All Solaris components should pass a cred for this operation. 
*/ 7871 ASSERT(cr != NULL); 7872 7873 connp->conn_upcalls = sock_upcalls; 7874 connp->conn_upper_handle = sock_handle; 7875 7876 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 7877 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 7878 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 7879 sopp.sopp_maxblk = INFPSZ; 7880 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 7881 sopp.sopp_maxaddrlen = sizeof (sin6_t); 7882 sopp.sopp_maxpsz = 7883 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 7884 UDP_MAXPACKET_IPV6; 7885 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 7886 udp_mod_info.mi_minpsz; 7887 7888 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 7889 &sopp); 7890 } 7891 7892 static void 7893 udp_do_close(conn_t *connp) 7894 { 7895 udp_t *udp; 7896 7897 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 7898 udp = connp->conn_udp; 7899 7900 udp_quiesce_conn(connp); 7901 ip_quiesce_conn(connp); 7902 7903 if (!IPCL_IS_NONSTR(connp)) { 7904 /* 7905 * Disable read-side synchronous stream 7906 * interface and drain any queued data. 7907 */ 7908 ASSERT(connp->conn_wq != NULL); 7909 udp_rcv_drain(connp->conn_wq, udp, B_TRUE); 7910 ASSERT(!udp->udp_direct_sockfs); 7911 7912 ASSERT(connp->conn_rq != NULL); 7913 qprocsoff(connp->conn_rq); 7914 } 7915 7916 ASSERT(udp->udp_rcv_cnt == 0); 7917 ASSERT(udp->udp_rcv_msgcnt == 0); 7918 ASSERT(udp->udp_rcv_list_head == NULL); 7919 ASSERT(udp->udp_rcv_list_tail == NULL); 7920 7921 udp_close_free(connp); 7922 7923 /* 7924 * Now we are truly single threaded on this stream, and can 7925 * delete the things hanging off the connp, and finally the connp. 7926 * We removed this connp from the fanout list, it cannot be 7927 * accessed thru the fanouts, and we already waited for the 7928 * conn_ref to drop to 0. We are already in close, so 7929 * there cannot be any other thread from the top. qprocsoff 7930 * has completed, and service has completed or won't run in 7931 * future. 
7932 */ 7933 ASSERT(connp->conn_ref == 1); 7934 if (!IPCL_IS_NONSTR(connp)) { 7935 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 7936 } else { 7937 ip_free_helper_stream(connp); 7938 } 7939 7940 connp->conn_ref--; 7941 ipcl_conn_destroy(connp); 7942 } 7943 7944 /* ARGSUSED */ 7945 int 7946 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 7947 { 7948 conn_t *connp = (conn_t *)proto_handle; 7949 7950 /* All Solaris components should pass a cred for this operation. */ 7951 ASSERT(cr != NULL); 7952 7953 udp_do_close(connp); 7954 return (0); 7955 } 7956 7957 static int 7958 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 7959 boolean_t bind_to_req_port_only) 7960 { 7961 sin_t *sin; 7962 sin6_t *sin6; 7963 sin6_t sin6addr; 7964 in_port_t port; /* Host byte order */ 7965 in_port_t requested_port; /* Host byte order */ 7966 int count; 7967 in6_addr_t v6src; 7968 int loopmax; 7969 udp_fanout_t *udpf; 7970 in_port_t lport; /* Network byte order */ 7971 zoneid_t zoneid; 7972 udp_t *udp; 7973 boolean_t is_inaddr_any; 7974 mlp_type_t addrtype, mlptype; 7975 udp_stack_t *us; 7976 int error = 0; 7977 mblk_t *mp = NULL; 7978 7979 udp = connp->conn_udp; 7980 us = udp->udp_us; 7981 7982 if (udp->udp_state != TS_UNBND) { 7983 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7984 "udp_bind: bad state, %u", udp->udp_state); 7985 return (-TOUTSTATE); 7986 } 7987 7988 switch (len) { 7989 case 0: 7990 if (udp->udp_family == AF_INET) { 7991 sin = (sin_t *)&sin6addr; 7992 *sin = sin_null; 7993 sin->sin_family = AF_INET; 7994 sin->sin_addr.s_addr = INADDR_ANY; 7995 udp->udp_ipversion = IPV4_VERSION; 7996 } else { 7997 ASSERT(udp->udp_family == AF_INET6); 7998 sin6 = (sin6_t *)&sin6addr; 7999 *sin6 = sin6_null; 8000 sin6->sin6_family = AF_INET6; 8001 V6_SET_ZERO(sin6->sin6_addr); 8002 udp->udp_ipversion = IPV6_VERSION; 8003 } 8004 port = 0; 8005 break; 8006 8007 case sizeof (sin_t): /* Complete IPv4 address */ 8008 sin = (sin_t *)sa; 
8009 8010 if (sin == NULL || !OK_32PTR((char *)sin)) 8011 return (EINVAL); 8012 8013 if (udp->udp_family != AF_INET || 8014 sin->sin_family != AF_INET) { 8015 return (EAFNOSUPPORT); 8016 } 8017 port = ntohs(sin->sin_port); 8018 break; 8019 8020 case sizeof (sin6_t): /* complete IPv6 address */ 8021 sin6 = (sin6_t *)sa; 8022 8023 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 8024 return (EINVAL); 8025 8026 if (udp->udp_family != AF_INET6 || 8027 sin6->sin6_family != AF_INET6) { 8028 return (EAFNOSUPPORT); 8029 } 8030 port = ntohs(sin6->sin6_port); 8031 break; 8032 8033 default: /* Invalid request */ 8034 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8035 "udp_bind: bad ADDR_length length %u", len); 8036 return (-TBADADDR); 8037 } 8038 8039 requested_port = port; 8040 8041 if (requested_port == 0 || !bind_to_req_port_only) 8042 bind_to_req_port_only = B_FALSE; 8043 else /* T_BIND_REQ and requested_port != 0 */ 8044 bind_to_req_port_only = B_TRUE; 8045 8046 if (requested_port == 0) { 8047 /* 8048 * If the application passed in zero for the port number, it 8049 * doesn't care which port number we bind to. Get one in the 8050 * valid range. 8051 */ 8052 if (udp->udp_anon_priv_bind) { 8053 port = udp_get_next_priv_port(udp); 8054 } else { 8055 port = udp_update_next_port(udp, 8056 us->us_next_port_to_try, B_TRUE); 8057 } 8058 } else { 8059 /* 8060 * If the port is in the well-known privileged range, 8061 * make sure the caller was privileged. 8062 */ 8063 int i; 8064 boolean_t priv = B_FALSE; 8065 8066 if (port < us->us_smallest_nonpriv_port) { 8067 priv = B_TRUE; 8068 } else { 8069 for (i = 0; i < us->us_num_epriv_ports; i++) { 8070 if (port == us->us_epriv_ports[i]) { 8071 priv = B_TRUE; 8072 break; 8073 } 8074 } 8075 } 8076 8077 if (priv) { 8078 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 8079 return (-TACCES); 8080 } 8081 } 8082 8083 if (port == 0) 8084 return (-TNOADDR); 8085 8086 /* 8087 * The state must be TS_UNBND. 
TPI mandates that users must send 8088 * TPI primitives only 1 at a time and wait for the response before 8089 * sending the next primitive. 8090 */ 8091 rw_enter(&udp->udp_rwlock, RW_WRITER); 8092 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 8093 rw_exit(&udp->udp_rwlock); 8094 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8095 "udp_bind: bad state, %u", udp->udp_state); 8096 return (-TOUTSTATE); 8097 } 8098 /* XXX how to remove the T_BIND_REQ? Should set it before calling */ 8099 udp->udp_pending_op = T_BIND_REQ; 8100 /* 8101 * Copy the source address into our udp structure. This address 8102 * may still be zero; if so, IP will fill in the correct address 8103 * each time an outbound packet is passed to it. Since the udp is 8104 * not yet in the bind hash list, we don't grab the uf_lock to 8105 * change udp_ipversion 8106 */ 8107 if (udp->udp_family == AF_INET) { 8108 ASSERT(sin != NULL); 8109 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8110 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8111 udp->udp_ip_snd_options_len; 8112 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 8113 } else { 8114 ASSERT(sin6 != NULL); 8115 v6src = sin6->sin6_addr; 8116 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 8117 /* 8118 * no need to hold the uf_lock to set the udp_ipversion 8119 * since we are not yet in the fanout list 8120 */ 8121 udp->udp_ipversion = IPV4_VERSION; 8122 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 8123 UDPH_SIZE + udp->udp_ip_snd_options_len; 8124 } else { 8125 udp->udp_ipversion = IPV6_VERSION; 8126 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8127 } 8128 } 8129 8130 /* 8131 * If udp_reuseaddr is not set, then we have to make sure that 8132 * the IP address and port number the application requested 8133 * (or we selected for the application) is not being used by 8134 * another stream. If another stream is already using the 8135 * requested IP address and port, the behavior depends on 8136 * "bind_to_req_port_only". 
If set the bind fails; otherwise we 8137 * search for any an unused port to bind to the the stream. 8138 * 8139 * As per the BSD semantics, as modified by the Deering multicast 8140 * changes, if udp_reuseaddr is set, then we allow multiple binds 8141 * to the same port independent of the local IP address. 8142 * 8143 * This is slightly different than in SunOS 4.X which did not 8144 * support IP multicast. Note that the change implemented by the 8145 * Deering multicast code effects all binds - not only binding 8146 * to IP multicast addresses. 8147 * 8148 * Note that when binding to port zero we ignore SO_REUSEADDR in 8149 * order to guarantee a unique port. 8150 */ 8151 8152 count = 0; 8153 if (udp->udp_anon_priv_bind) { 8154 /* 8155 * loopmax = (IPPORT_RESERVED-1) - 8156 * us->us_min_anonpriv_port + 1 8157 */ 8158 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 8159 } else { 8160 loopmax = us->us_largest_anon_port - 8161 us->us_smallest_anon_port + 1; 8162 } 8163 8164 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 8165 zoneid = connp->conn_zoneid; 8166 8167 for (;;) { 8168 udp_t *udp1; 8169 boolean_t found_exclbind = B_FALSE; 8170 8171 /* 8172 * Walk through the list of udp streams bound to 8173 * requested port with the same IP address. 8174 */ 8175 lport = htons(port); 8176 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 8177 us->us_bind_fanout_size)]; 8178 mutex_enter(&udpf->uf_lock); 8179 for (udp1 = udpf->uf_udp; udp1 != NULL; 8180 udp1 = udp1->udp_bind_hash) { 8181 if (lport != udp1->udp_port) 8182 continue; 8183 8184 /* 8185 * On a labeled system, we must treat bindings to ports 8186 * on shared IP addresses by sockets with MAC exemption 8187 * privilege as being in all zones, as there's 8188 * otherwise no way to identify the right receiver. 
8189 */ 8190 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 8191 IPCL_ZONE_MATCH(connp, 8192 udp1->udp_connp->conn_zoneid)) && 8193 !connp->conn_mac_exempt && \ 8194 !udp1->udp_connp->conn_mac_exempt) 8195 continue; 8196 8197 /* 8198 * If UDP_EXCLBIND is set for either the bound or 8199 * binding endpoint, the semantics of bind 8200 * is changed according to the following chart. 8201 * 8202 * spec = specified address (v4 or v6) 8203 * unspec = unspecified address (v4 or v6) 8204 * A = specified addresses are different for endpoints 8205 * 8206 * bound bind to allowed? 8207 * ------------------------------------- 8208 * unspec unspec no 8209 * unspec spec no 8210 * spec unspec no 8211 * spec spec yes if A 8212 * 8213 * For labeled systems, SO_MAC_EXEMPT behaves the same 8214 * as UDP_EXCLBIND, except that zoneid is ignored. 8215 */ 8216 if (udp1->udp_exclbind || udp->udp_exclbind || 8217 udp1->udp_connp->conn_mac_exempt || 8218 connp->conn_mac_exempt) { 8219 if (V6_OR_V4_INADDR_ANY( 8220 udp1->udp_bound_v6src) || 8221 is_inaddr_any || 8222 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8223 &v6src)) { 8224 found_exclbind = B_TRUE; 8225 break; 8226 } 8227 continue; 8228 } 8229 8230 /* 8231 * Check ipversion to allow IPv4 and IPv6 sockets to 8232 * have disjoint port number spaces. 8233 */ 8234 if (udp->udp_ipversion != udp1->udp_ipversion) { 8235 8236 /* 8237 * On the first time through the loop, if the 8238 * the user intentionally specified a 8239 * particular port number, then ignore any 8240 * bindings of the other protocol that may 8241 * conflict. This allows the user to bind IPv6 8242 * alone and get both v4 and v6, or bind both 8243 * both and get each seperately. On subsequent 8244 * times through the loop, we're checking a 8245 * port that we chose (not the user) and thus 8246 * we do not allow casual duplicate bindings. 8247 */ 8248 if (count == 0 && requested_port != 0) 8249 continue; 8250 } 8251 8252 /* 8253 * No difference depending on SO_REUSEADDR. 
8254 * 8255 * If existing port is bound to a 8256 * non-wildcard IP address and 8257 * the requesting stream is bound to 8258 * a distinct different IP addresses 8259 * (non-wildcard, also), keep going. 8260 */ 8261 if (!is_inaddr_any && 8262 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 8263 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8264 &v6src)) { 8265 continue; 8266 } 8267 break; 8268 } 8269 8270 if (!found_exclbind && 8271 (udp->udp_reuseaddr && requested_port != 0)) { 8272 break; 8273 } 8274 8275 if (udp1 == NULL) { 8276 /* 8277 * No other stream has this IP address 8278 * and port number. We can use it. 8279 */ 8280 break; 8281 } 8282 mutex_exit(&udpf->uf_lock); 8283 if (bind_to_req_port_only) { 8284 /* 8285 * We get here only when requested port 8286 * is bound (and only first of the for() 8287 * loop iteration). 8288 * 8289 * The semantics of this bind request 8290 * require it to fail so we return from 8291 * the routine (and exit the loop). 8292 * 8293 */ 8294 udp->udp_pending_op = -1; 8295 rw_exit(&udp->udp_rwlock); 8296 return (-TADDRBUSY); 8297 } 8298 8299 if (udp->udp_anon_priv_bind) { 8300 port = udp_get_next_priv_port(udp); 8301 } else { 8302 if ((count == 0) && (requested_port != 0)) { 8303 /* 8304 * If the application wants us to find 8305 * a port, get one to start with. Set 8306 * requested_port to 0, so that we will 8307 * update us->us_next_port_to_try below. 8308 */ 8309 port = udp_update_next_port(udp, 8310 us->us_next_port_to_try, B_TRUE); 8311 requested_port = 0; 8312 } else { 8313 port = udp_update_next_port(udp, port + 1, 8314 B_FALSE); 8315 } 8316 } 8317 8318 if (port == 0 || ++count >= loopmax) { 8319 /* 8320 * We've tried every possible port number and 8321 * there are none available, so send an error 8322 * to the user. 8323 */ 8324 udp->udp_pending_op = -1; 8325 rw_exit(&udp->udp_rwlock); 8326 return (-TNOADDR); 8327 } 8328 } 8329 8330 /* 8331 * Copy the source address into our udp structure. 
This address 8332 * may still be zero; if so, ip will fill in the correct address 8333 * each time an outbound packet is passed to it. 8334 * If we are binding to a broadcast or multicast address then 8335 * udp_post_ip_bind_connect will clear the source address 8336 * when udp_do_bind success. 8337 */ 8338 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8339 udp->udp_port = lport; 8340 /* 8341 * Now reset the the next anonymous port if the application requested 8342 * an anonymous port, or we handed out the next anonymous port. 8343 */ 8344 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8345 us->us_next_port_to_try = port + 1; 8346 } 8347 8348 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 8349 if (udp->udp_family == AF_INET) { 8350 sin->sin_port = udp->udp_port; 8351 } else { 8352 sin6->sin6_port = udp->udp_port; 8353 /* Rebuild the header template */ 8354 error = udp_build_hdrs(udp); 8355 if (error != 0) { 8356 udp->udp_pending_op = -1; 8357 rw_exit(&udp->udp_rwlock); 8358 mutex_exit(&udpf->uf_lock); 8359 return (error); 8360 } 8361 } 8362 udp->udp_state = TS_IDLE; 8363 udp_bind_hash_insert(udpf, udp); 8364 mutex_exit(&udpf->uf_lock); 8365 rw_exit(&udp->udp_rwlock); 8366 8367 if (cl_inet_bind) { 8368 /* 8369 * Running in cluster mode - register bind information 8370 */ 8371 if (udp->udp_ipversion == IPV4_VERSION) { 8372 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8373 IPPROTO_UDP, AF_INET, 8374 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8375 (in_port_t)udp->udp_port, NULL); 8376 } else { 8377 (*cl_inet_bind)(connp->conn_netstack->netstack_stackid, 8378 IPPROTO_UDP, AF_INET6, 8379 (uint8_t *)&(udp->udp_v6src), 8380 (in_port_t)udp->udp_port, NULL); 8381 } 8382 } 8383 8384 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 8385 if (is_system_labeled() && (!connp->conn_anon_port || 8386 connp->conn_anon_mlp)) { 8387 uint16_t mlpport; 8388 zone_t *zone; 8389 8390 zone = crgetzone(cr); 8391 connp->conn_mlp_type = 
udp->udp_recvucred ? mlptBoth : 8392 mlptSingle; 8393 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 8394 &v6src, us->us_netstack->netstack_ip); 8395 if (addrtype == mlptSingle) { 8396 rw_enter(&udp->udp_rwlock, RW_WRITER); 8397 udp->udp_pending_op = -1; 8398 rw_exit(&udp->udp_rwlock); 8399 connp->conn_anon_port = B_FALSE; 8400 connp->conn_mlp_type = mlptSingle; 8401 return (-TNOADDR); 8402 } 8403 mlpport = connp->conn_anon_port ? PMAPPORT : port; 8404 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 8405 addrtype); 8406 if (mlptype != mlptSingle && 8407 (connp->conn_mlp_type == mlptSingle || 8408 secpolicy_net_bindmlp(cr) != 0)) { 8409 if (udp->udp_debug) { 8410 (void) strlog(UDP_MOD_ID, 0, 1, 8411 SL_ERROR|SL_TRACE, 8412 "udp_bind: no priv for multilevel port %d", 8413 mlpport); 8414 } 8415 rw_enter(&udp->udp_rwlock, RW_WRITER); 8416 udp->udp_pending_op = -1; 8417 rw_exit(&udp->udp_rwlock); 8418 connp->conn_anon_port = B_FALSE; 8419 connp->conn_mlp_type = mlptSingle; 8420 return (-TACCES); 8421 } 8422 8423 /* 8424 * If we're specifically binding a shared IP address and the 8425 * port is MLP on shared addresses, then check to see if this 8426 * zone actually owns the MLP. Reject if not. 8427 */ 8428 if (mlptype == mlptShared && addrtype == mlptShared) { 8429 /* 8430 * No need to handle exclusive-stack zones since 8431 * ALL_ZONES only applies to the shared stack. 
8432 */ 8433 zoneid_t mlpzone; 8434 8435 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8436 htons(mlpport)); 8437 if (connp->conn_zoneid != mlpzone) { 8438 if (udp->udp_debug) { 8439 (void) strlog(UDP_MOD_ID, 0, 1, 8440 SL_ERROR|SL_TRACE, 8441 "udp_bind: attempt to bind port " 8442 "%d on shared addr in zone %d " 8443 "(should be %d)", 8444 mlpport, connp->conn_zoneid, 8445 mlpzone); 8446 } 8447 rw_enter(&udp->udp_rwlock, RW_WRITER); 8448 udp->udp_pending_op = -1; 8449 rw_exit(&udp->udp_rwlock); 8450 connp->conn_anon_port = B_FALSE; 8451 connp->conn_mlp_type = mlptSingle; 8452 return (-TACCES); 8453 } 8454 } 8455 if (connp->conn_anon_port) { 8456 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8457 port, B_TRUE); 8458 if (error != 0) { 8459 if (udp->udp_debug) { 8460 (void) strlog(UDP_MOD_ID, 0, 1, 8461 SL_ERROR|SL_TRACE, 8462 "udp_bind: cannot establish anon " 8463 "MLP for port %d", port); 8464 } 8465 rw_enter(&udp->udp_rwlock, RW_WRITER); 8466 udp->udp_pending_op = -1; 8467 rw_exit(&udp->udp_rwlock); 8468 connp->conn_anon_port = B_FALSE; 8469 connp->conn_mlp_type = mlptSingle; 8470 return (-TACCES); 8471 } 8472 } 8473 connp->conn_mlp_type = mlptype; 8474 } 8475 8476 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8477 /* 8478 * Append a request for an IRE if udp_v6src not 8479 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 
8480 */ 8481 mp = allocb(sizeof (ire_t), BPRI_HI); 8482 if (!mp) { 8483 rw_enter(&udp->udp_rwlock, RW_WRITER); 8484 udp->udp_pending_op = -1; 8485 rw_exit(&udp->udp_rwlock); 8486 return (ENOMEM); 8487 } 8488 mp->b_wptr += sizeof (ire_t); 8489 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8490 } 8491 if (udp->udp_family == AF_INET6) { 8492 ASSERT(udp->udp_connp->conn_af_isv6); 8493 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8494 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8495 } else { 8496 ASSERT(!udp->udp_connp->conn_af_isv6); 8497 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8498 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8499 B_TRUE); 8500 } 8501 8502 (void) udp_post_ip_bind_connect(udp, mp, error); 8503 return (error); 8504 } 8505 8506 int 8507 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8508 socklen_t len, cred_t *cr) 8509 { 8510 int error; 8511 conn_t *connp; 8512 8513 /* All Solaris components should pass a cred for this operation. */ 8514 ASSERT(cr != NULL); 8515 8516 connp = (conn_t *)proto_handle; 8517 8518 if (sa == NULL) 8519 error = udp_do_unbind(connp); 8520 else 8521 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8522 8523 if (error < 0) { 8524 if (error == -TOUTSTATE) 8525 error = EINVAL; 8526 else 8527 error = proto_tlitosyserr(-error); 8528 } 8529 8530 return (error); 8531 } 8532 8533 static int 8534 udp_implicit_bind(conn_t *connp, cred_t *cr) 8535 { 8536 int error; 8537 8538 /* All Solaris components should pass a cred for this operation. */ 8539 ASSERT(cr != NULL); 8540 8541 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8542 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8543 } 8544 8545 /* 8546 * This routine removes a port number association from a stream. It 8547 * is called by udp_unbind and udp_tpi_unbind. 
8548 */ 8549 static int 8550 udp_do_unbind(conn_t *connp) 8551 { 8552 udp_t *udp = connp->conn_udp; 8553 udp_fanout_t *udpf; 8554 udp_stack_t *us = udp->udp_us; 8555 8556 if (cl_inet_unbind != NULL) { 8557 /* 8558 * Running in cluster mode - register unbind information 8559 */ 8560 if (udp->udp_ipversion == IPV4_VERSION) { 8561 (*cl_inet_unbind)( 8562 connp->conn_netstack->netstack_stackid, 8563 IPPROTO_UDP, AF_INET, 8564 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8565 (in_port_t)udp->udp_port, NULL); 8566 } else { 8567 (*cl_inet_unbind)( 8568 connp->conn_netstack->netstack_stackid, 8569 IPPROTO_UDP, AF_INET6, 8570 (uint8_t *)&(udp->udp_v6src), 8571 (in_port_t)udp->udp_port, NULL); 8572 } 8573 } 8574 8575 rw_enter(&udp->udp_rwlock, RW_WRITER); 8576 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8577 rw_exit(&udp->udp_rwlock); 8578 return (-TOUTSTATE); 8579 } 8580 udp->udp_pending_op = T_UNBIND_REQ; 8581 rw_exit(&udp->udp_rwlock); 8582 8583 /* 8584 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8585 * and therefore ip_unbind must never return NULL. 8586 */ 8587 ip_unbind(connp); 8588 8589 /* 8590 * Once we're unbound from IP, the pending operation may be cleared 8591 * here. 
8592 */ 8593 rw_enter(&udp->udp_rwlock, RW_WRITER); 8594 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8595 us->us_bind_fanout_size)]; 8596 8597 mutex_enter(&udpf->uf_lock); 8598 udp_bind_hash_remove(udp, B_TRUE); 8599 V6_SET_ZERO(udp->udp_v6src); 8600 V6_SET_ZERO(udp->udp_bound_v6src); 8601 udp->udp_port = 0; 8602 mutex_exit(&udpf->uf_lock); 8603 8604 udp->udp_pending_op = -1; 8605 udp->udp_state = TS_UNBND; 8606 if (udp->udp_family == AF_INET6) 8607 (void) udp_build_hdrs(udp); 8608 rw_exit(&udp->udp_rwlock); 8609 8610 return (0); 8611 } 8612 8613 static int 8614 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8615 { 8616 ire_t *ire; 8617 udp_fanout_t *udpf; 8618 udp_stack_t *us = udp->udp_us; 8619 8620 ASSERT(udp->udp_pending_op != -1); 8621 rw_enter(&udp->udp_rwlock, RW_WRITER); 8622 if (error == 0) { 8623 /* For udp_do_connect() success */ 8624 /* udp_do_bind() success will do nothing in here */ 8625 /* 8626 * If a broadcast/multicast address was bound, set 8627 * the source address to 0. 8628 * This ensures no datagrams with broadcast address 8629 * as source address are emitted (which would violate 8630 * RFC1122 - Hosts requirements) 8631 * 8632 * Note that when connecting the returned IRE is 8633 * for the destination address and we only perform 8634 * the broadcast check for the source address (it 8635 * is OK to connect to a broadcast/multicast address.) 8636 */ 8637 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8638 ire = (ire_t *)ire_mp->b_rptr; 8639 8640 /* 8641 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8642 * multicast local address. 8643 */ 8644 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8645 us->us_bind_fanout_size)]; 8646 if (ire->ire_type == IRE_BROADCAST && 8647 udp->udp_state != TS_DATA_XFER) { 8648 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8649 udp->udp_pending_op == O_T_BIND_REQ); 8650 /* 8651 * This was just a local bind to a broadcast 8652 * addr. 
8653 */ 8654 mutex_enter(&udpf->uf_lock); 8655 V6_SET_ZERO(udp->udp_v6src); 8656 mutex_exit(&udpf->uf_lock); 8657 if (udp->udp_family == AF_INET6) 8658 (void) udp_build_hdrs(udp); 8659 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8660 if (udp->udp_family == AF_INET6) 8661 (void) udp_build_hdrs(udp); 8662 } 8663 } 8664 } else { 8665 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8666 us->us_bind_fanout_size)]; 8667 mutex_enter(&udpf->uf_lock); 8668 8669 if (udp->udp_state == TS_DATA_XFER) { 8670 /* Connect failed */ 8671 /* Revert back to the bound source */ 8672 udp->udp_v6src = udp->udp_bound_v6src; 8673 udp->udp_state = TS_IDLE; 8674 } else { 8675 /* For udp_do_bind() failed */ 8676 V6_SET_ZERO(udp->udp_v6src); 8677 V6_SET_ZERO(udp->udp_bound_v6src); 8678 udp->udp_state = TS_UNBND; 8679 udp_bind_hash_remove(udp, B_TRUE); 8680 udp->udp_port = 0; 8681 } 8682 mutex_exit(&udpf->uf_lock); 8683 if (udp->udp_family == AF_INET6) 8684 (void) udp_build_hdrs(udp); 8685 } 8686 udp->udp_pending_op = -1; 8687 rw_exit(&udp->udp_rwlock); 8688 if (ire_mp != NULL) 8689 freeb(ire_mp); 8690 return (error); 8691 } 8692 8693 /* 8694 * It associates a default destination address with the stream. 
8695 */ 8696 static int 8697 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len, 8698 cred_t *cr) 8699 { 8700 sin6_t *sin6; 8701 sin_t *sin; 8702 in6_addr_t v6dst; 8703 ipaddr_t v4dst; 8704 uint16_t dstport; 8705 uint32_t flowinfo; 8706 mblk_t *ire_mp; 8707 udp_fanout_t *udpf; 8708 udp_t *udp, *udp1; 8709 ushort_t ipversion; 8710 udp_stack_t *us; 8711 int error; 8712 8713 udp = connp->conn_udp; 8714 us = udp->udp_us; 8715 8716 /* 8717 * Address has been verified by the caller 8718 */ 8719 switch (len) { 8720 default: 8721 /* 8722 * Should never happen 8723 */ 8724 return (EINVAL); 8725 8726 case sizeof (sin_t): 8727 sin = (sin_t *)sa; 8728 v4dst = sin->sin_addr.s_addr; 8729 dstport = sin->sin_port; 8730 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8731 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8732 ipversion = IPV4_VERSION; 8733 break; 8734 8735 case sizeof (sin6_t): 8736 sin6 = (sin6_t *)sa; 8737 v6dst = sin6->sin6_addr; 8738 dstport = sin6->sin6_port; 8739 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8740 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8741 ipversion = IPV4_VERSION; 8742 flowinfo = 0; 8743 } else { 8744 ipversion = IPV6_VERSION; 8745 flowinfo = sin6->sin6_flowinfo; 8746 } 8747 break; 8748 } 8749 8750 if (dstport == 0) 8751 return (-TBADADDR); 8752 8753 rw_enter(&udp->udp_rwlock, RW_WRITER); 8754 8755 /* 8756 * This UDP must have bound to a port already before doing a connect. 8757 * TPI mandates that users must send TPI primitives only 1 at a time 8758 * and wait for the response before sending the next primitive. 
8759 */ 8760 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8761 rw_exit(&udp->udp_rwlock); 8762 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8763 "udp_connect: bad state, %u", udp->udp_state); 8764 return (-TOUTSTATE); 8765 } 8766 udp->udp_pending_op = T_CONN_REQ; 8767 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8768 8769 if (ipversion == IPV4_VERSION) { 8770 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8771 udp->udp_ip_snd_options_len; 8772 } else { 8773 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8774 } 8775 8776 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8777 us->us_bind_fanout_size)]; 8778 8779 mutex_enter(&udpf->uf_lock); 8780 if (udp->udp_state == TS_DATA_XFER) { 8781 /* Already connected - clear out state */ 8782 udp->udp_v6src = udp->udp_bound_v6src; 8783 udp->udp_state = TS_IDLE; 8784 } 8785 8786 /* 8787 * Create a default IP header with no IP options. 8788 */ 8789 udp->udp_dstport = dstport; 8790 udp->udp_ipversion = ipversion; 8791 if (ipversion == IPV4_VERSION) { 8792 /* 8793 * Interpret a zero destination to mean loopback. 8794 * Update the T_CONN_REQ (sin/sin6) since it is used to 8795 * generate the T_CONN_CON. 8796 */ 8797 if (v4dst == INADDR_ANY) { 8798 v4dst = htonl(INADDR_LOOPBACK); 8799 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8800 if (udp->udp_family == AF_INET) { 8801 sin->sin_addr.s_addr = v4dst; 8802 } else { 8803 sin6->sin6_addr = v6dst; 8804 } 8805 } 8806 udp->udp_v6dst = v6dst; 8807 udp->udp_flowinfo = 0; 8808 8809 /* 8810 * If the destination address is multicast and 8811 * an outgoing multicast interface has been set, 8812 * use the address of that interface as our 8813 * source address if no source address has been set. 
8814 */ 8815 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8816 CLASSD(v4dst) && 8817 udp->udp_multicast_if_addr != INADDR_ANY) { 8818 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8819 &udp->udp_v6src); 8820 } 8821 } else { 8822 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8823 /* 8824 * Interpret a zero destination to mean loopback. 8825 * Update the T_CONN_REQ (sin/sin6) since it is used to 8826 * generate the T_CONN_CON. 8827 */ 8828 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 8829 v6dst = ipv6_loopback; 8830 sin6->sin6_addr = v6dst; 8831 } 8832 udp->udp_v6dst = v6dst; 8833 udp->udp_flowinfo = flowinfo; 8834 /* 8835 * If the destination address is multicast and 8836 * an outgoing multicast interface has been set, 8837 * then the ip bind logic will pick the correct source 8838 * address (i.e. matching the outgoing multicast interface). 8839 */ 8840 } 8841 8842 /* 8843 * Verify that the src/port/dst/port is unique for all 8844 * connections in TS_DATA_XFER 8845 */ 8846 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 8847 if (udp1->udp_state != TS_DATA_XFER) 8848 continue; 8849 if (udp->udp_port != udp1->udp_port || 8850 udp->udp_ipversion != udp1->udp_ipversion || 8851 dstport != udp1->udp_dstport || 8852 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 8853 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 8854 !(IPCL_ZONE_MATCH(udp->udp_connp, 8855 udp1->udp_connp->conn_zoneid) || 8856 IPCL_ZONE_MATCH(udp1->udp_connp, 8857 udp->udp_connp->conn_zoneid))) 8858 continue; 8859 mutex_exit(&udpf->uf_lock); 8860 udp->udp_pending_op = -1; 8861 rw_exit(&udp->udp_rwlock); 8862 return (-TBADADDR); 8863 } 8864 8865 if (cl_inet_connect2 != NULL) { 8866 CL_INET_UDP_CONNECT(connp, udp, B_TRUE, &v6dst, dstport, error); 8867 if (error != 0) { 8868 mutex_exit(&udpf->uf_lock); 8869 udp->udp_pending_op = -1; 8870 rw_exit(&udp->udp_rwlock); 8871 return (-TBADADDR); 8872 } 8873 } 8874 8875 udp->udp_state = TS_DATA_XFER; 8876 
mutex_exit(&udpf->uf_lock); 8877 8878 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 8879 if (ire_mp == NULL) { 8880 mutex_enter(&udpf->uf_lock); 8881 udp->udp_state = TS_IDLE; 8882 udp->udp_pending_op = -1; 8883 mutex_exit(&udpf->uf_lock); 8884 rw_exit(&udp->udp_rwlock); 8885 return (ENOMEM); 8886 } 8887 8888 rw_exit(&udp->udp_rwlock); 8889 8890 ire_mp->b_wptr += sizeof (ire_t); 8891 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8892 8893 if (udp->udp_family == AF_INET) { 8894 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 8895 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 8896 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 8897 B_TRUE, B_TRUE, cr); 8898 } else { 8899 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 8900 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 8901 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE, cr); 8902 } 8903 8904 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 8905 } 8906 8907 /* ARGSUSED */ 8908 static int 8909 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 8910 socklen_t len, sock_connid_t *id, cred_t *cr) 8911 { 8912 conn_t *connp = (conn_t *)proto_handle; 8913 udp_t *udp = connp->conn_udp; 8914 int error; 8915 boolean_t did_bind = B_FALSE; 8916 8917 /* All Solaris components should pass a cred for this operation. */ 8918 ASSERT(cr != NULL); 8919 8920 if (sa == NULL) { 8921 /* 8922 * Disconnect 8923 * Make sure we are connected 8924 */ 8925 if (udp->udp_state != TS_DATA_XFER) 8926 return (EINVAL); 8927 8928 error = udp_disconnect(connp); 8929 return (error); 8930 } 8931 8932 error = proto_verify_ip_addr(udp->udp_family, sa, len); 8933 if (error != 0) 8934 goto done; 8935 8936 /* do an implicit bind if necessary */ 8937 if (udp->udp_state == TS_UNBND) { 8938 error = udp_implicit_bind(connp, cr); 8939 /* 8940 * We could be racing with an actual bind, in which case 8941 * we would see EPROTO. We cross our fingers and try 8942 * to connect. 
8943 */ 8944 if (!(error == 0 || error == EPROTO)) 8945 goto done; 8946 did_bind = B_TRUE; 8947 } 8948 /* 8949 * set SO_DGRAM_ERRIND 8950 */ 8951 udp->udp_dgram_errind = B_TRUE; 8952 8953 error = udp_do_connect(connp, sa, len, cr); 8954 8955 if (error != 0 && did_bind) { 8956 int unbind_err; 8957 8958 unbind_err = udp_do_unbind(connp); 8959 ASSERT(unbind_err == 0); 8960 } 8961 8962 if (error == 0) { 8963 *id = 0; 8964 (*connp->conn_upcalls->su_connected) 8965 (connp->conn_upper_handle, 0, NULL, -1); 8966 } else if (error < 0) { 8967 error = proto_tlitosyserr(-error); 8968 } 8969 8970 done: 8971 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 8972 /* 8973 * No need to hold locks to set state 8974 * after connect failure socket state is undefined 8975 * We set the state only to imitate old sockfs behavior 8976 */ 8977 udp->udp_state = TS_IDLE; 8978 } 8979 return (error); 8980 } 8981 8982 /* ARGSUSED */ 8983 int 8984 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 8985 cred_t *cr) 8986 { 8987 conn_t *connp = (conn_t *)proto_handle; 8988 udp_t *udp = connp->conn_udp; 8989 udp_stack_t *us = udp->udp_us; 8990 int error = 0; 8991 8992 ASSERT(DB_TYPE(mp) == M_DATA); 8993 8994 /* All Solaris components should pass a cred for this operation. */ 8995 ASSERT(cr != NULL); 8996 8997 /* If labeled then sockfs should have already set db_credp */ 8998 ASSERT(!is_system_labeled() || msg_getcred(mp, NULL) != NULL); 8999 9000 /* 9001 * If the socket is connected and no change in destination 9002 */ 9003 if (msg->msg_namelen == 0) { 9004 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 9005 if (error == EDESTADDRREQ) 9006 return (error); 9007 else 9008 return (udp->udp_dgram_errind ? error : 0); 9009 } 9010 9011 /* 9012 * Do an implicit bind if necessary. 9013 */ 9014 if (udp->udp_state == TS_UNBND) { 9015 error = udp_implicit_bind(connp, cr); 9016 /* 9017 * We could be racing with an actual bind, in which case 9018 * we would see EPROTO. 
We cross our fingers and try 9019 * to send. 9020 */ 9021 if (!(error == 0 || error == EPROTO)) { 9022 freemsg(mp); 9023 return (error); 9024 } 9025 } 9026 9027 rw_enter(&udp->udp_rwlock, RW_WRITER); 9028 9029 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 9030 rw_exit(&udp->udp_rwlock); 9031 freemsg(mp); 9032 return (EISCONN); 9033 } 9034 9035 9036 if (udp->udp_delayed_error != 0) { 9037 boolean_t match; 9038 9039 error = udp->udp_delayed_error; 9040 match = B_FALSE; 9041 udp->udp_delayed_error = 0; 9042 switch (udp->udp_family) { 9043 case AF_INET: { 9044 /* Compare just IP address and port */ 9045 sin_t *sin1 = (sin_t *)msg->msg_name; 9046 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 9047 9048 if (msg->msg_namelen == sizeof (sin_t) && 9049 sin1->sin_port == sin2->sin_port && 9050 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 9051 match = B_TRUE; 9052 9053 break; 9054 } 9055 case AF_INET6: { 9056 sin6_t *sin1 = (sin6_t *)msg->msg_name; 9057 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 9058 9059 if (msg->msg_namelen == sizeof (sin6_t) && 9060 sin1->sin6_port == sin2->sin6_port && 9061 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 9062 &sin2->sin6_addr)) 9063 match = B_TRUE; 9064 break; 9065 } 9066 default: 9067 ASSERT(0); 9068 } 9069 9070 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 9071 9072 if (match) { 9073 rw_exit(&udp->udp_rwlock); 9074 freemsg(mp); 9075 return (error); 9076 } 9077 } 9078 9079 error = proto_verify_ip_addr(udp->udp_family, 9080 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 9081 rw_exit(&udp->udp_rwlock); 9082 9083 if (error != 0) { 9084 freemsg(mp); 9085 return (error); 9086 } 9087 9088 error = udp_send_not_connected(connp, mp, 9089 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 9090 curproc->p_pid); 9091 if (error != 0) { 9092 UDP_STAT(us, udp_out_err_output); 9093 freemsg(mp); 9094 } 9095 return (udp->udp_dgram_errind ? 
error : 0); 9096 } 9097 9098 int 9099 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q, 9100 boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb) 9101 { 9102 conn_t *connp = (conn_t *)proto_handle; 9103 udp_t *udp; 9104 struct T_capability_ack tca; 9105 struct sockaddr_in6 laddr, faddr; 9106 socklen_t laddrlen, faddrlen; 9107 short opts; 9108 struct stroptions *stropt; 9109 mblk_t *stropt_mp; 9110 int error; 9111 9112 udp = connp->conn_udp; 9113 9114 stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL); 9115 9116 /* 9117 * setup the fallback stream that was allocated 9118 */ 9119 connp->conn_dev = (dev_t)RD(q)->q_ptr; 9120 connp->conn_minor_arena = WR(q)->q_ptr; 9121 9122 RD(q)->q_ptr = WR(q)->q_ptr = connp; 9123 9124 WR(q)->q_qinfo = &udp_winit; 9125 9126 connp->conn_rq = RD(q); 9127 connp->conn_wq = WR(q); 9128 9129 /* Notify stream head about options before sending up data */ 9130 stropt_mp->b_datap->db_type = M_SETOPTS; 9131 stropt_mp->b_wptr += sizeof (*stropt); 9132 stropt = (struct stroptions *)stropt_mp->b_rptr; 9133 stropt->so_flags = SO_WROFF | SO_HIWAT; 9134 stropt->so_wroff = 9135 (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra); 9136 stropt->so_hiwat = udp->udp_rcv_disply_hiwat; 9137 putnext(RD(q), stropt_mp); 9138 9139 /* 9140 * Free the helper stream 9141 */ 9142 ip_free_helper_stream(connp); 9143 9144 if (!direct_sockfs) 9145 udp_disable_direct_sockfs(udp); 9146 9147 /* 9148 * Collect the information needed to sync with the sonode 9149 */ 9150 udp_do_capability_ack(udp, &tca, TC1_INFO); 9151 9152 laddrlen = faddrlen = sizeof (sin6_t); 9153 (void) udp_getsockname((sock_lower_handle_t)connp, 9154 (struct sockaddr *)&laddr, &laddrlen, CRED()); 9155 error = udp_getpeername((sock_lower_handle_t)connp, 9156 (struct sockaddr *)&faddr, &faddrlen, CRED()); 9157 if (error != 0) 9158 faddrlen = 0; 9159 9160 opts = 0; 9161 if (udp->udp_dgram_errind) 9162 opts |= SO_DGRAM_ERRIND; 9163 if (udp->udp_dontroute) 9164 
opts |= SO_DONTROUTE; 9165 9166 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 9167 (struct sockaddr *)&laddr, laddrlen, 9168 (struct sockaddr *)&faddr, faddrlen, opts); 9169 9170 mutex_enter(&udp->udp_recv_lock); 9171 /* 9172 * Attempts to send data up during fallback will result in it being 9173 * queued in udp_t. Now we push up any queued packets. 9174 */ 9175 while (udp->udp_fallback_queue_head != NULL) { 9176 mblk_t *mp; 9177 mp = udp->udp_fallback_queue_head; 9178 udp->udp_fallback_queue_head = mp->b_next; 9179 mutex_exit(&udp->udp_recv_lock); 9180 mp->b_next = NULL; 9181 putnext(RD(q), mp); 9182 mutex_enter(&udp->udp_recv_lock); 9183 } 9184 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 9185 /* 9186 * No longer a streams less socket 9187 */ 9188 rw_enter(&udp->udp_rwlock, RW_WRITER); 9189 connp->conn_flags &= ~IPCL_NONSTR; 9190 rw_exit(&udp->udp_rwlock); 9191 9192 mutex_exit(&udp->udp_recv_lock); 9193 9194 ASSERT(connp->conn_ref >= 1); 9195 9196 return (0); 9197 } 9198 9199 static int 9200 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9201 { 9202 sin_t *sin = (sin_t *)sa; 9203 sin6_t *sin6 = (sin6_t *)sa; 9204 9205 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9206 ASSERT(udp != NULL); 9207 9208 if (udp->udp_state != TS_DATA_XFER) 9209 return (ENOTCONN); 9210 9211 switch (udp->udp_family) { 9212 case AF_INET: 9213 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9214 9215 if (*salenp < sizeof (sin_t)) 9216 return (EINVAL); 9217 9218 *salenp = sizeof (sin_t); 9219 *sin = sin_null; 9220 sin->sin_family = AF_INET; 9221 sin->sin_port = udp->udp_dstport; 9222 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 9223 break; 9224 case AF_INET6: 9225 if (*salenp < sizeof (sin6_t)) 9226 return (EINVAL); 9227 9228 *salenp = sizeof (sin6_t); 9229 *sin6 = sin6_null; 9230 sin6->sin6_family = AF_INET6; 9231 sin6->sin6_port = udp->udp_dstport; 9232 sin6->sin6_addr = udp->udp_v6dst; 9233 sin6->sin6_flowinfo = udp->udp_flowinfo; 9234 break; 9235 
} 9236 9237 return (0); 9238 } 9239 9240 /* ARGSUSED */ 9241 int 9242 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9243 socklen_t *salenp, cred_t *cr) 9244 { 9245 conn_t *connp = (conn_t *)proto_handle; 9246 udp_t *udp = connp->conn_udp; 9247 int error; 9248 9249 /* All Solaris components should pass a cred for this operation. */ 9250 ASSERT(cr != NULL); 9251 9252 ASSERT(udp != NULL); 9253 9254 rw_enter(&udp->udp_rwlock, RW_READER); 9255 9256 error = udp_do_getpeername(udp, sa, salenp); 9257 9258 rw_exit(&udp->udp_rwlock); 9259 9260 return (error); 9261 } 9262 9263 static int 9264 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9265 { 9266 sin_t *sin = (sin_t *)sa; 9267 sin6_t *sin6 = (sin6_t *)sa; 9268 9269 ASSERT(udp != NULL); 9270 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9271 9272 switch (udp->udp_family) { 9273 case AF_INET: 9274 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9275 9276 if (*salenp < sizeof (sin_t)) 9277 return (EINVAL); 9278 9279 *salenp = sizeof (sin_t); 9280 *sin = sin_null; 9281 sin->sin_family = AF_INET; 9282 if (udp->udp_state == TS_UNBND) { 9283 break; 9284 } 9285 sin->sin_port = udp->udp_port; 9286 9287 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9288 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9289 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9290 } else { 9291 /* 9292 * INADDR_ANY 9293 * udp_v6src is not set, we might be bound to 9294 * broadcast/multicast. 
Use udp_bound_v6src as 9295 * local address instead (that could 9296 * also still be INADDR_ANY) 9297 */ 9298 sin->sin_addr.s_addr = 9299 V4_PART_OF_V6(udp->udp_bound_v6src); 9300 } 9301 break; 9302 9303 case AF_INET6: 9304 if (*salenp < sizeof (sin6_t)) 9305 return (EINVAL); 9306 9307 *salenp = sizeof (sin6_t); 9308 *sin6 = sin6_null; 9309 sin6->sin6_family = AF_INET6; 9310 if (udp->udp_state == TS_UNBND) { 9311 break; 9312 } 9313 sin6->sin6_port = udp->udp_port; 9314 9315 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9316 sin6->sin6_addr = udp->udp_v6src; 9317 } else { 9318 /* 9319 * UNSPECIFIED 9320 * udp_v6src is not set, we might be bound to 9321 * broadcast/multicast. Use udp_bound_v6src as 9322 * local address instead (that could 9323 * also still be UNSPECIFIED) 9324 */ 9325 sin6->sin6_addr = udp->udp_bound_v6src; 9326 } 9327 } 9328 return (0); 9329 } 9330 9331 /* ARGSUSED */ 9332 int 9333 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9334 socklen_t *salenp, cred_t *cr) 9335 { 9336 conn_t *connp = (conn_t *)proto_handle; 9337 udp_t *udp = connp->conn_udp; 9338 int error; 9339 9340 /* All Solaris components should pass a cred for this operation. */ 9341 ASSERT(cr != NULL); 9342 9343 ASSERT(udp != NULL); 9344 rw_enter(&udp->udp_rwlock, RW_READER); 9345 9346 error = udp_do_getsockname(udp, sa, salenp); 9347 9348 rw_exit(&udp->udp_rwlock); 9349 9350 return (error); 9351 } 9352 9353 int 9354 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9355 void *optvalp, socklen_t *optlen, cred_t *cr) 9356 { 9357 conn_t *connp = (conn_t *)proto_handle; 9358 udp_t *udp = connp->conn_udp; 9359 int error; 9360 t_uscalar_t max_optbuf_len; 9361 void *optvalp_buf; 9362 int len; 9363 9364 /* All Solaris components should pass a cred for this operation. 
*/ 9365 ASSERT(cr != NULL); 9366 9367 error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len, 9368 udp_opt_obj.odb_opt_des_arr, 9369 udp_opt_obj.odb_opt_arr_cnt, 9370 udp_opt_obj.odb_topmost_tpiprovider, 9371 B_FALSE, B_TRUE, cr); 9372 if (error != 0) { 9373 if (error < 0) 9374 error = proto_tlitosyserr(-error); 9375 return (error); 9376 } 9377 9378 optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP); 9379 rw_enter(&udp->udp_rwlock, RW_READER); 9380 len = udp_opt_get(connp, level, option_name, optvalp_buf); 9381 rw_exit(&udp->udp_rwlock); 9382 9383 if (len < 0) { 9384 /* 9385 * Pass on to IP 9386 */ 9387 kmem_free(optvalp_buf, max_optbuf_len); 9388 return (ip_get_options(connp, level, option_name, 9389 optvalp, optlen, cr)); 9390 } else { 9391 /* 9392 * update optlen and copy option value 9393 */ 9394 t_uscalar_t size = MIN(len, *optlen); 9395 bcopy(optvalp_buf, optvalp, size); 9396 bcopy(&size, optlen, sizeof (size)); 9397 9398 kmem_free(optvalp_buf, max_optbuf_len); 9399 return (0); 9400 } 9401 } 9402 9403 int 9404 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name, 9405 const void *optvalp, socklen_t optlen, cred_t *cr) 9406 { 9407 conn_t *connp = (conn_t *)proto_handle; 9408 udp_t *udp = connp->conn_udp; 9409 int error; 9410 9411 /* All Solaris components should pass a cred for this operation. 
*/ 9412 ASSERT(cr != NULL); 9413 9414 error = proto_opt_check(level, option_name, optlen, NULL, 9415 udp_opt_obj.odb_opt_des_arr, 9416 udp_opt_obj.odb_opt_arr_cnt, 9417 udp_opt_obj.odb_topmost_tpiprovider, 9418 B_TRUE, B_FALSE, cr); 9419 9420 if (error != 0) { 9421 if (error < 0) 9422 error = proto_tlitosyserr(-error); 9423 return (error); 9424 } 9425 9426 rw_enter(&udp->udp_rwlock, RW_WRITER); 9427 error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name, 9428 optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp, 9429 NULL, cr); 9430 rw_exit(&udp->udp_rwlock); 9431 9432 if (error < 0) { 9433 /* 9434 * Pass on to ip 9435 */ 9436 error = ip_set_options(connp, level, option_name, optvalp, 9437 optlen, cr); 9438 } 9439 9440 return (error); 9441 } 9442 9443 void 9444 udp_clr_flowctrl(sock_lower_handle_t proto_handle) 9445 { 9446 conn_t *connp = (conn_t *)proto_handle; 9447 udp_t *udp = connp->conn_udp; 9448 9449 mutex_enter(&udp->udp_recv_lock); 9450 connp->conn_flow_cntrld = B_FALSE; 9451 mutex_exit(&udp->udp_recv_lock); 9452 } 9453 9454 /* ARGSUSED */ 9455 int 9456 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr) 9457 { 9458 conn_t *connp = (conn_t *)proto_handle; 9459 9460 /* All Solaris components should pass a cred for this operation. */ 9461 ASSERT(cr != NULL); 9462 9463 /* shut down the send side */ 9464 if (how != SHUT_RD) 9465 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9466 SOCK_OPCTL_SHUT_SEND, 0); 9467 /* shut down the recv side */ 9468 if (how != SHUT_WR) 9469 (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle, 9470 SOCK_OPCTL_SHUT_RECV, 0); 9471 return (0); 9472 } 9473 9474 int 9475 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg, 9476 int mode, int32_t *rvalp, cred_t *cr) 9477 { 9478 conn_t *connp = (conn_t *)proto_handle; 9479 int error; 9480 9481 /* All Solaris components should pass a cred for this operation. 
*/ 9482 ASSERT(cr != NULL); 9483 9484 switch (cmd) { 9485 case ND_SET: 9486 case ND_GET: 9487 case _SIOCSOCKFALLBACK: 9488 case TI_GETPEERNAME: 9489 case TI_GETMYNAME: 9490 ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket", 9491 cmd)); 9492 error = EINVAL; 9493 break; 9494 default: 9495 /* 9496 * Pass on to IP using helper stream 9497 */ 9498 error = ldi_ioctl(connp->conn_helper_info->iphs_handle, 9499 cmd, arg, mode, cr, rvalp); 9500 break; 9501 } 9502 return (error); 9503 } 9504 9505 /* ARGSUSED */ 9506 int 9507 udp_accept(sock_lower_handle_t lproto_handle, 9508 sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle, 9509 cred_t *cr) 9510 { 9511 return (EOPNOTSUPP); 9512 } 9513 9514 /* ARGSUSED */ 9515 int 9516 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr) 9517 { 9518 return (EOPNOTSUPP); 9519 } 9520 9521 sock_downcalls_t sock_udp_downcalls = { 9522 udp_activate, /* sd_activate */ 9523 udp_accept, /* sd_accept */ 9524 udp_bind, /* sd_bind */ 9525 udp_listen, /* sd_listen */ 9526 udp_connect, /* sd_connect */ 9527 udp_getpeername, /* sd_getpeername */ 9528 udp_getsockname, /* sd_getsockname */ 9529 udp_getsockopt, /* sd_getsockopt */ 9530 udp_setsockopt, /* sd_setsockopt */ 9531 udp_send, /* sd_send */ 9532 NULL, /* sd_send_uio */ 9533 NULL, /* sd_recv_uio */ 9534 NULL, /* sd_poll */ 9535 udp_shutdown, /* sd_shutdown */ 9536 udp_clr_flowctrl, /* sd_setflowctrl */ 9537 udp_ioctl, /* sd_ioctl */ 9538 udp_close /* sd_close */ 9539 }; 9540