/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/time.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsubr.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/ucred.h>
#include <sys/zone.h>

#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/vtrace.h>
#include <sys/sdt.h>
#include <sys/debug.h>
#include <sys/isa_defs.h>
#include <sys/random.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>
#include <net/if.h>
#include <net/route.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <inet/ip_multi.h>
#include <inet/ip_ndp.h>
#include <inet/proto_set.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/udp_impl.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>
#include <inet/ipp_common.h>
#include <sys/squeue_impl.h>
#include <inet/ipnet.h>

/*
 * The ipsec_info.h header file is here since it has the definition for the
 * M_CTL message types used by IP to convey information to the ULP. The
 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence.
 */
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <rpc/pmap_prot.h>

/*
 * Synchronization notes:
 *
 * UDP is MT and uses the usual kernel synchronization primitives. There are 2
 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock.
 * We also use conn_lock when updating things that affect the IP classifier
 * lookup.
 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock.
 *
 * The fanout lock uf_lock:
 * When a UDP endpoint is bound to a local port, it is inserted into
 * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
 * The size of the array is controlled by the udp_bind_fanout_size variable.
 * This variable can be changed in /etc/system if the default value is
 * not large enough.  Each bind hash bucket is protected by a per bucket
 * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
 * structure and a few other fields in the udp_t. A UDP endpoint is removed
 * from the bind hash list only when it is being unbound or being closed.
 * The per bucket lock also protects a UDP endpoint's state changes.
 *
 * The udp_rwlock:
 * This protects most of the other fields in the udp_t. The exact list of
 * fields which are protected by each of the above locks is documented in
 * the udp_t structure definition.
 *
 * Plumbing notes:
 * UDP is always a device driver. For compatibility with mibopen() code
 * it is possible to I_PUSH "udp", but that results in pushing a passthrough
 * dummy module.
 *
 * The above implies that we don't support any intermediate module to
 * reside in between /dev/ip and udp -- in fact, we never supported such
 * a scenario in the past as the inter-layer communication semantics have
 * always been private.
 */

/* For /etc/system control */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;

#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"

/* Option processing attrs */
typedef struct udpattrs_s {
	union {
		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
		ip4_pkt_t	*udpattr_ipp4;	/* For V4 */
	} udpattr_ippu;
#define	udpattr_ipp6	udpattr_ippu.udpattr_ipp6
#define	udpattr_ipp4	udpattr_ippu.udpattr_ipp4
	mblk_t		*udpattr_mb;
	boolean_t	udpattr_credset;
} udpattrs_t;

static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static int	udp_build_hdrs(udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_tpi_close(queue_t *q, int flags);
static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(conn_t *, mblk_t *);
static void	udp_icmp_error_ipv6(conn_t *, mblk_t *);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static void	udp_input(void *, mblk_t *, void *);
static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
		    t_scalar_t addr_length);
static void	udp_lrput(queue_t *, mblk_t *);
static void	udp_lwput(queue_t *, mblk_t *);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp, boolean_t isv6);
static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int
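/*
 * Illustrative example (an editorial addition, not part of the original
 * source): the /etc/system tunables mentioned above use the usual
 * "set module:variable" syntax and take effect on the next boot, e.g.
 *
 *	set udp:udp_bind_fanout_size = 1024
 *	set udp:udp_random_anon_port = 0
 *
 * udp_bind_fanout_size is assumed to work best as a power of two, since
 * the bucket index is derived by masking the port number.
 */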
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 186 int *errorp, udpattrs_t *udpattrs); 187 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 188 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 189 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 190 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 191 cred_t *cr); 192 static void udp_report_item(mblk_t *mp, udp_t *udp); 193 static int udp_rinfop(queue_t *q, infod_t *dp); 194 static int udp_rrw(queue_t *q, struiod_t *dp); 195 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 196 cred_t *cr); 197 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 198 ipha_t *ipha); 199 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 200 t_scalar_t destlen, t_scalar_t err); 201 static void udp_tpi_unbind(queue_t *q, mblk_t *mp); 202 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 203 boolean_t random); 204 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 205 int *, boolean_t, struct nmsghdr *, cred_t *, pid_t); 206 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 207 int *error, struct nmsghdr *msg, cred_t *cr, pid_t pid); 208 static void udp_wput_other(queue_t *q, mblk_t *mp); 209 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 210 static void udp_wput_fallback(queue_t *q, mblk_t *mp); 211 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 212 213 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 214 static void udp_stack_fini(netstackid_t stackid, void *arg); 215 216 static void *udp_kstat_init(netstackid_t stackid); 217 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 218 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 219 static void udp_kstat2_fini(netstackid_t, kstat_t *); 220 static int udp_kstat_update(kstat_t *kp, int rw); 221 222 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 223 uint_t pkt_len); 224 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 225 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 226 227 static int udp_send_connected(conn_t *, mblk_t *, struct nmsghdr *, 228 cred_t *, pid_t); 229 230 /* Common routine for TPI and socket module */ 231 static conn_t *udp_do_open(cred_t *, boolean_t, int); 232 static void udp_do_close(conn_t *); 233 static int udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *, 234 boolean_t); 235 static int udp_do_unbind(conn_t *); 236 static int udp_do_getsockname(udp_t *, struct sockaddr *, uint_t *); 237 static int udp_do_getpeername(udp_t *, struct sockaddr *, uint_t *); 238 239 int udp_getsockname(sock_lower_handle_t, 240 struct sockaddr *, socklen_t *, cred_t *); 241 int udp_getpeername(sock_lower_handle_t, 242 struct sockaddr *, socklen_t *, cred_t *); 243 static int udp_do_connect(conn_t *, const struct sockaddr *, socklen_t); 244 static int udp_post_ip_bind_connect(udp_t *, mblk_t *, int); 245 246 #define UDP_RECV_HIWATER (56 * 1024) 247 #define UDP_RECV_LOWATER 128 248 #define UDP_XMIT_HIWATER (56 * 1024) 249 #define UDP_XMIT_LOWATER 1024 250 251 static struct module_info udp_mod_info = { 252 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 253 }; 254 255 /* 256 * Entry points for UDP as a device. 257 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 
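 *
 * Illustrative note (an assumption, not from the original comment): these
 * entry points are normally reached through sockfs, e.g.
 * socket(AF_INET, SOCK_DGRAM, 0), but the devices can also be opened
 * directly for TLI/XTI style access, e.g. t_open("/dev/udp", O_RDWR, NULL).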
258 */ 259 static struct qinit udp_rinitv4 = { 260 NULL, NULL, udp_openv4, udp_tpi_close, NULL, 261 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 262 }; 263 264 static struct qinit udp_rinitv6 = { 265 NULL, NULL, udp_openv6, udp_tpi_close, NULL, 266 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 267 }; 268 269 static struct qinit udp_winit = { 270 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, 271 &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE 272 }; 273 274 /* UDP entry point during fallback */ 275 struct qinit udp_fallback_sock_winit = { 276 (pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info 277 }; 278 279 /* 280 * UDP needs to handle I_LINK and I_PLINK since ifconfig 281 * likes to use it as a place to hang the various streams. 282 */ 283 static struct qinit udp_lrinit = { 284 (pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, 285 &udp_mod_info 286 }; 287 288 static struct qinit udp_lwinit = { 289 (pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, 290 &udp_mod_info 291 }; 292 293 /* For AF_INET aka /dev/udp */ 294 struct streamtab udpinfov4 = { 295 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 296 }; 297 298 /* For AF_INET6 aka /dev/udp6 */ 299 struct streamtab udpinfov6 = { 300 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 301 }; 302 303 static sin_t sin_null; /* Zero address for quick clears */ 304 static sin6_t sin6_null; /* Zero address for quick clears */ 305 306 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 307 308 /* Default structure copied into T_INFO_ACK messages */ 309 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 310 T_INFO_ACK, 311 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 312 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 313 T_INVALID, /* CDATA_size. udp does not support connect data. */ 314 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 315 sizeof (sin_t), /* ADDR_size. */ 316 0, /* OPT_size - not initialized here */ 317 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 318 T_CLTS, /* SERV_type. udp supports connection-less. */ 319 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 320 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 321 }; 322 323 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 324 325 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 326 T_INFO_ACK, 327 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 328 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 329 T_INVALID, /* CDATA_size. udp does not support connect data. */ 330 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 331 sizeof (sin6_t), /* ADDR_size. */ 332 0, /* OPT_size - not initialized here */ 333 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 334 T_CLTS, /* SERV_type. udp supports connection-less. */ 335 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 336 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 337 }; 338 339 /* largest UDP port number */ 340 #define UDP_MAX_PORT 65535 341 342 /* 343 * Table of ND variables supported by udp. These are loaded into us_nd 344 * in udp_open. 345 * All of these are alterable, within the min/max values given, at run time. 
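 *
 * Illustrative usage (an assumption, not part of the original comment):
 * from user land these variables are read and written with ndd(1M), e.g.
 *
 *	ndd /dev/udp udp_xmit_hiwat
 *	ndd -set /dev/udp udp_smallest_anon_port 4096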
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /* min		max		value		name */
 { 0L,		256,		32,		"udp_wroff_extra" },
 { 1L,		255,		255,		"udp_ipv4_ttl" },
 { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
 { 0,		1,		1,		"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30),	UDP_XMIT_HIWATER, "udp_xmit_hiwat"},
 { 0,		(1<<30),	UDP_XMIT_LOWATER, "udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30),	UDP_RECV_HIWATER, "udp_recv_hiwat"},
 { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
 { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 */

void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extensions (TX) notes: TX allows the administrator to mark or
 * reserve ports as Multilevel Ports (MLPs).  An MLP has a special function
 * on TX systems.  Once a port is made an MLP, it is no longer available as
 * an ordinary port.  This creates "holes" in the port name space.  It
 * may be necessary to skip the "holes" to find a suitable anon port.
 */
static in_port_t
udp_get_next_priv_port(udp_t *udp)
{
	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
	in_port_t nextport;
	boolean_t restart = B_FALSE;
	udp_stack_t *us = udp->udp_us;

retry:
	if (next_priv_port < us->us_min_anonpriv_port ||
	    next_priv_port >= IPPORT_RESERVED) {
		next_priv_port = IPPORT_RESERVED - 1;
		if (restart)
			return (0);
		restart = B_TRUE;
	}

	if (is_system_labeled() &&
	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
		next_priv_port = nextport;
		goto retry;
	}

	return (next_priv_port--);
}

/* UDP bind hash report triggered via the Named Dispatch mechanism. */
/* ARGSUSED */
static int
udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udp_fanout_t	*udpf;
	int		i;
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;
	udp_stack_t	*us;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	us = udp->udp_us;

	/* Refer to comments in udp_status_report(). */
	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
			return (0);
		}
	}
	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
		/* The following may work even if we cannot get a large buf.
*/ 444 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 445 return (0); 446 } 447 448 (void) mi_mpprintf(mp, 449 "UDP " MI_COL_HDRPAD_STR 450 /* 12345678[89ABCDEF] */ 451 " zone lport src addr dest addr port state"); 452 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 453 454 zoneid = connp->conn_zoneid; 455 456 for (i = 0; i < us->us_bind_fanout_size; i++) { 457 udpf = &us->us_bind_fanout[i]; 458 mutex_enter(&udpf->uf_lock); 459 460 /* Print the hash index. */ 461 udp = udpf->uf_udp; 462 if (zoneid != GLOBAL_ZONEID) { 463 /* skip to first entry in this zone; might be none */ 464 while (udp != NULL && 465 udp->udp_connp->conn_zoneid != zoneid) 466 udp = udp->udp_bind_hash; 467 } 468 if (udp != NULL) { 469 uint_t print_len, buf_len; 470 471 buf_len = mp->b_cont->b_datap->db_lim - 472 mp->b_cont->b_wptr; 473 print_len = snprintf((char *)mp->b_cont->b_wptr, 474 buf_len, "%d\n", i); 475 if (print_len < buf_len) { 476 mp->b_cont->b_wptr += print_len; 477 } else { 478 mp->b_cont->b_wptr += buf_len; 479 } 480 for (; udp != NULL; udp = udp->udp_bind_hash) { 481 if (zoneid == GLOBAL_ZONEID || 482 zoneid == udp->udp_connp->conn_zoneid) 483 udp_report_item(mp->b_cont, udp); 484 } 485 } 486 mutex_exit(&udpf->uf_lock); 487 } 488 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 489 return (0); 490 } 491 492 /* 493 * Hash list removal routine for udp_t structures. 494 */ 495 static void 496 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 497 { 498 udp_t *udpnext; 499 kmutex_t *lockp; 500 udp_stack_t *us = udp->udp_us; 501 502 if (udp->udp_ptpbhn == NULL) 503 return; 504 505 /* 506 * Extract the lock pointer in case there are concurrent 507 * hash_remove's for this instance. 508 */ 509 ASSERT(udp->udp_port != 0); 510 if (!caller_holds_lock) { 511 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 512 us->us_bind_fanout_size)].uf_lock; 513 ASSERT(lockp != NULL); 514 mutex_enter(lockp); 515 } 516 if (udp->udp_ptpbhn != NULL) { 517 udpnext = udp->udp_bind_hash; 518 if (udpnext != NULL) { 519 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 520 udp->udp_bind_hash = NULL; 521 } 522 *udp->udp_ptpbhn = udpnext; 523 udp->udp_ptpbhn = NULL; 524 } 525 if (!caller_holds_lock) { 526 mutex_exit(lockp); 527 } 528 } 529 530 static void 531 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 532 { 533 udp_t **udpp; 534 udp_t *udpnext; 535 536 ASSERT(MUTEX_HELD(&uf->uf_lock)); 537 ASSERT(udp->udp_ptpbhn == NULL); 538 udpp = &uf->uf_udp; 539 udpnext = udpp[0]; 540 if (udpnext != NULL) { 541 /* 542 * If the new udp bound to the INADDR_ANY address 543 * and the first one in the list is not bound to 544 * INADDR_ANY we skip all entries until we find the 545 * first one bound to INADDR_ANY. 546 * This makes sure that applications binding to a 547 * specific address get preference over those binding to 548 * INADDR_ANY. 549 */ 550 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 551 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 552 while ((udpnext = udpp[0]) != NULL && 553 !V6_OR_V4_INADDR_ANY( 554 udpnext->udp_bound_v6src)) { 555 udpp = &(udpnext->udp_bind_hash); 556 } 557 if (udpnext != NULL) 558 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 559 } else { 560 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 561 } 562 } 563 udp->udp_bind_hash = udpnext; 564 udp->udp_ptpbhn = udpp; 565 udpp[0] = udp; 566 } 567 568 /* 569 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 570 * passed to udp_wput. 571 * It associates a port number and local address with the stream. 
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the us->us_next_port_to_try.
 */
static void
udp_tpi_bind(queue_t *q, mblk_t *mp)
{
	sin_t		*sin;
	sin6_t		*sin6;
	mblk_t		*mp1;
	struct T_bind_req *tbr;
	conn_t		*connp;
	udp_t		*udp;
	int		error;
	struct sockaddr	*sa;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	if (udp->udp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Reallocate the message to make sure we have enough room for an
	 * address and the protocol type.
	 */
	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
	if (!mp1) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	mp = mp1;

	/* Reset the message type in preparation for shipping it back. */
	DB_TYPE(mp) = M_PCPROTO;

	tbr = (struct T_bind_req *)mp->b_rptr;
	switch (tbr->ADDR_length) {
	case 0:			/* Request for a generic port */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (udp->udp_family == AF_INET) {
			tbr->ADDR_length = sizeof (sin_t);
			sin = (sin_t *)&tbr[1];
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
			sa = (struct sockaddr *)sin;
		} else {
			ASSERT(udp->udp_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&tbr[1];
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
			sa = (struct sockaddr *)sin6;
		}
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin_t));
		if (sa == NULL || !OK_32PTR((char *)sa)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sa->sa_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin6_t));
		if (sa == NULL || !OK_32PTR((char *)sa)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sa->sa_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		break;

	default:		/* Invalid request */
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}


	cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);
	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
	    tbr->PRIM_type != O_T_BIND_REQ);

	if (error != 0) {
		if (error > 0) {
			udp_err_ack(q, mp, TSYSERR, error);
		} else {
			udp_err_ack(q, mp, -error, 0);
		}
	} else {
		tbr->PRIM_type = T_BIND_ACK;
		qreply(q, mp);
	}
}

/*
 * This routine handles each T_CONN_REQ message passed to udp.  It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_do_connect.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
 * convert it to an error ack for the appropriate primitive.
 */
static void
udp_tpi_connect(queue_t *q, mblk_t *mp)
{
	mblk_t	*mp1;
	udp_t	*udp;
	conn_t	*connp = Q_TO_CONN(q);
	int	error;
	socklen_t	len;
	struct sockaddr		*sa;
	struct T_conn_req	*tcr;

	udp = connp->conn_udp;
	tcr = (struct T_conn_req *)mp->b_rptr;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine the packet type based on the type of address passed in;
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that the address family matches the family of
	 * the address passed down.
	 */
	len = tcr->DEST_length;
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		break;

	case sizeof (sin6_t):
		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		break;
	}

	error = proto_verify_ip_addr(udp->udp_family, sa, len);
	if (error != 0) {
		udp_err_ack(q, mp, TSYSERR, error);
		return;
	}

	/*
	 * We have to send a connection confirmation to
	 * keep TLI happy.
	 */
	if (udp->udp_family == AF_INET) {
		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
		    sizeof (sin_t), NULL, 0);
	} else {
		mp1 = mi_tpi_conn_con(NULL, (char *)sa,
		    sizeof (sin6_t), NULL, 0);
	}
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	/*
	 * ok_ack for T_CONN_REQ
	 */
	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack.
*/ 791 freemsg(mp1); 792 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 793 return; 794 } 795 796 error = udp_do_connect(connp, sa, len); 797 if (error != 0) { 798 freeb(mp1); 799 if (error < 0) 800 udp_err_ack(q, mp, -error, 0); 801 else 802 udp_err_ack(q, mp, TSYSERR, error); 803 } else { 804 putnext(connp->conn_rq, mp); 805 putnext(connp->conn_rq, mp1); 806 } 807 } 808 809 static int 810 udp_tpi_close(queue_t *q, int flags) 811 { 812 conn_t *connp; 813 814 if (flags & SO_FALLBACK) { 815 /* 816 * stream is being closed while in fallback 817 * simply free the resources that were allocated 818 */ 819 inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr)); 820 qprocsoff(q); 821 goto done; 822 } 823 824 connp = Q_TO_CONN(q); 825 udp_do_close(connp); 826 done: 827 q->q_ptr = WR(q)->q_ptr = NULL; 828 return (0); 829 } 830 831 /* 832 * Called in the close path to quiesce the conn 833 */ 834 void 835 udp_quiesce_conn(conn_t *connp) 836 { 837 udp_t *udp = connp->conn_udp; 838 839 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 840 /* 841 * Running in cluster mode - register unbind information 842 */ 843 if (udp->udp_ipversion == IPV4_VERSION) { 844 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 845 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 846 (in_port_t)udp->udp_port); 847 } else { 848 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 849 (uint8_t *)(&(udp->udp_v6src)), 850 (in_port_t)udp->udp_port); 851 } 852 } 853 854 udp_bind_hash_remove(udp, B_FALSE); 855 856 } 857 858 void 859 udp_close_free(conn_t *connp) 860 { 861 udp_t *udp = connp->conn_udp; 862 863 /* If there are any options associated with the stream, free them. */ 864 if (udp->udp_ip_snd_options != NULL) { 865 mi_free((char *)udp->udp_ip_snd_options); 866 udp->udp_ip_snd_options = NULL; 867 udp->udp_ip_snd_options_len = 0; 868 } 869 870 if (udp->udp_ip_rcv_options != NULL) { 871 mi_free((char *)udp->udp_ip_rcv_options); 872 udp->udp_ip_rcv_options = NULL; 873 udp->udp_ip_rcv_options_len = 0; 874 } 875 876 /* Free memory associated with sticky options */ 877 if (udp->udp_sticky_hdrs_len != 0) { 878 kmem_free(udp->udp_sticky_hdrs, 879 udp->udp_sticky_hdrs_len); 880 udp->udp_sticky_hdrs = NULL; 881 udp->udp_sticky_hdrs_len = 0; 882 } 883 884 ip6_pkt_free(&udp->udp_sticky_ipp); 885 886 /* 887 * Clear any fields which the kmem_cache constructor clears. 888 * Only udp_connp needs to be preserved. 889 * TBD: We should make this more efficient to avoid clearing 890 * everything. 
891 */ 892 ASSERT(udp->udp_connp == connp); 893 bzero(udp, sizeof (udp_t)); 894 udp->udp_connp = connp; 895 } 896 897 static int 898 udp_do_disconnect(conn_t *connp) 899 { 900 udp_t *udp; 901 mblk_t *ire_mp; 902 udp_fanout_t *udpf; 903 udp_stack_t *us; 904 int error; 905 906 udp = connp->conn_udp; 907 us = udp->udp_us; 908 rw_enter(&udp->udp_rwlock, RW_WRITER); 909 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 910 rw_exit(&udp->udp_rwlock); 911 return (-TOUTSTATE); 912 } 913 udp->udp_pending_op = T_DISCON_REQ; 914 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 915 us->us_bind_fanout_size)]; 916 mutex_enter(&udpf->uf_lock); 917 udp->udp_v6src = udp->udp_bound_v6src; 918 udp->udp_state = TS_IDLE; 919 mutex_exit(&udpf->uf_lock); 920 921 if (udp->udp_family == AF_INET6) { 922 /* Rebuild the header template */ 923 error = udp_build_hdrs(udp); 924 if (error != 0) { 925 udp->udp_pending_op = -1; 926 rw_exit(&udp->udp_rwlock); 927 return (error); 928 } 929 } 930 931 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 932 if (ire_mp == NULL) { 933 mutex_enter(&udpf->uf_lock); 934 udp->udp_pending_op = -1; 935 mutex_exit(&udpf->uf_lock); 936 rw_exit(&udp->udp_rwlock); 937 return (ENOMEM); 938 } 939 940 rw_exit(&udp->udp_rwlock); 941 942 if (udp->udp_family == AF_INET6) { 943 error = ip_proto_bind_laddr_v6(connp, &ire_mp, IPPROTO_UDP, 944 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 945 } else { 946 error = ip_proto_bind_laddr_v4(connp, &ire_mp, IPPROTO_UDP, 947 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, B_TRUE); 948 } 949 950 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 951 } 952 953 954 static void 955 udp_tpi_disconnect(queue_t *q, mblk_t *mp) 956 { 957 conn_t *connp = Q_TO_CONN(q); 958 int error; 959 960 /* 961 * Allocate the largest primitive we need to send back 962 * T_error_ack is > than T_ok_ack 963 */ 964 mp = reallocb(mp, sizeof (struct T_error_ack), 1); 965 if (mp == NULL) { 966 /* Unable to reuse the T_DISCON_REQ for the ack. */ 967 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM); 968 return; 969 } 970 971 error = udp_do_disconnect(connp); 972 973 if (error != 0) { 974 if (error < 0) { 975 udp_err_ack(q, mp, -error, 0); 976 } else { 977 udp_err_ack(q, mp, TSYSERR, error); 978 } 979 } else { 980 mp = mi_tpi_ok_ack_alloc(mp); 981 ASSERT(mp != NULL); 982 qreply(q, mp); 983 } 984 } 985 986 int 987 udp_disconnect(conn_t *connp) 988 { 989 int error; 990 udp_t *udp = connp->conn_udp; 991 992 udp->udp_dgram_errind = B_FALSE; 993 994 error = udp_do_disconnect(connp); 995 996 if (error < 0) 997 error = proto_tlitosyserr(-error); 998 999 return (error); 1000 } 1001 1002 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 1003 static void 1004 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1005 { 1006 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1007 qreply(q, mp); 1008 } 1009 1010 /* Shorthand to generate and send TPI error acks to our client */ 1011 static void 1012 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1013 int sys_error) 1014 { 1015 struct T_error_ack *teackp; 1016 1017 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1018 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1019 teackp = (struct T_error_ack *)mp->b_rptr; 1020 teackp->ERROR_prim = primitive; 1021 teackp->TLI_error = t_error; 1022 teackp->UNIX_error = sys_error; 1023 qreply(q, mp); 1024 } 1025 } 1026 1027 /*ARGSUSED*/ 1028 static int 1029 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1030 { 1031 int i; 1032 udp_t *udp = Q_TO_UDP(q); 1033 udp_stack_t *us = udp->udp_us; 1034 1035 for (i = 0; i < us->us_num_epriv_ports; i++) { 1036 if (us->us_epriv_ports[i] != 0) 1037 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1038 } 1039 return (0); 1040 } 1041 1042 /* ARGSUSED */ 1043 static int 1044 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1045 cred_t *cr) 1046 { 1047 long new_value; 1048 int i; 1049 udp_t *udp = Q_TO_UDP(q); 1050 udp_stack_t *us = udp->udp_us; 1051 1052 /* 1053 * Fail the request if the new value does not lie within the 1054 * port number limits. 1055 */ 1056 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1057 new_value <= 0 || new_value >= 65536) { 1058 return (EINVAL); 1059 } 1060 1061 /* Check if the value is already in the list */ 1062 for (i = 0; i < us->us_num_epriv_ports; i++) { 1063 if (new_value == us->us_epriv_ports[i]) { 1064 return (EEXIST); 1065 } 1066 } 1067 /* Find an empty slot */ 1068 for (i = 0; i < us->us_num_epriv_ports; i++) { 1069 if (us->us_epriv_ports[i] == 0) 1070 break; 1071 } 1072 if (i == us->us_num_epriv_ports) { 1073 return (EOVERFLOW); 1074 } 1075 1076 /* Set the new value */ 1077 us->us_epriv_ports[i] = (in_port_t)new_value; 1078 return (0); 1079 } 1080 1081 /* ARGSUSED */ 1082 static int 1083 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1084 cred_t *cr) 1085 { 1086 long new_value; 1087 int i; 1088 udp_t *udp = Q_TO_UDP(q); 1089 udp_stack_t *us = udp->udp_us; 1090 1091 /* 1092 * Fail the request if the new value does not lie within the 1093 * port number limits. 1094 */ 1095 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1096 new_value <= 0 || new_value >= 65536) { 1097 return (EINVAL); 1098 } 1099 1100 /* Check that the value is already in the list */ 1101 for (i = 0; i < us->us_num_epriv_ports; i++) { 1102 if (us->us_epriv_ports[i] == new_value) 1103 break; 1104 } 1105 if (i == us->us_num_epriv_ports) { 1106 return (ESRCH); 1107 } 1108 1109 /* Clear the value */ 1110 us->us_epriv_ports[i] = 0; 1111 return (0); 1112 } 1113 1114 /* At minimum we need 4 bytes of UDP header */ 1115 #define ICMP_MIN_UDP_HDR 4 1116 1117 /* 1118 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1119 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1120 * Assumes that IP has pulled up everything up to and including the ICMP header. 
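 *
 * Illustrative sketch (an assumption, not from the original comment): an
 * application typically observes the ECONNREFUSED generated below only on
 * a connected UDP socket, e.g.
 *
 *	fd = socket(AF_INET, SOCK_DGRAM, 0);
 *	(void) connect(fd, (struct sockaddr *)&sin, sizeof (sin));
 *	(void) send(fd, buf, len, 0);
 *	if (recv(fd, buf, len, 0) == -1 && errno == ECONNREFUSED)
 *		... the ICMP port unreachable handled here was delivered ...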
1121 */ 1122 static void 1123 udp_icmp_error(conn_t *connp, mblk_t *mp) 1124 { 1125 icmph_t *icmph; 1126 ipha_t *ipha; 1127 int iph_hdr_length; 1128 udpha_t *udpha; 1129 sin_t sin; 1130 sin6_t sin6; 1131 mblk_t *mp1; 1132 int error = 0; 1133 udp_t *udp = connp->conn_udp; 1134 1135 mp1 = NULL; 1136 ipha = (ipha_t *)mp->b_rptr; 1137 1138 ASSERT(OK_32PTR(mp->b_rptr)); 1139 1140 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1141 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1142 udp_icmp_error_ipv6(connp, mp); 1143 return; 1144 } 1145 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1146 1147 /* Skip past the outer IP and ICMP headers */ 1148 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1149 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1150 ipha = (ipha_t *)&icmph[1]; 1151 1152 /* Skip past the inner IP and find the ULP header */ 1153 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1154 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1155 1156 switch (icmph->icmph_type) { 1157 case ICMP_DEST_UNREACHABLE: 1158 switch (icmph->icmph_code) { 1159 case ICMP_FRAGMENTATION_NEEDED: 1160 /* 1161 * IP has already adjusted the path MTU. 1162 */ 1163 break; 1164 case ICMP_PORT_UNREACHABLE: 1165 case ICMP_PROTOCOL_UNREACHABLE: 1166 error = ECONNREFUSED; 1167 break; 1168 default: 1169 /* Transient errors */ 1170 break; 1171 } 1172 break; 1173 default: 1174 /* Transient errors */ 1175 break; 1176 } 1177 if (error == 0) { 1178 freemsg(mp); 1179 return; 1180 } 1181 1182 /* 1183 * Deliver T_UDERROR_IND when the application has asked for it. 1184 * The socket layer enables this automatically when connected. 1185 */ 1186 if (!udp->udp_dgram_errind) { 1187 freemsg(mp); 1188 return; 1189 } 1190 1191 1192 switch (udp->udp_family) { 1193 case AF_INET: 1194 sin = sin_null; 1195 sin.sin_family = AF_INET; 1196 sin.sin_addr.s_addr = ipha->ipha_dst; 1197 sin.sin_port = udpha->uha_dst_port; 1198 if (IPCL_IS_NONSTR(connp)) { 1199 rw_enter(&udp->udp_rwlock, RW_WRITER); 1200 if (udp->udp_state == TS_DATA_XFER) { 1201 if (sin.sin_port == udp->udp_dstport && 1202 sin.sin_addr.s_addr == 1203 V4_PART_OF_V6(udp->udp_v6dst)) { 1204 1205 rw_exit(&udp->udp_rwlock); 1206 (*connp->conn_upcalls->su_set_error) 1207 (connp->conn_upper_handle, error); 1208 goto done; 1209 } 1210 } else { 1211 udp->udp_delayed_error = error; 1212 *((sin_t *)&udp->udp_delayed_addr) = sin; 1213 } 1214 rw_exit(&udp->udp_rwlock); 1215 } else { 1216 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), 1217 NULL, 0, error); 1218 } 1219 break; 1220 case AF_INET6: 1221 sin6 = sin6_null; 1222 sin6.sin6_family = AF_INET6; 1223 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1224 sin6.sin6_port = udpha->uha_dst_port; 1225 if (IPCL_IS_NONSTR(connp)) { 1226 rw_enter(&udp->udp_rwlock, RW_WRITER); 1227 if (udp->udp_state == TS_DATA_XFER) { 1228 if (sin6.sin6_port == udp->udp_dstport && 1229 IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr, 1230 &udp->udp_v6dst)) { 1231 rw_exit(&udp->udp_rwlock); 1232 (*connp->conn_upcalls->su_set_error) 1233 (connp->conn_upper_handle, error); 1234 goto done; 1235 } 1236 } else { 1237 udp->udp_delayed_error = error; 1238 *((sin6_t *)&udp->udp_delayed_addr) = sin6; 1239 } 1240 rw_exit(&udp->udp_rwlock); 1241 } else { 1242 1243 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1244 NULL, 0, error); 1245 } 1246 break; 1247 } 1248 if (mp1 != NULL) 1249 putnext(connp->conn_rq, mp1); 1250 done: 1251 freemsg(mp); 1252 } 1253 1254 /* 1255 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 
1256 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1257 * Assumes that IP has pulled up all the extension headers as well as the 1258 * ICMPv6 header. 1259 */ 1260 static void 1261 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp) 1262 { 1263 icmp6_t *icmp6; 1264 ip6_t *ip6h, *outer_ip6h; 1265 uint16_t iph_hdr_length; 1266 uint8_t *nexthdrp; 1267 udpha_t *udpha; 1268 sin6_t sin6; 1269 mblk_t *mp1; 1270 int error = 0; 1271 udp_t *udp = connp->conn_udp; 1272 udp_stack_t *us = udp->udp_us; 1273 1274 outer_ip6h = (ip6_t *)mp->b_rptr; 1275 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1276 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1277 else 1278 iph_hdr_length = IPV6_HDR_LEN; 1279 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1280 ip6h = (ip6_t *)&icmp6[1]; 1281 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1282 freemsg(mp); 1283 return; 1284 } 1285 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1286 1287 switch (icmp6->icmp6_type) { 1288 case ICMP6_DST_UNREACH: 1289 switch (icmp6->icmp6_code) { 1290 case ICMP6_DST_UNREACH_NOPORT: 1291 error = ECONNREFUSED; 1292 break; 1293 case ICMP6_DST_UNREACH_ADMIN: 1294 case ICMP6_DST_UNREACH_NOROUTE: 1295 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1296 case ICMP6_DST_UNREACH_ADDR: 1297 /* Transient errors */ 1298 break; 1299 default: 1300 break; 1301 } 1302 break; 1303 case ICMP6_PACKET_TOO_BIG: { 1304 struct T_unitdata_ind *tudi; 1305 struct T_opthdr *toh; 1306 size_t udi_size; 1307 mblk_t *newmp; 1308 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1309 sizeof (struct ip6_mtuinfo); 1310 sin6_t *sin6; 1311 struct ip6_mtuinfo *mtuinfo; 1312 1313 /* 1314 * If the application has requested to receive path mtu 1315 * information, send up an empty message containing an 1316 * IPV6_PATHMTU ancillary data item. 1317 */ 1318 if (!udp->udp_ipv6_recvpathmtu) 1319 break; 1320 1321 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1322 opt_length; 1323 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1324 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1325 break; 1326 } 1327 1328 /* 1329 * newmp->b_cont is left to NULL on purpose. This is an 1330 * empty message containing only ancillary data. 1331 */ 1332 newmp->b_datap->db_type = M_PROTO; 1333 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1334 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1335 tudi->PRIM_type = T_UNITDATA_IND; 1336 tudi->SRC_length = sizeof (sin6_t); 1337 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1338 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1339 tudi->OPT_length = opt_length; 1340 1341 sin6 = (sin6_t *)&tudi[1]; 1342 bzero(sin6, sizeof (sin6_t)); 1343 sin6->sin6_family = AF_INET6; 1344 sin6->sin6_addr = udp->udp_v6dst; 1345 1346 toh = (struct T_opthdr *)&sin6[1]; 1347 toh->level = IPPROTO_IPV6; 1348 toh->name = IPV6_PATHMTU; 1349 toh->len = opt_length; 1350 toh->status = 0; 1351 1352 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1353 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1354 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1355 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1356 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1357 /* 1358 * We've consumed everything we need from the original 1359 * message. Free it, then send our empty message. 
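		 *
		 * Illustrative note (an assumption, not from the original
		 * comment): the application sees this as a zero-length
		 * datagram carrying an IPV6_PATHMTU ancillary data item,
		 * typically after enabling
		 *
		 *	int on = 1;
		 *	(void) setsockopt(fd, IPPROTO_IPV6, IPV6_RECVPATHMTU,
		 *	    &on, sizeof (on));
		 *
		 * and reading the struct ip6_mtuinfo out of the control
		 * message returned by recvmsg(3SOCKET).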
		 */
		freemsg(mp);
		if (!IPCL_IS_NONSTR(connp)) {
			putnext(connp->conn_rq, newmp);
		} else {
			(*connp->conn_upcalls->su_recv)
			    (connp->conn_upper_handle, newmp, 0, 0, &error,
			    NULL);
		}
		return;
	}
	case ICMP6_TIME_EXCEEDED:
		/* Transient errors */
		break;
	case ICMP6_PARAM_PROB:
		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
		    (uchar_t *)nexthdrp) {
			error = ECONNREFUSED;
			break;
		}
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!udp->udp_dgram_errind) {
		freemsg(mp);
		return;
	}

	sin6 = sin6_null;
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = ip6h->ip6_dst;
	sin6.sin6_port = udpha->uha_dst_port;
	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;

	if (IPCL_IS_NONSTR(connp)) {
		rw_enter(&udp->udp_rwlock, RW_WRITER);
		if (udp->udp_state == TS_DATA_XFER) {
			if (sin6.sin6_port == udp->udp_dstport &&
			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
			    &udp->udp_v6dst)) {
				rw_exit(&udp->udp_rwlock);
				(*connp->conn_upcalls->su_set_error)
				    (connp->conn_upper_handle, error);
				goto done;
			}
		} else {
			udp->udp_delayed_error = error;
			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
		}
		rw_exit(&udp->udp_rwlock);
	} else {
		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
		    NULL, 0, error);
		if (mp1 != NULL)
			putnext(connp->conn_rq, mp1);
	}

done:
	freemsg(mp);
}

/*
 * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
 * The local address is filled in if the endpoint is bound.  The remote
 * address is filled in if the remote address has been specified
 * ("connected endpoint").  (The concept of connected CLTS sockets is alien
 * to published TPI but we support it anyway.)
 */
static void
udp_addr_req(queue_t *q, mblk_t *mp)
{
	sin_t	*sin;
	sin6_t	*sin6;
	mblk_t	*ackmp;
	struct T_addr_ack *taa;
	udp_t	*udp = Q_TO_UDP(q);

	/* Make it large enough for worst case */
	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
	    2 * sizeof (sin6_t), 1);
	if (ackmp == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}
	taa = (struct T_addr_ack *)ackmp->b_rptr;

	bzero(taa, sizeof (struct T_addr_ack));
	ackmp->b_wptr = (uchar_t *)&taa[1];

	taa->PRIM_type = T_ADDR_ACK;
	ackmp->b_datap->db_type = M_PCPROTO;
	rw_enter(&udp->udp_rwlock, RW_READER);
	/*
	 * Note: Following code assumes 32 bit alignment of basic
	 * data structures like sin_t and struct T_addr_ack.
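	 *
	 * Illustrative layout of the T_ADDR_ACK built below (an assumption,
	 * shown for reference only):
	 *
	 *	+-------------------+-----------------+------------------+
	 *	| struct T_addr_ack | local sin(6)_t  | remote sin(6)_t  |
	 *	+-------------------+-----------------+------------------+
	 *	^ b_rptr            ^ LOCADDR_offset  ^ REMADDR_offset
	 *
	 * The remote part is present only in the connected (TS_DATA_XFER)
	 * state.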
1465 */ 1466 if (udp->udp_state != TS_UNBND) { 1467 /* 1468 * Fill in local address first 1469 */ 1470 taa->LOCADDR_offset = sizeof (*taa); 1471 if (udp->udp_family == AF_INET) { 1472 taa->LOCADDR_length = sizeof (sin_t); 1473 sin = (sin_t *)&taa[1]; 1474 /* Fill zeroes and then initialize non-zero fields */ 1475 *sin = sin_null; 1476 sin->sin_family = AF_INET; 1477 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 1478 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1479 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 1480 sin->sin_addr.s_addr); 1481 } else { 1482 /* 1483 * INADDR_ANY 1484 * udp_v6src is not set, we might be bound to 1485 * broadcast/multicast. Use udp_bound_v6src as 1486 * local address instead (that could 1487 * also still be INADDR_ANY) 1488 */ 1489 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 1490 sin->sin_addr.s_addr); 1491 } 1492 sin->sin_port = udp->udp_port; 1493 ackmp->b_wptr = (uchar_t *)&sin[1]; 1494 if (udp->udp_state == TS_DATA_XFER) { 1495 /* 1496 * connected, fill remote address too 1497 */ 1498 taa->REMADDR_length = sizeof (sin_t); 1499 /* assumed 32-bit alignment */ 1500 taa->REMADDR_offset = taa->LOCADDR_offset + 1501 taa->LOCADDR_length; 1502 1503 sin = (sin_t *)(ackmp->b_rptr + 1504 taa->REMADDR_offset); 1505 /* initialize */ 1506 *sin = sin_null; 1507 sin->sin_family = AF_INET; 1508 sin->sin_addr.s_addr = 1509 V4_PART_OF_V6(udp->udp_v6dst); 1510 sin->sin_port = udp->udp_dstport; 1511 ackmp->b_wptr = (uchar_t *)&sin[1]; 1512 } 1513 } else { 1514 taa->LOCADDR_length = sizeof (sin6_t); 1515 sin6 = (sin6_t *)&taa[1]; 1516 /* Fill zeroes and then initialize non-zero fields */ 1517 *sin6 = sin6_null; 1518 sin6->sin6_family = AF_INET6; 1519 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 1520 sin6->sin6_addr = udp->udp_v6src; 1521 } else { 1522 /* 1523 * UNSPECIFIED 1524 * udp_v6src is not set, we might be bound to 1525 * broadcast/multicast. Use udp_bound_v6src as 1526 * local address instead (that could 1527 * also still be UNSPECIFIED) 1528 */ 1529 sin6->sin6_addr = 1530 udp->udp_bound_v6src; 1531 } 1532 sin6->sin6_port = udp->udp_port; 1533 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1534 if (udp->udp_state == TS_DATA_XFER) { 1535 /* 1536 * connected, fill remote address too 1537 */ 1538 taa->REMADDR_length = sizeof (sin6_t); 1539 /* assumed 32-bit alignment */ 1540 taa->REMADDR_offset = taa->LOCADDR_offset + 1541 taa->LOCADDR_length; 1542 1543 sin6 = (sin6_t *)(ackmp->b_rptr + 1544 taa->REMADDR_offset); 1545 /* initialize */ 1546 *sin6 = sin6_null; 1547 sin6->sin6_family = AF_INET6; 1548 sin6->sin6_addr = udp->udp_v6dst; 1549 sin6->sin6_port = udp->udp_dstport; 1550 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1551 } 1552 ackmp->b_wptr = (uchar_t *)&sin6[1]; 1553 } 1554 } 1555 rw_exit(&udp->udp_rwlock); 1556 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 1557 qreply(q, ackmp); 1558 } 1559 1560 static void 1561 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 1562 { 1563 if (udp->udp_family == AF_INET) { 1564 *tap = udp_g_t_info_ack_ipv4; 1565 } else { 1566 *tap = udp_g_t_info_ack_ipv6; 1567 } 1568 tap->CURRENT_state = udp->udp_state; 1569 tap->OPT_size = udp_max_optsize; 1570 } 1571 1572 static void 1573 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap, 1574 t_uscalar_t cap_bits1) 1575 { 1576 tcap->CAP_bits1 = 0; 1577 1578 if (cap_bits1 & TC1_INFO) { 1579 udp_copy_info(&tcap->INFO_ack, udp); 1580 tcap->CAP_bits1 |= TC1_INFO; 1581 } 1582 } 1583 1584 /* 1585 * This routine responds to T_CAPABILITY_REQ messages. It is called by 1586 * udp_wput. 
Much of the T_CAPABILITY_ACK information is copied from
 * udp_g_t_info_ack.  The current state of the stream is copied from
 * udp_state.
 */
static void
udp_capability_req(queue_t *q, mblk_t *mp)
{
	t_uscalar_t		cap_bits1;
	struct T_capability_ack	*tcap;
	udp_t	*udp = Q_TO_UDP(q);

	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;

	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
	    mp->b_datap->db_type, T_CAPABILITY_ACK);
	if (!mp)
		return;

	tcap = (struct T_capability_ack *)mp->b_rptr;
	udp_do_capability_ack(udp, tcap, cap_bits1);

	qreply(q, mp);
}

/*
 * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
 * The current state of the stream is copied from udp_state.
 */
static void
udp_info_req(queue_t *q, mblk_t *mp)
{
	udp_t *udp = Q_TO_UDP(q);

	/* Create a T_INFO_ACK message. */
	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
	    T_INFO_ACK);
	if (!mp)
		return;
	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
	qreply(q, mp);
}

/*
 * IP recognizes seven kinds of bind requests:
 *
 * - A zero-length address binds only to the protocol number.
 *
 * - A 4-byte address is treated as a request to
 *   validate that the address is a valid local IPv4
 *   address, appropriate for an application to bind to.
 *   IP does the verification, but does not make any note
 *   of the address at this time.
 *
 * - A 16-byte address is treated as a request
 *   to validate a local IPv6 address, as the 4-byte
 *   address case above.
 *
 * - A 16-byte sockaddr_in to validate the local IPv4 address and also
 *   use it for the inbound fanout of packets.
 *
 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
 *   use it for the inbound fanout of packets.
 *
 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
 *   information consisting of local and remote addresses
 *   and ports.  In this case, the addresses are both
 *   validated as appropriate for this operation, and, if
 *   so, the information is retained for use in the
 *   inbound fanout.
 *
 * - A 36-byte address (ipa6_conn_t) containing complete IPv6
 *   fanout information, like the 12-byte case above.
 *
 * IP will also fill in the IRE request mblk with information
 * regarding our peer.  In all cases, we notify IP of our protocol
 * type by appending a single protocol byte to the bind request.
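 *
 * Illustrative mblk layout for the 12-byte (ipa_conn_t) case as built by
 * udp_ip_bind_mp() below (shown for reference; the exact layout is an
 * assumption drawn from that function):
 *
 *	first mblk:  [ struct T_bind_req | ipa_conn_t | IPPROTO_UDP byte ]
 *	b_cont:      [ IRE_DB_REQ_TYPE mblk sized to hold an ire_t       ]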
1663 */ 1664 static mblk_t * 1665 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 1666 { 1667 char *cp; 1668 mblk_t *mp; 1669 struct T_bind_req *tbr; 1670 ipa_conn_t *ac; 1671 ipa6_conn_t *ac6; 1672 sin_t *sin; 1673 sin6_t *sin6; 1674 1675 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 1676 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 1677 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 1678 if (!mp) 1679 return (mp); 1680 mp->b_datap->db_type = M_PROTO; 1681 tbr = (struct T_bind_req *)mp->b_rptr; 1682 tbr->PRIM_type = bind_prim; 1683 tbr->ADDR_offset = sizeof (*tbr); 1684 tbr->CONIND_number = 0; 1685 tbr->ADDR_length = addr_length; 1686 cp = (char *)&tbr[1]; 1687 switch (addr_length) { 1688 case sizeof (ipa_conn_t): 1689 ASSERT(udp->udp_family == AF_INET); 1690 /* Append a request for an IRE */ 1691 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1692 if (!mp->b_cont) { 1693 freemsg(mp); 1694 return (NULL); 1695 } 1696 mp->b_cont->b_wptr += sizeof (ire_t); 1697 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1698 1699 /* cp known to be 32 bit aligned */ 1700 ac = (ipa_conn_t *)cp; 1701 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 1702 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 1703 ac->ac_fport = udp->udp_dstport; 1704 ac->ac_lport = udp->udp_port; 1705 break; 1706 1707 case sizeof (ipa6_conn_t): 1708 ASSERT(udp->udp_family == AF_INET6); 1709 /* Append a request for an IRE */ 1710 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1711 if (!mp->b_cont) { 1712 freemsg(mp); 1713 return (NULL); 1714 } 1715 mp->b_cont->b_wptr += sizeof (ire_t); 1716 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1717 1718 /* cp known to be 32 bit aligned */ 1719 ac6 = (ipa6_conn_t *)cp; 1720 ac6->ac6_laddr = udp->udp_v6src; 1721 ac6->ac6_faddr = udp->udp_v6dst; 1722 ac6->ac6_fport = udp->udp_dstport; 1723 ac6->ac6_lport = udp->udp_port; 1724 break; 1725 1726 case sizeof (sin_t): 1727 ASSERT(udp->udp_family == AF_INET); 1728 /* Append a request for an IRE */ 1729 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1730 if (!mp->b_cont) { 1731 freemsg(mp); 1732 return (NULL); 1733 } 1734 mp->b_cont->b_wptr += sizeof (ire_t); 1735 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1736 1737 sin = (sin_t *)cp; 1738 *sin = sin_null; 1739 sin->sin_family = AF_INET; 1740 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 1741 sin->sin_port = udp->udp_port; 1742 break; 1743 1744 case sizeof (sin6_t): 1745 ASSERT(udp->udp_family == AF_INET6); 1746 /* Append a request for an IRE */ 1747 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1748 if (!mp->b_cont) { 1749 freemsg(mp); 1750 return (NULL); 1751 } 1752 mp->b_cont->b_wptr += sizeof (ire_t); 1753 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1754 1755 sin6 = (sin6_t *)cp; 1756 *sin6 = sin6_null; 1757 sin6->sin6_family = AF_INET6; 1758 sin6->sin6_addr = udp->udp_bound_v6src; 1759 sin6->sin6_port = udp->udp_port; 1760 break; 1761 } 1762 /* Add protocol number to end */ 1763 cp[addr_length] = (char)IPPROTO_UDP; 1764 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 1765 return (mp); 1766 } 1767 1768 /* For /dev/udp aka AF_INET open */ 1769 static int 1770 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1771 { 1772 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 1773 } 1774 1775 /* For /dev/udp6 aka AF_INET6 open */ 1776 static int 1777 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 1778 { 1779 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 1780 } 1781 1782 /* 1783 * 
This is the open routine for udp. It allocates a udp_t structure for 1784 * the stream and, on the first open of the module, creates an ND table. 1785 */ 1786 /*ARGSUSED2*/ 1787 static int 1788 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 1789 boolean_t isv6) 1790 { 1791 int error; 1792 udp_t *udp; 1793 conn_t *connp; 1794 dev_t conn_dev; 1795 udp_stack_t *us; 1796 vmem_t *minor_arena; 1797 1798 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 1799 1800 /* If the stream is already open, return immediately. */ 1801 if (q->q_ptr != NULL) 1802 return (0); 1803 1804 if (sflag == MODOPEN) 1805 return (EINVAL); 1806 1807 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 1808 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 1809 minor_arena = ip_minor_arena_la; 1810 } else { 1811 /* 1812 * Either minor numbers in the large arena were exhausted 1813 * or a non socket application is doing the open. 1814 * Try to allocate from the small arena. 1815 */ 1816 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) 1817 return (EBUSY); 1818 1819 minor_arena = ip_minor_arena_sa; 1820 } 1821 1822 if (flag & SO_FALLBACK) { 1823 /* 1824 * Non streams socket needs a stream to fallback to 1825 */ 1826 RD(q)->q_ptr = (void *)conn_dev; 1827 WR(q)->q_qinfo = &udp_fallback_sock_winit; 1828 WR(q)->q_ptr = (void *)minor_arena; 1829 qprocson(q); 1830 return (0); 1831 } 1832 1833 connp = udp_do_open(credp, isv6, KM_SLEEP); 1834 if (connp == NULL) { 1835 inet_minor_free(minor_arena, conn_dev); 1836 return (ENOMEM); 1837 } 1838 udp = connp->conn_udp; 1839 us = udp->udp_us; 1840 1841 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 1842 connp->conn_dev = conn_dev; 1843 connp->conn_minor_arena = minor_arena; 1844 1845 /* 1846 * Initialize the udp_t structure for this stream. 1847 */ 1848 q->q_ptr = connp; 1849 WR(q)->q_ptr = connp; 1850 connp->conn_rq = q; 1851 connp->conn_wq = WR(q); 1852 1853 rw_enter(&udp->udp_rwlock, RW_WRITER); 1854 ASSERT(connp->conn_ulp == IPPROTO_UDP); 1855 ASSERT(connp->conn_udp == udp); 1856 ASSERT(udp->udp_connp == connp); 1857 1858 if (flag & SO_SOCKSTR) { 1859 connp->conn_flags |= IPCL_SOCKET; 1860 udp->udp_issocket = B_TRUE; 1861 udp->udp_direct_sockfs = B_TRUE; 1862 } 1863 1864 q->q_hiwat = us->us_recv_hiwat; 1865 WR(q)->q_hiwat = us->us_xmit_hiwat; 1866 WR(q)->q_lowat = us->us_xmit_lowat; 1867 1868 qprocson(q); 1869 1870 if (udp->udp_family == AF_INET6) { 1871 /* Build initial header template for transmit */ 1872 if ((error = udp_build_hdrs(udp)) != 0) { 1873 rw_exit(&udp->udp_rwlock); 1874 qprocsoff(q); 1875 inet_minor_free(minor_arena, conn_dev); 1876 ipcl_conn_destroy(connp); 1877 return (error); 1878 } 1879 } 1880 rw_exit(&udp->udp_rwlock); 1881 1882 /* Set the Stream head write offset and high watermark. */ 1883 (void) proto_set_tx_wroff(q, connp, 1884 udp->udp_max_hdr_len + us->us_wroff_extra); 1885 /* XXX udp_set_rcv_hiwat() doesn't hold the lock, is it a bug??? */ 1886 (void) proto_set_rx_hiwat(q, connp, udp_set_rcv_hiwat(udp, q->q_hiwat)); 1887 1888 mutex_enter(&connp->conn_lock); 1889 connp->conn_state_flags &= ~CONN_INCIPIENT; 1890 mutex_exit(&connp->conn_lock); 1891 return (0); 1892 } 1893 1894 /* 1895 * Which UDP options OK to set through T_UNITDATA_REQ... 
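 *
 * Illustrative note (an assumption, not from the original comment): options
 * arrive here when a datagram is sent with per-packet options, e.g. a
 * sendmsg(3SOCKET) call carrying ancillary data such as IPV6_PKTINFO, which
 * sockfs turns into option entries on the T_UNITDATA_REQ processed by
 * udp_unitdata_opt_process().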
1896 */ 1897 /* ARGSUSED */ 1898 static boolean_t 1899 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 1900 { 1901 return (B_TRUE); 1902 } 1903 1904 /* 1905 * This routine gets default values of certain options whose default 1906 * values are maintained by protcol specific code 1907 */ 1908 /* ARGSUSED */ 1909 int 1910 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 1911 { 1912 udp_t *udp = Q_TO_UDP(q); 1913 udp_stack_t *us = udp->udp_us; 1914 int *i1 = (int *)ptr; 1915 1916 switch (level) { 1917 case IPPROTO_IP: 1918 switch (name) { 1919 case IP_MULTICAST_TTL: 1920 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 1921 return (sizeof (uchar_t)); 1922 case IP_MULTICAST_LOOP: 1923 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 1924 return (sizeof (uchar_t)); 1925 } 1926 break; 1927 case IPPROTO_IPV6: 1928 switch (name) { 1929 case IPV6_MULTICAST_HOPS: 1930 *i1 = IP_DEFAULT_MULTICAST_TTL; 1931 return (sizeof (int)); 1932 case IPV6_MULTICAST_LOOP: 1933 *i1 = IP_DEFAULT_MULTICAST_LOOP; 1934 return (sizeof (int)); 1935 case IPV6_UNICAST_HOPS: 1936 *i1 = us->us_ipv6_hoplimit; 1937 return (sizeof (int)); 1938 } 1939 break; 1940 } 1941 return (-1); 1942 } 1943 1944 /* 1945 * This routine retrieves the current status of socket options. 1946 * It returns the size of the option retrieved. 1947 */ 1948 static int 1949 udp_opt_get(conn_t *connp, int level, int name, uchar_t *ptr) 1950 { 1951 udp_t *udp = connp->conn_udp; 1952 udp_stack_t *us = udp->udp_us; 1953 int *i1 = (int *)ptr; 1954 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 1955 int len; 1956 1957 ASSERT(RW_READ_HELD(&udp->udp_rwlock)); 1958 switch (level) { 1959 case SOL_SOCKET: 1960 switch (name) { 1961 case SO_DEBUG: 1962 *i1 = udp->udp_debug; 1963 break; /* goto sizeof (int) option return */ 1964 case SO_REUSEADDR: 1965 *i1 = udp->udp_reuseaddr; 1966 break; /* goto sizeof (int) option return */ 1967 case SO_TYPE: 1968 *i1 = SOCK_DGRAM; 1969 break; /* goto sizeof (int) option return */ 1970 1971 /* 1972 * The following three items are available here, 1973 * but are only meaningful to IP. 1974 */ 1975 case SO_DONTROUTE: 1976 *i1 = udp->udp_dontroute; 1977 break; /* goto sizeof (int) option return */ 1978 case SO_USELOOPBACK: 1979 *i1 = udp->udp_useloopback; 1980 break; /* goto sizeof (int) option return */ 1981 case SO_BROADCAST: 1982 *i1 = udp->udp_broadcast; 1983 break; /* goto sizeof (int) option return */ 1984 1985 case SO_SNDBUF: 1986 *i1 = udp->udp_xmit_hiwat; 1987 break; /* goto sizeof (int) option return */ 1988 case SO_RCVBUF: 1989 *i1 = udp->udp_rcv_disply_hiwat; 1990 break; /* goto sizeof (int) option return */ 1991 case SO_DGRAM_ERRIND: 1992 *i1 = udp->udp_dgram_errind; 1993 break; /* goto sizeof (int) option return */ 1994 case SO_RECVUCRED: 1995 *i1 = udp->udp_recvucred; 1996 break; /* goto sizeof (int) option return */ 1997 case SO_TIMESTAMP: 1998 *i1 = udp->udp_timestamp; 1999 break; /* goto sizeof (int) option return */ 2000 case SO_ANON_MLP: 2001 *i1 = connp->conn_anon_mlp; 2002 break; /* goto sizeof (int) option return */ 2003 case SO_MAC_EXEMPT: 2004 *i1 = connp->conn_mac_exempt; 2005 break; /* goto sizeof (int) option return */ 2006 case SO_ALLZONES: 2007 *i1 = connp->conn_allzones; 2008 break; /* goto sizeof (int) option return */ 2009 case SO_EXCLBIND: 2010 *i1 = udp->udp_exclbind ? 
SO_EXCLBIND : 0; 2011 break; 2012 case SO_PROTOTYPE: 2013 *i1 = IPPROTO_UDP; 2014 break; 2015 case SO_DOMAIN: 2016 *i1 = udp->udp_family; 2017 break; 2018 default: 2019 return (-1); 2020 } 2021 break; 2022 case IPPROTO_IP: 2023 if (udp->udp_family != AF_INET) 2024 return (-1); 2025 switch (name) { 2026 case IP_OPTIONS: 2027 case T_IP_OPTIONS: 2028 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2029 if (len > 0) { 2030 bcopy(udp->udp_ip_rcv_options + 2031 udp->udp_label_len, ptr, len); 2032 } 2033 return (len); 2034 case IP_TOS: 2035 case T_IP_TOS: 2036 *i1 = (int)udp->udp_type_of_service; 2037 break; /* goto sizeof (int) option return */ 2038 case IP_TTL: 2039 *i1 = (int)udp->udp_ttl; 2040 break; /* goto sizeof (int) option return */ 2041 case IP_DHCPINIT_IF: 2042 return (-EINVAL); 2043 case IP_NEXTHOP: 2044 case IP_RECVPKTINFO: 2045 /* 2046 * This also handles IP_PKTINFO. 2047 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2048 * Differentiation is based on the size of the argument 2049 * passed in. 2050 * This option is handled in IP which will return an 2051 * error for IP_PKTINFO as it's not supported as a 2052 * sticky option. 2053 */ 2054 return (-EINVAL); 2055 case IP_MULTICAST_IF: 2056 /* 0 address if not set */ 2057 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2058 return (sizeof (ipaddr_t)); 2059 case IP_MULTICAST_TTL: 2060 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2061 return (sizeof (uchar_t)); 2062 case IP_MULTICAST_LOOP: 2063 *ptr = connp->conn_multicast_loop; 2064 return (sizeof (uint8_t)); 2065 case IP_RECVOPTS: 2066 *i1 = udp->udp_recvopts; 2067 break; /* goto sizeof (int) option return */ 2068 case IP_RECVDSTADDR: 2069 *i1 = udp->udp_recvdstaddr; 2070 break; /* goto sizeof (int) option return */ 2071 case IP_RECVIF: 2072 *i1 = udp->udp_recvif; 2073 break; /* goto sizeof (int) option return */ 2074 case IP_RECVSLLA: 2075 *i1 = udp->udp_recvslla; 2076 break; /* goto sizeof (int) option return */ 2077 case IP_RECVTTL: 2078 *i1 = udp->udp_recvttl; 2079 break; /* goto sizeof (int) option return */ 2080 case IP_ADD_MEMBERSHIP: 2081 case IP_DROP_MEMBERSHIP: 2082 case IP_BLOCK_SOURCE: 2083 case IP_UNBLOCK_SOURCE: 2084 case IP_ADD_SOURCE_MEMBERSHIP: 2085 case IP_DROP_SOURCE_MEMBERSHIP: 2086 case MCAST_JOIN_GROUP: 2087 case MCAST_LEAVE_GROUP: 2088 case MCAST_BLOCK_SOURCE: 2089 case MCAST_UNBLOCK_SOURCE: 2090 case MCAST_JOIN_SOURCE_GROUP: 2091 case MCAST_LEAVE_SOURCE_GROUP: 2092 case IP_DONTFAILOVER_IF: 2093 /* cannot "get" the value for these */ 2094 return (-1); 2095 case IP_BOUND_IF: 2096 /* Zero if not set */ 2097 *i1 = udp->udp_bound_if; 2098 break; /* goto sizeof (int) option return */ 2099 case IP_UNSPEC_SRC: 2100 *i1 = udp->udp_unspec_source; 2101 break; /* goto sizeof (int) option return */ 2102 case IP_BROADCAST_TTL: 2103 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2104 return (sizeof (uchar_t)); 2105 default: 2106 return (-1); 2107 } 2108 break; 2109 case IPPROTO_IPV6: 2110 if (udp->udp_family != AF_INET6) 2111 return (-1); 2112 switch (name) { 2113 case IPV6_UNICAST_HOPS: 2114 *i1 = (unsigned int)udp->udp_ttl; 2115 break; /* goto sizeof (int) option return */ 2116 case IPV6_MULTICAST_IF: 2117 /* 0 index if not set */ 2118 *i1 = udp->udp_multicast_if_index; 2119 break; /* goto sizeof (int) option return */ 2120 case IPV6_MULTICAST_HOPS: 2121 *i1 = udp->udp_multicast_ttl; 2122 break; /* goto sizeof (int) option return */ 2123 case IPV6_MULTICAST_LOOP: 2124 *i1 = connp->conn_multicast_loop; 2125 break; /* goto sizeof (int) option return */ 2126 case 
IPV6_JOIN_GROUP: 2127 case IPV6_LEAVE_GROUP: 2128 case MCAST_JOIN_GROUP: 2129 case MCAST_LEAVE_GROUP: 2130 case MCAST_BLOCK_SOURCE: 2131 case MCAST_UNBLOCK_SOURCE: 2132 case MCAST_JOIN_SOURCE_GROUP: 2133 case MCAST_LEAVE_SOURCE_GROUP: 2134 /* cannot "get" the value for these */ 2135 return (-1); 2136 case IPV6_BOUND_IF: 2137 /* Zero if not set */ 2138 *i1 = udp->udp_bound_if; 2139 break; /* goto sizeof (int) option return */ 2140 case IPV6_UNSPEC_SRC: 2141 *i1 = udp->udp_unspec_source; 2142 break; /* goto sizeof (int) option return */ 2143 case IPV6_RECVPKTINFO: 2144 *i1 = udp->udp_ip_recvpktinfo; 2145 break; /* goto sizeof (int) option return */ 2146 case IPV6_RECVTCLASS: 2147 *i1 = udp->udp_ipv6_recvtclass; 2148 break; /* goto sizeof (int) option return */ 2149 case IPV6_RECVPATHMTU: 2150 *i1 = udp->udp_ipv6_recvpathmtu; 2151 break; /* goto sizeof (int) option return */ 2152 case IPV6_RECVHOPLIMIT: 2153 *i1 = udp->udp_ipv6_recvhoplimit; 2154 break; /* goto sizeof (int) option return */ 2155 case IPV6_RECVHOPOPTS: 2156 *i1 = udp->udp_ipv6_recvhopopts; 2157 break; /* goto sizeof (int) option return */ 2158 case IPV6_RECVDSTOPTS: 2159 *i1 = udp->udp_ipv6_recvdstopts; 2160 break; /* goto sizeof (int) option return */ 2161 case _OLD_IPV6_RECVDSTOPTS: 2162 *i1 = udp->udp_old_ipv6_recvdstopts; 2163 break; /* goto sizeof (int) option return */ 2164 case IPV6_RECVRTHDRDSTOPTS: 2165 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2166 break; /* goto sizeof (int) option return */ 2167 case IPV6_RECVRTHDR: 2168 *i1 = udp->udp_ipv6_recvrthdr; 2169 break; /* goto sizeof (int) option return */ 2170 case IPV6_PKTINFO: { 2171 /* XXX assumes that caller has room for max size! */ 2172 struct in6_pktinfo *pkti; 2173 2174 pkti = (struct in6_pktinfo *)ptr; 2175 if (ipp->ipp_fields & IPPF_IFINDEX) 2176 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2177 else 2178 pkti->ipi6_ifindex = 0; 2179 if (ipp->ipp_fields & IPPF_ADDR) 2180 pkti->ipi6_addr = ipp->ipp_addr; 2181 else 2182 pkti->ipi6_addr = ipv6_all_zeros; 2183 return (sizeof (struct in6_pktinfo)); 2184 } 2185 case IPV6_TCLASS: 2186 if (ipp->ipp_fields & IPPF_TCLASS) 2187 *i1 = ipp->ipp_tclass; 2188 else 2189 *i1 = IPV6_FLOW_TCLASS( 2190 IPV6_DEFAULT_VERS_AND_FLOW); 2191 break; /* goto sizeof (int) option return */ 2192 case IPV6_NEXTHOP: { 2193 sin6_t *sin6 = (sin6_t *)ptr; 2194 2195 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2196 return (0); 2197 *sin6 = sin6_null; 2198 sin6->sin6_family = AF_INET6; 2199 sin6->sin6_addr = ipp->ipp_nexthop; 2200 return (sizeof (sin6_t)); 2201 } 2202 case IPV6_HOPOPTS: 2203 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2204 return (0); 2205 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2206 return (0); 2207 /* 2208 * The cipso/label option is added by kernel. 2209 * User is not usually aware of this option. 2210 * We copy out the hbh opt after the label option. 
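 *
 * After skipping the label bytes we patch the first two bytes of the
 * copy so that it is still a well-formed hop-by-hop header: byte 0
 * keeps the original next-header value and byte 1 is recomputed as the
 * remaining length in 8-octet units (rounded up) minus one, matching
 * the extension header encoding.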
2211 */ 2212 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2213 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2214 if (udp->udp_label_len_v6 > 0) { 2215 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2216 ptr[1] = (ipp->ipp_hopoptslen - 2217 udp->udp_label_len_v6 + 7) / 8 - 1; 2218 } 2219 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2220 case IPV6_RTHDRDSTOPTS: 2221 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2222 return (0); 2223 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2224 return (ipp->ipp_rtdstoptslen); 2225 case IPV6_RTHDR: 2226 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2227 return (0); 2228 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2229 return (ipp->ipp_rthdrlen); 2230 case IPV6_DSTOPTS: 2231 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2232 return (0); 2233 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2234 return (ipp->ipp_dstoptslen); 2235 case IPV6_PATHMTU: 2236 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2237 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2238 us->us_netstack)); 2239 default: 2240 return (-1); 2241 } 2242 break; 2243 case IPPROTO_UDP: 2244 switch (name) { 2245 case UDP_ANONPRIVBIND: 2246 *i1 = udp->udp_anon_priv_bind; 2247 break; 2248 case UDP_EXCLBIND: 2249 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2250 break; 2251 case UDP_RCVHDR: 2252 *i1 = udp->udp_rcvhdr ? 1 : 0; 2253 break; 2254 case UDP_NAT_T_ENDPOINT: 2255 *i1 = udp->udp_nat_t_endpoint; 2256 break; 2257 default: 2258 return (-1); 2259 } 2260 break; 2261 default: 2262 return (-1); 2263 } 2264 return (sizeof (int)); 2265 } 2266 2267 int 2268 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2269 { 2270 udp_t *udp; 2271 int err; 2272 2273 udp = Q_TO_UDP(q); 2274 2275 rw_enter(&udp->udp_rwlock, RW_READER); 2276 err = udp_opt_get(Q_TO_CONN(q), level, name, ptr); 2277 rw_exit(&udp->udp_rwlock); 2278 return (err); 2279 } 2280 2281 /* 2282 * This routine sets socket options. 2283 */ 2284 /* ARGSUSED */ 2285 static int 2286 udp_do_opt_set(conn_t *connp, int level, int name, uint_t inlen, 2287 uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, cred_t *cr, 2288 void *thisdg_attrs, boolean_t checkonly) 2289 { 2290 udpattrs_t *attrs = thisdg_attrs; 2291 int *i1 = (int *)invalp; 2292 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2293 udp_t *udp = connp->conn_udp; 2294 udp_stack_t *us = udp->udp_us; 2295 int error; 2296 uint_t newlen; 2297 size_t sth_wroff; 2298 2299 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 2300 /* 2301 * For fixed length options, no sanity check 2302 * of passed in length is done. It is assumed *_optcom_req() 2303 * routines do the right thing. 2304 */ 2305 switch (level) { 2306 case SOL_SOCKET: 2307 switch (name) { 2308 case SO_REUSEADDR: 2309 if (!checkonly) { 2310 udp->udp_reuseaddr = onoff; 2311 PASS_OPT_TO_IP(connp); 2312 } 2313 break; 2314 case SO_DEBUG: 2315 if (!checkonly) 2316 udp->udp_debug = onoff; 2317 break; 2318 /* 2319 * The following three items are available here, 2320 * but are only meaningful to IP. 
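 *
 * The switch is cached in the udp_t so that a later getsockopt() can be
 * answered locally, and PASS_OPT_TO_IP() forwards the new value to IP,
 * which is where it actually takes effect.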
2321 */ 2322 case SO_DONTROUTE: 2323 if (!checkonly) { 2324 udp->udp_dontroute = onoff; 2325 PASS_OPT_TO_IP(connp); 2326 } 2327 break; 2328 case SO_USELOOPBACK: 2329 if (!checkonly) { 2330 udp->udp_useloopback = onoff; 2331 PASS_OPT_TO_IP(connp); 2332 } 2333 break; 2334 case SO_BROADCAST: 2335 if (!checkonly) { 2336 udp->udp_broadcast = onoff; 2337 PASS_OPT_TO_IP(connp); 2338 } 2339 break; 2340 2341 case SO_SNDBUF: 2342 if (*i1 > us->us_max_buf) { 2343 *outlenp = 0; 2344 return (ENOBUFS); 2345 } 2346 if (!checkonly) { 2347 udp->udp_xmit_hiwat = *i1; 2348 connp->conn_wq->q_hiwat = *i1; 2349 } 2350 break; 2351 case SO_RCVBUF: 2352 if (*i1 > us->us_max_buf) { 2353 *outlenp = 0; 2354 return (ENOBUFS); 2355 } 2356 if (!checkonly) { 2357 int size; 2358 2359 udp->udp_rcv_disply_hiwat = *i1; 2360 size = udp_set_rcv_hiwat(udp, *i1); 2361 rw_exit(&udp->udp_rwlock); 2362 (void) proto_set_rx_hiwat(connp->conn_rq, connp, 2363 size); 2364 rw_enter(&udp->udp_rwlock, RW_WRITER); 2365 } 2366 break; 2367 case SO_DGRAM_ERRIND: 2368 if (!checkonly) 2369 udp->udp_dgram_errind = onoff; 2370 break; 2371 case SO_RECVUCRED: 2372 if (!checkonly) 2373 udp->udp_recvucred = onoff; 2374 break; 2375 case SO_ALLZONES: 2376 /* 2377 * "soft" error (negative) 2378 * option not handled at this level 2379 * Do not modify *outlenp. 2380 */ 2381 return (-EINVAL); 2382 case SO_TIMESTAMP: 2383 if (!checkonly) 2384 udp->udp_timestamp = onoff; 2385 break; 2386 case SO_ANON_MLP: 2387 if (!checkonly) { 2388 connp->conn_anon_mlp = onoff; 2389 PASS_OPT_TO_IP(connp); 2390 } 2391 break; 2392 case SO_MAC_EXEMPT: 2393 if (secpolicy_net_mac_aware(cr) != 0 || 2394 udp->udp_state != TS_UNBND) 2395 return (EACCES); 2396 if (!checkonly) { 2397 connp->conn_mac_exempt = onoff; 2398 PASS_OPT_TO_IP(connp); 2399 } 2400 break; 2401 case SCM_UCRED: { 2402 struct ucred_s *ucr; 2403 cred_t *cr, *newcr; 2404 ts_label_t *tsl; 2405 2406 /* 2407 * Only sockets that have proper privileges and are 2408 * bound to MLPs will have any other value here, so 2409 * this implicitly tests for privilege to set label. 2410 */ 2411 if (connp->conn_mlp_type == mlptSingle) 2412 break; 2413 ucr = (struct ucred_s *)invalp; 2414 if (inlen != ucredsize || 2415 ucr->uc_labeloff < sizeof (*ucr) || 2416 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 2417 return (EINVAL); 2418 if (!checkonly) { 2419 mblk_t *mb; 2420 2421 if (attrs == NULL || 2422 (mb = attrs->udpattr_mb) == NULL) 2423 return (EINVAL); 2424 if ((cr = DB_CRED(mb)) == NULL) 2425 cr = udp->udp_connp->conn_cred; 2426 ASSERT(cr != NULL); 2427 if ((tsl = crgetlabel(cr)) == NULL) 2428 return (EINVAL); 2429 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 2430 tsl->tsl_doi, KM_NOSLEEP); 2431 if (newcr == NULL) 2432 return (ENOSR); 2433 mblk_setcred(mb, newcr); 2434 attrs->udpattr_credset = B_TRUE; 2435 crfree(newcr); 2436 } 2437 break; 2438 } 2439 case SO_EXCLBIND: 2440 if (!checkonly) 2441 udp->udp_exclbind = onoff; 2442 break; 2443 default: 2444 *outlenp = 0; 2445 return (EINVAL); 2446 } 2447 break; 2448 case IPPROTO_IP: 2449 if (udp->udp_family != AF_INET) { 2450 *outlenp = 0; 2451 return (ENOPROTOOPT); 2452 } 2453 switch (name) { 2454 case IP_OPTIONS: 2455 case T_IP_OPTIONS: 2456 /* Save options for use by IP. */ 2457 newlen = inlen + udp->udp_label_len; 2458 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 2459 *outlenp = 0; 2460 return (EINVAL); 2461 } 2462 if (checkonly) 2463 break; 2464 2465 /* 2466 * Update the stored options taking into account 2467 * any CIPSO option which we should not overwrite. 
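 *
 * tsol_option_set() merges the caller's options in after any
 * kernel-maintained label and updates udp_ip_snd_options_len; on
 * success we recompute udp_max_hdr_len and, with udp_rwlock briefly
 * dropped, push the larger write offset to the stream head so outbound
 * datagrams have headroom for the bigger header.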
2468 */ 2469 if (!tsol_option_set(&udp->udp_ip_snd_options, 2470 &udp->udp_ip_snd_options_len, 2471 udp->udp_label_len, invalp, inlen)) { 2472 *outlenp = 0; 2473 return (ENOMEM); 2474 } 2475 2476 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 2477 UDPH_SIZE + udp->udp_ip_snd_options_len; 2478 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 2479 rw_exit(&udp->udp_rwlock); 2480 (void) proto_set_tx_wroff(connp->conn_rq, connp, 2481 sth_wroff); 2482 rw_enter(&udp->udp_rwlock, RW_WRITER); 2483 break; 2484 2485 case IP_TTL: 2486 if (!checkonly) { 2487 udp->udp_ttl = (uchar_t)*i1; 2488 } 2489 break; 2490 case IP_TOS: 2491 case T_IP_TOS: 2492 if (!checkonly) { 2493 udp->udp_type_of_service = (uchar_t)*i1; 2494 } 2495 break; 2496 case IP_MULTICAST_IF: { 2497 /* 2498 * TODO should check OPTMGMT reply and undo this if 2499 * there is an error. 2500 */ 2501 struct in_addr *inap = (struct in_addr *)invalp; 2502 if (!checkonly) { 2503 udp->udp_multicast_if_addr = 2504 inap->s_addr; 2505 PASS_OPT_TO_IP(connp); 2506 } 2507 break; 2508 } 2509 case IP_MULTICAST_TTL: 2510 if (!checkonly) 2511 udp->udp_multicast_ttl = *invalp; 2512 break; 2513 case IP_MULTICAST_LOOP: 2514 if (!checkonly) { 2515 connp->conn_multicast_loop = *invalp; 2516 PASS_OPT_TO_IP(connp); 2517 } 2518 break; 2519 case IP_RECVOPTS: 2520 if (!checkonly) 2521 udp->udp_recvopts = onoff; 2522 break; 2523 case IP_RECVDSTADDR: 2524 if (!checkonly) 2525 udp->udp_recvdstaddr = onoff; 2526 break; 2527 case IP_RECVIF: 2528 if (!checkonly) { 2529 udp->udp_recvif = onoff; 2530 PASS_OPT_TO_IP(connp); 2531 } 2532 break; 2533 case IP_RECVSLLA: 2534 if (!checkonly) { 2535 udp->udp_recvslla = onoff; 2536 PASS_OPT_TO_IP(connp); 2537 } 2538 break; 2539 case IP_RECVTTL: 2540 if (!checkonly) 2541 udp->udp_recvttl = onoff; 2542 break; 2543 case IP_PKTINFO: { 2544 /* 2545 * This also handles IP_RECVPKTINFO. 2546 * IP_PKTINFO and IP_RECVPKTINFO have same value. 2547 * Differentiation is based on the size of the 2548 * argument passed in. 2549 */ 2550 struct in_pktinfo *pktinfop; 2551 ip4_pkt_t *attr_pktinfop; 2552 2553 if (checkonly) 2554 break; 2555 2556 if (inlen == sizeof (int)) { 2557 /* 2558 * This is IP_RECVPKTINFO option. 2559 * Keep a local copy of whether this option is 2560 * set or not and pass it down to IP for 2561 * processing. 2562 */ 2563 2564 udp->udp_ip_recvpktinfo = onoff; 2565 return (-EINVAL); 2566 } 2567 2568 if (attrs == NULL || 2569 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 2570 /* 2571 * sticky option or no buffer to return 2572 * the results. 
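 *
 * IP_PKTINFO in its struct in_pktinfo form is only meaningful as
 * ancillary data on an individual datagram; with no udpattrs_t buffer
 * to carry the result there is nothing to apply it to, so an attempt
 * to set it as a sticky option fails.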
2573 */ 2574 return (EINVAL); 2575 } 2576 2577 if (inlen != sizeof (struct in_pktinfo)) 2578 return (EINVAL); 2579 2580 pktinfop = (struct in_pktinfo *)invalp; 2581 2582 /* 2583 * At least one of the values should be specified 2584 */ 2585 if (pktinfop->ipi_ifindex == 0 && 2586 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 2587 return (EINVAL); 2588 } 2589 2590 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 2591 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 2592 2593 break; 2594 } 2595 case IP_ADD_MEMBERSHIP: 2596 case IP_DROP_MEMBERSHIP: 2597 case IP_BLOCK_SOURCE: 2598 case IP_UNBLOCK_SOURCE: 2599 case IP_ADD_SOURCE_MEMBERSHIP: 2600 case IP_DROP_SOURCE_MEMBERSHIP: 2601 case MCAST_JOIN_GROUP: 2602 case MCAST_LEAVE_GROUP: 2603 case MCAST_BLOCK_SOURCE: 2604 case MCAST_UNBLOCK_SOURCE: 2605 case MCAST_JOIN_SOURCE_GROUP: 2606 case MCAST_LEAVE_SOURCE_GROUP: 2607 case IP_SEC_OPT: 2608 case IP_NEXTHOP: 2609 case IP_DHCPINIT_IF: 2610 /* 2611 * "soft" error (negative) 2612 * option not handled at this level 2613 * Do not modify *outlenp. 2614 */ 2615 return (-EINVAL); 2616 case IP_BOUND_IF: 2617 if (!checkonly) { 2618 udp->udp_bound_if = *i1; 2619 PASS_OPT_TO_IP(connp); 2620 } 2621 break; 2622 case IP_UNSPEC_SRC: 2623 if (!checkonly) { 2624 udp->udp_unspec_source = onoff; 2625 PASS_OPT_TO_IP(connp); 2626 } 2627 break; 2628 case IP_BROADCAST_TTL: 2629 if (!checkonly) 2630 connp->conn_broadcast_ttl = *invalp; 2631 break; 2632 default: 2633 *outlenp = 0; 2634 return (EINVAL); 2635 } 2636 break; 2637 case IPPROTO_IPV6: { 2638 ip6_pkt_t *ipp; 2639 boolean_t sticky; 2640 2641 if (udp->udp_family != AF_INET6) { 2642 *outlenp = 0; 2643 return (ENOPROTOOPT); 2644 } 2645 /* 2646 * Deal with both sticky options and ancillary data 2647 */ 2648 sticky = B_FALSE; 2649 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 2650 NULL) { 2651 /* sticky options, or none */ 2652 ipp = &udp->udp_sticky_ipp; 2653 sticky = B_TRUE; 2654 } 2655 2656 switch (name) { 2657 case IPV6_MULTICAST_IF: 2658 if (!checkonly) { 2659 udp->udp_multicast_if_index = *i1; 2660 PASS_OPT_TO_IP(connp); 2661 } 2662 break; 2663 case IPV6_UNICAST_HOPS: 2664 /* -1 means use default */ 2665 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2666 *outlenp = 0; 2667 return (EINVAL); 2668 } 2669 if (!checkonly) { 2670 if (*i1 == -1) { 2671 udp->udp_ttl = ipp->ipp_unicast_hops = 2672 us->us_ipv6_hoplimit; 2673 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 2674 /* Pass modified value to IP. */ 2675 *i1 = udp->udp_ttl; 2676 } else { 2677 udp->udp_ttl = ipp->ipp_unicast_hops = 2678 (uint8_t)*i1; 2679 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 2680 } 2681 /* Rebuild the header template */ 2682 error = udp_build_hdrs(udp); 2683 if (error != 0) { 2684 *outlenp = 0; 2685 return (error); 2686 } 2687 } 2688 break; 2689 case IPV6_MULTICAST_HOPS: 2690 /* -1 means use default */ 2691 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 2692 *outlenp = 0; 2693 return (EINVAL); 2694 } 2695 if (!checkonly) { 2696 if (*i1 == -1) { 2697 udp->udp_multicast_ttl = 2698 ipp->ipp_multicast_hops = 2699 IP_DEFAULT_MULTICAST_TTL; 2700 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 2701 /* Pass modified value to IP. 
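 * (The -1 sentinel was translated to the default above, so IP sees a
 * concrete hop count rather than the sentinel.)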
*/ 2702 *i1 = udp->udp_multicast_ttl; 2703 } else { 2704 udp->udp_multicast_ttl = 2705 ipp->ipp_multicast_hops = 2706 (uint8_t)*i1; 2707 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 2708 } 2709 } 2710 break; 2711 case IPV6_MULTICAST_LOOP: 2712 if (*i1 != 0 && *i1 != 1) { 2713 *outlenp = 0; 2714 return (EINVAL); 2715 } 2716 if (!checkonly) { 2717 connp->conn_multicast_loop = *i1; 2718 PASS_OPT_TO_IP(connp); 2719 } 2720 break; 2721 case IPV6_JOIN_GROUP: 2722 case IPV6_LEAVE_GROUP: 2723 case MCAST_JOIN_GROUP: 2724 case MCAST_LEAVE_GROUP: 2725 case MCAST_BLOCK_SOURCE: 2726 case MCAST_UNBLOCK_SOURCE: 2727 case MCAST_JOIN_SOURCE_GROUP: 2728 case MCAST_LEAVE_SOURCE_GROUP: 2729 /* 2730 * "soft" error (negative) 2731 * option not handled at this level 2732 * Note: Do not modify *outlenp 2733 */ 2734 return (-EINVAL); 2735 case IPV6_BOUND_IF: 2736 if (!checkonly) { 2737 udp->udp_bound_if = *i1; 2738 PASS_OPT_TO_IP(connp); 2739 } 2740 break; 2741 case IPV6_UNSPEC_SRC: 2742 if (!checkonly) { 2743 udp->udp_unspec_source = onoff; 2744 PASS_OPT_TO_IP(connp); 2745 } 2746 break; 2747 /* 2748 * Set boolean switches for ancillary data delivery 2749 */ 2750 case IPV6_RECVPKTINFO: 2751 if (!checkonly) { 2752 udp->udp_ip_recvpktinfo = onoff; 2753 PASS_OPT_TO_IP(connp); 2754 } 2755 break; 2756 case IPV6_RECVTCLASS: 2757 if (!checkonly) { 2758 udp->udp_ipv6_recvtclass = onoff; 2759 PASS_OPT_TO_IP(connp); 2760 } 2761 break; 2762 case IPV6_RECVPATHMTU: 2763 if (!checkonly) { 2764 udp->udp_ipv6_recvpathmtu = onoff; 2765 PASS_OPT_TO_IP(connp); 2766 } 2767 break; 2768 case IPV6_RECVHOPLIMIT: 2769 if (!checkonly) { 2770 udp->udp_ipv6_recvhoplimit = onoff; 2771 PASS_OPT_TO_IP(connp); 2772 } 2773 break; 2774 case IPV6_RECVHOPOPTS: 2775 if (!checkonly) { 2776 udp->udp_ipv6_recvhopopts = onoff; 2777 PASS_OPT_TO_IP(connp); 2778 } 2779 break; 2780 case IPV6_RECVDSTOPTS: 2781 if (!checkonly) { 2782 udp->udp_ipv6_recvdstopts = onoff; 2783 PASS_OPT_TO_IP(connp); 2784 } 2785 break; 2786 case _OLD_IPV6_RECVDSTOPTS: 2787 if (!checkonly) 2788 udp->udp_old_ipv6_recvdstopts = onoff; 2789 break; 2790 case IPV6_RECVRTHDRDSTOPTS: 2791 if (!checkonly) { 2792 udp->udp_ipv6_recvrthdrdstopts = onoff; 2793 PASS_OPT_TO_IP(connp); 2794 } 2795 break; 2796 case IPV6_RECVRTHDR: 2797 if (!checkonly) { 2798 udp->udp_ipv6_recvrthdr = onoff; 2799 PASS_OPT_TO_IP(connp); 2800 } 2801 break; 2802 /* 2803 * Set sticky options or ancillary data. 2804 * If sticky options, (re)build any extension headers 2805 * that might be needed as a result. 2806 */ 2807 case IPV6_PKTINFO: 2808 /* 2809 * The source address and ifindex are verified 2810 * in ip_opt_set(). For ancillary data the 2811 * source address is checked in ip_wput_v6. 
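 *
 * A zero-length set clears any sticky source address / interface
 * selection (recording the cleared fields in ipp_sticky_ignored),
 * while a full struct in6_pktinfo installs one; in the sticky case the
 * cached header template is rebuilt via udp_build_hdrs().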
2812 */ 2813 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 2814 return (EINVAL); 2815 if (checkonly) 2816 break; 2817 2818 if (inlen == 0) { 2819 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 2820 ipp->ipp_sticky_ignored |= 2821 (IPPF_IFINDEX|IPPF_ADDR); 2822 } else { 2823 struct in6_pktinfo *pkti; 2824 2825 pkti = (struct in6_pktinfo *)invalp; 2826 ipp->ipp_ifindex = pkti->ipi6_ifindex; 2827 ipp->ipp_addr = pkti->ipi6_addr; 2828 if (ipp->ipp_ifindex != 0) 2829 ipp->ipp_fields |= IPPF_IFINDEX; 2830 else 2831 ipp->ipp_fields &= ~IPPF_IFINDEX; 2832 if (!IN6_IS_ADDR_UNSPECIFIED( 2833 &ipp->ipp_addr)) 2834 ipp->ipp_fields |= IPPF_ADDR; 2835 else 2836 ipp->ipp_fields &= ~IPPF_ADDR; 2837 } 2838 if (sticky) { 2839 error = udp_build_hdrs(udp); 2840 if (error != 0) 2841 return (error); 2842 PASS_OPT_TO_IP(connp); 2843 } 2844 break; 2845 case IPV6_HOPLIMIT: 2846 if (sticky) 2847 return (EINVAL); 2848 if (inlen != 0 && inlen != sizeof (int)) 2849 return (EINVAL); 2850 if (checkonly) 2851 break; 2852 2853 if (inlen == 0) { 2854 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 2855 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 2856 } else { 2857 if (*i1 > 255 || *i1 < -1) 2858 return (EINVAL); 2859 if (*i1 == -1) 2860 ipp->ipp_hoplimit = 2861 us->us_ipv6_hoplimit; 2862 else 2863 ipp->ipp_hoplimit = *i1; 2864 ipp->ipp_fields |= IPPF_HOPLIMIT; 2865 } 2866 break; 2867 case IPV6_TCLASS: 2868 if (inlen != 0 && inlen != sizeof (int)) 2869 return (EINVAL); 2870 if (checkonly) 2871 break; 2872 2873 if (inlen == 0) { 2874 ipp->ipp_fields &= ~IPPF_TCLASS; 2875 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 2876 } else { 2877 if (*i1 > 255 || *i1 < -1) 2878 return (EINVAL); 2879 if (*i1 == -1) 2880 ipp->ipp_tclass = 0; 2881 else 2882 ipp->ipp_tclass = *i1; 2883 ipp->ipp_fields |= IPPF_TCLASS; 2884 } 2885 if (sticky) { 2886 error = udp_build_hdrs(udp); 2887 if (error != 0) 2888 return (error); 2889 } 2890 break; 2891 case IPV6_NEXTHOP: 2892 /* 2893 * IP will verify that the nexthop is reachable 2894 * and fail for sticky options. 2895 */ 2896 if (inlen != 0 && inlen != sizeof (sin6_t)) 2897 return (EINVAL); 2898 if (checkonly) 2899 break; 2900 2901 if (inlen == 0) { 2902 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2903 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 2904 } else { 2905 sin6_t *sin6 = (sin6_t *)invalp; 2906 2907 if (sin6->sin6_family != AF_INET6) { 2908 return (EAFNOSUPPORT); 2909 } 2910 if (IN6_IS_ADDR_V4MAPPED( 2911 &sin6->sin6_addr)) 2912 return (EADDRNOTAVAIL); 2913 ipp->ipp_nexthop = sin6->sin6_addr; 2914 if (!IN6_IS_ADDR_UNSPECIFIED( 2915 &ipp->ipp_nexthop)) 2916 ipp->ipp_fields |= IPPF_NEXTHOP; 2917 else 2918 ipp->ipp_fields &= ~IPPF_NEXTHOP; 2919 } 2920 if (sticky) { 2921 error = udp_build_hdrs(udp); 2922 if (error != 0) 2923 return (error); 2924 PASS_OPT_TO_IP(connp); 2925 } 2926 break; 2927 case IPV6_HOPOPTS: { 2928 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 2929 /* 2930 * Sanity checks - minimum size, size a multiple of 2931 * eight bytes, and matching size passed in. 2932 */ 2933 if (inlen != 0 && 2934 inlen != (8 * (hopts->ip6h_len + 1))) 2935 return (EINVAL); 2936 2937 if (checkonly) 2938 break; 2939 2940 error = optcom_pkt_set(invalp, inlen, sticky, 2941 (uchar_t **)&ipp->ipp_hopopts, 2942 &ipp->ipp_hopoptslen, 2943 sticky ? 
udp->udp_label_len_v6 : 0); 2944 if (error != 0) 2945 return (error); 2946 if (ipp->ipp_hopoptslen == 0) { 2947 ipp->ipp_fields &= ~IPPF_HOPOPTS; 2948 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 2949 } else { 2950 ipp->ipp_fields |= IPPF_HOPOPTS; 2951 } 2952 if (sticky) { 2953 error = udp_build_hdrs(udp); 2954 if (error != 0) 2955 return (error); 2956 } 2957 break; 2958 } 2959 case IPV6_RTHDRDSTOPTS: { 2960 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 2961 2962 /* 2963 * Sanity checks - minimum size, size a multiple of 2964 * eight bytes, and matching size passed in. 2965 */ 2966 if (inlen != 0 && 2967 inlen != (8 * (dopts->ip6d_len + 1))) 2968 return (EINVAL); 2969 2970 if (checkonly) 2971 break; 2972 2973 if (inlen == 0) { 2974 if (sticky && 2975 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 2976 kmem_free(ipp->ipp_rtdstopts, 2977 ipp->ipp_rtdstoptslen); 2978 ipp->ipp_rtdstopts = NULL; 2979 ipp->ipp_rtdstoptslen = 0; 2980 } 2981 2982 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 2983 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 2984 } else { 2985 error = optcom_pkt_set(invalp, inlen, sticky, 2986 (uchar_t **)&ipp->ipp_rtdstopts, 2987 &ipp->ipp_rtdstoptslen, 0); 2988 if (error != 0) 2989 return (error); 2990 ipp->ipp_fields |= IPPF_RTDSTOPTS; 2991 } 2992 if (sticky) { 2993 error = udp_build_hdrs(udp); 2994 if (error != 0) 2995 return (error); 2996 } 2997 break; 2998 } 2999 case IPV6_DSTOPTS: { 3000 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3001 3002 /* 3003 * Sanity checks - minimum size, size a multiple of 3004 * eight bytes, and matching size passed in. 3005 */ 3006 if (inlen != 0 && 3007 inlen != (8 * (dopts->ip6d_len + 1))) 3008 return (EINVAL); 3009 3010 if (checkonly) 3011 break; 3012 3013 if (inlen == 0) { 3014 if (sticky && 3015 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3016 kmem_free(ipp->ipp_dstopts, 3017 ipp->ipp_dstoptslen); 3018 ipp->ipp_dstopts = NULL; 3019 ipp->ipp_dstoptslen = 0; 3020 } 3021 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3022 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3023 } else { 3024 error = optcom_pkt_set(invalp, inlen, sticky, 3025 (uchar_t **)&ipp->ipp_dstopts, 3026 &ipp->ipp_dstoptslen, 0); 3027 if (error != 0) 3028 return (error); 3029 ipp->ipp_fields |= IPPF_DSTOPTS; 3030 } 3031 if (sticky) { 3032 error = udp_build_hdrs(udp); 3033 if (error != 0) 3034 return (error); 3035 } 3036 break; 3037 } 3038 case IPV6_RTHDR: { 3039 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3040 3041 /* 3042 * Sanity checks - minimum size, size a multiple of 3043 * eight bytes, and matching size passed in. 
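 *
 * The routing header encodes its size as ip6r_len 8-octet units beyond
 * the first 8 bytes, so a well-formed buffer is exactly
 * 8 * (ip6r_len + 1) bytes. (Illustrative example: a type 0 routing
 * header carrying a single IPv6 address has ip6r_len == 2, so inlen
 * must be 24.)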
3044 */ 3045 if (inlen != 0 && 3046 inlen != (8 * (rt->ip6r_len + 1))) 3047 return (EINVAL); 3048 3049 if (checkonly) 3050 break; 3051 3052 if (inlen == 0) { 3053 if (sticky && 3054 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3055 kmem_free(ipp->ipp_rthdr, 3056 ipp->ipp_rthdrlen); 3057 ipp->ipp_rthdr = NULL; 3058 ipp->ipp_rthdrlen = 0; 3059 } 3060 ipp->ipp_fields &= ~IPPF_RTHDR; 3061 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3062 } else { 3063 error = optcom_pkt_set(invalp, inlen, sticky, 3064 (uchar_t **)&ipp->ipp_rthdr, 3065 &ipp->ipp_rthdrlen, 0); 3066 if (error != 0) 3067 return (error); 3068 ipp->ipp_fields |= IPPF_RTHDR; 3069 } 3070 if (sticky) { 3071 error = udp_build_hdrs(udp); 3072 if (error != 0) 3073 return (error); 3074 } 3075 break; 3076 } 3077 3078 case IPV6_DONTFRAG: 3079 if (checkonly) 3080 break; 3081 3082 if (onoff) { 3083 ipp->ipp_fields |= IPPF_DONTFRAG; 3084 } else { 3085 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3086 } 3087 break; 3088 3089 case IPV6_USE_MIN_MTU: 3090 if (inlen != sizeof (int)) 3091 return (EINVAL); 3092 3093 if (*i1 < -1 || *i1 > 1) 3094 return (EINVAL); 3095 3096 if (checkonly) 3097 break; 3098 3099 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3100 ipp->ipp_use_min_mtu = *i1; 3101 break; 3102 3103 case IPV6_BOUND_PIF: 3104 case IPV6_SEC_OPT: 3105 case IPV6_DONTFAILOVER_IF: 3106 case IPV6_SRC_PREFERENCES: 3107 case IPV6_V6ONLY: 3108 /* Handled at the IP level */ 3109 return (-EINVAL); 3110 default: 3111 *outlenp = 0; 3112 return (EINVAL); 3113 } 3114 break; 3115 } /* end IPPROTO_IPV6 */ 3116 case IPPROTO_UDP: 3117 switch (name) { 3118 case UDP_ANONPRIVBIND: 3119 if ((error = secpolicy_net_privaddr(cr, 0, 3120 IPPROTO_UDP)) != 0) { 3121 *outlenp = 0; 3122 return (error); 3123 } 3124 if (!checkonly) { 3125 udp->udp_anon_priv_bind = onoff; 3126 } 3127 break; 3128 case UDP_EXCLBIND: 3129 if (!checkonly) 3130 udp->udp_exclbind = onoff; 3131 break; 3132 case UDP_RCVHDR: 3133 if (!checkonly) 3134 udp->udp_rcvhdr = onoff; 3135 break; 3136 case UDP_NAT_T_ENDPOINT: 3137 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 3138 *outlenp = 0; 3139 return (error); 3140 } 3141 3142 /* 3143 * Use udp_family instead so we can avoid ambiguitites 3144 * with AF_INET6 sockets that may switch from IPv4 3145 * to IPv6. 3146 */ 3147 if (udp->udp_family != AF_INET) { 3148 *outlenp = 0; 3149 return (EAFNOSUPPORT); 3150 } 3151 3152 if (!checkonly) { 3153 int size; 3154 3155 udp->udp_nat_t_endpoint = onoff; 3156 3157 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3158 UDPH_SIZE + udp->udp_ip_snd_options_len; 3159 3160 /* Also, adjust wroff */ 3161 if (onoff) { 3162 udp->udp_max_hdr_len += 3163 sizeof (uint32_t); 3164 } 3165 size = udp->udp_max_hdr_len + 3166 us->us_wroff_extra; 3167 (void) proto_set_tx_wroff(connp->conn_rq, connp, 3168 size); 3169 } 3170 break; 3171 default: 3172 *outlenp = 0; 3173 return (EINVAL); 3174 } 3175 break; 3176 default: 3177 *outlenp = 0; 3178 return (EINVAL); 3179 } 3180 /* 3181 * Common case of OK return with outval same as inval. 
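 *
 * The option framework may hand us distinct input and output buffers;
 * in that case the negotiated value is copied back verbatim, and either
 * way *outlenp reports the same length that came in.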
3182 */ 3183 if (invalp != outvalp) { 3184 /* don't trust bcopy for identical src/dst */ 3185 (void) bcopy(invalp, outvalp, inlen); 3186 } 3187 *outlenp = inlen; 3188 return (0); 3189 } 3190 3191 int 3192 udp_opt_set(conn_t *connp, uint_t optset_context, int level, int name, 3193 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3194 void *thisdg_attrs, cred_t *cr) 3195 { 3196 int error; 3197 boolean_t checkonly; 3198 3199 error = 0; 3200 switch (optset_context) { 3201 case SETFN_OPTCOM_CHECKONLY: 3202 checkonly = B_TRUE; 3203 /* 3204 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3205 * inlen != 0 implies value supplied and 3206 * we have to "pretend" to set it. 3207 * inlen == 0 implies that there is no 3208 * value part in T_CHECK request and just validation 3209 * done elsewhere should be enough, we just return here. 3210 */ 3211 if (inlen == 0) { 3212 *outlenp = 0; 3213 goto done; 3214 } 3215 break; 3216 case SETFN_OPTCOM_NEGOTIATE: 3217 checkonly = B_FALSE; 3218 break; 3219 case SETFN_UD_NEGOTIATE: 3220 case SETFN_CONN_NEGOTIATE: 3221 checkonly = B_FALSE; 3222 /* 3223 * Negotiating local and "association-related" options 3224 * through T_UNITDATA_REQ. 3225 * 3226 * Following routine can filter out ones we do not 3227 * want to be "set" this way. 3228 */ 3229 if (!udp_opt_allow_udr_set(level, name)) { 3230 *outlenp = 0; 3231 error = EINVAL; 3232 goto done; 3233 } 3234 break; 3235 default: 3236 /* 3237 * We should never get here 3238 */ 3239 *outlenp = 0; 3240 error = EINVAL; 3241 goto done; 3242 } 3243 3244 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3245 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3246 3247 error = udp_do_opt_set(connp, level, name, inlen, invalp, outlenp, 3248 outvalp, cr, thisdg_attrs, checkonly); 3249 done: 3250 return (error); 3251 } 3252 3253 /* ARGSUSED */ 3254 int 3255 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name, 3256 uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp, 3257 void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3258 { 3259 conn_t *connp = Q_TO_CONN(q); 3260 int error; 3261 udp_t *udp = connp->conn_udp; 3262 3263 rw_enter(&udp->udp_rwlock, RW_WRITER); 3264 error = udp_opt_set(connp, optset_context, level, name, inlen, invalp, 3265 outlenp, outvalp, thisdg_attrs, cr); 3266 rw_exit(&udp->udp_rwlock); 3267 return (error); 3268 } 3269 3270 /* 3271 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3272 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3273 * headers, and the udp header. 3274 * Returns failure if can't allocate memory. 
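 *
 * The rebuilt template lives in udp_sticky_hdrs/udp_sticky_hdrs_len and
 * is laid out as [ip6i_t, if IPPF_HAS_IP6I][ip6_t][sticky extension
 * headers][udpha_t]; only the source address and source port are filled
 * in here, the remaining fields being completed on the transmit path.
 * If the template grows beyond udp_max_hdr_len the stream head write
 * offset is raised to match.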
3275 */ 3276 static int 3277 udp_build_hdrs(udp_t *udp) 3278 { 3279 udp_stack_t *us = udp->udp_us; 3280 uchar_t *hdrs; 3281 uint_t hdrs_len; 3282 ip6_t *ip6h; 3283 ip6i_t *ip6i; 3284 udpha_t *udpha; 3285 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3286 size_t sth_wroff; 3287 conn_t *connp = udp->udp_connp; 3288 3289 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3290 ASSERT(connp != NULL); 3291 3292 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3293 ASSERT(hdrs_len != 0); 3294 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3295 /* Need to reallocate */ 3296 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3297 if (hdrs == NULL) 3298 return (ENOMEM); 3299 3300 if (udp->udp_sticky_hdrs_len != 0) { 3301 kmem_free(udp->udp_sticky_hdrs, 3302 udp->udp_sticky_hdrs_len); 3303 } 3304 udp->udp_sticky_hdrs = hdrs; 3305 udp->udp_sticky_hdrs_len = hdrs_len; 3306 } 3307 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3308 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3309 3310 /* Set header fields not in ipp */ 3311 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3312 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3313 ip6h = (ip6_t *)&ip6i[1]; 3314 } else { 3315 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3316 } 3317 3318 if (!(ipp->ipp_fields & IPPF_ADDR)) 3319 ip6h->ip6_src = udp->udp_v6src; 3320 3321 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3322 udpha->uha_src_port = udp->udp_port; 3323 3324 /* Try to get everything in a single mblk */ 3325 if (hdrs_len > udp->udp_max_hdr_len) { 3326 udp->udp_max_hdr_len = hdrs_len; 3327 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3328 rw_exit(&udp->udp_rwlock); 3329 (void) proto_set_tx_wroff(udp->udp_connp->conn_rq, 3330 udp->udp_connp, sth_wroff); 3331 rw_enter(&udp->udp_rwlock, RW_WRITER); 3332 } 3333 return (0); 3334 } 3335 3336 /* 3337 * This routine retrieves the value of an ND variable in a udpparam_t 3338 * structure. It is called through nd_getset when a user reads the 3339 * variable. 3340 */ 3341 /* ARGSUSED */ 3342 static int 3343 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3344 { 3345 udpparam_t *udppa = (udpparam_t *)cp; 3346 3347 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3348 return (0); 3349 } 3350 3351 /* 3352 * Walk through the param array specified registering each element with the 3353 * named dispatch (ND) handler. 3354 */ 3355 static boolean_t 3356 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3357 { 3358 for (; cnt-- > 0; udppa++) { 3359 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3360 if (!nd_load(ndp, udppa->udp_param_name, 3361 udp_param_get, udp_param_set, 3362 (caddr_t)udppa)) { 3363 nd_free(ndp); 3364 return (B_FALSE); 3365 } 3366 } 3367 } 3368 if (!nd_load(ndp, "udp_extra_priv_ports", 3369 udp_extra_priv_ports_get, NULL, NULL)) { 3370 nd_free(ndp); 3371 return (B_FALSE); 3372 } 3373 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3374 NULL, udp_extra_priv_ports_add, NULL)) { 3375 nd_free(ndp); 3376 return (B_FALSE); 3377 } 3378 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3379 NULL, udp_extra_priv_ports_del, NULL)) { 3380 nd_free(ndp); 3381 return (B_FALSE); 3382 } 3383 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3384 NULL)) { 3385 nd_free(ndp); 3386 return (B_FALSE); 3387 } 3388 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3389 NULL)) { 3390 nd_free(ndp); 3391 return (B_FALSE); 3392 } 3393 return (B_TRUE); 3394 } 3395 3396 /* This routine sets an ND variable in a udpparam_t structure. 
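 * (Illustrative ndd(1M) usage against the variables registered above,
 * assuming the stock parameter names:
 *    ndd -set /dev/udp udp_smallest_anon_port 32768
 * which would be validated against udp_param_min/udp_param_max below.)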
*/ 3397 /* ARGSUSED */ 3398 static int 3399 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3400 { 3401 long new_value; 3402 udpparam_t *udppa = (udpparam_t *)cp; 3403 3404 /* 3405 * Fail the request if the new value does not lie within the 3406 * required bounds. 3407 */ 3408 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3409 new_value < udppa->udp_param_min || 3410 new_value > udppa->udp_param_max) { 3411 return (EINVAL); 3412 } 3413 3414 /* Set the new value */ 3415 udppa->udp_param_value = new_value; 3416 return (0); 3417 } 3418 3419 /* 3420 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3421 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3422 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3423 * then it's assumed to be allocated to be large enough. 3424 * 3425 * Returns zero if trimming of the security option causes all options to go 3426 * away. 3427 */ 3428 static size_t 3429 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3430 { 3431 struct T_opthdr *toh; 3432 size_t hol = ipp->ipp_hopoptslen; 3433 ip6_hbh_t *dstopt = NULL; 3434 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3435 size_t tlen, olen, plen; 3436 boolean_t deleting; 3437 const struct ip6_opt *sopt, *lastpad; 3438 struct ip6_opt *dopt; 3439 3440 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3441 toh->level = IPPROTO_IPV6; 3442 toh->name = IPV6_HOPOPTS; 3443 toh->status = 0; 3444 dstopt = (ip6_hbh_t *)(toh + 1); 3445 } 3446 3447 /* 3448 * If labeling is enabled, then skip the label option 3449 * but get other options if there are any. 3450 */ 3451 if (is_system_labeled()) { 3452 dopt = NULL; 3453 if (dstopt != NULL) { 3454 /* will fill in ip6h_len later */ 3455 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3456 dopt = (struct ip6_opt *)(dstopt + 1); 3457 } 3458 sopt = (const struct ip6_opt *)(srcopt + 1); 3459 hol -= sizeof (*srcopt); 3460 tlen = sizeof (*dstopt); 3461 lastpad = NULL; 3462 deleting = B_FALSE; 3463 /* 3464 * This loop finds the first (lastpad pointer) of any number of 3465 * pads that precede the security option, then treats the 3466 * security option as though it were a pad, and then finds the 3467 * next non-pad option (or end of list). 3468 * 3469 * It then treats the entire block as one big pad. To preserve 3470 * alignment of any options that follow, or just the end of the 3471 * list, it computes a minimal new padding size that keeps the 3472 * same alignment for the next option. 3473 * 3474 * If it encounters just a sequence of pads with no security 3475 * option, those are copied as-is rather than collapsed. 3476 * 3477 * Note that to handle the end of list case, the code makes one 3478 * loop with 'hol' set to zero.
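 *
 * Sketch of the collapse on a labeled system: an input option list of
 * [PadN][label][Pad1][option X] comes out as [PadN'][option X], where
 * PadN' is just big enough to keep option X at the same offset modulo
 * 8; if the list holds nothing but pads and the label, nothing is
 * copied and the caller sees a zero length.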
3479 */ 3480 for (;;) { 3481 if (hol > 0) { 3482 if (sopt->ip6o_type == IP6OPT_PAD1) { 3483 if (lastpad == NULL) 3484 lastpad = sopt; 3485 sopt = (const struct ip6_opt *) 3486 &sopt->ip6o_len; 3487 hol--; 3488 continue; 3489 } 3490 olen = sopt->ip6o_len + sizeof (*sopt); 3491 if (olen > hol) 3492 olen = hol; 3493 if (sopt->ip6o_type == IP6OPT_PADN || 3494 sopt->ip6o_type == ip6opt_ls) { 3495 if (sopt->ip6o_type == ip6opt_ls) 3496 deleting = B_TRUE; 3497 if (lastpad == NULL) 3498 lastpad = sopt; 3499 sopt = (const struct ip6_opt *) 3500 ((const char *)sopt + olen); 3501 hol -= olen; 3502 continue; 3503 } 3504 } else { 3505 /* if nothing was copied at all, then delete */ 3506 if (tlen == sizeof (*dstopt)) 3507 return (0); 3508 /* last pass; pick up any trailing padding */ 3509 olen = 0; 3510 } 3511 if (deleting) { 3512 /* 3513 * compute aligning effect of deleted material 3514 * to reproduce with pad. 3515 */ 3516 plen = ((const char *)sopt - 3517 (const char *)lastpad) & 7; 3518 tlen += plen; 3519 if (dopt != NULL) { 3520 if (plen == 1) { 3521 dopt->ip6o_type = IP6OPT_PAD1; 3522 } else if (plen > 1) { 3523 plen -= sizeof (*dopt); 3524 dopt->ip6o_type = IP6OPT_PADN; 3525 dopt->ip6o_len = plen; 3526 if (plen > 0) 3527 bzero(dopt + 1, plen); 3528 } 3529 dopt = (struct ip6_opt *) 3530 ((char *)dopt + plen); 3531 } 3532 deleting = B_FALSE; 3533 lastpad = NULL; 3534 } 3535 /* if there's uncopied padding, then copy that now */ 3536 if (lastpad != NULL) { 3537 olen += (const char *)sopt - 3538 (const char *)lastpad; 3539 sopt = lastpad; 3540 lastpad = NULL; 3541 } 3542 if (dopt != NULL && olen > 0) { 3543 bcopy(sopt, dopt, olen); 3544 dopt = (struct ip6_opt *)((char *)dopt + olen); 3545 } 3546 if (hol == 0) 3547 break; 3548 tlen += olen; 3549 sopt = (const struct ip6_opt *) 3550 ((const char *)sopt + olen); 3551 hol -= olen; 3552 } 3553 /* go back and patch up the length value, rounded upward */ 3554 if (dstopt != NULL) 3555 dstopt->ip6h_len = (tlen - 1) >> 3; 3556 } else { 3557 tlen = hol; 3558 if (dstopt != NULL) 3559 bcopy(srcopt, dstopt, hol); 3560 } 3561 3562 tlen += sizeof (*toh); 3563 if (toh != NULL) 3564 toh->len = tlen; 3565 3566 return (tlen); 3567 } 3568 3569 /* 3570 * Update udp_rcv_opt_len from the packet. 3571 * Called when options received, and when no options received but 3572 * udp_ip_recv_opt_len has previously recorded options. 
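 *
 * The saved copy lives in udp_ip_rcv_options/udp_ip_rcv_options_len and
 * is what a later getsockopt(IP_OPTIONS) on an AF_INET socket returns;
 * the buffer is grown with mi_alloc() when needed and released once a
 * packet arrives without options.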
3573 */ 3574 static void 3575 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 3576 { 3577 /* Save the options if any */ 3578 if (opt_len > 0) { 3579 if (opt_len > udp->udp_ip_rcv_options_len) { 3580 /* Need to allocate larger buffer */ 3581 if (udp->udp_ip_rcv_options_len != 0) 3582 mi_free((char *)udp->udp_ip_rcv_options); 3583 udp->udp_ip_rcv_options_len = 0; 3584 udp->udp_ip_rcv_options = 3585 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 3586 if (udp->udp_ip_rcv_options != NULL) 3587 udp->udp_ip_rcv_options_len = opt_len; 3588 } 3589 if (udp->udp_ip_rcv_options_len != 0) { 3590 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 3591 /* Adjust length if we are reusing the space */ 3592 udp->udp_ip_rcv_options_len = opt_len; 3593 } 3594 } else if (udp->udp_ip_rcv_options_len != 0) { 3595 /* Clear out previously recorded options */ 3596 mi_free((char *)udp->udp_ip_rcv_options); 3597 udp->udp_ip_rcv_options = NULL; 3598 udp->udp_ip_rcv_options_len = 0; 3599 } 3600 } 3601 3602 static void 3603 udp_queue_fallback(udp_t *udp, mblk_t *mp) 3604 { 3605 ASSERT(MUTEX_HELD(&udp->udp_recv_lock)); 3606 if (IPCL_IS_NONSTR(udp->udp_connp)) { 3607 /* 3608 * Fallback has started but messages have not been moved yet. 3609 */ 3610 if (udp->udp_fallback_queue_head == NULL) { 3611 ASSERT(udp->udp_fallback_queue_tail == NULL); 3612 udp->udp_fallback_queue_head = mp; 3613 udp->udp_fallback_queue_tail = mp; 3614 } else { 3615 ASSERT(udp->udp_fallback_queue_tail != NULL); 3616 udp->udp_fallback_queue_tail->b_next = mp; 3617 udp->udp_fallback_queue_tail = mp; 3618 } 3619 mutex_exit(&udp->udp_recv_lock); 3620 } else { 3621 /* 3622 * No more fallbacks possible, ok to drop the lock. 3623 */ 3624 mutex_exit(&udp->udp_recv_lock); 3625 putnext(udp->udp_connp->conn_rq, mp); 3626 } 3627 } 3628 3629 /* ARGSUSED2 */ 3630 static void 3631 udp_input(void *arg1, mblk_t *mp, void *arg2) 3632 { 3633 conn_t *connp = (conn_t *)arg1; 3634 struct T_unitdata_ind *tudi; 3635 uchar_t *rptr; /* Pointer to IP header */ 3636 int hdr_length; /* Length of IP+UDP headers */ 3637 int opt_len; 3638 int udi_size; /* Size of T_unitdata_ind */ 3639 int mp_len; 3640 udp_t *udp; 3641 udpha_t *udpha; 3642 int ipversion; 3643 ip6_pkt_t ipp; 3644 ip6_t *ip6h; 3645 ip6i_t *ip6i; 3646 mblk_t *mp1; 3647 mblk_t *options_mp = NULL; 3648 ip_pktinfo_t *pinfo = NULL; 3649 cred_t *cr = NULL; 3650 pid_t cpid; 3651 uint32_t udp_ip_rcv_options_len; 3652 udp_bits_t udp_bits; 3653 cred_t *rcr = connp->conn_cred; 3654 udp_stack_t *us; 3655 3656 ASSERT(connp->conn_flags & IPCL_UDPCONN); 3657 3658 udp = connp->conn_udp; 3659 us = udp->udp_us; 3660 rptr = mp->b_rptr; 3661 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 3662 ASSERT(OK_32PTR(rptr)); 3663 3664 /* 3665 * IP should have prepended the options data in an M_CTL. 3666 * Check the M_CTL "type" to make sure we are not here because of 3667 * a valid ICMP message. 3668 */ 3669 if (DB_TYPE(mp) == M_CTL) { 3670 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 3671 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 3672 IN_PKTINFO) { 3673 /* 3674 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 3675 * has been prepended to the packet by IP. We need to 3676 * extract the mblk and adjust the rptr. 3677 */ 3678 pinfo = (ip_pktinfo_t *)mp->b_rptr; 3679 options_mp = mp; 3680 mp = mp->b_cont; 3681 rptr = mp->b_rptr; 3682 UDP_STAT(us, udp_in_pktinfo); 3683 } else { 3684 /* 3685 * ICMP messages. 3686 */ 3687 udp_icmp_error(connp, mp); 3688 return; 3689 } 3690 } 3691 3692 mp_len = msgdsize(mp); 3693 /* 3694 * This is the inbound data path.
3695 * First, we check to make sure the IP version number is correct, 3696 * and then pull the IP and UDP headers into the first mblk. 3697 */ 3698 3699 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 3700 ipp.ipp_fields = 0; 3701 3702 ipversion = IPH_HDR_VERSION(rptr); 3703 3704 rw_enter(&udp->udp_rwlock, RW_READER); 3705 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 3706 udp_bits = udp->udp_bits; 3707 rw_exit(&udp->udp_rwlock); 3708 3709 switch (ipversion) { 3710 case IPV4_VERSION: 3711 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 3712 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 3713 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 3714 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 3715 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 3716 udp->udp_family == AF_INET) { 3717 /* 3718 * Record/update udp_ip_rcv_options with the lock 3719 * held. Not needed for AF_INET6 sockets 3720 * since they don't support a getsockopt of IP_OPTIONS. 3721 */ 3722 rw_enter(&udp->udp_rwlock, RW_WRITER); 3723 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 3724 opt_len); 3725 rw_exit(&udp->udp_rwlock); 3726 } 3727 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 3728 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 3729 udp->udp_ip_recvpktinfo) { 3730 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 3731 ipp.ipp_fields |= IPPF_IFINDEX; 3732 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 3733 } 3734 } 3735 break; 3736 case IPV6_VERSION: 3737 /* 3738 * IPv6 packets can only be received by applications 3739 * that are prepared to receive IPv6 addresses. 3740 * The IP fanout must ensure this. 3741 */ 3742 ASSERT(udp->udp_family == AF_INET6); 3743 3744 ip6h = (ip6_t *)rptr; 3745 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 3746 3747 if (ip6h->ip6_nxt != IPPROTO_UDP) { 3748 uint8_t nexthdrp; 3749 /* Look for ifindex information */ 3750 if (ip6h->ip6_nxt == IPPROTO_RAW) { 3751 ip6i = (ip6i_t *)ip6h; 3752 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 3753 goto tossit; 3754 3755 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 3756 ASSERT(ip6i->ip6i_ifindex != 0); 3757 ipp.ipp_fields |= IPPF_IFINDEX; 3758 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 3759 } 3760 rptr = (uchar_t *)&ip6i[1]; 3761 mp->b_rptr = rptr; 3762 if (rptr == mp->b_wptr) { 3763 mp1 = mp->b_cont; 3764 freeb(mp); 3765 mp = mp1; 3766 rptr = mp->b_rptr; 3767 } 3768 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 3769 goto tossit; 3770 ip6h = (ip6_t *)rptr; 3771 mp_len = msgdsize(mp); 3772 } 3773 /* 3774 * Find any potentially interesting extension headers 3775 * as well as the length of the IPv6 + extension 3776 * headers. 3777 */ 3778 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 3779 UDPH_SIZE; 3780 ASSERT(nexthdrp == IPPROTO_UDP); 3781 } else { 3782 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 3783 ip6i = NULL; 3784 } 3785 break; 3786 default: 3787 ASSERT(0); 3788 } 3789 3790 /* 3791 * IP inspected the UDP header thus all of it must be in the mblk. 3792 * UDP length check is performed for IPv6 packets and IPv4 packets 3793 * to check if the size of the packet as specified 3794 * by the header is the same as the physical size of the packet. 3795 * FIXME? Didn't IP already check this? 3796 */ 3797 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 3798 if ((MBLKL(mp) < hdr_length) || 3799 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 3800 goto tossit; 3801 } 3802 3803 3804 /* Walk past the headers unless IP_RECVHDR was set. 
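 * (udpb_rcvhdr is the cached UDP_RCVHDR socket option; when it is set,
 * b_rptr is left alone so the application receives the IP and UDP
 * headers along with the payload.)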
*/ 3805 if (!udp_bits.udpb_rcvhdr) { 3806 mp->b_rptr = rptr + hdr_length; 3807 mp_len -= hdr_length; 3808 } 3809 3810 /* 3811 * This is the inbound data path. Packets are passed upstream as 3812 * T_UNITDATA_IND messages with full IP headers still attached. 3813 */ 3814 if (udp->udp_family == AF_INET) { 3815 sin_t *sin; 3816 3817 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 3818 3819 /* 3820 * Normally only send up the source address. 3821 * If IP_RECVDSTADDR is set we include the destination IP 3822 * address as an option. With IP_RECVOPTS we include all 3823 * the IP options. 3824 */ 3825 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 3826 if (udp_bits.udpb_recvdstaddr) { 3827 udi_size += sizeof (struct T_opthdr) + 3828 sizeof (struct in_addr); 3829 UDP_STAT(us, udp_in_recvdstaddr); 3830 } 3831 3832 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 3833 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3834 udi_size += sizeof (struct T_opthdr) + 3835 sizeof (struct in_pktinfo); 3836 UDP_STAT(us, udp_ip_rcvpktinfo); 3837 } 3838 3839 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 3840 udi_size += sizeof (struct T_opthdr) + opt_len; 3841 UDP_STAT(us, udp_in_recvopts); 3842 } 3843 3844 /* 3845 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 3846 * space accordingly 3847 */ 3848 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 3849 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 3850 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 3851 UDP_STAT(us, udp_in_recvif); 3852 } 3853 3854 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3855 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3856 udi_size += sizeof (struct T_opthdr) + 3857 sizeof (struct sockaddr_dl); 3858 UDP_STAT(us, udp_in_recvslla); 3859 } 3860 3861 if ((udp_bits.udpb_recvucred) && 3862 (cr = DB_CRED(mp)) != NULL) { 3863 udi_size += sizeof (struct T_opthdr) + ucredsize; 3864 cpid = DB_CPID(mp); 3865 UDP_STAT(us, udp_in_recvucred); 3866 } 3867 3868 /* 3869 * If SO_TIMESTAMP is set allocate the appropriate sized 3870 * buffer. Since gethrestime() expects a pointer aligned 3871 * argument, we allocate space necessary for extra 3872 * alignment (even though it might not be used). 3873 */ 3874 if (udp_bits.udpb_timestamp) { 3875 udi_size += sizeof (struct T_opthdr) + 3876 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 3877 UDP_STAT(us, udp_in_timestamp); 3878 } 3879 3880 /* 3881 * If IP_RECVTTL is set allocate the appropriate sized buffer 3882 */ 3883 if (udp_bits.udpb_recvttl) { 3884 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 3885 UDP_STAT(us, udp_in_recvttl); 3886 } 3887 3888 /* Allocate a message block for the T_UNITDATA_IND structure. 
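 * It becomes an M_PROTO header laid out as [struct T_unitdata_ind]
 * [sin_t source address][zero or more T_opthdr-framed options], with
 * the original data mblk chained behind it via b_cont; the udi_size
 * computed above is exactly the space those pieces need.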
*/ 3889 mp1 = allocb(udi_size, BPRI_MED); 3890 if (mp1 == NULL) { 3891 freemsg(mp); 3892 if (options_mp != NULL) 3893 freeb(options_mp); 3894 BUMP_MIB(&us->us_udp_mib, udpInErrors); 3895 return; 3896 } 3897 mp1->b_cont = mp; 3898 mp = mp1; 3899 mp->b_datap->db_type = M_PROTO; 3900 tudi = (struct T_unitdata_ind *)mp->b_rptr; 3901 mp->b_wptr = (uchar_t *)tudi + udi_size; 3902 tudi->PRIM_type = T_UNITDATA_IND; 3903 tudi->SRC_length = sizeof (sin_t); 3904 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 3905 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 3906 sizeof (sin_t); 3907 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 3908 tudi->OPT_length = udi_size; 3909 sin = (sin_t *)&tudi[1]; 3910 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 3911 sin->sin_port = udpha->uha_src_port; 3912 sin->sin_family = udp->udp_family; 3913 *(uint32_t *)&sin->sin_zero[0] = 0; 3914 *(uint32_t *)&sin->sin_zero[4] = 0; 3915 3916 /* 3917 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 3918 * IP_RECVTTL has been set. 3919 */ 3920 if (udi_size != 0) { 3921 /* 3922 * Copy in destination address before options to avoid 3923 * any padding issues. 3924 */ 3925 char *dstopt; 3926 3927 dstopt = (char *)&sin[1]; 3928 if (udp_bits.udpb_recvdstaddr) { 3929 struct T_opthdr *toh; 3930 ipaddr_t *dstptr; 3931 3932 toh = (struct T_opthdr *)dstopt; 3933 toh->level = IPPROTO_IP; 3934 toh->name = IP_RECVDSTADDR; 3935 toh->len = sizeof (struct T_opthdr) + 3936 sizeof (ipaddr_t); 3937 toh->status = 0; 3938 dstopt += sizeof (struct T_opthdr); 3939 dstptr = (ipaddr_t *)dstopt; 3940 *dstptr = ((ipha_t *)rptr)->ipha_dst; 3941 dstopt += sizeof (ipaddr_t); 3942 udi_size -= toh->len; 3943 } 3944 3945 if (udp_bits.udpb_recvopts && opt_len > 0) { 3946 struct T_opthdr *toh; 3947 3948 toh = (struct T_opthdr *)dstopt; 3949 toh->level = IPPROTO_IP; 3950 toh->name = IP_RECVOPTS; 3951 toh->len = sizeof (struct T_opthdr) + opt_len; 3952 toh->status = 0; 3953 dstopt += sizeof (struct T_opthdr); 3954 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 3955 opt_len); 3956 dstopt += opt_len; 3957 udi_size -= toh->len; 3958 } 3959 3960 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 3961 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 3962 struct T_opthdr *toh; 3963 struct in_pktinfo *pktinfop; 3964 3965 toh = (struct T_opthdr *)dstopt; 3966 toh->level = IPPROTO_IP; 3967 toh->name = IP_PKTINFO; 3968 toh->len = sizeof (struct T_opthdr) + 3969 sizeof (*pktinfop); 3970 toh->status = 0; 3971 dstopt += sizeof (struct T_opthdr); 3972 pktinfop = (struct in_pktinfo *)dstopt; 3973 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 3974 pktinfop->ipi_spec_dst = 3975 pinfo->ip_pkt_match_addr; 3976 pktinfop->ipi_addr.s_addr = 3977 ((ipha_t *)rptr)->ipha_dst; 3978 3979 dstopt += sizeof (struct in_pktinfo); 3980 udi_size -= toh->len; 3981 } 3982 3983 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 3984 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 3985 3986 struct T_opthdr *toh; 3987 struct sockaddr_dl *dstptr; 3988 3989 toh = (struct T_opthdr *)dstopt; 3990 toh->level = IPPROTO_IP; 3991 toh->name = IP_RECVSLLA; 3992 toh->len = sizeof (struct T_opthdr) + 3993 sizeof (struct sockaddr_dl); 3994 toh->status = 0; 3995 dstopt += sizeof (struct T_opthdr); 3996 dstptr = (struct sockaddr_dl *)dstopt; 3997 bcopy(&pinfo->ip_pkt_slla, dstptr, 3998 sizeof (struct sockaddr_dl)); 3999 dstopt += sizeof (struct sockaddr_dl); 4000 udi_size -= toh->len; 4001 } 4002 4003 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4004 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 
4005 4006 struct T_opthdr *toh; 4007 uint_t *dstptr; 4008 4009 toh = (struct T_opthdr *)dstopt; 4010 toh->level = IPPROTO_IP; 4011 toh->name = IP_RECVIF; 4012 toh->len = sizeof (struct T_opthdr) + 4013 sizeof (uint_t); 4014 toh->status = 0; 4015 dstopt += sizeof (struct T_opthdr); 4016 dstptr = (uint_t *)dstopt; 4017 *dstptr = pinfo->ip_pkt_ifindex; 4018 dstopt += sizeof (uint_t); 4019 udi_size -= toh->len; 4020 } 4021 4022 if (cr != NULL) { 4023 struct T_opthdr *toh; 4024 4025 toh = (struct T_opthdr *)dstopt; 4026 toh->level = SOL_SOCKET; 4027 toh->name = SCM_UCRED; 4028 toh->len = sizeof (struct T_opthdr) + ucredsize; 4029 toh->status = 0; 4030 dstopt += sizeof (struct T_opthdr); 4031 (void) cred2ucred(cr, cpid, dstopt, rcr); 4032 dstopt += ucredsize; 4033 udi_size -= toh->len; 4034 } 4035 4036 if (udp_bits.udpb_timestamp) { 4037 struct T_opthdr *toh; 4038 4039 toh = (struct T_opthdr *)dstopt; 4040 toh->level = SOL_SOCKET; 4041 toh->name = SCM_TIMESTAMP; 4042 toh->len = sizeof (struct T_opthdr) + 4043 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4044 toh->status = 0; 4045 dstopt += sizeof (struct T_opthdr); 4046 /* Align for gethrestime() */ 4047 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4048 sizeof (intptr_t)); 4049 gethrestime((timestruc_t *)dstopt); 4050 dstopt = (char *)toh + toh->len; 4051 udi_size -= toh->len; 4052 } 4053 4054 /* 4055 * CAUTION: 4056 * Due to alignment issues, 4057 * processing of the IP_RECVTTL option 4058 * must always come last. Adding 4059 * any option processing after this point 4060 * will cause an alignment panic. 4061 */ 4062 if (udp_bits.udpb_recvttl) { 4063 struct T_opthdr *toh; 4064 uint8_t *dstptr; 4065 4066 toh = (struct T_opthdr *)dstopt; 4067 toh->level = IPPROTO_IP; 4068 toh->name = IP_RECVTTL; 4069 toh->len = sizeof (struct T_opthdr) + 4070 sizeof (uint8_t); 4071 toh->status = 0; 4072 dstopt += sizeof (struct T_opthdr); 4073 dstptr = (uint8_t *)dstopt; 4074 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4075 dstopt += sizeof (uint8_t); 4076 udi_size -= toh->len; 4077 } 4078 4079 /* Consumed all of allocated space */ 4080 ASSERT(udi_size == 0); 4081 } 4082 } else { 4083 sin6_t *sin6; 4084 4085 /* 4086 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4087 * 4088 * Normally we only send up the address. If any optional 4089 * receive-side information has been enabled, we also send 4090 * it up as options.
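 * As a sketch, the layout mirrors the IPv4 case above:
 *   struct T_unitdata_ind | sin6_t | T_opthdr option list
 * with the data mblk chained via b_cont.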
4091 */ 4092 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4093 4094 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4095 IPPF_RTHDR|IPPF_IFINDEX)) { 4096 if ((udp_bits.udpb_ipv6_recvhopopts) && 4097 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4098 size_t hlen; 4099 4100 UDP_STAT(us, udp_in_recvhopopts); 4101 hlen = copy_hop_opts(&ipp, NULL); 4102 if (hlen == 0) 4103 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4104 udi_size += hlen; 4105 } 4106 if (((udp_bits.udpb_ipv6_recvdstopts) || 4107 udp_bits.udpb_old_ipv6_recvdstopts) && 4108 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4109 udi_size += sizeof (struct T_opthdr) + 4110 ipp.ipp_dstoptslen; 4111 UDP_STAT(us, udp_in_recvdstopts); 4112 } 4113 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4114 udp_bits.udpb_ipv6_recvrthdr && 4115 (ipp.ipp_fields & IPPF_RTHDR)) || 4116 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4117 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4118 udi_size += sizeof (struct T_opthdr) + 4119 ipp.ipp_rtdstoptslen; 4120 UDP_STAT(us, udp_in_recvrtdstopts); 4121 } 4122 if ((udp_bits.udpb_ipv6_recvrthdr) && 4123 (ipp.ipp_fields & IPPF_RTHDR)) { 4124 udi_size += sizeof (struct T_opthdr) + 4125 ipp.ipp_rthdrlen; 4126 UDP_STAT(us, udp_in_recvrthdr); 4127 } 4128 if ((udp_bits.udpb_ip_recvpktinfo) && 4129 (ipp.ipp_fields & IPPF_IFINDEX)) { 4130 udi_size += sizeof (struct T_opthdr) + 4131 sizeof (struct in6_pktinfo); 4132 UDP_STAT(us, udp_in_recvpktinfo); 4133 } 4134 4135 } 4136 if ((udp_bits.udpb_recvucred) && 4137 (cr = DB_CRED(mp)) != NULL) { 4138 udi_size += sizeof (struct T_opthdr) + ucredsize; 4139 cpid = DB_CPID(mp); 4140 UDP_STAT(us, udp_in_recvucred); 4141 } 4142 4143 /* 4144 * If SO_TIMESTAMP is set allocate the appropriate sized 4145 * buffer. Since gethrestime() expects a pointer aligned 4146 * argument, we allocate space necessary for extra 4147 * alignment (even though it might not be used). 
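 * (For example, on a 64-bit kernel where _POINTER_ALIGNMENT is 8, up to
 * seven of the reserved bytes may go unused after the P2ROUNDUP
 * alignment performed when the timestamp is filled in.)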
4148 */ 4149 if (udp_bits.udpb_timestamp) { 4150 udi_size += sizeof (struct T_opthdr) + 4151 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4152 UDP_STAT(us, udp_in_timestamp); 4153 } 4154 4155 if (udp_bits.udpb_ipv6_recvhoplimit) { 4156 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4157 UDP_STAT(us, udp_in_recvhoplimit); 4158 } 4159 4160 if (udp_bits.udpb_ipv6_recvtclass) { 4161 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4162 UDP_STAT(us, udp_in_recvtclass); 4163 } 4164 4165 mp1 = allocb(udi_size, BPRI_MED); 4166 if (mp1 == NULL) { 4167 freemsg(mp); 4168 if (options_mp != NULL) 4169 freeb(options_mp); 4170 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4171 return; 4172 } 4173 mp1->b_cont = mp; 4174 mp = mp1; 4175 mp->b_datap->db_type = M_PROTO; 4176 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4177 mp->b_wptr = (uchar_t *)tudi + udi_size; 4178 tudi->PRIM_type = T_UNITDATA_IND; 4179 tudi->SRC_length = sizeof (sin6_t); 4180 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4181 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4182 sizeof (sin6_t); 4183 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4184 tudi->OPT_length = udi_size; 4185 sin6 = (sin6_t *)&tudi[1]; 4186 if (ipversion == IPV4_VERSION) { 4187 in6_addr_t v6dst; 4188 4189 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4190 &sin6->sin6_addr); 4191 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4192 &v6dst); 4193 sin6->sin6_flowinfo = 0; 4194 sin6->sin6_scope_id = 0; 4195 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4196 connp->conn_zoneid, us->us_netstack); 4197 } else { 4198 sin6->sin6_addr = ip6h->ip6_src; 4199 /* No sin6_flowinfo per API */ 4200 sin6->sin6_flowinfo = 0; 4201 /* For link-scope source pass up scope id */ 4202 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4203 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4204 sin6->sin6_scope_id = ipp.ipp_ifindex; 4205 else 4206 sin6->sin6_scope_id = 0; 4207 sin6->__sin6_src_id = ip_srcid_find_addr( 4208 &ip6h->ip6_dst, connp->conn_zoneid, 4209 us->us_netstack); 4210 } 4211 sin6->sin6_port = udpha->uha_src_port; 4212 sin6->sin6_family = udp->udp_family; 4213 4214 if (udi_size != 0) { 4215 uchar_t *dstopt; 4216 4217 dstopt = (uchar_t *)&sin6[1]; 4218 if ((udp_bits.udpb_ip_recvpktinfo) && 4219 (ipp.ipp_fields & IPPF_IFINDEX)) { 4220 struct T_opthdr *toh; 4221 struct in6_pktinfo *pkti; 4222 4223 toh = (struct T_opthdr *)dstopt; 4224 toh->level = IPPROTO_IPV6; 4225 toh->name = IPV6_PKTINFO; 4226 toh->len = sizeof (struct T_opthdr) + 4227 sizeof (*pkti); 4228 toh->status = 0; 4229 dstopt += sizeof (struct T_opthdr); 4230 pkti = (struct in6_pktinfo *)dstopt; 4231 if (ipversion == IPV6_VERSION) 4232 pkti->ipi6_addr = ip6h->ip6_dst; 4233 else 4234 IN6_IPADDR_TO_V4MAPPED( 4235 ((ipha_t *)rptr)->ipha_dst, 4236 &pkti->ipi6_addr); 4237 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4238 dstopt += sizeof (*pkti); 4239 udi_size -= toh->len; 4240 } 4241 if (udp_bits.udpb_ipv6_recvhoplimit) { 4242 struct T_opthdr *toh; 4243 4244 toh = (struct T_opthdr *)dstopt; 4245 toh->level = IPPROTO_IPV6; 4246 toh->name = IPV6_HOPLIMIT; 4247 toh->len = sizeof (struct T_opthdr) + 4248 sizeof (uint_t); 4249 toh->status = 0; 4250 dstopt += sizeof (struct T_opthdr); 4251 if (ipversion == IPV6_VERSION) 4252 *(uint_t *)dstopt = ip6h->ip6_hops; 4253 else 4254 *(uint_t *)dstopt = 4255 ((ipha_t *)rptr)->ipha_ttl; 4256 dstopt += sizeof (uint_t); 4257 udi_size -= toh->len; 4258 } 4259 if (udp_bits.udpb_ipv6_recvtclass) { 4260 struct T_opthdr *toh; 4261 4262 toh = (struct T_opthdr *)dstopt; 4263 
toh->level = IPPROTO_IPV6; 4264 toh->name = IPV6_TCLASS; 4265 toh->len = sizeof (struct T_opthdr) + 4266 sizeof (uint_t); 4267 toh->status = 0; 4268 dstopt += sizeof (struct T_opthdr); 4269 if (ipversion == IPV6_VERSION) { 4270 *(uint_t *)dstopt = 4271 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4272 } else { 4273 ipha_t *ipha = (ipha_t *)rptr; 4274 *(uint_t *)dstopt = 4275 ipha->ipha_type_of_service; 4276 } 4277 dstopt += sizeof (uint_t); 4278 udi_size -= toh->len; 4279 } 4280 if ((udp_bits.udpb_ipv6_recvhopopts) && 4281 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4282 size_t hlen; 4283 4284 hlen = copy_hop_opts(&ipp, dstopt); 4285 dstopt += hlen; 4286 udi_size -= hlen; 4287 } 4288 if ((udp_bits.udpb_ipv6_recvdstopts) && 4289 (udp_bits.udpb_ipv6_recvrthdr) && 4290 (ipp.ipp_fields & IPPF_RTHDR) && 4291 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4292 struct T_opthdr *toh; 4293 4294 toh = (struct T_opthdr *)dstopt; 4295 toh->level = IPPROTO_IPV6; 4296 toh->name = IPV6_DSTOPTS; 4297 toh->len = sizeof (struct T_opthdr) + 4298 ipp.ipp_rtdstoptslen; 4299 toh->status = 0; 4300 dstopt += sizeof (struct T_opthdr); 4301 bcopy(ipp.ipp_rtdstopts, dstopt, 4302 ipp.ipp_rtdstoptslen); 4303 dstopt += ipp.ipp_rtdstoptslen; 4304 udi_size -= toh->len; 4305 } 4306 if ((udp_bits.udpb_ipv6_recvrthdr) && 4307 (ipp.ipp_fields & IPPF_RTHDR)) { 4308 struct T_opthdr *toh; 4309 4310 toh = (struct T_opthdr *)dstopt; 4311 toh->level = IPPROTO_IPV6; 4312 toh->name = IPV6_RTHDR; 4313 toh->len = sizeof (struct T_opthdr) + 4314 ipp.ipp_rthdrlen; 4315 toh->status = 0; 4316 dstopt += sizeof (struct T_opthdr); 4317 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4318 dstopt += ipp.ipp_rthdrlen; 4319 udi_size -= toh->len; 4320 } 4321 if ((udp_bits.udpb_ipv6_recvdstopts) && 4322 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4323 struct T_opthdr *toh; 4324 4325 toh = (struct T_opthdr *)dstopt; 4326 toh->level = IPPROTO_IPV6; 4327 toh->name = IPV6_DSTOPTS; 4328 toh->len = sizeof (struct T_opthdr) + 4329 ipp.ipp_dstoptslen; 4330 toh->status = 0; 4331 dstopt += sizeof (struct T_opthdr); 4332 bcopy(ipp.ipp_dstopts, dstopt, 4333 ipp.ipp_dstoptslen); 4334 dstopt += ipp.ipp_dstoptslen; 4335 udi_size -= toh->len; 4336 } 4337 if (cr != NULL) { 4338 struct T_opthdr *toh; 4339 4340 toh = (struct T_opthdr *)dstopt; 4341 toh->level = SOL_SOCKET; 4342 toh->name = SCM_UCRED; 4343 toh->len = sizeof (struct T_opthdr) + ucredsize; 4344 toh->status = 0; 4345 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4346 dstopt += toh->len; 4347 udi_size -= toh->len; 4348 } 4349 if (udp_bits.udpb_timestamp) { 4350 struct T_opthdr *toh; 4351 4352 toh = (struct T_opthdr *)dstopt; 4353 toh->level = SOL_SOCKET; 4354 toh->name = SCM_TIMESTAMP; 4355 toh->len = sizeof (struct T_opthdr) + 4356 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4357 toh->status = 0; 4358 dstopt += sizeof (struct T_opthdr); 4359 /* Align for gethrestime() */ 4360 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4361 sizeof (intptr_t)); 4362 gethrestime((timestruc_t *)dstopt); 4363 dstopt = (uchar_t *)toh + toh->len; 4364 udi_size -= toh->len; 4365 } 4366 4367 /* Consumed all of allocated space */ 4368 ASSERT(udi_size == 0); 4369 } 4370 #undef sin6 4371 /* No IP_RECVDSTADDR for IPv6. 
*/ 4372 } 4373 4374 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4375 if (options_mp != NULL) 4376 freeb(options_mp); 4377 4378 if (IPCL_IS_NONSTR(connp)) { 4379 int error; 4380 4381 if ((*connp->conn_upcalls->su_recv) 4382 (connp->conn_upper_handle, mp, msgdsize(mp), 0, &error, 4383 NULL) < 0) { 4384 mutex_enter(&udp->udp_recv_lock); 4385 if (error == ENOSPC) { 4386 /* 4387 * let's confirm while holding the lock 4388 */ 4389 if ((*connp->conn_upcalls->su_recv) 4390 (connp->conn_upper_handle, NULL, 0, 0, 4391 &error, NULL) < 0) { 4392 if (error == ENOSPC) { 4393 connp->conn_flow_cntrld = 4394 B_TRUE; 4395 } else { 4396 ASSERT(error == EOPNOTSUPP); 4397 } 4398 } 4399 mutex_exit(&udp->udp_recv_lock); 4400 } else { 4401 ASSERT(error == EOPNOTSUPP); 4402 udp_queue_fallback(udp, mp); 4403 } 4404 } 4405 } else { 4406 putnext(connp->conn_rq, mp); 4407 } 4408 ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock)); 4409 return; 4410 4411 tossit: 4412 freemsg(mp); 4413 if (options_mp != NULL) 4414 freeb(options_mp); 4415 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4416 } 4417 4418 /* 4419 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 4420 * information that can be changing beneath us. 4421 */ 4422 mblk_t * 4423 udp_snmp_get(queue_t *q, mblk_t *mpctl) 4424 { 4425 mblk_t *mpdata; 4426 mblk_t *mp_conn_ctl; 4427 mblk_t *mp_attr_ctl; 4428 mblk_t *mp6_conn_ctl; 4429 mblk_t *mp6_attr_ctl; 4430 mblk_t *mp_conn_tail; 4431 mblk_t *mp_attr_tail; 4432 mblk_t *mp6_conn_tail; 4433 mblk_t *mp6_attr_tail; 4434 struct opthdr *optp; 4435 mib2_udpEntry_t ude; 4436 mib2_udp6Entry_t ude6; 4437 mib2_transportMLPEntry_t mlp; 4438 int state; 4439 zoneid_t zoneid; 4440 int i; 4441 connf_t *connfp; 4442 conn_t *connp = Q_TO_CONN(q); 4443 int v4_conn_idx; 4444 int v6_conn_idx; 4445 boolean_t needattr; 4446 udp_t *udp; 4447 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4448 udp_stack_t *us = connp->conn_netstack->netstack_udp; 4449 mblk_t *mp2ctl; 4450 4451 /* 4452 * make a copy of the original message 4453 */ 4454 mp2ctl = copymsg(mpctl); 4455 4456 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 4457 if (mpctl == NULL || 4458 (mpdata = mpctl->b_cont) == NULL || 4459 (mp_conn_ctl = copymsg(mpctl)) == NULL || 4460 (mp_attr_ctl = copymsg(mpctl)) == NULL || 4461 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 4462 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 4463 freemsg(mp_conn_ctl); 4464 freemsg(mp_attr_ctl); 4465 freemsg(mp6_conn_ctl); 4466 freemsg(mpctl); 4467 freemsg(mp2ctl); 4468 return (0); 4469 } 4470 4471 zoneid = connp->conn_zoneid; 4472 4473 /* fixed length structure for IPv4 and IPv6 counters */ 4474 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 4475 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 4476 /* synchronize 64- and 32-bit counters */ 4477 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 4478 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 4479 4480 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 4481 optp->level = MIB2_UDP; 4482 optp->name = 0; 4483 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 4484 sizeof (us->us_udp_mib)); 4485 optp->len = msgdsize(mpdata); 4486 qreply(q, mpctl); 4487 4488 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 4489 v4_conn_idx = v6_conn_idx = 0; 4490 4491 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4492 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4493 connp = NULL; 4494 4495 while ((connp = ipcl_get_next_conn(connfp, connp, 4496 IPCL_UDPCONN))) 
{ 4497 udp = connp->conn_udp; 4498 if (zoneid != connp->conn_zoneid) 4499 continue; 4500 4501 /* 4502 * Note that the port numbers are sent in 4503 * host byte order. 4504 */ 4505 4506 if (udp->udp_state == TS_UNBND) 4507 state = MIB2_UDP_unbound; 4508 else if (udp->udp_state == TS_IDLE) 4509 state = MIB2_UDP_idle; 4510 else if (udp->udp_state == TS_DATA_XFER) 4511 state = MIB2_UDP_connected; 4512 else 4513 state = MIB2_UDP_unknown; 4514 4515 needattr = B_FALSE; 4516 bzero(&mlp, sizeof (mlp)); 4517 if (connp->conn_mlp_type != mlptSingle) { 4518 if (connp->conn_mlp_type == mlptShared || 4519 connp->conn_mlp_type == mlptBoth) 4520 mlp.tme_flags |= MIB2_TMEF_SHARED; 4521 if (connp->conn_mlp_type == mlptPrivate || 4522 connp->conn_mlp_type == mlptBoth) 4523 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 4524 needattr = B_TRUE; 4525 } 4526 4527 /* 4528 * Create an IPv4 table entry for IPv4 entries and also 4529 * any IPv6 entries that are bound to in6addr_any 4530 * (i.e. anything an IPv4 peer could connect/send to). 4531 */ 4532 if (udp->udp_ipversion == IPV4_VERSION || 4533 (udp->udp_state <= TS_IDLE && 4534 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 4535 ude.udpEntryInfo.ue_state = state; 4536 /* 4537 * If in6addr_any, this will set it to 4538 * INADDR_ANY. 4539 */ 4540 ude.udpLocalAddress = 4541 V4_PART_OF_V6(udp->udp_v6src); 4542 ude.udpLocalPort = ntohs(udp->udp_port); 4543 if (udp->udp_state == TS_DATA_XFER) { 4544 /* 4545 * Can potentially get here for a 4546 * v6 socket if another process 4547 * (say, ping) has just done a 4548 * sendto(), changing the state 4549 * from the TS_IDLE above to 4550 * TS_DATA_XFER by the time we hit 4551 * this part of the code. 4552 */ 4553 ude.udpEntryInfo.ue_RemoteAddress = 4554 V4_PART_OF_V6(udp->udp_v6dst); 4555 ude.udpEntryInfo.ue_RemotePort = 4556 ntohs(udp->udp_dstport); 4557 } else { 4558 ude.udpEntryInfo.ue_RemoteAddress = 0; 4559 ude.udpEntryInfo.ue_RemotePort = 0; 4560 } 4561 4562 /* 4563 * We make the assumption that all udp_t 4564 * structs will be created within an address 4565 * region no larger than 32-bits. 4566 */ 4567 ude.udpInstance = (uint32_t)(uintptr_t)udp; 4568 ude.udpCreationProcess = 4569 (udp->udp_open_pid < 0) ? 4570 MIB2_UNKNOWN_PROCESS : 4571 udp->udp_open_pid; 4572 ude.udpCreationTime = udp->udp_open_time; 4573 4574 (void) snmp_append_data2(mp_conn_ctl->b_cont, 4575 &mp_conn_tail, (char *)&ude, sizeof (ude)); 4576 mlp.tme_connidx = v4_conn_idx++; 4577 if (needattr) 4578 (void) snmp_append_data2( 4579 mp_attr_ctl->b_cont, &mp_attr_tail, 4580 (char *)&mlp, sizeof (mlp)); 4581 } 4582 if (udp->udp_ipversion == IPV6_VERSION) { 4583 ude6.udp6EntryInfo.ue_state = state; 4584 ude6.udp6LocalAddress = udp->udp_v6src; 4585 ude6.udp6LocalPort = ntohs(udp->udp_port); 4586 ude6.udp6IfIndex = udp->udp_bound_if; 4587 if (udp->udp_state == TS_DATA_XFER) { 4588 ude6.udp6EntryInfo.ue_RemoteAddress = 4589 udp->udp_v6dst; 4590 ude6.udp6EntryInfo.ue_RemotePort = 4591 ntohs(udp->udp_dstport); 4592 } else { 4593 ude6.udp6EntryInfo.ue_RemoteAddress = 4594 sin6_null.sin6_addr; 4595 ude6.udp6EntryInfo.ue_RemotePort = 0; 4596 } 4597 /* 4598 * We make the assumption that all udp_t 4599 * structs will be created within an address 4600 * region no larger than 32-bits. 4601 */ 4602 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 4603 ude6.udp6CreationProcess = 4604 (udp->udp_open_pid < 0) ?
4605 MIB2_UNKNOWN_PROCESS : 4606 udp->udp_open_pid; 4607 ude6.udp6CreationTime = udp->udp_open_time; 4608 4609 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 4610 &mp6_conn_tail, (char *)&ude6, 4611 sizeof (ude6)); 4612 mlp.tme_connidx = v6_conn_idx++; 4613 if (needattr) 4614 (void) snmp_append_data2( 4615 mp6_attr_ctl->b_cont, 4616 &mp6_attr_tail, (char *)&mlp, 4617 sizeof (mlp)); 4618 } 4619 } 4620 } 4621 4622 /* IPv4 UDP endpoints */ 4623 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 4624 sizeof (struct T_optmgmt_ack)]; 4625 optp->level = MIB2_UDP; 4626 optp->name = MIB2_UDP_ENTRY; 4627 optp->len = msgdsize(mp_conn_ctl->b_cont); 4628 qreply(q, mp_conn_ctl); 4629 4630 /* table of MLP attributes... */ 4631 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 4632 sizeof (struct T_optmgmt_ack)]; 4633 optp->level = MIB2_UDP; 4634 optp->name = EXPER_XPORT_MLP; 4635 optp->len = msgdsize(mp_attr_ctl->b_cont); 4636 if (optp->len == 0) 4637 freemsg(mp_attr_ctl); 4638 else 4639 qreply(q, mp_attr_ctl); 4640 4641 /* IPv6 UDP endpoints */ 4642 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 4643 sizeof (struct T_optmgmt_ack)]; 4644 optp->level = MIB2_UDP6; 4645 optp->name = MIB2_UDP6_ENTRY; 4646 optp->len = msgdsize(mp6_conn_ctl->b_cont); 4647 qreply(q, mp6_conn_ctl); 4648 4649 /* table of MLP attributes... */ 4650 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 4651 sizeof (struct T_optmgmt_ack)]; 4652 optp->level = MIB2_UDP6; 4653 optp->name = EXPER_XPORT_MLP; 4654 optp->len = msgdsize(mp6_attr_ctl->b_cont); 4655 if (optp->len == 0) 4656 freemsg(mp6_attr_ctl); 4657 else 4658 qreply(q, mp6_attr_ctl); 4659 4660 return (mp2ctl); 4661 } 4662 4663 /* 4664 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 4665 * NOTE: Per MIB-II, UDP has no writable data. 4666 * TODO: If this ever actually tries to set anything, it needs to 4667 * do the appropriate locking.
4668 */ 4669 /* ARGSUSED */ 4670 int 4671 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 4672 uchar_t *ptr, int len) 4673 { 4674 switch (level) { 4675 case MIB2_UDP: 4676 return (0); 4677 default: 4678 return (1); 4679 } 4680 } 4681 4682 static void 4683 udp_report_item(mblk_t *mp, udp_t *udp) 4684 { 4685 char *state; 4686 char addrbuf1[INET6_ADDRSTRLEN]; 4687 char addrbuf2[INET6_ADDRSTRLEN]; 4688 uint_t print_len, buf_len; 4689 4690 buf_len = mp->b_datap->db_lim - mp->b_wptr; 4691 ASSERT(buf_len >= 0); 4692 if (buf_len == 0) 4693 return; 4694 4695 if (udp->udp_state == TS_UNBND) 4696 state = "UNBOUND"; 4697 else if (udp->udp_state == TS_IDLE) 4698 state = "IDLE"; 4699 else if (udp->udp_state == TS_DATA_XFER) 4700 state = "CONNECTED"; 4701 else 4702 state = "UnkState"; 4703 print_len = snprintf((char *)mp->b_wptr, buf_len, 4704 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 4705 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 4706 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 4707 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 4708 ntohs(udp->udp_dstport), state); 4709 if (print_len < buf_len) { 4710 mp->b_wptr += print_len; 4711 } else { 4712 mp->b_wptr += buf_len; 4713 } 4714 } 4715 4716 /* Report for ndd "udp_status" */ 4717 /* ARGSUSED */ 4718 static int 4719 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4720 { 4721 zoneid_t zoneid; 4722 connf_t *connfp; 4723 conn_t *connp = Q_TO_CONN(q); 4724 udp_t *udp = connp->conn_udp; 4725 int i; 4726 udp_stack_t *us = udp->udp_us; 4727 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4728 4729 /* 4730 * Because of the ndd constraint, at most we can have 64K buffer 4731 * to put in all UDP info. So to be more efficient, just 4732 * allocate a 64K buffer here, assuming we need that large buffer. 4733 * This may be a problem as any user can read udp_status. Therefore 4734 * we limit the rate of doing this using us_ndd_get_info_interval. 4735 * This should be OK as normal users should not do this too often. 4736 */ 4737 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 4738 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 4739 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 4740 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 4741 return (0); 4742 } 4743 } 4744 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 4745 /* The following may work even if we cannot get a large buf. */ 4746 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 4747 return (0); 4748 } 4749 (void) mi_mpprintf(mp, 4750 "UDP " MI_COL_HDRPAD_STR 4751 /* 12345678[89ABCDEF] */ 4752 " zone lport src addr dest addr port state"); 4753 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 4754 4755 zoneid = connp->conn_zoneid; 4756 4757 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 4758 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 4759 connp = NULL; 4760 4761 while ((connp = ipcl_get_next_conn(connfp, connp, 4762 IPCL_UDPCONN))) { 4763 udp = connp->conn_udp; 4764 if (zoneid != GLOBAL_ZONEID && 4765 zoneid != connp->conn_zoneid) 4766 continue; 4767 4768 udp_report_item(mp->b_cont, udp); 4769 } 4770 } 4771 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 4772 return (0); 4773 } 4774 4775 /* 4776 * This routine creates a T_UDERROR_IND message and passes it upstream. 4777 * The address and options are copied from the T_UNITDATA_REQ message 4778 * passed in mp. This message is freed. 
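 * For example, udp_wput() below hands off to this routine with EPROTO or
 * EADDRNOTAVAIL when a T_UNITDATA_REQ carries no data or a malformed
 * destination, so the application sees a T_UDERROR_IND rather than a
 * silent drop.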
4779 */ 4780 static void 4781 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 4782 t_scalar_t err) 4783 { 4784 struct T_unitdata_req *tudr; 4785 mblk_t *mp1; 4786 uchar_t *optaddr; 4787 t_scalar_t optlen; 4788 4789 if (DB_TYPE(mp) == M_DATA) { 4790 ASSERT(destaddr != NULL && destlen != 0); 4791 optaddr = NULL; 4792 optlen = 0; 4793 } else { 4794 if ((mp->b_wptr < mp->b_rptr) || 4795 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 4796 goto done; 4797 } 4798 tudr = (struct T_unitdata_req *)mp->b_rptr; 4799 destaddr = mp->b_rptr + tudr->DEST_offset; 4800 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 4801 destaddr + tudr->DEST_length < mp->b_rptr || 4802 destaddr + tudr->DEST_length > mp->b_wptr) { 4803 goto done; 4804 } 4805 optaddr = mp->b_rptr + tudr->OPT_offset; 4806 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 4807 optaddr + tudr->OPT_length < mp->b_rptr || 4808 optaddr + tudr->OPT_length > mp->b_wptr) { 4809 goto done; 4810 } 4811 destlen = tudr->DEST_length; 4812 optlen = tudr->OPT_length; 4813 } 4814 4815 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 4816 (char *)optaddr, optlen, err); 4817 if (mp1 != NULL) 4818 qreply(q, mp1); 4819 4820 done: 4821 freemsg(mp); 4822 } 4823 4824 /* 4825 * This routine removes a port number association from a stream. It 4826 * is called by udp_wput to handle T_UNBIND_REQ messages. 4827 */ 4828 static void 4829 udp_tpi_unbind(queue_t *q, mblk_t *mp) 4830 { 4831 conn_t *connp = Q_TO_CONN(q); 4832 int error; 4833 4834 error = udp_do_unbind(connp); 4835 if (error) { 4836 if (error < 0) 4837 udp_err_ack(q, mp, -error, 0); 4838 else 4839 udp_err_ack(q, mp, TSYSERR, error); 4840 return; 4841 } 4842 4843 mp = mi_tpi_ok_ack_alloc(mp); 4844 ASSERT(mp != NULL); 4845 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 4846 qreply(q, mp); 4847 } 4848 4849 /* 4850 * Don't let port fall into the privileged range. 4851 * Since the extra privileged ports can be arbitrary we also 4852 * ensure that we exclude those from consideration. 4853 * us->us_epriv_ports is not sorted thus we loop over it until 4854 * there are no changes. 4855 */ 4856 static in_port_t 4857 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 4858 { 4859 int i; 4860 in_port_t nextport; 4861 boolean_t restart = B_FALSE; 4862 udp_stack_t *us = udp->udp_us; 4863 4864 if (random && udp_random_anon_port != 0) { 4865 (void) random_get_pseudo_bytes((uint8_t *)&port, 4866 sizeof (in_port_t)); 4867 /* 4868 * Unless changed by a sys admin, the smallest anon port 4869 * is 32768 and the largest anon port is 65535. It is 4870 * very likely (50%) for the random port to be smaller 4871 * than the smallest anon port. When that happens, 4872 * add port % (anon port range) to the smallest anon 4873 * port to get the random port. It should fall into the 4874 * valid anon port range. 
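 * A worked example with the default range [32768, 65535]: a random
 * 16-bit value of 12345 is below 32768, so the port becomes
 * 32768 + (12345 % 32767) = 45113, which lies inside the anon range.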
4875 */ 4876 if (port < us->us_smallest_anon_port) { 4877 port = us->us_smallest_anon_port + 4878 port % (us->us_largest_anon_port - 4879 us->us_smallest_anon_port); 4880 } 4881 } 4882 4883 retry: 4884 if (port < us->us_smallest_anon_port) 4885 port = us->us_smallest_anon_port; 4886 4887 if (port > us->us_largest_anon_port) { 4888 port = us->us_smallest_anon_port; 4889 if (restart) 4890 return (0); 4891 restart = B_TRUE; 4892 } 4893 4894 if (port < us->us_smallest_nonpriv_port) 4895 port = us->us_smallest_nonpriv_port; 4896 4897 for (i = 0; i < us->us_num_epriv_ports; i++) { 4898 if (port == us->us_epriv_ports[i]) { 4899 port++; 4900 /* 4901 * Make sure that the port is in the 4902 * valid range. 4903 */ 4904 goto retry; 4905 } 4906 } 4907 4908 if (is_system_labeled() && 4909 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 4910 port, IPPROTO_UDP, B_TRUE)) != 0) { 4911 port = nextport; 4912 goto retry; 4913 } 4914 4915 return (port); 4916 } 4917 4918 static int 4919 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 4920 { 4921 int err; 4922 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 4923 udp_t *udp = Q_TO_UDP(wq); 4924 udp_stack_t *us = udp->udp_us; 4925 4926 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 4927 opt_storage, udp->udp_connp->conn_mac_exempt, 4928 us->us_netstack->netstack_ip); 4929 if (err == 0) { 4930 err = tsol_update_options(&udp->udp_ip_snd_options, 4931 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 4932 opt_storage); 4933 } 4934 if (err != 0) { 4935 DTRACE_PROBE4( 4936 tx__ip__log__info__updatelabel__udp, 4937 char *, "queue(1) failed to update options(2) on mp(3)", 4938 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 4939 } else { 4940 IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst); 4941 } 4942 return (err); 4943 } 4944 4945 static mblk_t * 4946 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 4947 uint_t srcid, int *error, boolean_t insert_spi, struct nmsghdr *msg, 4948 cred_t *cr, pid_t pid) 4949 { 4950 udp_t *udp = connp->conn_udp; 4951 mblk_t *mp1 = mp; 4952 mblk_t *mp2; 4953 ipha_t *ipha; 4954 int ip_hdr_length; 4955 uint32_t ip_len; 4956 udpha_t *udpha; 4957 boolean_t lock_held = B_FALSE; 4958 in_port_t uha_src_port; 4959 udpattrs_t attrs; 4960 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 4961 uint32_t ip_snd_opt_len = 0; 4962 ip4_pkt_t pktinfo; 4963 ip4_pkt_t *pktinfop = &pktinfo; 4964 ip_opt_info_t optinfo; 4965 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 4966 udp_stack_t *us = udp->udp_us; 4967 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 4968 queue_t *q = connp->conn_wq; 4969 ire_t *ire; 4970 4971 4972 *error = 0; 4973 pktinfop->ip4_ill_index = 0; 4974 pktinfop->ip4_addr = INADDR_ANY; 4975 optinfo.ip_opt_flags = 0; 4976 optinfo.ip_opt_ill_index = 0; 4977 4978 if (v4dst == INADDR_ANY) 4979 v4dst = htonl(INADDR_LOOPBACK); 4980 4981 /* 4982 * If options passed in, feed it for verification and handling 4983 */ 4984 attrs.udpattr_credset = B_FALSE; 4985 if (IPCL_IS_NONSTR(connp)) { 4986 if (msg->msg_controllen != 0) { 4987 attrs.udpattr_ipp4 = pktinfop; 4988 attrs.udpattr_mb = mp; 4989 4990 rw_enter(&udp->udp_rwlock, RW_WRITER); 4991 *error = process_auxiliary_options(connp, 4992 msg->msg_control, msg->msg_controllen, 4993 &attrs, &udp_opt_obj, udp_opt_set); 4994 rw_exit(&udp->udp_rwlock); 4995 if (*error) 4996 goto done; 4997 } 4998 } else { 4999 if (DB_TYPE(mp) != M_DATA) { 5000 mp1 = mp->b_cont; 5001 if (((struct T_unitdata_req *) 5002 mp->b_rptr)->OPT_length != 0) { 
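/*
 * Sketch of the TPI message handled here: mp is an M_PROTO
 * T_unitdata_req whose OPT_offset/OPT_length describe the option
 * buffer, and mp->b_cont (mp1) holds the M_DATA payload; the options
 * are verified and applied by udp_unitdata_opt_process() below.
 */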
5003 attrs.udpattr_ipp4 = pktinfop; 5004 attrs.udpattr_mb = mp; 5005 if (udp_unitdata_opt_process(q, mp, error, 5006 &attrs) < 0) 5007 goto done; 5008 /* 5009 * Note: success in processing options. 5010 * mp option buffer represented by 5011 * OPT_length/offset now potentially modified 5012 * and contain option setting results 5013 */ 5014 ASSERT(*error == 0); 5015 } 5016 } 5017 } 5018 5019 /* mp1 points to the M_DATA mblk carrying the packet */ 5020 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5021 5022 /* 5023 * Determine whether we need to mark the mblk with the user's 5024 * credentials. 5025 */ 5026 ire = connp->conn_ire_cache; 5027 if (is_system_labeled() || CLASSD(v4dst) || (ire == NULL) || 5028 (ire->ire_addr != v4dst) || 5029 (ire->ire_type & (IRE_BROADCAST | IRE_LOCAL | IRE_LOOPBACK))) { 5030 if (cr != NULL && DB_CRED(mp) == NULL) 5031 msg_setcredpid(mp, cr, pid); 5032 } 5033 5034 rw_enter(&udp->udp_rwlock, RW_READER); 5035 lock_held = B_TRUE; 5036 /* 5037 * Check if our saved options are valid; update if not. 5038 * TSOL Note: Since we are not in WRITER mode, UDP packets 5039 * to different destination may require different labels, 5040 * or worse, UDP packets to same IP address may require 5041 * different labels due to use of shared all-zones address. 5042 * We use conn_lock to ensure that lastdst, ip_snd_options, 5043 * and ip_snd_options_len are consistent for the current 5044 * destination and are updated atomically. 5045 */ 5046 mutex_enter(&connp->conn_lock); 5047 if (is_system_labeled()) { 5048 /* Using UDP MLP requires SCM_UCRED from user */ 5049 if (connp->conn_mlp_type != mlptSingle && 5050 !attrs.udpattr_credset) { 5051 mutex_exit(&connp->conn_lock); 5052 DTRACE_PROBE4( 5053 tx__ip__log__info__output__udp, 5054 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5055 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5056 *error = ECONNREFUSED; 5057 goto done; 5058 } 5059 /* 5060 * update label option for this UDP socket if 5061 * - the destination has changed, or 5062 * - the UDP socket is MLP 5063 */ 5064 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5065 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5066 connp->conn_mlp_type != mlptSingle) && 5067 (*error = udp_update_label(q, mp, v4dst)) != 0) { 5068 mutex_exit(&connp->conn_lock); 5069 goto done; 5070 } 5071 } 5072 if (udp->udp_ip_snd_options_len > 0) { 5073 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5074 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5075 } 5076 mutex_exit(&connp->conn_lock); 5077 5078 /* Add an IP header */ 5079 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5080 (insert_spi ? sizeof (uint32_t) : 0); 5081 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5082 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5083 !OK_32PTR(ipha)) { 5084 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5085 if (mp2 == NULL) { 5086 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5087 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5088 *error = ENOMEM; 5089 goto done; 5090 } 5091 mp2->b_wptr = DB_LIM(mp2); 5092 mp2->b_cont = mp1; 5093 mp1 = mp2; 5094 if (DB_TYPE(mp) != M_DATA) 5095 mp->b_cont = mp1; 5096 else 5097 mp = mp1; 5098 5099 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5100 } 5101 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? 
sizeof (uint32_t) : 0)); 5102 #ifdef _BIG_ENDIAN 5103 /* Set version, header length, and tos */ 5104 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5105 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5106 udp->udp_type_of_service); 5107 /* Set ttl and protocol */ 5108 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5109 #else 5110 /* Set version, header length, and tos */ 5111 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5112 ((udp->udp_type_of_service << 8) | 5113 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5114 /* Set ttl and protocol */ 5115 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5116 #endif 5117 if (pktinfop->ip4_addr != INADDR_ANY) { 5118 ipha->ipha_src = pktinfop->ip4_addr; 5119 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5120 } else { 5121 /* 5122 * Copy our address into the packet. If this is zero, 5123 * first look at __sin6_src_id for a hint. If we leave the 5124 * source as INADDR_ANY then ip will fill in the real source 5125 * address. 5126 */ 5127 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5128 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5129 in6_addr_t v6src; 5130 5131 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5132 us->us_netstack); 5133 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5134 } 5135 } 5136 uha_src_port = udp->udp_port; 5137 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5138 rw_exit(&udp->udp_rwlock); 5139 lock_held = B_FALSE; 5140 } 5141 5142 if (pktinfop->ip4_ill_index != 0) { 5143 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5144 } 5145 5146 ipha->ipha_fragment_offset_and_flags = 0; 5147 ipha->ipha_ident = 0; 5148 5149 mp1->b_rptr = (uchar_t *)ipha; 5150 5151 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5152 (uintptr_t)UINT_MAX); 5153 5154 /* Determine length of packet */ 5155 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5156 if ((mp2 = mp1->b_cont) != NULL) { 5157 do { 5158 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5159 ip_len += (uint32_t)MBLKL(mp2); 5160 } while ((mp2 = mp2->b_cont) != NULL); 5161 } 5162 /* 5163 * If the size of the packet is greater than the maximum allowed by 5164 * ip, return an error. Passing this down could cause panics because 5165 * the size will have wrapped and be inconsistent with the msg size. 5166 */ 5167 if (ip_len > IP_MAXPACKET) { 5168 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5169 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5170 *error = EMSGSIZE; 5171 goto done; 5172 } 5173 ipha->ipha_length = htons((uint16_t)ip_len); 5174 ip_len -= ip_hdr_length; 5175 ip_len = htons((uint16_t)ip_len); 5176 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5177 5178 /* Insert all-0s SPI now. */ 5179 if (insert_spi) 5180 *((uint32_t *)(udpha + 1)) = 0; 5181 5182 /* 5183 * Copy in the destination address 5184 */ 5185 ipha->ipha_dst = v4dst; 5186 5187 /* 5188 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5189 */ 5190 if (CLASSD(v4dst)) 5191 ipha->ipha_ttl = udp->udp_multicast_ttl; 5192 5193 udpha->uha_dst_port = port; 5194 udpha->uha_src_port = uha_src_port; 5195 5196 if (ip_snd_opt_len > 0) { 5197 uint32_t cksum; 5198 5199 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5200 lock_held = B_FALSE; 5201 rw_exit(&udp->udp_rwlock); 5202 /* 5203 * Massage source route putting first source route in ipha_dst. 5204 * Ignore the destination in T_unitdata_req. 5205 * Create a checksum adjustment for a source route, if any. 
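 * The (cksum & 0xFFFF) + (cksum >> 16) steps below fold the running
 * 32-bit sum back into 16 bits, one's-complement style; e.g. a value
 * of 0x1FFFE folds to 0xFFFE + 0x1 = 0xFFFF.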
5206 */ 5207 cksum = ip_massage_options(ipha, us->us_netstack); 5208 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5209 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5210 (ipha->ipha_dst & 0xFFFF); 5211 if ((int)cksum < 0) 5212 cksum--; 5213 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5214 /* 5215 * IP does the checksum if uha_checksum is non-zero, 5216 * We make it easy for IP to include our pseudo header 5217 * by putting our length in uha_checksum. 5218 */ 5219 cksum += ip_len; 5220 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5221 /* There might be a carry. */ 5222 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5223 #ifdef _LITTLE_ENDIAN 5224 if (us->us_do_checksum) 5225 ip_len = (cksum << 16) | ip_len; 5226 #else 5227 if (us->us_do_checksum) 5228 ip_len = (ip_len << 16) | cksum; 5229 else 5230 ip_len <<= 16; 5231 #endif 5232 } else { 5233 /* 5234 * IP does the checksum if uha_checksum is non-zero, 5235 * We make it easy for IP to include our pseudo header 5236 * by putting our length in uha_checksum. 5237 */ 5238 if (us->us_do_checksum) 5239 ip_len |= (ip_len << 16); 5240 #ifndef _LITTLE_ENDIAN 5241 else 5242 ip_len <<= 16; 5243 #endif 5244 } 5245 ASSERT(!lock_held); 5246 /* Set UDP length and checksum */ 5247 *((uint32_t *)&udpha->uha_length) = ip_len; 5248 if (DB_CRED(mp) != NULL) 5249 mblk_setcred(mp1, DB_CRED(mp)); 5250 5251 if (DB_TYPE(mp) != M_DATA) { 5252 ASSERT(mp != mp1); 5253 freeb(mp); 5254 } 5255 5256 /* mp has been consumed and we'll return success */ 5257 ASSERT(*error == 0); 5258 mp = NULL; 5259 5260 /* We're done. Pass the packet to ip. */ 5261 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5262 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5263 "udp_wput_end: q %p (%S)", q, "end"); 5264 5265 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5266 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5267 connp->conn_dontroute || 5268 connp->conn_nofailover_ill != NULL || 5269 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5270 optinfo.ip_opt_ill_index != 0 || 5271 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5272 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5273 ipst->ips_ip_g_mrouter != NULL) { 5274 UDP_STAT(us, udp_ip_send); 5275 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5276 &optinfo); 5277 } else { 5278 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5279 } 5280 5281 done: 5282 if (lock_held) 5283 rw_exit(&udp->udp_rwlock); 5284 if (*error != 0) { 5285 ASSERT(mp != NULL); 5286 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5287 } 5288 return (mp); 5289 } 5290 5291 static void 5292 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5293 { 5294 conn_t *connp = udp->udp_connp; 5295 ipaddr_t src, dst; 5296 ire_t *ire; 5297 ipif_t *ipif = NULL; 5298 mblk_t *ire_fp_mp; 5299 boolean_t retry_caching; 5300 udp_stack_t *us = udp->udp_us; 5301 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5302 5303 dst = ipha->ipha_dst; 5304 src = ipha->ipha_src; 5305 ASSERT(ipha->ipha_ident == 0); 5306 5307 if (CLASSD(dst)) { 5308 int err; 5309 5310 ipif = conn_get_held_ipif(connp, 5311 &connp->conn_multicast_ipif, &err); 5312 5313 if (ipif == NULL || ipif->ipif_isv6 || 5314 (ipif->ipif_ill->ill_phyint->phyint_flags & 5315 PHYI_LOOPBACK)) { 5316 if (ipif != NULL) 5317 ipif_refrele(ipif); 5318 UDP_STAT(us, udp_ip_send); 5319 ip_output(connp, mp, q, IP_WPUT); 5320 return; 5321 } 5322 } 5323 5324 retry_caching = B_FALSE; 5325 mutex_enter(&connp->conn_lock); 5326 ire = connp->conn_ire_cache; 5327 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5328 5329 if (ire == NULL 
|| ire->ire_addr != dst || 5330 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5331 retry_caching = B_TRUE; 5332 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5333 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5334 5335 ASSERT(ipif != NULL); 5336 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 5337 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 5338 retry_caching = B_TRUE; 5339 } 5340 5341 if (!retry_caching) { 5342 ASSERT(ire != NULL); 5343 IRE_REFHOLD(ire); 5344 mutex_exit(&connp->conn_lock); 5345 } else { 5346 boolean_t cached = B_FALSE; 5347 5348 connp->conn_ire_cache = NULL; 5349 mutex_exit(&connp->conn_lock); 5350 5351 /* Release the old ire */ 5352 if (ire != NULL) { 5353 IRE_REFRELE_NOTR(ire); 5354 ire = NULL; 5355 } 5356 5357 if (CLASSD(dst)) { 5358 ASSERT(ipif != NULL); 5359 ire = ire_ctable_lookup(dst, 0, 0, ipif, 5360 connp->conn_zoneid, MBLK_GETLABEL(mp), 5361 MATCH_IRE_ILL_GROUP, ipst); 5362 } else { 5363 ASSERT(ipif == NULL); 5364 ire = ire_cache_lookup(dst, connp->conn_zoneid, 5365 MBLK_GETLABEL(mp), ipst); 5366 } 5367 5368 if (ire == NULL) { 5369 if (ipif != NULL) 5370 ipif_refrele(ipif); 5371 UDP_STAT(us, udp_ire_null); 5372 ip_output(connp, mp, q, IP_WPUT); 5373 return; 5374 } 5375 IRE_REFHOLD_NOTR(ire); 5376 5377 mutex_enter(&connp->conn_lock); 5378 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 5379 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 5380 irb_t *irb = ire->ire_bucket; 5381 5382 /* 5383 * IRE's created for non-connection oriented transports 5384 * are normally initialized with IRE_MARK_TEMPORARY set 5385 * in the ire_marks. These IRE's are preferentially 5386 * reaped when the hash chain length in the cache 5387 * bucket exceeds the maximum value specified in 5388 * ip[6]_ire_max_bucket_cnt. This can severely affect 5389 * UDP performance if IRE cache entries that we need 5390 * to reuse are continually removed. To remedy this, 5391 * when we cache the IRE in the conn_t, we remove the 5392 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 5393 * set. 5394 */ 5395 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5396 rw_enter(&irb->irb_lock, RW_WRITER); 5397 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 5398 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 5399 irb->irb_tmp_ire_cnt--; 5400 } 5401 rw_exit(&irb->irb_lock); 5402 } 5403 connp->conn_ire_cache = ire; 5404 cached = B_TRUE; 5405 } 5406 mutex_exit(&connp->conn_lock); 5407 5408 /* 5409 * We can continue to use the ire but since it was not 5410 * cached, we should drop the extra reference. 5411 */ 5412 if (!cached) 5413 IRE_REFRELE_NOTR(ire); 5414 } 5415 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 5416 ASSERT(!CLASSD(dst) || ipif != NULL); 5417 5418 /* 5419 * Check if we can take the fast-path. 
5420 * Note that "incomplete" ire's (where the link-layer for next hop 5421 * is not resolved, or where the fast-path header in nce_fp_mp is not 5422 * available yet) are sent down the legacy (slow) path 5423 */ 5424 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 5425 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 5426 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 5427 ((ire->ire_nce == NULL) || 5428 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 5429 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 5430 if (ipif != NULL) 5431 ipif_refrele(ipif); 5432 UDP_STAT(us, udp_ip_ire_send); 5433 IRE_REFRELE(ire); 5434 ip_output(connp, mp, q, IP_WPUT); 5435 return; 5436 } 5437 5438 if (src == INADDR_ANY && !connp->conn_unspec_src) { 5439 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 5440 ipha->ipha_src = ipif->ipif_src_addr; 5441 else 5442 ipha->ipha_src = ire->ire_src_addr; 5443 } 5444 5445 if (ipif != NULL) 5446 ipif_refrele(ipif); 5447 5448 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 5449 } 5450 5451 static void 5452 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 5453 { 5454 ipaddr_t src, dst; 5455 ill_t *ill; 5456 mblk_t *ire_fp_mp; 5457 uint_t ire_fp_mp_len; 5458 uint16_t *up; 5459 uint32_t cksum, hcksum_txflags; 5460 queue_t *dev_q; 5461 udp_t *udp = connp->conn_udp; 5462 ipha_t *ipha = (ipha_t *)mp->b_rptr; 5463 udp_stack_t *us = udp->udp_us; 5464 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5465 boolean_t ll_multicast = B_FALSE; 5466 5467 dev_q = ire->ire_stq->q_next; 5468 ASSERT(dev_q != NULL); 5469 5470 ill = ire_to_ill(ire); 5471 ASSERT(ill != NULL); 5472 5473 /* is queue flow controlled? */ 5474 if (q->q_first != NULL || connp->conn_draining || 5475 DEV_Q_FLOW_BLOCKED(dev_q)) { 5476 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 5477 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 5478 5479 if (ipst->ips_ip_output_queue) 5480 (void) putq(connp->conn_wq, mp); 5481 else 5482 freemsg(mp); 5483 ire_refrele(ire); 5484 return; 5485 } 5486 5487 ire_fp_mp = ire->ire_nce->nce_fp_mp; 5488 ire_fp_mp_len = MBLKL(ire_fp_mp); 5489 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 5490 5491 dst = ipha->ipha_dst; 5492 src = ipha->ipha_src; 5493 5494 5495 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 5496 5497 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 5498 #ifndef _BIG_ENDIAN 5499 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 5500 #endif 5501 5502 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 5503 ASSERT(ill->ill_hcksum_capab != NULL); 5504 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 5505 } else { 5506 hcksum_txflags = 0; 5507 } 5508 5509 /* pseudo-header checksum (do it in parts for IP header checksum) */ 5510 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 5511 5512 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 5513 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 5514 if (*up != 0) { 5515 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 5516 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 5517 ntohs(ipha->ipha_length), cksum); 5518 5519 /* Software checksum? 
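 * If DB_CKSUMFLAGS(mp) is still zero at this point, IP_CKSUM_XMIT_FAST
 * did not mark the packet for hardware offload and the UDP checksum was
 * computed in software, so the software-checksum counters below are
 * updated.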
*/ 5520 if (DB_CKSUMFLAGS(mp) == 0) { 5521 UDP_STAT(us, udp_out_sw_cksum); 5522 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 5523 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 5524 } 5525 } 5526 5527 if (!CLASSD(dst)) { 5528 ipha->ipha_fragment_offset_and_flags |= 5529 (uint32_t)htons(ire->ire_frag_flag); 5530 } 5531 5532 /* Calculate IP header checksum if hardware isn't capable */ 5533 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 5534 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 5535 ((uint16_t *)ipha)[4]); 5536 } 5537 5538 if (CLASSD(dst)) { 5539 boolean_t ilm_exists; 5540 5541 ILM_WALKER_HOLD(ill); 5542 ilm_exists = (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL); 5543 ILM_WALKER_RELE(ill); 5544 if (ilm_exists) { 5545 ip_multicast_loopback(q, ill, mp, 5546 connp->conn_multicast_loop ? 0 : 5547 IP_FF_NO_MCAST_LOOP, zoneid); 5548 } 5549 5550 /* If multicast TTL is 0 then we are done */ 5551 if (ipha->ipha_ttl == 0) { 5552 freemsg(mp); 5553 ire_refrele(ire); 5554 return; 5555 } 5556 ll_multicast = B_TRUE; 5557 } 5558 5559 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 5560 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 5561 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 5562 5563 UPDATE_OB_PKT_COUNT(ire); 5564 ire->ire_last_used_time = lbolt; 5565 5566 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 5567 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 5568 ntohs(ipha->ipha_length)); 5569 5570 DTRACE_PROBE4(ip4__physical__out__start, 5571 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 5572 FW_HOOKS(ipst->ips_ip4_physical_out_event, 5573 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 5574 ll_multicast, ipst); 5575 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 5576 if (ipst->ips_ipobs_enabled && mp != NULL) { 5577 zoneid_t szone; 5578 5579 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 5580 ipst, ALL_ZONES); 5581 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 5582 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst); 5583 } 5584 5585 if (mp != NULL) { 5586 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 5587 void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, 5588 ipha_t *, ipha, ip6_t *, NULL, int, 0); 5589 5590 if (ILL_DIRECT_CAPABLE(ill)) { 5591 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 5592 5593 (void) idd->idd_tx_df(idd->idd_tx_dh, mp, 5594 (uintptr_t)connp, 0); 5595 } else { 5596 putnext(ire->ire_stq, mp); 5597 } 5598 } 5599 IRE_REFRELE(ire); 5600 } 5601 5602 static boolean_t 5603 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 5604 { 5605 udp_t *udp = Q_TO_UDP(wq); 5606 int err; 5607 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 5608 udp_stack_t *us = udp->udp_us; 5609 5610 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 5611 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 5612 us->us_netstack->netstack_ip); 5613 if (err == 0) { 5614 err = tsol_update_sticky(&udp->udp_sticky_ipp, 5615 &udp->udp_label_len_v6, opt_storage); 5616 } 5617 if (err != 0) { 5618 DTRACE_PROBE4( 5619 tx__ip__log__drop__updatelabel__udp6, 5620 char *, "queue(1) failed to update options(2) on mp(3)", 5621 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5622 } else { 5623 udp->udp_v6lastdst = *dst; 5624 } 5625 return (err); 5626 } 5627 5628 static int 5629 udp_send_connected(conn_t *connp, mblk_t *mp, struct nmsghdr *msg, cred_t *cr, 5630 pid_t pid) 5631 { 5632 udp_t *udp = connp->conn_udp; 5633 udp_stack_t *us = udp->udp_us; 5634 ipaddr_t v4dst; 5635 in_port_t dstport; 5636 boolean_t mapped_addr; 5637 struct sockaddr_storage ss; 
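/*
 * ss provides scratch storage large enough for either a sin_t or a
 * sin6_t; sin and sin6 below simply alias it, depending on udp_family,
 * so the cached destination can be rebuilt as if the caller had passed
 * in a sockaddr.
 */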
5638 sin_t *sin; 5639 sin6_t *sin6; 5640 struct sockaddr *addr; 5641 socklen_t addrlen; 5642 int error; 5643 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5644 5645 /* M_DATA for connected socket */ 5646 5647 ASSERT(udp->udp_issocket || IPCL_IS_NONSTR(connp)); 5648 UDP_DBGSTAT(us, udp_data_conn); 5649 5650 mutex_enter(&connp->conn_lock); 5651 if (udp->udp_state != TS_DATA_XFER) { 5652 mutex_exit(&connp->conn_lock); 5653 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5654 UDP_STAT(us, udp_out_err_notconn); 5655 freemsg(mp); 5656 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5657 "udp_wput_end: connp %p (%S)", connp, 5658 "not-connected; address required"); 5659 return (EDESTADDRREQ); 5660 } 5661 5662 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 5663 if (mapped_addr) 5664 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 5665 5666 /* Initialize addr and addrlen as if they're passed in */ 5667 if (udp->udp_family == AF_INET) { 5668 sin = (sin_t *)&ss; 5669 sin->sin_family = AF_INET; 5670 dstport = sin->sin_port = udp->udp_dstport; 5671 ASSERT(mapped_addr); 5672 sin->sin_addr.s_addr = v4dst; 5673 addr = (struct sockaddr *)sin; 5674 addrlen = sizeof (*sin); 5675 } else { 5676 sin6 = (sin6_t *)&ss; 5677 sin6->sin6_family = AF_INET6; 5678 dstport = sin6->sin6_port = udp->udp_dstport; 5679 sin6->sin6_flowinfo = udp->udp_flowinfo; 5680 sin6->sin6_addr = udp->udp_v6dst; 5681 sin6->sin6_scope_id = 0; 5682 sin6->__sin6_src_id = 0; 5683 addr = (struct sockaddr *)sin6; 5684 addrlen = sizeof (*sin6); 5685 } 5686 mutex_exit(&connp->conn_lock); 5687 5688 if (mapped_addr) { 5689 /* 5690 * Handle both AF_INET and AF_INET6; the latter 5691 * for IPV4 mapped destination addresses. Note 5692 * here that both addr and addrlen point to the 5693 * corresponding struct depending on the address 5694 * family of the socket. 5695 */ 5696 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 5697 insert_spi, msg, cr, pid); 5698 } else { 5699 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, pid); 5700 } 5701 if (error == 0) { 5702 ASSERT(mp == NULL); 5703 return (0); 5704 } 5705 5706 UDP_STAT(us, udp_out_err_output); 5707 ASSERT(mp != NULL); 5708 if (IPCL_IS_NONSTR(connp)) { 5709 freemsg(mp); 5710 return (error); 5711 } else { 5712 /* mp is freed by the following routine */ 5713 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, 5714 (t_scalar_t)addrlen, (t_scalar_t)error); 5715 return (0); 5716 } 5717 } 5718 5719 /* ARGSUSED */ 5720 static int 5721 udp_send_not_connected(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 5722 socklen_t addrlen, struct nmsghdr *msg, cred_t *cr, pid_t pid) 5723 { 5724 5725 udp_t *udp = connp->conn_udp; 5726 boolean_t insert_spi = udp->udp_nat_t_endpoint; 5727 int error = 0; 5728 sin6_t *sin6; 5729 sin_t *sin; 5730 uint_t srcid; 5731 uint16_t port; 5732 ipaddr_t v4dst; 5733 5734 5735 ASSERT(addr != NULL); 5736 5737 switch (udp->udp_family) { 5738 case AF_INET6: 5739 sin6 = (sin6_t *)addr; 5740 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 5741 /* 5742 * Destination is a non-IPv4-compatible IPv6 address. 5743 * Send out an IPv6 format packet. 5744 */ 5745 mp = udp_output_v6(connp, mp, sin6, &error, msg, cr, 5746 pid); 5747 if (error != 0) 5748 goto ud_error; 5749 5750 return (0); 5751 } 5752 /* 5753 * If the local address is not zero or a mapped address 5754 * return an error. It would be possible to send an IPv4 5755 * packet but the response would never make it back to the 5756 * application since it is bound to a non-mapped address. 
*/ 5758 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 5759 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 5760 error = EADDRNOTAVAIL; 5761 goto ud_error; 5762 } 5763 /* Send IPv4 packet without modifying udp_ipversion */ 5764 /* Extract port and ipaddr */ 5765 port = sin6->sin6_port; 5766 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 5767 srcid = sin6->__sin6_src_id; 5768 break; 5769 5770 case AF_INET: 5771 sin = (sin_t *)addr; 5772 /* Extract port and ipaddr */ 5773 port = sin->sin_port; 5774 v4dst = sin->sin_addr.s_addr; 5775 srcid = 0; 5776 break; 5777 } 5778 5779 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi, 5780 msg, cr, pid); 5781 5782 if (error == 0) { 5783 ASSERT(mp == NULL); 5784 return (0); 5785 } 5786 5787 ud_error: 5788 ASSERT(mp != NULL); 5789 5790 return (error); 5791 } 5792 5793 /* 5794 * This routine handles all messages passed downstream. It either 5795 * consumes the message or passes it downstream; it never queues 5796 * a message. 5797 * 5798 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 5799 * is valid when we are directly beneath the stream head, and thus sockfs 5800 * is able to bypass STREAMS and directly call us, passing along the sockaddr 5801 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 5802 * connected endpoints. 5803 */ 5804 void 5805 udp_wput(queue_t *q, mblk_t *mp) 5806 { 5807 conn_t *connp = Q_TO_CONN(q); 5808 udp_t *udp = connp->conn_udp; 5809 int error = 0; 5810 struct sockaddr *addr; 5811 socklen_t addrlen; 5812 udp_stack_t *us = udp->udp_us; 5813 5814 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 5815 "udp_wput_start: queue %p mp %p", q, mp); 5816 5817 /* 5818 * We directly handle several cases here: T_UNITDATA_REQ messages 5819 * coming down as M_PROTO/M_PCPROTO, and M_DATA messages for a connected 5820 * socket. 5821 */ 5822 switch (DB_TYPE(mp)) { 5823 case M_DATA: 5824 /* 5825 * Quick check for error cases. Checks will be done again 5826 * under the lock later on. 5827 */ 5828 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 5829 /* Not connected; address is required */ 5830 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 5831 UDP_STAT(us, udp_out_err_notconn); 5832 freemsg(mp); 5833 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5834 "udp_wput_end: connp %p (%S)", connp, 5835 "not-connected; address required"); 5836 return; 5837 } 5838 (void) udp_send_connected(connp, mp, NULL, NULL, -1); 5839 return; 5840 5841 case M_PROTO: 5842 case M_PCPROTO: { 5843 struct T_unitdata_req *tudr; 5844 5845 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 5846 tudr = (struct T_unitdata_req *)mp->b_rptr; 5847 5848 /* Handle valid T_UNITDATA_REQ here */ 5849 if (MBLKL(mp) >= sizeof (*tudr) && 5850 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 5851 if (mp->b_cont == NULL) { 5852 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5853 "udp_wput_end: q %p (%S)", q, "badaddr"); 5854 error = EPROTO; 5855 goto ud_error; 5856 } 5857 5858 if (!MBLKIN(mp, 0, tudr->DEST_offset + 5859 tudr->DEST_length)) { 5860 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5861 "udp_wput_end: q %p (%S)", q, "badaddr"); 5862 error = EADDRNOTAVAIL; 5863 goto ud_error; 5864 } 5865 /* 5866 * If a port has not been bound to the stream, fail. 5867 * This is not a problem when sockfs is directly 5868 * above us, because it will ensure that the socket 5869 * is first bound before allowing data to be sent.
5870 */ 5871 if (udp->udp_state == TS_UNBND) { 5872 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5873 "udp_wput_end: q %p (%S)", q, "outstate"); 5874 error = EPROTO; 5875 goto ud_error; 5876 } 5877 addr = (struct sockaddr *) 5878 &mp->b_rptr[tudr->DEST_offset]; 5879 addrlen = tudr->DEST_length; 5880 if (tudr->OPT_length != 0) 5881 UDP_STAT(us, udp_out_opt); 5882 break; 5883 } 5884 /* FALLTHRU */ 5885 } 5886 default: 5887 udp_wput_other(q, mp); 5888 return; 5889 } 5890 ASSERT(addr != NULL); 5891 5892 error = udp_send_not_connected(connp, mp, addr, addrlen, NULL, NULL, 5893 -1); 5894 if (error != 0) { 5895 ud_error: 5896 UDP_STAT(us, udp_out_err_output); 5897 ASSERT(mp != NULL); 5898 /* mp is freed by the following routine */ 5899 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 5900 (t_scalar_t)error); 5901 } 5902 } 5903 5904 /* ARGSUSED */ 5905 static void 5906 udp_wput_fallback(queue_t *wq, mblk_t *mp) 5907 { 5908 #ifdef DEBUG 5909 cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n"); 5910 #endif 5911 freemsg(mp); 5912 } 5913 5914 5915 /* 5916 * udp_output_v6(): 5917 * Assumes that udp_wput did some sanity checking on the destination 5918 * address. 5919 */ 5920 static mblk_t * 5921 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error, 5922 struct nmsghdr *msg, cred_t *cr, pid_t pid) 5923 { 5924 ip6_t *ip6h; 5925 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 5926 mblk_t *mp1 = mp; 5927 mblk_t *mp2; 5928 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 5929 size_t ip_len; 5930 udpha_t *udph; 5931 udp_t *udp = connp->conn_udp; 5932 udp_stack_t *us = udp->udp_us; 5933 queue_t *q = connp->conn_wq; 5934 ip6_pkt_t ipp_s; /* For ancillary data options */ 5935 ip6_pkt_t *ipp = &ipp_s; 5936 ip6_pkt_t *tipp; /* temporary ipp */ 5937 uint32_t csum = 0; 5938 uint_t ignore = 0; 5939 uint_t option_exists = 0, is_sticky = 0; 5940 uint8_t *cp; 5941 uint8_t *nxthdr_ptr; 5942 in6_addr_t ip6_dst; 5943 udpattrs_t attrs; 5944 boolean_t opt_present; 5945 ip6_hbh_t *hopoptsptr = NULL; 5946 uint_t hopoptslen = 0; 5947 boolean_t is_ancillary = B_FALSE; 5948 size_t sth_wroff = 0; 5949 ire_t *ire; 5950 5951 *error = 0; 5952 5953 /* 5954 * If the local address is a mapped address return 5955 * an error. 5956 * It would be possible to send an IPv6 packet but the 5957 * response would never make it back to the application 5958 * since it is bound to a mapped address. 
5959 */ 5960 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 5961 *error = EADDRNOTAVAIL; 5962 goto done; 5963 } 5964 5965 ipp->ipp_fields = 0; 5966 ipp->ipp_sticky_ignored = 0; 5967 5968 /* 5969 * If TPI options passed in, feed it for verification and handling 5970 */ 5971 attrs.udpattr_credset = B_FALSE; 5972 opt_present = B_FALSE; 5973 if (IPCL_IS_NONSTR(connp)) { 5974 if (msg->msg_controllen != 0) { 5975 attrs.udpattr_ipp6 = ipp; 5976 attrs.udpattr_mb = mp; 5977 5978 rw_enter(&udp->udp_rwlock, RW_WRITER); 5979 *error = process_auxiliary_options(connp, 5980 msg->msg_control, msg->msg_controllen, 5981 &attrs, &udp_opt_obj, udp_opt_set); 5982 rw_exit(&udp->udp_rwlock); 5983 if (*error) 5984 goto done; 5985 ASSERT(*error == 0); 5986 opt_present = B_TRUE; 5987 } 5988 } else { 5989 if (DB_TYPE(mp) != M_DATA) { 5990 mp1 = mp->b_cont; 5991 if (((struct T_unitdata_req *) 5992 mp->b_rptr)->OPT_length != 0) { 5993 attrs.udpattr_ipp6 = ipp; 5994 attrs.udpattr_mb = mp; 5995 if (udp_unitdata_opt_process(q, mp, error, 5996 &attrs) < 0) { 5997 goto done; 5998 } 5999 ASSERT(*error == 0); 6000 opt_present = B_TRUE; 6001 } 6002 } 6003 } 6004 6005 /* 6006 * Determine whether we need to mark the mblk with the user's 6007 * credentials. 6008 */ 6009 ire = connp->conn_ire_cache; 6010 if (is_system_labeled() || IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) || 6011 (ire == NULL) || 6012 (!IN6_ARE_ADDR_EQUAL(&ire->ire_addr_v6, &sin6->sin6_addr)) || 6013 (ire->ire_type & (IRE_LOCAL | IRE_LOOPBACK))) { 6014 if (cr != NULL && DB_CRED(mp) == NULL) 6015 msg_setcredpid(mp, cr, pid); 6016 } 6017 6018 rw_enter(&udp->udp_rwlock, RW_READER); 6019 ignore = ipp->ipp_sticky_ignored; 6020 6021 /* mp1 points to the M_DATA mblk carrying the packet */ 6022 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6023 6024 if (sin6->sin6_scope_id != 0 && 6025 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6026 /* 6027 * IPPF_SCOPE_ID is special. It's neither a sticky 6028 * option nor ancillary data. It needs to be 6029 * explicitly set in options_exists. 6030 */ 6031 option_exists |= IPPF_SCOPE_ID; 6032 } 6033 6034 /* 6035 * Compute the destination address 6036 */ 6037 ip6_dst = sin6->sin6_addr; 6038 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6039 ip6_dst = ipv6_loopback; 6040 6041 /* 6042 * If we're not going to the same destination as last time, then 6043 * recompute the label required. This is done in a separate routine to 6044 * avoid blowing up our stack here. 6045 * 6046 * TSOL Note: Since we are not in WRITER mode, UDP packets 6047 * to different destination may require different labels, 6048 * or worse, UDP packets to same IP address may require 6049 * different labels due to use of shared all-zones address. 6050 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6051 * and sticky ipp_hopoptslen are consistent for the current 6052 * destination and are updated atomically. 
6053 */ 6054 mutex_enter(&connp->conn_lock); 6055 if (is_system_labeled()) { 6056 /* Using UDP MLP requires SCM_UCRED from user */ 6057 if (connp->conn_mlp_type != mlptSingle && 6058 !attrs.udpattr_credset) { 6059 DTRACE_PROBE4( 6060 tx__ip__log__info__output__udp6, 6061 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6062 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6063 *error = ECONNREFUSED; 6064 rw_exit(&udp->udp_rwlock); 6065 mutex_exit(&connp->conn_lock); 6066 goto done; 6067 } 6068 /* 6069 * update label option for this UDP socket if 6070 * - the destination has changed, or 6071 * - the UDP socket is MLP 6072 */ 6073 if ((opt_present || 6074 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6075 connp->conn_mlp_type != mlptSingle) && 6076 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 6077 rw_exit(&udp->udp_rwlock); 6078 mutex_exit(&connp->conn_lock); 6079 goto done; 6080 } 6081 } 6082 6083 /* 6084 * If there's a security label here, then we ignore any options the 6085 * user may try to set. We keep the peer's label as a hidden sticky 6086 * option. We make a private copy of this label before releasing the 6087 * lock so that label is kept consistent with the destination addr. 6088 */ 6089 if (udp->udp_label_len_v6 > 0) { 6090 ignore &= ~IPPF_HOPOPTS; 6091 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6092 } 6093 6094 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6095 /* No sticky options nor ancillary data. */ 6096 mutex_exit(&connp->conn_lock); 6097 goto no_options; 6098 } 6099 6100 /* 6101 * Go through the options figuring out where each is going to 6102 * come from and build two masks. The first mask indicates if 6103 * the option exists at all. The second mask indicates if the 6104 * option is sticky or ancillary. 
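 * For example, IPV6_HOPOPTS supplied as ancillary data on this send
 * sets IPPF_HOPOPTS in option_exists only, and the value is taken
 * from ipp; hop-by-hop options installed earlier with setsockopt()
 * set the bit in both masks, and ANCIL_OR_STICKY_PTR() below then
 * resolves to &udp->udp_sticky_ipp instead of ipp.  Ancillary data
 * always takes precedence over a sticky setting for the same option.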
6105 */ 6106 if (!(ignore & IPPF_HOPOPTS)) { 6107 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6108 option_exists |= IPPF_HOPOPTS; 6109 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6110 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6111 option_exists |= IPPF_HOPOPTS; 6112 is_sticky |= IPPF_HOPOPTS; 6113 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6114 hopoptsptr = kmem_alloc( 6115 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6116 if (hopoptsptr == NULL) { 6117 *error = ENOMEM; 6118 mutex_exit(&connp->conn_lock); 6119 goto done; 6120 } 6121 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6122 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6123 hopoptslen); 6124 udp_ip_hdr_len += hopoptslen; 6125 } 6126 } 6127 mutex_exit(&connp->conn_lock); 6128 6129 if (!(ignore & IPPF_RTHDR)) { 6130 if (ipp->ipp_fields & IPPF_RTHDR) { 6131 option_exists |= IPPF_RTHDR; 6132 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6133 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6134 option_exists |= IPPF_RTHDR; 6135 is_sticky |= IPPF_RTHDR; 6136 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6137 } 6138 } 6139 6140 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6141 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6142 option_exists |= IPPF_RTDSTOPTS; 6143 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6144 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6145 option_exists |= IPPF_RTDSTOPTS; 6146 is_sticky |= IPPF_RTDSTOPTS; 6147 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6148 } 6149 } 6150 6151 if (!(ignore & IPPF_DSTOPTS)) { 6152 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6153 option_exists |= IPPF_DSTOPTS; 6154 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6155 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6156 option_exists |= IPPF_DSTOPTS; 6157 is_sticky |= IPPF_DSTOPTS; 6158 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6159 } 6160 } 6161 6162 if (!(ignore & IPPF_IFINDEX)) { 6163 if (ipp->ipp_fields & IPPF_IFINDEX) { 6164 option_exists |= IPPF_IFINDEX; 6165 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6166 option_exists |= IPPF_IFINDEX; 6167 is_sticky |= IPPF_IFINDEX; 6168 } 6169 } 6170 6171 if (!(ignore & IPPF_ADDR)) { 6172 if (ipp->ipp_fields & IPPF_ADDR) { 6173 option_exists |= IPPF_ADDR; 6174 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6175 option_exists |= IPPF_ADDR; 6176 is_sticky |= IPPF_ADDR; 6177 } 6178 } 6179 6180 if (!(ignore & IPPF_DONTFRAG)) { 6181 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6182 option_exists |= IPPF_DONTFRAG; 6183 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6184 option_exists |= IPPF_DONTFRAG; 6185 is_sticky |= IPPF_DONTFRAG; 6186 } 6187 } 6188 6189 if (!(ignore & IPPF_USE_MIN_MTU)) { 6190 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6191 option_exists |= IPPF_USE_MIN_MTU; 6192 } else if (udp->udp_sticky_ipp.ipp_fields & 6193 IPPF_USE_MIN_MTU) { 6194 option_exists |= IPPF_USE_MIN_MTU; 6195 is_sticky |= IPPF_USE_MIN_MTU; 6196 } 6197 } 6198 6199 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6200 option_exists |= IPPF_HOPLIMIT; 6201 /* IPV6_HOPLIMIT can never be sticky */ 6202 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6203 6204 if (!(ignore & IPPF_UNICAST_HOPS) && 6205 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6206 option_exists |= IPPF_UNICAST_HOPS; 6207 is_sticky |= IPPF_UNICAST_HOPS; 6208 } 6209 6210 if (!(ignore & IPPF_MULTICAST_HOPS) && 6211 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6212 option_exists |= 
IPPF_MULTICAST_HOPS; 6213 is_sticky |= IPPF_MULTICAST_HOPS; 6214 } 6215 6216 if (!(ignore & IPPF_TCLASS)) { 6217 if (ipp->ipp_fields & IPPF_TCLASS) { 6218 option_exists |= IPPF_TCLASS; 6219 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6220 option_exists |= IPPF_TCLASS; 6221 is_sticky |= IPPF_TCLASS; 6222 } 6223 } 6224 6225 if (!(ignore & IPPF_NEXTHOP) && 6226 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6227 option_exists |= IPPF_NEXTHOP; 6228 is_sticky |= IPPF_NEXTHOP; 6229 } 6230 6231 no_options: 6232 6233 /* 6234 * If any options carried in the ip6i_t were specified, we 6235 * need to account for the ip6i_t in the data we'll be sending 6236 * down. 6237 */ 6238 if (option_exists & IPPF_HAS_IP6I) 6239 udp_ip_hdr_len += sizeof (ip6i_t); 6240 6241 /* check/fix buffer config, setup pointers into it */ 6242 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6243 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6244 !OK_32PTR(ip6h)) { 6245 6246 /* Try to get everything in a single mblk next time */ 6247 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6248 udp->udp_max_hdr_len = udp_ip_hdr_len; 6249 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6250 } 6251 6252 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6253 if (mp2 == NULL) { 6254 *error = ENOMEM; 6255 rw_exit(&udp->udp_rwlock); 6256 goto done; 6257 } 6258 mp2->b_wptr = DB_LIM(mp2); 6259 mp2->b_cont = mp1; 6260 mp1 = mp2; 6261 if (DB_TYPE(mp) != M_DATA) 6262 mp->b_cont = mp1; 6263 else 6264 mp = mp1; 6265 6266 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6267 } 6268 mp1->b_rptr = (unsigned char *)ip6h; 6269 ip6i = (ip6i_t *)ip6h; 6270 6271 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6272 if (option_exists & IPPF_HAS_IP6I) { 6273 ip6h = (ip6_t *)&ip6i[1]; 6274 ip6i->ip6i_flags = 0; 6275 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6276 6277 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6278 if (option_exists & IPPF_SCOPE_ID) { 6279 ip6i->ip6i_flags |= IP6I_IFINDEX; 6280 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6281 } else if (option_exists & IPPF_IFINDEX) { 6282 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6283 ASSERT(tipp->ipp_ifindex != 0); 6284 ip6i->ip6i_flags |= IP6I_IFINDEX; 6285 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6286 } 6287 6288 if (option_exists & IPPF_ADDR) { 6289 /* 6290 * Enable per-packet source address verification if 6291 * IPV6_PKTINFO specified the source address. 6292 * ip6_src is set in the transport's _wput function. 6293 */ 6294 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6295 } 6296 6297 if (option_exists & IPPF_DONTFRAG) { 6298 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6299 } 6300 6301 if (option_exists & IPPF_USE_MIN_MTU) { 6302 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6303 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6304 } 6305 6306 if (option_exists & IPPF_NEXTHOP) { 6307 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6308 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6309 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6310 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6311 } 6312 6313 /* 6314 * tell IP this is an ip6i_t private header 6315 */ 6316 ip6i->ip6i_nxt = IPPROTO_RAW; 6317 } 6318 6319 /* Initialize IPv6 header */ 6320 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6321 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6322 6323 /* Set the hoplimit of the outgoing packet. */ 6324 if (option_exists & IPPF_HOPLIMIT) { 6325 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
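 * Otherwise the hop limit falls back to udp_multicast_ttl
 * (IPV6_MULTICAST_HOPS) for multicast destinations, or to udp_ttl
 * (IPV6_UNICAST_HOPS) for unicast ones, exactly as the chain below
 * selects.  For example, after setting IPV6_UNICAST_HOPS to 32, a
 * unicast send with no ancillary hop limit goes out with ip6_hops
 * set to 32.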
*/ 6326 ip6h->ip6_hops = ipp->ipp_hoplimit; 6327 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6328 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6329 ip6h->ip6_hops = udp->udp_multicast_ttl; 6330 if (option_exists & IPPF_MULTICAST_HOPS) 6331 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6332 } else { 6333 ip6h->ip6_hops = udp->udp_ttl; 6334 if (option_exists & IPPF_UNICAST_HOPS) 6335 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6336 } 6337 6338 if (option_exists & IPPF_ADDR) { 6339 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6340 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6341 ip6h->ip6_src = tipp->ipp_addr; 6342 } else { 6343 /* 6344 * The source address was not set using IPV6_PKTINFO. 6345 * First look at the bound source. 6346 * If unspecified fallback to __sin6_src_id. 6347 */ 6348 ip6h->ip6_src = udp->udp_v6src; 6349 if (sin6->__sin6_src_id != 0 && 6350 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6351 ip_srcid_find_id(sin6->__sin6_src_id, 6352 &ip6h->ip6_src, connp->conn_zoneid, 6353 us->us_netstack); 6354 } 6355 } 6356 6357 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6358 cp = (uint8_t *)&ip6h[1]; 6359 6360 /* 6361 * Here's where we have to start stringing together 6362 * any extension headers in the right order: 6363 * Hop-by-hop, destination, routing, and final destination opts. 6364 */ 6365 if (option_exists & IPPF_HOPOPTS) { 6366 /* Hop-by-hop options */ 6367 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6368 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6369 if (hopoptslen == 0) { 6370 hopoptsptr = tipp->ipp_hopopts; 6371 hopoptslen = tipp->ipp_hopoptslen; 6372 is_ancillary = B_TRUE; 6373 } 6374 6375 *nxthdr_ptr = IPPROTO_HOPOPTS; 6376 nxthdr_ptr = &hbh->ip6h_nxt; 6377 6378 bcopy(hopoptsptr, cp, hopoptslen); 6379 cp += hopoptslen; 6380 6381 if (hopoptsptr != NULL && !is_ancillary) { 6382 kmem_free(hopoptsptr, hopoptslen); 6383 hopoptsptr = NULL; 6384 hopoptslen = 0; 6385 } 6386 } 6387 /* 6388 * En-route destination options 6389 * Only do them if there's a routing header as well 6390 */ 6391 if (option_exists & IPPF_RTDSTOPTS) { 6392 ip6_dest_t *dst = (ip6_dest_t *)cp; 6393 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6394 6395 *nxthdr_ptr = IPPROTO_DSTOPTS; 6396 nxthdr_ptr = &dst->ip6d_nxt; 6397 6398 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6399 cp += tipp->ipp_rtdstoptslen; 6400 } 6401 /* 6402 * Routing header next 6403 */ 6404 if (option_exists & IPPF_RTHDR) { 6405 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6406 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6407 6408 *nxthdr_ptr = IPPROTO_ROUTING; 6409 nxthdr_ptr = &rt->ip6r_nxt; 6410 6411 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6412 cp += tipp->ipp_rthdrlen; 6413 } 6414 /* 6415 * Do ultimate destination options 6416 */ 6417 if (option_exists & IPPF_DSTOPTS) { 6418 ip6_dest_t *dest = (ip6_dest_t *)cp; 6419 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6420 6421 *nxthdr_ptr = IPPROTO_DSTOPTS; 6422 nxthdr_ptr = &dest->ip6d_nxt; 6423 6424 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6425 cp += tipp->ipp_dstoptslen; 6426 } 6427 /* 6428 * Now set the last header pointer to the proto passed in 6429 */ 6430 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6431 *nxthdr_ptr = IPPROTO_UDP; 6432 6433 /* Update UDP header */ 6434 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6435 udph->uha_dst_port = sin6->sin6_port; 6436 udph->uha_src_port = udp->udp_port; 6437 6438 /* 6439 * Copy in the destination address 6440 */ 6441 ip6h->ip6_dst = ip6_dst; 6442 6443 ip6h->ip6_vcf = 6444 (IPV6_DEFAULT_VERS_AND_FLOW & 
IPV6_VERS_AND_FLOW_MASK) | 6445 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6446 6447 if (option_exists & IPPF_TCLASS) { 6448 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6449 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6450 tipp->ipp_tclass); 6451 } 6452 rw_exit(&udp->udp_rwlock); 6453 6454 if (option_exists & IPPF_RTHDR) { 6455 ip6_rthdr_t *rth; 6456 6457 /* 6458 * Perform any processing needed for source routing. 6459 * We know that all extension headers will be in the same mblk 6460 * as the IPv6 header. 6461 */ 6462 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6463 if (rth != NULL && rth->ip6r_segleft != 0) { 6464 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6465 /* 6466 * Drop packet - only support Type 0 routing. 6467 * Notify the application as well. 6468 */ 6469 *error = EPROTO; 6470 goto done; 6471 } 6472 6473 /* 6474 * rth->ip6r_len is twice the number of 6475 * addresses in the header. Thus it must be even. 6476 */ 6477 if (rth->ip6r_len & 0x1) { 6478 *error = EPROTO; 6479 goto done; 6480 } 6481 /* 6482 * Shuffle the routing header and ip6_dst 6483 * addresses, and get the checksum difference 6484 * between the first hop (in ip6_dst) and 6485 * the destination (in the last routing hdr entry). 6486 */ 6487 csum = ip_massage_options_v6(ip6h, rth, 6488 us->us_netstack); 6489 /* 6490 * Verify that the first hop isn't a mapped address. 6491 * Routers along the path need to do this verification 6492 * for subsequent hops. 6493 */ 6494 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6495 *error = EADDRNOTAVAIL; 6496 goto done; 6497 } 6498 6499 cp += (rth->ip6r_len + 1)*8; 6500 } 6501 } 6502 6503 /* count up length of UDP packet */ 6504 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6505 if ((mp2 = mp1->b_cont) != NULL) { 6506 do { 6507 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6508 ip_len += (uint32_t)MBLKL(mp2); 6509 } while ((mp2 = mp2->b_cont) != NULL); 6510 } 6511 6512 /* 6513 * If the size of the packet is greater than the maximum allowed by 6514 * ip, return an error. Passing this down could cause panics because 6515 * the size will have wrapped and be inconsistent with the msg size. 6516 */ 6517 if (ip_len > IP_MAXPACKET) { 6518 *error = EMSGSIZE; 6519 goto done; 6520 } 6521 6522 /* Store the UDP length. Subtract length of extension hdrs */ 6523 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6524 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6525 6526 /* 6527 * We make it easy for IP to include our pseudo header 6528 * by putting our length in uh_checksum, modified (if 6529 * we have a routing header) by the checksum difference 6530 * between the ultimate destination and first hop addresses. 6531 * Note: UDP over IPv6 must always checksum the packet. 6532 */ 6533 csum += udph->uha_length; 6534 csum = (csum & 0xFFFF) + (csum >> 16); 6535 udph->uha_checksum = (uint16_t)csum; 6536 6537 #ifdef _LITTLE_ENDIAN 6538 ip_len = htons(ip_len); 6539 #endif 6540 ip6h->ip6_plen = ip_len; 6541 if (DB_CRED(mp) != NULL) 6542 mblk_setcred(mp1, DB_CRED(mp)); 6543 6544 if (DB_TYPE(mp) != M_DATA) { 6545 ASSERT(mp != mp1); 6546 freeb(mp); 6547 } 6548 6549 /* mp has been consumed and we'll return success */ 6550 ASSERT(*error == 0); 6551 mp = NULL; 6552 6553 /* We're done. 
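 * The wrapping mblk was consumed above (freed when it was a separate
 * T_UNITDATA_REQ block), so mp is set to NULL: the caller sees a
 * NULL return on success, and the error accounting at done: only
 * fires when *error is non-zero.  Only the M_DATA chain headed by
 * mp1 travels on.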
Pass the packet to IP */ 6554 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6555 ip_output_v6(connp, mp1, q, IP_WPUT); 6556 6557 done: 6558 if (sth_wroff != 0) { 6559 (void) proto_set_tx_wroff(RD(q), connp, 6560 udp->udp_max_hdr_len + us->us_wroff_extra); 6561 } 6562 if (hopoptsptr != NULL && !is_ancillary) { 6563 kmem_free(hopoptsptr, hopoptslen); 6564 hopoptsptr = NULL; 6565 } 6566 if (*error != 0) { 6567 ASSERT(mp != NULL); 6568 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6569 } 6570 return (mp); 6571 } 6572 6573 6574 static int 6575 i_udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6576 { 6577 sin_t *sin = (sin_t *)sa; 6578 sin6_t *sin6 = (sin6_t *)sa; 6579 6580 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6581 6582 if (udp->udp_state != TS_DATA_XFER) 6583 return (ENOTCONN); 6584 6585 switch (udp->udp_family) { 6586 case AF_INET: 6587 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6588 6589 if (*salenp < sizeof (sin_t)) 6590 return (EINVAL); 6591 6592 *salenp = sizeof (sin_t); 6593 *sin = sin_null; 6594 sin->sin_family = AF_INET; 6595 sin->sin_port = udp->udp_dstport; 6596 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 6597 break; 6598 6599 case AF_INET6: 6600 if (*salenp < sizeof (sin6_t)) 6601 return (EINVAL); 6602 6603 *salenp = sizeof (sin6_t); 6604 *sin6 = sin6_null; 6605 sin6->sin6_family = AF_INET6; 6606 sin6->sin6_port = udp->udp_dstport; 6607 sin6->sin6_addr = udp->udp_v6dst; 6608 sin6->sin6_flowinfo = udp->udp_flowinfo; 6609 break; 6610 } 6611 6612 return (0); 6613 } 6614 6615 static int 6616 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 6617 { 6618 sin_t *sin = (sin_t *)sa; 6619 sin6_t *sin6 = (sin6_t *)sa; 6620 6621 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 6622 6623 switch (udp->udp_family) { 6624 case AF_INET: 6625 ASSERT(udp->udp_ipversion == IPV4_VERSION); 6626 6627 if (*salenp < sizeof (sin_t)) 6628 return (EINVAL); 6629 6630 *salenp = sizeof (sin_t); 6631 *sin = sin_null; 6632 sin->sin_family = AF_INET; 6633 sin->sin_port = udp->udp_port; 6634 6635 /* 6636 * If udp_v6src is unspecified, we might be bound to broadcast 6637 * / multicast. Use udp_bound_v6src as local address instead 6638 * (that could also still be unspecified). 6639 */ 6640 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 6641 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6642 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 6643 } else { 6644 sin->sin_addr.s_addr = 6645 V4_PART_OF_V6(udp->udp_bound_v6src); 6646 } 6647 break; 6648 6649 case AF_INET6: 6650 if (*salenp < sizeof (sin6_t)) 6651 return (EINVAL); 6652 6653 *salenp = sizeof (sin6_t); 6654 *sin6 = sin6_null; 6655 sin6->sin6_family = AF_INET6; 6656 sin6->sin6_port = udp->udp_port; 6657 sin6->sin6_flowinfo = udp->udp_flowinfo; 6658 6659 /* 6660 * If udp_v6src is unspecified, we might be bound to broadcast 6661 * / multicast. Use udp_bound_v6src as local address instead 6662 * (that could also still be unspecified). 6663 */ 6664 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 6665 sin6->sin6_addr = udp->udp_v6src; 6666 else 6667 sin6->sin6_addr = udp->udp_bound_v6src; 6668 break; 6669 } 6670 6671 return (0); 6672 } 6673 6674 /* 6675 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
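 * These arrive as M_CMD messages: the first mblk holds a cmdblk_t
 * whose cb_cmd selects the operation (TI_GETMYNAME or TI_GETPEERNAME
 * here) and whose cb_len gives the size of the caller's buffer in
 * b_cont.  The answer is written into that buffer, cb_len and
 * cb_error are updated, and the same message is turned around with
 * qreply().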
6676 */ 6677 static void 6678 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 6679 { 6680 void *data; 6681 mblk_t *datamp = mp->b_cont; 6682 udp_t *udp = Q_TO_UDP(q); 6683 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 6684 6685 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 6686 cmdp->cb_error = EPROTO; 6687 qreply(q, mp); 6688 return; 6689 } 6690 data = datamp->b_rptr; 6691 6692 rw_enter(&udp->udp_rwlock, RW_READER); 6693 switch (cmdp->cb_cmd) { 6694 case TI_GETPEERNAME: 6695 cmdp->cb_error = i_udp_getpeername(udp, data, &cmdp->cb_len); 6696 break; 6697 case TI_GETMYNAME: 6698 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 6699 break; 6700 default: 6701 cmdp->cb_error = EINVAL; 6702 break; 6703 } 6704 rw_exit(&udp->udp_rwlock); 6705 6706 qreply(q, mp); 6707 } 6708 6709 static void 6710 udp_disable_direct_sockfs(udp_t *udp) 6711 { 6712 udp->udp_issocket = B_FALSE; 6713 if (udp->udp_direct_sockfs) { 6714 /* 6715 * Disable read-side synchronous stream interface and 6716 * drain any queued data. 6717 */ 6718 udp_rcv_drain(udp->udp_connp->conn_rq, udp, B_FALSE); 6719 ASSERT(!udp->udp_direct_sockfs); 6720 UDP_STAT(udp->udp_us, udp_sock_fallback); 6721 } 6722 } 6723 6724 static void 6725 udp_wput_other(queue_t *q, mblk_t *mp) 6726 { 6727 uchar_t *rptr = mp->b_rptr; 6728 struct datab *db; 6729 struct iocblk *iocp; 6730 cred_t *cr; 6731 conn_t *connp = Q_TO_CONN(q); 6732 udp_t *udp = connp->conn_udp; 6733 udp_stack_t *us; 6734 6735 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 6736 "udp_wput_other_start: q %p", q); 6737 6738 us = udp->udp_us; 6739 db = mp->b_datap; 6740 6741 cr = DB_CREDDEF(mp, connp->conn_cred); 6742 6743 switch (db->db_type) { 6744 case M_CMD: 6745 udp_wput_cmdblk(q, mp); 6746 return; 6747 6748 case M_PROTO: 6749 case M_PCPROTO: 6750 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 6751 freemsg(mp); 6752 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6753 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 6754 return; 6755 } 6756 switch (((t_primp_t)rptr)->type) { 6757 case T_ADDR_REQ: 6758 udp_addr_req(q, mp); 6759 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6760 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 6761 return; 6762 case O_T_BIND_REQ: 6763 case T_BIND_REQ: 6764 udp_tpi_bind(q, mp); 6765 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6766 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 6767 return; 6768 case T_CONN_REQ: 6769 udp_tpi_connect(q, mp); 6770 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6771 "udp_wput_other_end: q %p (%S)", q, "connreq"); 6772 return; 6773 case T_CAPABILITY_REQ: 6774 udp_capability_req(q, mp); 6775 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6776 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 6777 return; 6778 case T_INFO_REQ: 6779 udp_info_req(q, mp); 6780 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6781 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 6782 return; 6783 case T_UNITDATA_REQ: 6784 /* 6785 * If a T_UNITDATA_REQ gets here, the address must 6786 * be bad. Valid T_UNITDATA_REQs are handled 6787 * in udp_wput. 
6788 */ 6789 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 6790 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6791 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 6792 return; 6793 case T_UNBIND_REQ: 6794 udp_tpi_unbind(q, mp); 6795 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6796 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 6797 return; 6798 case T_SVR4_OPTMGMT_REQ: 6799 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 6800 cr)) { 6801 (void) svr4_optcom_req(q, 6802 mp, cr, &udp_opt_obj, B_TRUE); 6803 } 6804 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6805 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6806 return; 6807 6808 case T_OPTMGMT_REQ: 6809 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 6810 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6811 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 6812 return; 6813 6814 case T_DISCON_REQ: 6815 udp_tpi_disconnect(q, mp); 6816 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6817 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 6818 return; 6819 6820 /* The following TPI message is not supported by udp. */ 6821 case O_T_CONN_RES: 6822 case T_CONN_RES: 6823 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6824 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6825 "udp_wput_other_end: q %p (%S)", q, 6826 "connres/disconreq"); 6827 return; 6828 6829 /* The following 3 TPI messages are illegal for udp. */ 6830 case T_DATA_REQ: 6831 case T_EXDATA_REQ: 6832 case T_ORDREL_REQ: 6833 udp_err_ack(q, mp, TNOTSUPPORT, 0); 6834 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6835 "udp_wput_other_end: q %p (%S)", q, 6836 "data/exdata/ordrel"); 6837 return; 6838 default: 6839 break; 6840 } 6841 break; 6842 case M_FLUSH: 6843 if (*rptr & FLUSHW) 6844 flushq(q, FLUSHDATA); 6845 break; 6846 case M_IOCTL: 6847 iocp = (struct iocblk *)mp->b_rptr; 6848 switch (iocp->ioc_cmd) { 6849 case TI_GETPEERNAME: 6850 if (udp->udp_state != TS_DATA_XFER) { 6851 /* 6852 * If a default destination address has not 6853 * been associated with the stream, then we 6854 * don't know the peer's name. 6855 */ 6856 iocp->ioc_error = ENOTCONN; 6857 iocp->ioc_count = 0; 6858 mp->b_datap->db_type = M_IOCACK; 6859 qreply(q, mp); 6860 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6861 "udp_wput_other_end: q %p (%S)", q, 6862 "getpeername"); 6863 return; 6864 } 6865 /* FALLTHRU */ 6866 case TI_GETMYNAME: { 6867 /* 6868 * For TI_GETPEERNAME and TI_GETMYNAME, we first 6869 * need to copyin the user's strbuf structure. 6870 * Processing will continue in the M_IOCDATA case 6871 * below. 6872 */ 6873 mi_copyin(q, mp, NULL, 6874 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 6875 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6876 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 6877 return; 6878 } 6879 case ND_SET: 6880 /* nd_getset performs the necessary checking */ 6881 case ND_GET: 6882 if (nd_getset(q, us->us_nd, mp)) { 6883 qreply(q, mp); 6884 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6885 "udp_wput_other_end: q %p (%S)", q, "get"); 6886 return; 6887 } 6888 break; 6889 case _SIOCSOCKFALLBACK: 6890 /* 6891 * Either sockmod is about to be popped and the 6892 * socket would now be treated as a plain stream, 6893 * or a module is about to be pushed so we could 6894 * no longer use read-side synchronous stream. 6895 * Drain any queued data and disable direct sockfs 6896 * interface from now on. 
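 * Once udp_disable_direct_sockfs() below has run, udp_rrw() fails
 * with EBUSY and inbound datagrams are delivered upstream with
 * putnext() instead of being queued on udp_rcv_list for synchronous
 * retrieval.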
6897 */ 6898 if (!udp->udp_issocket) { 6899 DB_TYPE(mp) = M_IOCNAK; 6900 iocp->ioc_error = EINVAL; 6901 } else { 6902 udp_disable_direct_sockfs(udp); 6903 6904 DB_TYPE(mp) = M_IOCACK; 6905 iocp->ioc_error = 0; 6906 } 6907 iocp->ioc_count = 0; 6908 iocp->ioc_rval = 0; 6909 qreply(q, mp); 6910 return; 6911 default: 6912 break; 6913 } 6914 break; 6915 case M_IOCDATA: 6916 udp_wput_iocdata(q, mp); 6917 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6918 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 6919 return; 6920 default: 6921 /* Unrecognized messages are passed through without change. */ 6922 break; 6923 } 6924 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 6925 "udp_wput_other_end: q %p (%S)", q, "end"); 6926 ip_output(connp, mp, q, IP_WPUT); 6927 } 6928 6929 /* 6930 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 6931 * messages. 6932 */ 6933 static void 6934 udp_wput_iocdata(queue_t *q, mblk_t *mp) 6935 { 6936 mblk_t *mp1; 6937 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 6938 STRUCT_HANDLE(strbuf, sb); 6939 udp_t *udp = Q_TO_UDP(q); 6940 int error; 6941 uint_t addrlen; 6942 6943 /* Make sure it is one of ours. */ 6944 switch (iocp->ioc_cmd) { 6945 case TI_GETMYNAME: 6946 case TI_GETPEERNAME: 6947 break; 6948 default: 6949 ip_output(udp->udp_connp, mp, q, IP_WPUT); 6950 return; 6951 } 6952 6953 switch (mi_copy_state(q, mp, &mp1)) { 6954 case -1: 6955 return; 6956 case MI_COPY_CASE(MI_COPY_IN, 1): 6957 break; 6958 case MI_COPY_CASE(MI_COPY_OUT, 1): 6959 /* 6960 * The address has been copied out, so now 6961 * copyout the strbuf. 6962 */ 6963 mi_copyout(q, mp); 6964 return; 6965 case MI_COPY_CASE(MI_COPY_OUT, 2): 6966 /* 6967 * The address and strbuf have been copied out. 6968 * We're done, so just acknowledge the original 6969 * M_IOCTL. 6970 */ 6971 mi_copy_done(q, mp, 0); 6972 return; 6973 default: 6974 /* 6975 * Something strange has happened, so acknowledge 6976 * the original M_IOCTL with an EPROTO error. 6977 */ 6978 mi_copy_done(q, mp, EPROTO); 6979 return; 6980 } 6981 6982 /* 6983 * Now we have the strbuf structure for TI_GETMYNAME 6984 * and TI_GETPEERNAME. Next we copyout the requested 6985 * address and then we'll copyout the strbuf. 6986 */ 6987 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 6988 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); 6989 if (STRUCT_FGET(sb, maxlen) < addrlen) { 6990 mi_copy_done(q, mp, EINVAL); 6991 return; 6992 } 6993 6994 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 6995 6996 if (mp1 == NULL) 6997 return; 6998 6999 rw_enter(&udp->udp_rwlock, RW_READER); 7000 switch (iocp->ioc_cmd) { 7001 case TI_GETMYNAME: 7002 error = udp_do_getsockname(udp, (void *)mp1->b_rptr, &addrlen); 7003 break; 7004 case TI_GETPEERNAME: 7005 error = udp_do_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7006 break; 7007 } 7008 rw_exit(&udp->udp_rwlock); 7009 7010 if (error != 0) { 7011 mi_copy_done(q, mp, error); 7012 } else { 7013 mp1->b_wptr += addrlen; 7014 STRUCT_FSET(sb, len, addrlen); 7015 7016 /* Copy out the address */ 7017 mi_copyout(q, mp); 7018 } 7019 } 7020 7021 static int 7022 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7023 udpattrs_t *udpattrs) 7024 { 7025 struct T_unitdata_req *udreqp; 7026 int is_absreq_failure; 7027 cred_t *cr; 7028 conn_t *connp = Q_TO_CONN(q); 7029 7030 ASSERT(((t_primp_t)mp->b_rptr)->type); 7031 7032 cr = DB_CREDDEF(mp, connp->conn_cred); 7033 7034 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7035 7036 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7037 udreqp->OPT_offset, cr, &udp_opt_obj, 7038 udpattrs, &is_absreq_failure); 7039 7040 if (*errorp != 0) { 7041 /* 7042 * Note: No special action needed in this 7043 * module for "is_absreq_failure" 7044 */ 7045 return (-1); /* failure */ 7046 } 7047 ASSERT(is_absreq_failure == 0); 7048 return (0); /* success */ 7049 } 7050 7051 void 7052 udp_ddi_g_init(void) 7053 { 7054 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7055 udp_opt_obj.odb_opt_arr_cnt); 7056 7057 /* 7058 * We want to be informed each time a stack is created or 7059 * destroyed in the kernel, so we can maintain the 7060 * set of udp_stack_t's. 7061 */ 7062 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7063 } 7064 7065 void 7066 udp_ddi_g_destroy(void) 7067 { 7068 netstack_unregister(NS_UDP); 7069 } 7070 7071 #define INET_NAME "ip" 7072 7073 /* 7074 * Initialize the UDP stack instance. 7075 */ 7076 static void * 7077 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7078 { 7079 udp_stack_t *us; 7080 udpparam_t *pa; 7081 int i; 7082 int error = 0; 7083 major_t major; 7084 7085 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7086 us->us_netstack = ns; 7087 7088 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7089 us->us_epriv_ports[0] = 2049; 7090 us->us_epriv_ports[1] = 4045; 7091 7092 /* 7093 * The smallest anonymous port in the priviledged port range which UDP 7094 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7095 */ 7096 us->us_min_anonpriv_port = 512; 7097 7098 us->us_bind_fanout_size = udp_bind_fanout_size; 7099 7100 /* Roundup variable that might have been modified in /etc/system */ 7101 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7102 /* Not a power of two. 
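 * For example, a udp_bind_fanout_size of 600 set in /etc/system makes
 * the loop below stop at i == 10 (600 < 1024), giving 1024 buckets;
 * a value that is already a power of two skips this block entirely.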
Round up to nearest power of two */ 7103 for (i = 0; i < 31; i++) { 7104 if (us->us_bind_fanout_size < (1 << i)) 7105 break; 7106 } 7107 us->us_bind_fanout_size = 1 << i; 7108 } 7109 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7110 sizeof (udp_fanout_t), KM_SLEEP); 7111 for (i = 0; i < us->us_bind_fanout_size; i++) { 7112 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7113 NULL); 7114 } 7115 7116 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7117 7118 us->us_param_arr = pa; 7119 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7120 7121 (void) udp_param_register(&us->us_nd, 7122 us->us_param_arr, A_CNT(udp_param_arr)); 7123 7124 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7125 us->us_mibkp = udp_kstat_init(stackid); 7126 7127 major = mod_name_to_major(INET_NAME); 7128 error = ldi_ident_from_major(major, &us->us_ldi_ident); 7129 ASSERT(error == 0); 7130 return (us); 7131 } 7132 7133 /* 7134 * Free the UDP stack instance. 7135 */ 7136 static void 7137 udp_stack_fini(netstackid_t stackid, void *arg) 7138 { 7139 udp_stack_t *us = (udp_stack_t *)arg; 7140 int i; 7141 7142 for (i = 0; i < us->us_bind_fanout_size; i++) { 7143 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7144 } 7145 7146 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7147 sizeof (udp_fanout_t)); 7148 7149 us->us_bind_fanout = NULL; 7150 7151 nd_free(&us->us_nd); 7152 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7153 us->us_param_arr = NULL; 7154 7155 udp_kstat_fini(stackid, us->us_mibkp); 7156 us->us_mibkp = NULL; 7157 7158 udp_kstat2_fini(stackid, us->us_kstat); 7159 us->us_kstat = NULL; 7160 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7161 7162 ldi_ident_release(us->us_ldi_ident); 7163 kmem_free(us, sizeof (*us)); 7164 } 7165 7166 static void * 7167 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7168 { 7169 kstat_t *ksp; 7170 7171 udp_stat_t template = { 7172 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7173 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7174 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7175 { "udp_drain", KSTAT_DATA_UINT64 }, 7176 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7177 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7178 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7179 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7180 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7181 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7182 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7183 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7184 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7185 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7186 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7187 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7188 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7189 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7190 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7191 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7192 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7193 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7194 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7195 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7196 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7197 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7198 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7199 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7200 #ifdef DEBUG 7201 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7202 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7203 #endif 7204 }; 7205 7206 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7207 KSTAT_TYPE_NAMED, sizeof 
(template) / sizeof (kstat_named_t), 7208 KSTAT_FLAG_VIRTUAL, stackid); 7209 7210 if (ksp == NULL) 7211 return (NULL); 7212 7213 bcopy(&template, us_statisticsp, sizeof (template)); 7214 ksp->ks_data = (void *)us_statisticsp; 7215 ksp->ks_private = (void *)(uintptr_t)stackid; 7216 7217 kstat_install(ksp); 7218 return (ksp); 7219 } 7220 7221 static void 7222 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7223 { 7224 if (ksp != NULL) { 7225 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7226 kstat_delete_netstack(ksp, stackid); 7227 } 7228 } 7229 7230 static void * 7231 udp_kstat_init(netstackid_t stackid) 7232 { 7233 kstat_t *ksp; 7234 7235 udp_named_kstat_t template = { 7236 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7237 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7238 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7239 { "entrySize", KSTAT_DATA_INT32, 0 }, 7240 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7241 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7242 }; 7243 7244 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7245 KSTAT_TYPE_NAMED, 7246 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7247 7248 if (ksp == NULL || ksp->ks_data == NULL) 7249 return (NULL); 7250 7251 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7252 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7253 7254 bcopy(&template, ksp->ks_data, sizeof (template)); 7255 ksp->ks_update = udp_kstat_update; 7256 ksp->ks_private = (void *)(uintptr_t)stackid; 7257 7258 kstat_install(ksp); 7259 return (ksp); 7260 } 7261 7262 static void 7263 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7264 { 7265 if (ksp != NULL) { 7266 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7267 kstat_delete_netstack(ksp, stackid); 7268 } 7269 } 7270 7271 static int 7272 udp_kstat_update(kstat_t *kp, int rw) 7273 { 7274 udp_named_kstat_t *udpkp; 7275 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7276 netstack_t *ns; 7277 udp_stack_t *us; 7278 7279 if ((kp == NULL) || (kp->ks_data == NULL)) 7280 return (EIO); 7281 7282 if (rw == KSTAT_WRITE) 7283 return (EACCES); 7284 7285 ns = netstack_find_by_stackid(stackid); 7286 if (ns == NULL) 7287 return (-1); 7288 us = ns->netstack_udp; 7289 if (us == NULL) { 7290 netstack_rele(ns); 7291 return (-1); 7292 } 7293 udpkp = (udp_named_kstat_t *)kp->ks_data; 7294 7295 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7296 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7297 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7298 udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors; 7299 netstack_rele(ns); 7300 return (0); 7301 } 7302 7303 /* 7304 * Read-side synchronous stream info entry point, called as a 7305 * result of handling certain STREAMS ioctl operations. 7306 */ 7307 static int 7308 udp_rinfop(queue_t *q, infod_t *dp) 7309 { 7310 mblk_t *mp; 7311 uint_t cmd = dp->d_cmd; 7312 int res = 0; 7313 int error = 0; 7314 udp_t *udp = Q_TO_UDP(q); 7315 struct stdata *stp = STREAM(q); 7316 7317 mutex_enter(&udp->udp_drain_lock); 7318 /* If shutdown on read has happened, return nothing */ 7319 mutex_enter(&stp->sd_lock); 7320 if (stp->sd_flag & STREOF) { 7321 mutex_exit(&stp->sd_lock); 7322 goto done; 7323 } 7324 mutex_exit(&stp->sd_lock); 7325 7326 if ((mp = udp->udp_rcv_list_head) == NULL) 7327 goto done; 7328 7329 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7330 7331 if (cmd & INFOD_COUNT) { 7332 /* 7333 * Return the number of messages. 
7334 */ 7335 dp->d_count += udp->udp_rcv_msgcnt; 7336 res |= INFOD_COUNT; 7337 } 7338 if (cmd & INFOD_BYTES) { 7339 /* 7340 * Return size of all data messages. 7341 */ 7342 dp->d_bytes += udp->udp_rcv_cnt; 7343 res |= INFOD_BYTES; 7344 } 7345 if (cmd & INFOD_FIRSTBYTES) { 7346 /* 7347 * Return size of first data message. 7348 */ 7349 dp->d_bytes = msgdsize(mp); 7350 res |= INFOD_FIRSTBYTES; 7351 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7352 } 7353 if (cmd & INFOD_COPYOUT) { 7354 mblk_t *mp1 = mp->b_cont; 7355 int n; 7356 /* 7357 * Return data contents of first message. 7358 */ 7359 ASSERT(DB_TYPE(mp1) == M_DATA); 7360 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7361 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7362 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7363 UIO_READ, dp->d_uiop)) != 0) { 7364 goto done; 7365 } 7366 mp1 = mp1->b_cont; 7367 } 7368 res |= INFOD_COPYOUT; 7369 dp->d_cmd &= ~INFOD_COPYOUT; 7370 } 7371 done: 7372 mutex_exit(&udp->udp_drain_lock); 7373 7374 dp->d_res |= res; 7375 7376 return (error); 7377 } 7378 7379 /* 7380 * Read-side synchronous stream entry point. This is called as a result 7381 * of recv/read operation done at sockfs, and is guaranteed to execute 7382 * outside of the interrupt thread context. It returns a single datagram 7383 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7384 */ 7385 static int 7386 udp_rrw(queue_t *q, struiod_t *dp) 7387 { 7388 mblk_t *mp; 7389 udp_t *udp = Q_TO_UDP(q); 7390 udp_stack_t *us = udp->udp_us; 7391 7392 /* 7393 * Dequeue datagram from the head of the list and return 7394 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7395 * set/cleared depending on whether or not there's data 7396 * remaining in the list. 7397 */ 7398 mutex_enter(&udp->udp_drain_lock); 7399 if (!udp->udp_direct_sockfs) { 7400 mutex_exit(&udp->udp_drain_lock); 7401 UDP_STAT(us, udp_rrw_busy); 7402 return (EBUSY); 7403 } 7404 if ((mp = udp->udp_rcv_list_head) != NULL) { 7405 uint_t size = msgdsize(mp); 7406 7407 /* Last datagram in the list? */ 7408 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7409 udp->udp_rcv_list_tail = NULL; 7410 mp->b_next = NULL; 7411 7412 udp->udp_rcv_cnt -= size; 7413 udp->udp_rcv_msgcnt--; 7414 UDP_STAT(us, udp_rrw_msgcnt); 7415 7416 /* No longer flow-controlling? */ 7417 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7418 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7419 udp->udp_drain_qfull = B_FALSE; 7420 } 7421 if (udp->udp_rcv_list_head == NULL) { 7422 /* 7423 * Either we just dequeued the last datagram or 7424 * we get here from sockfs and have nothing to 7425 * return; in this case clear RSLEEP. 7426 */ 7427 ASSERT(udp->udp_rcv_cnt == 0); 7428 ASSERT(udp->udp_rcv_msgcnt == 0); 7429 ASSERT(udp->udp_rcv_list_tail == NULL); 7430 STR_WAKEUP_CLEAR(STREAM(q)); 7431 } else { 7432 /* 7433 * More data follows; we need udp_rrw() to be 7434 * called in future to pick up the rest. 7435 */ 7436 STR_WAKEUP_SET(STREAM(q)); 7437 } 7438 mutex_exit(&udp->udp_drain_lock); 7439 dp->d_mp = mp; 7440 return (0); 7441 } 7442 7443 /* 7444 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7445 * list; this is typically executed within the interrupt thread context 7446 * and so we do things as quickly as possible. 
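 * Note that the single high-water mark udp_rcv_hiwat bounds both the
 * byte count (udp_rcv_cnt) and the message count (udp_rcv_msgcnt);
 * once either limit is reached udp_drain_qfull is set, and udp_rrw()
 * clears it again only after the application has drained below both.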
7447 */ 7448 static void 7449 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7450 { 7451 ASSERT(q == RD(q)); 7452 ASSERT(pkt_len == msgdsize(mp)); 7453 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7454 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7455 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7456 7457 mutex_enter(&udp->udp_drain_lock); 7458 /* 7459 * Wake up and signal the receiving app; it is okay to do this 7460 * before enqueueing the mp because we are holding the drain lock. 7461 * One of the advantages of synchronous stream is the ability for 7462 * us to find out when the application performs a read on the 7463 * socket by way of udp_rrw() entry point being called. We need 7464 * to generate SIGPOLL/SIGIO for each received data in the case 7465 * of asynchronous socket just as in the strrput() case. However, 7466 * we only wake the application up when necessary, i.e. during the 7467 * first enqueue. When udp_rrw() is called, we send up a single 7468 * datagram upstream and call STR_WAKEUP_SET() again when there 7469 * are still data remaining in our receive queue. 7470 */ 7471 STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head); 7472 if (udp->udp_rcv_list_head == NULL) 7473 udp->udp_rcv_list_head = mp; 7474 else 7475 udp->udp_rcv_list_tail->b_next = mp; 7476 udp->udp_rcv_list_tail = mp; 7477 udp->udp_rcv_cnt += pkt_len; 7478 udp->udp_rcv_msgcnt++; 7479 7480 /* Need to flow-control? */ 7481 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7482 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7483 udp->udp_drain_qfull = B_TRUE; 7484 7485 mutex_exit(&udp->udp_drain_lock); 7486 } 7487 7488 /* 7489 * Drain the contents of receive list to the module upstream; we do 7490 * this during close or when we fallback to the slow mode due to 7491 * sockmod being popped or a module being pushed on top of us. 7492 */ 7493 static void 7494 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7495 { 7496 mblk_t *mp; 7497 udp_stack_t *us = udp->udp_us; 7498 7499 mutex_enter(&udp->udp_drain_lock); 7500 /* 7501 * There is no race with a concurrent udp_input() sending 7502 * up packets using putnext() after we have cleared the 7503 * udp_direct_sockfs flag but before we have completed 7504 * sending up the packets in udp_rcv_list, since we are 7505 * either a writer or we have quiesced the conn. 7506 */ 7507 udp->udp_direct_sockfs = B_FALSE; 7508 mutex_exit(&udp->udp_drain_lock); 7509 7510 if (udp->udp_rcv_list_head != NULL) 7511 UDP_STAT(us, udp_drain); 7512 7513 /* 7514 * Send up everything via putnext(); note here that we 7515 * don't need the udp_drain_lock to protect us since 7516 * nothing can enter udp_rrw() and that we currently 7517 * have exclusive access to this udp. 
7518 */ 7519 while ((mp = udp->udp_rcv_list_head) != NULL) { 7520 udp->udp_rcv_list_head = mp->b_next; 7521 mp->b_next = NULL; 7522 udp->udp_rcv_cnt -= msgdsize(mp); 7523 udp->udp_rcv_msgcnt--; 7524 if (closing) { 7525 freemsg(mp); 7526 } else { 7527 ASSERT(q == RD(q)); 7528 putnext(q, mp); 7529 } 7530 } 7531 ASSERT(udp->udp_rcv_cnt == 0); 7532 ASSERT(udp->udp_rcv_msgcnt == 0); 7533 ASSERT(udp->udp_rcv_list_head == NULL); 7534 udp->udp_rcv_list_tail = NULL; 7535 udp->udp_drain_qfull = B_FALSE; 7536 } 7537 7538 static size_t 7539 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7540 { 7541 udp_stack_t *us = udp->udp_us; 7542 7543 /* We add a bit of extra buffering */ 7544 size += size >> 1; 7545 if (size > us->us_max_buf) 7546 size = us->us_max_buf; 7547 7548 udp->udp_rcv_hiwat = size; 7549 return (size); 7550 } 7551 7552 /* 7553 * For the lower queue so that UDP can be a dummy mux. 7554 * Nobody should be sending 7555 * packets up this stream 7556 */ 7557 static void 7558 udp_lrput(queue_t *q, mblk_t *mp) 7559 { 7560 mblk_t *mp1; 7561 7562 switch (mp->b_datap->db_type) { 7563 case M_FLUSH: 7564 /* Turn around */ 7565 if (*mp->b_rptr & FLUSHW) { 7566 *mp->b_rptr &= ~FLUSHR; 7567 qreply(q, mp); 7568 return; 7569 } 7570 break; 7571 } 7572 /* Could receive messages that passed through ar_rput */ 7573 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 7574 mp1->b_prev = mp1->b_next = NULL; 7575 freemsg(mp); 7576 } 7577 7578 /* 7579 * For the lower queue so that UDP can be a dummy mux. 7580 * Nobody should be sending packets down this stream. 7581 */ 7582 /* ARGSUSED */ 7583 void 7584 udp_lwput(queue_t *q, mblk_t *mp) 7585 { 7586 freemsg(mp); 7587 } 7588 7589 /* 7590 * Below routines for UDP socket module. 7591 */ 7592 7593 static conn_t * 7594 udp_do_open(cred_t *credp, boolean_t isv6, int flags) 7595 { 7596 udp_t *udp; 7597 conn_t *connp; 7598 zoneid_t zoneid; 7599 netstack_t *ns; 7600 udp_stack_t *us; 7601 7602 ns = netstack_find_by_cred(credp); 7603 ASSERT(ns != NULL); 7604 us = ns->netstack_udp; 7605 ASSERT(us != NULL); 7606 7607 /* 7608 * For exclusive stacks we set the zoneid to zero 7609 * to make UDP operate as if in the global zone. 7610 */ 7611 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 7612 zoneid = GLOBAL_ZONEID; 7613 else 7614 zoneid = crgetzoneid(credp); 7615 7616 ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP); 7617 7618 connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns); 7619 if (connp == NULL) { 7620 netstack_rele(ns); 7621 return (NULL); 7622 } 7623 udp = connp->conn_udp; 7624 7625 /* 7626 * ipcl_conn_create did a netstack_hold. Undo the hold that was 7627 * done by netstack_find_by_cred() 7628 */ 7629 netstack_rele(ns); 7630 7631 rw_enter(&udp->udp_rwlock, RW_WRITER); 7632 ASSERT(connp->conn_ulp == IPPROTO_UDP); 7633 ASSERT(connp->conn_udp == udp); 7634 ASSERT(udp->udp_connp == connp); 7635 7636 /* Set the initial state of the stream and the privilege status. 
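 * An AF_INET6 endpoint starts out with udp_ipversion IPV6_VERSION and
 * the larger IPv6 header allowance; udp_do_bind() may later flip it
 * to IPV4_VERSION if the endpoint binds to an IPv4-mapped address.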
*/ 7637 udp->udp_state = TS_UNBND; 7638 if (isv6) { 7639 udp->udp_family = AF_INET6; 7640 udp->udp_ipversion = IPV6_VERSION; 7641 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7642 udp->udp_ttl = us->us_ipv6_hoplimit; 7643 connp->conn_af_isv6 = B_TRUE; 7644 connp->conn_flags |= IPCL_ISV6; 7645 } else { 7646 udp->udp_family = AF_INET; 7647 udp->udp_ipversion = IPV4_VERSION; 7648 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 7649 udp->udp_ttl = us->us_ipv4_ttl; 7650 connp->conn_af_isv6 = B_FALSE; 7651 connp->conn_flags &= ~IPCL_ISV6; 7652 } 7653 7654 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 7655 udp->udp_pending_op = -1; 7656 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 7657 connp->conn_zoneid = zoneid; 7658 7659 udp->udp_open_time = lbolt64; 7660 udp->udp_open_pid = curproc->p_pid; 7661 7662 /* 7663 * If the caller has the process-wide flag set, then default to MAC 7664 * exempt mode. This allows read-down to unlabeled hosts. 7665 */ 7666 if (getpflags(NET_MAC_AWARE, credp) != 0) 7667 connp->conn_mac_exempt = B_TRUE; 7668 7669 connp->conn_ulp_labeled = is_system_labeled(); 7670 7671 udp->udp_us = us; 7672 7673 connp->conn_recv = udp_input; 7674 crhold(credp); 7675 connp->conn_cred = credp; 7676 7677 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 7678 7679 rw_exit(&udp->udp_rwlock); 7680 7681 return (connp); 7682 } 7683 7684 /* ARGSUSED */ 7685 sock_lower_handle_t 7686 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls, 7687 uint_t *smodep, int *errorp, int flags, cred_t *credp) 7688 { 7689 udp_t *udp = NULL; 7690 udp_stack_t *us; 7691 conn_t *connp; 7692 boolean_t isv6; 7693 7694 if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) || 7695 (proto != 0 && proto != IPPROTO_UDP)) { 7696 *errorp = EPROTONOSUPPORT; 7697 return (NULL); 7698 } 7699 7700 if (family == AF_INET6) 7701 isv6 = B_TRUE; 7702 else 7703 isv6 = B_FALSE; 7704 7705 connp = udp_do_open(credp, isv6, flags); 7706 if (connp == NULL) { 7707 *errorp = ENOMEM; 7708 return (NULL); 7709 } 7710 7711 udp = connp->conn_udp; 7712 ASSERT(udp != NULL); 7713 us = udp->udp_us; 7714 ASSERT(us != NULL); 7715 7716 connp->conn_flags |= IPCL_NONSTR | IPCL_SOCKET; 7717 7718 /* Set flow control */ 7719 rw_enter(&udp->udp_rwlock, RW_WRITER); 7720 (void) udp_set_rcv_hiwat(udp, us->us_recv_hiwat); 7721 udp->udp_rcv_disply_hiwat = us->us_recv_hiwat; 7722 udp->udp_rcv_lowat = udp_mod_info.mi_lowat; 7723 udp->udp_xmit_hiwat = us->us_xmit_hiwat; 7724 udp->udp_xmit_lowat = us->us_xmit_lowat; 7725 7726 if (udp->udp_family == AF_INET6) { 7727 /* Build initial header template for transmit */ 7728 if ((*errorp = udp_build_hdrs(udp)) != 0) { 7729 rw_exit(&udp->udp_rwlock); 7730 ipcl_conn_destroy(connp); 7731 return (NULL); 7732 } 7733 } 7734 rw_exit(&udp->udp_rwlock); 7735 7736 connp->conn_flow_cntrld = B_FALSE; 7737 7738 ASSERT(us->us_ldi_ident != NULL); 7739 7740 if ((*errorp = ip_create_helper_stream(connp, us->us_ldi_ident)) != 0) { 7741 ip1dbg(("create of IP helper stream failed\n")); 7742 udp_do_close(connp); 7743 return (NULL); 7744 } 7745 7746 /* Set the send flow control */ 7747 connp->conn_wq->q_hiwat = us->us_xmit_hiwat; 7748 connp->conn_wq->q_lowat = us->us_xmit_lowat; 7749 7750 mutex_enter(&connp->conn_lock); 7751 connp->conn_state_flags &= ~CONN_INCIPIENT; 7752 mutex_exit(&connp->conn_lock); 7753 7754 *errorp = 0; 7755 *smodep = SM_ATOMIC; 7756 *sock_downcalls = &sock_udp_downcalls; 7757 return ((sock_lower_handle_t)connp); 7758 } 7759 7760 /* ARGSUSED */ 7761 void 7762 
udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle, 7763 sock_upcalls_t *sock_upcalls, int flags, cred_t *cr) 7764 { 7765 conn_t *connp = (conn_t *)proto_handle; 7766 udp_t *udp = connp->conn_udp; 7767 udp_stack_t *us = udp->udp_us; 7768 struct sock_proto_props sopp; 7769 7770 connp->conn_upcalls = sock_upcalls; 7771 connp->conn_upper_handle = sock_handle; 7772 7773 sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | 7774 SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ; 7775 sopp.sopp_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 7776 sopp.sopp_maxblk = INFPSZ; 7777 sopp.sopp_rxhiwat = udp->udp_rcv_hiwat; 7778 sopp.sopp_maxaddrlen = sizeof (sin6_t); 7779 sopp.sopp_maxpsz = 7780 (udp->udp_family == AF_INET) ? UDP_MAXPACKET_IPV4 : 7781 UDP_MAXPACKET_IPV6; 7782 sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 : 7783 udp_mod_info.mi_minpsz; 7784 7785 (*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle, 7786 &sopp); 7787 } 7788 7789 static void 7790 udp_do_close(conn_t *connp) 7791 { 7792 udp_t *udp; 7793 7794 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 7795 udp = connp->conn_udp; 7796 7797 udp_quiesce_conn(connp); 7798 ip_quiesce_conn(connp); 7799 7800 if (!IPCL_IS_NONSTR(connp)) { 7801 /* 7802 * Disable read-side synchronous stream 7803 * interface and drain any queued data. 7804 */ 7805 ASSERT(connp->conn_wq != NULL); 7806 udp_rcv_drain(connp->conn_wq, udp, B_TRUE); 7807 ASSERT(!udp->udp_direct_sockfs); 7808 7809 ASSERT(connp->conn_rq != NULL); 7810 qprocsoff(connp->conn_rq); 7811 } 7812 7813 ASSERT(udp->udp_rcv_cnt == 0); 7814 ASSERT(udp->udp_rcv_msgcnt == 0); 7815 ASSERT(udp->udp_rcv_list_head == NULL); 7816 ASSERT(udp->udp_rcv_list_tail == NULL); 7817 7818 udp_close_free(connp); 7819 7820 /* 7821 * Now we are truly single threaded on this stream, and can 7822 * delete the things hanging off the connp, and finally the connp. 7823 * We removed this connp from the fanout list, it cannot be 7824 * accessed thru the fanouts, and we already waited for the 7825 * conn_ref to drop to 0. We are already in close, so 7826 * there cannot be any other thread from the top. qprocsoff 7827 * has completed, and service has completed or won't run in 7828 * future. 
7829 */ 7830 ASSERT(connp->conn_ref == 1); 7831 if (!IPCL_IS_NONSTR(connp)) { 7832 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 7833 } else { 7834 ip_close_helper_stream(connp); 7835 } 7836 7837 connp->conn_ref--; 7838 ipcl_conn_destroy(connp); 7839 } 7840 7841 /* ARGSUSED */ 7842 int 7843 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr) 7844 { 7845 conn_t *connp = (conn_t *)proto_handle; 7846 7847 udp_do_close(connp); 7848 return (0); 7849 } 7850 7851 static int 7852 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr, 7853 boolean_t bind_to_req_port_only) 7854 { 7855 sin_t *sin; 7856 sin6_t *sin6; 7857 sin6_t sin6addr; 7858 in_port_t port; /* Host byte order */ 7859 in_port_t requested_port; /* Host byte order */ 7860 int count; 7861 in6_addr_t v6src; 7862 int loopmax; 7863 udp_fanout_t *udpf; 7864 in_port_t lport; /* Network byte order */ 7865 zoneid_t zoneid; 7866 udp_t *udp; 7867 boolean_t is_inaddr_any; 7868 mlp_type_t addrtype, mlptype; 7869 udp_stack_t *us; 7870 int error = 0; 7871 mblk_t *mp = NULL; 7872 7873 udp = connp->conn_udp; 7874 us = udp->udp_us; 7875 7876 if (udp->udp_state != TS_UNBND) { 7877 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7878 "udp_bind: bad state, %u", udp->udp_state); 7879 return (-TOUTSTATE); 7880 } 7881 7882 switch (len) { 7883 case 0: 7884 if (udp->udp_family == AF_INET) { 7885 sin = (sin_t *)&sin6addr; 7886 *sin = sin_null; 7887 sin->sin_family = AF_INET; 7888 sin->sin_addr.s_addr = INADDR_ANY; 7889 udp->udp_ipversion = IPV4_VERSION; 7890 } else { 7891 ASSERT(udp->udp_family == AF_INET6); 7892 sin6 = (sin6_t *)&sin6addr; 7893 *sin6 = sin6_null; 7894 sin6->sin6_family = AF_INET6; 7895 V6_SET_ZERO(sin6->sin6_addr); 7896 udp->udp_ipversion = IPV6_VERSION; 7897 } 7898 port = 0; 7899 break; 7900 7901 case sizeof (sin_t): /* Complete IPv4 address */ 7902 sin = (sin_t *)sa; 7903 7904 if (sin == NULL || !OK_32PTR((char *)sin)) 7905 return (EINVAL); 7906 7907 if (udp->udp_family != AF_INET || 7908 sin->sin_family != AF_INET) { 7909 return (EAFNOSUPPORT); 7910 } 7911 port = ntohs(sin->sin_port); 7912 break; 7913 7914 case sizeof (sin6_t): /* complete IPv6 address */ 7915 sin6 = (sin6_t *)sa; 7916 7917 if (sin6 == NULL || !OK_32PTR((char *)sin6)) 7918 return (EINVAL); 7919 7920 if (udp->udp_family != AF_INET6 || 7921 sin6->sin6_family != AF_INET6) { 7922 return (EAFNOSUPPORT); 7923 } 7924 port = ntohs(sin6->sin6_port); 7925 break; 7926 7927 default: /* Invalid request */ 7928 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7929 "udp_bind: bad ADDR_length length %u", len); 7930 return (-TBADADDR); 7931 } 7932 7933 requested_port = port; 7934 7935 if (requested_port == 0 || !bind_to_req_port_only) 7936 bind_to_req_port_only = B_FALSE; 7937 else /* T_BIND_REQ and requested_port != 0 */ 7938 bind_to_req_port_only = B_TRUE; 7939 7940 if (requested_port == 0) { 7941 /* 7942 * If the application passed in zero for the port number, it 7943 * doesn't care which port number we bind to. Get one in the 7944 * valid range. 7945 */ 7946 if (udp->udp_anon_priv_bind) { 7947 port = udp_get_next_priv_port(udp); 7948 } else { 7949 port = udp_update_next_port(udp, 7950 us->us_next_port_to_try, B_TRUE); 7951 } 7952 } else { 7953 /* 7954 * If the port is in the well-known privileged range, 7955 * make sure the caller was privileged. 
7956 */ 7957 int i; 7958 boolean_t priv = B_FALSE; 7959 7960 if (port < us->us_smallest_nonpriv_port) { 7961 priv = B_TRUE; 7962 } else { 7963 for (i = 0; i < us->us_num_epriv_ports; i++) { 7964 if (port == us->us_epriv_ports[i]) { 7965 priv = B_TRUE; 7966 break; 7967 } 7968 } 7969 } 7970 7971 if (priv) { 7972 if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0) 7973 return (-TACCES); 7974 } 7975 } 7976 7977 if (port == 0) 7978 return (-TNOADDR); 7979 7980 /* 7981 * The state must be TS_UNBND. TPI mandates that users must send 7982 * TPI primitives only 1 at a time and wait for the response before 7983 * sending the next primitive. 7984 */ 7985 rw_enter(&udp->udp_rwlock, RW_WRITER); 7986 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 7987 rw_exit(&udp->udp_rwlock); 7988 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 7989 "udp_bind: bad state, %u", udp->udp_state); 7990 return (-TOUTSTATE); 7991 } 7992 /* XXX how to remove the T_BIND_REQ? Should set it before calling */ 7993 udp->udp_pending_op = T_BIND_REQ; 7994 /* 7995 * Copy the source address into our udp structure. This address 7996 * may still be zero; if so, IP will fill in the correct address 7997 * each time an outbound packet is passed to it. Since the udp is 7998 * not yet in the bind hash list, we don't grab the uf_lock to 7999 * change udp_ipversion 8000 */ 8001 if (udp->udp_family == AF_INET) { 8002 ASSERT(sin != NULL); 8003 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8004 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8005 udp->udp_ip_snd_options_len; 8006 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 8007 } else { 8008 ASSERT(sin6 != NULL); 8009 v6src = sin6->sin6_addr; 8010 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 8011 /* 8012 * no need to hold the uf_lock to set the udp_ipversion 8013 * since we are not yet in the fanout list 8014 */ 8015 udp->udp_ipversion = IPV4_VERSION; 8016 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 8017 UDPH_SIZE + udp->udp_ip_snd_options_len; 8018 } else { 8019 udp->udp_ipversion = IPV6_VERSION; 8020 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8021 } 8022 } 8023 8024 /* 8025 * If udp_reuseaddr is not set, then we have to make sure that 8026 * the IP address and port number the application requested 8027 * (or we selected for the application) is not being used by 8028 * another stream. If another stream is already using the 8029 * requested IP address and port, the behavior depends on 8030 * "bind_to_req_port_only". If set the bind fails; otherwise we 8031 * search for any an unused port to bind to the the stream. 8032 * 8033 * As per the BSD semantics, as modified by the Deering multicast 8034 * changes, if udp_reuseaddr is set, then we allow multiple binds 8035 * to the same port independent of the local IP address. 8036 * 8037 * This is slightly different than in SunOS 4.X which did not 8038 * support IP multicast. Note that the change implemented by the 8039 * Deering multicast code effects all binds - not only binding 8040 * to IP multicast addresses. 8041 * 8042 * Note that when binding to port zero we ignore SO_REUSEADDR in 8043 * order to guarantee a unique port. 
8044 */ 8045 8046 count = 0; 8047 if (udp->udp_anon_priv_bind) { 8048 /* 8049 * loopmax = (IPPORT_RESERVED-1) - 8050 * us->us_min_anonpriv_port + 1 8051 */ 8052 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 8053 } else { 8054 loopmax = us->us_largest_anon_port - 8055 us->us_smallest_anon_port + 1; 8056 } 8057 8058 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 8059 zoneid = connp->conn_zoneid; 8060 8061 for (;;) { 8062 udp_t *udp1; 8063 boolean_t found_exclbind = B_FALSE; 8064 8065 /* 8066 * Walk through the list of udp streams bound to 8067 * requested port with the same IP address. 8068 */ 8069 lport = htons(port); 8070 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 8071 us->us_bind_fanout_size)]; 8072 mutex_enter(&udpf->uf_lock); 8073 for (udp1 = udpf->uf_udp; udp1 != NULL; 8074 udp1 = udp1->udp_bind_hash) { 8075 if (lport != udp1->udp_port) 8076 continue; 8077 8078 /* 8079 * On a labeled system, we must treat bindings to ports 8080 * on shared IP addresses by sockets with MAC exemption 8081 * privilege as being in all zones, as there's 8082 * otherwise no way to identify the right receiver. 8083 */ 8084 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 8085 IPCL_ZONE_MATCH(connp, 8086 udp1->udp_connp->conn_zoneid)) && 8087 !connp->conn_mac_exempt && \ 8088 !udp1->udp_connp->conn_mac_exempt) 8089 continue; 8090 8091 /* 8092 * If UDP_EXCLBIND is set for either the bound or 8093 * binding endpoint, the semantics of bind 8094 * is changed according to the following chart. 8095 * 8096 * spec = specified address (v4 or v6) 8097 * unspec = unspecified address (v4 or v6) 8098 * A = specified addresses are different for endpoints 8099 * 8100 * bound bind to allowed? 8101 * ------------------------------------- 8102 * unspec unspec no 8103 * unspec spec no 8104 * spec unspec no 8105 * spec spec yes if A 8106 * 8107 * For labeled systems, SO_MAC_EXEMPT behaves the same 8108 * as UDP_EXCLBIND, except that zoneid is ignored. 8109 */ 8110 if (udp1->udp_exclbind || udp->udp_exclbind || 8111 udp1->udp_connp->conn_mac_exempt || 8112 connp->conn_mac_exempt) { 8113 if (V6_OR_V4_INADDR_ANY( 8114 udp1->udp_bound_v6src) || 8115 is_inaddr_any || 8116 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8117 &v6src)) { 8118 found_exclbind = B_TRUE; 8119 break; 8120 } 8121 continue; 8122 } 8123 8124 /* 8125 * Check ipversion to allow IPv4 and IPv6 sockets to 8126 * have disjoint port number spaces. 8127 */ 8128 if (udp->udp_ipversion != udp1->udp_ipversion) { 8129 8130 /* 8131 * On the first time through the loop, if the 8132 * the user intentionally specified a 8133 * particular port number, then ignore any 8134 * bindings of the other protocol that may 8135 * conflict. This allows the user to bind IPv6 8136 * alone and get both v4 and v6, or bind both 8137 * both and get each seperately. On subsequent 8138 * times through the loop, we're checking a 8139 * port that we chose (not the user) and thus 8140 * we do not allow casual duplicate bindings. 8141 */ 8142 if (count == 0 && requested_port != 0) 8143 continue; 8144 } 8145 8146 /* 8147 * No difference depending on SO_REUSEADDR. 8148 * 8149 * If existing port is bound to a 8150 * non-wildcard IP address and 8151 * the requesting stream is bound to 8152 * a distinct different IP addresses 8153 * (non-wildcard, also), keep going. 
8154 */ 8155 if (!is_inaddr_any && 8156 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 8157 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 8158 &v6src)) { 8159 continue; 8160 } 8161 break; 8162 } 8163 8164 if (!found_exclbind && 8165 (udp->udp_reuseaddr && requested_port != 0)) { 8166 break; 8167 } 8168 8169 if (udp1 == NULL) { 8170 /* 8171 * No other stream has this IP address 8172 * and port number. We can use it. 8173 */ 8174 break; 8175 } 8176 mutex_exit(&udpf->uf_lock); 8177 if (bind_to_req_port_only) { 8178 /* 8179 * We get here only when requested port 8180 * is bound (and only first of the for() 8181 * loop iteration). 8182 * 8183 * The semantics of this bind request 8184 * require it to fail so we return from 8185 * the routine (and exit the loop). 8186 * 8187 */ 8188 udp->udp_pending_op = -1; 8189 rw_exit(&udp->udp_rwlock); 8190 return (-TADDRBUSY); 8191 } 8192 8193 if (udp->udp_anon_priv_bind) { 8194 port = udp_get_next_priv_port(udp); 8195 } else { 8196 if ((count == 0) && (requested_port != 0)) { 8197 /* 8198 * If the application wants us to find 8199 * a port, get one to start with. Set 8200 * requested_port to 0, so that we will 8201 * update us->us_next_port_to_try below. 8202 */ 8203 port = udp_update_next_port(udp, 8204 us->us_next_port_to_try, B_TRUE); 8205 requested_port = 0; 8206 } else { 8207 port = udp_update_next_port(udp, port + 1, 8208 B_FALSE); 8209 } 8210 } 8211 8212 if (port == 0 || ++count >= loopmax) { 8213 /* 8214 * We've tried every possible port number and 8215 * there are none available, so send an error 8216 * to the user. 8217 */ 8218 udp->udp_pending_op = -1; 8219 rw_exit(&udp->udp_rwlock); 8220 return (-TNOADDR); 8221 } 8222 } 8223 8224 /* 8225 * Copy the source address into our udp structure. This address 8226 * may still be zero; if so, ip will fill in the correct address 8227 * each time an outbound packet is passed to it. 8228 * If we are binding to a broadcast or multicast address then 8229 * udp_post_ip_bind_connect will clear the source address 8230 * when udp_do_bind success. 8231 */ 8232 udp->udp_v6src = udp->udp_bound_v6src = v6src; 8233 udp->udp_port = lport; 8234 /* 8235 * Now reset the the next anonymous port if the application requested 8236 * an anonymous port, or we handed out the next anonymous port. 8237 */ 8238 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 8239 us->us_next_port_to_try = port + 1; 8240 } 8241 8242 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. 
    if (udp->udp_family == AF_INET) {
        sin->sin_port = udp->udp_port;
    } else {
        sin6->sin6_port = udp->udp_port;
        /* Rebuild the header template */
        error = udp_build_hdrs(udp);
        if (error != 0) {
            udp->udp_pending_op = -1;
            rw_exit(&udp->udp_rwlock);
            mutex_exit(&udpf->uf_lock);
            return (error);
        }
    }
    udp->udp_state = TS_IDLE;
    udp_bind_hash_insert(udpf, udp);
    mutex_exit(&udpf->uf_lock);
    rw_exit(&udp->udp_rwlock);

    if (cl_inet_bind) {
        /*
         * Running in cluster mode - register bind information
         */
        if (udp->udp_ipversion == IPV4_VERSION) {
            (*cl_inet_bind)(IPPROTO_UDP, AF_INET,
                (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
                (in_port_t)udp->udp_port);
        } else {
            (*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
                (uint8_t *)&(udp->udp_v6src),
                (in_port_t)udp->udp_port);
        }

    }

    connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
    if (is_system_labeled() && (!connp->conn_anon_port ||
        connp->conn_anon_mlp)) {
        uint16_t mlpport;
        cred_t *cr = connp->conn_cred;
        zone_t *zone;

        zone = crgetzone(cr);
        connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
            mlptSingle;
        addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
            &v6src, us->us_netstack->netstack_ip);
        if (addrtype == mlptSingle) {
            rw_enter(&udp->udp_rwlock, RW_WRITER);
            udp->udp_pending_op = -1;
            rw_exit(&udp->udp_rwlock);
            connp->conn_anon_port = B_FALSE;
            connp->conn_mlp_type = mlptSingle;
            return (-TNOADDR);
        }
        mlpport = connp->conn_anon_port ? PMAPPORT : port;
        mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
            addrtype);
        if (mlptype != mlptSingle &&
            (connp->conn_mlp_type == mlptSingle ||
            secpolicy_net_bindmlp(cr) != 0)) {
            if (udp->udp_debug) {
                (void) strlog(UDP_MOD_ID, 0, 1,
                    SL_ERROR|SL_TRACE,
                    "udp_bind: no priv for multilevel port %d",
                    mlpport);
            }
            rw_enter(&udp->udp_rwlock, RW_WRITER);
            udp->udp_pending_op = -1;
            rw_exit(&udp->udp_rwlock);
            connp->conn_anon_port = B_FALSE;
            connp->conn_mlp_type = mlptSingle;
            return (-TACCES);
        }

        /*
         * If we're specifically binding a shared IP address and the
         * port is MLP on shared addresses, then check to see if this
         * zone actually owns the MLP. Reject if not.
         */
        if (mlptype == mlptShared && addrtype == mlptShared) {
            /*
             * No need to handle exclusive-stack zones since
             * ALL_ZONES only applies to the shared stack.
8326 */ 8327 zoneid_t mlpzone; 8328 8329 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 8330 htons(mlpport)); 8331 if (connp->conn_zoneid != mlpzone) { 8332 if (udp->udp_debug) { 8333 (void) strlog(UDP_MOD_ID, 0, 1, 8334 SL_ERROR|SL_TRACE, 8335 "udp_bind: attempt to bind port " 8336 "%d on shared addr in zone %d " 8337 "(should be %d)", 8338 mlpport, connp->conn_zoneid, 8339 mlpzone); 8340 } 8341 rw_enter(&udp->udp_rwlock, RW_WRITER); 8342 udp->udp_pending_op = -1; 8343 rw_exit(&udp->udp_rwlock); 8344 connp->conn_anon_port = B_FALSE; 8345 connp->conn_mlp_type = mlptSingle; 8346 return (-TACCES); 8347 } 8348 } 8349 if (connp->conn_anon_port) { 8350 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 8351 port, B_TRUE); 8352 if (error != 0) { 8353 if (udp->udp_debug) { 8354 (void) strlog(UDP_MOD_ID, 0, 1, 8355 SL_ERROR|SL_TRACE, 8356 "udp_bind: cannot establish anon " 8357 "MLP for port %d", port); 8358 } 8359 rw_enter(&udp->udp_rwlock, RW_WRITER); 8360 udp->udp_pending_op = -1; 8361 rw_exit(&udp->udp_rwlock); 8362 connp->conn_anon_port = B_FALSE; 8363 connp->conn_mlp_type = mlptSingle; 8364 return (-TACCES); 8365 } 8366 } 8367 connp->conn_mlp_type = mlptype; 8368 } 8369 8370 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8371 /* 8372 * Append a request for an IRE if udp_v6src not 8373 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 8374 */ 8375 mp = allocb(sizeof (ire_t), BPRI_HI); 8376 if (!mp) { 8377 rw_enter(&udp->udp_rwlock, RW_WRITER); 8378 udp->udp_pending_op = -1; 8379 rw_exit(&udp->udp_rwlock); 8380 return (ENOMEM); 8381 } 8382 mp->b_wptr += sizeof (ire_t); 8383 mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8384 } 8385 if (udp->udp_family == AF_INET6) { 8386 ASSERT(udp->udp_connp->conn_af_isv6); 8387 error = ip_proto_bind_laddr_v6(connp, &mp, IPPROTO_UDP, 8388 &udp->udp_bound_v6src, udp->udp_port, B_TRUE); 8389 } else { 8390 ASSERT(!udp->udp_connp->conn_af_isv6); 8391 error = ip_proto_bind_laddr_v4(connp, &mp, IPPROTO_UDP, 8392 V4_PART_OF_V6(udp->udp_bound_v6src), udp->udp_port, 8393 B_TRUE); 8394 } 8395 8396 (void) udp_post_ip_bind_connect(udp, mp, error); 8397 return (error); 8398 } 8399 8400 int 8401 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, 8402 socklen_t len, cred_t *cr) 8403 { 8404 int error; 8405 conn_t *connp; 8406 8407 connp = (conn_t *)proto_handle; 8408 8409 if (sa == NULL) 8410 error = udp_do_unbind(connp); 8411 else 8412 error = udp_do_bind(connp, sa, len, cr, B_TRUE); 8413 8414 if (error < 0) { 8415 if (error == -TOUTSTATE) 8416 error = EINVAL; 8417 else 8418 error = proto_tlitosyserr(-error); 8419 } 8420 8421 return (error); 8422 } 8423 8424 static int 8425 udp_implicit_bind(conn_t *connp, cred_t *cr) 8426 { 8427 int error; 8428 8429 error = udp_do_bind(connp, NULL, 0, cr, B_FALSE); 8430 return ((error < 0) ? proto_tlitosyserr(-error) : error); 8431 } 8432 8433 /* 8434 * This routine removes a port number association from a stream. It 8435 * is called by udp_unbind and udp_tpi_unbind. 
8436 */ 8437 static int 8438 udp_do_unbind(conn_t *connp) 8439 { 8440 udp_t *udp = connp->conn_udp; 8441 udp_fanout_t *udpf; 8442 udp_stack_t *us = udp->udp_us; 8443 8444 if (cl_inet_unbind != NULL) { 8445 /* 8446 * Running in cluster mode - register unbind information 8447 */ 8448 if (udp->udp_ipversion == IPV4_VERSION) { 8449 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 8450 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 8451 (in_port_t)udp->udp_port); 8452 } else { 8453 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 8454 (uint8_t *)&(udp->udp_v6src), 8455 (in_port_t)udp->udp_port); 8456 } 8457 } 8458 8459 rw_enter(&udp->udp_rwlock, RW_WRITER); 8460 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8461 rw_exit(&udp->udp_rwlock); 8462 return (-TOUTSTATE); 8463 } 8464 udp->udp_pending_op = T_UNBIND_REQ; 8465 rw_exit(&udp->udp_rwlock); 8466 8467 /* 8468 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 8469 * and therefore ip_unbind must never return NULL. 8470 */ 8471 ip_unbind(connp); 8472 8473 /* 8474 * Once we're unbound from IP, the pending operation may be cleared 8475 * here. 8476 */ 8477 rw_enter(&udp->udp_rwlock, RW_WRITER); 8478 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8479 us->us_bind_fanout_size)]; 8480 8481 mutex_enter(&udpf->uf_lock); 8482 udp_bind_hash_remove(udp, B_TRUE); 8483 V6_SET_ZERO(udp->udp_v6src); 8484 V6_SET_ZERO(udp->udp_bound_v6src); 8485 udp->udp_port = 0; 8486 mutex_exit(&udpf->uf_lock); 8487 8488 udp->udp_pending_op = -1; 8489 udp->udp_state = TS_UNBND; 8490 if (udp->udp_family == AF_INET6) 8491 (void) udp_build_hdrs(udp); 8492 rw_exit(&udp->udp_rwlock); 8493 8494 return (0); 8495 } 8496 8497 static int 8498 udp_post_ip_bind_connect(udp_t *udp, mblk_t *ire_mp, int error) 8499 { 8500 ire_t *ire; 8501 udp_fanout_t *udpf; 8502 udp_stack_t *us = udp->udp_us; 8503 8504 ASSERT(udp->udp_pending_op != -1); 8505 rw_enter(&udp->udp_rwlock, RW_WRITER); 8506 if (error == 0) { 8507 /* For udp_do_connect() success */ 8508 /* udp_do_bind() success will do nothing in here */ 8509 /* 8510 * If a broadcast/multicast address was bound, set 8511 * the source address to 0. 8512 * This ensures no datagrams with broadcast address 8513 * as source address are emitted (which would violate 8514 * RFC1122 - Hosts requirements) 8515 * 8516 * Note that when connecting the returned IRE is 8517 * for the destination address and we only perform 8518 * the broadcast check for the source address (it 8519 * is OK to connect to a broadcast/multicast address.) 8520 */ 8521 if (ire_mp != NULL && ire_mp->b_datap->db_type == IRE_DB_TYPE) { 8522 ire = (ire_t *)ire_mp->b_rptr; 8523 8524 /* 8525 * Note: we get IRE_BROADCAST for IPv6 to "mark" a 8526 * multicast local address. 8527 */ 8528 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8529 us->us_bind_fanout_size)]; 8530 if (ire->ire_type == IRE_BROADCAST && 8531 udp->udp_state != TS_DATA_XFER) { 8532 ASSERT(udp->udp_pending_op == T_BIND_REQ || 8533 udp->udp_pending_op == O_T_BIND_REQ); 8534 /* 8535 * This was just a local bind to a broadcast 8536 * addr. 
8537 */ 8538 mutex_enter(&udpf->uf_lock); 8539 V6_SET_ZERO(udp->udp_v6src); 8540 mutex_exit(&udpf->uf_lock); 8541 if (udp->udp_family == AF_INET6) 8542 (void) udp_build_hdrs(udp); 8543 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 8544 if (udp->udp_family == AF_INET6) 8545 (void) udp_build_hdrs(udp); 8546 } 8547 } 8548 } else { 8549 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8550 us->us_bind_fanout_size)]; 8551 mutex_enter(&udpf->uf_lock); 8552 8553 if (udp->udp_state == TS_DATA_XFER) { 8554 /* Connect failed */ 8555 /* Revert back to the bound source */ 8556 udp->udp_v6src = udp->udp_bound_v6src; 8557 udp->udp_state = TS_IDLE; 8558 } else { 8559 /* For udp_do_bind() failed */ 8560 V6_SET_ZERO(udp->udp_v6src); 8561 V6_SET_ZERO(udp->udp_bound_v6src); 8562 udp->udp_state = TS_UNBND; 8563 udp_bind_hash_remove(udp, B_TRUE); 8564 udp->udp_port = 0; 8565 } 8566 mutex_exit(&udpf->uf_lock); 8567 if (udp->udp_family == AF_INET6) 8568 (void) udp_build_hdrs(udp); 8569 } 8570 udp->udp_pending_op = -1; 8571 rw_exit(&udp->udp_rwlock); 8572 if (ire_mp != NULL) 8573 freeb(ire_mp); 8574 return (error); 8575 } 8576 8577 /* 8578 * It associates a default destination address with the stream. 8579 */ 8580 static int 8581 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len) 8582 { 8583 sin6_t *sin6; 8584 sin_t *sin; 8585 in6_addr_t v6dst; 8586 ipaddr_t v4dst; 8587 uint16_t dstport; 8588 uint32_t flowinfo; 8589 mblk_t *ire_mp; 8590 udp_fanout_t *udpf; 8591 udp_t *udp, *udp1; 8592 ushort_t ipversion; 8593 udp_stack_t *us; 8594 int error; 8595 8596 udp = connp->conn_udp; 8597 us = udp->udp_us; 8598 8599 /* 8600 * Address has been verified by the caller 8601 */ 8602 switch (len) { 8603 default: 8604 /* 8605 * Should never happen 8606 */ 8607 return (EINVAL); 8608 8609 case sizeof (sin_t): 8610 sin = (sin_t *)sa; 8611 v4dst = sin->sin_addr.s_addr; 8612 dstport = sin->sin_port; 8613 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8614 ASSERT(udp->udp_ipversion == IPV4_VERSION); 8615 ipversion = IPV4_VERSION; 8616 break; 8617 8618 case sizeof (sin6_t): 8619 sin6 = (sin6_t *)sa; 8620 v6dst = sin6->sin6_addr; 8621 dstport = sin6->sin6_port; 8622 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 8623 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 8624 ipversion = IPV4_VERSION; 8625 flowinfo = 0; 8626 } else { 8627 ipversion = IPV6_VERSION; 8628 flowinfo = sin6->sin6_flowinfo; 8629 } 8630 break; 8631 } 8632 8633 if (dstport == 0) 8634 return (-TBADADDR); 8635 8636 rw_enter(&udp->udp_rwlock, RW_WRITER); 8637 8638 /* 8639 * This UDP must have bound to a port already before doing a connect. 8640 * TPI mandates that users must send TPI primitives only 1 at a time 8641 * and wait for the response before sending the next primitive. 
8642 */ 8643 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 8644 rw_exit(&udp->udp_rwlock); 8645 (void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE, 8646 "udp_connect: bad state, %u", udp->udp_state); 8647 return (-TOUTSTATE); 8648 } 8649 udp->udp_pending_op = T_CONN_REQ; 8650 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 8651 8652 if (ipversion == IPV4_VERSION) { 8653 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 8654 udp->udp_ip_snd_options_len; 8655 } else { 8656 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 8657 } 8658 8659 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 8660 us->us_bind_fanout_size)]; 8661 8662 mutex_enter(&udpf->uf_lock); 8663 if (udp->udp_state == TS_DATA_XFER) { 8664 /* Already connected - clear out state */ 8665 udp->udp_v6src = udp->udp_bound_v6src; 8666 udp->udp_state = TS_IDLE; 8667 } 8668 8669 /* 8670 * Create a default IP header with no IP options. 8671 */ 8672 udp->udp_dstport = dstport; 8673 udp->udp_ipversion = ipversion; 8674 if (ipversion == IPV4_VERSION) { 8675 /* 8676 * Interpret a zero destination to mean loopback. 8677 * Update the T_CONN_REQ (sin/sin6) since it is used to 8678 * generate the T_CONN_CON. 8679 */ 8680 if (v4dst == INADDR_ANY) { 8681 v4dst = htonl(INADDR_LOOPBACK); 8682 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 8683 if (udp->udp_family == AF_INET) { 8684 sin->sin_addr.s_addr = v4dst; 8685 } else { 8686 sin6->sin6_addr = v6dst; 8687 } 8688 } 8689 udp->udp_v6dst = v6dst; 8690 udp->udp_flowinfo = 0; 8691 8692 /* 8693 * If the destination address is multicast and 8694 * an outgoing multicast interface has been set, 8695 * use the address of that interface as our 8696 * source address if no source address has been set. 8697 */ 8698 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 8699 CLASSD(v4dst) && 8700 udp->udp_multicast_if_addr != INADDR_ANY) { 8701 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 8702 &udp->udp_v6src); 8703 } 8704 } else { 8705 ASSERT(udp->udp_ipversion == IPV6_VERSION); 8706 /* 8707 * Interpret a zero destination to mean loopback. 8708 * Update the T_CONN_REQ (sin/sin6) since it is used to 8709 * generate the T_CONN_CON. 8710 */ 8711 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 8712 v6dst = ipv6_loopback; 8713 sin6->sin6_addr = v6dst; 8714 } 8715 udp->udp_v6dst = v6dst; 8716 udp->udp_flowinfo = flowinfo; 8717 /* 8718 * If the destination address is multicast and 8719 * an outgoing multicast interface has been set, 8720 * then the ip bind logic will pick the correct source 8721 * address (i.e. matching the outgoing multicast interface). 
8722 */ 8723 } 8724 8725 /* 8726 * Verify that the src/port/dst/port is unique for all 8727 * connections in TS_DATA_XFER 8728 */ 8729 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 8730 if (udp1->udp_state != TS_DATA_XFER) 8731 continue; 8732 if (udp->udp_port != udp1->udp_port || 8733 udp->udp_ipversion != udp1->udp_ipversion || 8734 dstport != udp1->udp_dstport || 8735 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 8736 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 8737 !(IPCL_ZONE_MATCH(udp->udp_connp, 8738 udp1->udp_connp->conn_zoneid) || 8739 IPCL_ZONE_MATCH(udp1->udp_connp, 8740 udp->udp_connp->conn_zoneid))) 8741 continue; 8742 mutex_exit(&udpf->uf_lock); 8743 udp->udp_pending_op = -1; 8744 rw_exit(&udp->udp_rwlock); 8745 return (-TBADADDR); 8746 } 8747 udp->udp_state = TS_DATA_XFER; 8748 mutex_exit(&udpf->uf_lock); 8749 8750 ire_mp = allocb(sizeof (ire_t), BPRI_HI); 8751 if (ire_mp == NULL) { 8752 mutex_enter(&udpf->uf_lock); 8753 udp->udp_state = TS_IDLE; 8754 udp->udp_pending_op = -1; 8755 mutex_exit(&udpf->uf_lock); 8756 rw_exit(&udp->udp_rwlock); 8757 return (ENOMEM); 8758 } 8759 8760 rw_exit(&udp->udp_rwlock); 8761 8762 ire_mp->b_wptr += sizeof (ire_t); 8763 ire_mp->b_datap->db_type = IRE_DB_REQ_TYPE; 8764 8765 if (udp->udp_family == AF_INET) { 8766 error = ip_proto_bind_connected_v4(connp, &ire_mp, IPPROTO_UDP, 8767 &V4_PART_OF_V6(udp->udp_v6src), udp->udp_port, 8768 V4_PART_OF_V6(udp->udp_v6dst), udp->udp_dstport, 8769 B_TRUE, B_TRUE); 8770 } else { 8771 error = ip_proto_bind_connected_v6(connp, &ire_mp, IPPROTO_UDP, 8772 &udp->udp_v6src, udp->udp_port, &udp->udp_v6dst, 8773 &udp->udp_sticky_ipp, udp->udp_dstport, B_TRUE, B_TRUE); 8774 } 8775 8776 return (udp_post_ip_bind_connect(udp, ire_mp, error)); 8777 } 8778 8779 /* ARGSUSED */ 8780 static int 8781 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa, 8782 socklen_t len, sock_connid_t *id, cred_t *cr) 8783 { 8784 conn_t *connp = (conn_t *)proto_handle; 8785 udp_t *udp = connp->conn_udp; 8786 int error; 8787 boolean_t did_bind = B_FALSE; 8788 8789 if (sa == NULL) { 8790 /* 8791 * Disconnect 8792 * Make sure we are connected 8793 */ 8794 if (udp->udp_state != TS_DATA_XFER) 8795 return (EINVAL); 8796 8797 error = udp_disconnect(connp); 8798 return (error); 8799 } 8800 8801 error = proto_verify_ip_addr(udp->udp_family, sa, len); 8802 if (error != 0) 8803 goto done; 8804 8805 /* do an implicit bind if necessary */ 8806 if (udp->udp_state == TS_UNBND) { 8807 error = udp_implicit_bind(connp, cr); 8808 /* 8809 * We could be racing with an actual bind, in which case 8810 * we would see EPROTO. We cross our fingers and try 8811 * to connect. 
8812 */ 8813 if (!(error == 0 || error == EPROTO)) 8814 goto done; 8815 did_bind = B_TRUE; 8816 } 8817 /* 8818 * set SO_DGRAM_ERRIND 8819 */ 8820 udp->udp_dgram_errind = B_TRUE; 8821 8822 error = udp_do_connect(connp, sa, len); 8823 8824 if (error != 0 && did_bind) { 8825 int unbind_err; 8826 8827 unbind_err = udp_do_unbind(connp); 8828 ASSERT(unbind_err == 0); 8829 } 8830 8831 if (error == 0) { 8832 *id = 0; 8833 (*connp->conn_upcalls->su_connected) 8834 (connp->conn_upper_handle, 0, NULL, -1); 8835 } else if (error < 0) { 8836 error = proto_tlitosyserr(-error); 8837 } 8838 8839 done: 8840 if (error != 0 && udp->udp_state == TS_DATA_XFER) { 8841 /* 8842 * No need to hold locks to set state 8843 * after connect failure socket state is undefined 8844 * We set the state only to imitate old sockfs behavior 8845 */ 8846 udp->udp_state = TS_IDLE; 8847 } 8848 return (error); 8849 } 8850 8851 /* ARGSUSED */ 8852 int 8853 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg, 8854 cred_t *cr) 8855 { 8856 conn_t *connp = (conn_t *)proto_handle; 8857 udp_t *udp = connp->conn_udp; 8858 udp_stack_t *us = udp->udp_us; 8859 int error = 0; 8860 8861 ASSERT(DB_TYPE(mp) == M_DATA); 8862 8863 /* 8864 * If the socket is connected and no change in destination 8865 */ 8866 if (msg->msg_namelen == 0) { 8867 error = udp_send_connected(connp, mp, msg, cr, curproc->p_pid); 8868 if (error == EDESTADDRREQ) 8869 return (error); 8870 else 8871 return (udp->udp_dgram_errind ? error : 0); 8872 } 8873 8874 /* 8875 * Do an implicit bind if necessary. 8876 */ 8877 if (udp->udp_state == TS_UNBND) { 8878 error = udp_implicit_bind(connp, cr); 8879 /* 8880 * We could be racing with an actual bind, in which case 8881 * we would see EPROTO. We cross our fingers and try 8882 * to send. 
8883 */ 8884 if (!(error == 0 || error == EPROTO)) { 8885 freemsg(mp); 8886 return (error); 8887 } 8888 } 8889 8890 rw_enter(&udp->udp_rwlock, RW_WRITER); 8891 8892 if (msg->msg_name != NULL && udp->udp_state == TS_DATA_XFER) { 8893 rw_exit(&udp->udp_rwlock); 8894 freemsg(mp); 8895 return (EISCONN); 8896 } 8897 8898 8899 if (udp->udp_delayed_error != 0) { 8900 boolean_t match; 8901 8902 error = udp->udp_delayed_error; 8903 match = B_FALSE; 8904 udp->udp_delayed_error = 0; 8905 switch (udp->udp_family) { 8906 case AF_INET: { 8907 /* Compare just IP address and port */ 8908 sin_t *sin1 = (sin_t *)msg->msg_name; 8909 sin_t *sin2 = (sin_t *)&udp->udp_delayed_addr; 8910 8911 if (msg->msg_namelen == sizeof (sin_t) && 8912 sin1->sin_port == sin2->sin_port && 8913 sin1->sin_addr.s_addr == sin2->sin_addr.s_addr) 8914 match = B_TRUE; 8915 8916 break; 8917 } 8918 case AF_INET6: { 8919 sin6_t *sin1 = (sin6_t *)msg->msg_name; 8920 sin6_t *sin2 = (sin6_t *)&udp->udp_delayed_addr; 8921 8922 if (msg->msg_namelen == sizeof (sin6_t) && 8923 sin1->sin6_port == sin2->sin6_port && 8924 IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr, 8925 &sin2->sin6_addr)) 8926 match = B_TRUE; 8927 break; 8928 } 8929 default: 8930 ASSERT(0); 8931 } 8932 8933 *((sin6_t *)&udp->udp_delayed_addr) = sin6_null; 8934 8935 if (match) { 8936 rw_exit(&udp->udp_rwlock); 8937 freemsg(mp); 8938 return (error); 8939 } 8940 } 8941 8942 error = proto_verify_ip_addr(udp->udp_family, 8943 (struct sockaddr *)msg->msg_name, msg->msg_namelen); 8944 rw_exit(&udp->udp_rwlock); 8945 8946 if (error != 0) { 8947 freemsg(mp); 8948 return (error); 8949 } 8950 8951 error = udp_send_not_connected(connp, mp, 8952 (struct sockaddr *)msg->msg_name, msg->msg_namelen, msg, cr, 8953 curproc->p_pid); 8954 if (error != 0) { 8955 UDP_STAT(us, udp_out_err_output); 8956 freemsg(mp); 8957 } 8958 return (udp->udp_dgram_errind ? 

void
udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
    boolean_t direct_sockfs, so_proto_quiesced_cb_t quiesced_cb)
{
    conn_t *connp = (conn_t *)proto_handle;
    udp_t *udp;
    struct T_capability_ack tca;
    struct sockaddr_in6 laddr, faddr;
    socklen_t laddrlen, faddrlen;
    short opts;
    struct stroptions *stropt;
    mblk_t *stropt_mp;
    int error;

    udp = connp->conn_udp;

    stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);

    /*
     * Set up the fallback stream that was allocated.
     */
    connp->conn_dev = (dev_t)RD(q)->q_ptr;
    connp->conn_minor_arena = WR(q)->q_ptr;

    RD(q)->q_ptr = WR(q)->q_ptr = connp;

    WR(q)->q_qinfo = &udp_winit;

    connp->conn_rq = RD(q);
    connp->conn_wq = WR(q);

    /* Notify stream head about options before sending up data */
    stropt_mp->b_datap->db_type = M_SETOPTS;
    stropt_mp->b_wptr += sizeof (*stropt);
    stropt = (struct stroptions *)stropt_mp->b_rptr;
    stropt->so_flags = SO_WROFF | SO_HIWAT;
    stropt->so_wroff =
        (ushort_t)(udp->udp_max_hdr_len + udp->udp_us->us_wroff_extra);
    stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
    putnext(RD(q), stropt_mp);

    /*
     * Free the helper stream
     */
    ip_close_helper_stream(connp);

    if (!direct_sockfs)
        udp_disable_direct_sockfs(udp);

    /*
     * Collect the information needed to sync with the sonode
     */
    udp_do_capability_ack(udp, &tca, TC1_INFO);

    laddrlen = faddrlen = sizeof (sin6_t);
    (void) udp_getsockname((sock_lower_handle_t)connp,
        (struct sockaddr *)&laddr, &laddrlen, NULL);
    error = udp_getpeername((sock_lower_handle_t)connp,
        (struct sockaddr *)&faddr, &faddrlen, NULL);
    if (error != 0)
        faddrlen = 0;

    opts = 0;
    if (udp->udp_dgram_errind)
        opts |= SO_DGRAM_ERRIND;
    if (udp->udp_dontroute)
        opts |= SO_DONTROUTE;

    /*
     * Once we grab the drain lock, no data will be sent up
     * to the socket. So we notify the socket that the endpoint
     * is quiescent and it's therefore safe to move data from
     * the socket to the stream head.
9034 */ 9035 (*quiesced_cb)(connp->conn_upper_handle, q, &tca, 9036 (struct sockaddr *)&laddr, laddrlen, 9037 (struct sockaddr *)&faddr, faddrlen, opts); 9038 9039 /* 9040 * push up any packets that were queued in udp_t 9041 */ 9042 9043 mutex_enter(&udp->udp_recv_lock); 9044 while (udp->udp_fallback_queue_head != NULL) { 9045 mblk_t *mp; 9046 mp = udp->udp_fallback_queue_head; 9047 udp->udp_fallback_queue_head = mp->b_next; 9048 mutex_exit(&udp->udp_recv_lock); 9049 mp->b_next = NULL; 9050 putnext(RD(q), mp); 9051 mutex_enter(&udp->udp_recv_lock); 9052 } 9053 udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head; 9054 /* 9055 * No longer a streams less socket 9056 */ 9057 connp->conn_flags &= ~IPCL_NONSTR; 9058 mutex_exit(&udp->udp_recv_lock); 9059 9060 ASSERT(connp->conn_ref >= 1); 9061 } 9062 9063 static int 9064 udp_do_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9065 { 9066 sin_t *sin = (sin_t *)sa; 9067 sin6_t *sin6 = (sin6_t *)sa; 9068 9069 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9070 ASSERT(udp != NULL); 9071 9072 if (udp->udp_state != TS_DATA_XFER) 9073 return (ENOTCONN); 9074 9075 switch (udp->udp_family) { 9076 case AF_INET: 9077 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9078 9079 if (*salenp < sizeof (sin_t)) 9080 return (EINVAL); 9081 9082 *salenp = sizeof (sin_t); 9083 *sin = sin_null; 9084 sin->sin_family = AF_INET; 9085 sin->sin_port = udp->udp_dstport; 9086 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 9087 break; 9088 case AF_INET6: 9089 if (*salenp < sizeof (sin6_t)) 9090 return (EINVAL); 9091 9092 *salenp = sizeof (sin6_t); 9093 *sin6 = sin6_null; 9094 sin6->sin6_family = AF_INET6; 9095 sin6->sin6_port = udp->udp_dstport; 9096 sin6->sin6_addr = udp->udp_v6dst; 9097 sin6->sin6_flowinfo = udp->udp_flowinfo; 9098 break; 9099 } 9100 9101 return (0); 9102 } 9103 9104 /* ARGSUSED */ 9105 int 9106 udp_getpeername(sock_lower_handle_t proto_handle, struct sockaddr *sa, 9107 socklen_t *salenp, cred_t *cr) 9108 { 9109 conn_t *connp = (conn_t *)proto_handle; 9110 udp_t *udp = connp->conn_udp; 9111 int error; 9112 9113 ASSERT(udp != NULL); 9114 9115 rw_enter(&udp->udp_rwlock, RW_READER); 9116 9117 error = udp_do_getpeername(udp, sa, salenp); 9118 9119 rw_exit(&udp->udp_rwlock); 9120 9121 return (error); 9122 } 9123 9124 static int 9125 udp_do_getsockname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 9126 { 9127 sin_t *sin = (sin_t *)sa; 9128 sin6_t *sin6 = (sin6_t *)sa; 9129 9130 ASSERT(udp != NULL); 9131 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 9132 9133 switch (udp->udp_family) { 9134 case AF_INET: 9135 ASSERT(udp->udp_ipversion == IPV4_VERSION); 9136 9137 if (*salenp < sizeof (sin_t)) 9138 return (EINVAL); 9139 9140 *salenp = sizeof (sin_t); 9141 *sin = sin_null; 9142 sin->sin_family = AF_INET; 9143 if (udp->udp_state == TS_UNBND) { 9144 break; 9145 } 9146 sin->sin_port = udp->udp_port; 9147 9148 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 9149 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 9150 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 9151 } else { 9152 /* 9153 * INADDR_ANY 9154 * udp_v6src is not set, we might be bound to 9155 * broadcast/multicast. 
             * local address instead (that could
             * also still be INADDR_ANY)
             */
            sin->sin_addr.s_addr =
                V4_PART_OF_V6(udp->udp_bound_v6src);
        }
        break;

    case AF_INET6:
        if (*salenp < sizeof (sin6_t))
            return (EINVAL);

        *salenp = sizeof (sin6_t);
        *sin6 = sin6_null;
        sin6->sin6_family = AF_INET6;
        if (udp->udp_state == TS_UNBND) {
            break;
        }
        sin6->sin6_port = udp->udp_port;

        if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
            sin6->sin6_addr = udp->udp_v6src;
        } else {
            /*
             * UNSPECIFIED
             * udp_v6src is not set, we might be bound to
             * broadcast/multicast. Use udp_bound_v6src as
             * local address instead (that could
             * also still be UNSPECIFIED)
             */
            sin6->sin6_addr = udp->udp_bound_v6src;
        }
    }
    return (0);
}

/* ARGSUSED */
int
udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
    socklen_t *salenp, cred_t *cr)
{
    conn_t *connp = (conn_t *)proto_handle;
    udp_t *udp = connp->conn_udp;
    int error;

    ASSERT(udp != NULL);
    rw_enter(&udp->udp_rwlock, RW_READER);

    error = udp_do_getsockname(udp, sa, salenp);

    rw_exit(&udp->udp_rwlock);

    return (error);
}

int
udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
    void *optvalp, socklen_t *optlen, cred_t *cr)
{
    conn_t *connp = (conn_t *)proto_handle;
    udp_t *udp = connp->conn_udp;
    int error;
    t_uscalar_t max_optbuf_len;
    void *optvalp_buf;
    int len;

    error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
        udp_opt_obj.odb_opt_des_arr,
        udp_opt_obj.odb_opt_arr_cnt,
        udp_opt_obj.odb_topmost_tpiprovider,
        B_FALSE, B_TRUE, cr);
    if (error != 0) {
        if (error < 0)
            error = proto_tlitosyserr(-error);
        return (error);
    }

    optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
    rw_enter(&udp->udp_rwlock, RW_READER);
    len = udp_opt_get(connp, level, option_name, optvalp_buf);
    rw_exit(&udp->udp_rwlock);

    if (len < 0) {
        /*
         * Pass on to IP
         */
        kmem_free(optvalp_buf, max_optbuf_len);
        return (ip_get_options(connp, level, option_name,
            optvalp, optlen, cr));
    } else {
        /*
         * update optlen and copy option value
         */
        t_uscalar_t size = MIN(len, *optlen);
        bcopy(optvalp_buf, optvalp, size);
        bcopy(&size, optlen, sizeof (size));

        kmem_free(optvalp_buf, max_optbuf_len);
        return (0);
    }
}

int
udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
    const void *optvalp, socklen_t optlen, cred_t *cr)
{
    conn_t *connp = (conn_t *)proto_handle;
    udp_t *udp = connp->conn_udp;
    int error;

    error = proto_opt_check(level, option_name, optlen, NULL,
        udp_opt_obj.odb_opt_des_arr,
        udp_opt_obj.odb_opt_arr_cnt,
        udp_opt_obj.odb_topmost_tpiprovider,
        B_TRUE, B_FALSE, cr);

    if (error != 0) {
        if (error < 0)
            error = proto_tlitosyserr(-error);
        return (error);
    }

    rw_enter(&udp->udp_rwlock, RW_WRITER);
    error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
        optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
        NULL, cr);
    rw_exit(&udp->udp_rwlock);

    if (error < 0) {
        /*
         * Pass on to ip
         */
        error = ip_set_options(connp, level, option_name, optvalp,
            optlen, cr);
    }

    return (error);
}

void
udp_clr_flowctrl(sock_lower_handle_t proto_handle)
{
    conn_t *connp = (conn_t *)proto_handle;
    udp_t *udp = connp->conn_udp;

    mutex_enter(&udp->udp_recv_lock);
    connp->conn_flow_cntrld = B_FALSE;
    mutex_exit(&udp->udp_recv_lock);
}

/* ARGSUSED */
int
udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
{
    conn_t *connp = (conn_t *)proto_handle;

    /* shut down the send side */
    if (how != SHUT_RD)
        (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
            SOCK_OPCTL_SHUT_SEND, 0);
    /* shut down the recv side */
    if (how != SHUT_WR)
        (*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
            SOCK_OPCTL_SHUT_RECV, 0);
    return (0);
}

int
udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
    int mode, int32_t *rvalp, cred_t *cr)
{
    conn_t *connp = (conn_t *)proto_handle;
    int error;

    switch (cmd) {
    case ND_SET:
    case ND_GET:
    case _SIOCSOCKFALLBACK:
    case TI_GETPEERNAME:
    case TI_GETMYNAME:
        ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
            cmd));
        error = EINVAL;
        break;
    default:
        /*
         * Pass on to IP using helper stream
         */
        error = ldi_ioctl(
            connp->conn_helper_info->ip_helper_stream_handle,
            cmd, arg, mode, cr, rvalp);
        break;
    }
    return (error);
}

/* ARGSUSED */
int
udp_accept(sock_lower_handle_t lproto_handle,
    sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
    cred_t *cr)
{
    return (EOPNOTSUPP);
}

/* ARGSUSED */
int
udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
{
    return (EOPNOTSUPP);
}

sock_downcalls_t sock_udp_downcalls = {
    udp_activate,           /* sd_activate */
    udp_accept,             /* sd_accept */
    udp_bind,               /* sd_bind */
    udp_listen,             /* sd_listen */
    udp_connect,            /* sd_connect */
    udp_getpeername,        /* sd_getpeername */
    udp_getsockname,        /* sd_getsockname */
    udp_getsockopt,         /* sd_getsockopt */
    udp_setsockopt,         /* sd_setsockopt */
    udp_send,               /* sd_send */
    NULL,                   /* sd_send_uio */
    NULL,                   /* sd_recv_uio */
    NULL,                   /* sd_poll */
    udp_shutdown,           /* sd_shutdown */
    udp_clr_flowctrl,       /* sd_setflowctrl */
    udp_ioctl,              /* sd_ioctl */
    udp_close               /* sd_close */
};
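
/*
 * For reference, an ordinary user-level datagram exchange exercises the
 * downcalls above roughly as follows (illustrative mapping, one call per
 * line; error handling omitted):
 *
 *      fd = socket(AF_INET, SOCK_DGRAM, 0);        sd_activate
 *      bind(fd, ...);                              sd_bind
 *      connect(fd, ...);                           sd_connect
 *      send(fd, buf, len, 0);                      sd_send
 *      getsockopt(fd, ...);                        sd_getsockopt
 *      setsockopt(fd, ...);                        sd_setsockopt
 *      getsockname(fd, ...);                       sd_getsockname
 *      getpeername(fd, ...);                       sd_getpeername
 *      shutdown(fd, SHUT_RDWR);                    sd_shutdown
 *      close(fd);                                  sd_close
 *
 * sd_accept and sd_listen are stubbed with EOPNOTSUPP above, since UDP
 * is not a connection-oriented protocol.
 */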