1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/sdt.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 67 #include <inet/common.h> 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_if.h> 73 #include <inet/ip_multi.h> 74 #include <inet/ip_ndp.h> 75 #include <inet/mi.h> 76 #include <inet/mib2.h> 77 #include <inet/nd.h> 78 #include <inet/optcom.h> 79 #include <inet/snmpcom.h> 80 #include <inet/kstatcom.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipclassifier.h> 83 #include <inet/ipsec_impl.h> 84 #include <inet/ipp_common.h> 85 86 /* 87 * The ipsec_info.h header file is here since it has the definition for the 88 * M_CTL message types used by IP to convey information to the ULP. The 89 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
90 */ 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 94 #include <sys/tsol/label.h> 95 #include <sys/tsol/tnet.h> 96 #include <rpc/pmap_prot.h> 97 98 /* 99 * Synchronization notes: 100 * 101 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 102 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 103 * We also use conn_lock when updating things that affect the IP classifier 104 * lookup. 105 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 106 * 107 * The fanout lock uf_lock: 108 * When a UDP endpoint is bound to a local port, it is inserted into 109 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 110 * The size of the array is controlled by the udp_bind_fanout_size variable. 111 * This variable can be changed in /etc/system if the default value is 112 * not large enough. Each bind hash bucket is protected by a per bucket 113 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 114 * structure and a few other fields in the udp_t. A UDP endpoint is removed 115 * from the bind hash list only when it is being unbound or being closed. 116 * The per bucket lock also protects a UDP endpoint's state changes. 117 * 118 * The udp_rwlock: 119 * This protects most of the other fields in the udp_t. The exact list of 120 * fields which are protected by each of the above locks is documented in 121 * the udp_t structure definition. 122 * 123 * Plumbing notes: 124 * UDP is always a device driver. For compatibility with mibopen() code 125 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 126 * dummy module. 127 * 128 * The above implies that we don't support any intermediate module to 129 * reside in between /dev/ip and udp -- in fact, we never supported such 130 * scenario in the past as the inter-layer communication semantics have 131 * always been private. 
132 */ 133 134 /* For /etc/system control */ 135 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 136 137 #define NDD_TOO_QUICK_MSG \ 138 "ndd get info rate too high for non-privileged users, try again " \ 139 "later.\n" 140 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 141 142 /* Option processing attrs */ 143 typedef struct udpattrs_s { 144 union { 145 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 146 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 147 } udpattr_ippu; 148 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 149 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 150 mblk_t *udpattr_mb; 151 boolean_t udpattr_credset; 152 } udpattrs_t; 153 154 static void udp_addr_req(queue_t *q, mblk_t *mp); 155 static void udp_bind(queue_t *q, mblk_t *mp); 156 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 157 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 158 static void udp_bind_result(conn_t *, mblk_t *); 159 static void udp_bind_ack(conn_t *, mblk_t *mp); 160 static void udp_bind_error(conn_t *, mblk_t *mp); 161 static int udp_build_hdrs(udp_t *udp); 162 static void udp_capability_req(queue_t *q, mblk_t *mp); 163 static int udp_close(queue_t *q); 164 static void udp_connect(queue_t *q, mblk_t *mp); 165 static void udp_disconnect(queue_t *q, mblk_t *mp); 166 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 167 int sys_error); 168 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 169 t_scalar_t tlierr, int unixerr); 170 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 171 cred_t *cr); 172 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 173 char *value, caddr_t cp, cred_t *cr); 174 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 175 char *value, caddr_t cp, cred_t *cr); 176 static void udp_icmp_error(queue_t *q, mblk_t *mp); 177 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 178 static void udp_info_req(queue_t *q, mblk_t *mp); 179 static void 
udp_input(void *, mblk_t *, void *); 180 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 181 t_scalar_t addr_length); 182 static void udp_lrput(queue_t *, mblk_t *); 183 static void udp_lwput(queue_t *, mblk_t *); 184 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 185 cred_t *credp, boolean_t isv6); 186 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 187 cred_t *credp); 188 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 189 cred_t *credp); 190 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 191 int *errorp, udpattrs_t *udpattrs); 192 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 193 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 194 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 195 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 196 cred_t *cr); 197 static void udp_report_item(mblk_t *mp, udp_t *udp); 198 static int udp_rinfop(queue_t *q, infod_t *dp); 199 static int udp_rrw(queue_t *q, struiod_t *dp); 200 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 201 cred_t *cr); 202 static void udp_send_data(udp_t *, queue_t *, mblk_t *, ipha_t *); 203 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 204 t_scalar_t destlen, t_scalar_t err); 205 static void udp_unbind(queue_t *q, mblk_t *mp); 206 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 207 boolean_t random); 208 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 209 int *, boolean_t); 210 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 211 int *error); 212 static void udp_wput_other(queue_t *q, mblk_t *mp); 213 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 214 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 215 216 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 217 static void 
udp_stack_fini(netstackid_t stackid, void *arg);

/* Per-netstack kstat creation/teardown and the kstat snapshot callback. */
static void *udp_kstat_init(netstackid_t stackid);
static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static void *udp_kstat2_init(netstackid_t, udp_stat_t *);
static void udp_kstat2_fini(netstackid_t, kstat_t *);
static int udp_kstat_update(kstat_t *kp, int rw);

static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
    uint_t pkt_len);
static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);

/* Default STREAMS high/low water marks for the receive and transmit sides. */
#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024

static struct module_info udp_mod_info = {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

/* Write side is shared by v4 and v6; service routine is IP's. */
static struct qinit udp_winit = {
	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL,
	&udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_close, NULL,
	&udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_close, NULL,
	&udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};

static sin_t sin_null;	/* Zero address for quick clears */
static sin6_t sin6_null;	/* Zero address for quick clears */

/* Largest UDP payload: IP maximum less the UDP and minimal IPv4 headers. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size. udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type. udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state. This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* Largest UDP payload: IP maximum less the UDP and fixed IPv6 headers. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size. udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type. udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state. This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp. These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /*min	max		value		name */
 { 0L,	256,	32,	"udp_wroff_extra" },
 { 1L,	255,	255,	"udp_ipv4_ttl" },
 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024), 1024,	"udp_smallest_nonpriv_port" },
 { 0,	1,	1,	"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
 { 0,		(1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
 { 65536,	(1<<30), 2*1024*1024, "udp_max_buf"},
 { 100,	60000,	1000,	"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 * NOTE(review): the protocol parameter is uchar_t in cl_inet_bind but
 * uint8_t in cl_inet_unbind - presumably equivalent types, but the
 * inconsistency should be confirmed against the cluster consumers.
 */

void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

/* Shorthand for casting an mblk's rptr to the TPI primitive union. */
typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP). MLP has special function
 * on TX systems. Once a port is made MLP, it's not available as
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" to find a suitable anon port.
 */
static in_port_t
udp_get_next_priv_port(udp_t *udp)
{
	/*
	 * Shared, unsynchronized cursor across all UDP endpoints; a race
	 * only costs a duplicate candidate, which the bind retry loop in
	 * udp_bind() resolves.
	 */
	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
	in_port_t nextport;
	boolean_t restart = B_FALSE;
	udp_stack_t *us = udp->udp_us;

retry:
	/*
	 * If the cursor has walked below the bottom of the privileged
	 * anon range, wrap it back to the top; a second wrap means every
	 * candidate has been tried, so report exhaustion with 0.
	 */
	if (next_priv_port < us->us_min_anonpriv_port ||
	    next_priv_port >= IPPORT_RESERVED) {
		next_priv_port = IPPORT_RESERVED - 1;
		if (restart)
			return (0);	/* range exhausted */
		restart = B_TRUE;
	}

	/*
	 * On labeled systems, skip MLP "holes": a nonzero return from
	 * tsol_next_port() supplies the next candidate to try; zero
	 * means the current candidate is usable.
	 */
	if (is_system_labeled() &&
	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
		next_priv_port = nextport;
		goto retry;
	}

	/* Hand out the current candidate and step downward for next time. */
	return (next_priv_port--);
}

/* UDP bind hash report triggered via the Named Dispatch mechanism. */
/* ARGSUSED */
static int
udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udp_fanout_t *udpf;
	int i;
	zoneid_t zoneid;
	conn_t *connp;
	udp_t *udp;
	udp_stack_t *us;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	us = udp->udp_us;

	/*
	 * Rate-limit unprivileged callers so that repeatedly walking
	 * every fanout bucket cannot be used to load down the system.
	 * Refer to comments in udp_status_report().
	 */
	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
			return (0);
		}
	}
	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
		/* The following may work even if we cannot get a large buf. */
		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
		return (0);
	}

	(void) mi_mpprintf(mp,
	    "UDP " MI_COL_HDRPAD_STR
	/*   12345678[89ABCDEF] */
	    " zone lport src addr dest addr port state");
	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */

	zoneid = connp->conn_zoneid;

	/* Walk each bind-hash bucket under its per-bucket uf_lock. */
	for (i = 0; i < us->us_bind_fanout_size; i++) {
		udpf = &us->us_bind_fanout[i];
		mutex_enter(&udpf->uf_lock);

		/* Print the hash index. */
		udp = udpf->uf_udp;
		if (zoneid != GLOBAL_ZONEID) {
			/* skip to first entry in this zone; might be none */
			while (udp != NULL &&
			    udp->udp_connp->conn_zoneid != zoneid)
				udp = udp->udp_bind_hash;
		}
		if (udp != NULL) {
			uint_t print_len, buf_len;

			buf_len = mp->b_cont->b_datap->db_lim -
			    mp->b_cont->b_wptr;
			print_len = snprintf((char *)mp->b_cont->b_wptr,
			    buf_len, "%d\n", i);
			/* Never advance b_wptr past the end of the buffer. */
			if (print_len < buf_len) {
				mp->b_cont->b_wptr += print_len;
			} else {
				mp->b_cont->b_wptr += buf_len;
			}
			for (; udp != NULL; udp = udp->udp_bind_hash) {
				if (zoneid == GLOBAL_ZONEID ||
				    zoneid == udp->udp_connp->conn_zoneid)
					udp_report_item(mp->b_cont, udp);
			}
		}
		mutex_exit(&udpf->uf_lock);
	}
	/* Remember when we last ran, for the rate limiter above. */
	us->us_last_ndd_get_info_time = ddi_get_lbolt();
	return (0);
}

/*
 * Hash list removal routine for udp_t structures.
 * Unlinks udp from its bind-hash bucket; a no-op if it is not linked
 * (udp_ptpbhn == NULL). caller_holds_lock says whether the caller
 * already holds the bucket's uf_lock; otherwise it is taken here.
 */
static void
udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
{
	udp_t *udpnext;
	kmutex_t *lockp;
	udp_stack_t *us = udp->udp_us;

	if (udp->udp_ptpbhn == NULL)
		return;

	/*
	 * Extract the lock pointer in case there are concurrent
	 * hash_remove's for this instance.
	 */
	ASSERT(udp->udp_port != 0);
	if (!caller_holds_lock) {
		lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
		    us->us_bind_fanout_size)].uf_lock;
		ASSERT(lockp != NULL);
		mutex_enter(lockp);
	}
	/* Re-check under the lock; a concurrent remove may have won. */
	if (udp->udp_ptpbhn != NULL) {
		udpnext = udp->udp_bind_hash;
		if (udpnext != NULL) {
			/* Point successor's back-link at our predecessor. */
			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
			udp->udp_bind_hash = NULL;
		}
		*udp->udp_ptpbhn = udpnext;
		udp->udp_ptpbhn = NULL;
	}
	if (!caller_holds_lock) {
		mutex_exit(lockp);
	}
}

/*
 * Insert udp into the bind-hash bucket uf. The caller must hold the
 * bucket's uf_lock, and udp must not already be on any bucket's list.
 */
static void
udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
{
	udp_t **udpp;
	udp_t *udpnext;

	ASSERT(MUTEX_HELD(&uf->uf_lock));
	ASSERT(udp->udp_ptpbhn == NULL);
	udpp = &uf->uf_udp;
	udpnext = udpp[0];
	if (udpnext != NULL) {
		/*
		 * If the new udp bound to the INADDR_ANY address
		 * and the first one in the list is not bound to
		 * INADDR_ANY we skip all entries until we find the
		 * first one bound to INADDR_ANY.
		 * This makes sure that applications binding to a
		 * specific address get preference over those binding to
		 * INADDR_ANY.
		 */
		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
			while ((udpnext = udpp[0]) != NULL &&
			    !V6_OR_V4_INADDR_ANY(
			    udpnext->udp_bound_v6src)) {
				udpp = &(udpnext->udp_bind_hash);
			}
			if (udpnext != NULL)
				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		} else {
			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		}
	}
	/* Splice udp in ahead of udpnext (which may be NULL). */
	udp->udp_bind_hash = udpnext;
	udp->udp_ptpbhn = udpp;
	udpp[0] = udp;
}

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
553 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 554 * (Called as writer.) 555 * 556 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 557 * without setting SO_REUSEADDR. This is needed so that they 558 * can be viewed as two independent transport protocols. 559 * However, anonymouns ports are allocated from the same range to avoid 560 * duplicating the us->us_next_port_to_try. 561 */ 562 static void 563 udp_bind(queue_t *q, mblk_t *mp) 564 { 565 sin_t *sin; 566 sin6_t *sin6; 567 mblk_t *mp1; 568 in_port_t port; /* Host byte order */ 569 in_port_t requested_port; /* Host byte order */ 570 struct T_bind_req *tbr; 571 int count; 572 in6_addr_t v6src; 573 boolean_t bind_to_req_port_only; 574 int loopmax; 575 udp_fanout_t *udpf; 576 in_port_t lport; /* Network byte order */ 577 zoneid_t zoneid; 578 conn_t *connp; 579 udp_t *udp; 580 boolean_t is_inaddr_any; 581 mlp_type_t addrtype, mlptype; 582 udp_stack_t *us; 583 584 connp = Q_TO_CONN(q); 585 udp = connp->conn_udp; 586 us = udp->udp_us; 587 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 588 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 589 "udp_bind: bad req, len %u", 590 (uint_t)(mp->b_wptr - mp->b_rptr)); 591 udp_err_ack(q, mp, TPROTO, 0); 592 return; 593 } 594 if (udp->udp_state != TS_UNBND) { 595 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 596 "udp_bind: bad state, %u", udp->udp_state); 597 udp_err_ack(q, mp, TOUTSTATE, 0); 598 return; 599 } 600 /* 601 * Reallocate the message to make sure we have enough room for an 602 * address and the protocol type. 
603 */ 604 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 605 if (!mp1) { 606 udp_err_ack(q, mp, TSYSERR, ENOMEM); 607 return; 608 } 609 610 mp = mp1; 611 tbr = (struct T_bind_req *)mp->b_rptr; 612 switch (tbr->ADDR_length) { 613 case 0: /* Request for a generic port */ 614 tbr->ADDR_offset = sizeof (struct T_bind_req); 615 if (udp->udp_family == AF_INET) { 616 tbr->ADDR_length = sizeof (sin_t); 617 sin = (sin_t *)&tbr[1]; 618 *sin = sin_null; 619 sin->sin_family = AF_INET; 620 mp->b_wptr = (uchar_t *)&sin[1]; 621 } else { 622 ASSERT(udp->udp_family == AF_INET6); 623 tbr->ADDR_length = sizeof (sin6_t); 624 sin6 = (sin6_t *)&tbr[1]; 625 *sin6 = sin6_null; 626 sin6->sin6_family = AF_INET6; 627 mp->b_wptr = (uchar_t *)&sin6[1]; 628 } 629 port = 0; 630 break; 631 632 case sizeof (sin_t): /* Complete IPv4 address */ 633 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 634 sizeof (sin_t)); 635 if (sin == NULL || !OK_32PTR((char *)sin)) { 636 udp_err_ack(q, mp, TSYSERR, EINVAL); 637 return; 638 } 639 if (udp->udp_family != AF_INET || 640 sin->sin_family != AF_INET) { 641 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 642 return; 643 } 644 port = ntohs(sin->sin_port); 645 break; 646 647 case sizeof (sin6_t): /* complete IPv6 address */ 648 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 649 sizeof (sin6_t)); 650 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 651 udp_err_ack(q, mp, TSYSERR, EINVAL); 652 return; 653 } 654 if (udp->udp_family != AF_INET6 || 655 sin6->sin6_family != AF_INET6) { 656 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 657 return; 658 } 659 port = ntohs(sin6->sin6_port); 660 break; 661 662 default: /* Invalid request */ 663 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 664 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 665 udp_err_ack(q, mp, TBADADDR, 0); 666 return; 667 } 668 669 requested_port = port; 670 671 if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ) 672 bind_to_req_port_only = B_FALSE; 673 else 
/* T_BIND_REQ and requested_port != 0 */ 674 bind_to_req_port_only = B_TRUE; 675 676 if (requested_port == 0) { 677 /* 678 * If the application passed in zero for the port number, it 679 * doesn't care which port number we bind to. Get one in the 680 * valid range. 681 */ 682 if (udp->udp_anon_priv_bind) { 683 port = udp_get_next_priv_port(udp); 684 } else { 685 port = udp_update_next_port(udp, 686 us->us_next_port_to_try, B_TRUE); 687 } 688 } else { 689 /* 690 * If the port is in the well-known privileged range, 691 * make sure the caller was privileged. 692 */ 693 int i; 694 boolean_t priv = B_FALSE; 695 696 if (port < us->us_smallest_nonpriv_port) { 697 priv = B_TRUE; 698 } else { 699 for (i = 0; i < us->us_num_epriv_ports; i++) { 700 if (port == us->us_epriv_ports[i]) { 701 priv = B_TRUE; 702 break; 703 } 704 } 705 } 706 707 if (priv) { 708 cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); 709 710 if (secpolicy_net_privaddr(cr, port) != 0) { 711 udp_err_ack(q, mp, TACCES, 0); 712 return; 713 } 714 } 715 } 716 717 if (port == 0) { 718 udp_err_ack(q, mp, TNOADDR, 0); 719 return; 720 } 721 722 /* 723 * The state must be TS_UNBND. TPI mandates that users must send 724 * TPI primitives only 1 at a time and wait for the response before 725 * sending the next primitive. 726 */ 727 rw_enter(&udp->udp_rwlock, RW_WRITER); 728 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 729 rw_exit(&udp->udp_rwlock); 730 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 731 "udp_bind: bad state, %u", udp->udp_state); 732 udp_err_ack(q, mp, TOUTSTATE, 0); 733 return; 734 } 735 udp->udp_pending_op = tbr->PRIM_type; 736 /* 737 * Copy the source address into our udp structure. This address 738 * may still be zero; if so, IP will fill in the correct address 739 * each time an outbound packet is passed to it. 
Since the udp is 740 * not yet in the bind hash list, we don't grab the uf_lock to 741 * change udp_ipversion 742 */ 743 if (udp->udp_family == AF_INET) { 744 ASSERT(sin != NULL); 745 ASSERT(udp->udp_ipversion == IPV4_VERSION); 746 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 747 udp->udp_ip_snd_options_len; 748 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 749 } else { 750 ASSERT(sin6 != NULL); 751 v6src = sin6->sin6_addr; 752 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 753 /* 754 * no need to hold the uf_lock to set the udp_ipversion 755 * since we are not yet in the fanout list 756 */ 757 udp->udp_ipversion = IPV4_VERSION; 758 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 759 UDPH_SIZE + udp->udp_ip_snd_options_len; 760 } else { 761 udp->udp_ipversion = IPV6_VERSION; 762 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 763 } 764 } 765 766 /* 767 * If udp_reuseaddr is not set, then we have to make sure that 768 * the IP address and port number the application requested 769 * (or we selected for the application) is not being used by 770 * another stream. If another stream is already using the 771 * requested IP address and port, the behavior depends on 772 * "bind_to_req_port_only". If set the bind fails; otherwise we 773 * search for any an unused port to bind to the the stream. 774 * 775 * As per the BSD semantics, as modified by the Deering multicast 776 * changes, if udp_reuseaddr is set, then we allow multiple binds 777 * to the same port independent of the local IP address. 778 * 779 * This is slightly different than in SunOS 4.X which did not 780 * support IP multicast. Note that the change implemented by the 781 * Deering multicast code effects all binds - not only binding 782 * to IP multicast addresses. 783 * 784 * Note that when binding to port zero we ignore SO_REUSEADDR in 785 * order to guarantee a unique port. 
786 */ 787 788 count = 0; 789 if (udp->udp_anon_priv_bind) { 790 /* 791 * loopmax = (IPPORT_RESERVED-1) - 792 * us->us_min_anonpriv_port + 1 793 */ 794 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 795 } else { 796 loopmax = us->us_largest_anon_port - 797 us->us_smallest_anon_port + 1; 798 } 799 800 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 801 zoneid = connp->conn_zoneid; 802 803 for (;;) { 804 udp_t *udp1; 805 boolean_t found_exclbind = B_FALSE; 806 807 /* 808 * Walk through the list of udp streams bound to 809 * requested port with the same IP address. 810 */ 811 lport = htons(port); 812 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 813 us->us_bind_fanout_size)]; 814 mutex_enter(&udpf->uf_lock); 815 for (udp1 = udpf->uf_udp; udp1 != NULL; 816 udp1 = udp1->udp_bind_hash) { 817 if (lport != udp1->udp_port) 818 continue; 819 820 /* 821 * On a labeled system, we must treat bindings to ports 822 * on shared IP addresses by sockets with MAC exemption 823 * privilege as being in all zones, as there's 824 * otherwise no way to identify the right receiver. 825 */ 826 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 827 IPCL_ZONE_MATCH(connp, 828 udp1->udp_connp->conn_zoneid)) && 829 !udp->udp_mac_exempt && !udp1->udp_mac_exempt) 830 continue; 831 832 /* 833 * If UDP_EXCLBIND is set for either the bound or 834 * binding endpoint, the semantics of bind 835 * is changed according to the following chart. 836 * 837 * spec = specified address (v4 or v6) 838 * unspec = unspecified address (v4 or v6) 839 * A = specified addresses are different for endpoints 840 * 841 * bound bind to allowed? 842 * ------------------------------------- 843 * unspec unspec no 844 * unspec spec no 845 * spec unspec no 846 * spec spec yes if A 847 * 848 * For labeled systems, SO_MAC_EXEMPT behaves the same 849 * as UDP_EXCLBIND, except that zoneid is ignored. 
850 */ 851 if (udp1->udp_exclbind || udp->udp_exclbind || 852 udp1->udp_mac_exempt || udp->udp_mac_exempt) { 853 if (V6_OR_V4_INADDR_ANY( 854 udp1->udp_bound_v6src) || 855 is_inaddr_any || 856 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 857 &v6src)) { 858 found_exclbind = B_TRUE; 859 break; 860 } 861 continue; 862 } 863 864 /* 865 * Check ipversion to allow IPv4 and IPv6 sockets to 866 * have disjoint port number spaces. 867 */ 868 if (udp->udp_ipversion != udp1->udp_ipversion) { 869 870 /* 871 * On the first time through the loop, if the 872 * the user intentionally specified a 873 * particular port number, then ignore any 874 * bindings of the other protocol that may 875 * conflict. This allows the user to bind IPv6 876 * alone and get both v4 and v6, or bind both 877 * both and get each seperately. On subsequent 878 * times through the loop, we're checking a 879 * port that we chose (not the user) and thus 880 * we do not allow casual duplicate bindings. 881 */ 882 if (count == 0 && requested_port != 0) 883 continue; 884 } 885 886 /* 887 * No difference depending on SO_REUSEADDR. 888 * 889 * If existing port is bound to a 890 * non-wildcard IP address and 891 * the requesting stream is bound to 892 * a distinct different IP addresses 893 * (non-wildcard, also), keep going. 894 */ 895 if (!is_inaddr_any && 896 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 897 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 898 &v6src)) { 899 continue; 900 } 901 break; 902 } 903 904 if (!found_exclbind && 905 (udp->udp_reuseaddr && requested_port != 0)) { 906 break; 907 } 908 909 if (udp1 == NULL) { 910 /* 911 * No other stream has this IP address 912 * and port number. We can use it. 913 */ 914 break; 915 } 916 mutex_exit(&udpf->uf_lock); 917 if (bind_to_req_port_only) { 918 /* 919 * We get here only when requested port 920 * is bound (and only first of the for() 921 * loop iteration). 
922 * 923 * The semantics of this bind request 924 * require it to fail so we return from 925 * the routine (and exit the loop). 926 * 927 */ 928 udp->udp_pending_op = -1; 929 rw_exit(&udp->udp_rwlock); 930 udp_err_ack(q, mp, TADDRBUSY, 0); 931 return; 932 } 933 934 if (udp->udp_anon_priv_bind) { 935 port = udp_get_next_priv_port(udp); 936 } else { 937 if ((count == 0) && (requested_port != 0)) { 938 /* 939 * If the application wants us to find 940 * a port, get one to start with. Set 941 * requested_port to 0, so that we will 942 * update us->us_next_port_to_try below. 943 */ 944 port = udp_update_next_port(udp, 945 us->us_next_port_to_try, B_TRUE); 946 requested_port = 0; 947 } else { 948 port = udp_update_next_port(udp, port + 1, 949 B_FALSE); 950 } 951 } 952 953 if (port == 0 || ++count >= loopmax) { 954 /* 955 * We've tried every possible port number and 956 * there are none available, so send an error 957 * to the user. 958 */ 959 udp->udp_pending_op = -1; 960 rw_exit(&udp->udp_rwlock); 961 udp_err_ack(q, mp, TNOADDR, 0); 962 return; 963 } 964 } 965 966 /* 967 * Copy the source address into our udp structure. This address 968 * may still be zero; if so, ip will fill in the correct address 969 * each time an outbound packet is passed to it. 970 * If we are binding to a broadcast or multicast address then 971 * udp_bind_ack will clear the source address when it receives 972 * the T_BIND_ACK. 973 */ 974 udp->udp_v6src = udp->udp_bound_v6src = v6src; 975 udp->udp_port = lport; 976 /* 977 * Now reset the the next anonymous port if the application requested 978 * an anonymous port, or we handed out the next anonymous port. 979 */ 980 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 981 us->us_next_port_to_try = port + 1; 982 } 983 984 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. 
*/ 985 if (udp->udp_family == AF_INET) { 986 sin->sin_port = udp->udp_port; 987 } else { 988 int error; 989 990 sin6->sin6_port = udp->udp_port; 991 /* Rebuild the header template */ 992 error = udp_build_hdrs(udp); 993 if (error != 0) { 994 udp->udp_pending_op = -1; 995 rw_exit(&udp->udp_rwlock); 996 mutex_exit(&udpf->uf_lock); 997 udp_err_ack(q, mp, TSYSERR, error); 998 return; 999 } 1000 } 1001 udp->udp_state = TS_IDLE; 1002 udp_bind_hash_insert(udpf, udp); 1003 mutex_exit(&udpf->uf_lock); 1004 rw_exit(&udp->udp_rwlock); 1005 1006 if (cl_inet_bind) { 1007 /* 1008 * Running in cluster mode - register bind information 1009 */ 1010 if (udp->udp_ipversion == IPV4_VERSION) { 1011 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1012 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1013 (in_port_t)udp->udp_port); 1014 } else { 1015 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1016 (uint8_t *)&(udp->udp_v6src), 1017 (in_port_t)udp->udp_port); 1018 } 1019 1020 } 1021 1022 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 1023 if (is_system_labeled() && (!connp->conn_anon_port || 1024 connp->conn_anon_mlp)) { 1025 uint16_t mlpport; 1026 cred_t *cr = connp->conn_cred; 1027 zone_t *zone; 1028 1029 zone = crgetzone(cr); 1030 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 1031 mlptSingle; 1032 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 1033 &v6src, us->us_netstack->netstack_ip); 1034 if (addrtype == mlptSingle) { 1035 rw_enter(&udp->udp_rwlock, RW_WRITER); 1036 udp->udp_pending_op = -1; 1037 rw_exit(&udp->udp_rwlock); 1038 udp_err_ack(q, mp, TNOADDR, 0); 1039 connp->conn_anon_port = B_FALSE; 1040 connp->conn_mlp_type = mlptSingle; 1041 return; 1042 } 1043 mlpport = connp->conn_anon_port ? 
PMAPPORT : port; 1044 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 1045 addrtype); 1046 if (mlptype != mlptSingle && 1047 (connp->conn_mlp_type == mlptSingle || 1048 secpolicy_net_bindmlp(cr) != 0)) { 1049 if (udp->udp_debug) { 1050 (void) strlog(UDP_MOD_ID, 0, 1, 1051 SL_ERROR|SL_TRACE, 1052 "udp_bind: no priv for multilevel port %d", 1053 mlpport); 1054 } 1055 rw_enter(&udp->udp_rwlock, RW_WRITER); 1056 udp->udp_pending_op = -1; 1057 rw_exit(&udp->udp_rwlock); 1058 udp_err_ack(q, mp, TACCES, 0); 1059 connp->conn_anon_port = B_FALSE; 1060 connp->conn_mlp_type = mlptSingle; 1061 return; 1062 } 1063 1064 /* 1065 * If we're specifically binding a shared IP address and the 1066 * port is MLP on shared addresses, then check to see if this 1067 * zone actually owns the MLP. Reject if not. 1068 */ 1069 if (mlptype == mlptShared && addrtype == mlptShared) { 1070 /* 1071 * No need to handle exclusive-stack zones since 1072 * ALL_ZONES only applies to the shared stack. 1073 */ 1074 zoneid_t mlpzone; 1075 1076 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 1077 htons(mlpport)); 1078 if (connp->conn_zoneid != mlpzone) { 1079 if (udp->udp_debug) { 1080 (void) strlog(UDP_MOD_ID, 0, 1, 1081 SL_ERROR|SL_TRACE, 1082 "udp_bind: attempt to bind port " 1083 "%d on shared addr in zone %d " 1084 "(should be %d)", 1085 mlpport, connp->conn_zoneid, 1086 mlpzone); 1087 } 1088 rw_enter(&udp->udp_rwlock, RW_WRITER); 1089 udp->udp_pending_op = -1; 1090 rw_exit(&udp->udp_rwlock); 1091 udp_err_ack(q, mp, TACCES, 0); 1092 connp->conn_anon_port = B_FALSE; 1093 connp->conn_mlp_type = mlptSingle; 1094 return; 1095 } 1096 } 1097 if (connp->conn_anon_port) { 1098 int error; 1099 1100 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 1101 port, B_TRUE); 1102 if (error != 0) { 1103 if (udp->udp_debug) { 1104 (void) strlog(UDP_MOD_ID, 0, 1, 1105 SL_ERROR|SL_TRACE, 1106 "udp_bind: cannot establish anon " 1107 "MLP for port %d", port); 1108 } 1109 rw_enter(&udp->udp_rwlock, RW_WRITER); 1110 
udp->udp_pending_op = -1; 1111 rw_exit(&udp->udp_rwlock); 1112 udp_err_ack(q, mp, TACCES, 0); 1113 connp->conn_anon_port = B_FALSE; 1114 connp->conn_mlp_type = mlptSingle; 1115 return; 1116 } 1117 } 1118 connp->conn_mlp_type = mlptype; 1119 } 1120 1121 /* Pass the protocol number in the message following the address. */ 1122 *mp->b_wptr++ = IPPROTO_UDP; 1123 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1124 /* 1125 * Append a request for an IRE if udp_v6src not 1126 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1127 */ 1128 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1129 if (!mp->b_cont) { 1130 rw_enter(&udp->udp_rwlock, RW_WRITER); 1131 udp->udp_pending_op = -1; 1132 rw_exit(&udp->udp_rwlock); 1133 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1134 return; 1135 } 1136 mp->b_cont->b_wptr += sizeof (ire_t); 1137 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1138 } 1139 if (udp->udp_family == AF_INET6) 1140 mp = ip_bind_v6(q, mp, connp, NULL); 1141 else 1142 mp = ip_bind_v4(q, mp, connp); 1143 1144 /* The above return NULL if the bind needs to be deferred */ 1145 if (mp != NULL) 1146 udp_bind_result(connp, mp); 1147 else 1148 CONN_INC_REF(connp); 1149 } 1150 1151 /* 1152 * This is called from ip_wput_nondata to handle the results of a 1153 * deferred UDP bind. It is called once the bind has been completed. 1154 */ 1155 void 1156 udp_resume_bind(conn_t *connp, mblk_t *mp) 1157 { 1158 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1159 1160 udp_bind_result(connp, mp); 1161 1162 CONN_OPER_PENDING_DONE(connp); 1163 } 1164 1165 /* 1166 * This routine handles each T_CONN_REQ message passed to udp. It 1167 * associates a default destination address with the stream. 
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_bind_result.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
 * convert it to an error ack for the appropriate primitive.
 */
static void
udp_connect(queue_t *q, mblk_t *mp)
{
	sin6_t		*sin6;
	sin_t		*sin;
	struct T_conn_req	*tcr;
	in6_addr_t	v6dst;
	ipaddr_t	v4dst;
	uint16_t	dstport;
	uint32_t	flowinfo;
	mblk_t		*mp1, *mp2;
	udp_fanout_t	*udpf;
	udp_t		*udp, *udp1;
	ushort_t	ipversion;
	udp_stack_t	*us;
	conn_t		*connp = Q_TO_CONN(q);

	udp = connp->conn_udp;
	tcr = (struct T_conn_req *)mp->b_rptr;
	us = udp->udp_us;

	/* A bit of sanity checking: the request must hold a T_conn_req. */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	/* UDP connect requests carry no TPI options. */
	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine packet type based on type of address passed in
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that address family matches the type of
	 * family of the address passed down.
	 */
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		/* Keep the destination in v4-mapped form internally. */
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		ipversion = IPV4_VERSION;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v6dst = sin6->sin6_addr;
		dstport = sin6->sin6_port;
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			/* AF_INET6 socket talking to a v4 destination. */
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			ipversion = IPV4_VERSION;
			flowinfo = 0;
		} else {
			ipversion = IPV6_VERSION;
			flowinfo = sin6->sin6_flowinfo;
		}
		break;
	}
	if (dstport == 0) {
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	/* udp_rwlock (writer) protects udp_t state; uf_lock nests inside. */
	rw_enter(&udp->udp_rwlock, RW_WRITER);

	/*
	 * This UDP must have bound to a port already before doing a connect.
	 * TPI mandates that users must send TPI primitives only 1 at a time
	 * and wait for the response before sending the next primitive.
	 */
	if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
		rw_exit(&udp->udp_rwlock);
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	udp->udp_pending_op = T_CONN_REQ;
	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);

	if (ipversion == IPV4_VERSION) {
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
	} else {
		udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
	}

	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
	    us->us_bind_fanout_size)];

	mutex_enter(&udpf->uf_lock);
	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		udp->udp_v6src = udp->udp_bound_v6src;
		udp->udp_state = TS_IDLE;
	}

	/*
	 * Create a default IP header with no IP options.
	 */
	udp->udp_dstport = dstport;
	udp->udp_ipversion = ipversion;
	if (ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (udp->udp_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = 0;

		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * use the address of that interface as our
		 * source address if no source address has been set.
		 */
		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
		    CLASSD(v4dst) &&
		    udp->udp_multicast_if_addr != INADDR_ANY) {
			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
			    &udp->udp_v6src);
		}
	} else {
		ASSERT(udp->udp_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = flowinfo;
		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * then the ip bind logic will pick the correct source
		 * address (i.e. matching the outgoing multicast interface).
		 */
	}

	/*
	 * Verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER
	 */
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;
		if (udp->udp_port != udp1->udp_port ||
		    udp->udp_ipversion != udp1->udp_ipversion ||
		    dstport != udp1->udp_dstport ||
		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) ||
		    !(IPCL_ZONE_MATCH(udp->udp_connp,
		    udp1->udp_connp->conn_zoneid) ||
		    IPCL_ZONE_MATCH(udp1->udp_connp,
		    udp->udp_connp->conn_zoneid)))
			continue;
		/* Duplicate 5-tuple found - reject the connect. */
		mutex_exit(&udpf->uf_lock);
		udp->udp_pending_op = -1;
		rw_exit(&udp->udp_rwlock);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}
	udp->udp_state = TS_DATA_XFER;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to verify that there is a route
	 * and to determine the source address.
	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
	if (mp1 == NULL) {
	/* Common unwind: back out TS_DATA_XFER and the pending op. */
bind_failed:
		mutex_enter(&udpf->uf_lock);
		udp->udp_state = TS_IDLE;
		udp->udp_pending_op = -1;
		mutex_exit(&udpf->uf_lock);
		rw_exit(&udp->udp_rwlock);
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	rw_exit(&udp->udp_rwlock);
	/*
	 * We also have to send a connection confirmation to
	 * keep TLI happy. Prepare it for udp_bind_result.
	 */
	if (udp->udp_family == AF_INET)
		mp2 = mi_tpi_conn_con(NULL, (char *)sin,
		    sizeof (*sin), NULL, 0);
	else
		mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
		    sizeof (*sin6), NULL, 0);
	if (mp2 == NULL) {
		freemsg(mp1);
		rw_enter(&udp->udp_rwlock, RW_WRITER);
		goto bind_failed;
	}

	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack. */
		freemsg(mp2);
		rw_enter(&udp->udp_rwlock, RW_WRITER);
		mutex_enter(&udpf->uf_lock);
		udp->udp_state = TS_IDLE;
		udp->udp_pending_op = -1;
		mutex_exit(&udpf->uf_lock);
		rw_exit(&udp->udp_rwlock);
		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
		return;
	}

	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
	linkb(mp1, mp);
	linkb(mp1, mp2);

	mblk_setcred(mp1, connp->conn_cred);
	if (udp->udp_family == AF_INET)
		mp1 = ip_bind_v4(q, mp1, connp);
	else
		mp1 = ip_bind_v6(q, mp1, connp, NULL);

	/* The above return NULL if the bind needs to be deferred */
	if (mp1 != NULL)
		udp_bind_result(connp, mp1);
	else
		CONN_INC_REF(connp);
}

/*
 * STREAMS close routine for a UDP instance: quiesce the conn, drain
 * queued receive data, turn off put/service procedures, free per-instance
 * state and finally destroy the conn itself.
 */
static int
udp_close(queue_t *q)
{
	conn_t	*connp = (conn_t *)q->q_ptr;
	udp_t	*udp;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	udp_quiesce_conn(connp);
	ip_quiesce_conn(connp);
	/*
	 * Disable read-side synchronous stream
	 * interface and drain any queued data.
	 */
	udp_rcv_drain(q, udp, B_TRUE);
	ASSERT(!udp->udp_direct_sockfs);

	qprocsoff(q);

	/* After the drain above, the receive list must be empty. */
	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	ASSERT(udp->udp_rcv_list_tail == NULL);

	udp_close_free(connp);

	/*
	 * Now we are truly single threaded on this stream, and can
	 * delete the things hanging off the connp, and finally the connp.
	 * We removed this connp from the fanout list, it cannot be
	 * accessed thru the fanouts, and we already waited for the
	 * conn_ref to drop to 0. We are already in close, so
	 * there cannot be any other thread from the top. qprocsoff
	 * has completed, and service has completed or won't run in
	 * future.
	 */
	ASSERT(connp->conn_ref == 1);

	/* Return the minor number, then drop the last ref and free. */
	inet_minor_free(ip_minor_arena, connp->conn_dev);

	connp->conn_ref--;
	ipcl_conn_destroy(connp);

	q->q_ptr = WR(q)->q_ptr = NULL;
	return (0);
}

/*
 * Called in the close path to quiesce the conn: deregister any cluster
 * bind state and take the endpoint out of the bind hash.
 */
void
udp_quiesce_conn(conn_t *connp)
{
	udp_t	*udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)(&(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		}
	}

	udp_bind_hash_remove(udp, B_FALSE);

}

/*
 * Free all dynamically-allocated state hanging off the udp_t (IP options,
 * sticky option headers), then reset the structure to its constructed
 * state, preserving only the back-pointer to the conn.
 */
void
udp_close_free(conn_t *connp)
{
	udp_t *udp = connp->conn_udp;

	/* If there are any options associated with the stream, free them. */
	if (udp->udp_ip_snd_options != NULL) {
		mi_free((char *)udp->udp_ip_snd_options);
		udp->udp_ip_snd_options = NULL;
		udp->udp_ip_snd_options_len = 0;
	}

	if (udp->udp_ip_rcv_options != NULL) {
		mi_free((char *)udp->udp_ip_rcv_options);
		udp->udp_ip_rcv_options = NULL;
		udp->udp_ip_rcv_options_len = 0;
	}

	/* Free memory associated with sticky options */
	if (udp->udp_sticky_hdrs_len != 0) {
		kmem_free(udp->udp_sticky_hdrs,
		    udp->udp_sticky_hdrs_len);
		udp->udp_sticky_hdrs = NULL;
		udp->udp_sticky_hdrs_len = 0;
	}

	ip6_pkt_free(&udp->udp_sticky_ipp);

	/*
	 * Clear any fields which the kmem_cache constructor clears.
	 * Only udp_connp needs to be preserved.
	 * TBD: We should make this more efficient to avoid clearing
	 * everything.
	 */
	ASSERT(udp->udp_connp == connp);
	bzero(udp, sizeof (udp_t));
	udp->udp_connp = connp;
}

/*
 * This routine handles each T_DISCON_REQ message passed to udp
 * as an indication that UDP is no longer connected. This results
 * in sending a T_BIND_REQ to IP to restore the binding to just
 * the local address/port.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying just the local address/port
 *	T_OK_ACK	- for the T_DISCON_REQ
 *
 * The disconnect completes in udp_bind_result.
 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
 * convert it to an error ack for the appropriate primitive.
 */
static void
udp_disconnect(queue_t *q, mblk_t *mp)
{
	udp_t	*udp;
	mblk_t	*mp1;
	udp_fanout_t *udpf;
	udp_stack_t *us;
	conn_t	*connp = Q_TO_CONN(q);

	udp = connp->conn_udp;
	us = udp->udp_us;
	rw_enter(&udp->udp_rwlock, RW_WRITER);
	/* Only legal while connected and with no TPI primitive pending. */
	if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) {
		rw_exit(&udp->udp_rwlock);
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_disconnect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	udp->udp_pending_op = T_DISCON_REQ;
	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
	    us->us_bind_fanout_size)];
	mutex_enter(&udpf->uf_lock);
	/* Revert the source to the bound address and drop to TS_IDLE. */
	udp->udp_v6src = udp->udp_bound_v6src;
	udp->udp_state = TS_IDLE;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to remove the full binding and revert
	 * to the local address binding.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t));
	if (mp1 == NULL) {
		udp->udp_pending_op = -1;
		rw_exit(&udp->udp_rwlock);
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}
	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_DISCON_REQ for the ack. */
		udp->udp_pending_op = -1;
		rw_exit(&udp->udp_rwlock);
		udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM);
		return;
	}

	if (udp->udp_family == AF_INET6) {
		int error;

		/* Rebuild the header template */
		error = udp_build_hdrs(udp);
		if (error != 0) {
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error);
			freemsg(mp1);
			return;
		}
	}

	rw_exit(&udp->udp_rwlock);
	/* Append the T_OK_ACK to the T_BIND_REQ for udp_bind_ack */
	linkb(mp1, mp);

	if (udp->udp_family == AF_INET6)
		mp1 = ip_bind_v6(q, mp1, connp, NULL);
	else
		mp1 = ip_bind_v4(q, mp1, connp);

	/* The above return NULL if the bind needs to be deferred */
	if (mp1 != NULL)
		udp_bind_result(connp, mp1);
	else
		CONN_INC_REF(connp);
}

/* This routine creates a T_ERROR_ACK message and passes it upstream.
 */
static void
udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
{
	/* Reuse mp as a T_ERROR_ACK; nothing is sent if allocation fails. */
	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
		qreply(q, mp);
}

/* Shorthand to generate and send TPI error acks to our client */
static void
udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
    int sys_error)
{
	struct T_error_ack	*teackp;

	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
		teackp = (struct T_error_ack *)mp->b_rptr;
		teackp->ERROR_prim = primitive;
		teackp->TLI_error = t_error;
		teackp->UNIX_error = sys_error;
		qreply(q, mp);
	}
}

/*
 * ndd get routine: print the list of extra privileged ports (non-zero
 * slots only) into mp.
 */
/*ARGSUSED*/
static int
udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	int i;
	udp_t *udp	= Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;

	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (us->us_epriv_ports[i] != 0)
			(void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]);
	}
	return (0);
}

/*
 * ndd set routine: add a port (1-65535) to the extra privileged port list.
 * Returns EINVAL for an out-of-range value, EEXIST if already present,
 * EOVERFLOW if the list is full.
 */
/* ARGSUSED */
static int
udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	long	new_value;
	int	i;
	udp_t	*udp	= Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;

	/*
	 * Fail the request if the new value does not lie within the
	 * port number limits.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value <= 0 || new_value >= 65536) {
		return (EINVAL);
	}

	/* Check if the value is already in the list */
	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (new_value == us->us_epriv_ports[i]) {
			return (EEXIST);
		}
	}
	/* Find an empty slot */
	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (us->us_epriv_ports[i] == 0)
			break;
	}
	if (i == us->us_num_epriv_ports) {
		return (EOVERFLOW);
	}

	/* Set the new value */
	us->us_epriv_ports[i] = (in_port_t)new_value;
	return (0);
}

/*
 * ndd set routine: remove a port from the extra privileged port list.
 * Returns EINVAL for an out-of-range value, ESRCH if not present.
 */
/* ARGSUSED */
static int
udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	long	new_value;
	int	i;
	udp_t	*udp	= Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;

	/*
	 * Fail the request if the new value does not lie within the
	 * port number limits.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value <= 0 || new_value >= 65536) {
		return (EINVAL);
	}

	/* Check that the value is already in the list */
	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (us->us_epriv_ports[i] == new_value)
			break;
	}
	if (i == us->us_num_epriv_ports) {
		return (ESRCH);
	}

	/* Clear the value */
	us->us_epriv_ports[i] = 0;
	return (0);
}

/* At minimum we need 4 bytes of UDP header */
#define	ICMP_MIN_UDP_HDR	4

/*
 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP.
 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
 * Assumes that IP has pulled up everything up to and including the ICMP header.
 */
static void
udp_icmp_error(queue_t *q, mblk_t *mp)
{
	icmph_t *icmph;
	ipha_t	*ipha;
	int	iph_hdr_length;
	udpha_t	*udpha;
	sin_t	sin;
	sin6_t	sin6;
	mblk_t	*mp1;
	int	error = 0;
	udp_t	*udp = Q_TO_UDP(q);

	ipha = (ipha_t *)mp->b_rptr;

	ASSERT(OK_32PTR(mp->b_rptr));

	/* IPv6-encapsulated ICMP is handled by the v6 variant. */
	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
		udp_icmp_error_ipv6(q, mp);
		return;
	}
	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);

	/* Skip past the outer IP and ICMP headers */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
	ipha = (ipha_t *)&icmph[1];

	/* Skip past the inner IP and find the ULP header */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);

	switch (icmph->icmph_type) {
	case ICMP_DEST_UNREACHABLE:
		switch (icmph->icmph_code) {
		case ICMP_FRAGMENTATION_NEEDED:
			/*
			 * IP has already adjusted the path MTU.
			 */
			break;
		case ICMP_PORT_UNREACHABLE:
		case ICMP_PROTOCOL_UNREACHABLE:
			/* Only these codes are treated as permanent. */
			error = ECONNREFUSED;
			break;
		default:
			/* Transient errors */
			break;
		}
		break;
	default:
		/* Transient errors */
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!udp->udp_dgram_errind) {
		freemsg(mp);
		return;
	}

	/*
	 * NOTE(review): mp1 is left uninitialized if udp_family is neither
	 * AF_INET nor AF_INET6; presumably the family is guaranteed to be
	 * one of the two by open/bind - confirm, or add a default case.
	 */
	switch (udp->udp_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = ipha->ipha_dst;
		sin.sin_port = udpha->uha_dst_port;
		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
		    error);
		break;
	case AF_INET6:
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
		sin6.sin6_port = udpha->uha_dst_port;

		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
		    NULL, 0, error);
		break;
	}
	if (mp1)
		putnext(q, mp1);
	freemsg(mp);
}

/*
 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
 * Assumes that IP has pulled up all the extension headers as well as the
 * ICMPv6 header.
 */
static void
udp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
{
	icmp6_t		*icmp6;
	ip6_t		*ip6h, *outer_ip6h;
	uint16_t	iph_hdr_length;
	uint8_t		*nexthdrp;
	udpha_t		*udpha;
	sin6_t		sin6;
	mblk_t		*mp1;
	int		error = 0;
	udp_t		*udp = Q_TO_UDP(q);
	udp_stack_t	*us = udp->udp_us;

	outer_ip6h = (ip6_t *)mp->b_rptr;
	/* Account for extension headers between the outer IPv6 and ICMPv6. */
	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
	else
		iph_hdr_length = IPV6_HDR_LEN;
	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
	ip6h = (ip6_t *)&icmp6[1];
	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
		freemsg(mp);
		return;
	}
	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);

	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		switch (icmp6->icmp6_code) {
		case ICMP6_DST_UNREACH_NOPORT:
			error = ECONNREFUSED;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
		case ICMP6_DST_UNREACH_NOROUTE:
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
		case ICMP6_DST_UNREACH_ADDR:
			/* Transient errors */
			break;
		default:
			break;
		}
		break;
	case ICMP6_PACKET_TOO_BIG: {
		struct T_unitdata_ind	*tudi;
		struct T_opthdr		*toh;
		size_t			udi_size;
		mblk_t			*newmp;
		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
		    sizeof (struct ip6_mtuinfo);
		sin6_t			*sin6;
		struct ip6_mtuinfo	*mtuinfo;

		/*
		 * If the application has requested to receive path mtu
		 * information, send up an empty message containing an
		 * IPV6_PATHMTU ancillary data item.
		 */
		if (!udp->udp_ipv6_recvpathmtu)
			break;

		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
		    opt_length;
		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
			BUMP_MIB(&us->us_udp_mib, udpInErrors);
			break;
		}

		/*
		 * newmp->b_cont is left to NULL on purpose. This is an
		 * empty message containing only ancillary data.
		 */
		newmp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
		newmp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
		tudi->OPT_length = opt_length;

		sin6 = (sin6_t *)&tudi[1];
		bzero(sin6, sizeof (sin6_t));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = udp->udp_v6dst;

		toh = (struct T_opthdr *)&sin6[1];
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_PATHMTU;
		toh->len = opt_length;
		toh->status = 0;

		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
		/*
		 * We've consumed everything we need from the original
		 * message. Free it, then send our empty message.
		 */
		freemsg(mp);
		putnext(q, newmp);
		return;
	}
	case ICMP6_TIME_EXCEEDED:
		/* Transient errors */
		break;
	case ICMP6_PARAM_PROB:
		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
		    (uchar_t *)nexthdrp) {
			error = ECONNREFUSED;
			break;
		}
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!udp->udp_dgram_errind) {
		/* Application did not enable SO_DGRAM_ERRIND; drop it. */
		freemsg(mp);
		return;
	}

	/* Build the faulting destination as a sockaddr_in6 for the ULP. */
	sin6 = sin6_null;
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = ip6h->ip6_dst;
	sin6.sin6_port = udpha->uha_dst_port;
	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;

	mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0,
	    error);
	if (mp1)
		putnext(q, mp1);
	freemsg(mp);
}

/*
 * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
 * The local address is filled in if endpoint is bound. The remote address
 * is filled in if remote address has been specified ("connected endpoint")
 * (The concept of connected CLTS sockets is alien to published TPI
 *  but we support it anyway).
 *
 * On success a T_ADDR_ACK (M_PCPROTO) is sent upstream via qreply();
 * on allocation failure a TSYSERR/ENOMEM error ack is sent instead.
 */
static void
udp_addr_req(queue_t *q, mblk_t *mp)
{
	sin_t	*sin;
	sin6_t	*sin6;
	mblk_t	*ackmp;
	struct T_addr_ack *taa;
	udp_t	*udp = Q_TO_UDP(q);

	/*
	 * Make it large enough for worst case: ack header plus both a
	 * local and a remote sockaddr_in6 (sin6_t is the larger family).
	 */
	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
	    2 * sizeof (sin6_t), 1);
	if (ackmp == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}
	taa = (struct T_addr_ack *)ackmp->b_rptr;

	bzero(taa, sizeof (struct T_addr_ack));
	ackmp->b_wptr = (uchar_t *)&taa[1];

	taa->PRIM_type = T_ADDR_ACK;
	ackmp->b_datap->db_type = M_PCPROTO;
	/* Reader lock suffices: we only consult udp state/addresses. */
	rw_enter(&udp->udp_rwlock, RW_READER);
	/*
	 * Note: Following code assumes 32 bit alignment of basic
	 * data structures like sin_t and struct T_addr_ack.
	 */
	if (udp->udp_state != TS_UNBND) {
		/*
		 * Fill in local address first
		 */
		taa->LOCADDR_offset = sizeof (*taa);
		if (udp->udp_family == AF_INET) {
			taa->LOCADDR_length = sizeof (sin_t);
			sin = (sin_t *)&taa[1];
			/* Fill zeroes and then initialize non-zero fields */
			*sin = sin_null;
			sin->sin_family = AF_INET;
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src,
				    sin->sin_addr.s_addr);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src,
				    sin->sin_addr.s_addr);
			}
			sin->sin_port = udp->udp_port;
			ackmp->b_wptr = (uchar_t *)&sin[1];
			if (udp->udp_state == TS_DATA_XFER) {
				/*
				 * connected, fill remote address too
				 */
				taa->REMADDR_length = sizeof (sin_t);
				/* assumed 32-bit alignment */
				taa->REMADDR_offset = taa->LOCADDR_offset +
				    taa->LOCADDR_length;

				sin = (sin_t *)(ackmp->b_rptr +
				    taa->REMADDR_offset);
				/* initialize */
				*sin = sin_null;
				sin->sin_family = AF_INET;
				sin->sin_addr.s_addr =
				    V4_PART_OF_V6(udp->udp_v6dst);
				sin->sin_port = udp->udp_dstport;
				ackmp->b_wptr = (uchar_t *)&sin[1];
			}
		} else {
			taa->LOCADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&taa[1];
			/* Fill zeroes and then initialize non-zero fields */
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				sin6->sin6_addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				sin6->sin6_addr =
				    udp->udp_bound_v6src;
			}
			sin6->sin6_port = udp->udp_port;
			ackmp->b_wptr = (uchar_t *)&sin6[1];
			if (udp->udp_state == TS_DATA_XFER) {
				/*
				 * connected, fill remote address too
				 */
				taa->REMADDR_length = sizeof (sin6_t);
				/* assumed 32-bit alignment */
				taa->REMADDR_offset = taa->LOCADDR_offset +
				    taa->LOCADDR_length;

				sin6 = (sin6_t *)(ackmp->b_rptr +
				    taa->REMADDR_offset);
				/* initialize */
				*sin6 = sin6_null;
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = udp->udp_v6dst;
				sin6->sin6_port = udp->udp_dstport;
				ackmp->b_wptr = (uchar_t *)&sin6[1];
			}
			/*
			 * NOTE(review): redundant when connected (sin6 still
			 * points at the remote address, so this re-stores the
			 * same value); harmless but could be removed.
			 */
			ackmp->b_wptr = (uchar_t *)&sin6[1];
		}
	}
	rw_exit(&udp->udp_rwlock);
	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
	qreply(q, ackmp);
}

/*
 * Copy the generic T_INFO_ACK template for the endpoint's address family
 * into *tap, then overlay the per-endpoint fields (current TPI state and
 * maximum option buffer size).
 */
static void
udp_copy_info(struct T_info_ack *tap, udp_t *udp)
{
	if (udp->udp_family == AF_INET) {
		*tap = udp_g_t_info_ack_ipv4;
	} else {
		*tap = udp_g_t_info_ack_ipv6;
	}
	tap->CURRENT_state = udp->udp_state;
	tap->OPT_size = udp_max_optsize;
}

/*
 * This routine responds to T_CAPABILITY_REQ messages.  It is called by
 * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
 * udp_g_t_info_ack.  The current state of the stream is copied from
 * udp_state.
 */
static void
udp_capability_req(queue_t *q, mblk_t *mp)
{
	t_uscalar_t		cap_bits1;
	struct T_capability_ack	*tcap;
	udp_t	*udp = Q_TO_UDP(q);

	/* Snapshot the requested capability bits before mp is reused. */
	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;

	/* Reuse the request mblk for the ack; preserve its db_type. */
	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
	    mp->b_datap->db_type, T_CAPABILITY_ACK);
	if (!mp)
		return;

	tcap = (struct T_capability_ack *)mp->b_rptr;
	tcap->CAP_bits1 = 0;

	if (cap_bits1 & TC1_INFO) {
		udp_copy_info(&tcap->INFO_ack, udp);
		tcap->CAP_bits1 |= TC1_INFO;
	}

	qreply(q, mp);
}

/*
 * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
 * The current state of the stream is copied from udp_state.
 */
static void
udp_info_req(queue_t *q, mblk_t *mp)
{
	udp_t *udp = Q_TO_UDP(q);

	/* Create a T_INFO_ACK message. */
	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
	    T_INFO_ACK);
	if (!mp)
		return;
	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
	qreply(q, mp);
}

/*
 * IP recognizes seven kinds of bind requests:
 *
 * - A zero-length address binds only to the protocol number.
 *
 * - A 4-byte address is treated as a request to
 * validate that the address is a valid local IPv4
 * address, appropriate for an application to bind to.
 * IP does the verification, but does not make any note
 * of the address at this time.
 *
 * - A 16-byte address is treated as a request
 * to validate a local IPv6 address, as the 4-byte
 * address case above.
 *
 * - A 16-byte sockaddr_in to validate the local IPv4 address and also
 * use it for the inbound fanout of packets.
 *
 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
 * use it for the inbound fanout of packets.
 *
 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
 * information consisting of local and remote addresses
 * and ports.  In this case, the addresses are both
 * validated as appropriate for this operation, and, if
 * so, the information is retained for use in the
 * inbound fanout.
 *
 * - A 36-byte address (ipa6_conn_t) containing complete IPv6
 * fanout information, like the 12-byte case above.
 *
 * IP will also fill in the IRE request mblk with information
 * regarding our peer.  In all cases, we notify IP of our protocol
 * type by appending a single protocol byte to the bind request.
 *
 * Returns the newly allocated T_BIND_REQ mblk (with an IRE_DB_REQ_TYPE
 * continuation mblk appended for the sockaddr/ipa_conn cases), or NULL
 * on allocation failure.  Caller must hold udp_rwlock.
 */
static mblk_t *
udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length)
{
	char	*cp;
	mblk_t	*mp;
	struct T_bind_req *tbr;
	ipa_conn_t	*ac;
	ipa6_conn_t	*ac6;
	sin_t	*sin;
	sin6_t	*sin6;

	ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);
	ASSERT(RW_LOCK_HELD(&udp->udp_rwlock));
	/* +1 for the trailing protocol byte appended below. */
	mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
	if (!mp)
		return (mp);
	mp->b_datap->db_type = M_PROTO;
	tbr = (struct T_bind_req *)mp->b_rptr;
	tbr->PRIM_type = bind_prim;
	tbr->ADDR_offset = sizeof (*tbr);
	tbr->CONIND_number = 0;
	tbr->ADDR_length = addr_length;
	cp = (char *)&tbr[1];
	switch (addr_length) {
	case sizeof (ipa_conn_t):
		ASSERT(udp->udp_family == AF_INET);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		/* cp known to be 32 bit aligned */
		ac = (ipa_conn_t *)cp;
		ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src);
		ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst);
		ac->ac_fport = udp->udp_dstport;
		ac->ac_lport = udp->udp_port;
		break;

	case sizeof (ipa6_conn_t):
		ASSERT(udp->udp_family == AF_INET6);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		/* cp known to be 32 bit aligned */
		ac6 = (ipa6_conn_t *)cp;
		ac6->ac6_laddr = udp->udp_v6src;
		ac6->ac6_faddr = udp->udp_v6dst;
		ac6->ac6_fport = udp->udp_dstport;
		ac6->ac6_lport = udp->udp_port;
		break;

	case sizeof (sin_t):
		ASSERT(udp->udp_family == AF_INET);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		sin = (sin_t *)cp;
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src);
		sin->sin_port = udp->udp_port;
		break;

	case sizeof (sin6_t):
		ASSERT(udp->udp_family == AF_INET6);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		sin6 = (sin6_t *)cp;
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = udp->udp_bound_v6src;
		sin6->sin6_port = udp->udp_port;
		break;
	}
	/* Add protocol number to end */
	cp[addr_length] = (char)IPPROTO_UDP;
	mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
	return (mp);
}

/* For /dev/udp aka AF_INET open */
static
int
udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
}

/* For /dev/udp6 aka AF_INET6 open */
static int
udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
}

/*
 * This is the open routine for udp.  It allocates a udp_t structure for
 * the stream and, on the first open of the module, creates an ND table.
 *
 * Allocates a minor device, creates the conn_t/udp_t pair for this
 * stream, initializes family-specific defaults (header length, TTL /
 * hoplimit), enables queue processing, and for AF_INET6 pre-builds the
 * transmit header template.  Returns 0 on success or an errno.
 */
/*ARGSUSED2*/
static int
udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
    boolean_t isv6)
{
	int	err;
	udp_t	*udp;
	conn_t *connp;
	dev_t	conn_dev;
	zoneid_t zoneid;
	netstack_t *ns;
	udp_stack_t *us;

	TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q);

	/* If the stream is already open, return immediately. */
	if (q->q_ptr != NULL)
		return (0);

	/* UDP is a device driver here, not a pushable module. */
	if (sflag == MODOPEN)
		return (EINVAL);

	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);
	us = ns->netstack_udp;
	ASSERT(us != NULL);

	/*
	 * For exclusive stacks we set the zoneid to zero
	 * to make UDP operate as if in the global zone.
	 */
	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
		zoneid = GLOBAL_ZONEID;
	else
		zoneid = crgetzoneid(credp);

	if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) {
		netstack_rele(ns);
		return (EBUSY);
	}
	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);

	connp = ipcl_conn_create(IPCL_UDPCONN, KM_SLEEP, ns);
	connp->conn_dev = conn_dev;
	udp = connp->conn_udp;

	/*
	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
	 * done by netstack_find_by_cred()
	 */
	netstack_rele(ns);

	/*
	 * Initialize the udp_t structure for this stream.
	 */
	q->q_ptr = connp;
	WR(q)->q_ptr = connp;
	connp->conn_rq = q;
	connp->conn_wq = WR(q);

	rw_enter(&udp->udp_rwlock, RW_WRITER);
	ASSERT(connp->conn_ulp == IPPROTO_UDP);
	ASSERT(connp->conn_udp == udp);
	ASSERT(udp->udp_connp == connp);

	/* Set the initial state of the stream and the privilege status. */
	udp->udp_state = TS_UNBND;
	if (isv6) {
		udp->udp_family = AF_INET6;
		udp->udp_ipversion = IPV6_VERSION;
		udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
		udp->udp_ttl = us->us_ipv6_hoplimit;
		connp->conn_af_isv6 = B_TRUE;
		connp->conn_flags |= IPCL_ISV6;
	} else {
		udp->udp_family = AF_INET;
		udp->udp_ipversion = IPV4_VERSION;
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
		udp->udp_ttl = us->us_ipv4_ttl;
		connp->conn_af_isv6 = B_FALSE;
		connp->conn_flags &= ~IPCL_ISV6;
	}

	udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	udp->udp_pending_op = -1;
	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
	connp->conn_zoneid = zoneid;

	udp->udp_open_time = lbolt64;
	udp->udp_open_pid = curproc->p_pid;

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode.  This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0)
		udp->udp_mac_exempt = B_TRUE;

	if (flag & SO_SOCKSTR) {
		/* Opened via sockfs: enable the direct-sockfs fast path. */
		connp->conn_flags |= IPCL_SOCKET;
		udp->udp_issocket = B_TRUE;
		udp->udp_direct_sockfs = B_TRUE;
	}

	connp->conn_ulp_labeled = is_system_labeled();

	udp->udp_us = us;

	q->q_hiwat = us->us_recv_hiwat;
	WR(q)->q_hiwat = us->us_xmit_hiwat;
	WR(q)->q_lowat = us->us_xmit_lowat;

	connp->conn_recv = udp_input;
	crhold(credp);
	connp->conn_cred = credp;

	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_INCIPIENT;
	mutex_exit(&connp->conn_lock);

	qprocson(q);

	if (udp->udp_family == AF_INET6) {
		/* Build initial header template for transmit */
		if ((err = udp_build_hdrs(udp)) != 0) {
			/* Undo: stop queue processing, then tear down conn. */
			rw_exit(&udp->udp_rwlock);
			qprocsoff(q);
			ipcl_conn_destroy(connp);
			return (err);
		}
	}
	rw_exit(&udp->udp_rwlock);

	/* Set the Stream head write offset and high watermark. */
	(void) mi_set_sth_wroff(q,
	    udp->udp_max_hdr_len + us->us_wroff_extra);
	(void) mi_set_sth_hiwat(q, udp_set_rcv_hiwat(udp, q->q_hiwat));

	return (0);
}

/*
 * Which UDP options OK to set through T_UNITDATA_REQ...
 */
/* ARGSUSED */
static boolean_t
udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
{
	/* All options are currently permitted via T_UNITDATA_REQ. */
	return (B_TRUE);
}

/*
 * This routine gets default values of certain options whose default
 * values are maintained by protocol specific code.
 * Returns the size of the default value written to *ptr, or -1 if the
 * option is not handled here.
 */
/* ARGSUSED */
int
udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
	udp_t		*udp = Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;
	int *i1 = (int *)ptr;

	switch (level) {
	case IPPROTO_IP:
		switch (name) {
		case IP_MULTICAST_TTL:
			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
			return (sizeof (uchar_t));
		case IP_MULTICAST_LOOP:
			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
			return (sizeof (uchar_t));
		}
		break;
	case IPPROTO_IPV6:
		switch (name) {
		case IPV6_MULTICAST_HOPS:
			*i1 = IP_DEFAULT_MULTICAST_TTL;
			return (sizeof (int));
		case IPV6_MULTICAST_LOOP:
			*i1 = IP_DEFAULT_MULTICAST_LOOP;
			return (sizeof (int));
		case IPV6_UNICAST_HOPS:
			*i1 = us->us_ipv6_hoplimit;
			return (sizeof (int));
		}
		break;
	}
	return (-1);
}

/*
 * This routine retrieves the current status of socket options.
 * It returns the size of the option retrieved.
 */
int
udp_opt_get_locked(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
	int	*i1 = (int *)ptr;
	conn_t	*connp;
	udp_t	*udp;
	ip6_pkt_t	*ipp;
	int	len;
	udp_stack_t	*us;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	ipp = &udp->udp_sticky_ipp;
	us = udp->udp_us;

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_DEBUG:
			*i1 = udp->udp_debug;
			break;	/* goto sizeof (int) option return */
		case SO_REUSEADDR:
			*i1 = udp->udp_reuseaddr;
			break;	/* goto sizeof (int) option return */
		case SO_TYPE:
			*i1 = SOCK_DGRAM;
			break;	/* goto sizeof (int) option return */

		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			*i1 = udp->udp_dontroute;
			break;	/* goto sizeof (int) option return */
		case SO_USELOOPBACK:
			*i1 = udp->udp_useloopback;
			break;	/* goto sizeof (int) option return */
		case SO_BROADCAST:
			*i1 = udp->udp_broadcast;
			break;	/* goto sizeof (int) option return */

		case SO_SNDBUF:
			*i1 = q->q_hiwat;
			break;	/* goto sizeof (int) option return */
		case SO_RCVBUF:
			*i1 = RD(q)->q_hiwat;
			break;	/* goto sizeof (int) option return */
		case SO_DGRAM_ERRIND:
			*i1 = udp->udp_dgram_errind;
			break;	/* goto sizeof (int) option return */
		case SO_RECVUCRED:
			*i1 = udp->udp_recvucred;
			break;	/* goto sizeof (int) option return */
		case SO_TIMESTAMP:
			*i1 = udp->udp_timestamp;
			break;	/* goto sizeof (int) option return */
		case SO_ANON_MLP:
			*i1 = udp->udp_anon_mlp;
			break;	/* goto sizeof (int) option return */
		case SO_MAC_EXEMPT:
			*i1 = udp->udp_mac_exempt;
			break;	/* goto sizeof (int) option return */
		case SO_ALLZONES:
			*i1 = connp->conn_allzones;
			break;	/* goto sizeof (int) option return */
		case SO_EXCLBIND:
			*i1 = udp->udp_exclbind ? SO_EXCLBIND : 0;
			break;
		case SO_PROTOTYPE:
			*i1 = IPPROTO_UDP;
			break;
		case SO_DOMAIN:
			*i1 = udp->udp_family;
			break;
		default:
			return (-1);
		}
		break;
	case IPPROTO_IP:
		if (udp->udp_family != AF_INET)
			return (-1);
		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/* Hide the kernel-inserted label from the caller. */
			len = udp->udp_ip_rcv_options_len - udp->udp_label_len;
			if (len > 0) {
				bcopy(udp->udp_ip_rcv_options +
				    udp->udp_label_len, ptr, len);
			}
			return (len);
		case IP_TOS:
		case T_IP_TOS:
			*i1 = (int)udp->udp_type_of_service;
			break;	/* goto sizeof (int) option return */
		case IP_TTL:
			*i1 = (int)udp->udp_ttl;
			break;	/* goto sizeof (int) option return */
		case IP_DHCPINIT_IF:
			return (-EINVAL);
		case IP_NEXTHOP:
		case IP_RECVPKTINFO:
			/*
			 * This also handles IP_PKTINFO.
			 * IP_PKTINFO and IP_RECVPKTINFO have the same value.
			 * Differentiation is based on the size of the argument
			 * passed in.
			 * This option is handled in IP which will return an
			 * error for IP_PKTINFO as it's not supported as a
			 * sticky option.
			 */
			return (-EINVAL);
		case IP_MULTICAST_IF:
			/* 0 address if not set */
			*(ipaddr_t *)ptr = udp->udp_multicast_if_addr;
			return (sizeof (ipaddr_t));
		case IP_MULTICAST_TTL:
			*(uchar_t *)ptr = udp->udp_multicast_ttl;
			return (sizeof (uchar_t));
		case IP_MULTICAST_LOOP:
			*ptr = connp->conn_multicast_loop;
			return (sizeof (uint8_t));
		case IP_RECVOPTS:
			*i1 = udp->udp_recvopts;
			break;	/* goto sizeof (int) option return */
		case IP_RECVDSTADDR:
			*i1 = udp->udp_recvdstaddr;
			break;	/* goto sizeof (int) option return */
		case IP_RECVIF:
			*i1 = udp->udp_recvif;
			break;	/* goto sizeof (int) option return */
		case IP_RECVSLLA:
			*i1 = udp->udp_recvslla;
			break;	/* goto sizeof (int) option return */
		case IP_RECVTTL:
			*i1 = udp->udp_recvttl;
			break;	/* goto sizeof (int) option return */
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case IP_DONTFAILOVER_IF:
			/* cannot "get" the value for these */
			return (-1);
		case IP_BOUND_IF:
			/* Zero if not set */
			*i1 = udp->udp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IP_UNSPEC_SRC:
			*i1 = udp->udp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IP_BROADCAST_TTL:
			*(uchar_t *)ptr = connp->conn_broadcast_ttl;
			return (sizeof (uchar_t));
		default:
			return (-1);
		}
		break;
	case IPPROTO_IPV6:
		if (udp->udp_family != AF_INET6)
			return (-1);
		switch (name) {
		case IPV6_UNICAST_HOPS:
			*i1 = (unsigned int)udp->udp_ttl;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_IF:
			/* 0 index if not set */
			*i1 = udp->udp_multicast_if_index;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_HOPS:
			*i1 = udp->udp_multicast_ttl;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_LOOP:
			*i1 = connp->conn_multicast_loop;
			break;	/* goto sizeof (int) option return */
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/* cannot "get" the value for these */
			return (-1);
		case IPV6_BOUND_IF:
			/* Zero if not set */
			*i1 = udp->udp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IPV6_UNSPEC_SRC:
			*i1 = udp->udp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVPKTINFO:
			*i1 = udp->udp_ip_recvpktinfo;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVTCLASS:
			*i1 = udp->udp_ipv6_recvtclass;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVPATHMTU:
			*i1 = udp->udp_ipv6_recvpathmtu;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVHOPLIMIT:
			*i1 = udp->udp_ipv6_recvhoplimit;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVHOPOPTS:
			*i1 = udp->udp_ipv6_recvhopopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVDSTOPTS:
			*i1 = udp->udp_ipv6_recvdstopts;
			break;	/* goto sizeof (int) option return */
		case _OLD_IPV6_RECVDSTOPTS:
			*i1 = udp->udp_old_ipv6_recvdstopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVRTHDRDSTOPTS:
			*i1 = udp->udp_ipv6_recvrthdrdstopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVRTHDR:
			*i1 = udp->udp_ipv6_recvrthdr;
			break;	/* goto sizeof (int) option return */
		case IPV6_PKTINFO: {
			/* XXX assumes that caller has room for max size! */
			struct in6_pktinfo *pkti;

			pkti = (struct in6_pktinfo *)ptr;
			if (ipp->ipp_fields & IPPF_IFINDEX)
				pkti->ipi6_ifindex = ipp->ipp_ifindex;
			else
				pkti->ipi6_ifindex = 0;
			if (ipp->ipp_fields & IPPF_ADDR)
				pkti->ipi6_addr = ipp->ipp_addr;
			else
				pkti->ipi6_addr = ipv6_all_zeros;
			return (sizeof (struct in6_pktinfo));
		}
		case IPV6_TCLASS:
			if (ipp->ipp_fields & IPPF_TCLASS)
				*i1 = ipp->ipp_tclass;
			else
				*i1 = IPV6_FLOW_TCLASS(
				    IPV6_DEFAULT_VERS_AND_FLOW);
			break;	/* goto sizeof (int) option return */
		case IPV6_NEXTHOP: {
			sin6_t *sin6 = (sin6_t *)ptr;

			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
				return (0);
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = ipp->ipp_nexthop;
			return (sizeof (sin6_t));
		}
		case IPV6_HOPOPTS:
			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
				return (0);
			if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6)
				return (0);
			/*
			 * The cipso/label option is added by kernel.
			 * User is not usually aware of this option.
			 * We copy out the hbh opt after the label option.
			 */
			bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
			    ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
			if (udp->udp_label_len_v6 > 0) {
				/* Rewrite next-header and 8-byte length. */
				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
				ptr[1] = (ipp->ipp_hopoptslen -
				    udp->udp_label_len_v6 + 7) / 8 - 1;
			}
			return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
		case IPV6_RTHDRDSTOPTS:
			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
				return (0);
			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
			return (ipp->ipp_rtdstoptslen);
		case IPV6_RTHDR:
			if (!(ipp->ipp_fields & IPPF_RTHDR))
				return (0);
			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
			return (ipp->ipp_rthdrlen);
		case IPV6_DSTOPTS:
			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
				return (0);
			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
			return (ipp->ipp_dstoptslen);
		case IPV6_PATHMTU:
			return (ip_fill_mtuinfo(&udp->udp_v6dst,
			    udp->udp_dstport, (struct ip6_mtuinfo *)ptr,
			    us->us_netstack));
		default:
			return (-1);
		}
		break;
	case IPPROTO_UDP:
		switch (name) {
		case UDP_ANONPRIVBIND:
			*i1 = udp->udp_anon_priv_bind;
			break;
		case UDP_EXCLBIND:
			*i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
			break;
		case UDP_RCVHDR:
			*i1 = udp->udp_rcvhdr ? 1 : 0;
			break;
		case UDP_NAT_T_ENDPOINT:
			*i1 = udp->udp_nat_t_endpoint;
			break;
		default:
			return (-1);
		}
		break;
	default:
		return (-1);
	}
	return (sizeof (int));
}

/*
 * Wrapper around udp_opt_get_locked() that acquires udp_rwlock as a
 * reader for the duration of the option retrieval.
 */
int
udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
	udp_t	*udp;
	int	err;

	udp = Q_TO_UDP(q);

	rw_enter(&udp->udp_rwlock, RW_READER);
	err = udp_opt_get_locked(q, level, name, ptr);
	rw_exit(&udp->udp_rwlock);
	return (err);
}

/*
 * This routine sets socket options.
2911 */ 2912 /* ARGSUSED */ 2913 int 2914 udp_opt_set_locked(queue_t *q, uint_t optset_context, int level, 2915 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 2916 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2917 { 2918 udpattrs_t *attrs = thisdg_attrs; 2919 int *i1 = (int *)invalp; 2920 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2921 boolean_t checkonly; 2922 int error; 2923 conn_t *connp; 2924 udp_t *udp; 2925 uint_t newlen; 2926 udp_stack_t *us; 2927 size_t sth_wroff; 2928 2929 connp = Q_TO_CONN(q); 2930 udp = connp->conn_udp; 2931 us = udp->udp_us; 2932 2933 switch (optset_context) { 2934 case SETFN_OPTCOM_CHECKONLY: 2935 checkonly = B_TRUE; 2936 /* 2937 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2938 * inlen != 0 implies value supplied and 2939 * we have to "pretend" to set it. 2940 * inlen == 0 implies that there is no 2941 * value part in T_CHECK request and just validation 2942 * done elsewhere should be enough, we just return here. 2943 */ 2944 if (inlen == 0) { 2945 *outlenp = 0; 2946 return (0); 2947 } 2948 break; 2949 case SETFN_OPTCOM_NEGOTIATE: 2950 checkonly = B_FALSE; 2951 break; 2952 case SETFN_UD_NEGOTIATE: 2953 case SETFN_CONN_NEGOTIATE: 2954 checkonly = B_FALSE; 2955 /* 2956 * Negotiating local and "association-related" options 2957 * through T_UNITDATA_REQ. 2958 * 2959 * Following routine can filter out ones we do not 2960 * want to be "set" this way. 2961 */ 2962 if (!udp_opt_allow_udr_set(level, name)) { 2963 *outlenp = 0; 2964 return (EINVAL); 2965 } 2966 break; 2967 default: 2968 /* 2969 * We should never get here 2970 */ 2971 *outlenp = 0; 2972 return (EINVAL); 2973 } 2974 2975 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2976 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2977 2978 /* 2979 * For fixed length options, no sanity check 2980 * of passed in length is done. It is assumed *_optcom_req() 2981 * routines do the right thing. 
2982 */ 2983 2984 switch (level) { 2985 case SOL_SOCKET: 2986 switch (name) { 2987 case SO_REUSEADDR: 2988 if (!checkonly) 2989 udp->udp_reuseaddr = onoff; 2990 break; 2991 case SO_DEBUG: 2992 if (!checkonly) 2993 udp->udp_debug = onoff; 2994 break; 2995 /* 2996 * The following three items are available here, 2997 * but are only meaningful to IP. 2998 */ 2999 case SO_DONTROUTE: 3000 if (!checkonly) 3001 udp->udp_dontroute = onoff; 3002 break; 3003 case SO_USELOOPBACK: 3004 if (!checkonly) 3005 udp->udp_useloopback = onoff; 3006 break; 3007 case SO_BROADCAST: 3008 if (!checkonly) 3009 udp->udp_broadcast = onoff; 3010 break; 3011 3012 case SO_SNDBUF: 3013 if (*i1 > us->us_max_buf) { 3014 *outlenp = 0; 3015 return (ENOBUFS); 3016 } 3017 if (!checkonly) { 3018 q->q_hiwat = *i1; 3019 } 3020 break; 3021 case SO_RCVBUF: 3022 if (*i1 > us->us_max_buf) { 3023 *outlenp = 0; 3024 return (ENOBUFS); 3025 } 3026 if (!checkonly) { 3027 RD(q)->q_hiwat = *i1; 3028 rw_exit(&udp->udp_rwlock); 3029 (void) mi_set_sth_hiwat(RD(q), 3030 udp_set_rcv_hiwat(udp, *i1)); 3031 rw_enter(&udp->udp_rwlock, RW_WRITER); 3032 } 3033 break; 3034 case SO_DGRAM_ERRIND: 3035 if (!checkonly) 3036 udp->udp_dgram_errind = onoff; 3037 break; 3038 case SO_RECVUCRED: 3039 if (!checkonly) 3040 udp->udp_recvucred = onoff; 3041 break; 3042 case SO_ALLZONES: 3043 /* 3044 * "soft" error (negative) 3045 * option not handled at this level 3046 * Do not modify *outlenp. 
3047 */ 3048 return (-EINVAL); 3049 case SO_TIMESTAMP: 3050 if (!checkonly) 3051 udp->udp_timestamp = onoff; 3052 break; 3053 case SO_ANON_MLP: 3054 if (!checkonly) 3055 udp->udp_anon_mlp = onoff; 3056 break; 3057 case SO_MAC_EXEMPT: 3058 if (secpolicy_net_mac_aware(cr) != 0 || 3059 udp->udp_state != TS_UNBND) 3060 return (EACCES); 3061 if (!checkonly) 3062 udp->udp_mac_exempt = onoff; 3063 break; 3064 case SCM_UCRED: { 3065 struct ucred_s *ucr; 3066 cred_t *cr, *newcr; 3067 ts_label_t *tsl; 3068 3069 /* 3070 * Only sockets that have proper privileges and are 3071 * bound to MLPs will have any other value here, so 3072 * this implicitly tests for privilege to set label. 3073 */ 3074 if (connp->conn_mlp_type == mlptSingle) 3075 break; 3076 ucr = (struct ucred_s *)invalp; 3077 if (inlen != ucredsize || 3078 ucr->uc_labeloff < sizeof (*ucr) || 3079 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 3080 return (EINVAL); 3081 if (!checkonly) { 3082 mblk_t *mb; 3083 3084 if (attrs == NULL || 3085 (mb = attrs->udpattr_mb) == NULL) 3086 return (EINVAL); 3087 if ((cr = DB_CRED(mb)) == NULL) 3088 cr = udp->udp_connp->conn_cred; 3089 ASSERT(cr != NULL); 3090 if ((tsl = crgetlabel(cr)) == NULL) 3091 return (EINVAL); 3092 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 3093 tsl->tsl_doi, KM_NOSLEEP); 3094 if (newcr == NULL) 3095 return (ENOSR); 3096 mblk_setcred(mb, newcr); 3097 attrs->udpattr_credset = B_TRUE; 3098 crfree(newcr); 3099 } 3100 break; 3101 } 3102 case SO_EXCLBIND: 3103 if (!checkonly) 3104 udp->udp_exclbind = onoff; 3105 break; 3106 default: 3107 *outlenp = 0; 3108 return (EINVAL); 3109 } 3110 break; 3111 case IPPROTO_IP: 3112 if (udp->udp_family != AF_INET) { 3113 *outlenp = 0; 3114 return (ENOPROTOOPT); 3115 } 3116 switch (name) { 3117 case IP_OPTIONS: 3118 case T_IP_OPTIONS: 3119 /* Save options for use by IP. 
*/ 3120 newlen = inlen + udp->udp_label_len; 3121 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 3122 *outlenp = 0; 3123 return (EINVAL); 3124 } 3125 if (checkonly) 3126 break; 3127 3128 /* 3129 * Update the stored options taking into account 3130 * any CIPSO option which we should not overwrite. 3131 */ 3132 if (!tsol_option_set(&udp->udp_ip_snd_options, 3133 &udp->udp_ip_snd_options_len, 3134 udp->udp_label_len, invalp, inlen)) { 3135 *outlenp = 0; 3136 return (ENOMEM); 3137 } 3138 3139 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3140 UDPH_SIZE + udp->udp_ip_snd_options_len; 3141 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3142 rw_exit(&udp->udp_rwlock); 3143 (void) mi_set_sth_wroff(RD(q), sth_wroff); 3144 rw_enter(&udp->udp_rwlock, RW_WRITER); 3145 break; 3146 3147 case IP_TTL: 3148 if (!checkonly) { 3149 udp->udp_ttl = (uchar_t)*i1; 3150 } 3151 break; 3152 case IP_TOS: 3153 case T_IP_TOS: 3154 if (!checkonly) { 3155 udp->udp_type_of_service = (uchar_t)*i1; 3156 } 3157 break; 3158 case IP_MULTICAST_IF: { 3159 /* 3160 * TODO should check OPTMGMT reply and undo this if 3161 * there is an error. 
3162 */ 3163 struct in_addr *inap = (struct in_addr *)invalp; 3164 if (!checkonly) { 3165 udp->udp_multicast_if_addr = 3166 inap->s_addr; 3167 } 3168 break; 3169 } 3170 case IP_MULTICAST_TTL: 3171 if (!checkonly) 3172 udp->udp_multicast_ttl = *invalp; 3173 break; 3174 case IP_MULTICAST_LOOP: 3175 if (!checkonly) 3176 connp->conn_multicast_loop = *invalp; 3177 break; 3178 case IP_RECVOPTS: 3179 if (!checkonly) 3180 udp->udp_recvopts = onoff; 3181 break; 3182 case IP_RECVDSTADDR: 3183 if (!checkonly) 3184 udp->udp_recvdstaddr = onoff; 3185 break; 3186 case IP_RECVIF: 3187 if (!checkonly) 3188 udp->udp_recvif = onoff; 3189 break; 3190 case IP_RECVSLLA: 3191 if (!checkonly) 3192 udp->udp_recvslla = onoff; 3193 break; 3194 case IP_RECVTTL: 3195 if (!checkonly) 3196 udp->udp_recvttl = onoff; 3197 break; 3198 case IP_PKTINFO: { 3199 /* 3200 * This also handles IP_RECVPKTINFO. 3201 * IP_PKTINFO and IP_RECVPKTINFO have same value. 3202 * Differentiation is based on the size of the 3203 * argument passed in. 3204 */ 3205 struct in_pktinfo *pktinfop; 3206 ip4_pkt_t *attr_pktinfop; 3207 3208 if (checkonly) 3209 break; 3210 3211 if (inlen == sizeof (int)) { 3212 /* 3213 * This is IP_RECVPKTINFO option. 3214 * Keep a local copy of whether this option is 3215 * set or not and pass it down to IP for 3216 * processing. 3217 */ 3218 3219 udp->udp_ip_recvpktinfo = onoff; 3220 return (-EINVAL); 3221 } 3222 3223 if (attrs == NULL || 3224 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 3225 /* 3226 * sticky option or no buffer to return 3227 * the results. 
3228 */ 3229 return (EINVAL); 3230 } 3231 3232 if (inlen != sizeof (struct in_pktinfo)) 3233 return (EINVAL); 3234 3235 pktinfop = (struct in_pktinfo *)invalp; 3236 3237 /* 3238 * At least one of the values should be specified 3239 */ 3240 if (pktinfop->ipi_ifindex == 0 && 3241 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 3242 return (EINVAL); 3243 } 3244 3245 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 3246 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 3247 3248 break; 3249 } 3250 case IP_ADD_MEMBERSHIP: 3251 case IP_DROP_MEMBERSHIP: 3252 case IP_BLOCK_SOURCE: 3253 case IP_UNBLOCK_SOURCE: 3254 case IP_ADD_SOURCE_MEMBERSHIP: 3255 case IP_DROP_SOURCE_MEMBERSHIP: 3256 case MCAST_JOIN_GROUP: 3257 case MCAST_LEAVE_GROUP: 3258 case MCAST_BLOCK_SOURCE: 3259 case MCAST_UNBLOCK_SOURCE: 3260 case MCAST_JOIN_SOURCE_GROUP: 3261 case MCAST_LEAVE_SOURCE_GROUP: 3262 case IP_SEC_OPT: 3263 case IP_NEXTHOP: 3264 case IP_DHCPINIT_IF: 3265 /* 3266 * "soft" error (negative) 3267 * option not handled at this level 3268 * Do not modify *outlenp. 
3269 */ 3270 return (-EINVAL); 3271 case IP_BOUND_IF: 3272 if (!checkonly) 3273 udp->udp_bound_if = *i1; 3274 break; 3275 case IP_UNSPEC_SRC: 3276 if (!checkonly) 3277 udp->udp_unspec_source = onoff; 3278 break; 3279 case IP_BROADCAST_TTL: 3280 if (!checkonly) 3281 connp->conn_broadcast_ttl = *invalp; 3282 break; 3283 default: 3284 *outlenp = 0; 3285 return (EINVAL); 3286 } 3287 break; 3288 case IPPROTO_IPV6: { 3289 ip6_pkt_t *ipp; 3290 boolean_t sticky; 3291 3292 if (udp->udp_family != AF_INET6) { 3293 *outlenp = 0; 3294 return (ENOPROTOOPT); 3295 } 3296 /* 3297 * Deal with both sticky options and ancillary data 3298 */ 3299 sticky = B_FALSE; 3300 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 3301 NULL) { 3302 /* sticky options, or none */ 3303 ipp = &udp->udp_sticky_ipp; 3304 sticky = B_TRUE; 3305 } 3306 3307 switch (name) { 3308 case IPV6_MULTICAST_IF: 3309 if (!checkonly) 3310 udp->udp_multicast_if_index = *i1; 3311 break; 3312 case IPV6_UNICAST_HOPS: 3313 /* -1 means use default */ 3314 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3315 *outlenp = 0; 3316 return (EINVAL); 3317 } 3318 if (!checkonly) { 3319 if (*i1 == -1) { 3320 udp->udp_ttl = ipp->ipp_unicast_hops = 3321 us->us_ipv6_hoplimit; 3322 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3323 /* Pass modified value to IP. */ 3324 *i1 = udp->udp_ttl; 3325 } else { 3326 udp->udp_ttl = ipp->ipp_unicast_hops = 3327 (uint8_t)*i1; 3328 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3329 } 3330 /* Rebuild the header template */ 3331 error = udp_build_hdrs(udp); 3332 if (error != 0) { 3333 *outlenp = 0; 3334 return (error); 3335 } 3336 } 3337 break; 3338 case IPV6_MULTICAST_HOPS: 3339 /* -1 means use default */ 3340 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3341 *outlenp = 0; 3342 return (EINVAL); 3343 } 3344 if (!checkonly) { 3345 if (*i1 == -1) { 3346 udp->udp_multicast_ttl = 3347 ipp->ipp_multicast_hops = 3348 IP_DEFAULT_MULTICAST_TTL; 3349 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3350 /* Pass modified value to IP. 
*/ 3351 *i1 = udp->udp_multicast_ttl; 3352 } else { 3353 udp->udp_multicast_ttl = 3354 ipp->ipp_multicast_hops = 3355 (uint8_t)*i1; 3356 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3357 } 3358 } 3359 break; 3360 case IPV6_MULTICAST_LOOP: 3361 if (*i1 != 0 && *i1 != 1) { 3362 *outlenp = 0; 3363 return (EINVAL); 3364 } 3365 if (!checkonly) 3366 connp->conn_multicast_loop = *i1; 3367 break; 3368 case IPV6_JOIN_GROUP: 3369 case IPV6_LEAVE_GROUP: 3370 case MCAST_JOIN_GROUP: 3371 case MCAST_LEAVE_GROUP: 3372 case MCAST_BLOCK_SOURCE: 3373 case MCAST_UNBLOCK_SOURCE: 3374 case MCAST_JOIN_SOURCE_GROUP: 3375 case MCAST_LEAVE_SOURCE_GROUP: 3376 /* 3377 * "soft" error (negative) 3378 * option not handled at this level 3379 * Note: Do not modify *outlenp 3380 */ 3381 return (-EINVAL); 3382 case IPV6_BOUND_IF: 3383 if (!checkonly) 3384 udp->udp_bound_if = *i1; 3385 break; 3386 case IPV6_UNSPEC_SRC: 3387 if (!checkonly) 3388 udp->udp_unspec_source = onoff; 3389 break; 3390 /* 3391 * Set boolean switches for ancillary data delivery 3392 */ 3393 case IPV6_RECVPKTINFO: 3394 if (!checkonly) 3395 udp->udp_ip_recvpktinfo = onoff; 3396 break; 3397 case IPV6_RECVTCLASS: 3398 if (!checkonly) { 3399 udp->udp_ipv6_recvtclass = onoff; 3400 } 3401 break; 3402 case IPV6_RECVPATHMTU: 3403 if (!checkonly) { 3404 udp->udp_ipv6_recvpathmtu = onoff; 3405 } 3406 break; 3407 case IPV6_RECVHOPLIMIT: 3408 if (!checkonly) 3409 udp->udp_ipv6_recvhoplimit = onoff; 3410 break; 3411 case IPV6_RECVHOPOPTS: 3412 if (!checkonly) 3413 udp->udp_ipv6_recvhopopts = onoff; 3414 break; 3415 case IPV6_RECVDSTOPTS: 3416 if (!checkonly) 3417 udp->udp_ipv6_recvdstopts = onoff; 3418 break; 3419 case _OLD_IPV6_RECVDSTOPTS: 3420 if (!checkonly) 3421 udp->udp_old_ipv6_recvdstopts = onoff; 3422 break; 3423 case IPV6_RECVRTHDRDSTOPTS: 3424 if (!checkonly) 3425 udp->udp_ipv6_recvrthdrdstopts = onoff; 3426 break; 3427 case IPV6_RECVRTHDR: 3428 if (!checkonly) 3429 udp->udp_ipv6_recvrthdr = onoff; 3430 break; 3431 /* 3432 * Set 
sticky options or ancillary data. 3433 * If sticky options, (re)build any extension headers 3434 * that might be needed as a result. 3435 */ 3436 case IPV6_PKTINFO: 3437 /* 3438 * The source address and ifindex are verified 3439 * in ip_opt_set(). For ancillary data the 3440 * source address is checked in ip_wput_v6. 3441 */ 3442 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3443 return (EINVAL); 3444 if (checkonly) 3445 break; 3446 3447 if (inlen == 0) { 3448 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3449 ipp->ipp_sticky_ignored |= 3450 (IPPF_IFINDEX|IPPF_ADDR); 3451 } else { 3452 struct in6_pktinfo *pkti; 3453 3454 pkti = (struct in6_pktinfo *)invalp; 3455 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3456 ipp->ipp_addr = pkti->ipi6_addr; 3457 if (ipp->ipp_ifindex != 0) 3458 ipp->ipp_fields |= IPPF_IFINDEX; 3459 else 3460 ipp->ipp_fields &= ~IPPF_IFINDEX; 3461 if (!IN6_IS_ADDR_UNSPECIFIED( 3462 &ipp->ipp_addr)) 3463 ipp->ipp_fields |= IPPF_ADDR; 3464 else 3465 ipp->ipp_fields &= ~IPPF_ADDR; 3466 } 3467 if (sticky) { 3468 error = udp_build_hdrs(udp); 3469 if (error != 0) 3470 return (error); 3471 } 3472 break; 3473 case IPV6_HOPLIMIT: 3474 if (sticky) 3475 return (EINVAL); 3476 if (inlen != 0 && inlen != sizeof (int)) 3477 return (EINVAL); 3478 if (checkonly) 3479 break; 3480 3481 if (inlen == 0) { 3482 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3483 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3484 } else { 3485 if (*i1 > 255 || *i1 < -1) 3486 return (EINVAL); 3487 if (*i1 == -1) 3488 ipp->ipp_hoplimit = 3489 us->us_ipv6_hoplimit; 3490 else 3491 ipp->ipp_hoplimit = *i1; 3492 ipp->ipp_fields |= IPPF_HOPLIMIT; 3493 } 3494 break; 3495 case IPV6_TCLASS: 3496 if (inlen != 0 && inlen != sizeof (int)) 3497 return (EINVAL); 3498 if (checkonly) 3499 break; 3500 3501 if (inlen == 0) { 3502 ipp->ipp_fields &= ~IPPF_TCLASS; 3503 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3504 } else { 3505 if (*i1 > 255 || *i1 < -1) 3506 return (EINVAL); 3507 if (*i1 == -1) 3508 ipp->ipp_tclass = 
0; 3509 else 3510 ipp->ipp_tclass = *i1; 3511 ipp->ipp_fields |= IPPF_TCLASS; 3512 } 3513 if (sticky) { 3514 error = udp_build_hdrs(udp); 3515 if (error != 0) 3516 return (error); 3517 } 3518 break; 3519 case IPV6_NEXTHOP: 3520 /* 3521 * IP will verify that the nexthop is reachable 3522 * and fail for sticky options. 3523 */ 3524 if (inlen != 0 && inlen != sizeof (sin6_t)) 3525 return (EINVAL); 3526 if (checkonly) 3527 break; 3528 3529 if (inlen == 0) { 3530 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3531 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3532 } else { 3533 sin6_t *sin6 = (sin6_t *)invalp; 3534 3535 if (sin6->sin6_family != AF_INET6) 3536 return (EAFNOSUPPORT); 3537 if (IN6_IS_ADDR_V4MAPPED( 3538 &sin6->sin6_addr)) 3539 return (EADDRNOTAVAIL); 3540 ipp->ipp_nexthop = sin6->sin6_addr; 3541 if (!IN6_IS_ADDR_UNSPECIFIED( 3542 &ipp->ipp_nexthop)) 3543 ipp->ipp_fields |= IPPF_NEXTHOP; 3544 else 3545 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3546 } 3547 if (sticky) { 3548 error = udp_build_hdrs(udp); 3549 if (error != 0) 3550 return (error); 3551 } 3552 break; 3553 case IPV6_HOPOPTS: { 3554 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3555 /* 3556 * Sanity checks - minimum size, size a multiple of 3557 * eight bytes, and matching size passed in. 3558 */ 3559 if (inlen != 0 && 3560 inlen != (8 * (hopts->ip6h_len + 1))) 3561 return (EINVAL); 3562 3563 if (checkonly) 3564 break; 3565 3566 error = optcom_pkt_set(invalp, inlen, sticky, 3567 (uchar_t **)&ipp->ipp_hopopts, 3568 &ipp->ipp_hopoptslen, 3569 sticky ? 
udp->udp_label_len_v6 : 0); 3570 if (error != 0) 3571 return (error); 3572 if (ipp->ipp_hopoptslen == 0) { 3573 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3574 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3575 } else { 3576 ipp->ipp_fields |= IPPF_HOPOPTS; 3577 } 3578 if (sticky) { 3579 error = udp_build_hdrs(udp); 3580 if (error != 0) 3581 return (error); 3582 } 3583 break; 3584 } 3585 case IPV6_RTHDRDSTOPTS: { 3586 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3587 3588 /* 3589 * Sanity checks - minimum size, size a multiple of 3590 * eight bytes, and matching size passed in. 3591 */ 3592 if (inlen != 0 && 3593 inlen != (8 * (dopts->ip6d_len + 1))) 3594 return (EINVAL); 3595 3596 if (checkonly) 3597 break; 3598 3599 if (inlen == 0) { 3600 if (sticky && 3601 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3602 kmem_free(ipp->ipp_rtdstopts, 3603 ipp->ipp_rtdstoptslen); 3604 ipp->ipp_rtdstopts = NULL; 3605 ipp->ipp_rtdstoptslen = 0; 3606 } 3607 3608 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3609 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3610 } else { 3611 error = optcom_pkt_set(invalp, inlen, sticky, 3612 (uchar_t **)&ipp->ipp_rtdstopts, 3613 &ipp->ipp_rtdstoptslen, 0); 3614 if (error != 0) 3615 return (error); 3616 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3617 } 3618 if (sticky) { 3619 error = udp_build_hdrs(udp); 3620 if (error != 0) 3621 return (error); 3622 } 3623 break; 3624 } 3625 case IPV6_DSTOPTS: { 3626 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3627 3628 /* 3629 * Sanity checks - minimum size, size a multiple of 3630 * eight bytes, and matching size passed in. 
3631 */ 3632 if (inlen != 0 && 3633 inlen != (8 * (dopts->ip6d_len + 1))) 3634 return (EINVAL); 3635 3636 if (checkonly) 3637 break; 3638 3639 if (inlen == 0) { 3640 if (sticky && 3641 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3642 kmem_free(ipp->ipp_dstopts, 3643 ipp->ipp_dstoptslen); 3644 ipp->ipp_dstopts = NULL; 3645 ipp->ipp_dstoptslen = 0; 3646 } 3647 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3648 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3649 } else { 3650 error = optcom_pkt_set(invalp, inlen, sticky, 3651 (uchar_t **)&ipp->ipp_dstopts, 3652 &ipp->ipp_dstoptslen, 0); 3653 if (error != 0) 3654 return (error); 3655 ipp->ipp_fields |= IPPF_DSTOPTS; 3656 } 3657 if (sticky) { 3658 error = udp_build_hdrs(udp); 3659 if (error != 0) 3660 return (error); 3661 } 3662 break; 3663 } 3664 case IPV6_RTHDR: { 3665 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3666 3667 /* 3668 * Sanity checks - minimum size, size a multiple of 3669 * eight bytes, and matching size passed in. 3670 */ 3671 if (inlen != 0 && 3672 inlen != (8 * (rt->ip6r_len + 1))) 3673 return (EINVAL); 3674 3675 if (checkonly) 3676 break; 3677 3678 if (inlen == 0) { 3679 if (sticky && 3680 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3681 kmem_free(ipp->ipp_rthdr, 3682 ipp->ipp_rthdrlen); 3683 ipp->ipp_rthdr = NULL; 3684 ipp->ipp_rthdrlen = 0; 3685 } 3686 ipp->ipp_fields &= ~IPPF_RTHDR; 3687 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3688 } else { 3689 error = optcom_pkt_set(invalp, inlen, sticky, 3690 (uchar_t **)&ipp->ipp_rthdr, 3691 &ipp->ipp_rthdrlen, 0); 3692 if (error != 0) 3693 return (error); 3694 ipp->ipp_fields |= IPPF_RTHDR; 3695 } 3696 if (sticky) { 3697 error = udp_build_hdrs(udp); 3698 if (error != 0) 3699 return (error); 3700 } 3701 break; 3702 } 3703 3704 case IPV6_DONTFRAG: 3705 if (checkonly) 3706 break; 3707 3708 if (onoff) { 3709 ipp->ipp_fields |= IPPF_DONTFRAG; 3710 } else { 3711 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3712 } 3713 break; 3714 3715 case IPV6_USE_MIN_MTU: 3716 if (inlen != sizeof (int)) 3717 return 
(EINVAL); 3718 3719 if (*i1 < -1 || *i1 > 1) 3720 return (EINVAL); 3721 3722 if (checkonly) 3723 break; 3724 3725 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3726 ipp->ipp_use_min_mtu = *i1; 3727 break; 3728 3729 case IPV6_BOUND_PIF: 3730 case IPV6_SEC_OPT: 3731 case IPV6_DONTFAILOVER_IF: 3732 case IPV6_SRC_PREFERENCES: 3733 case IPV6_V6ONLY: 3734 /* Handled at the IP level */ 3735 return (-EINVAL); 3736 default: 3737 *outlenp = 0; 3738 return (EINVAL); 3739 } 3740 break; 3741 } /* end IPPROTO_IPV6 */ 3742 case IPPROTO_UDP: 3743 switch (name) { 3744 case UDP_ANONPRIVBIND: 3745 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 3746 *outlenp = 0; 3747 return (error); 3748 } 3749 if (!checkonly) { 3750 udp->udp_anon_priv_bind = onoff; 3751 } 3752 break; 3753 case UDP_EXCLBIND: 3754 if (!checkonly) 3755 udp->udp_exclbind = onoff; 3756 break; 3757 case UDP_RCVHDR: 3758 if (!checkonly) 3759 udp->udp_rcvhdr = onoff; 3760 break; 3761 case UDP_NAT_T_ENDPOINT: 3762 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 3763 *outlenp = 0; 3764 return (error); 3765 } 3766 3767 /* 3768 * Use udp_family instead so we can avoid ambiguitites 3769 * with AF_INET6 sockets that may switch from IPv4 3770 * to IPv6. 3771 */ 3772 if (udp->udp_family != AF_INET) { 3773 *outlenp = 0; 3774 return (EAFNOSUPPORT); 3775 } 3776 3777 if (!checkonly) { 3778 udp->udp_nat_t_endpoint = onoff; 3779 3780 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3781 UDPH_SIZE + udp->udp_ip_snd_options_len; 3782 3783 /* Also, adjust wroff */ 3784 if (onoff) { 3785 udp->udp_max_hdr_len += 3786 sizeof (uint32_t); 3787 } 3788 (void) mi_set_sth_wroff(RD(q), 3789 udp->udp_max_hdr_len + us->us_wroff_extra); 3790 } 3791 break; 3792 default: 3793 *outlenp = 0; 3794 return (EINVAL); 3795 } 3796 break; 3797 default: 3798 *outlenp = 0; 3799 return (EINVAL); 3800 } 3801 /* 3802 * Common case of OK return with outval same as inval. 
 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;
	return (0);
}

/*
 * Option-set entry point for UDP.  Serializes with other option updates by
 * holding udp_rwlock as a writer across udp_opt_set_locked().
 */
int
udp_opt_set(queue_t *q, uint_t optset_context, int level,
    int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
    uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
{
	udp_t	*udp;
	int	err;

	udp = Q_TO_UDP(q);

	rw_enter(&udp->udp_rwlock, RW_WRITER);
	err = udp_opt_set_locked(q, optset_context, level, name, inlen, invalp,
	    outlenp, outvalp, thisdg_attrs, cr, mblk);
	rw_exit(&udp->udp_rwlock);
	return (err);
}

/*
 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
 * The headers include ip6i_t (if needed), ip6_t, any sticky extension
 * headers, and the udp header.
 * Returns failure if can't allocate memory.
 * Caller must hold udp_rwlock as a writer (asserted below); note that the
 * lock is dropped and reacquired around mi_set_sth_wroff().
 */
static int
udp_build_hdrs(udp_t *udp)
{
	udp_stack_t *us = udp->udp_us;
	uchar_t	*hdrs;
	uint_t	hdrs_len;
	ip6_t	*ip6h;
	ip6i_t	*ip6i;
	udpha_t	*udpha;
	ip6_pkt_t *ipp = &udp->udp_sticky_ipp;
	size_t	sth_wroff;

	ASSERT(RW_WRITE_HELD(&udp->udp_rwlock));
	hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
	ASSERT(hdrs_len != 0);
	if (hdrs_len != udp->udp_sticky_hdrs_len) {
		/* Need to reallocate */
		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
		if (hdrs == NULL)
			return (ENOMEM);

		/* Free the old template only after the new one is in hand. */
		if (udp->udp_sticky_hdrs_len != 0) {
			kmem_free(udp->udp_sticky_hdrs,
			    udp->udp_sticky_hdrs_len);
		}
		udp->udp_sticky_hdrs = hdrs;
		udp->udp_sticky_hdrs_len = hdrs_len;
	}
	ip_build_hdrs_v6(udp->udp_sticky_hdrs,
	    udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP);

	/* Set header fields not in ipp */
	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
		ip6i = (ip6i_t *)udp->udp_sticky_hdrs;
		ip6h = (ip6_t *)&ip6i[1];
	} else {
		ip6h = (ip6_t *)udp->udp_sticky_hdrs;
	}

	/* If no sticky source address was supplied (IPPF_ADDR), use udp_v6src */
	if (!(ipp->ipp_fields & IPPF_ADDR))
		ip6h->ip6_src = udp->udp_v6src;

	/* The UDP header sits at the very end of the template. */
	udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE);
	udpha->uha_src_port = udp->udp_port;

	/* Try to get everything in a single mblk */
	if (hdrs_len > udp->udp_max_hdr_len) {
		udp->udp_max_hdr_len = hdrs_len;
		sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra;
		/*
		 * NOTE(review): udp_rwlock is released across
		 * mi_set_sth_wroff() -- presumably because the STREAMS call
		 * may block -- so other writers can slip in here; confirm
		 * callers tolerate that before relying on lock continuity.
		 */
		rw_exit(&udp->udp_rwlock);
		(void) mi_set_sth_wroff(udp->udp_connp->conn_rq, sth_wroff);
		rw_enter(&udp->udp_rwlock, RW_WRITER);
	}
	return (0);
}

/*
 * This routine retrieves the value of an ND variable in a udpparam_t
 * structure.  It is called through nd_getset when a user reads the
 * variable.
 */
/* ARGSUSED */
static int
udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udpparam_t *udppa = (udpparam_t *)cp;

	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
	return (0);
}

/*
 * Walk through the param array specified registering each element with the
 * named dispatch (ND) handler.
3909 */ 3910 static boolean_t 3911 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3912 { 3913 for (; cnt-- > 0; udppa++) { 3914 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3915 if (!nd_load(ndp, udppa->udp_param_name, 3916 udp_param_get, udp_param_set, 3917 (caddr_t)udppa)) { 3918 nd_free(ndp); 3919 return (B_FALSE); 3920 } 3921 } 3922 } 3923 if (!nd_load(ndp, "udp_extra_priv_ports", 3924 udp_extra_priv_ports_get, NULL, NULL)) { 3925 nd_free(ndp); 3926 return (B_FALSE); 3927 } 3928 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3929 NULL, udp_extra_priv_ports_add, NULL)) { 3930 nd_free(ndp); 3931 return (B_FALSE); 3932 } 3933 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3934 NULL, udp_extra_priv_ports_del, NULL)) { 3935 nd_free(ndp); 3936 return (B_FALSE); 3937 } 3938 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3939 NULL)) { 3940 nd_free(ndp); 3941 return (B_FALSE); 3942 } 3943 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3944 NULL)) { 3945 nd_free(ndp); 3946 return (B_FALSE); 3947 } 3948 return (B_TRUE); 3949 } 3950 3951 /* This routine sets an ND variable in a udpparam_t structure. */ 3952 /* ARGSUSED */ 3953 static int 3954 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3955 { 3956 long new_value; 3957 udpparam_t *udppa = (udpparam_t *)cp; 3958 3959 /* 3960 * Fail the request if the new value does not lie within the 3961 * required bounds. 3962 */ 3963 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3964 new_value < udppa->udp_param_min || 3965 new_value > udppa->udp_param_max) { 3966 return (EINVAL); 3967 } 3968 3969 /* Set the new value */ 3970 udppa->udp_param_value = new_value; 3971 return (0); 3972 } 3973 3974 /* 3975 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3976 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3977 * just count the length needed for allocation. 
 * If 'dbuf' is non-NULL,
 * then it's assumed to be allocated to be large enough.
 *
 * Returns zero if trimming of the security option causes all options to go
 * away.
 */
static size_t
copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
{
	struct T_opthdr *toh;
	size_t hol = ipp->ipp_hopoptslen;
	ip6_hbh_t *dstopt = NULL;
	const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
	size_t tlen, olen, plen;
	boolean_t deleting;
	const struct ip6_opt *sopt, *lastpad;
	struct ip6_opt *dopt;

	/* When a destination buffer is supplied, lay down the T_opthdr now. */
	if ((toh = (struct T_opthdr *)dbuf) != NULL) {
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_HOPOPTS;
		toh->status = 0;
		dstopt = (ip6_hbh_t *)(toh + 1);
	}

	/*
	 * If labeling is enabled, then skip the label option
	 * but get other options if there are any.
	 */
	if (is_system_labeled()) {
		dopt = NULL;
		if (dstopt != NULL) {
			/* will fill in ip6h_len later */
			dstopt->ip6h_nxt = srcopt->ip6h_nxt;
			dopt = (struct ip6_opt *)(dstopt + 1);
		}
		sopt = (const struct ip6_opt *)(srcopt + 1);
		hol -= sizeof (*srcopt);
		/* tlen counts output bytes: start with the hbh header itself */
		tlen = sizeof (*dstopt);
		lastpad = NULL;
		deleting = B_FALSE;
		/*
		 * This loop finds the first (lastpad pointer) of any number of
		 * pads that precedes the security option, then treats the
		 * security option as though it were a pad, and then finds the
		 * next non-pad option (or end of list).
		 *
		 * It then treats the entire block as one big pad.  To preserve
		 * alignment of any options that follow, or just the end of the
		 * list, it computes a minimal new padding size that keeps the
		 * same alignment for the next option.
		 *
		 * If it encounters just a sequence of pads with no security
		 * option, those are copied as-is rather than collapsed.
		 *
		 * Note that to handle the end of list case, the code makes one
		 * loop with 'hol' set to zero.
		 */
		for (;;) {
			if (hol > 0) {
				/* Pad1 is a single byte; just note and skip */
				if (sopt->ip6o_type == IP6OPT_PAD1) {
					if (lastpad == NULL)
						lastpad = sopt;
					sopt = (const struct ip6_opt *)
					    &sopt->ip6o_len;
					hol--;
					continue;
				}
				olen = sopt->ip6o_len + sizeof (*sopt);
				/* clamp to the remaining option bytes */
				if (olen > hol)
					olen = hol;
				if (sopt->ip6o_type == IP6OPT_PADN ||
				    sopt->ip6o_type == ip6opt_ls) {
					if (sopt->ip6o_type == ip6opt_ls)
						deleting = B_TRUE;
					if (lastpad == NULL)
						lastpad = sopt;
					sopt = (const struct ip6_opt *)
					    ((const char *)sopt + olen);
					hol -= olen;
					continue;
				}
			} else {
				/* if nothing was copied at all, then delete */
				if (tlen == sizeof (*dstopt))
					return (0);
				/* last pass; pick up any trailing padding */
				olen = 0;
			}
			if (deleting) {
				/*
				 * compute aligning effect of deleted material
				 * to reproduce with pad.
				 */
				plen = ((const char *)sopt -
				    (const char *)lastpad) & 7;
				tlen += plen;
				if (dopt != NULL) {
					if (plen == 1) {
						dopt->ip6o_type = IP6OPT_PAD1;
					} else if (plen > 1) {
						plen -= sizeof (*dopt);
						dopt->ip6o_type = IP6OPT_PADN;
						dopt->ip6o_len = plen;
						if (plen > 0)
							bzero(dopt + 1, plen);
					}
					dopt = (struct ip6_opt *)
					    ((char *)dopt + plen);
				}
				deleting = B_FALSE;
				lastpad = NULL;
			}
			/* if there's uncopied padding, then copy that now */
			if (lastpad != NULL) {
				olen += (const char *)sopt -
				    (const char *)lastpad;
				sopt = lastpad;
				lastpad = NULL;
			}
			if (dopt != NULL && olen > 0) {
				bcopy(sopt, dopt, olen);
				dopt = (struct ip6_opt *)((char *)dopt + olen);
			}
			if (hol == 0)
				break;
			tlen += olen;
			sopt = (const struct ip6_opt *)
			    ((const char *)sopt + olen);
			hol -= olen;
		}
		/* go back and patch up the length value, rounded upward */
		if (dstopt != NULL)
			dstopt->ip6h_len = (tlen - 1) >> 3;
	} else {
		/* No labeling: a straight byte-for-byte copy of the options */
		tlen = hol;
		if (dstopt != NULL)
			bcopy(srcopt, dstopt, hol);
	}

	/* Account for the T_opthdr itself in the reported length. */
	tlen += sizeof (*toh);
	if (toh != NULL)
		toh->len = tlen;

	return (tlen);
}

/*
 * Update udp_ip_rcv_options/udp_ip_rcv_options_len from the packet.
 * Called when options are received, and when no options are received but
 * udp_ip_rcv_options_len has previously recorded options.
 */
static void
udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len)
{
	/* Save the options if any */
	if (opt_len > 0) {
		if (opt_len > udp->udp_ip_rcv_options_len) {
			/* Need to allocate larger buffer */
			if (udp->udp_ip_rcv_options_len != 0)
				mi_free((char *)udp->udp_ip_rcv_options);
			udp->udp_ip_rcv_options_len = 0;
			udp->udp_ip_rcv_options =
			    (uchar_t *)mi_alloc(opt_len, BPRI_HI);
			/*
			 * If mi_alloc() fails, the length stays 0 and the
			 * options are silently dropped (no copy below).
			 */
			if (udp->udp_ip_rcv_options != NULL)
				udp->udp_ip_rcv_options_len = opt_len;
		}
		if (udp->udp_ip_rcv_options_len != 0) {
			bcopy(opt, udp->udp_ip_rcv_options, opt_len);
			/* Adjust length if we are reusing the space */
			udp->udp_ip_rcv_options_len = opt_len;
		}
	} else if (udp->udp_ip_rcv_options_len != 0) {
		/* Clear out previously recorded options */
		mi_free((char *)udp->udp_ip_rcv_options);
		udp->udp_ip_rcv_options = NULL;
		udp->udp_ip_rcv_options_len = 0;
	}
}

/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2)
{
	conn_t			*connp = (conn_t *)arg1;
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;	/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			opt_len;
	int			udi_size;	/* Size of T_unitdata_ind */
	int			mp_len;
	udp_t			*udp;
	udpha_t			*udpha;
	int			ipversion;
	ip6_pkt_t		ipp;
	ip6_t			*ip6h;
	ip6i_t			*ip6i;
	mblk_t			*mp1;
	mblk_t			*options_mp = NULL;
	ip_pktinfo_t		*pinfo = NULL;
	cred_t			*cr = NULL;
	pid_t			cpid;
	uint32_t		udp_ip_rcv_options_len;
udp_bits_t udp_bits; 4181 cred_t *rcr = connp->conn_cred; 4182 udp_stack_t *us; 4183 4184 ASSERT(connp->conn_flags & IPCL_UDPCONN); 4185 4186 udp = connp->conn_udp; 4187 us = udp->udp_us; 4188 rptr = mp->b_rptr; 4189 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4190 ASSERT(OK_32PTR(rptr)); 4191 4192 /* 4193 * IP should have prepended the options data in an M_CTL 4194 * Check M_CTL "type" to make sure are not here bcos of 4195 * a valid ICMP message 4196 */ 4197 if (DB_TYPE(mp) == M_CTL) { 4198 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 4199 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 4200 IN_PKTINFO) { 4201 /* 4202 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 4203 * has been prepended to the packet by IP. We need to 4204 * extract the mblk and adjust the rptr 4205 */ 4206 pinfo = (ip_pktinfo_t *)mp->b_rptr; 4207 options_mp = mp; 4208 mp = mp->b_cont; 4209 rptr = mp->b_rptr; 4210 UDP_STAT(us, udp_in_pktinfo); 4211 } else { 4212 /* 4213 * ICMP messages. 4214 */ 4215 udp_icmp_error(connp->conn_rq, mp); 4216 return; 4217 } 4218 } 4219 4220 mp_len = msgdsize(mp); 4221 /* 4222 * This is the inbound data path. 4223 * First, we check to make sure the IP version number is correct, 4224 * and then pull the IP and UDP headers into the first mblk. 
4225 */ 4226 4227 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4228 ipp.ipp_fields = 0; 4229 4230 ipversion = IPH_HDR_VERSION(rptr); 4231 4232 rw_enter(&udp->udp_rwlock, RW_READER); 4233 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 4234 udp_bits = udp->udp_bits; 4235 rw_exit(&udp->udp_rwlock); 4236 4237 switch (ipversion) { 4238 case IPV4_VERSION: 4239 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4240 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4241 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4242 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 4243 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 4244 udp->udp_family == AF_INET) { 4245 /* 4246 * Record/update udp_ip_rcv_options with the lock 4247 * held. Not needed for AF_INET6 sockets 4248 * since they don't support a getsockopt of IP_OPTIONS. 4249 */ 4250 rw_enter(&udp->udp_rwlock, RW_WRITER); 4251 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 4252 opt_len); 4253 rw_exit(&udp->udp_rwlock); 4254 } 4255 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 4256 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4257 udp->udp_ip_recvpktinfo) { 4258 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 4259 ipp.ipp_fields |= IPPF_IFINDEX; 4260 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 4261 } 4262 } 4263 break; 4264 case IPV6_VERSION: 4265 /* 4266 * IPv6 packets can only be received by applications 4267 * that are prepared to receive IPv6 addresses. 4268 * The IP fanout must ensure this. 
4269 */ 4270 ASSERT(udp->udp_family == AF_INET6); 4271 4272 ip6h = (ip6_t *)rptr; 4273 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4274 4275 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4276 uint8_t nexthdrp; 4277 /* Look for ifindex information */ 4278 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4279 ip6i = (ip6i_t *)ip6h; 4280 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4281 goto tossit; 4282 4283 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4284 ASSERT(ip6i->ip6i_ifindex != 0); 4285 ipp.ipp_fields |= IPPF_IFINDEX; 4286 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4287 } 4288 rptr = (uchar_t *)&ip6i[1]; 4289 mp->b_rptr = rptr; 4290 if (rptr == mp->b_wptr) { 4291 mp1 = mp->b_cont; 4292 freeb(mp); 4293 mp = mp1; 4294 rptr = mp->b_rptr; 4295 } 4296 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4297 goto tossit; 4298 ip6h = (ip6_t *)rptr; 4299 mp_len = msgdsize(mp); 4300 } 4301 /* 4302 * Find any potentially interesting extension headers 4303 * as well as the length of the IPv6 + extension 4304 * headers. 4305 */ 4306 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4307 UDPH_SIZE; 4308 ASSERT(nexthdrp == IPPROTO_UDP); 4309 } else { 4310 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4311 ip6i = NULL; 4312 } 4313 break; 4314 default: 4315 ASSERT(0); 4316 } 4317 4318 /* 4319 * IP inspected the UDP header thus all of it must be in the mblk. 4320 * UDP length check is performed for IPv6 packets and IPv4 packets 4321 * to check if the size of the packet as specified 4322 * by the header is the same as the physical size of the packet. 4323 * FIXME? Didn't IP already check this? 4324 */ 4325 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4326 if ((MBLKL(mp) < hdr_length) || 4327 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4328 goto tossit; 4329 } 4330 4331 4332 /* Walk past the headers unless IP_RECVHDR was set. */ 4333 if (!udp_bits.udpb_rcvhdr) { 4334 mp->b_rptr = rptr + hdr_length; 4335 mp_len -= hdr_length; 4336 } 4337 4338 /* 4339 * This is the inbound data path. 
Packets are passed upstream as 4340 * T_UNITDATA_IND messages with full IP headers still attached. 4341 */ 4342 if (udp->udp_family == AF_INET) { 4343 sin_t *sin; 4344 4345 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4346 4347 /* 4348 * Normally only send up the source address. 4349 * If IP_RECVDSTADDR is set we include the destination IP 4350 * address as an option. With IP_RECVOPTS we include all 4351 * the IP options. 4352 */ 4353 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4354 if (udp_bits.udpb_recvdstaddr) { 4355 udi_size += sizeof (struct T_opthdr) + 4356 sizeof (struct in_addr); 4357 UDP_STAT(us, udp_in_recvdstaddr); 4358 } 4359 4360 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 4361 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4362 udi_size += sizeof (struct T_opthdr) + 4363 sizeof (struct in_pktinfo); 4364 UDP_STAT(us, udp_ip_rcvpktinfo); 4365 } 4366 4367 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 4368 udi_size += sizeof (struct T_opthdr) + opt_len; 4369 UDP_STAT(us, udp_in_recvopts); 4370 } 4371 4372 /* 4373 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4374 * space accordingly 4375 */ 4376 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4377 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4378 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4379 UDP_STAT(us, udp_in_recvif); 4380 } 4381 4382 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4383 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4384 udi_size += sizeof (struct T_opthdr) + 4385 sizeof (struct sockaddr_dl); 4386 UDP_STAT(us, udp_in_recvslla); 4387 } 4388 4389 if ((udp_bits.udpb_recvucred) && 4390 (cr = DB_CRED(mp)) != NULL) { 4391 udi_size += sizeof (struct T_opthdr) + ucredsize; 4392 cpid = DB_CPID(mp); 4393 UDP_STAT(us, udp_in_recvucred); 4394 } 4395 4396 /* 4397 * If SO_TIMESTAMP is set allocate the appropriate sized 4398 * buffer. 
Since gethrestime() expects a pointer aligned 4399 * argument, we allocate space necessary for extra 4400 * alignment (even though it might not be used). 4401 */ 4402 if (udp_bits.udpb_timestamp) { 4403 udi_size += sizeof (struct T_opthdr) + 4404 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4405 UDP_STAT(us, udp_in_timestamp); 4406 } 4407 4408 /* 4409 * If IP_RECVTTL is set allocate the appropriate sized buffer 4410 */ 4411 if (udp_bits.udpb_recvttl) { 4412 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4413 UDP_STAT(us, udp_in_recvttl); 4414 } 4415 4416 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4417 mp1 = allocb(udi_size, BPRI_MED); 4418 if (mp1 == NULL) { 4419 freemsg(mp); 4420 if (options_mp != NULL) 4421 freeb(options_mp); 4422 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4423 return; 4424 } 4425 mp1->b_cont = mp; 4426 mp = mp1; 4427 mp->b_datap->db_type = M_PROTO; 4428 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4429 mp->b_wptr = (uchar_t *)tudi + udi_size; 4430 tudi->PRIM_type = T_UNITDATA_IND; 4431 tudi->SRC_length = sizeof (sin_t); 4432 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4433 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4434 sizeof (sin_t); 4435 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4436 tudi->OPT_length = udi_size; 4437 sin = (sin_t *)&tudi[1]; 4438 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4439 sin->sin_port = udpha->uha_src_port; 4440 sin->sin_family = udp->udp_family; 4441 *(uint32_t *)&sin->sin_zero[0] = 0; 4442 *(uint32_t *)&sin->sin_zero[4] = 0; 4443 4444 /* 4445 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4446 * IP_RECVTTL has been set. 4447 */ 4448 if (udi_size != 0) { 4449 /* 4450 * Copy in destination address before options to avoid 4451 * any padding issues. 
4452 */ 4453 char *dstopt; 4454 4455 dstopt = (char *)&sin[1]; 4456 if (udp_bits.udpb_recvdstaddr) { 4457 struct T_opthdr *toh; 4458 ipaddr_t *dstptr; 4459 4460 toh = (struct T_opthdr *)dstopt; 4461 toh->level = IPPROTO_IP; 4462 toh->name = IP_RECVDSTADDR; 4463 toh->len = sizeof (struct T_opthdr) + 4464 sizeof (ipaddr_t); 4465 toh->status = 0; 4466 dstopt += sizeof (struct T_opthdr); 4467 dstptr = (ipaddr_t *)dstopt; 4468 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4469 dstopt += sizeof (ipaddr_t); 4470 udi_size -= toh->len; 4471 } 4472 4473 if (udp_bits.udpb_recvopts && opt_len > 0) { 4474 struct T_opthdr *toh; 4475 4476 toh = (struct T_opthdr *)dstopt; 4477 toh->level = IPPROTO_IP; 4478 toh->name = IP_RECVOPTS; 4479 toh->len = sizeof (struct T_opthdr) + opt_len; 4480 toh->status = 0; 4481 dstopt += sizeof (struct T_opthdr); 4482 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4483 opt_len); 4484 dstopt += opt_len; 4485 udi_size -= toh->len; 4486 } 4487 4488 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4489 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4490 struct T_opthdr *toh; 4491 struct in_pktinfo *pktinfop; 4492 4493 toh = (struct T_opthdr *)dstopt; 4494 toh->level = IPPROTO_IP; 4495 toh->name = IP_PKTINFO; 4496 toh->len = sizeof (struct T_opthdr) + 4497 sizeof (*pktinfop); 4498 toh->status = 0; 4499 dstopt += sizeof (struct T_opthdr); 4500 pktinfop = (struct in_pktinfo *)dstopt; 4501 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4502 pktinfop->ipi_spec_dst = 4503 pinfo->ip_pkt_match_addr; 4504 pktinfop->ipi_addr.s_addr = 4505 ((ipha_t *)rptr)->ipha_dst; 4506 4507 dstopt += sizeof (struct in_pktinfo); 4508 udi_size -= toh->len; 4509 } 4510 4511 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4512 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4513 4514 struct T_opthdr *toh; 4515 struct sockaddr_dl *dstptr; 4516 4517 toh = (struct T_opthdr *)dstopt; 4518 toh->level = IPPROTO_IP; 4519 toh->name = IP_RECVSLLA; 4520 toh->len = sizeof (struct T_opthdr) + 4521 sizeof 
(struct sockaddr_dl); 4522 toh->status = 0; 4523 dstopt += sizeof (struct T_opthdr); 4524 dstptr = (struct sockaddr_dl *)dstopt; 4525 bcopy(&pinfo->ip_pkt_slla, dstptr, 4526 sizeof (struct sockaddr_dl)); 4527 dstopt += sizeof (struct sockaddr_dl); 4528 udi_size -= toh->len; 4529 } 4530 4531 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4532 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4533 4534 struct T_opthdr *toh; 4535 uint_t *dstptr; 4536 4537 toh = (struct T_opthdr *)dstopt; 4538 toh->level = IPPROTO_IP; 4539 toh->name = IP_RECVIF; 4540 toh->len = sizeof (struct T_opthdr) + 4541 sizeof (uint_t); 4542 toh->status = 0; 4543 dstopt += sizeof (struct T_opthdr); 4544 dstptr = (uint_t *)dstopt; 4545 *dstptr = pinfo->ip_pkt_ifindex; 4546 dstopt += sizeof (uint_t); 4547 udi_size -= toh->len; 4548 } 4549 4550 if (cr != NULL) { 4551 struct T_opthdr *toh; 4552 4553 toh = (struct T_opthdr *)dstopt; 4554 toh->level = SOL_SOCKET; 4555 toh->name = SCM_UCRED; 4556 toh->len = sizeof (struct T_opthdr) + ucredsize; 4557 toh->status = 0; 4558 dstopt += sizeof (struct T_opthdr); 4559 (void) cred2ucred(cr, cpid, dstopt, rcr); 4560 dstopt += ucredsize; 4561 udi_size -= toh->len; 4562 } 4563 4564 if (udp_bits.udpb_timestamp) { 4565 struct T_opthdr *toh; 4566 4567 toh = (struct T_opthdr *)dstopt; 4568 toh->level = SOL_SOCKET; 4569 toh->name = SCM_TIMESTAMP; 4570 toh->len = sizeof (struct T_opthdr) + 4571 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4572 toh->status = 0; 4573 dstopt += sizeof (struct T_opthdr); 4574 /* Align for gethrestime() */ 4575 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4576 sizeof (intptr_t)); 4577 gethrestime((timestruc_t *)dstopt); 4578 dstopt = (char *)toh + toh->len; 4579 udi_size -= toh->len; 4580 } 4581 4582 /* 4583 * CAUTION: 4584 * Due to aligment issues 4585 * Processing of IP_RECVTTL option 4586 * should always be the last. Adding 4587 * any option processing after this will 4588 * cause alignment panic. 
4589 */ 4590 if (udp_bits.udpb_recvttl) { 4591 struct T_opthdr *toh; 4592 uint8_t *dstptr; 4593 4594 toh = (struct T_opthdr *)dstopt; 4595 toh->level = IPPROTO_IP; 4596 toh->name = IP_RECVTTL; 4597 toh->len = sizeof (struct T_opthdr) + 4598 sizeof (uint8_t); 4599 toh->status = 0; 4600 dstopt += sizeof (struct T_opthdr); 4601 dstptr = (uint8_t *)dstopt; 4602 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4603 dstopt += sizeof (uint8_t); 4604 udi_size -= toh->len; 4605 } 4606 4607 /* Consumed all of allocated space */ 4608 ASSERT(udi_size == 0); 4609 } 4610 } else { 4611 sin6_t *sin6; 4612 4613 /* 4614 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4615 * 4616 * Normally we only send up the address. If receiving of any 4617 * optional receive side information is enabled, we also send 4618 * that up as options. 4619 */ 4620 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4621 4622 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4623 IPPF_RTHDR|IPPF_IFINDEX)) { 4624 if ((udp_bits.udpb_ipv6_recvhopopts) && 4625 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4626 size_t hlen; 4627 4628 UDP_STAT(us, udp_in_recvhopopts); 4629 hlen = copy_hop_opts(&ipp, NULL); 4630 if (hlen == 0) 4631 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4632 udi_size += hlen; 4633 } 4634 if (((udp_bits.udpb_ipv6_recvdstopts) || 4635 udp_bits.udpb_old_ipv6_recvdstopts) && 4636 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4637 udi_size += sizeof (struct T_opthdr) + 4638 ipp.ipp_dstoptslen; 4639 UDP_STAT(us, udp_in_recvdstopts); 4640 } 4641 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4642 udp_bits.udpb_ipv6_recvrthdr && 4643 (ipp.ipp_fields & IPPF_RTHDR)) || 4644 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4645 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4646 udi_size += sizeof (struct T_opthdr) + 4647 ipp.ipp_rtdstoptslen; 4648 UDP_STAT(us, udp_in_recvrtdstopts); 4649 } 4650 if ((udp_bits.udpb_ipv6_recvrthdr) && 4651 (ipp.ipp_fields & IPPF_RTHDR)) { 4652 udi_size += sizeof (struct T_opthdr) + 4653 ipp.ipp_rthdrlen; 
4654 UDP_STAT(us, udp_in_recvrthdr); 4655 } 4656 if ((udp_bits.udpb_ip_recvpktinfo) && 4657 (ipp.ipp_fields & IPPF_IFINDEX)) { 4658 udi_size += sizeof (struct T_opthdr) + 4659 sizeof (struct in6_pktinfo); 4660 UDP_STAT(us, udp_in_recvpktinfo); 4661 } 4662 4663 } 4664 if ((udp_bits.udpb_recvucred) && 4665 (cr = DB_CRED(mp)) != NULL) { 4666 udi_size += sizeof (struct T_opthdr) + ucredsize; 4667 cpid = DB_CPID(mp); 4668 UDP_STAT(us, udp_in_recvucred); 4669 } 4670 4671 /* 4672 * If SO_TIMESTAMP is set allocate the appropriate sized 4673 * buffer. Since gethrestime() expects a pointer aligned 4674 * argument, we allocate space necessary for extra 4675 * alignment (even though it might not be used). 4676 */ 4677 if (udp_bits.udpb_timestamp) { 4678 udi_size += sizeof (struct T_opthdr) + 4679 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4680 UDP_STAT(us, udp_in_timestamp); 4681 } 4682 4683 if (udp_bits.udpb_ipv6_recvhoplimit) { 4684 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4685 UDP_STAT(us, udp_in_recvhoplimit); 4686 } 4687 4688 if (udp_bits.udpb_ipv6_recvtclass) { 4689 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4690 UDP_STAT(us, udp_in_recvtclass); 4691 } 4692 4693 mp1 = allocb(udi_size, BPRI_MED); 4694 if (mp1 == NULL) { 4695 freemsg(mp); 4696 if (options_mp != NULL) 4697 freeb(options_mp); 4698 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4699 return; 4700 } 4701 mp1->b_cont = mp; 4702 mp = mp1; 4703 mp->b_datap->db_type = M_PROTO; 4704 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4705 mp->b_wptr = (uchar_t *)tudi + udi_size; 4706 tudi->PRIM_type = T_UNITDATA_IND; 4707 tudi->SRC_length = sizeof (sin6_t); 4708 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4709 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4710 sizeof (sin6_t); 4711 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4712 tudi->OPT_length = udi_size; 4713 sin6 = (sin6_t *)&tudi[1]; 4714 if (ipversion == IPV4_VERSION) { 4715 in6_addr_t v6dst; 4716 4717 
IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4718 &sin6->sin6_addr); 4719 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4720 &v6dst); 4721 sin6->sin6_flowinfo = 0; 4722 sin6->sin6_scope_id = 0; 4723 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4724 connp->conn_zoneid, us->us_netstack); 4725 } else { 4726 sin6->sin6_addr = ip6h->ip6_src; 4727 /* No sin6_flowinfo per API */ 4728 sin6->sin6_flowinfo = 0; 4729 /* For link-scope source pass up scope id */ 4730 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4731 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4732 sin6->sin6_scope_id = ipp.ipp_ifindex; 4733 else 4734 sin6->sin6_scope_id = 0; 4735 sin6->__sin6_src_id = ip_srcid_find_addr( 4736 &ip6h->ip6_dst, connp->conn_zoneid, 4737 us->us_netstack); 4738 } 4739 sin6->sin6_port = udpha->uha_src_port; 4740 sin6->sin6_family = udp->udp_family; 4741 4742 if (udi_size != 0) { 4743 uchar_t *dstopt; 4744 4745 dstopt = (uchar_t *)&sin6[1]; 4746 if ((udp_bits.udpb_ip_recvpktinfo) && 4747 (ipp.ipp_fields & IPPF_IFINDEX)) { 4748 struct T_opthdr *toh; 4749 struct in6_pktinfo *pkti; 4750 4751 toh = (struct T_opthdr *)dstopt; 4752 toh->level = IPPROTO_IPV6; 4753 toh->name = IPV6_PKTINFO; 4754 toh->len = sizeof (struct T_opthdr) + 4755 sizeof (*pkti); 4756 toh->status = 0; 4757 dstopt += sizeof (struct T_opthdr); 4758 pkti = (struct in6_pktinfo *)dstopt; 4759 if (ipversion == IPV6_VERSION) 4760 pkti->ipi6_addr = ip6h->ip6_dst; 4761 else 4762 IN6_IPADDR_TO_V4MAPPED( 4763 ((ipha_t *)rptr)->ipha_dst, 4764 &pkti->ipi6_addr); 4765 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4766 dstopt += sizeof (*pkti); 4767 udi_size -= toh->len; 4768 } 4769 if (udp_bits.udpb_ipv6_recvhoplimit) { 4770 struct T_opthdr *toh; 4771 4772 toh = (struct T_opthdr *)dstopt; 4773 toh->level = IPPROTO_IPV6; 4774 toh->name = IPV6_HOPLIMIT; 4775 toh->len = sizeof (struct T_opthdr) + 4776 sizeof (uint_t); 4777 toh->status = 0; 4778 dstopt += sizeof (struct T_opthdr); 4779 if (ipversion == IPV6_VERSION) 4780 *(uint_t *)dstopt = 
ip6h->ip6_hops; 4781 else 4782 *(uint_t *)dstopt = 4783 ((ipha_t *)rptr)->ipha_ttl; 4784 dstopt += sizeof (uint_t); 4785 udi_size -= toh->len; 4786 } 4787 if (udp_bits.udpb_ipv6_recvtclass) { 4788 struct T_opthdr *toh; 4789 4790 toh = (struct T_opthdr *)dstopt; 4791 toh->level = IPPROTO_IPV6; 4792 toh->name = IPV6_TCLASS; 4793 toh->len = sizeof (struct T_opthdr) + 4794 sizeof (uint_t); 4795 toh->status = 0; 4796 dstopt += sizeof (struct T_opthdr); 4797 if (ipversion == IPV6_VERSION) { 4798 *(uint_t *)dstopt = 4799 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4800 } else { 4801 ipha_t *ipha = (ipha_t *)rptr; 4802 *(uint_t *)dstopt = 4803 ipha->ipha_type_of_service; 4804 } 4805 dstopt += sizeof (uint_t); 4806 udi_size -= toh->len; 4807 } 4808 if ((udp_bits.udpb_ipv6_recvhopopts) && 4809 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4810 size_t hlen; 4811 4812 hlen = copy_hop_opts(&ipp, dstopt); 4813 dstopt += hlen; 4814 udi_size -= hlen; 4815 } 4816 if ((udp_bits.udpb_ipv6_recvdstopts) && 4817 (udp_bits.udpb_ipv6_recvrthdr) && 4818 (ipp.ipp_fields & IPPF_RTHDR) && 4819 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4820 struct T_opthdr *toh; 4821 4822 toh = (struct T_opthdr *)dstopt; 4823 toh->level = IPPROTO_IPV6; 4824 toh->name = IPV6_DSTOPTS; 4825 toh->len = sizeof (struct T_opthdr) + 4826 ipp.ipp_rtdstoptslen; 4827 toh->status = 0; 4828 dstopt += sizeof (struct T_opthdr); 4829 bcopy(ipp.ipp_rtdstopts, dstopt, 4830 ipp.ipp_rtdstoptslen); 4831 dstopt += ipp.ipp_rtdstoptslen; 4832 udi_size -= toh->len; 4833 } 4834 if ((udp_bits.udpb_ipv6_recvrthdr) && 4835 (ipp.ipp_fields & IPPF_RTHDR)) { 4836 struct T_opthdr *toh; 4837 4838 toh = (struct T_opthdr *)dstopt; 4839 toh->level = IPPROTO_IPV6; 4840 toh->name = IPV6_RTHDR; 4841 toh->len = sizeof (struct T_opthdr) + 4842 ipp.ipp_rthdrlen; 4843 toh->status = 0; 4844 dstopt += sizeof (struct T_opthdr); 4845 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4846 dstopt += ipp.ipp_rthdrlen; 4847 udi_size -= toh->len; 4848 } 4849 if 
((udp_bits.udpb_ipv6_recvdstopts) && 4850 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4851 struct T_opthdr *toh; 4852 4853 toh = (struct T_opthdr *)dstopt; 4854 toh->level = IPPROTO_IPV6; 4855 toh->name = IPV6_DSTOPTS; 4856 toh->len = sizeof (struct T_opthdr) + 4857 ipp.ipp_dstoptslen; 4858 toh->status = 0; 4859 dstopt += sizeof (struct T_opthdr); 4860 bcopy(ipp.ipp_dstopts, dstopt, 4861 ipp.ipp_dstoptslen); 4862 dstopt += ipp.ipp_dstoptslen; 4863 udi_size -= toh->len; 4864 } 4865 4866 if (cr != NULL) { 4867 struct T_opthdr *toh; 4868 4869 toh = (struct T_opthdr *)dstopt; 4870 toh->level = SOL_SOCKET; 4871 toh->name = SCM_UCRED; 4872 toh->len = sizeof (struct T_opthdr) + ucredsize; 4873 toh->status = 0; 4874 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4875 dstopt += toh->len; 4876 udi_size -= toh->len; 4877 } 4878 if (udp_bits.udpb_timestamp) { 4879 struct T_opthdr *toh; 4880 4881 toh = (struct T_opthdr *)dstopt; 4882 toh->level = SOL_SOCKET; 4883 toh->name = SCM_TIMESTAMP; 4884 toh->len = sizeof (struct T_opthdr) + 4885 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4886 toh->status = 0; 4887 dstopt += sizeof (struct T_opthdr); 4888 /* Align for gethrestime() */ 4889 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4890 sizeof (intptr_t)); 4891 gethrestime((timestruc_t *)dstopt); 4892 dstopt = (uchar_t *)toh + toh->len; 4893 udi_size -= toh->len; 4894 } 4895 4896 /* Consumed all of allocated space */ 4897 ASSERT(udi_size == 0); 4898 } 4899 #undef sin6 4900 /* No IP_RECVDSTADDR for IPv6. */ 4901 } 4902 4903 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4904 if (options_mp != NULL) 4905 freeb(options_mp); 4906 4907 if (udp_bits.udpb_direct_sockfs) { 4908 /* 4909 * There is nothing above us except for the stream head; 4910 * use the read-side synchronous stream interface in 4911 * order to reduce the time spent in interrupt thread. 
 */
		ASSERT(udp->udp_issocket);
		udp_rcv_enqueue(connp->conn_rq, udp, mp, mp_len);
	} else {
		/*
		 * Use regular STREAMS interface to pass data upstream
		 * if this is not a socket endpoint, or if we have
		 * switched over to the slow mode due to sockmod being
		 * popped or a module being pushed on top of us.
		 */
		putnext(connp->conn_rq, mp);
	}
	return;

tossit:
	freemsg(mp);
	if (options_mp != NULL)
		freeb(options_mp);
	BUMP_MIB(&us->us_udp_mib, udpInErrors);
}

/*
 * Handle the results of a T_BIND_REQ whether deferred by IP or handled
 * immediately.
 *
 * Dispatches on the message type: a T_ERROR_ACK for an (O_)T_BIND_REQ goes
 * to udp_bind_error(), a T_BIND_ACK goes to udp_bind_ack(); anything else
 * is unexpected and is freed.  Ownership of mp always passes to the callee
 * or to freemsg() — the caller must not touch mp afterwards.
 */
static void
udp_bind_result(conn_t *connp, mblk_t *mp)
{
	struct T_error_ack *tea;

	switch (mp->b_datap->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		/* M_PROTO messages contain some type of TPI message. */
		ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <=
		    (uintptr_t)INT_MAX);
		/* Too short to even hold a primitive type — drop it. */
		if (mp->b_wptr - mp->b_rptr < sizeof (t_scalar_t)) {
			freemsg(mp);
			return;
		}
		tea = (struct T_error_ack *)mp->b_rptr;

		switch (tea->PRIM_type) {
		case T_ERROR_ACK:
			switch (tea->ERROR_prim) {
			case O_T_BIND_REQ:
			case T_BIND_REQ:
				udp_bind_error(connp, mp);
				return;
			default:
				break;
			}
			/* Only bind errors are expected here. */
			ASSERT(0);
			freemsg(mp);
			return;

		case T_BIND_ACK:
			udp_bind_ack(connp, mp);
			return;

		default:
			break;
		}
		freemsg(mp);
		return;
	default:
		/* FIXME: other cases? */
		ASSERT(0);
		freemsg(mp);
		return;
	}
}

/*
 * Process a T_BIND_ACK.
 *
 * Takes udp_rwlock as writer to update the local address state, clears
 * udp_pending_op, and then sends the T_BIND_ACK — or any T_OK_ACK /
 * T_CONN_CON messages appended by udp_connect()/udp_disconnect() —
 * upstream.  Consumes mp.
 */
static void
udp_bind_ack(conn_t *connp, mblk_t *mp)
{
	udp_t	*udp = connp->conn_udp;
	mblk_t	*mp1;
	ire_t	*ire;
	struct T_bind_ack *tba;
	uchar_t	*addrp;
	ipa_conn_t	*ac;
	ipa6_conn_t	*ac6;
	udp_fanout_t	*udpf;
	udp_stack_t	*us = udp->udp_us;

	ASSERT(udp->udp_pending_op != -1);
	rw_enter(&udp->udp_rwlock, RW_WRITER);
	/*
	 * If a broadcast/multicast address was bound set
	 * the source address to 0.
	 * This ensures no datagrams with broadcast address
	 * as source address are emitted (which would violate
	 * RFC1122 - Hosts requirements)
	 *
	 * Note that when connecting the returned IRE is
	 * for the destination address and we only perform
	 * the broadcast check for the source address (it
	 * is OK to connect to a broadcast/multicast address.)
	 */
	mp1 = mp->b_cont;
	if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) {
		ire = (ire_t *)mp1->b_rptr;

		/*
		 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast
		 * local address.
		 */
		udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
		    us->us_bind_fanout_size)];
		if (ire->ire_type == IRE_BROADCAST &&
		    udp->udp_state != TS_DATA_XFER) {
			ASSERT(udp->udp_pending_op == T_BIND_REQ ||
			    udp->udp_pending_op == O_T_BIND_REQ);
			/* This was just a local bind to a broadcast addr */
			mutex_enter(&udpf->uf_lock);
			V6_SET_ZERO(udp->udp_v6src);
			mutex_exit(&udpf->uf_lock);
			if (udp->udp_family == AF_INET6)
				(void) udp_build_hdrs(udp);
		} else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
			/*
			 * Local address not yet set - pick it from the
			 * T_bind_ack
			 */
			tba = (struct T_bind_ack *)mp->b_rptr;
			addrp = &mp->b_rptr[tba->ADDR_offset];
			switch (udp->udp_family) {
			case AF_INET:
				/*
				 * IP may return either an ipa_conn_t or the
				 * extended ipa_conn_x_t; distinguish by the
				 * acknowledged address length.
				 */
				if (tba->ADDR_length == sizeof (ipa_conn_t)) {
					ac = (ipa_conn_t *)addrp;
				} else {
					ASSERT(tba->ADDR_length ==
					    sizeof (ipa_conn_x_t));
					ac = &((ipa_conn_x_t *)addrp)->acx_conn;
				}
				mutex_enter(&udpf->uf_lock);
				IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr,
				    &udp->udp_v6src);
				mutex_exit(&udpf->uf_lock);
				break;
			case AF_INET6:
				if (tba->ADDR_length == sizeof (ipa6_conn_t)) {
					ac6 = (ipa6_conn_t *)addrp;
				} else {
					ASSERT(tba->ADDR_length ==
					    sizeof (ipa6_conn_x_t));
					ac6 = &((ipa6_conn_x_t *)
					    addrp)->ac6x_conn;
				}
				mutex_enter(&udpf->uf_lock);
				udp->udp_v6src = ac6->ac6_laddr;
				mutex_exit(&udpf->uf_lock);
				(void) udp_build_hdrs(udp);
				break;
			}
		}
		mp1 = mp1->b_cont;
	}
	udp->udp_pending_op = -1;
	rw_exit(&udp->udp_rwlock);
	/*
	 * Look for one or more appended ACK message added by
	 * udp_connect or udp_disconnect.
	 * If none found just send up the T_BIND_ACK.
	 * udp_connect has appended a T_OK_ACK and a T_CONN_CON.
	 * udp_disconnect has appended a T_OK_ACK.
	 */
	if (mp1 != NULL) {
		/* Detach the appended messages and send each up separately. */
		if (mp->b_cont == mp1)
			mp->b_cont = NULL;
		else {
			ASSERT(mp->b_cont->b_cont == mp1);
			mp->b_cont->b_cont = NULL;
		}
		freemsg(mp);
		mp = mp1;
		while (mp != NULL) {
			mp1 = mp->b_cont;
			mp->b_cont = NULL;
			putnext(connp->conn_rq, mp);
			mp = mp1;
		}
		return;
	}
	/* Strip any remaining IRE_DB_TYPE block before passing the ack up. */
	freemsg(mp->b_cont);
	mp->b_cont = NULL;
	putnext(connp->conn_rq, mp);
}

/*
 * Process a T_ERROR_ACK for a failed (O_)T_BIND_REQ/T_CONN_REQ/T_DISCON_REQ.
 *
 * Restores the endpoint state (back to bound for a failed connect, back to
 * unbound for a failed bind), rewrites ERROR_prim to the operation that was
 * actually pending, and passes the T_ERROR_ACK upstream.  Consumes mp.
 */
static void
udp_bind_error(conn_t *connp, mblk_t *mp)
{
	udp_t	*udp = connp->conn_udp;
	struct T_error_ack *tea;
	udp_fanout_t	*udpf;
	udp_stack_t	*us = udp->udp_us;

	tea = (struct T_error_ack *)mp->b_rptr;

	/*
	 * If our O_T_BIND_REQ/T_BIND_REQ fails,
	 * clear out the associated port and source
	 * address before passing the message
	 * upstream. If this was caused by a T_CONN_REQ
	 * revert back to bound state.
	 */

	rw_enter(&udp->udp_rwlock, RW_WRITER);
	ASSERT(udp->udp_pending_op != -1);
	tea->ERROR_prim = udp->udp_pending_op;
	udp->udp_pending_op = -1;
	udpf = &us->us_bind_fanout[
	    UDP_BIND_HASH(udp->udp_port,
	    us->us_bind_fanout_size)];
	mutex_enter(&udpf->uf_lock);

	switch (tea->ERROR_prim) {
	case T_CONN_REQ:
		ASSERT(udp->udp_state == TS_DATA_XFER);
		/* Connect failed */
		/* Revert back to the bound source */
		udp->udp_v6src = udp->udp_bound_v6src;
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
		if (udp->udp_family == AF_INET6)
			(void) udp_build_hdrs(udp);
		rw_exit(&udp->udp_rwlock);
		break;

	case T_DISCON_REQ:
	case T_BIND_REQ:
	case O_T_BIND_REQ:
		/* Bind failed: drop the port and both source addresses. */
		V6_SET_ZERO(udp->udp_v6src);
		V6_SET_ZERO(udp->udp_bound_v6src);
		udp->udp_state = TS_UNBND;
		udp_bind_hash_remove(udp, B_TRUE);
		udp->udp_port = 0;
		mutex_exit(&udpf->uf_lock);
		if (udp->udp_family == AF_INET6)
			(void) udp_build_hdrs(udp);
		rw_exit(&udp->udp_rwlock);
		break;

	default:
		mutex_exit(&udpf->uf_lock);
		rw_exit(&udp->udp_rwlock);
		(void) mi_strlog(connp->conn_rq, 1,
		    SL_ERROR|SL_TRACE,
		    "udp_input_other: bad ERROR_prim, "
		    "len %d", tea->ERROR_prim);
	}
	putnext(connp->conn_rq, mp);
}

/*
 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
 * information that can be changing beneath us.
 *
 * Replies with the fixed-size UDP MIB, the IPv4 and IPv6 connection tables,
 * and (when any endpoint is MLP-labeled) the transport MLP attribute tables,
 * each via its own qreply().  Returns a copy of the original mpctl (made
 * before any reply was sent) for the caller to continue the SNMP walk with,
 * or NULL/0 on allocation failure.
 */
mblk_t *
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t			*mpdata;
	mblk_t			*mp_conn_ctl;
	mblk_t			*mp_attr_ctl;
	mblk_t			*mp6_conn_ctl;
	mblk_t			*mp6_attr_ctl;
	mblk_t			*mp_conn_tail;
	mblk_t			*mp_attr_tail;
	mblk_t			*mp6_conn_tail;
	mblk_t			*mp6_attr_tail;
	struct opthdr		*optp;
	mib2_udpEntry_t		ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int			state;
	zoneid_t		zoneid;
	int			i;
	connf_t			*connfp;
	conn_t			*connp = Q_TO_CONN(q);
	int			v4_conn_idx;
	int			v6_conn_idx;
	boolean_t		needattr;
	udp_t			*udp;
	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
	mblk_t			*mp2ctl;

	/*
	 * make a copy of the original message
	 */
	mp2ctl = copymsg(mpctl);

	/*
	 * mp6_attr_ctl is the last copy attempted, so it is either NULL
	 * (its own failure) or never reached; it needs no free below.
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		freemsg(mpctl);
		freemsg(mp2ctl);
		return (0);
	}

	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
	    sizeof (us->us_udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every UDP conn in the global hash for this IP stack. */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDPCONN))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (udp->udp_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress =
				    V4_PART_OF_V6(udp->udp_v6src);
				ude.udpLocalPort = ntohs(udp->udp_port);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    V4_PART_OF_V6(udp->udp_v6dst);
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude.udpCreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (udp->udp_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state = state;
				ude6.udp6LocalAddress = udp->udp_v6src;
				ude6.udp6LocalPort = ntohs(udp->udp_port);
				ude6.udp6IfIndex = udp->udp_bound_if;
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    udp->udp_v6dst;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude6.udp6CreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (mp2ctl);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to
 * do the appropriate locking.
5420 */ 5421 /* ARGSUSED */ 5422 int 5423 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5424 uchar_t *ptr, int len) 5425 { 5426 switch (level) { 5427 case MIB2_UDP: 5428 return (0); 5429 default: 5430 return (1); 5431 } 5432 } 5433 5434 static void 5435 udp_report_item(mblk_t *mp, udp_t *udp) 5436 { 5437 char *state; 5438 char addrbuf1[INET6_ADDRSTRLEN]; 5439 char addrbuf2[INET6_ADDRSTRLEN]; 5440 uint_t print_len, buf_len; 5441 5442 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5443 ASSERT(buf_len >= 0); 5444 if (buf_len == 0) 5445 return; 5446 5447 if (udp->udp_state == TS_UNBND) 5448 state = "UNBOUND"; 5449 else if (udp->udp_state == TS_IDLE) 5450 state = "IDLE"; 5451 else if (udp->udp_state == TS_DATA_XFER) 5452 state = "CONNECTED"; 5453 else 5454 state = "UnkState"; 5455 print_len = snprintf((char *)mp->b_wptr, buf_len, 5456 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5457 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5458 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 5459 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 5460 ntohs(udp->udp_dstport), state); 5461 if (print_len < buf_len) { 5462 mp->b_wptr += print_len; 5463 } else { 5464 mp->b_wptr += buf_len; 5465 } 5466 } 5467 5468 /* Report for ndd "udp_status" */ 5469 /* ARGSUSED */ 5470 static int 5471 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5472 { 5473 zoneid_t zoneid; 5474 connf_t *connfp; 5475 conn_t *connp = Q_TO_CONN(q); 5476 udp_t *udp = connp->conn_udp; 5477 int i; 5478 udp_stack_t *us = udp->udp_us; 5479 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5480 5481 /* 5482 * Because of the ndd constraint, at most we can have 64K buffer 5483 * to put in all UDP info. So to be more efficient, just 5484 * allocate a 64K buffer here, assuming we need that large buffer. 5485 * This may be a problem as any user can read udp_status. 
Therefore 5486 * we limit the rate of doing this using us_ndd_get_info_interval. 5487 * This should be OK as normal users should not do this too often. 5488 */ 5489 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 5490 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 5491 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 5492 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5493 return (0); 5494 } 5495 } 5496 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5497 /* The following may work even if we cannot get a large buf. */ 5498 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5499 return (0); 5500 } 5501 (void) mi_mpprintf(mp, 5502 "UDP " MI_COL_HDRPAD_STR 5503 /* 12345678[89ABCDEF] */ 5504 " zone lport src addr dest addr port state"); 5505 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5506 5507 zoneid = connp->conn_zoneid; 5508 5509 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5510 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5511 connp = NULL; 5512 5513 while ((connp = ipcl_get_next_conn(connfp, connp, 5514 IPCL_UDPCONN))) { 5515 udp = connp->conn_udp; 5516 if (zoneid != GLOBAL_ZONEID && 5517 zoneid != connp->conn_zoneid) 5518 continue; 5519 5520 udp_report_item(mp->b_cont, udp); 5521 } 5522 } 5523 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 5524 return (0); 5525 } 5526 5527 /* 5528 * This routine creates a T_UDERROR_IND message and passes it upstream. 5529 * The address and options are copied from the T_UNITDATA_REQ message 5530 * passed in mp. This message is freed. 
5531 */ 5532 static void 5533 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5534 t_scalar_t err) 5535 { 5536 struct T_unitdata_req *tudr; 5537 mblk_t *mp1; 5538 uchar_t *optaddr; 5539 t_scalar_t optlen; 5540 5541 if (DB_TYPE(mp) == M_DATA) { 5542 ASSERT(destaddr != NULL && destlen != 0); 5543 optaddr = NULL; 5544 optlen = 0; 5545 } else { 5546 if ((mp->b_wptr < mp->b_rptr) || 5547 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5548 goto done; 5549 } 5550 tudr = (struct T_unitdata_req *)mp->b_rptr; 5551 destaddr = mp->b_rptr + tudr->DEST_offset; 5552 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5553 destaddr + tudr->DEST_length < mp->b_rptr || 5554 destaddr + tudr->DEST_length > mp->b_wptr) { 5555 goto done; 5556 } 5557 optaddr = mp->b_rptr + tudr->OPT_offset; 5558 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 5559 optaddr + tudr->OPT_length < mp->b_rptr || 5560 optaddr + tudr->OPT_length > mp->b_wptr) { 5561 goto done; 5562 } 5563 destlen = tudr->DEST_length; 5564 optlen = tudr->OPT_length; 5565 } 5566 5567 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5568 (char *)optaddr, optlen, err); 5569 if (mp1 != NULL) 5570 qreply(q, mp1); 5571 5572 done: 5573 freemsg(mp); 5574 } 5575 5576 /* 5577 * This routine removes a port number association from a stream. It 5578 * is called by udp_wput to handle T_UNBIND_REQ messages. 
5579 */ 5580 static void 5581 udp_unbind(queue_t *q, mblk_t *mp) 5582 { 5583 udp_t *udp = Q_TO_UDP(q); 5584 udp_fanout_t *udpf; 5585 udp_stack_t *us = udp->udp_us; 5586 5587 if (cl_inet_unbind != NULL) { 5588 /* 5589 * Running in cluster mode - register unbind information 5590 */ 5591 if (udp->udp_ipversion == IPV4_VERSION) { 5592 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 5593 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5594 (in_port_t)udp->udp_port); 5595 } else { 5596 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5597 (uint8_t *)&(udp->udp_v6src), 5598 (in_port_t)udp->udp_port); 5599 } 5600 } 5601 5602 rw_enter(&udp->udp_rwlock, RW_WRITER); 5603 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 5604 rw_exit(&udp->udp_rwlock); 5605 udp_err_ack(q, mp, TOUTSTATE, 0); 5606 return; 5607 } 5608 udp->udp_pending_op = T_UNBIND_REQ; 5609 rw_exit(&udp->udp_rwlock); 5610 5611 /* 5612 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5613 * and therefore ip_unbind must never return NULL. 5614 */ 5615 mp = ip_unbind(q, mp); 5616 ASSERT(mp != NULL); 5617 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 5618 5619 /* 5620 * Once we're unbound from IP, the pending operation may be cleared 5621 * here. 5622 */ 5623 rw_enter(&udp->udp_rwlock, RW_WRITER); 5624 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 5625 us->us_bind_fanout_size)]; 5626 mutex_enter(&udpf->uf_lock); 5627 udp_bind_hash_remove(udp, B_TRUE); 5628 V6_SET_ZERO(udp->udp_v6src); 5629 V6_SET_ZERO(udp->udp_bound_v6src); 5630 udp->udp_port = 0; 5631 mutex_exit(&udpf->uf_lock); 5632 5633 udp->udp_pending_op = -1; 5634 udp->udp_state = TS_UNBND; 5635 if (udp->udp_family == AF_INET6) 5636 (void) udp_build_hdrs(udp); 5637 rw_exit(&udp->udp_rwlock); 5638 5639 qreply(q, mp); 5640 } 5641 5642 /* 5643 * Don't let port fall into the privileged range. 5644 * Since the extra privileged ports can be arbitrary we also 5645 * ensure that we exclude those from consideration. 
5646 * us->us_epriv_ports is not sorted thus we loop over it until 5647 * there are no changes. 5648 */ 5649 static in_port_t 5650 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 5651 { 5652 int i; 5653 in_port_t nextport; 5654 boolean_t restart = B_FALSE; 5655 udp_stack_t *us = udp->udp_us; 5656 5657 if (random && udp_random_anon_port != 0) { 5658 (void) random_get_pseudo_bytes((uint8_t *)&port, 5659 sizeof (in_port_t)); 5660 /* 5661 * Unless changed by a sys admin, the smallest anon port 5662 * is 32768 and the largest anon port is 65535. It is 5663 * very likely (50%) for the random port to be smaller 5664 * than the smallest anon port. When that happens, 5665 * add port % (anon port range) to the smallest anon 5666 * port to get the random port. It should fall into the 5667 * valid anon port range. 5668 */ 5669 if (port < us->us_smallest_anon_port) { 5670 port = us->us_smallest_anon_port + 5671 port % (us->us_largest_anon_port - 5672 us->us_smallest_anon_port); 5673 } 5674 } 5675 5676 retry: 5677 if (port < us->us_smallest_anon_port) 5678 port = us->us_smallest_anon_port; 5679 5680 if (port > us->us_largest_anon_port) { 5681 port = us->us_smallest_anon_port; 5682 if (restart) 5683 return (0); 5684 restart = B_TRUE; 5685 } 5686 5687 if (port < us->us_smallest_nonpriv_port) 5688 port = us->us_smallest_nonpriv_port; 5689 5690 for (i = 0; i < us->us_num_epriv_ports; i++) { 5691 if (port == us->us_epriv_ports[i]) { 5692 port++; 5693 /* 5694 * Make sure that the port is in the 5695 * valid range. 
5696 */ 5697 goto retry; 5698 } 5699 } 5700 5701 if (is_system_labeled() && 5702 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 5703 port, IPPROTO_UDP, B_TRUE)) != 0) { 5704 port = nextport; 5705 goto retry; 5706 } 5707 5708 return (port); 5709 } 5710 5711 static int 5712 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 5713 { 5714 int err; 5715 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 5716 udp_t *udp = Q_TO_UDP(wq); 5717 udp_stack_t *us = udp->udp_us; 5718 5719 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 5720 opt_storage, udp->udp_mac_exempt, 5721 us->us_netstack->netstack_ip); 5722 if (err == 0) { 5723 err = tsol_update_options(&udp->udp_ip_snd_options, 5724 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 5725 opt_storage); 5726 } 5727 if (err != 0) { 5728 DTRACE_PROBE4( 5729 tx__ip__log__info__updatelabel__udp, 5730 char *, "queue(1) failed to update options(2) on mp(3)", 5731 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5732 } else { 5733 IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst); 5734 } 5735 return (err); 5736 } 5737 5738 static mblk_t * 5739 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5740 uint_t srcid, int *error, boolean_t insert_spi) 5741 { 5742 udp_t *udp = connp->conn_udp; 5743 queue_t *q = connp->conn_wq; 5744 mblk_t *mp1 = mp; 5745 mblk_t *mp2; 5746 ipha_t *ipha; 5747 int ip_hdr_length; 5748 uint32_t ip_len; 5749 udpha_t *udpha; 5750 boolean_t lock_held = B_FALSE; 5751 in_port_t uha_src_port; 5752 udpattrs_t attrs; 5753 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 5754 uint32_t ip_snd_opt_len = 0; 5755 ip4_pkt_t pktinfo; 5756 ip4_pkt_t *pktinfop = &pktinfo; 5757 ip_opt_info_t optinfo; 5758 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5759 udp_stack_t *us = udp->udp_us; 5760 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 5761 5762 5763 *error = 0; 5764 pktinfop->ip4_ill_index = 0; 5765 pktinfop->ip4_addr = INADDR_ANY; 5766 optinfo.ip_opt_flags 
= 0; 5767 optinfo.ip_opt_ill_index = 0; 5768 5769 if (v4dst == INADDR_ANY) 5770 v4dst = htonl(INADDR_LOOPBACK); 5771 5772 /* 5773 * If options passed in, feed it for verification and handling 5774 */ 5775 attrs.udpattr_credset = B_FALSE; 5776 if (DB_TYPE(mp) != M_DATA) { 5777 mp1 = mp->b_cont; 5778 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 5779 attrs.udpattr_ipp4 = pktinfop; 5780 attrs.udpattr_mb = mp; 5781 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 5782 goto done; 5783 /* 5784 * Note: success in processing options. 5785 * mp option buffer represented by 5786 * OPT_length/offset now potentially modified 5787 * and contain option setting results 5788 */ 5789 ASSERT(*error == 0); 5790 } 5791 } 5792 5793 /* mp1 points to the M_DATA mblk carrying the packet */ 5794 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5795 5796 rw_enter(&udp->udp_rwlock, RW_READER); 5797 lock_held = B_TRUE; 5798 /* 5799 * Check if our saved options are valid; update if not. 5800 * TSOL Note: Since we are not in WRITER mode, UDP packets 5801 * to different destination may require different labels, 5802 * or worse, UDP packets to same IP address may require 5803 * different labels due to use of shared all-zones address. 5804 * We use conn_lock to ensure that lastdst, ip_snd_options, 5805 * and ip_snd_options_len are consistent for the current 5806 * destination and are updated atomically. 
5807 */ 5808 mutex_enter(&connp->conn_lock); 5809 if (is_system_labeled()) { 5810 /* Using UDP MLP requires SCM_UCRED from user */ 5811 if (connp->conn_mlp_type != mlptSingle && 5812 !attrs.udpattr_credset) { 5813 mutex_exit(&connp->conn_lock); 5814 DTRACE_PROBE4( 5815 tx__ip__log__info__output__udp, 5816 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5817 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5818 *error = ECONNREFUSED; 5819 goto done; 5820 } 5821 /* 5822 * update label option for this UDP socket if 5823 * - the destination has changed, or 5824 * - the UDP socket is MLP 5825 */ 5826 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5827 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5828 connp->conn_mlp_type != mlptSingle) && 5829 (*error = udp_update_label(q, mp, v4dst)) != 0) { 5830 mutex_exit(&connp->conn_lock); 5831 goto done; 5832 } 5833 } 5834 if (udp->udp_ip_snd_options_len > 0) { 5835 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5836 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5837 } 5838 mutex_exit(&connp->conn_lock); 5839 5840 /* Add an IP header */ 5841 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5842 (insert_spi ? sizeof (uint32_t) : 0); 5843 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5844 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5845 !OK_32PTR(ipha)) { 5846 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5847 if (mp2 == NULL) { 5848 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5849 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5850 *error = ENOMEM; 5851 goto done; 5852 } 5853 mp2->b_wptr = DB_LIM(mp2); 5854 mp2->b_cont = mp1; 5855 mp1 = mp2; 5856 if (DB_TYPE(mp) != M_DATA) 5857 mp->b_cont = mp1; 5858 else 5859 mp = mp1; 5860 5861 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5862 } 5863 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? 
sizeof (uint32_t) : 0)); 5864 #ifdef _BIG_ENDIAN 5865 /* Set version, header length, and tos */ 5866 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5867 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5868 udp->udp_type_of_service); 5869 /* Set ttl and protocol */ 5870 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5871 #else 5872 /* Set version, header length, and tos */ 5873 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5874 ((udp->udp_type_of_service << 8) | 5875 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5876 /* Set ttl and protocol */ 5877 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5878 #endif 5879 if (pktinfop->ip4_addr != INADDR_ANY) { 5880 ipha->ipha_src = pktinfop->ip4_addr; 5881 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5882 } else { 5883 /* 5884 * Copy our address into the packet. If this is zero, 5885 * first look at __sin6_src_id for a hint. If we leave the 5886 * source as INADDR_ANY then ip will fill in the real source 5887 * address. 
5888 */ 5889 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5890 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5891 in6_addr_t v6src; 5892 5893 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5894 us->us_netstack); 5895 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5896 } 5897 } 5898 uha_src_port = udp->udp_port; 5899 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5900 rw_exit(&udp->udp_rwlock); 5901 lock_held = B_FALSE; 5902 } 5903 5904 if (pktinfop->ip4_ill_index != 0) { 5905 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5906 } 5907 5908 ipha->ipha_fragment_offset_and_flags = 0; 5909 ipha->ipha_ident = 0; 5910 5911 mp1->b_rptr = (uchar_t *)ipha; 5912 5913 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5914 (uintptr_t)UINT_MAX); 5915 5916 /* Determine length of packet */ 5917 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5918 if ((mp2 = mp1->b_cont) != NULL) { 5919 do { 5920 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5921 ip_len += (uint32_t)MBLKL(mp2); 5922 } while ((mp2 = mp2->b_cont) != NULL); 5923 } 5924 /* 5925 * If the size of the packet is greater than the maximum allowed by 5926 * ip, return an error. Passing this down could cause panics because 5927 * the size will have wrapped and be inconsistent with the msg size. 5928 */ 5929 if (ip_len > IP_MAXPACKET) { 5930 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5931 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5932 *error = EMSGSIZE; 5933 goto done; 5934 } 5935 ipha->ipha_length = htons((uint16_t)ip_len); 5936 ip_len -= ip_hdr_length; 5937 ip_len = htons((uint16_t)ip_len); 5938 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5939 5940 /* Insert all-0s SPI now. */ 5941 if (insert_spi) 5942 *((uint32_t *)(udpha + 1)) = 0; 5943 5944 /* 5945 * Copy in the destination address 5946 */ 5947 ipha->ipha_dst = v4dst; 5948 5949 /* 5950 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 
5951 */ 5952 if (CLASSD(v4dst)) 5953 ipha->ipha_ttl = udp->udp_multicast_ttl; 5954 5955 udpha->uha_dst_port = port; 5956 udpha->uha_src_port = uha_src_port; 5957 5958 if (ip_snd_opt_len > 0) { 5959 uint32_t cksum; 5960 5961 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5962 lock_held = B_FALSE; 5963 rw_exit(&udp->udp_rwlock); 5964 /* 5965 * Massage source route putting first source route in ipha_dst. 5966 * Ignore the destination in T_unitdata_req. 5967 * Create a checksum adjustment for a source route, if any. 5968 */ 5969 cksum = ip_massage_options(ipha, us->us_netstack); 5970 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5971 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5972 (ipha->ipha_dst & 0xFFFF); 5973 if ((int)cksum < 0) 5974 cksum--; 5975 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5976 /* 5977 * IP does the checksum if uha_checksum is non-zero, 5978 * We make it easy for IP to include our pseudo header 5979 * by putting our length in uha_checksum. 5980 */ 5981 cksum += ip_len; 5982 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5983 /* There might be a carry. */ 5984 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5985 #ifdef _LITTLE_ENDIAN 5986 if (us->us_do_checksum) 5987 ip_len = (cksum << 16) | ip_len; 5988 #else 5989 if (us->us_do_checksum) 5990 ip_len = (ip_len << 16) | cksum; 5991 else 5992 ip_len <<= 16; 5993 #endif 5994 } else { 5995 /* 5996 * IP does the checksum if uha_checksum is non-zero, 5997 * We make it easy for IP to include our pseudo header 5998 * by putting our length in uha_checksum. 
5999 */ 6000 if (us->us_do_checksum) 6001 ip_len |= (ip_len << 16); 6002 #ifndef _LITTLE_ENDIAN 6003 else 6004 ip_len <<= 16; 6005 #endif 6006 } 6007 ASSERT(!lock_held); 6008 /* Set UDP length and checksum */ 6009 *((uint32_t *)&udpha->uha_length) = ip_len; 6010 if (DB_CRED(mp) != NULL) 6011 mblk_setcred(mp1, DB_CRED(mp)); 6012 6013 if (DB_TYPE(mp) != M_DATA) { 6014 ASSERT(mp != mp1); 6015 freeb(mp); 6016 } 6017 6018 /* mp has been consumed and we'll return success */ 6019 ASSERT(*error == 0); 6020 mp = NULL; 6021 6022 /* We're done. Pass the packet to ip. */ 6023 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6024 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6025 "udp_wput_end: q %p (%S)", q, "end"); 6026 6027 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 6028 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 6029 connp->conn_dontroute || 6030 connp->conn_nofailover_ill != NULL || 6031 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 6032 optinfo.ip_opt_ill_index != 0 || 6033 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 6034 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 6035 ipst->ips_ip_g_mrouter != NULL) { 6036 UDP_STAT(us, udp_ip_send); 6037 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 6038 &optinfo); 6039 } else { 6040 udp_send_data(udp, connp->conn_wq, mp1, ipha); 6041 } 6042 6043 done: 6044 if (lock_held) 6045 rw_exit(&udp->udp_rwlock); 6046 if (*error != 0) { 6047 ASSERT(mp != NULL); 6048 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6049 } 6050 return (mp); 6051 } 6052 6053 static void 6054 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 6055 { 6056 conn_t *connp = udp->udp_connp; 6057 ipaddr_t src, dst; 6058 ire_t *ire; 6059 ipif_t *ipif = NULL; 6060 mblk_t *ire_fp_mp; 6061 boolean_t retry_caching; 6062 udp_stack_t *us = udp->udp_us; 6063 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6064 6065 dst = ipha->ipha_dst; 6066 src = ipha->ipha_src; 6067 ASSERT(ipha->ipha_ident == 0); 6068 6069 if (CLASSD(dst)) { 6070 
int err; 6071 6072 ipif = conn_get_held_ipif(connp, 6073 &connp->conn_multicast_ipif, &err); 6074 6075 if (ipif == NULL || ipif->ipif_isv6 || 6076 (ipif->ipif_ill->ill_phyint->phyint_flags & 6077 PHYI_LOOPBACK)) { 6078 if (ipif != NULL) 6079 ipif_refrele(ipif); 6080 UDP_STAT(us, udp_ip_send); 6081 ip_output(connp, mp, q, IP_WPUT); 6082 return; 6083 } 6084 } 6085 6086 retry_caching = B_FALSE; 6087 mutex_enter(&connp->conn_lock); 6088 ire = connp->conn_ire_cache; 6089 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 6090 6091 if (ire == NULL || ire->ire_addr != dst || 6092 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 6093 retry_caching = B_TRUE; 6094 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 6095 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6096 6097 ASSERT(ipif != NULL); 6098 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6099 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6100 retry_caching = B_TRUE; 6101 } 6102 6103 if (!retry_caching) { 6104 ASSERT(ire != NULL); 6105 IRE_REFHOLD(ire); 6106 mutex_exit(&connp->conn_lock); 6107 } else { 6108 boolean_t cached = B_FALSE; 6109 6110 connp->conn_ire_cache = NULL; 6111 mutex_exit(&connp->conn_lock); 6112 6113 /* Release the old ire */ 6114 if (ire != NULL) { 6115 IRE_REFRELE_NOTR(ire); 6116 ire = NULL; 6117 } 6118 6119 if (CLASSD(dst)) { 6120 ASSERT(ipif != NULL); 6121 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6122 connp->conn_zoneid, MBLK_GETLABEL(mp), 6123 MATCH_IRE_ILL_GROUP, ipst); 6124 } else { 6125 ASSERT(ipif == NULL); 6126 ire = ire_cache_lookup(dst, connp->conn_zoneid, 6127 MBLK_GETLABEL(mp), ipst); 6128 } 6129 6130 if (ire == NULL) { 6131 if (ipif != NULL) 6132 ipif_refrele(ipif); 6133 UDP_STAT(us, udp_ire_null); 6134 ip_output(connp, mp, q, IP_WPUT); 6135 return; 6136 } 6137 IRE_REFHOLD_NOTR(ire); 6138 6139 mutex_enter(&connp->conn_lock); 6140 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 6141 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6142 irb_t *irb = 
ire->ire_bucket; 6143 6144 /* 6145 * IRE's created for non-connection oriented transports 6146 * are normally initialized with IRE_MARK_TEMPORARY set 6147 * in the ire_marks. These IRE's are preferentially 6148 * reaped when the hash chain length in the cache 6149 * bucket exceeds the maximum value specified in 6150 * ip[6]_ire_max_bucket_cnt. This can severely affect 6151 * UDP performance if IRE cache entries that we need 6152 * to reuse are continually removed. To remedy this, 6153 * when we cache the IRE in the conn_t, we remove the 6154 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 6155 * set. 6156 */ 6157 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 6158 rw_enter(&irb->irb_lock, RW_WRITER); 6159 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 6160 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 6161 irb->irb_tmp_ire_cnt--; 6162 } 6163 rw_exit(&irb->irb_lock); 6164 } 6165 connp->conn_ire_cache = ire; 6166 cached = B_TRUE; 6167 } 6168 mutex_exit(&connp->conn_lock); 6169 6170 /* 6171 * We can continue to use the ire but since it was not 6172 * cached, we should drop the extra reference. 6173 */ 6174 if (!cached) 6175 IRE_REFRELE_NOTR(ire); 6176 } 6177 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6178 ASSERT(!CLASSD(dst) || ipif != NULL); 6179 6180 /* 6181 * Check if we can take the fast-path. 
6182 * Note that "incomplete" ire's (where the link-layer for next hop 6183 * is not resolved, or where the fast-path header in nce_fp_mp is not 6184 * available yet) are sent down the legacy (slow) path 6185 */ 6186 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6187 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 6188 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 6189 ((ire->ire_nce == NULL) || 6190 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 6191 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 6192 if (ipif != NULL) 6193 ipif_refrele(ipif); 6194 UDP_STAT(us, udp_ip_ire_send); 6195 IRE_REFRELE(ire); 6196 ip_output(connp, mp, q, IP_WPUT); 6197 return; 6198 } 6199 6200 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6201 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6202 ipha->ipha_src = ipif->ipif_src_addr; 6203 else 6204 ipha->ipha_src = ire->ire_src_addr; 6205 } 6206 6207 if (ipif != NULL) 6208 ipif_refrele(ipif); 6209 6210 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 6211 } 6212 6213 static void 6214 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 6215 { 6216 ipaddr_t src, dst; 6217 ill_t *ill; 6218 mblk_t *ire_fp_mp; 6219 uint_t ire_fp_mp_len; 6220 uint16_t *up; 6221 uint32_t cksum, hcksum_txflags; 6222 queue_t *dev_q; 6223 udp_t *udp = connp->conn_udp; 6224 ipha_t *ipha = (ipha_t *)mp->b_rptr; 6225 udp_stack_t *us = udp->udp_us; 6226 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6227 6228 dev_q = ire->ire_stq->q_next; 6229 ASSERT(dev_q != NULL); 6230 6231 6232 if (DEV_Q_IS_FLOW_CTLED(dev_q)) { 6233 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 6234 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 6235 if (ipst->ips_ip_output_queue) 6236 (void) putq(connp->conn_wq, mp); 6237 else 6238 freemsg(mp); 6239 ire_refrele(ire); 6240 return; 6241 } 6242 6243 ire_fp_mp = ire->ire_nce->nce_fp_mp; 6244 ire_fp_mp_len = MBLKL(ire_fp_mp); 6245 
ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 6246 6247 dst = ipha->ipha_dst; 6248 src = ipha->ipha_src; 6249 6250 ill = ire_to_ill(ire); 6251 ASSERT(ill != NULL); 6252 6253 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 6254 6255 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6256 #ifndef _BIG_ENDIAN 6257 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6258 #endif 6259 6260 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6261 ASSERT(ill->ill_hcksum_capab != NULL); 6262 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6263 } else { 6264 hcksum_txflags = 0; 6265 } 6266 6267 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6268 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6269 6270 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6271 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6272 if (*up != 0) { 6273 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6274 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6275 ntohs(ipha->ipha_length), cksum); 6276 6277 /* Software checksum? */ 6278 if (DB_CKSUMFLAGS(mp) == 0) { 6279 UDP_STAT(us, udp_out_sw_cksum); 6280 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 6281 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6282 } 6283 } 6284 6285 if (!CLASSD(dst)) { 6286 ipha->ipha_fragment_offset_and_flags |= 6287 (uint32_t)htons(ire->ire_frag_flag); 6288 } 6289 6290 /* Calculate IP header checksum if hardware isn't capable */ 6291 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6292 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6293 ((uint16_t *)ipha)[4]); 6294 } 6295 6296 if (CLASSD(dst)) { 6297 ilm_t *ilm; 6298 6299 ILM_WALKER_HOLD(ill); 6300 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6301 ILM_WALKER_RELE(ill); 6302 if (ilm != NULL) { 6303 ip_multicast_loopback(q, ill, mp, 6304 connp->conn_multicast_loop ? 
0 : 6305 IP_FF_NO_MCAST_LOOP, zoneid); 6306 } 6307 6308 /* If multicast TTL is 0 then we are done */ 6309 if (ipha->ipha_ttl == 0) { 6310 freemsg(mp); 6311 ire_refrele(ire); 6312 return; 6313 } 6314 } 6315 6316 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6317 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6318 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6319 6320 UPDATE_OB_PKT_COUNT(ire); 6321 ire->ire_last_used_time = lbolt; 6322 6323 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 6324 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 6325 ntohs(ipha->ipha_length)); 6326 6327 if (ILL_DLS_CAPABLE(ill)) { 6328 /* 6329 * Send the packet directly to DLD, where it may be queued 6330 * depending on the availability of transmit resources at 6331 * the media layer. 6332 */ 6333 IP_DLS_ILL_TX(ill, ipha, mp, ipst); 6334 } else { 6335 DTRACE_PROBE4(ip4__physical__out__start, 6336 ill_t *, NULL, ill_t *, ill, 6337 ipha_t *, ipha, mblk_t *, mp); 6338 FW_HOOKS(ipst->ips_ip4_physical_out_event, 6339 ipst->ips_ipv4firewall_physical_out, 6340 NULL, ill, ipha, mp, mp, ipst); 6341 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 6342 if (mp != NULL) 6343 putnext(ire->ire_stq, mp); 6344 } 6345 6346 IRE_REFRELE(ire); 6347 } 6348 6349 static boolean_t 6350 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 6351 { 6352 udp_t *udp = Q_TO_UDP(wq); 6353 int err; 6354 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 6355 udp_stack_t *us = udp->udp_us; 6356 6357 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 6358 dst, opt_storage, udp->udp_mac_exempt, 6359 us->us_netstack->netstack_ip); 6360 if (err == 0) { 6361 err = tsol_update_sticky(&udp->udp_sticky_ipp, 6362 &udp->udp_label_len_v6, opt_storage); 6363 } 6364 if (err != 0) { 6365 DTRACE_PROBE4( 6366 tx__ip__log__drop__updatelabel__udp6, 6367 char *, "queue(1) failed to update options(2) on mp(3)", 6368 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6369 } else { 6370 udp->udp_v6lastdst = *dst; 
6371 } 6372 return (err); 6373 } 6374 6375 void 6376 udp_output_connected(void *arg, mblk_t *mp) 6377 { 6378 conn_t *connp = (conn_t *)arg; 6379 udp_t *udp = connp->conn_udp; 6380 udp_stack_t *us = udp->udp_us; 6381 ipaddr_t v4dst; 6382 in_port_t dstport; 6383 boolean_t mapped_addr; 6384 struct sockaddr_storage ss; 6385 sin_t *sin; 6386 sin6_t *sin6; 6387 struct sockaddr *addr; 6388 socklen_t addrlen; 6389 int error; 6390 boolean_t insert_spi = udp->udp_nat_t_endpoint; 6391 6392 /* M_DATA for connected socket */ 6393 6394 ASSERT(udp->udp_issocket); 6395 UDP_DBGSTAT(us, udp_data_conn); 6396 6397 mutex_enter(&connp->conn_lock); 6398 if (udp->udp_state != TS_DATA_XFER) { 6399 mutex_exit(&connp->conn_lock); 6400 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6401 UDP_STAT(us, udp_out_err_notconn); 6402 freemsg(mp); 6403 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6404 "udp_wput_end: connp %p (%S)", connp, 6405 "not-connected; address required"); 6406 return; 6407 } 6408 6409 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 6410 if (mapped_addr) 6411 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6412 6413 /* Initialize addr and addrlen as if they're passed in */ 6414 if (udp->udp_family == AF_INET) { 6415 sin = (sin_t *)&ss; 6416 sin->sin_family = AF_INET; 6417 dstport = sin->sin_port = udp->udp_dstport; 6418 ASSERT(mapped_addr); 6419 sin->sin_addr.s_addr = v4dst; 6420 addr = (struct sockaddr *)sin; 6421 addrlen = sizeof (*sin); 6422 } else { 6423 sin6 = (sin6_t *)&ss; 6424 sin6->sin6_family = AF_INET6; 6425 dstport = sin6->sin6_port = udp->udp_dstport; 6426 sin6->sin6_flowinfo = udp->udp_flowinfo; 6427 sin6->sin6_addr = udp->udp_v6dst; 6428 sin6->sin6_scope_id = 0; 6429 sin6->__sin6_src_id = 0; 6430 addr = (struct sockaddr *)sin6; 6431 addrlen = sizeof (*sin6); 6432 } 6433 mutex_exit(&connp->conn_lock); 6434 6435 if (mapped_addr) { 6436 /* 6437 * Handle both AF_INET and AF_INET6; the latter 6438 * for IPV4 mapped destination addresses. 
Note 6439 * here that both addr and addrlen point to the 6440 * corresponding struct depending on the address 6441 * family of the socket. 6442 */ 6443 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 6444 insert_spi); 6445 } else { 6446 mp = udp_output_v6(connp, mp, sin6, &error); 6447 } 6448 if (error == 0) { 6449 ASSERT(mp == NULL); 6450 return; 6451 } 6452 6453 UDP_STAT(us, udp_out_err_output); 6454 ASSERT(mp != NULL); 6455 /* mp is freed by the following routine */ 6456 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6457 (t_scalar_t)error); 6458 } 6459 6460 /* 6461 * This routine handles all messages passed downstream. It either 6462 * consumes the message or passes it downstream; it never queues a 6463 * a message. 6464 * 6465 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 6466 * is valid when we are directly beneath the stream head, and thus sockfs 6467 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6468 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 6469 * connected endpoints. 6470 */ 6471 void 6472 udp_wput(queue_t *q, mblk_t *mp) 6473 { 6474 sin6_t *sin6; 6475 sin_t *sin; 6476 ipaddr_t v4dst; 6477 uint16_t port; 6478 uint_t srcid; 6479 conn_t *connp = Q_TO_CONN(q); 6480 udp_t *udp = connp->conn_udp; 6481 int error = 0; 6482 struct sockaddr *addr; 6483 socklen_t addrlen; 6484 udp_stack_t *us = udp->udp_us; 6485 boolean_t insert_spi = udp->udp_nat_t_endpoint; 6486 6487 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6488 "udp_wput_start: queue %p mp %p", q, mp); 6489 6490 /* 6491 * We directly handle several cases here: T_UNITDATA_REQ message 6492 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 6493 * socket. 6494 */ 6495 switch (DB_TYPE(mp)) { 6496 case M_DATA: 6497 /* 6498 * Quick check for error cases. 
Checks will be done again 6499 * under the lock later on 6500 */ 6501 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6502 /* Not connected; address is required */ 6503 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6504 UDP_STAT(us, udp_out_err_notconn); 6505 freemsg(mp); 6506 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6507 "udp_wput_end: connp %p (%S)", connp, 6508 "not-connected; address required"); 6509 return; 6510 } 6511 udp_output_connected(connp, mp); 6512 return; 6513 6514 case M_PROTO: 6515 case M_PCPROTO: { 6516 struct T_unitdata_req *tudr; 6517 6518 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6519 tudr = (struct T_unitdata_req *)mp->b_rptr; 6520 6521 /* Handle valid T_UNITDATA_REQ here */ 6522 if (MBLKL(mp) >= sizeof (*tudr) && 6523 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6524 if (mp->b_cont == NULL) { 6525 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6526 "udp_wput_end: q %p (%S)", q, "badaddr"); 6527 error = EPROTO; 6528 goto ud_error; 6529 } 6530 6531 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6532 tudr->DEST_length)) { 6533 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6534 "udp_wput_end: q %p (%S)", q, "badaddr"); 6535 error = EADDRNOTAVAIL; 6536 goto ud_error; 6537 } 6538 /* 6539 * If a port has not been bound to the stream, fail. 6540 * This is not a problem when sockfs is directly 6541 * above us, because it will ensure that the socket 6542 * is first bound before allowing data to be sent. 
6543 */ 6544 if (udp->udp_state == TS_UNBND) { 6545 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6546 "udp_wput_end: q %p (%S)", q, "outstate"); 6547 error = EPROTO; 6548 goto ud_error; 6549 } 6550 addr = (struct sockaddr *) 6551 &mp->b_rptr[tudr->DEST_offset]; 6552 addrlen = tudr->DEST_length; 6553 if (tudr->OPT_length != 0) 6554 UDP_STAT(us, udp_out_opt); 6555 break; 6556 } 6557 /* FALLTHRU */ 6558 } 6559 default: 6560 udp_wput_other(q, mp); 6561 return; 6562 } 6563 ASSERT(addr != NULL); 6564 6565 switch (udp->udp_family) { 6566 case AF_INET6: 6567 sin6 = (sin6_t *)addr; 6568 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 6569 (sin6->sin6_family != AF_INET6)) { 6570 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6571 "udp_wput_end: q %p (%S)", q, "badaddr"); 6572 error = EADDRNOTAVAIL; 6573 goto ud_error; 6574 } 6575 6576 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6577 /* 6578 * Destination is a non-IPv4-compatible IPv6 address. 6579 * Send out an IPv6 format packet. 6580 */ 6581 mp = udp_output_v6(connp, mp, sin6, &error); 6582 if (error != 0) 6583 goto ud_error; 6584 6585 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6586 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6587 return; 6588 } 6589 /* 6590 * If the local address is not zero or a mapped address 6591 * return an error. It would be possible to send an IPv4 6592 * packet but the response would never make it back to the 6593 * application since it is bound to a non-mapped address. 
6594 */ 6595 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6596 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6597 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6598 "udp_wput_end: q %p (%S)", q, "badaddr"); 6599 error = EADDRNOTAVAIL; 6600 goto ud_error; 6601 } 6602 /* Send IPv4 packet without modifying udp_ipversion */ 6603 /* Extract port and ipaddr */ 6604 port = sin6->sin6_port; 6605 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6606 srcid = sin6->__sin6_src_id; 6607 break; 6608 6609 case AF_INET: 6610 sin = (sin_t *)addr; 6611 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 6612 (sin->sin_family != AF_INET)) { 6613 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6614 "udp_wput_end: q %p (%S)", q, "badaddr"); 6615 error = EADDRNOTAVAIL; 6616 goto ud_error; 6617 } 6618 /* Extract port and ipaddr */ 6619 port = sin->sin_port; 6620 v4dst = sin->sin_addr.s_addr; 6621 srcid = 0; 6622 break; 6623 } 6624 6625 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi); 6626 if (error != 0) { 6627 ud_error: 6628 UDP_STAT(us, udp_out_err_output); 6629 ASSERT(mp != NULL); 6630 /* mp is freed by the following routine */ 6631 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6632 (t_scalar_t)error); 6633 } 6634 } 6635 6636 /* 6637 * udp_output_v6(): 6638 * Assumes that udp_wput did some sanity checking on the destination 6639 * address. 
6640 */ 6641 static mblk_t * 6642 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) 6643 { 6644 ip6_t *ip6h; 6645 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6646 mblk_t *mp1 = mp; 6647 mblk_t *mp2; 6648 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6649 size_t ip_len; 6650 udpha_t *udph; 6651 udp_t *udp = connp->conn_udp; 6652 queue_t *q = connp->conn_wq; 6653 ip6_pkt_t ipp_s; /* For ancillary data options */ 6654 ip6_pkt_t *ipp = &ipp_s; 6655 ip6_pkt_t *tipp; /* temporary ipp */ 6656 uint32_t csum = 0; 6657 uint_t ignore = 0; 6658 uint_t option_exists = 0, is_sticky = 0; 6659 uint8_t *cp; 6660 uint8_t *nxthdr_ptr; 6661 in6_addr_t ip6_dst; 6662 udpattrs_t attrs; 6663 boolean_t opt_present; 6664 ip6_hbh_t *hopoptsptr = NULL; 6665 uint_t hopoptslen = 0; 6666 boolean_t is_ancillary = B_FALSE; 6667 udp_stack_t *us = udp->udp_us; 6668 size_t sth_wroff = 0; 6669 6670 *error = 0; 6671 6672 /* 6673 * If the local address is a mapped address return 6674 * an error. 6675 * It would be possible to send an IPv6 packet but the 6676 * response would never make it back to the application 6677 * since it is bound to a mapped address. 
6678 */ 6679 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6680 *error = EADDRNOTAVAIL; 6681 goto done; 6682 } 6683 6684 ipp->ipp_fields = 0; 6685 ipp->ipp_sticky_ignored = 0; 6686 6687 /* 6688 * If TPI options passed in, feed it for verification and handling 6689 */ 6690 attrs.udpattr_credset = B_FALSE; 6691 opt_present = B_FALSE; 6692 if (DB_TYPE(mp) != M_DATA) { 6693 mp1 = mp->b_cont; 6694 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 6695 attrs.udpattr_ipp6 = ipp; 6696 attrs.udpattr_mb = mp; 6697 if (udp_unitdata_opt_process(q, mp, error, 6698 &attrs) < 0) { 6699 goto done; 6700 } 6701 ASSERT(*error == 0); 6702 opt_present = B_TRUE; 6703 } 6704 } 6705 rw_enter(&udp->udp_rwlock, RW_READER); 6706 ignore = ipp->ipp_sticky_ignored; 6707 6708 /* mp1 points to the M_DATA mblk carrying the packet */ 6709 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6710 6711 if (sin6->sin6_scope_id != 0 && 6712 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6713 /* 6714 * IPPF_SCOPE_ID is special. It's neither a sticky 6715 * option nor ancillary data. It needs to be 6716 * explicitly set in options_exists. 6717 */ 6718 option_exists |= IPPF_SCOPE_ID; 6719 } 6720 6721 /* 6722 * Compute the destination address 6723 */ 6724 ip6_dst = sin6->sin6_addr; 6725 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6726 ip6_dst = ipv6_loopback; 6727 6728 /* 6729 * If we're not going to the same destination as last time, then 6730 * recompute the label required. This is done in a separate routine to 6731 * avoid blowing up our stack here. 6732 * 6733 * TSOL Note: Since we are not in WRITER mode, UDP packets 6734 * to different destination may require different labels, 6735 * or worse, UDP packets to same IP address may require 6736 * different labels due to use of shared all-zones address. 6737 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6738 * and sticky ipp_hopoptslen are consistent for the current 6739 * destination and are updated atomically. 
6740 */ 6741 mutex_enter(&connp->conn_lock); 6742 if (is_system_labeled()) { 6743 /* Using UDP MLP requires SCM_UCRED from user */ 6744 if (connp->conn_mlp_type != mlptSingle && 6745 !attrs.udpattr_credset) { 6746 DTRACE_PROBE4( 6747 tx__ip__log__info__output__udp6, 6748 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6749 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6750 *error = ECONNREFUSED; 6751 rw_exit(&udp->udp_rwlock); 6752 mutex_exit(&connp->conn_lock); 6753 goto done; 6754 } 6755 /* 6756 * update label option for this UDP socket if 6757 * - the destination has changed, or 6758 * - the UDP socket is MLP 6759 */ 6760 if ((opt_present || 6761 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6762 connp->conn_mlp_type != mlptSingle) && 6763 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 6764 rw_exit(&udp->udp_rwlock); 6765 mutex_exit(&connp->conn_lock); 6766 goto done; 6767 } 6768 } 6769 6770 /* 6771 * If there's a security label here, then we ignore any options the 6772 * user may try to set. We keep the peer's label as a hidden sticky 6773 * option. We make a private copy of this label before releasing the 6774 * lock so that label is kept consistent with the destination addr. 6775 */ 6776 if (udp->udp_label_len_v6 > 0) { 6777 ignore &= ~IPPF_HOPOPTS; 6778 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6779 } 6780 6781 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6782 /* No sticky options nor ancillary data. */ 6783 mutex_exit(&connp->conn_lock); 6784 goto no_options; 6785 } 6786 6787 /* 6788 * Go through the options figuring out where each is going to 6789 * come from and build two masks. The first mask indicates if 6790 * the option exists at all. The second mask indicates if the 6791 * option is sticky or ancillary. 
6792 */ 6793 if (!(ignore & IPPF_HOPOPTS)) { 6794 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6795 option_exists |= IPPF_HOPOPTS; 6796 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6797 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6798 option_exists |= IPPF_HOPOPTS; 6799 is_sticky |= IPPF_HOPOPTS; 6800 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6801 hopoptsptr = kmem_alloc( 6802 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6803 if (hopoptsptr == NULL) { 6804 *error = ENOMEM; 6805 mutex_exit(&connp->conn_lock); 6806 goto done; 6807 } 6808 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6809 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6810 hopoptslen); 6811 udp_ip_hdr_len += hopoptslen; 6812 } 6813 } 6814 mutex_exit(&connp->conn_lock); 6815 6816 if (!(ignore & IPPF_RTHDR)) { 6817 if (ipp->ipp_fields & IPPF_RTHDR) { 6818 option_exists |= IPPF_RTHDR; 6819 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6820 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6821 option_exists |= IPPF_RTHDR; 6822 is_sticky |= IPPF_RTHDR; 6823 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6824 } 6825 } 6826 6827 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6828 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6829 option_exists |= IPPF_RTDSTOPTS; 6830 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6831 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6832 option_exists |= IPPF_RTDSTOPTS; 6833 is_sticky |= IPPF_RTDSTOPTS; 6834 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6835 } 6836 } 6837 6838 if (!(ignore & IPPF_DSTOPTS)) { 6839 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6840 option_exists |= IPPF_DSTOPTS; 6841 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6842 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6843 option_exists |= IPPF_DSTOPTS; 6844 is_sticky |= IPPF_DSTOPTS; 6845 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6846 } 6847 } 6848 6849 if (!(ignore & IPPF_IFINDEX)) { 6850 if (ipp->ipp_fields & IPPF_IFINDEX) { 6851 
option_exists |= IPPF_IFINDEX; 6852 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6853 option_exists |= IPPF_IFINDEX; 6854 is_sticky |= IPPF_IFINDEX; 6855 } 6856 } 6857 6858 if (!(ignore & IPPF_ADDR)) { 6859 if (ipp->ipp_fields & IPPF_ADDR) { 6860 option_exists |= IPPF_ADDR; 6861 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6862 option_exists |= IPPF_ADDR; 6863 is_sticky |= IPPF_ADDR; 6864 } 6865 } 6866 6867 if (!(ignore & IPPF_DONTFRAG)) { 6868 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6869 option_exists |= IPPF_DONTFRAG; 6870 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6871 option_exists |= IPPF_DONTFRAG; 6872 is_sticky |= IPPF_DONTFRAG; 6873 } 6874 } 6875 6876 if (!(ignore & IPPF_USE_MIN_MTU)) { 6877 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6878 option_exists |= IPPF_USE_MIN_MTU; 6879 } else if (udp->udp_sticky_ipp.ipp_fields & 6880 IPPF_USE_MIN_MTU) { 6881 option_exists |= IPPF_USE_MIN_MTU; 6882 is_sticky |= IPPF_USE_MIN_MTU; 6883 } 6884 } 6885 6886 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6887 option_exists |= IPPF_HOPLIMIT; 6888 /* IPV6_HOPLIMIT can never be sticky */ 6889 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6890 6891 if (!(ignore & IPPF_UNICAST_HOPS) && 6892 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6893 option_exists |= IPPF_UNICAST_HOPS; 6894 is_sticky |= IPPF_UNICAST_HOPS; 6895 } 6896 6897 if (!(ignore & IPPF_MULTICAST_HOPS) && 6898 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6899 option_exists |= IPPF_MULTICAST_HOPS; 6900 is_sticky |= IPPF_MULTICAST_HOPS; 6901 } 6902 6903 if (!(ignore & IPPF_TCLASS)) { 6904 if (ipp->ipp_fields & IPPF_TCLASS) { 6905 option_exists |= IPPF_TCLASS; 6906 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6907 option_exists |= IPPF_TCLASS; 6908 is_sticky |= IPPF_TCLASS; 6909 } 6910 } 6911 6912 if (!(ignore & IPPF_NEXTHOP) && 6913 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6914 option_exists |= 
IPPF_NEXTHOP; 6915 is_sticky |= IPPF_NEXTHOP; 6916 } 6917 6918 no_options: 6919 6920 /* 6921 * If any options carried in the ip6i_t were specified, we 6922 * need to account for the ip6i_t in the data we'll be sending 6923 * down. 6924 */ 6925 if (option_exists & IPPF_HAS_IP6I) 6926 udp_ip_hdr_len += sizeof (ip6i_t); 6927 6928 /* check/fix buffer config, setup pointers into it */ 6929 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6930 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6931 !OK_32PTR(ip6h)) { 6932 6933 /* Try to get everything in a single mblk next time */ 6934 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6935 udp->udp_max_hdr_len = udp_ip_hdr_len; 6936 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6937 } 6938 6939 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6940 if (mp2 == NULL) { 6941 *error = ENOMEM; 6942 rw_exit(&udp->udp_rwlock); 6943 goto done; 6944 } 6945 mp2->b_wptr = DB_LIM(mp2); 6946 mp2->b_cont = mp1; 6947 mp1 = mp2; 6948 if (DB_TYPE(mp) != M_DATA) 6949 mp->b_cont = mp1; 6950 else 6951 mp = mp1; 6952 6953 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6954 } 6955 mp1->b_rptr = (unsigned char *)ip6h; 6956 ip6i = (ip6i_t *)ip6h; 6957 6958 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 6959 if (option_exists & IPPF_HAS_IP6I) { 6960 ip6h = (ip6_t *)&ip6i[1]; 6961 ip6i->ip6i_flags = 0; 6962 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6963 6964 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6965 if (option_exists & IPPF_SCOPE_ID) { 6966 ip6i->ip6i_flags |= IP6I_IFINDEX; 6967 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6968 } else if (option_exists & IPPF_IFINDEX) { 6969 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6970 ASSERT(tipp->ipp_ifindex != 0); 6971 ip6i->ip6i_flags |= IP6I_IFINDEX; 6972 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6973 } 6974 6975 if (option_exists & IPPF_ADDR) { 6976 /* 6977 * Enable per-packet source address verification if 6978 * IPV6_PKTINFO specified the source address. 6979 * ip6_src is set in the transport's _wput function. 6980 */ 6981 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6982 } 6983 6984 if (option_exists & IPPF_DONTFRAG) { 6985 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6986 } 6987 6988 if (option_exists & IPPF_USE_MIN_MTU) { 6989 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6990 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6991 } 6992 6993 if (option_exists & IPPF_NEXTHOP) { 6994 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6995 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6996 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6997 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6998 } 6999 7000 /* 7001 * tell IP this is an ip6i_t private header 7002 */ 7003 ip6i->ip6i_nxt = IPPROTO_RAW; 7004 } 7005 7006 /* Initialize IPv6 header */ 7007 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7008 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7009 7010 /* Set the hoplimit of the outgoing packet. */ 7011 if (option_exists & IPPF_HOPLIMIT) { 7012 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 7013 ip6h->ip6_hops = ipp->ipp_hoplimit; 7014 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7015 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7016 ip6h->ip6_hops = udp->udp_multicast_ttl; 7017 if (option_exists & IPPF_MULTICAST_HOPS) 7018 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7019 } else { 7020 ip6h->ip6_hops = udp->udp_ttl; 7021 if (option_exists & IPPF_UNICAST_HOPS) 7022 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7023 } 7024 7025 if (option_exists & IPPF_ADDR) { 7026 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7027 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7028 ip6h->ip6_src = tipp->ipp_addr; 7029 } else { 7030 /* 7031 * The source address was not set using IPV6_PKTINFO. 7032 * First look at the bound source. 7033 * If unspecified fallback to __sin6_src_id. 7034 */ 7035 ip6h->ip6_src = udp->udp_v6src; 7036 if (sin6->__sin6_src_id != 0 && 7037 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7038 ip_srcid_find_id(sin6->__sin6_src_id, 7039 &ip6h->ip6_src, connp->conn_zoneid, 7040 us->us_netstack); 7041 } 7042 } 7043 7044 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7045 cp = (uint8_t *)&ip6h[1]; 7046 7047 /* 7048 * Here's where we have to start stringing together 7049 * any extension headers in the right order: 7050 * Hop-by-hop, destination, routing, and final destination opts. 
7051 */ 7052 if (option_exists & IPPF_HOPOPTS) { 7053 /* Hop-by-hop options */ 7054 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7055 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7056 if (hopoptslen == 0) { 7057 hopoptsptr = tipp->ipp_hopopts; 7058 hopoptslen = tipp->ipp_hopoptslen; 7059 is_ancillary = B_TRUE; 7060 } 7061 7062 *nxthdr_ptr = IPPROTO_HOPOPTS; 7063 nxthdr_ptr = &hbh->ip6h_nxt; 7064 7065 bcopy(hopoptsptr, cp, hopoptslen); 7066 cp += hopoptslen; 7067 7068 if (hopoptsptr != NULL && !is_ancillary) { 7069 kmem_free(hopoptsptr, hopoptslen); 7070 hopoptsptr = NULL; 7071 hopoptslen = 0; 7072 } 7073 } 7074 /* 7075 * En-route destination options 7076 * Only do them if there's a routing header as well 7077 */ 7078 if (option_exists & IPPF_RTDSTOPTS) { 7079 ip6_dest_t *dst = (ip6_dest_t *)cp; 7080 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7081 7082 *nxthdr_ptr = IPPROTO_DSTOPTS; 7083 nxthdr_ptr = &dst->ip6d_nxt; 7084 7085 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7086 cp += tipp->ipp_rtdstoptslen; 7087 } 7088 /* 7089 * Routing header next 7090 */ 7091 if (option_exists & IPPF_RTHDR) { 7092 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7093 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7094 7095 *nxthdr_ptr = IPPROTO_ROUTING; 7096 nxthdr_ptr = &rt->ip6r_nxt; 7097 7098 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7099 cp += tipp->ipp_rthdrlen; 7100 } 7101 /* 7102 * Do ultimate destination options 7103 */ 7104 if (option_exists & IPPF_DSTOPTS) { 7105 ip6_dest_t *dest = (ip6_dest_t *)cp; 7106 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7107 7108 *nxthdr_ptr = IPPROTO_DSTOPTS; 7109 nxthdr_ptr = &dest->ip6d_nxt; 7110 7111 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7112 cp += tipp->ipp_dstoptslen; 7113 } 7114 /* 7115 * Now set the last header pointer to the proto passed in 7116 */ 7117 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7118 *nxthdr_ptr = IPPROTO_UDP; 7119 7120 /* Update UDP header */ 7121 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 7122 udph->uha_dst_port = sin6->sin6_port; 7123 udph->uha_src_port = udp->udp_port; 7124 7125 /* 7126 * Copy in the destination address 7127 */ 7128 ip6h->ip6_dst = ip6_dst; 7129 7130 ip6h->ip6_vcf = 7131 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 7132 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7133 7134 if (option_exists & IPPF_TCLASS) { 7135 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7136 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7137 tipp->ipp_tclass); 7138 } 7139 rw_exit(&udp->udp_rwlock); 7140 7141 if (option_exists & IPPF_RTHDR) { 7142 ip6_rthdr_t *rth; 7143 7144 /* 7145 * Perform any processing needed for source routing. 7146 * We know that all extension headers will be in the same mblk 7147 * as the IPv6 header. 7148 */ 7149 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7150 if (rth != NULL && rth->ip6r_segleft != 0) { 7151 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7152 /* 7153 * Drop packet - only support Type 0 routing. 7154 * Notify the application as well. 7155 */ 7156 *error = EPROTO; 7157 goto done; 7158 } 7159 7160 /* 7161 * rth->ip6r_len is twice the number of 7162 * addresses in the header. Thus it must be even. 7163 */ 7164 if (rth->ip6r_len & 0x1) { 7165 *error = EPROTO; 7166 goto done; 7167 } 7168 /* 7169 * Shuffle the routing header and ip6_dst 7170 * addresses, and get the checksum difference 7171 * between the first hop (in ip6_dst) and 7172 * the destination (in the last routing hdr entry). 7173 */ 7174 csum = ip_massage_options_v6(ip6h, rth, 7175 us->us_netstack); 7176 /* 7177 * Verify that the first hop isn't a mapped address. 7178 * Routers along the path need to do this verification 7179 * for subsequent hops. 
7180 */ 7181 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7182 *error = EADDRNOTAVAIL; 7183 goto done; 7184 } 7185 7186 cp += (rth->ip6r_len + 1)*8; 7187 } 7188 } 7189 7190 /* count up length of UDP packet */ 7191 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7192 if ((mp2 = mp1->b_cont) != NULL) { 7193 do { 7194 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7195 ip_len += (uint32_t)MBLKL(mp2); 7196 } while ((mp2 = mp2->b_cont) != NULL); 7197 } 7198 7199 /* 7200 * If the size of the packet is greater than the maximum allowed by 7201 * ip, return an error. Passing this down could cause panics because 7202 * the size will have wrapped and be inconsistent with the msg size. 7203 */ 7204 if (ip_len > IP_MAXPACKET) { 7205 *error = EMSGSIZE; 7206 goto done; 7207 } 7208 7209 /* Store the UDP length. Subtract length of extension hdrs */ 7210 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7211 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7212 7213 /* 7214 * We make it easy for IP to include our pseudo header 7215 * by putting our length in uh_checksum, modified (if 7216 * we have a routing header) by the checksum difference 7217 * between the ultimate destination and first hop addresses. 7218 * Note: UDP over IPv6 must always checksum the packet. 7219 */ 7220 csum += udph->uha_length; 7221 csum = (csum & 0xFFFF) + (csum >> 16); 7222 udph->uha_checksum = (uint16_t)csum; 7223 7224 #ifdef _LITTLE_ENDIAN 7225 ip_len = htons(ip_len); 7226 #endif 7227 ip6h->ip6_plen = ip_len; 7228 if (DB_CRED(mp) != NULL) 7229 mblk_setcred(mp1, DB_CRED(mp)); 7230 7231 if (DB_TYPE(mp) != M_DATA) { 7232 ASSERT(mp != mp1); 7233 freeb(mp); 7234 } 7235 7236 /* mp has been consumed and we'll return success */ 7237 ASSERT(*error == 0); 7238 mp = NULL; 7239 7240 /* We're done. 
Pass the packet to IP */ 7241 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 7242 ip_output_v6(connp, mp1, q, IP_WPUT); 7243 7244 done: 7245 if (sth_wroff != 0) { 7246 (void) mi_set_sth_wroff(RD(q), 7247 udp->udp_max_hdr_len + us->us_wroff_extra); 7248 } 7249 if (hopoptsptr != NULL && !is_ancillary) { 7250 kmem_free(hopoptsptr, hopoptslen); 7251 hopoptsptr = NULL; 7252 } 7253 if (*error != 0) { 7254 ASSERT(mp != NULL); 7255 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 7256 } 7257 return (mp); 7258 } 7259 7260 static void 7261 udp_wput_other(queue_t *q, mblk_t *mp) 7262 { 7263 uchar_t *rptr = mp->b_rptr; 7264 struct datab *db; 7265 struct iocblk *iocp; 7266 cred_t *cr; 7267 conn_t *connp = Q_TO_CONN(q); 7268 udp_t *udp = connp->conn_udp; 7269 udp_stack_t *us; 7270 7271 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7272 "udp_wput_other_start: q %p", q); 7273 7274 us = udp->udp_us; 7275 db = mp->b_datap; 7276 7277 cr = DB_CREDDEF(mp, connp->conn_cred); 7278 7279 switch (db->db_type) { 7280 case M_PROTO: 7281 case M_PCPROTO: 7282 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7283 freemsg(mp); 7284 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7285 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 7286 return; 7287 } 7288 switch (((t_primp_t)rptr)->type) { 7289 case T_ADDR_REQ: 7290 udp_addr_req(q, mp); 7291 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7292 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7293 return; 7294 case O_T_BIND_REQ: 7295 case T_BIND_REQ: 7296 udp_bind(q, mp); 7297 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7298 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7299 return; 7300 case T_CONN_REQ: 7301 udp_connect(q, mp); 7302 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7303 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7304 return; 7305 case T_CAPABILITY_REQ: 7306 udp_capability_req(q, mp); 7307 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7308 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7309 return; 7310 case T_INFO_REQ: 7311 udp_info_req(q, mp); 7312 
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unitdatareq");
			return;
		case T_UNBIND_REQ:
			udp_unbind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
			return;
		case T_SVR4_OPTMGMT_REQ:
			/*
			 * Give snmpcom_req() first crack at SNMP-style
			 * requests; fall back to generic option handling.
			 */
			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
			    cr)) {
				(void) svr4_optcom_req(q,
				    mp, cr, &udp_opt_obj, B_TRUE);
			}
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
			return;

		case T_OPTMGMT_REQ:
			(void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "optmgmtreq");
			return;

		case T_DISCON_REQ:
			udp_disconnect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "disconreq");
			return;

		/* The following TPI message is not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q,
			    "connres/disconreq");
			return;

		/* The following 3 TPI messages are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q,
			    "data/exdata/ordrel");
			return;
		default:
			break;
		}
		break;
	case M_FLUSH:
		/* Only the write side is flushed here. */
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				qreply(q, mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)", q,
				    "getpeername");
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME: {
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "getmyname");
			return;
		}
		case ND_SET:
			/* nd_getset performs the necessary checking */
		case ND_GET:
			if (nd_getset(q, us->us_nd, mp)) {
				qreply(q, mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)", q, "get");
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we could
			 * no longer use read-side synchronous stream.
			 * Drain any queued data and disable direct sockfs
			 * interface from now on.
			 */
			if (!udp->udp_issocket) {
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp->udp_issocket = B_FALSE;
				if (udp->udp_direct_sockfs) {
					/*
					 * Disable read-side synchronous
					 * stream interface and drain any
					 * queued data.
					 */
					udp_rcv_drain(RD(q), udp,
					    B_FALSE);
					ASSERT(!udp->udp_direct_sockfs);
					UDP_STAT(us, udp_sock_fallback);
				}
				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			qreply(q, mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
	    "udp_wput_other_end: q %p (%S)", q, "end");
	ip_output(connp, mp, q, IP_WPUT);
}

/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages.
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t		*mp1;
	STRUCT_HANDLE(strbuf, sb);
	uint16_t	port;
	in6_addr_t	v6addr;
	ipaddr_t	v4addr;
	uint32_t	flowinfo = 0;
	int		addrlen;
	udp_t		*udp = Q_TO_UDP(q);

	/* Make sure it is one of ours. */
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		/* Not ours; hand it down to IP unchanged. */
		ip_output(udp->udp_connp, mp, q, IP_WPUT);
		return;
	}

	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME.  Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
	    (void *)mp1->b_rptr);
	if (udp->udp_family == AF_INET)
		addrlen = sizeof (sin_t);
	else
		addrlen = sizeof (sin6_t);

	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v4addr = V4_PART_OF_V6(udp->udp_v6src);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				v4addr = V4_PART_OF_V6(udp->udp_bound_v6src);
			}
		} else {
			/* udp->udp_family == AF_INET6 */
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v6addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				v6addr = udp->udp_bound_v6src;
			}
		}
		port = udp->udp_port;
		break;
	case TI_GETPEERNAME:
		if (udp->udp_state != TS_DATA_XFER) {
			mi_copy_done(q, mp, ENOTCONN);
			return;
		}
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			v4addr = V4_PART_OF_V6(udp->udp_v6dst);
		} else {
			/* udp->udp_family == AF_INET6) */
			v6addr = udp->udp_v6dst;
			flowinfo = udp->udp_flowinfo;
		}
		port = udp->udp_dstport;
		break;
	default:
		mi_copy_done(q, mp, EPROTO);
		return;
	}
	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
	if (!mp1)
		return;

	/* Fill in a sockaddr of the appropriate family for the copyout. */
	if (udp->udp_family == AF_INET) {
		sin_t *sin;

		STRUCT_FSET(sb, len, (int)sizeof (sin_t));
		sin = (sin_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin[1];
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = v4addr;
		sin->sin_port = port;
	} else {
		/* udp->udp_family == AF_INET6 */
		sin6_t *sin6;

		STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
		sin6 = (sin6_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin6[1];
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_flowinfo = flowinfo;
		sin6->sin6_addr = v6addr;
		sin6->sin6_port = port;
	}
	/* Copy out the address */
	mi_copyout(q, mp);
}


/*
 * Verify and process the TPI options attached to a T_unitdata_req via
 * tpi_optcom_buf().  Returns 0 on success; -1 on failure with *errorp
 * set by tpi_optcom_buf().
 */
static int
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
    udpattrs_t *udpattrs)
{
	struct T_unitdata_req *udreqp;
	int is_absreq_failure;
	cred_t *cr;
	conn_t	*connp = Q_TO_CONN(q);

	ASSERT(((t_primp_t)mp->b_rptr)->type);

	cr = DB_CREDDEF(mp, connp->conn_cred);

	udreqp = (struct T_unitdata_req *)mp->b_rptr;

	*errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
	    udreqp->OPT_offset, cr, &udp_opt_obj,
	    udpattrs, &is_absreq_failure);

	if (*errorp != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		return (-1);		/* failure */
	}
	ASSERT(is_absreq_failure == 0);
	return (0);	/* success */
}

/*
 * Module-global initialization: compute the maximum option buffer size
 * and register per-netstack constructor/destructor hooks.
 */
void
udp_ddi_init(void)
{
	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of udp_stack_t's.
	 */
	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
}

/* Module-global teardown: stop receiving netstack callbacks. */
void
udp_ddi_destroy(void)
{
	netstack_unregister(NS_UDP);
}

/*
 * Initialize the UDP stack instance.  Called once per netstack; all
 * allocations use KM_SLEEP, so this routine cannot fail.
 */
static void *
udp_stack_init(netstackid_t stackid, netstack_t *ns)
{
	udp_stack_t	*us;
	udpparam_t	*pa;
	int		i;

	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
	us->us_netstack = ns;

	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
	us->us_epriv_ports[0] = 2049;
	us->us_epriv_ports[1] = 4045;

	/*
	 * The smallest anonymous port in the priviledged port range which UDP
	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
	 */
	us->us_min_anonpriv_port = 512;

	us->us_bind_fanout_size = udp_bind_fanout_size;

	/* Roundup variable that might have been modified in /etc/system */
	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
		/* Not a power of two. Round up to nearest power of two */
		for (i = 0; i < 31; i++) {
			if (us->us_bind_fanout_size < (1 << i))
				break;
		}
		us->us_bind_fanout_size = 1 << i;
	}
	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
	    sizeof (udp_fanout_t), KM_SLEEP);
	for (i = 0; i < us->us_bind_fanout_size; i++) {
		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
		    NULL);
	}

	/* Per-stack private copy of the tunable parameter array. */
	pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP);

	us->us_param_arr = pa;
	bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr));

	(void) udp_param_register(&us->us_nd,
	    us->us_param_arr, A_CNT(udp_param_arr));

	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
	us->us_mibkp = udp_kstat_init(stackid);
	return (us);
}

/*
 * Free the UDP stack instance.  Undoes everything udp_stack_init() set up:
 * bind fanout locks and table, nd parameters, and both kstats.
 */
static void
udp_stack_fini(netstackid_t stackid, void *arg)
{
	udp_stack_t *us = (udp_stack_t *)arg;
	int i;

	for (i = 0; i < us->us_bind_fanout_size; i++) {
		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
	}

	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
	    sizeof (udp_fanout_t));

	us->us_bind_fanout = NULL;

	nd_free(&us->us_nd);
	kmem_free(us->us_param_arr, sizeof (udp_param_arr));
	us->us_param_arr = NULL;

	udp_kstat_fini(stackid, us->us_mibkp);
	us->us_mibkp = NULL;

	udp_kstat2_fini(stackid, us->us_kstat);
	us->us_kstat = NULL;
	bzero(&us->us_statistics, sizeof (us->us_statistics));
	kmem_free(us, sizeof (*us));
}

/*
 * Create the per-stack "udpstat" named kstat; the kstat is virtual, backed
 * directly by *us_statisticsp.
 */
static void *
udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
{
	kstat_t	*ksp;

	udp_stat_t template = {
		{ "udp_ip_send",		KSTAT_DATA_UINT64 },
		{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
		{ "udp_ire_null",		KSTAT_DATA_UINT64 },
		{ "udp_drain",			KSTAT_DATA_UINT64 },
		{
"udp_sock_fallback", KSTAT_DATA_UINT64 }, 7773 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7774 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7775 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7776 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7777 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7778 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7779 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7780 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7781 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7782 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7783 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7784 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7785 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7786 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7787 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7788 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7789 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7790 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7791 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7792 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7793 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7794 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7795 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7796 #ifdef DEBUG 7797 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7798 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7799 #endif 7800 }; 7801 7802 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7803 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7804 KSTAT_FLAG_VIRTUAL, stackid); 7805 7806 if (ksp == NULL) 7807 return (NULL); 7808 7809 bcopy(&template, us_statisticsp, sizeof (template)); 7810 ksp->ks_data = (void *)us_statisticsp; 7811 ksp->ks_private = (void *)(uintptr_t)stackid; 7812 7813 kstat_install(ksp); 7814 return (ksp); 7815 } 7816 7817 static void 7818 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7819 { 7820 if (ksp != NULL) { 7821 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7822 kstat_delete_netstack(ksp, stackid); 7823 } 7824 } 7825 7826 static void * 7827 
udp_kstat_init(netstackid_t stackid) 7828 { 7829 kstat_t *ksp; 7830 7831 udp_named_kstat_t template = { 7832 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7833 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7834 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7835 { "entrySize", KSTAT_DATA_INT32, 0 }, 7836 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7837 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7838 }; 7839 7840 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7841 KSTAT_TYPE_NAMED, 7842 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7843 7844 if (ksp == NULL || ksp->ks_data == NULL) 7845 return (NULL); 7846 7847 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7848 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7849 7850 bcopy(&template, ksp->ks_data, sizeof (template)); 7851 ksp->ks_update = udp_kstat_update; 7852 ksp->ks_private = (void *)(uintptr_t)stackid; 7853 7854 kstat_install(ksp); 7855 return (ksp); 7856 } 7857 7858 static void 7859 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7860 { 7861 if (ksp != NULL) { 7862 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7863 kstat_delete_netstack(ksp, stackid); 7864 } 7865 } 7866 7867 static int 7868 udp_kstat_update(kstat_t *kp, int rw) 7869 { 7870 udp_named_kstat_t *udpkp; 7871 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7872 netstack_t *ns; 7873 udp_stack_t *us; 7874 7875 if ((kp == NULL) || (kp->ks_data == NULL)) 7876 return (EIO); 7877 7878 if (rw == KSTAT_WRITE) 7879 return (EACCES); 7880 7881 ns = netstack_find_by_stackid(stackid); 7882 if (ns == NULL) 7883 return (-1); 7884 us = ns->netstack_udp; 7885 if (us == NULL) { 7886 netstack_rele(ns); 7887 return (-1); 7888 } 7889 udpkp = (udp_named_kstat_t *)kp->ks_data; 7890 7891 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7892 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7893 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7894 udpkp->outErrors.value.ui32 = 
us->us_udp_mib.udpOutErrors; 7895 netstack_rele(ns); 7896 return (0); 7897 } 7898 7899 /* 7900 * Read-side synchronous stream info entry point, called as a 7901 * result of handling certain STREAMS ioctl operations. 7902 */ 7903 static int 7904 udp_rinfop(queue_t *q, infod_t *dp) 7905 { 7906 mblk_t *mp; 7907 uint_t cmd = dp->d_cmd; 7908 int res = 0; 7909 int error = 0; 7910 udp_t *udp = Q_TO_UDP(q); 7911 struct stdata *stp = STREAM(q); 7912 7913 mutex_enter(&udp->udp_drain_lock); 7914 /* If shutdown on read has happened, return nothing */ 7915 mutex_enter(&stp->sd_lock); 7916 if (stp->sd_flag & STREOF) { 7917 mutex_exit(&stp->sd_lock); 7918 goto done; 7919 } 7920 mutex_exit(&stp->sd_lock); 7921 7922 if ((mp = udp->udp_rcv_list_head) == NULL) 7923 goto done; 7924 7925 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7926 7927 if (cmd & INFOD_COUNT) { 7928 /* 7929 * Return the number of messages. 7930 */ 7931 dp->d_count += udp->udp_rcv_msgcnt; 7932 res |= INFOD_COUNT; 7933 } 7934 if (cmd & INFOD_BYTES) { 7935 /* 7936 * Return size of all data messages. 7937 */ 7938 dp->d_bytes += udp->udp_rcv_cnt; 7939 res |= INFOD_BYTES; 7940 } 7941 if (cmd & INFOD_FIRSTBYTES) { 7942 /* 7943 * Return size of first data message. 7944 */ 7945 dp->d_bytes = msgdsize(mp); 7946 res |= INFOD_FIRSTBYTES; 7947 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7948 } 7949 if (cmd & INFOD_COPYOUT) { 7950 mblk_t *mp1 = mp->b_cont; 7951 int n; 7952 /* 7953 * Return data contents of first message. 
7954 */ 7955 ASSERT(DB_TYPE(mp1) == M_DATA); 7956 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7957 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7958 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7959 UIO_READ, dp->d_uiop)) != 0) { 7960 goto done; 7961 } 7962 mp1 = mp1->b_cont; 7963 } 7964 res |= INFOD_COPYOUT; 7965 dp->d_cmd &= ~INFOD_COPYOUT; 7966 } 7967 done: 7968 mutex_exit(&udp->udp_drain_lock); 7969 7970 dp->d_res |= res; 7971 7972 return (error); 7973 } 7974 7975 /* 7976 * Read-side synchronous stream entry point. This is called as a result 7977 * of recv/read operation done at sockfs, and is guaranteed to execute 7978 * outside of the interrupt thread context. It returns a single datagram 7979 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7980 */ 7981 static int 7982 udp_rrw(queue_t *q, struiod_t *dp) 7983 { 7984 mblk_t *mp; 7985 udp_t *udp = Q_TO_UDP(q); 7986 udp_stack_t *us = udp->udp_us; 7987 7988 /* 7989 * Dequeue datagram from the head of the list and return 7990 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7991 * set/cleared depending on whether or not there's data 7992 * remaining in the list. 7993 */ 7994 mutex_enter(&udp->udp_drain_lock); 7995 if (!udp->udp_direct_sockfs) { 7996 mutex_exit(&udp->udp_drain_lock); 7997 UDP_STAT(us, udp_rrw_busy); 7998 return (EBUSY); 7999 } 8000 if ((mp = udp->udp_rcv_list_head) != NULL) { 8001 uint_t size = msgdsize(mp); 8002 8003 /* Last datagram in the list? */ 8004 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 8005 udp->udp_rcv_list_tail = NULL; 8006 mp->b_next = NULL; 8007 8008 udp->udp_rcv_cnt -= size; 8009 udp->udp_rcv_msgcnt--; 8010 UDP_STAT(us, udp_rrw_msgcnt); 8011 8012 /* No longer flow-controlling? 
*/ 8013 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 8014 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 8015 udp->udp_drain_qfull = B_FALSE; 8016 } 8017 if (udp->udp_rcv_list_head == NULL) { 8018 /* 8019 * Either we just dequeued the last datagram or 8020 * we get here from sockfs and have nothing to 8021 * return; in this case clear RSLEEP. 8022 */ 8023 ASSERT(udp->udp_rcv_cnt == 0); 8024 ASSERT(udp->udp_rcv_msgcnt == 0); 8025 ASSERT(udp->udp_rcv_list_tail == NULL); 8026 STR_WAKEUP_CLEAR(STREAM(q)); 8027 } else { 8028 /* 8029 * More data follows; we need udp_rrw() to be 8030 * called in future to pick up the rest. 8031 */ 8032 STR_WAKEUP_SET(STREAM(q)); 8033 } 8034 mutex_exit(&udp->udp_drain_lock); 8035 dp->d_mp = mp; 8036 return (0); 8037 } 8038 8039 /* 8040 * Enqueue a completely-built T_UNITDATA_IND message into the receive 8041 * list; this is typically executed within the interrupt thread context 8042 * and so we do things as quickly as possible. 8043 */ 8044 static void 8045 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 8046 { 8047 ASSERT(q == RD(q)); 8048 ASSERT(pkt_len == msgdsize(mp)); 8049 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 8050 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 8051 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 8052 8053 mutex_enter(&udp->udp_drain_lock); 8054 /* 8055 * Wake up and signal the receiving app; it is okay to do this 8056 * before enqueueing the mp because we are holding the drain lock. 8057 * One of the advantages of synchronous stream is the ability for 8058 * us to find out when the application performs a read on the 8059 * socket by way of udp_rrw() entry point being called. We need 8060 * to generate SIGPOLL/SIGIO for each received data in the case 8061 * of asynchronous socket just as in the strrput() case. However, 8062 * we only wake the application up when necessary, i.e. during the 8063 * first enqueue. 
When udp_rrw() is called, we send up a single 8064 * datagram upstream and call STR_WAKEUP_SET() again when there 8065 * are still data remaining in our receive queue. 8066 */ 8067 if (udp->udp_rcv_list_head == NULL) { 8068 STR_WAKEUP_SET(STREAM(q)); 8069 udp->udp_rcv_list_head = mp; 8070 } else { 8071 udp->udp_rcv_list_tail->b_next = mp; 8072 } 8073 udp->udp_rcv_list_tail = mp; 8074 udp->udp_rcv_cnt += pkt_len; 8075 udp->udp_rcv_msgcnt++; 8076 8077 /* Need to flow-control? */ 8078 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 8079 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 8080 udp->udp_drain_qfull = B_TRUE; 8081 8082 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 8083 STR_SENDSIG(STREAM(q)); 8084 mutex_exit(&udp->udp_drain_lock); 8085 } 8086 8087 /* 8088 * Drain the contents of receive list to the module upstream; we do 8089 * this during close or when we fallback to the slow mode due to 8090 * sockmod being popped or a module being pushed on top of us. 8091 */ 8092 static void 8093 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 8094 { 8095 mblk_t *mp; 8096 udp_stack_t *us = udp->udp_us; 8097 8098 ASSERT(q == RD(q)); 8099 8100 mutex_enter(&udp->udp_drain_lock); 8101 /* 8102 * There is no race with a concurrent udp_input() sending 8103 * up packets using putnext() after we have cleared the 8104 * udp_direct_sockfs flag but before we have completed 8105 * sending up the packets in udp_rcv_list, since we are 8106 * either a writer or we have quiesced the conn. 8107 */ 8108 udp->udp_direct_sockfs = B_FALSE; 8109 mutex_exit(&udp->udp_drain_lock); 8110 8111 if (udp->udp_rcv_list_head != NULL) 8112 UDP_STAT(us, udp_drain); 8113 8114 /* 8115 * Send up everything via putnext(); note here that we 8116 * don't need the udp_drain_lock to protect us since 8117 * nothing can enter udp_rrw() and that we currently 8118 * have exclusive access to this udp. 
8119 */ 8120 while ((mp = udp->udp_rcv_list_head) != NULL) { 8121 udp->udp_rcv_list_head = mp->b_next; 8122 mp->b_next = NULL; 8123 udp->udp_rcv_cnt -= msgdsize(mp); 8124 udp->udp_rcv_msgcnt--; 8125 if (closing) { 8126 freemsg(mp); 8127 } else { 8128 putnext(q, mp); 8129 } 8130 } 8131 ASSERT(udp->udp_rcv_cnt == 0); 8132 ASSERT(udp->udp_rcv_msgcnt == 0); 8133 ASSERT(udp->udp_rcv_list_head == NULL); 8134 udp->udp_rcv_list_tail = NULL; 8135 udp->udp_drain_qfull = B_FALSE; 8136 } 8137 8138 static size_t 8139 udp_set_rcv_hiwat(udp_t *udp, size_t size) 8140 { 8141 udp_stack_t *us = udp->udp_us; 8142 8143 /* We add a bit of extra buffering */ 8144 size += size >> 1; 8145 if (size > us->us_max_buf) 8146 size = us->us_max_buf; 8147 8148 udp->udp_rcv_hiwat = size; 8149 return (size); 8150 } 8151 8152 /* 8153 * For the lower queue so that UDP can be a dummy mux. 8154 * Nobody should be sending 8155 * packets up this stream 8156 */ 8157 static void 8158 udp_lrput(queue_t *q, mblk_t *mp) 8159 { 8160 mblk_t *mp1; 8161 8162 switch (mp->b_datap->db_type) { 8163 case M_FLUSH: 8164 /* Turn around */ 8165 if (*mp->b_rptr & FLUSHW) { 8166 *mp->b_rptr &= ~FLUSHR; 8167 qreply(q, mp); 8168 return; 8169 } 8170 break; 8171 } 8172 /* Could receive messages that passed through ar_rput */ 8173 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 8174 mp1->b_prev = mp1->b_next = NULL; 8175 freemsg(mp); 8176 } 8177 8178 /* 8179 * For the lower queue so that UDP can be a dummy mux. 8180 * Nobody should be sending packets down this stream. 8181 */ 8182 /* ARGSUSED */ 8183 void 8184 udp_lwput(queue_t *q, mblk_t *mp) 8185 { 8186 freemsg(mp); 8187 } 8188