/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

const char udp_version[] = "@(#)udp.c	1.206	08/10/17 SMI";

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/time.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsubr.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/ucred.h>
#include <sys/zone.h>

#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/vtrace.h>
#include <sys/sdt.h>
#include <sys/debug.h>
#include <sys/isa_defs.h>
#include <sys/random.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>
#include <net/if.h>
#include <net/route.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <inet/ip_multi.h>
#include <inet/ip_ndp.h>
#include <inet/mi.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/udp_impl.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>
#include <inet/ipp_common.h>

/*
 * The ipsec_info.h header file is here since it has the definition for the
 * M_CTL message types used by IP to convey information to the ULP. The
 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence.
 */
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>

#include <sys/tsol/label.h>
#include <sys/tsol/tnet.h>
#include <rpc/pmap_prot.h>

/*
 * Synchronization notes:
 *
 * UDP is MT and uses the usual kernel synchronization primitives. There are 2
 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock.
 * We also use conn_lock when updating things that affect the IP classifier
 * lookup.
 * The lock order is udp_rwlock -> uf_lock, and udp_rwlock -> conn_lock.
 *
 * The fanout lock uf_lock:
 * When a UDP endpoint is bound to a local port, it is inserted into
 * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
 * The size of the array is controlled by the udp_bind_fanout_size variable.
 * This variable can be changed in /etc/system if the default value is
 * not large enough.  Each bind hash bucket is protected by a per bucket
 * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
 * structure and a few other fields in the udp_t. A UDP endpoint is removed
 * from the bind hash list only when it is being unbound or being closed.
 * The per bucket lock also protects a UDP endpoint's state changes.
 *
 * The udp_rwlock:
 * This protects most of the other fields in the udp_t. The exact list of
 * fields which are protected by each of the above locks is documented in
 * the udp_t structure definition.
 *
 * Plumbing notes:
 * UDP is always a device driver. For compatibility with mibopen() code
 * it is possible to I_PUSH "udp", but that results in pushing a passthrough
 * dummy module.
 *
 * The above implies that we don't support any intermediate module to
 * reside in between /dev/ip and udp -- in fact, we never supported such
 * a scenario in the past, as the inter-layer communication semantics have
 * always been private.
 */

/* For /etc/system control */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;

#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"

/* Option processing attrs */
typedef struct udpattrs_s {
	union {
		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
		ip4_pkt_t	*udpattr_ipp4;	/* For V4 */
	} udpattr_ippu;
#define	udpattr_ipp6	udpattr_ippu.udpattr_ipp6
#define	udpattr_ipp4	udpattr_ippu.udpattr_ipp4
	mblk_t		*udpattr_mb;
	boolean_t	udpattr_credset;
} udpattrs_t;

static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static void	udp_bind_result(conn_t *, mblk_t *);
static void	udp_bind_ack(conn_t *, mblk_t *mp);
static void	udp_bind_error(conn_t *, mblk_t *mp);
static int	udp_build_hdrs(udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_close(queue_t *q);
static void	udp_connect(queue_t *q, mblk_t *mp);
static void	udp_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(queue_t *q, mblk_t *mp);
static void	udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static void	udp_input(void *, mblk_t *, void *);
static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
		    t_scalar_t addr_length);
static void	udp_lrput(queue_t *, mblk_t *);
static void	udp_lwput(queue_t *, mblk_t *);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp, boolean_t isv6);
static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
		    cred_t *cr);
static void	udp_report_item(mblk_t *mp, udp_t *udp);
static int	udp_rinfop(queue_t *q, infod_t *dp);
static int	udp_rrw(queue_t *q, struiod_t *dp);
static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	udp_send_data(udp_t *, queue_t *, mblk_t *, ipha_t *);
static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
		    t_scalar_t destlen, t_scalar_t err);
static void	udp_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
		    boolean_t random);
static mblk_t	*udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
		    int *, boolean_t);
static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
		    int *error);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	udp_stack_fini(netstackid_t stackid, void *arg);

static void	*udp_kstat_init(netstackid_t stackid);
static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
static void	udp_kstat2_fini(netstackid_t, kstat_t *);
static int	udp_kstat_update(kstat_t *kp, int rw);

static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
		    uint_t pkt_len);
static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void	udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);

#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024

static struct module_info udp_mod_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_winit = {
	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL,
	&udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_close, NULL,
	&udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_close, NULL,
	&udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};

static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
	/*min	max		value		name */
	{ 0L,	256,		32,		"udp_wroff_extra" },
	{ 1L,	255,		255,		"udp_ipv4_ttl" },
	{ 0,	IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
	{ 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
	{ 0,	1,		1,		"udp_do_checksum" },
	{ 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
	{ 1024, UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
	{ UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"},
	{ 0,		(1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
	{ UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"},
	{ 65536,	(1<<30), 2*1024*1024, "udp_max_buf"},
	{ 100,		60000, 1000,	"udp_ndd_get_info_interval"},
};
/* END CSTYLED */
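
/*
 * Illustrative usage note (not part of the original file): the ND variables
 * in udp_param_arr[] above are normally inspected and adjusted at run time
 * with ndd(1M) against the udp driver, for example:
 *
 *	ndd -get /dev/udp udp_xmit_hiwat
 *	ndd -set /dev/udp udp_xmit_hiwat 65536
 *
 * Requests outside the min/max bounds listed in the table are assumed to be
 * rejected by the set routine registered for this table.
 */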
/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 */

void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP). MLP has special function
 * on TX systems. Once a port is made MLP, it's not available as an
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" to find a suitable anon port.
 */
static in_port_t
udp_get_next_priv_port(udp_t *udp)
{
	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
	in_port_t nextport;
	boolean_t restart = B_FALSE;
	udp_stack_t *us = udp->udp_us;

retry:
	if (next_priv_port < us->us_min_anonpriv_port ||
	    next_priv_port >= IPPORT_RESERVED) {
		next_priv_port = IPPORT_RESERVED - 1;
		if (restart)
			return (0);
		restart = B_TRUE;
	}

	if (is_system_labeled() &&
	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
		next_priv_port = nextport;
		goto retry;
	}

	return (next_priv_port--);
}

/* UDP bind hash report triggered via the Named Dispatch mechanism. */
/* ARGSUSED */
static int
udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udp_fanout_t	*udpf;
	int		i;
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;
	udp_stack_t	*us;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	us = udp->udp_us;

	/* Refer to comments in udp_status_report(). */
	if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) {
		if (ddi_get_lbolt() - us->us_last_ndd_get_info_time <
		    drv_usectohz(us->us_ndd_get_info_interval * 1000)) {
			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
			return (0);
		}
	}
	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
		/* The following may work even if we cannot get a large buf. */
		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
		return (0);
	}

	(void) mi_mpprintf(mp,
	    "UDP     " MI_COL_HDRPAD_STR
	/*   12345678[89ABCDEF] */
	    " zone lport src addr        dest addr       port  state");
	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */

	zoneid = connp->conn_zoneid;

	for (i = 0; i < us->us_bind_fanout_size; i++) {
		udpf = &us->us_bind_fanout[i];
		mutex_enter(&udpf->uf_lock);

		/* Print the hash index. */
		udp = udpf->uf_udp;
		if (zoneid != GLOBAL_ZONEID) {
			/* skip to first entry in this zone; might be none */
			while (udp != NULL &&
			    udp->udp_connp->conn_zoneid != zoneid)
				udp = udp->udp_bind_hash;
		}
		if (udp != NULL) {
			uint_t print_len, buf_len;

			buf_len = mp->b_cont->b_datap->db_lim -
			    mp->b_cont->b_wptr;
			print_len = snprintf((char *)mp->b_cont->b_wptr,
			    buf_len, "%d\n", i);
			if (print_len < buf_len) {
				mp->b_cont->b_wptr += print_len;
			} else {
				mp->b_cont->b_wptr += buf_len;
			}
			for (; udp != NULL; udp = udp->udp_bind_hash) {
				if (zoneid == GLOBAL_ZONEID ||
				    zoneid == udp->udp_connp->conn_zoneid)
					udp_report_item(mp->b_cont, udp);
			}
		}
		mutex_exit(&udpf->uf_lock);
	}
	us->us_last_ndd_get_info_time = ddi_get_lbolt();
	return (0);
}

/*
 * Hash list removal routine for udp_t structures.
 */
static void
udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
{
	udp_t	*udpnext;
	kmutex_t *lockp;
	udp_stack_t *us = udp->udp_us;

	if (udp->udp_ptpbhn == NULL)
		return;

	/*
	 * Extract the lock pointer in case there are concurrent
	 * hash_remove's for this instance.
	 */
	ASSERT(udp->udp_port != 0);
	if (!caller_holds_lock) {
		lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
		    us->us_bind_fanout_size)].uf_lock;
		ASSERT(lockp != NULL);
		mutex_enter(lockp);
	}
	if (udp->udp_ptpbhn != NULL) {
		udpnext = udp->udp_bind_hash;
		if (udpnext != NULL) {
			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
			udp->udp_bind_hash = NULL;
		}
		*udp->udp_ptpbhn = udpnext;
		udp->udp_ptpbhn = NULL;
	}
	if (!caller_holds_lock) {
		mutex_exit(lockp);
	}
}

static void
udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
{
	udp_t	**udpp;
	udp_t	*udpnext;

	ASSERT(MUTEX_HELD(&uf->uf_lock));
	ASSERT(udp->udp_ptpbhn == NULL);
	udpp = &uf->uf_udp;
	udpnext = udpp[0];
	if (udpnext != NULL) {
		/*
		 * If the new udp is bound to the INADDR_ANY address
		 * and the first one in the list is not bound to
		 * INADDR_ANY we skip all entries until we find the
		 * first one bound to INADDR_ANY.
		 * This makes sure that applications binding to a
		 * specific address get preference over those binding to
		 * INADDR_ANY.
		 */
		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
			while ((udpnext = udpp[0]) != NULL &&
			    !V6_OR_V4_INADDR_ANY(
			    udpnext->udp_bound_v6src)) {
				udpp = &(udpnext->udp_bind_hash);
			}
			if (udpnext != NULL)
				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		} else {
			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		}
	}
	udp->udp_bind_hash = udpnext;
	udp->udp_ptpbhn = udpp;
	udpp[0] = udp;
}
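
/*
 * Illustrative sketch (an assumption drawn from the callers later in this
 * file, e.g. udp_bind()): a caller locates the bucket for a port and holds
 * its lock across insertion, which is what the MUTEX_HELD assertion above
 * checks:
 *
 *	udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
 *	    us->us_bind_fanout_size)];
 *	mutex_enter(&udpf->uf_lock);
 *	udp_bind_hash_insert(udpf, udp);
 *	mutex_exit(&udpf->uf_lock);
 */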
/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the us->us_next_port_to_try.
 */
static void
udp_bind(queue_t *q, mblk_t *mp)
{
	sin_t		*sin;
	sin6_t		*sin6;
	mblk_t		*mp1;
	in_port_t	port;		/* Host byte order */
	in_port_t	requested_port;	/* Host byte order */
	struct T_bind_req *tbr;
	int		count;
	in6_addr_t	v6src;
	boolean_t	bind_to_req_port_only;
	int		loopmax;
	udp_fanout_t	*udpf;
	in_port_t	lport;		/* Network byte order */
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;
	boolean_t	is_inaddr_any;
	mlp_type_t	addrtype, mlptype;
	udp_stack_t	*us;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	us = udp->udp_us;
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	if (udp->udp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Reallocate the message to make sure we have enough room for an
	 * address and the protocol type.
	 */
	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
	if (!mp1) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	mp = mp1;
	tbr = (struct T_bind_req *)mp->b_rptr;
	switch (tbr->ADDR_length) {
	case 0:			/* Request for a generic port */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (udp->udp_family == AF_INET) {
			tbr->ADDR_length = sizeof (sin_t);
			sin = (sin_t *)&tbr[1];
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
		} else {
			ASSERT(udp->udp_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&tbr[1];
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
		}
		port = 0;
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin->sin_port);
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin6->sin6_port);
		break;

	default:		/* Invalid request */
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	requested_port = port;

	if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ)
		bind_to_req_port_only = B_FALSE;
	else			/* T_BIND_REQ and requested_port != 0 */
		bind_to_req_port_only = B_TRUE;

	if (requested_port == 0) {
		/*
		 * If the application passed in zero for the port number, it
		 * doesn't care which port number we bind to. Get one in the
		 * valid range.
		 */
		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			port = udp_update_next_port(udp,
			    us->us_next_port_to_try, B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < us->us_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			for (i = 0; i < us->us_num_epriv_ports; i++) {
				if (port == us->us_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);

			if (secpolicy_net_privaddr(cr, port,
			    IPPROTO_UDP) != 0) {
				udp_err_ack(q, mp, TACCES, 0);
				return;
			}
		}
	}

	if (port == 0) {
		udp_err_ack(q, mp, TNOADDR, 0);
		return;
	}

	/*
	 * The state must be TS_UNBND. TPI mandates that users must send
	 * TPI primitives only 1 at a time and wait for the response before
	 * sending the next primitive.
	 */
	rw_enter(&udp->udp_rwlock, RW_WRITER);
	if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
		rw_exit(&udp->udp_rwlock);
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	udp->udp_pending_op = tbr->PRIM_type;
	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it. Since the udp is
	 * not yet in the bind hash list, we don't grab the uf_lock to
	 * change udp_ipversion
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			/*
			 * no need to hold the uf_lock to set the udp_ipversion
			 * since we are not yet in the fanout list
			 */
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream. If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set the bind fails; otherwise we
	 * search for an unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different than in SunOS 4.X which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/*
		 * loopmax = (IPPORT_RESERVED-1) -
		 *    us->us_min_anonpriv_port + 1
		 */
		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
	} else {
		loopmax = us->us_largest_anon_port -
		    us->us_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
	zoneid = connp->conn_zoneid;

	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;

		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
		    us->us_bind_fanout_size)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
			    IPCL_ZONE_MATCH(connp,
			    udp1->udp_connp->conn_zoneid)) &&
			    !connp->conn_mac_exempt &&
			    !udp1->udp_connp->conn_mac_exempt)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * is changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind ||
			    udp1->udp_connp->conn_mac_exempt ||
			    connp->conn_mac_exempt) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion) {

				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a
				 * particular port number, then ignore any
				 * bindings of the other protocol that may
				 * conflict. This allows the user to bind IPv6
				 * alone and get both v4 and v6, or bind both
				 * and get each separately. On subsequent
				 * times through the loop, we're checking a
				 * port that we chose (not the user) and thus
				 * we do not allow casual duplicate bindings.
				 */
				if (count == 0 && requested_port != 0)
					continue;
			}

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If the existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a different IP address
			 * (also non-wildcard), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			break;
		}

		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when the requested port
			 * is bound (and only on the first iteration
			 * of the for() loop).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 */
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			udp_err_ack(q, mp, TADDRBUSY, 0);
			return;
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update us->us_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    us->us_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			udp_err_ack(q, mp, TNOADDR, 0);
			return;
		}
	}

	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address then
	 * udp_bind_ack will clear the source address when it receives
	 * the T_BIND_ACK.
	 */
	udp->udp_v6src = udp->udp_bound_v6src = v6src;
	udp->udp_port = lport;
	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
		us->us_next_port_to_try = port + 1;
	}

	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
	if (udp->udp_family == AF_INET) {
		sin->sin_port = udp->udp_port;
	} else {
		int error;

		sin6->sin6_port = udp->udp_port;
		/* Rebuild the header template */
		error = udp_build_hdrs(udp);
		if (error != 0) {
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			mutex_exit(&udpf->uf_lock);
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	udp->udp_state = TS_IDLE;
	udp_bind_hash_insert(udpf, udp);
	mutex_exit(&udpf->uf_lock);
	rw_exit(&udp->udp_rwlock);

	if (cl_inet_bind) {
		/*
		 * Running in cluster mode - register bind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}

	}

	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
	if (is_system_labeled() && (!connp->conn_anon_port ||
	    connp->conn_anon_mlp)) {
		uint16_t mlpport;
		cred_t *cr = connp->conn_cred;
		zone_t *zone;

		zone = crgetzone(cr);
		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
		    mlptSingle;
		addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION,
		    &v6src, us->us_netstack->netstack_ip);
		if (addrtype == mlptSingle) {
			rw_enter(&udp->udp_rwlock, RW_WRITER);
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			udp_err_ack(q, mp, TNOADDR, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}
		mlpport = connp->conn_anon_port ? PMAPPORT : port;
		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
		    addrtype);
		if (mlptype != mlptSingle &&
		    (connp->conn_mlp_type == mlptSingle ||
		    secpolicy_net_bindmlp(cr) != 0)) {
			if (udp->udp_debug) {
				(void) strlog(UDP_MOD_ID, 0, 1,
				    SL_ERROR|SL_TRACE,
				    "udp_bind: no priv for multilevel port %d",
				    mlpport);
			}
			rw_enter(&udp->udp_rwlock, RW_WRITER);
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			udp_err_ack(q, mp, TACCES, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}

		/*
		 * If we're specifically binding a shared IP address and the
		 * port is MLP on shared addresses, then check to see if this
		 * zone actually owns the MLP.  Reject if not.
		 */
		if (mlptype == mlptShared && addrtype == mlptShared) {
			/*
			 * No need to handle exclusive-stack zones since
			 * ALL_ZONES only applies to the shared stack.
			 */
			zoneid_t mlpzone;

			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
			    htons(mlpport));
			if (connp->conn_zoneid != mlpzone) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: attempt to bind port "
					    "%d on shared addr in zone %d "
					    "(should be %d)",
					    mlpport, connp->conn_zoneid,
					    mlpzone);
				}
				rw_enter(&udp->udp_rwlock, RW_WRITER);
				udp->udp_pending_op = -1;
				rw_exit(&udp->udp_rwlock);
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		if (connp->conn_anon_port) {
			int error;

			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
			    port, B_TRUE);
			if (error != 0) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: cannot establish anon "
					    "MLP for port %d", port);
				}
				rw_enter(&udp->udp_rwlock, RW_WRITER);
				udp->udp_pending_op = -1;
				rw_exit(&udp->udp_rwlock);
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		connp->conn_mlp_type = mlptype;
	}

	/* Pass the protocol number in the message following the address. */
	*mp->b_wptr++ = IPPROTO_UDP;
	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
		/*
		 * Append a request for an IRE if udp_v6src not
		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
		 */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			rw_enter(&udp->udp_rwlock, RW_WRITER);
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			udp_err_ack(q, mp, TSYSERR, ENOMEM);
			return;
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
	}
	if (udp->udp_family == AF_INET6)
		mp = ip_bind_v6(q, mp, connp, NULL);
	else
		mp = ip_bind_v4(q, mp, connp);

	/* The above returns NULL if the bind needs to be deferred */
	if (mp != NULL)
		udp_bind_result(connp, mp);
	else
		CONN_INC_REF(connp);
}

/*
 * This is called from ip_wput_nondata to handle the results of a
 * deferred UDP bind. It is called once the bind has been completed.
 */
void
udp_resume_bind(conn_t *connp, mblk_t *mp)
{
	ASSERT(connp != NULL && IPCL_IS_UDP(connp));

	udp_bind_result(connp, mp);

	CONN_OPER_PENDING_DONE(connp);
}
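
/*
 * Note added for clarity (hedged; based on the deferred-bind handling in
 * udp_bind() above): when ip_bind_v4()/ip_bind_v6() return NULL the bind has
 * been deferred and udp_bind() took an extra reference via CONN_INC_REF().
 * udp_resume_bind() is the completion path for that case, and
 * CONN_OPER_PENDING_DONE() is expected to release that reference once the
 * deferred operation has finished.
 */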
/*
 * This routine handles each T_CONN_REQ message passed to udp.  It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_bind_result.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
 * convert it to an error ack for the appropriate primitive.
 */
static void
udp_connect(queue_t *q, mblk_t *mp)
{
	sin6_t	*sin6;
	sin_t	*sin;
	struct T_conn_req	*tcr;
	in6_addr_t v6dst;
	ipaddr_t v4dst;
	uint16_t dstport;
	uint32_t flowinfo;
	mblk_t	*mp1, *mp2;
	udp_fanout_t	*udpf;
	udp_t	*udp, *udp1;
	ushort_t	ipversion;
	udp_stack_t	*us;
	conn_t	*connp = Q_TO_CONN(q);

	udp = connp->conn_udp;
	tcr = (struct T_conn_req *)mp->b_rptr;
	us = udp->udp_us;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine the packet type based on the type of address passed in;
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that the address family matches the family of the
	 * address passed down.
	 */
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		ipversion = IPV4_VERSION;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v6dst = sin6->sin6_addr;
		dstport = sin6->sin6_port;
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			ipversion = IPV4_VERSION;
			flowinfo = 0;
		} else {
			ipversion = IPV6_VERSION;
			flowinfo = sin6->sin6_flowinfo;
		}
		break;
	}
	if (dstport == 0) {
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	rw_enter(&udp->udp_rwlock, RW_WRITER);

	/*
	 * This UDP must have bound to a port already before doing a connect.
	 * TPI mandates that users must send TPI primitives only 1 at a time
	 * and wait for the response before sending the next primitive.
	 */
	if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) {
		rw_exit(&udp->udp_rwlock);
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	udp->udp_pending_op = T_CONN_REQ;
	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);

	if (ipversion == IPV4_VERSION) {
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
	} else {
		udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
	}

	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
	    us->us_bind_fanout_size)];

	mutex_enter(&udpf->uf_lock);
	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		udp->udp_v6src = udp->udp_bound_v6src;
		udp->udp_state = TS_IDLE;
	}

	/*
	 * Create a default IP header with no IP options.
	 */
	udp->udp_dstport = dstport;
	udp->udp_ipversion = ipversion;
	if (ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (udp->udp_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = 0;

		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * use the address of that interface as our
		 * source address if no source address has been set.
		 */
		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
		    CLASSD(v4dst) &&
		    udp->udp_multicast_if_addr != INADDR_ANY) {
			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
			    &udp->udp_v6src);
		}
	} else {
		ASSERT(udp->udp_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = flowinfo;
		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * then the ip bind logic will pick the correct source
		 * address (i.e. matching the outgoing multicast interface).
		 */
	}

	/*
	 * Verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER
	 */
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;
		if (udp->udp_port != udp1->udp_port ||
		    udp->udp_ipversion != udp1->udp_ipversion ||
		    dstport != udp1->udp_dstport ||
		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) ||
		    !(IPCL_ZONE_MATCH(udp->udp_connp,
		    udp1->udp_connp->conn_zoneid) ||
		    IPCL_ZONE_MATCH(udp1->udp_connp,
		    udp->udp_connp->conn_zoneid)))
			continue;
		mutex_exit(&udpf->uf_lock);
		udp->udp_pending_op = -1;
		rw_exit(&udp->udp_rwlock);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}
	udp->udp_state = TS_DATA_XFER;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to verify that there is a route
	 * and to determine the source address.
	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
	if (mp1 == NULL) {
bind_failed:
		mutex_enter(&udpf->uf_lock);
		udp->udp_state = TS_IDLE;
		udp->udp_pending_op = -1;
		mutex_exit(&udpf->uf_lock);
		rw_exit(&udp->udp_rwlock);
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	rw_exit(&udp->udp_rwlock);
	/*
	 * We also have to send a connection confirmation to
	 * keep TLI happy. Prepare it for udp_bind_result.
	 */
	if (udp->udp_family == AF_INET)
		mp2 = mi_tpi_conn_con(NULL, (char *)sin,
		    sizeof (*sin), NULL, 0);
	else
		mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
		    sizeof (*sin6), NULL, 0);
	if (mp2 == NULL) {
		freemsg(mp1);
		rw_enter(&udp->udp_rwlock, RW_WRITER);
		goto bind_failed;
	}

	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack. */
		freemsg(mp2);
		rw_enter(&udp->udp_rwlock, RW_WRITER);
		mutex_enter(&udpf->uf_lock);
		udp->udp_state = TS_IDLE;
		udp->udp_pending_op = -1;
		mutex_exit(&udpf->uf_lock);
		rw_exit(&udp->udp_rwlock);
		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
		return;
	}

	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
	linkb(mp1, mp);
	linkb(mp1, mp2);

	mblk_setcred(mp1, connp->conn_cred);
	if (udp->udp_family == AF_INET)
		mp1 = ip_bind_v4(q, mp1, connp);
	else
		mp1 = ip_bind_v6(q, mp1, connp, NULL);

	/* The above returns NULL if the bind needs to be deferred */
	if (mp1 != NULL)
		udp_bind_result(connp, mp1);
	else
		CONN_INC_REF(connp);
}

static int
udp_close(queue_t *q)
{
	conn_t	*connp = (conn_t *)q->q_ptr;
	udp_t	*udp;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	udp_quiesce_conn(connp);
	ip_quiesce_conn(connp);
	/*
	 * Disable read-side synchronous stream
	 * interface and drain any queued data.
	 */
	udp_rcv_drain(q, udp, B_TRUE);
	ASSERT(!udp->udp_direct_sockfs);

	qprocsoff(q);

	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	ASSERT(udp->udp_rcv_list_tail == NULL);

	udp_close_free(connp);

	/*
	 * Now we are truly single threaded on this stream, and can
	 * delete the things hanging off the connp, and finally the connp.
	 * We removed this connp from the fanout list, it cannot be
	 * accessed thru the fanouts, and we already waited for the
	 * conn_ref to drop to 0. We are already in close, so
	 * there cannot be any other thread from the top. qprocsoff
	 * has completed, and service has completed or won't run in
	 * future.
	 */
	ASSERT(connp->conn_ref == 1);
	inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
	connp->conn_ref--;
	ipcl_conn_destroy(connp);

	q->q_ptr = WR(q)->q_ptr = NULL;
	return (0);
}

/*
 * Called in the close path to quiesce the conn
 */
void
udp_quiesce_conn(conn_t *connp)
{
	udp_t	*udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)(&(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		}
	}

	udp_bind_hash_remove(udp, B_FALSE);
}

void
udp_close_free(conn_t *connp)
{
	udp_t *udp = connp->conn_udp;

	/* If there are any options associated with the stream, free them. */
	if (udp->udp_ip_snd_options != NULL) {
		mi_free((char *)udp->udp_ip_snd_options);
		udp->udp_ip_snd_options = NULL;
		udp->udp_ip_snd_options_len = 0;
	}

	if (udp->udp_ip_rcv_options != NULL) {
		mi_free((char *)udp->udp_ip_rcv_options);
		udp->udp_ip_rcv_options = NULL;
		udp->udp_ip_rcv_options_len = 0;
	}

	/* Free memory associated with sticky options */
	if (udp->udp_sticky_hdrs_len != 0) {
		kmem_free(udp->udp_sticky_hdrs,
		    udp->udp_sticky_hdrs_len);
		udp->udp_sticky_hdrs = NULL;
		udp->udp_sticky_hdrs_len = 0;
	}

	ip6_pkt_free(&udp->udp_sticky_ipp);

	/*
	 * Clear any fields which the kmem_cache constructor clears.
	 * Only udp_connp needs to be preserved.
	 * TBD: We should make this more efficient to avoid clearing
	 * everything.
	 */
	ASSERT(udp->udp_connp == connp);
	bzero(udp, sizeof (udp_t));
	udp->udp_connp = connp;
}

/*
 * This routine handles each T_DISCON_REQ message passed to udp
 * as an indication that UDP is no longer connected. This results
 * in sending a T_BIND_REQ to IP to restore the binding to just
 * the local address/port.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying just the local address/port
 *	T_OK_ACK	- for the T_DISCON_REQ
 *
 * The disconnect completes in udp_bind_result.
 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will
 * convert it to an error ack for the appropriate primitive.
 */
static void
udp_disconnect(queue_t *q, mblk_t *mp)
{
	udp_t	*udp;
	mblk_t	*mp1;
	udp_fanout_t *udpf;
	udp_stack_t *us;
	conn_t	*connp = Q_TO_CONN(q);

	udp = connp->conn_udp;
	us = udp->udp_us;
	rw_enter(&udp->udp_rwlock, RW_WRITER);
	if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) {
		rw_exit(&udp->udp_rwlock);
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_disconnect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	udp->udp_pending_op = T_DISCON_REQ;
	udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port,
	    us->us_bind_fanout_size)];
	mutex_enter(&udpf->uf_lock);
	udp->udp_v6src = udp->udp_bound_v6src;
	udp->udp_state = TS_IDLE;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to remove the full binding and revert
	 * to the local address binding.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t));
	if (mp1 == NULL) {
		udp->udp_pending_op = -1;
		rw_exit(&udp->udp_rwlock);
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}
	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_DISCON_REQ for the ack. */
		udp->udp_pending_op = -1;
		rw_exit(&udp->udp_rwlock);
		udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM);
		return;
	}

	if (udp->udp_family == AF_INET6) {
		int error;

		/* Rebuild the header template */
		error = udp_build_hdrs(udp);
		if (error != 0) {
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error);
			freemsg(mp1);
			return;
		}
	}

	rw_exit(&udp->udp_rwlock);
	/* Append the T_OK_ACK to the T_BIND_REQ for udp_bind_ack */
	linkb(mp1, mp);

	if (udp->udp_family == AF_INET6)
		mp1 = ip_bind_v6(q, mp1, connp, NULL);
	else
		mp1 = ip_bind_v4(q, mp1, connp);

	/* The above returns NULL if the bind needs to be deferred */
	if (mp1 != NULL)
		udp_bind_result(connp, mp1);
	else
		CONN_INC_REF(connp);
}

/* This routine creates a T_ERROR_ACK message and passes it upstream. */
static void
udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
{
	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
		qreply(q, mp);
}

/* Shorthand to generate and send TPI error acks to our client */
static void
udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
    int sys_error)
{
	struct T_error_ack	*teackp;

	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
		teackp = (struct T_error_ack *)mp->b_rptr;
		teackp->ERROR_prim = primitive;
		teackp->TLI_error = t_error;
		teackp->UNIX_error = sys_error;
		qreply(q, mp);
	}
}

/*ARGSUSED*/
static int
udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	int i;
	udp_t		*udp = Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;

	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (us->us_epriv_ports[i] != 0)
			(void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]);
	}
	return (0);
}

/* ARGSUSED */
static int
udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	long	new_value;
	int	i;
	udp_t		*udp = Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;

	/*
	 * Fail the request if the new value does not lie within the
	 * port number limits.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value <= 0 || new_value >= 65536) {
		return (EINVAL);
	}

	/* Check if the value is already in the list */
	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (new_value == us->us_epriv_ports[i]) {
			return (EEXIST);
		}
	}
	/* Find an empty slot */
	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (us->us_epriv_ports[i] == 0)
			break;
	}
	if (i == us->us_num_epriv_ports) {
		return (EOVERFLOW);
	}

	/* Set the new value */
	us->us_epriv_ports[i] = (in_port_t)new_value;
	return (0);
}

/* ARGSUSED */
static int
udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	long	new_value;
	int	i;
	udp_t		*udp = Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;

	/*
	 * Fail the request if the new value does not lie within the
	 * port number limits.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value <= 0 || new_value >= 65536) {
		return (EINVAL);
	}

	/* Check that the value is already in the list */
	for (i = 0; i < us->us_num_epriv_ports; i++) {
		if (us->us_epriv_ports[i] == new_value)
			break;
	}
	if (i == us->us_num_epriv_ports) {
		return (ESRCH);
	}

	/* Clear the value */
	us->us_epriv_ports[i] = 0;
	return (0);
}
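
/*
 * Illustrative usage note (not part of the original file): the extra
 * privileged port list handled by the _get/_add/_del routines above is
 * normally manipulated through ndd(1M), for example:
 *
 *	ndd /dev/udp udp_extra_priv_ports
 *	ndd -set /dev/udp udp_extra_priv_ports_add 2049
 *	ndd -set /dev/udp udp_extra_priv_ports_del 2049
 *
 * The exact ndd variable names are an assumption based on the handler names.
 */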
/* At minimum we need 4 bytes of UDP header */
#define	ICMP_MIN_UDP_HDR	4

/*
 * udp_icmp_error is called by udp_input to process ICMP messages passed up
 * by IP. It generates the appropriate T_UDERROR_IND for permanent
 * (non-transient) errors.
 * Assumes that IP has pulled up everything up to and including the ICMP
 * header.
 */
static void
udp_icmp_error(queue_t *q, mblk_t *mp)
{
	icmph_t *icmph;
	ipha_t	*ipha;
	int	iph_hdr_length;
	udpha_t	*udpha;
	sin_t	sin;
	sin6_t	sin6;
	mblk_t	*mp1;
	int	error = 0;
	udp_t	*udp = Q_TO_UDP(q);

	ipha = (ipha_t *)mp->b_rptr;

	ASSERT(OK_32PTR(mp->b_rptr));

	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
		udp_icmp_error_ipv6(q, mp);
		return;
	}
	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);

	/* Skip past the outer IP and ICMP headers */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
	ipha = (ipha_t *)&icmph[1];

	/* Skip past the inner IP and find the ULP header */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);

	switch (icmph->icmph_type) {
	case ICMP_DEST_UNREACHABLE:
		switch (icmph->icmph_code) {
		case ICMP_FRAGMENTATION_NEEDED:
			/*
			 * IP has already adjusted the path MTU.
			 */
			break;
		case ICMP_PORT_UNREACHABLE:
		case ICMP_PROTOCOL_UNREACHABLE:
			error = ECONNREFUSED;
			break;
		default:
			/* Transient errors */
			break;
		}
		break;
	default:
		/* Transient errors */
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!udp->udp_dgram_errind) {
		freemsg(mp);
		return;
	}

	switch (udp->udp_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = ipha->ipha_dst;
		sin.sin_port = udpha->uha_dst_port;
		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
		    error);
		break;
	case AF_INET6:
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
		sin6.sin6_port = udpha->uha_dst_port;

		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
		    NULL, 0, error);
		break;
	}
	if (mp1)
		putnext(q, mp1);
	freemsg(mp);
}

/*
 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
 * Generates the appropriate T_UDERROR_IND for permanent (non-transient)
 * errors.
 * Assumes that IP has pulled up all the extension headers as well as the
 * ICMPv6 header.
1881 */ 1882 static void 1883 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 1884 { 1885 icmp6_t *icmp6; 1886 ip6_t *ip6h, *outer_ip6h; 1887 uint16_t iph_hdr_length; 1888 uint8_t *nexthdrp; 1889 udpha_t *udpha; 1890 sin6_t sin6; 1891 mblk_t *mp1; 1892 int error = 0; 1893 udp_t *udp = Q_TO_UDP(q); 1894 udp_stack_t *us = udp->udp_us; 1895 1896 outer_ip6h = (ip6_t *)mp->b_rptr; 1897 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1898 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1899 else 1900 iph_hdr_length = IPV6_HDR_LEN; 1901 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1902 ip6h = (ip6_t *)&icmp6[1]; 1903 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1904 freemsg(mp); 1905 return; 1906 } 1907 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1908 1909 switch (icmp6->icmp6_type) { 1910 case ICMP6_DST_UNREACH: 1911 switch (icmp6->icmp6_code) { 1912 case ICMP6_DST_UNREACH_NOPORT: 1913 error = ECONNREFUSED; 1914 break; 1915 case ICMP6_DST_UNREACH_ADMIN: 1916 case ICMP6_DST_UNREACH_NOROUTE: 1917 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1918 case ICMP6_DST_UNREACH_ADDR: 1919 /* Transient errors */ 1920 break; 1921 default: 1922 break; 1923 } 1924 break; 1925 case ICMP6_PACKET_TOO_BIG: { 1926 struct T_unitdata_ind *tudi; 1927 struct T_opthdr *toh; 1928 size_t udi_size; 1929 mblk_t *newmp; 1930 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1931 sizeof (struct ip6_mtuinfo); 1932 sin6_t *sin6; 1933 struct ip6_mtuinfo *mtuinfo; 1934 1935 /* 1936 * If the application has requested to receive path mtu 1937 * information, send up an empty message containing an 1938 * IPV6_PATHMTU ancillary data item. 1939 */ 1940 if (!udp->udp_ipv6_recvpathmtu) 1941 break; 1942 1943 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1944 opt_length; 1945 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1946 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1947 break; 1948 } 1949 1950 /* 1951 * newmp->b_cont is left to NULL on purpose. This is an 1952 * empty message containing only ancillary data. 1953 */ 1954 newmp->b_datap->db_type = M_PROTO; 1955 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1956 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1957 tudi->PRIM_type = T_UNITDATA_IND; 1958 tudi->SRC_length = sizeof (sin6_t); 1959 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1960 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1961 tudi->OPT_length = opt_length; 1962 1963 sin6 = (sin6_t *)&tudi[1]; 1964 bzero(sin6, sizeof (sin6_t)); 1965 sin6->sin6_family = AF_INET6; 1966 sin6->sin6_addr = udp->udp_v6dst; 1967 1968 toh = (struct T_opthdr *)&sin6[1]; 1969 toh->level = IPPROTO_IPV6; 1970 toh->name = IPV6_PATHMTU; 1971 toh->len = opt_length; 1972 toh->status = 0; 1973 1974 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1975 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1976 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1977 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1978 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1979 /* 1980 * We've consumed everything we need from the original 1981 * message. Free it, then send our empty message. 
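 * The message sent upstream has no b_cont (no payload); it consists of
 *
 *	struct T_unitdata_ind | sin6_t (peer address) | struct T_opthdr |
 *	struct ip6_mtuinfo
 *
 * with the T_opthdr naming IPPROTO_IPV6/IPV6_PATHMTU and ip6m_mtu taken
 * from the ICMPv6 packet-too-big message.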
1982 */ 1983 freemsg(mp); 1984 putnext(q, newmp); 1985 return; 1986 } 1987 case ICMP6_TIME_EXCEEDED: 1988 /* Transient errors */ 1989 break; 1990 case ICMP6_PARAM_PROB: 1991 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1992 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1993 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1994 (uchar_t *)nexthdrp) { 1995 error = ECONNREFUSED; 1996 break; 1997 } 1998 break; 1999 } 2000 if (error == 0) { 2001 freemsg(mp); 2002 return; 2003 } 2004 2005 /* 2006 * Deliver T_UDERROR_IND when the application has asked for it. 2007 * The socket layer enables this automatically when connected. 2008 */ 2009 if (!udp->udp_dgram_errind) { 2010 freemsg(mp); 2011 return; 2012 } 2013 2014 sin6 = sin6_null; 2015 sin6.sin6_family = AF_INET6; 2016 sin6.sin6_addr = ip6h->ip6_dst; 2017 sin6.sin6_port = udpha->uha_dst_port; 2018 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2019 2020 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2021 error); 2022 if (mp1) 2023 putnext(q, mp1); 2024 freemsg(mp); 2025 } 2026 2027 /* 2028 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 2029 * The local address is filled in if endpoint is bound. The remote address 2030 * is filled in if remote address has been precified ("connected endpoint") 2031 * (The concept of connected CLTS sockets is alien to published TPI 2032 * but we support it anyway). 2033 */ 2034 static void 2035 udp_addr_req(queue_t *q, mblk_t *mp) 2036 { 2037 sin_t *sin; 2038 sin6_t *sin6; 2039 mblk_t *ackmp; 2040 struct T_addr_ack *taa; 2041 udp_t *udp = Q_TO_UDP(q); 2042 2043 /* Make it large enough for worst case */ 2044 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2045 2 * sizeof (sin6_t), 1); 2046 if (ackmp == NULL) { 2047 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2048 return; 2049 } 2050 taa = (struct T_addr_ack *)ackmp->b_rptr; 2051 2052 bzero(taa, sizeof (struct T_addr_ack)); 2053 ackmp->b_wptr = (uchar_t *)&taa[1]; 2054 2055 taa->PRIM_type = T_ADDR_ACK; 2056 ackmp->b_datap->db_type = M_PCPROTO; 2057 rw_enter(&udp->udp_rwlock, RW_READER); 2058 /* 2059 * Note: Following code assumes 32 bit alignment of basic 2060 * data structures like sin_t and struct T_addr_ack. 2061 */ 2062 if (udp->udp_state != TS_UNBND) { 2063 /* 2064 * Fill in local address first 2065 */ 2066 taa->LOCADDR_offset = sizeof (*taa); 2067 if (udp->udp_family == AF_INET) { 2068 taa->LOCADDR_length = sizeof (sin_t); 2069 sin = (sin_t *)&taa[1]; 2070 /* Fill zeroes and then initialize non-zero fields */ 2071 *sin = sin_null; 2072 sin->sin_family = AF_INET; 2073 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2074 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2075 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2076 sin->sin_addr.s_addr); 2077 } else { 2078 /* 2079 * INADDR_ANY 2080 * udp_v6src is not set, we might be bound to 2081 * broadcast/multicast. 
Use udp_bound_v6src as 2082 * local address instead (that could 2083 * also still be INADDR_ANY) 2084 */ 2085 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2086 sin->sin_addr.s_addr); 2087 } 2088 sin->sin_port = udp->udp_port; 2089 ackmp->b_wptr = (uchar_t *)&sin[1]; 2090 if (udp->udp_state == TS_DATA_XFER) { 2091 /* 2092 * connected, fill remote address too 2093 */ 2094 taa->REMADDR_length = sizeof (sin_t); 2095 /* assumed 32-bit alignment */ 2096 taa->REMADDR_offset = taa->LOCADDR_offset + 2097 taa->LOCADDR_length; 2098 2099 sin = (sin_t *)(ackmp->b_rptr + 2100 taa->REMADDR_offset); 2101 /* initialize */ 2102 *sin = sin_null; 2103 sin->sin_family = AF_INET; 2104 sin->sin_addr.s_addr = 2105 V4_PART_OF_V6(udp->udp_v6dst); 2106 sin->sin_port = udp->udp_dstport; 2107 ackmp->b_wptr = (uchar_t *)&sin[1]; 2108 } 2109 } else { 2110 taa->LOCADDR_length = sizeof (sin6_t); 2111 sin6 = (sin6_t *)&taa[1]; 2112 /* Fill zeroes and then initialize non-zero fields */ 2113 *sin6 = sin6_null; 2114 sin6->sin6_family = AF_INET6; 2115 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2116 sin6->sin6_addr = udp->udp_v6src; 2117 } else { 2118 /* 2119 * UNSPECIFIED 2120 * udp_v6src is not set, we might be bound to 2121 * broadcast/multicast. Use udp_bound_v6src as 2122 * local address instead (that could 2123 * also still be UNSPECIFIED) 2124 */ 2125 sin6->sin6_addr = 2126 udp->udp_bound_v6src; 2127 } 2128 sin6->sin6_port = udp->udp_port; 2129 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2130 if (udp->udp_state == TS_DATA_XFER) { 2131 /* 2132 * connected, fill remote address too 2133 */ 2134 taa->REMADDR_length = sizeof (sin6_t); 2135 /* assumed 32-bit alignment */ 2136 taa->REMADDR_offset = taa->LOCADDR_offset + 2137 taa->LOCADDR_length; 2138 2139 sin6 = (sin6_t *)(ackmp->b_rptr + 2140 taa->REMADDR_offset); 2141 /* initialize */ 2142 *sin6 = sin6_null; 2143 sin6->sin6_family = AF_INET6; 2144 sin6->sin6_addr = udp->udp_v6dst; 2145 sin6->sin6_port = udp->udp_dstport; 2146 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2147 } 2148 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2149 } 2150 } 2151 rw_exit(&udp->udp_rwlock); 2152 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2153 qreply(q, ackmp); 2154 } 2155 2156 static void 2157 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2158 { 2159 if (udp->udp_family == AF_INET) { 2160 *tap = udp_g_t_info_ack_ipv4; 2161 } else { 2162 *tap = udp_g_t_info_ack_ipv6; 2163 } 2164 tap->CURRENT_state = udp->udp_state; 2165 tap->OPT_size = udp_max_optsize; 2166 } 2167 2168 /* 2169 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2170 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2171 * udp_g_t_info_ack. The current state of the stream is copied from 2172 * udp_state. 2173 */ 2174 static void 2175 udp_capability_req(queue_t *q, mblk_t *mp) 2176 { 2177 t_uscalar_t cap_bits1; 2178 struct T_capability_ack *tcap; 2179 udp_t *udp = Q_TO_UDP(q); 2180 2181 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2182 2183 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2184 mp->b_datap->db_type, T_CAPABILITY_ACK); 2185 if (!mp) 2186 return; 2187 2188 tcap = (struct T_capability_ack *)mp->b_rptr; 2189 tcap->CAP_bits1 = 0; 2190 2191 if (cap_bits1 & TC1_INFO) { 2192 udp_copy_info(&tcap->INFO_ack, udp); 2193 tcap->CAP_bits1 |= TC1_INFO; 2194 } 2195 2196 qreply(q, mp); 2197 } 2198 2199 /* 2200 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2201 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 
2202 * The current state of the stream is copied from udp_state. 2203 */ 2204 static void 2205 udp_info_req(queue_t *q, mblk_t *mp) 2206 { 2207 udp_t *udp = Q_TO_UDP(q); 2208 2209 /* Create a T_INFO_ACK message. */ 2210 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2211 T_INFO_ACK); 2212 if (!mp) 2213 return; 2214 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2215 qreply(q, mp); 2216 } 2217 2218 /* 2219 * IP recognizes seven kinds of bind requests: 2220 * 2221 * - A zero-length address binds only to the protocol number. 2222 * 2223 * - A 4-byte address is treated as a request to 2224 * validate that the address is a valid local IPv4 2225 * address, appropriate for an application to bind to. 2226 * IP does the verification, but does not make any note 2227 * of the address at this time. 2228 * 2229 * - A 16-byte address is treated as a request 2230 * to validate a local IPv6 address, as in the 4-byte 2231 * address case above. 2232 * 2233 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2234 * use it for the inbound fanout of packets. 2235 * 2236 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2237 * use it for the inbound fanout of packets. 2238 * 2239 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2240 * information consisting of local and remote addresses 2241 * and ports. In this case, the addresses are both 2242 * validated as appropriate for this operation, and, if 2243 * so, the information is retained for use in the 2244 * inbound fanout. 2245 * 2246 * - A 36-byte address (ipa6_conn_t) containing complete IPv6 2247 * fanout information, like the 12-byte case above. 2248 * 2249 * IP will also fill in the IRE request mblk with information 2250 * regarding our peer. In all cases, we notify IP of our protocol 2251 * type by appending a single protocol byte to the bind request.
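 *
 * The request mblk built by udp_ip_bind_mp() below is laid out roughly as
 *
 *	struct T_bind_req | address (ADDR_length bytes) | IPPROTO_UDP byte
 *
 * and, for the sin/sin6/ipa_conn_t/ipa6_conn_t cases, a b_cont of type
 * IRE_DB_REQ_TYPE (sized to hold an ire_t) is appended so that IP returns
 * the IRE information for our peer along with the bind ack.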
2252 */ 2253 static mblk_t * 2254 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2255 { 2256 char *cp; 2257 mblk_t *mp; 2258 struct T_bind_req *tbr; 2259 ipa_conn_t *ac; 2260 ipa6_conn_t *ac6; 2261 sin_t *sin; 2262 sin6_t *sin6; 2263 2264 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2265 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 2266 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2267 if (!mp) 2268 return (mp); 2269 mp->b_datap->db_type = M_PROTO; 2270 tbr = (struct T_bind_req *)mp->b_rptr; 2271 tbr->PRIM_type = bind_prim; 2272 tbr->ADDR_offset = sizeof (*tbr); 2273 tbr->CONIND_number = 0; 2274 tbr->ADDR_length = addr_length; 2275 cp = (char *)&tbr[1]; 2276 switch (addr_length) { 2277 case sizeof (ipa_conn_t): 2278 ASSERT(udp->udp_family == AF_INET); 2279 /* Append a request for an IRE */ 2280 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2281 if (!mp->b_cont) { 2282 freemsg(mp); 2283 return (NULL); 2284 } 2285 mp->b_cont->b_wptr += sizeof (ire_t); 2286 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2287 2288 /* cp known to be 32 bit aligned */ 2289 ac = (ipa_conn_t *)cp; 2290 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2291 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 2292 ac->ac_fport = udp->udp_dstport; 2293 ac->ac_lport = udp->udp_port; 2294 break; 2295 2296 case sizeof (ipa6_conn_t): 2297 ASSERT(udp->udp_family == AF_INET6); 2298 /* Append a request for an IRE */ 2299 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2300 if (!mp->b_cont) { 2301 freemsg(mp); 2302 return (NULL); 2303 } 2304 mp->b_cont->b_wptr += sizeof (ire_t); 2305 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2306 2307 /* cp known to be 32 bit aligned */ 2308 ac6 = (ipa6_conn_t *)cp; 2309 ac6->ac6_laddr = udp->udp_v6src; 2310 ac6->ac6_faddr = udp->udp_v6dst; 2311 ac6->ac6_fport = udp->udp_dstport; 2312 ac6->ac6_lport = udp->udp_port; 2313 break; 2314 2315 case sizeof (sin_t): 2316 ASSERT(udp->udp_family == AF_INET); 2317 /* Append a request for an IRE */ 2318 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2319 if (!mp->b_cont) { 2320 freemsg(mp); 2321 return (NULL); 2322 } 2323 mp->b_cont->b_wptr += sizeof (ire_t); 2324 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2325 2326 sin = (sin_t *)cp; 2327 *sin = sin_null; 2328 sin->sin_family = AF_INET; 2329 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2330 sin->sin_port = udp->udp_port; 2331 break; 2332 2333 case sizeof (sin6_t): 2334 ASSERT(udp->udp_family == AF_INET6); 2335 /* Append a request for an IRE */ 2336 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2337 if (!mp->b_cont) { 2338 freemsg(mp); 2339 return (NULL); 2340 } 2341 mp->b_cont->b_wptr += sizeof (ire_t); 2342 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2343 2344 sin6 = (sin6_t *)cp; 2345 *sin6 = sin6_null; 2346 sin6->sin6_family = AF_INET6; 2347 sin6->sin6_addr = udp->udp_bound_v6src; 2348 sin6->sin6_port = udp->udp_port; 2349 break; 2350 } 2351 /* Add protocol number to end */ 2352 cp[addr_length] = (char)IPPROTO_UDP; 2353 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2354 return (mp); 2355 } 2356 2357 /* For /dev/udp aka AF_INET open */ 2358 static int 2359 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2360 { 2361 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 2362 } 2363 2364 /* For /dev/udp6 aka AF_INET6 open */ 2365 static int 2366 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2367 { 2368 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 2369 } 2370 2371 /* 2372 * 
This is the open routine for udp. It allocates a udp_t structure for 2373 * the stream and, on the first open of the module, creates an ND table. 2374 */ 2375 /*ARGSUSED2*/ 2376 static int 2377 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 2378 boolean_t isv6) 2379 { 2380 int err; 2381 udp_t *udp; 2382 conn_t *connp; 2383 dev_t conn_dev; 2384 zoneid_t zoneid; 2385 netstack_t *ns; 2386 udp_stack_t *us; 2387 vmem_t *minor_arena; 2388 2389 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2390 2391 /* If the stream is already open, return immediately. */ 2392 if (q->q_ptr != NULL) 2393 return (0); 2394 2395 if (sflag == MODOPEN) 2396 return (EINVAL); 2397 2398 ns = netstack_find_by_cred(credp); 2399 ASSERT(ns != NULL); 2400 us = ns->netstack_udp; 2401 ASSERT(us != NULL); 2402 2403 /* 2404 * For exclusive stacks we set the zoneid to zero 2405 * to make UDP operate as if in the global zone. 2406 */ 2407 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 2408 zoneid = GLOBAL_ZONEID; 2409 else 2410 zoneid = crgetzoneid(credp); 2411 2412 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 2413 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 2414 minor_arena = ip_minor_arena_la; 2415 } else { 2416 /* 2417 * Either minor numbers in the large arena were exhausted 2418 * or a non socket application is doing the open. 2419 * Try to allocate from the small arena. 2420 */ 2421 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 2422 netstack_rele(ns); 2423 return (EBUSY); 2424 } 2425 minor_arena = ip_minor_arena_sa; 2426 } 2427 2428 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 2429 2430 connp = ipcl_conn_create(IPCL_UDPCONN, KM_SLEEP, ns); 2431 connp->conn_dev = conn_dev; 2432 connp->conn_minor_arena = minor_arena; 2433 udp = connp->conn_udp; 2434 2435 /* 2436 * ipcl_conn_create did a netstack_hold. Undo the hold that was 2437 * done by netstack_find_by_cred() 2438 */ 2439 netstack_rele(ns); 2440 2441 /* 2442 * Initialize the udp_t structure for this stream. 2443 */ 2444 q->q_ptr = connp; 2445 WR(q)->q_ptr = connp; 2446 connp->conn_rq = q; 2447 connp->conn_wq = WR(q); 2448 2449 rw_enter(&udp->udp_rwlock, RW_WRITER); 2450 ASSERT(connp->conn_ulp == IPPROTO_UDP); 2451 ASSERT(connp->conn_udp == udp); 2452 ASSERT(udp->udp_connp == connp); 2453 2454 /* Set the initial state of the stream and the privilege status. */ 2455 udp->udp_state = TS_UNBND; 2456 if (isv6) { 2457 udp->udp_family = AF_INET6; 2458 udp->udp_ipversion = IPV6_VERSION; 2459 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2460 udp->udp_ttl = us->us_ipv6_hoplimit; 2461 connp->conn_af_isv6 = B_TRUE; 2462 connp->conn_flags |= IPCL_ISV6; 2463 } else { 2464 udp->udp_family = AF_INET; 2465 udp->udp_ipversion = IPV4_VERSION; 2466 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2467 udp->udp_ttl = us->us_ipv4_ttl; 2468 connp->conn_af_isv6 = B_FALSE; 2469 connp->conn_flags &= ~IPCL_ISV6; 2470 } 2471 2472 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2473 udp->udp_pending_op = -1; 2474 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2475 connp->conn_zoneid = zoneid; 2476 2477 udp->udp_open_time = lbolt64; 2478 udp->udp_open_pid = curproc->p_pid; 2479 2480 /* 2481 * If the caller has the process-wide flag set, then default to MAC 2482 * exempt mode. This allows read-down to unlabeled hosts. 
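 * (NET_MAC_AWARE is a per-process flag that is normally only set on
 * labeled (Trusted Extensions) configurations; when it is not set,
 * getpflags() returns 0 here and the conn stays non-exempt.)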
2483 */ 2484 if (getpflags(NET_MAC_AWARE, credp) != 0) 2485 connp->conn_mac_exempt = B_TRUE; 2486 2487 if (flag & SO_SOCKSTR) { 2488 connp->conn_flags |= IPCL_SOCKET; 2489 udp->udp_issocket = B_TRUE; 2490 udp->udp_direct_sockfs = B_TRUE; 2491 } 2492 2493 connp->conn_ulp_labeled = is_system_labeled(); 2494 2495 udp->udp_us = us; 2496 2497 q->q_hiwat = us->us_recv_hiwat; 2498 WR(q)->q_hiwat = us->us_xmit_hiwat; 2499 WR(q)->q_lowat = us->us_xmit_lowat; 2500 2501 connp->conn_recv = udp_input; 2502 crhold(credp); 2503 connp->conn_cred = credp; 2504 2505 mutex_enter(&connp->conn_lock); 2506 connp->conn_state_flags &= ~CONN_INCIPIENT; 2507 mutex_exit(&connp->conn_lock); 2508 2509 qprocson(q); 2510 2511 if (udp->udp_family == AF_INET6) { 2512 /* Build initial header template for transmit */ 2513 if ((err = udp_build_hdrs(udp)) != 0) { 2514 rw_exit(&udp->udp_rwlock); 2515 qprocsoff(q); 2516 ipcl_conn_destroy(connp); 2517 return (err); 2518 } 2519 } 2520 rw_exit(&udp->udp_rwlock); 2521 2522 /* Set the Stream head write offset and high watermark. */ 2523 (void) mi_set_sth_wroff(q, 2524 udp->udp_max_hdr_len + us->us_wroff_extra); 2525 (void) mi_set_sth_hiwat(q, udp_set_rcv_hiwat(udp, q->q_hiwat)); 2526 2527 return (0); 2528 } 2529 2530 /* 2531 * Which UDP options OK to set through T_UNITDATA_REQ... 2532 */ 2533 /* ARGSUSED */ 2534 static boolean_t 2535 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2536 { 2537 return (B_TRUE); 2538 } 2539 2540 /* 2541 * This routine gets default values of certain options whose default 2542 * values are maintained by protcol specific code 2543 */ 2544 /* ARGSUSED */ 2545 int 2546 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2547 { 2548 udp_t *udp = Q_TO_UDP(q); 2549 udp_stack_t *us = udp->udp_us; 2550 int *i1 = (int *)ptr; 2551 2552 switch (level) { 2553 case IPPROTO_IP: 2554 switch (name) { 2555 case IP_MULTICAST_TTL: 2556 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2557 return (sizeof (uchar_t)); 2558 case IP_MULTICAST_LOOP: 2559 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2560 return (sizeof (uchar_t)); 2561 } 2562 break; 2563 case IPPROTO_IPV6: 2564 switch (name) { 2565 case IPV6_MULTICAST_HOPS: 2566 *i1 = IP_DEFAULT_MULTICAST_TTL; 2567 return (sizeof (int)); 2568 case IPV6_MULTICAST_LOOP: 2569 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2570 return (sizeof (int)); 2571 case IPV6_UNICAST_HOPS: 2572 *i1 = us->us_ipv6_hoplimit; 2573 return (sizeof (int)); 2574 } 2575 break; 2576 } 2577 return (-1); 2578 } 2579 2580 /* 2581 * This routine retrieves the current status of socket options. 2582 * It returns the size of the option retrieved. 2583 */ 2584 int 2585 udp_opt_get_locked(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2586 { 2587 int *i1 = (int *)ptr; 2588 conn_t *connp; 2589 udp_t *udp; 2590 ip6_pkt_t *ipp; 2591 int len; 2592 udp_stack_t *us; 2593 2594 connp = Q_TO_CONN(q); 2595 udp = connp->conn_udp; 2596 ipp = &udp->udp_sticky_ipp; 2597 us = udp->udp_us; 2598 2599 switch (level) { 2600 case SOL_SOCKET: 2601 switch (name) { 2602 case SO_DEBUG: 2603 *i1 = udp->udp_debug; 2604 break; /* goto sizeof (int) option return */ 2605 case SO_REUSEADDR: 2606 *i1 = udp->udp_reuseaddr; 2607 break; /* goto sizeof (int) option return */ 2608 case SO_TYPE: 2609 *i1 = SOCK_DGRAM; 2610 break; /* goto sizeof (int) option return */ 2611 2612 /* 2613 * The following three items are available here, 2614 * but are only meaningful to IP. 
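 * (UDP just mirrors the last value that was set so that getsockopt()
 * can report it; the actual behaviour is applied by IP at send time.)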
2615 */ 2616 case SO_DONTROUTE: 2617 *i1 = udp->udp_dontroute; 2618 break; /* goto sizeof (int) option return */ 2619 case SO_USELOOPBACK: 2620 *i1 = udp->udp_useloopback; 2621 break; /* goto sizeof (int) option return */ 2622 case SO_BROADCAST: 2623 *i1 = udp->udp_broadcast; 2624 break; /* goto sizeof (int) option return */ 2625 2626 case SO_SNDBUF: 2627 *i1 = q->q_hiwat; 2628 break; /* goto sizeof (int) option return */ 2629 case SO_RCVBUF: 2630 *i1 = RD(q)->q_hiwat; 2631 break; /* goto sizeof (int) option return */ 2632 case SO_DGRAM_ERRIND: 2633 *i1 = udp->udp_dgram_errind; 2634 break; /* goto sizeof (int) option return */ 2635 case SO_RECVUCRED: 2636 *i1 = udp->udp_recvucred; 2637 break; /* goto sizeof (int) option return */ 2638 case SO_TIMESTAMP: 2639 *i1 = udp->udp_timestamp; 2640 break; /* goto sizeof (int) option return */ 2641 case SO_ANON_MLP: 2642 *i1 = connp->conn_anon_mlp; 2643 break; /* goto sizeof (int) option return */ 2644 case SO_MAC_EXEMPT: 2645 *i1 = connp->conn_mac_exempt; 2646 break; /* goto sizeof (int) option return */ 2647 case SO_ALLZONES: 2648 *i1 = connp->conn_allzones; 2649 break; /* goto sizeof (int) option return */ 2650 case SO_EXCLBIND: 2651 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 2652 break; 2653 case SO_PROTOTYPE: 2654 *i1 = IPPROTO_UDP; 2655 break; 2656 case SO_DOMAIN: 2657 *i1 = udp->udp_family; 2658 break; 2659 default: 2660 return (-1); 2661 } 2662 break; 2663 case IPPROTO_IP: 2664 if (udp->udp_family != AF_INET) 2665 return (-1); 2666 switch (name) { 2667 case IP_OPTIONS: 2668 case T_IP_OPTIONS: 2669 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2670 if (len > 0) { 2671 bcopy(udp->udp_ip_rcv_options + 2672 udp->udp_label_len, ptr, len); 2673 } 2674 return (len); 2675 case IP_TOS: 2676 case T_IP_TOS: 2677 *i1 = (int)udp->udp_type_of_service; 2678 break; /* goto sizeof (int) option return */ 2679 case IP_TTL: 2680 *i1 = (int)udp->udp_ttl; 2681 break; /* goto sizeof (int) option return */ 2682 case IP_DHCPINIT_IF: 2683 return (-EINVAL); 2684 case IP_NEXTHOP: 2685 case IP_RECVPKTINFO: 2686 /* 2687 * This also handles IP_PKTINFO. 2688 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2689 * Differentiation is based on the size of the argument 2690 * passed in. 2691 * This option is handled in IP which will return an 2692 * error for IP_PKTINFO as it's not supported as a 2693 * sticky option. 
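 * For example, on the set side an argument of sizeof (int) is taken to
 * be the IP_RECVPKTINFO boolean, while sizeof (struct in_pktinfo) is
 * taken to be per-packet IP_PKTINFO data; see the IP_PKTINFO case in
 * udp_opt_set_locked() below.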
2694 */ 2695 return (-EINVAL); 2696 case IP_MULTICAST_IF: 2697 /* 0 address if not set */ 2698 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2699 return (sizeof (ipaddr_t)); 2700 case IP_MULTICAST_TTL: 2701 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2702 return (sizeof (uchar_t)); 2703 case IP_MULTICAST_LOOP: 2704 *ptr = connp->conn_multicast_loop; 2705 return (sizeof (uint8_t)); 2706 case IP_RECVOPTS: 2707 *i1 = udp->udp_recvopts; 2708 break; /* goto sizeof (int) option return */ 2709 case IP_RECVDSTADDR: 2710 *i1 = udp->udp_recvdstaddr; 2711 break; /* goto sizeof (int) option return */ 2712 case IP_RECVIF: 2713 *i1 = udp->udp_recvif; 2714 break; /* goto sizeof (int) option return */ 2715 case IP_RECVSLLA: 2716 *i1 = udp->udp_recvslla; 2717 break; /* goto sizeof (int) option return */ 2718 case IP_RECVTTL: 2719 *i1 = udp->udp_recvttl; 2720 break; /* goto sizeof (int) option return */ 2721 case IP_ADD_MEMBERSHIP: 2722 case IP_DROP_MEMBERSHIP: 2723 case IP_BLOCK_SOURCE: 2724 case IP_UNBLOCK_SOURCE: 2725 case IP_ADD_SOURCE_MEMBERSHIP: 2726 case IP_DROP_SOURCE_MEMBERSHIP: 2727 case MCAST_JOIN_GROUP: 2728 case MCAST_LEAVE_GROUP: 2729 case MCAST_BLOCK_SOURCE: 2730 case MCAST_UNBLOCK_SOURCE: 2731 case MCAST_JOIN_SOURCE_GROUP: 2732 case MCAST_LEAVE_SOURCE_GROUP: 2733 case IP_DONTFAILOVER_IF: 2734 /* cannot "get" the value for these */ 2735 return (-1); 2736 case IP_BOUND_IF: 2737 /* Zero if not set */ 2738 *i1 = udp->udp_bound_if; 2739 break; /* goto sizeof (int) option return */ 2740 case IP_UNSPEC_SRC: 2741 *i1 = udp->udp_unspec_source; 2742 break; /* goto sizeof (int) option return */ 2743 case IP_BROADCAST_TTL: 2744 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2745 return (sizeof (uchar_t)); 2746 default: 2747 return (-1); 2748 } 2749 break; 2750 case IPPROTO_IPV6: 2751 if (udp->udp_family != AF_INET6) 2752 return (-1); 2753 switch (name) { 2754 case IPV6_UNICAST_HOPS: 2755 *i1 = (unsigned int)udp->udp_ttl; 2756 break; /* goto sizeof (int) option return */ 2757 case IPV6_MULTICAST_IF: 2758 /* 0 index if not set */ 2759 *i1 = udp->udp_multicast_if_index; 2760 break; /* goto sizeof (int) option return */ 2761 case IPV6_MULTICAST_HOPS: 2762 *i1 = udp->udp_multicast_ttl; 2763 break; /* goto sizeof (int) option return */ 2764 case IPV6_MULTICAST_LOOP: 2765 *i1 = connp->conn_multicast_loop; 2766 break; /* goto sizeof (int) option return */ 2767 case IPV6_JOIN_GROUP: 2768 case IPV6_LEAVE_GROUP: 2769 case MCAST_JOIN_GROUP: 2770 case MCAST_LEAVE_GROUP: 2771 case MCAST_BLOCK_SOURCE: 2772 case MCAST_UNBLOCK_SOURCE: 2773 case MCAST_JOIN_SOURCE_GROUP: 2774 case MCAST_LEAVE_SOURCE_GROUP: 2775 /* cannot "get" the value for these */ 2776 return (-1); 2777 case IPV6_BOUND_IF: 2778 /* Zero if not set */ 2779 *i1 = udp->udp_bound_if; 2780 break; /* goto sizeof (int) option return */ 2781 case IPV6_UNSPEC_SRC: 2782 *i1 = udp->udp_unspec_source; 2783 break; /* goto sizeof (int) option return */ 2784 case IPV6_RECVPKTINFO: 2785 *i1 = udp->udp_ip_recvpktinfo; 2786 break; /* goto sizeof (int) option return */ 2787 case IPV6_RECVTCLASS: 2788 *i1 = udp->udp_ipv6_recvtclass; 2789 break; /* goto sizeof (int) option return */ 2790 case IPV6_RECVPATHMTU: 2791 *i1 = udp->udp_ipv6_recvpathmtu; 2792 break; /* goto sizeof (int) option return */ 2793 case IPV6_RECVHOPLIMIT: 2794 *i1 = udp->udp_ipv6_recvhoplimit; 2795 break; /* goto sizeof (int) option return */ 2796 case IPV6_RECVHOPOPTS: 2797 *i1 = udp->udp_ipv6_recvhopopts; 2798 break; /* goto sizeof (int) option return */ 2799 case IPV6_RECVDSTOPTS: 2800 *i1 = 
udp->udp_ipv6_recvdstopts; 2801 break; /* goto sizeof (int) option return */ 2802 case _OLD_IPV6_RECVDSTOPTS: 2803 *i1 = udp->udp_old_ipv6_recvdstopts; 2804 break; /* goto sizeof (int) option return */ 2805 case IPV6_RECVRTHDRDSTOPTS: 2806 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2807 break; /* goto sizeof (int) option return */ 2808 case IPV6_RECVRTHDR: 2809 *i1 = udp->udp_ipv6_recvrthdr; 2810 break; /* goto sizeof (int) option return */ 2811 case IPV6_PKTINFO: { 2812 /* XXX assumes that caller has room for max size! */ 2813 struct in6_pktinfo *pkti; 2814 2815 pkti = (struct in6_pktinfo *)ptr; 2816 if (ipp->ipp_fields & IPPF_IFINDEX) 2817 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2818 else 2819 pkti->ipi6_ifindex = 0; 2820 if (ipp->ipp_fields & IPPF_ADDR) 2821 pkti->ipi6_addr = ipp->ipp_addr; 2822 else 2823 pkti->ipi6_addr = ipv6_all_zeros; 2824 return (sizeof (struct in6_pktinfo)); 2825 } 2826 case IPV6_TCLASS: 2827 if (ipp->ipp_fields & IPPF_TCLASS) 2828 *i1 = ipp->ipp_tclass; 2829 else 2830 *i1 = IPV6_FLOW_TCLASS( 2831 IPV6_DEFAULT_VERS_AND_FLOW); 2832 break; /* goto sizeof (int) option return */ 2833 case IPV6_NEXTHOP: { 2834 sin6_t *sin6 = (sin6_t *)ptr; 2835 2836 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2837 return (0); 2838 *sin6 = sin6_null; 2839 sin6->sin6_family = AF_INET6; 2840 sin6->sin6_addr = ipp->ipp_nexthop; 2841 return (sizeof (sin6_t)); 2842 } 2843 case IPV6_HOPOPTS: 2844 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2845 return (0); 2846 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2847 return (0); 2848 /* 2849 * The cipso/label option is added by kernel. 2850 * User is not usually aware of this option. 2851 * We copy out the hbh opt after the label option. 2852 */ 2853 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2854 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2855 if (udp->udp_label_len_v6 > 0) { 2856 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2857 ptr[1] = (ipp->ipp_hopoptslen - 2858 udp->udp_label_len_v6 + 7) / 8 - 1; 2859 } 2860 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2861 case IPV6_RTHDRDSTOPTS: 2862 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2863 return (0); 2864 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2865 return (ipp->ipp_rtdstoptslen); 2866 case IPV6_RTHDR: 2867 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2868 return (0); 2869 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2870 return (ipp->ipp_rthdrlen); 2871 case IPV6_DSTOPTS: 2872 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2873 return (0); 2874 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2875 return (ipp->ipp_dstoptslen); 2876 case IPV6_PATHMTU: 2877 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2878 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2879 us->us_netstack)); 2880 default: 2881 return (-1); 2882 } 2883 break; 2884 case IPPROTO_UDP: 2885 switch (name) { 2886 case UDP_ANONPRIVBIND: 2887 *i1 = udp->udp_anon_priv_bind; 2888 break; 2889 case UDP_EXCLBIND: 2890 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2891 break; 2892 case UDP_RCVHDR: 2893 *i1 = udp->udp_rcvhdr ? 
1 : 0; 2894 break; 2895 case UDP_NAT_T_ENDPOINT: 2896 *i1 = udp->udp_nat_t_endpoint; 2897 break; 2898 default: 2899 return (-1); 2900 } 2901 break; 2902 default: 2903 return (-1); 2904 } 2905 return (sizeof (int)); 2906 } 2907 2908 int 2909 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2910 { 2911 udp_t *udp; 2912 int err; 2913 2914 udp = Q_TO_UDP(q); 2915 2916 rw_enter(&udp->udp_rwlock, RW_READER); 2917 err = udp_opt_get_locked(q, level, name, ptr); 2918 rw_exit(&udp->udp_rwlock); 2919 return (err); 2920 } 2921 2922 /* 2923 * This routine sets socket options. 2924 */ 2925 /* ARGSUSED */ 2926 int 2927 udp_opt_set_locked(queue_t *q, uint_t optset_context, int level, 2928 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 2929 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2930 { 2931 udpattrs_t *attrs = thisdg_attrs; 2932 int *i1 = (int *)invalp; 2933 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2934 boolean_t checkonly; 2935 int error; 2936 conn_t *connp; 2937 udp_t *udp; 2938 uint_t newlen; 2939 udp_stack_t *us; 2940 size_t sth_wroff; 2941 2942 connp = Q_TO_CONN(q); 2943 udp = connp->conn_udp; 2944 us = udp->udp_us; 2945 2946 switch (optset_context) { 2947 case SETFN_OPTCOM_CHECKONLY: 2948 checkonly = B_TRUE; 2949 /* 2950 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2951 * inlen != 0 implies value supplied and 2952 * we have to "pretend" to set it. 2953 * inlen == 0 implies that there is no 2954 * value part in T_CHECK request and just validation 2955 * done elsewhere should be enough, we just return here. 2956 */ 2957 if (inlen == 0) { 2958 *outlenp = 0; 2959 return (0); 2960 } 2961 break; 2962 case SETFN_OPTCOM_NEGOTIATE: 2963 checkonly = B_FALSE; 2964 break; 2965 case SETFN_UD_NEGOTIATE: 2966 case SETFN_CONN_NEGOTIATE: 2967 checkonly = B_FALSE; 2968 /* 2969 * Negotiating local and "association-related" options 2970 * through T_UNITDATA_REQ. 2971 * 2972 * Following routine can filter out ones we do not 2973 * want to be "set" this way. 2974 */ 2975 if (!udp_opt_allow_udr_set(level, name)) { 2976 *outlenp = 0; 2977 return (EINVAL); 2978 } 2979 break; 2980 default: 2981 /* 2982 * We should never get here 2983 */ 2984 *outlenp = 0; 2985 return (EINVAL); 2986 } 2987 2988 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2989 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2990 2991 /* 2992 * For fixed length options, no sanity check 2993 * of passed in length is done. It is assumed *_optcom_req() 2994 * routines do the right thing. 2995 */ 2996 2997 switch (level) { 2998 case SOL_SOCKET: 2999 switch (name) { 3000 case SO_REUSEADDR: 3001 if (!checkonly) 3002 udp->udp_reuseaddr = onoff; 3003 break; 3004 case SO_DEBUG: 3005 if (!checkonly) 3006 udp->udp_debug = onoff; 3007 break; 3008 /* 3009 * The following three items are available here, 3010 * but are only meaningful to IP. 
3011 */ 3012 case SO_DONTROUTE: 3013 if (!checkonly) 3014 udp->udp_dontroute = onoff; 3015 break; 3016 case SO_USELOOPBACK: 3017 if (!checkonly) 3018 udp->udp_useloopback = onoff; 3019 break; 3020 case SO_BROADCAST: 3021 if (!checkonly) 3022 udp->udp_broadcast = onoff; 3023 break; 3024 3025 case SO_SNDBUF: 3026 if (*i1 > us->us_max_buf) { 3027 *outlenp = 0; 3028 return (ENOBUFS); 3029 } 3030 if (!checkonly) { 3031 q->q_hiwat = *i1; 3032 } 3033 break; 3034 case SO_RCVBUF: 3035 if (*i1 > us->us_max_buf) { 3036 *outlenp = 0; 3037 return (ENOBUFS); 3038 } 3039 if (!checkonly) { 3040 RD(q)->q_hiwat = *i1; 3041 rw_exit(&udp->udp_rwlock); 3042 (void) mi_set_sth_hiwat(RD(q), 3043 udp_set_rcv_hiwat(udp, *i1)); 3044 rw_enter(&udp->udp_rwlock, RW_WRITER); 3045 } 3046 break; 3047 case SO_DGRAM_ERRIND: 3048 if (!checkonly) 3049 udp->udp_dgram_errind = onoff; 3050 break; 3051 case SO_RECVUCRED: 3052 if (!checkonly) 3053 udp->udp_recvucred = onoff; 3054 break; 3055 case SO_ALLZONES: 3056 /* 3057 * "soft" error (negative) 3058 * option not handled at this level 3059 * Do not modify *outlenp. 3060 */ 3061 return (-EINVAL); 3062 case SO_TIMESTAMP: 3063 if (!checkonly) 3064 udp->udp_timestamp = onoff; 3065 break; 3066 case SO_ANON_MLP: 3067 /* Pass option along to IP level for handling */ 3068 return (-EINVAL); 3069 case SO_MAC_EXEMPT: 3070 /* Pass option along to IP level for handling */ 3071 return (-EINVAL); 3072 case SCM_UCRED: { 3073 struct ucred_s *ucr; 3074 cred_t *cr, *newcr; 3075 ts_label_t *tsl; 3076 3077 /* 3078 * Only sockets that have proper privileges and are 3079 * bound to MLPs will have any other value here, so 3080 * this implicitly tests for privilege to set label. 3081 */ 3082 if (connp->conn_mlp_type == mlptSingle) 3083 break; 3084 ucr = (struct ucred_s *)invalp; 3085 if (inlen != ucredsize || 3086 ucr->uc_labeloff < sizeof (*ucr) || 3087 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 3088 return (EINVAL); 3089 if (!checkonly) { 3090 mblk_t *mb; 3091 3092 if (attrs == NULL || 3093 (mb = attrs->udpattr_mb) == NULL) 3094 return (EINVAL); 3095 if ((cr = DB_CRED(mb)) == NULL) 3096 cr = udp->udp_connp->conn_cred; 3097 ASSERT(cr != NULL); 3098 if ((tsl = crgetlabel(cr)) == NULL) 3099 return (EINVAL); 3100 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 3101 tsl->tsl_doi, KM_NOSLEEP); 3102 if (newcr == NULL) 3103 return (ENOSR); 3104 mblk_setcred(mb, newcr); 3105 attrs->udpattr_credset = B_TRUE; 3106 crfree(newcr); 3107 } 3108 break; 3109 } 3110 case SO_EXCLBIND: 3111 if (!checkonly) 3112 udp->udp_exclbind = onoff; 3113 break; 3114 default: 3115 *outlenp = 0; 3116 return (EINVAL); 3117 } 3118 break; 3119 case IPPROTO_IP: 3120 if (udp->udp_family != AF_INET) { 3121 *outlenp = 0; 3122 return (ENOPROTOOPT); 3123 } 3124 switch (name) { 3125 case IP_OPTIONS: 3126 case T_IP_OPTIONS: 3127 /* Save options for use by IP. */ 3128 newlen = inlen + udp->udp_label_len; 3129 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 3130 *outlenp = 0; 3131 return (EINVAL); 3132 } 3133 if (checkonly) 3134 break; 3135 3136 /* 3137 * Update the stored options taking into account 3138 * any CIPSO option which we should not overwrite. 
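 * (When the endpoint is labeled, the first udp_label_len bytes of
 * udp_ip_snd_options hold the kernel-generated label option;
 * tsol_option_set() is expected to merge the user's options in after
 * that prefix rather than replacing it.)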
3139 */ 3140 if (!tsol_option_set(&udp->udp_ip_snd_options, 3141 &udp->udp_ip_snd_options_len, 3142 udp->udp_label_len, invalp, inlen)) { 3143 *outlenp = 0; 3144 return (ENOMEM); 3145 } 3146 3147 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3148 UDPH_SIZE + udp->udp_ip_snd_options_len; 3149 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3150 rw_exit(&udp->udp_rwlock); 3151 (void) mi_set_sth_wroff(RD(q), sth_wroff); 3152 rw_enter(&udp->udp_rwlock, RW_WRITER); 3153 break; 3154 3155 case IP_TTL: 3156 if (!checkonly) { 3157 udp->udp_ttl = (uchar_t)*i1; 3158 } 3159 break; 3160 case IP_TOS: 3161 case T_IP_TOS: 3162 if (!checkonly) { 3163 udp->udp_type_of_service = (uchar_t)*i1; 3164 } 3165 break; 3166 case IP_MULTICAST_IF: { 3167 /* 3168 * TODO should check OPTMGMT reply and undo this if 3169 * there is an error. 3170 */ 3171 struct in_addr *inap = (struct in_addr *)invalp; 3172 if (!checkonly) { 3173 udp->udp_multicast_if_addr = 3174 inap->s_addr; 3175 } 3176 break; 3177 } 3178 case IP_MULTICAST_TTL: 3179 if (!checkonly) 3180 udp->udp_multicast_ttl = *invalp; 3181 break; 3182 case IP_MULTICAST_LOOP: 3183 if (!checkonly) 3184 connp->conn_multicast_loop = *invalp; 3185 break; 3186 case IP_RECVOPTS: 3187 if (!checkonly) 3188 udp->udp_recvopts = onoff; 3189 break; 3190 case IP_RECVDSTADDR: 3191 if (!checkonly) 3192 udp->udp_recvdstaddr = onoff; 3193 break; 3194 case IP_RECVIF: 3195 if (!checkonly) 3196 udp->udp_recvif = onoff; 3197 break; 3198 case IP_RECVSLLA: 3199 if (!checkonly) 3200 udp->udp_recvslla = onoff; 3201 break; 3202 case IP_RECVTTL: 3203 if (!checkonly) 3204 udp->udp_recvttl = onoff; 3205 break; 3206 case IP_PKTINFO: { 3207 /* 3208 * This also handles IP_RECVPKTINFO. 3209 * IP_PKTINFO and IP_RECVPKTINFO have same value. 3210 * Differentiation is based on the size of the 3211 * argument passed in. 3212 */ 3213 struct in_pktinfo *pktinfop; 3214 ip4_pkt_t *attr_pktinfop; 3215 3216 if (checkonly) 3217 break; 3218 3219 if (inlen == sizeof (int)) { 3220 /* 3221 * This is IP_RECVPKTINFO option. 3222 * Keep a local copy of whether this option is 3223 * set or not and pass it down to IP for 3224 * processing. 3225 */ 3226 3227 udp->udp_ip_recvpktinfo = onoff; 3228 return (-EINVAL); 3229 } 3230 3231 if (attrs == NULL || 3232 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 3233 /* 3234 * sticky option or no buffer to return 3235 * the results. 3236 */ 3237 return (EINVAL); 3238 } 3239 3240 if (inlen != sizeof (struct in_pktinfo)) 3241 return (EINVAL); 3242 3243 pktinfop = (struct in_pktinfo *)invalp; 3244 3245 /* 3246 * At least one of the values should be specified 3247 */ 3248 if (pktinfop->ipi_ifindex == 0 && 3249 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 3250 return (EINVAL); 3251 } 3252 3253 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 3254 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 3255 3256 break; 3257 } 3258 case IP_ADD_MEMBERSHIP: 3259 case IP_DROP_MEMBERSHIP: 3260 case IP_BLOCK_SOURCE: 3261 case IP_UNBLOCK_SOURCE: 3262 case IP_ADD_SOURCE_MEMBERSHIP: 3263 case IP_DROP_SOURCE_MEMBERSHIP: 3264 case MCAST_JOIN_GROUP: 3265 case MCAST_LEAVE_GROUP: 3266 case MCAST_BLOCK_SOURCE: 3267 case MCAST_UNBLOCK_SOURCE: 3268 case MCAST_JOIN_SOURCE_GROUP: 3269 case MCAST_LEAVE_SOURCE_GROUP: 3270 case IP_SEC_OPT: 3271 case IP_NEXTHOP: 3272 case IP_DHCPINIT_IF: 3273 /* 3274 * "soft" error (negative) 3275 * option not handled at this level 3276 * Do not modify *outlenp. 
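 * Returning a negative value is the option-processing convention for
 * "not handled at this level, pass the request on to IP", as opposed
 * to a hard error returned to the application.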
3277 */ 3278 return (-EINVAL); 3279 case IP_BOUND_IF: 3280 if (!checkonly) 3281 udp->udp_bound_if = *i1; 3282 break; 3283 case IP_UNSPEC_SRC: 3284 if (!checkonly) 3285 udp->udp_unspec_source = onoff; 3286 break; 3287 case IP_BROADCAST_TTL: 3288 if (!checkonly) 3289 connp->conn_broadcast_ttl = *invalp; 3290 break; 3291 default: 3292 *outlenp = 0; 3293 return (EINVAL); 3294 } 3295 break; 3296 case IPPROTO_IPV6: { 3297 ip6_pkt_t *ipp; 3298 boolean_t sticky; 3299 3300 if (udp->udp_family != AF_INET6) { 3301 *outlenp = 0; 3302 return (ENOPROTOOPT); 3303 } 3304 /* 3305 * Deal with both sticky options and ancillary data 3306 */ 3307 sticky = B_FALSE; 3308 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 3309 NULL) { 3310 /* sticky options, or none */ 3311 ipp = &udp->udp_sticky_ipp; 3312 sticky = B_TRUE; 3313 } 3314 3315 switch (name) { 3316 case IPV6_MULTICAST_IF: 3317 if (!checkonly) 3318 udp->udp_multicast_if_index = *i1; 3319 break; 3320 case IPV6_UNICAST_HOPS: 3321 /* -1 means use default */ 3322 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3323 *outlenp = 0; 3324 return (EINVAL); 3325 } 3326 if (!checkonly) { 3327 if (*i1 == -1) { 3328 udp->udp_ttl = ipp->ipp_unicast_hops = 3329 us->us_ipv6_hoplimit; 3330 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3331 /* Pass modified value to IP. */ 3332 *i1 = udp->udp_ttl; 3333 } else { 3334 udp->udp_ttl = ipp->ipp_unicast_hops = 3335 (uint8_t)*i1; 3336 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3337 } 3338 /* Rebuild the header template */ 3339 error = udp_build_hdrs(udp); 3340 if (error != 0) { 3341 *outlenp = 0; 3342 return (error); 3343 } 3344 } 3345 break; 3346 case IPV6_MULTICAST_HOPS: 3347 /* -1 means use default */ 3348 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3349 *outlenp = 0; 3350 return (EINVAL); 3351 } 3352 if (!checkonly) { 3353 if (*i1 == -1) { 3354 udp->udp_multicast_ttl = 3355 ipp->ipp_multicast_hops = 3356 IP_DEFAULT_MULTICAST_TTL; 3357 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3358 /* Pass modified value to IP. 
*/ 3359 *i1 = udp->udp_multicast_ttl; 3360 } else { 3361 udp->udp_multicast_ttl = 3362 ipp->ipp_multicast_hops = 3363 (uint8_t)*i1; 3364 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3365 } 3366 } 3367 break; 3368 case IPV6_MULTICAST_LOOP: 3369 if (*i1 != 0 && *i1 != 1) { 3370 *outlenp = 0; 3371 return (EINVAL); 3372 } 3373 if (!checkonly) 3374 connp->conn_multicast_loop = *i1; 3375 break; 3376 case IPV6_JOIN_GROUP: 3377 case IPV6_LEAVE_GROUP: 3378 case MCAST_JOIN_GROUP: 3379 case MCAST_LEAVE_GROUP: 3380 case MCAST_BLOCK_SOURCE: 3381 case MCAST_UNBLOCK_SOURCE: 3382 case MCAST_JOIN_SOURCE_GROUP: 3383 case MCAST_LEAVE_SOURCE_GROUP: 3384 /* 3385 * "soft" error (negative) 3386 * option not handled at this level 3387 * Note: Do not modify *outlenp 3388 */ 3389 return (-EINVAL); 3390 case IPV6_BOUND_IF: 3391 if (!checkonly) 3392 udp->udp_bound_if = *i1; 3393 break; 3394 case IPV6_UNSPEC_SRC: 3395 if (!checkonly) 3396 udp->udp_unspec_source = onoff; 3397 break; 3398 /* 3399 * Set boolean switches for ancillary data delivery 3400 */ 3401 case IPV6_RECVPKTINFO: 3402 if (!checkonly) 3403 udp->udp_ip_recvpktinfo = onoff; 3404 break; 3405 case IPV6_RECVTCLASS: 3406 if (!checkonly) { 3407 udp->udp_ipv6_recvtclass = onoff; 3408 } 3409 break; 3410 case IPV6_RECVPATHMTU: 3411 if (!checkonly) { 3412 udp->udp_ipv6_recvpathmtu = onoff; 3413 } 3414 break; 3415 case IPV6_RECVHOPLIMIT: 3416 if (!checkonly) 3417 udp->udp_ipv6_recvhoplimit = onoff; 3418 break; 3419 case IPV6_RECVHOPOPTS: 3420 if (!checkonly) 3421 udp->udp_ipv6_recvhopopts = onoff; 3422 break; 3423 case IPV6_RECVDSTOPTS: 3424 if (!checkonly) 3425 udp->udp_ipv6_recvdstopts = onoff; 3426 break; 3427 case _OLD_IPV6_RECVDSTOPTS: 3428 if (!checkonly) 3429 udp->udp_old_ipv6_recvdstopts = onoff; 3430 break; 3431 case IPV6_RECVRTHDRDSTOPTS: 3432 if (!checkonly) 3433 udp->udp_ipv6_recvrthdrdstopts = onoff; 3434 break; 3435 case IPV6_RECVRTHDR: 3436 if (!checkonly) 3437 udp->udp_ipv6_recvrthdr = onoff; 3438 break; 3439 /* 3440 * Set sticky options or ancillary data. 3441 * If sticky options, (re)build any extension headers 3442 * that might be needed as a result. 3443 */ 3444 case IPV6_PKTINFO: 3445 /* 3446 * The source address and ifindex are verified 3447 * in ip_opt_set(). For ancillary data the 3448 * source address is checked in ip_wput_v6. 
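 * A zero-length value clears the setting: the IPPF_IFINDEX/IPPF_ADDR
 * bits are dropped and also recorded in ipp_sticky_ignored so that
 * (for ancillary data) any sticky IPV6_PKTINFO is not applied to this
 * particular datagram.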
3449 */ 3450 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3451 return (EINVAL); 3452 if (checkonly) 3453 break; 3454 3455 if (inlen == 0) { 3456 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3457 ipp->ipp_sticky_ignored |= 3458 (IPPF_IFINDEX|IPPF_ADDR); 3459 } else { 3460 struct in6_pktinfo *pkti; 3461 3462 pkti = (struct in6_pktinfo *)invalp; 3463 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3464 ipp->ipp_addr = pkti->ipi6_addr; 3465 if (ipp->ipp_ifindex != 0) 3466 ipp->ipp_fields |= IPPF_IFINDEX; 3467 else 3468 ipp->ipp_fields &= ~IPPF_IFINDEX; 3469 if (!IN6_IS_ADDR_UNSPECIFIED( 3470 &ipp->ipp_addr)) 3471 ipp->ipp_fields |= IPPF_ADDR; 3472 else 3473 ipp->ipp_fields &= ~IPPF_ADDR; 3474 } 3475 if (sticky) { 3476 error = udp_build_hdrs(udp); 3477 if (error != 0) 3478 return (error); 3479 } 3480 break; 3481 case IPV6_HOPLIMIT: 3482 if (sticky) 3483 return (EINVAL); 3484 if (inlen != 0 && inlen != sizeof (int)) 3485 return (EINVAL); 3486 if (checkonly) 3487 break; 3488 3489 if (inlen == 0) { 3490 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3491 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3492 } else { 3493 if (*i1 > 255 || *i1 < -1) 3494 return (EINVAL); 3495 if (*i1 == -1) 3496 ipp->ipp_hoplimit = 3497 us->us_ipv6_hoplimit; 3498 else 3499 ipp->ipp_hoplimit = *i1; 3500 ipp->ipp_fields |= IPPF_HOPLIMIT; 3501 } 3502 break; 3503 case IPV6_TCLASS: 3504 if (inlen != 0 && inlen != sizeof (int)) 3505 return (EINVAL); 3506 if (checkonly) 3507 break; 3508 3509 if (inlen == 0) { 3510 ipp->ipp_fields &= ~IPPF_TCLASS; 3511 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3512 } else { 3513 if (*i1 > 255 || *i1 < -1) 3514 return (EINVAL); 3515 if (*i1 == -1) 3516 ipp->ipp_tclass = 0; 3517 else 3518 ipp->ipp_tclass = *i1; 3519 ipp->ipp_fields |= IPPF_TCLASS; 3520 } 3521 if (sticky) { 3522 error = udp_build_hdrs(udp); 3523 if (error != 0) 3524 return (error); 3525 } 3526 break; 3527 case IPV6_NEXTHOP: 3528 /* 3529 * IP will verify that the nexthop is reachable 3530 * and fail for sticky options. 3531 */ 3532 if (inlen != 0 && inlen != sizeof (sin6_t)) 3533 return (EINVAL); 3534 if (checkonly) 3535 break; 3536 3537 if (inlen == 0) { 3538 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3539 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3540 } else { 3541 sin6_t *sin6 = (sin6_t *)invalp; 3542 3543 if (sin6->sin6_family != AF_INET6) 3544 return (EAFNOSUPPORT); 3545 if (IN6_IS_ADDR_V4MAPPED( 3546 &sin6->sin6_addr)) 3547 return (EADDRNOTAVAIL); 3548 ipp->ipp_nexthop = sin6->sin6_addr; 3549 if (!IN6_IS_ADDR_UNSPECIFIED( 3550 &ipp->ipp_nexthop)) 3551 ipp->ipp_fields |= IPPF_NEXTHOP; 3552 else 3553 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3554 } 3555 if (sticky) { 3556 error = udp_build_hdrs(udp); 3557 if (error != 0) 3558 return (error); 3559 } 3560 break; 3561 case IPV6_HOPOPTS: { 3562 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3563 /* 3564 * Sanity checks - minimum size, size a multiple of 3565 * eight bytes, and matching size passed in. 3566 */ 3567 if (inlen != 0 && 3568 inlen != (8 * (hopts->ip6h_len + 1))) 3569 return (EINVAL); 3570 3571 if (checkonly) 3572 break; 3573 3574 error = optcom_pkt_set(invalp, inlen, sticky, 3575 (uchar_t **)&ipp->ipp_hopopts, 3576 &ipp->ipp_hopoptslen, 3577 sticky ? 
udp->udp_label_len_v6 : 0); 3578 if (error != 0) 3579 return (error); 3580 if (ipp->ipp_hopoptslen == 0) { 3581 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3582 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3583 } else { 3584 ipp->ipp_fields |= IPPF_HOPOPTS; 3585 } 3586 if (sticky) { 3587 error = udp_build_hdrs(udp); 3588 if (error != 0) 3589 return (error); 3590 } 3591 break; 3592 } 3593 case IPV6_RTHDRDSTOPTS: { 3594 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3595 3596 /* 3597 * Sanity checks - minimum size, size a multiple of 3598 * eight bytes, and matching size passed in. 3599 */ 3600 if (inlen != 0 && 3601 inlen != (8 * (dopts->ip6d_len + 1))) 3602 return (EINVAL); 3603 3604 if (checkonly) 3605 break; 3606 3607 if (inlen == 0) { 3608 if (sticky && 3609 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3610 kmem_free(ipp->ipp_rtdstopts, 3611 ipp->ipp_rtdstoptslen); 3612 ipp->ipp_rtdstopts = NULL; 3613 ipp->ipp_rtdstoptslen = 0; 3614 } 3615 3616 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3617 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3618 } else { 3619 error = optcom_pkt_set(invalp, inlen, sticky, 3620 (uchar_t **)&ipp->ipp_rtdstopts, 3621 &ipp->ipp_rtdstoptslen, 0); 3622 if (error != 0) 3623 return (error); 3624 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3625 } 3626 if (sticky) { 3627 error = udp_build_hdrs(udp); 3628 if (error != 0) 3629 return (error); 3630 } 3631 break; 3632 } 3633 case IPV6_DSTOPTS: { 3634 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3635 3636 /* 3637 * Sanity checks - minimum size, size a multiple of 3638 * eight bytes, and matching size passed in. 3639 */ 3640 if (inlen != 0 && 3641 inlen != (8 * (dopts->ip6d_len + 1))) 3642 return (EINVAL); 3643 3644 if (checkonly) 3645 break; 3646 3647 if (inlen == 0) { 3648 if (sticky && 3649 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3650 kmem_free(ipp->ipp_dstopts, 3651 ipp->ipp_dstoptslen); 3652 ipp->ipp_dstopts = NULL; 3653 ipp->ipp_dstoptslen = 0; 3654 } 3655 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3656 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3657 } else { 3658 error = optcom_pkt_set(invalp, inlen, sticky, 3659 (uchar_t **)&ipp->ipp_dstopts, 3660 &ipp->ipp_dstoptslen, 0); 3661 if (error != 0) 3662 return (error); 3663 ipp->ipp_fields |= IPPF_DSTOPTS; 3664 } 3665 if (sticky) { 3666 error = udp_build_hdrs(udp); 3667 if (error != 0) 3668 return (error); 3669 } 3670 break; 3671 } 3672 case IPV6_RTHDR: { 3673 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3674 3675 /* 3676 * Sanity checks - minimum size, size a multiple of 3677 * eight bytes, and matching size passed in. 
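 * (ip6r_len, like ip6h_len and ip6d_len above, counts 8-octet units
 * beyond the first 8 octets, so e.g. a routing header with
 * ip6r_len == 2 must be supplied as 8 * (2 + 1) = 24 bytes.)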
3678 */ 3679 if (inlen != 0 && 3680 inlen != (8 * (rt->ip6r_len + 1))) 3681 return (EINVAL); 3682 3683 if (checkonly) 3684 break; 3685 3686 if (inlen == 0) { 3687 if (sticky && 3688 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3689 kmem_free(ipp->ipp_rthdr, 3690 ipp->ipp_rthdrlen); 3691 ipp->ipp_rthdr = NULL; 3692 ipp->ipp_rthdrlen = 0; 3693 } 3694 ipp->ipp_fields &= ~IPPF_RTHDR; 3695 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3696 } else { 3697 error = optcom_pkt_set(invalp, inlen, sticky, 3698 (uchar_t **)&ipp->ipp_rthdr, 3699 &ipp->ipp_rthdrlen, 0); 3700 if (error != 0) 3701 return (error); 3702 ipp->ipp_fields |= IPPF_RTHDR; 3703 } 3704 if (sticky) { 3705 error = udp_build_hdrs(udp); 3706 if (error != 0) 3707 return (error); 3708 } 3709 break; 3710 } 3711 3712 case IPV6_DONTFRAG: 3713 if (checkonly) 3714 break; 3715 3716 if (onoff) { 3717 ipp->ipp_fields |= IPPF_DONTFRAG; 3718 } else { 3719 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3720 } 3721 break; 3722 3723 case IPV6_USE_MIN_MTU: 3724 if (inlen != sizeof (int)) 3725 return (EINVAL); 3726 3727 if (*i1 < -1 || *i1 > 1) 3728 return (EINVAL); 3729 3730 if (checkonly) 3731 break; 3732 3733 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3734 ipp->ipp_use_min_mtu = *i1; 3735 break; 3736 3737 case IPV6_BOUND_PIF: 3738 case IPV6_SEC_OPT: 3739 case IPV6_DONTFAILOVER_IF: 3740 case IPV6_SRC_PREFERENCES: 3741 case IPV6_V6ONLY: 3742 /* Handled at the IP level */ 3743 return (-EINVAL); 3744 default: 3745 *outlenp = 0; 3746 return (EINVAL); 3747 } 3748 break; 3749 } /* end IPPROTO_IPV6 */ 3750 case IPPROTO_UDP: 3751 switch (name) { 3752 case UDP_ANONPRIVBIND: 3753 if ((error = secpolicy_net_privaddr(cr, 0, 3754 IPPROTO_UDP)) != 0) { 3755 *outlenp = 0; 3756 return (error); 3757 } 3758 if (!checkonly) { 3759 udp->udp_anon_priv_bind = onoff; 3760 } 3761 break; 3762 case UDP_EXCLBIND: 3763 if (!checkonly) 3764 udp->udp_exclbind = onoff; 3765 break; 3766 case UDP_RCVHDR: 3767 if (!checkonly) 3768 udp->udp_rcvhdr = onoff; 3769 break; 3770 case UDP_NAT_T_ENDPOINT: 3771 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 3772 *outlenp = 0; 3773 return (error); 3774 } 3775 3776 /* 3777 * Use udp_family instead so we can avoid ambiguitites 3778 * with AF_INET6 sockets that may switch from IPv4 3779 * to IPv6. 3780 */ 3781 if (udp->udp_family != AF_INET) { 3782 *outlenp = 0; 3783 return (EAFNOSUPPORT); 3784 } 3785 3786 if (!checkonly) { 3787 udp->udp_nat_t_endpoint = onoff; 3788 3789 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3790 UDPH_SIZE + udp->udp_ip_snd_options_len; 3791 3792 /* Also, adjust wroff */ 3793 if (onoff) { 3794 udp->udp_max_hdr_len += 3795 sizeof (uint32_t); 3796 } 3797 (void) mi_set_sth_wroff(RD(q), 3798 udp->udp_max_hdr_len + us->us_wroff_extra); 3799 } 3800 break; 3801 default: 3802 *outlenp = 0; 3803 return (EINVAL); 3804 } 3805 break; 3806 default: 3807 *outlenp = 0; 3808 return (EINVAL); 3809 } 3810 /* 3811 * Common case of OK return with outval same as inval. 
3812 */ 3813 if (invalp != outvalp) { 3814 /* don't trust bcopy for identical src/dst */ 3815 (void) bcopy(invalp, outvalp, inlen); 3816 } 3817 *outlenp = inlen; 3818 return (0); 3819 } 3820 3821 int 3822 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3823 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3824 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3825 { 3826 udp_t *udp; 3827 int err; 3828 3829 udp = Q_TO_UDP(q); 3830 3831 rw_enter(&udp->udp_rwlock, RW_WRITER); 3832 err = udp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 3833 outlenp, outvalp, thisdg_attrs, cr, mblk); 3834 rw_exit(&udp->udp_rwlock); 3835 return (err); 3836 } 3837 3838 /* 3839 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3840 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3841 * headers, and the udp header. 3842 * Returns failure if can't allocate memory. 3843 */ 3844 static int 3845 udp_build_hdrs(udp_t *udp) 3846 { 3847 udp_stack_t *us = udp->udp_us; 3848 uchar_t *hdrs; 3849 uint_t hdrs_len; 3850 ip6_t *ip6h; 3851 ip6i_t *ip6i; 3852 udpha_t *udpha; 3853 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3854 size_t sth_wroff; 3855 3856 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3857 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3858 ASSERT(hdrs_len != 0); 3859 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3860 /* Need to reallocate */ 3861 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3862 if (hdrs == NULL) 3863 return (ENOMEM); 3864 3865 if (udp->udp_sticky_hdrs_len != 0) { 3866 kmem_free(udp->udp_sticky_hdrs, 3867 udp->udp_sticky_hdrs_len); 3868 } 3869 udp->udp_sticky_hdrs = hdrs; 3870 udp->udp_sticky_hdrs_len = hdrs_len; 3871 } 3872 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3873 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3874 3875 /* Set header fields not in ipp */ 3876 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3877 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3878 ip6h = (ip6_t *)&ip6i[1]; 3879 } else { 3880 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3881 } 3882 3883 if (!(ipp->ipp_fields & IPPF_ADDR)) 3884 ip6h->ip6_src = udp->udp_v6src; 3885 3886 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3887 udpha->uha_src_port = udp->udp_port; 3888 3889 /* Try to get everything in a single mblk */ 3890 if (hdrs_len > udp->udp_max_hdr_len) { 3891 udp->udp_max_hdr_len = hdrs_len; 3892 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3893 rw_exit(&udp->udp_rwlock); 3894 (void) mi_set_sth_wroff(udp->udp_connp->conn_rq, sth_wroff); 3895 rw_enter(&udp->udp_rwlock, RW_WRITER); 3896 } 3897 return (0); 3898 } 3899 3900 /* 3901 * This routine retrieves the value of an ND variable in a udpparam_t 3902 * structure. It is called through nd_getset when a user reads the 3903 * variable. 3904 */ 3905 /* ARGSUSED */ 3906 static int 3907 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3908 { 3909 udpparam_t *udppa = (udpparam_t *)cp; 3910 3911 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3912 return (0); 3913 } 3914 3915 /* 3916 * Walk through the param array specified registering each element with the 3917 * named dispatch (ND) handler. 
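 *
 * Once registered, the variables can be inspected and updated with
 * ndd(1M), e.g.
 *
 *	ndd /dev/udp udp_extra_priv_ports
 *	ndd -set /dev/udp udp_extra_priv_ports_add 2049
 *
 * where the -set form requires sufficient privilege.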
3918 */ 3919 static boolean_t 3920 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3921 { 3922 for (; cnt-- > 0; udppa++) { 3923 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3924 if (!nd_load(ndp, udppa->udp_param_name, 3925 udp_param_get, udp_param_set, 3926 (caddr_t)udppa)) { 3927 nd_free(ndp); 3928 return (B_FALSE); 3929 } 3930 } 3931 } 3932 if (!nd_load(ndp, "udp_extra_priv_ports", 3933 udp_extra_priv_ports_get, NULL, NULL)) { 3934 nd_free(ndp); 3935 return (B_FALSE); 3936 } 3937 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3938 NULL, udp_extra_priv_ports_add, NULL)) { 3939 nd_free(ndp); 3940 return (B_FALSE); 3941 } 3942 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3943 NULL, udp_extra_priv_ports_del, NULL)) { 3944 nd_free(ndp); 3945 return (B_FALSE); 3946 } 3947 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3948 NULL)) { 3949 nd_free(ndp); 3950 return (B_FALSE); 3951 } 3952 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3953 NULL)) { 3954 nd_free(ndp); 3955 return (B_FALSE); 3956 } 3957 return (B_TRUE); 3958 } 3959 3960 /* This routine sets an ND variable in a udpparam_t structure. */ 3961 /* ARGSUSED */ 3962 static int 3963 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3964 { 3965 long new_value; 3966 udpparam_t *udppa = (udpparam_t *)cp; 3967 3968 /* 3969 * Fail the request if the new value does not lie within the 3970 * required bounds. 3971 */ 3972 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3973 new_value < udppa->udp_param_min || 3974 new_value > udppa->udp_param_max) { 3975 return (EINVAL); 3976 } 3977 3978 /* Set the new value */ 3979 udppa->udp_param_value = new_value; 3980 return (0); 3981 } 3982 3983 /* 3984 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3985 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3986 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3987 * then it's assumed to be allocated to be large enough. 3988 * 3989 * Returns zero if trimming of the security option causes all options to go 3990 * away. 3991 */ 3992 static size_t 3993 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3994 { 3995 struct T_opthdr *toh; 3996 size_t hol = ipp->ipp_hopoptslen; 3997 ip6_hbh_t *dstopt = NULL; 3998 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3999 size_t tlen, olen, plen; 4000 boolean_t deleting; 4001 const struct ip6_opt *sopt, *lastpad; 4002 struct ip6_opt *dopt; 4003 4004 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 4005 toh->level = IPPROTO_IPV6; 4006 toh->name = IPV6_HOPOPTS; 4007 toh->status = 0; 4008 dstopt = (ip6_hbh_t *)(toh + 1); 4009 } 4010 4011 /* 4012 * If labeling is enabled, then skip the label option 4013 * but get other options if there are any. 4014 */ 4015 if (is_system_labeled()) { 4016 dopt = NULL; 4017 if (dstopt != NULL) { 4018 /* will fill in ip6h_len later */ 4019 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 4020 dopt = (struct ip6_opt *)(dstopt + 1); 4021 } 4022 sopt = (const struct ip6_opt *)(srcopt + 1); 4023 hol -= sizeof (*srcopt); 4024 tlen = sizeof (*dstopt); 4025 lastpad = NULL; 4026 deleting = B_FALSE; 4027 /* 4028 * This loop finds the first (lastpad pointer) of any number of 4029 * pads that preceeds the security option, then treats the 4030 * security option as though it were a pad, and then finds the 4031 * next non-pad option (or end of list). 4032 * 4033 * It then treats the entire block as one big pad. 
To preserve 4034 * alignment of any options that follow, or just the end of the 4035 * list, it computes a minimal new padding size that keeps the 4036 * same alignment for the next option. 4037 * 4038 * If it encounters just a sequence of pads with no security 4039 * option, those are copied as-is rather than collapsed. 4040 * 4041 * Note that to handle the end of list case, the code makes one 4042 * loop with 'hol' set to zero. 4043 */ 4044 for (;;) { 4045 if (hol > 0) { 4046 if (sopt->ip6o_type == IP6OPT_PAD1) { 4047 if (lastpad == NULL) 4048 lastpad = sopt; 4049 sopt = (const struct ip6_opt *) 4050 &sopt->ip6o_len; 4051 hol--; 4052 continue; 4053 } 4054 olen = sopt->ip6o_len + sizeof (*sopt); 4055 if (olen > hol) 4056 olen = hol; 4057 if (sopt->ip6o_type == IP6OPT_PADN || 4058 sopt->ip6o_type == ip6opt_ls) { 4059 if (sopt->ip6o_type == ip6opt_ls) 4060 deleting = B_TRUE; 4061 if (lastpad == NULL) 4062 lastpad = sopt; 4063 sopt = (const struct ip6_opt *) 4064 ((const char *)sopt + olen); 4065 hol -= olen; 4066 continue; 4067 } 4068 } else { 4069 /* if nothing was copied at all, then delete */ 4070 if (tlen == sizeof (*dstopt)) 4071 return (0); 4072 /* last pass; pick up any trailing padding */ 4073 olen = 0; 4074 } 4075 if (deleting) { 4076 /* 4077 * compute aligning effect of deleted material 4078 * to reproduce with pad. 4079 */ 4080 plen = ((const char *)sopt - 4081 (const char *)lastpad) & 7; 4082 tlen += plen; 4083 if (dopt != NULL) { 4084 if (plen == 1) { 4085 dopt->ip6o_type = IP6OPT_PAD1; 4086 } else if (plen > 1) { 4087 plen -= sizeof (*dopt); 4088 dopt->ip6o_type = IP6OPT_PADN; 4089 dopt->ip6o_len = plen; 4090 if (plen > 0) 4091 bzero(dopt + 1, plen); 4092 } 4093 dopt = (struct ip6_opt *) 4094 ((char *)dopt + plen); 4095 } 4096 deleting = B_FALSE; 4097 lastpad = NULL; 4098 } 4099 /* if there's uncopied padding, then copy that now */ 4100 if (lastpad != NULL) { 4101 olen += (const char *)sopt - 4102 (const char *)lastpad; 4103 sopt = lastpad; 4104 lastpad = NULL; 4105 } 4106 if (dopt != NULL && olen > 0) { 4107 bcopy(sopt, dopt, olen); 4108 dopt = (struct ip6_opt *)((char *)dopt + olen); 4109 } 4110 if (hol == 0) 4111 break; 4112 tlen += olen; 4113 sopt = (const struct ip6_opt *) 4114 ((const char *)sopt + olen); 4115 hol -= olen; 4116 } 4117 /* go back and patch up the length value, rounded upward */ 4118 if (dstopt != NULL) 4119 dstopt->ip6h_len = (tlen - 1) >> 3; 4120 } else { 4121 tlen = hol; 4122 if (dstopt != NULL) 4123 bcopy(srcopt, dstopt, hol); 4124 } 4125 4126 tlen += sizeof (*toh); 4127 if (toh != NULL) 4128 toh->len = tlen; 4129 4130 return (tlen); 4131 } 4132 4133 /* 4134 * Update udp_rcv_opt_len from the packet. 4135 * Called when options received, and when no options received but 4136 * udp_ip_recv_opt_len has previously recorded options. 
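 * The saved copy lives in udp_ip_rcv_options and udp_ip_rcv_options_len
 * so that a later getsockopt of IP_OPTIONS on an AF_INET socket can
 * return the options seen on the most recent datagram; a call with an
 * opt_len of zero frees any previously saved options.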
4137 */ 4138 static void 4139 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 4140 { 4141 /* Save the options if any */ 4142 if (opt_len > 0) { 4143 if (opt_len > udp->udp_ip_rcv_options_len) { 4144 /* Need to allocate larger buffer */ 4145 if (udp->udp_ip_rcv_options_len != 0) 4146 mi_free((char *)udp->udp_ip_rcv_options); 4147 udp->udp_ip_rcv_options_len = 0; 4148 udp->udp_ip_rcv_options = 4149 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 4150 if (udp->udp_ip_rcv_options != NULL) 4151 udp->udp_ip_rcv_options_len = opt_len; 4152 } 4153 if (udp->udp_ip_rcv_options_len != 0) { 4154 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 4155 /* Adjust length if we are resusing the space */ 4156 udp->udp_ip_rcv_options_len = opt_len; 4157 } 4158 } else if (udp->udp_ip_rcv_options_len != 0) { 4159 /* Clear out previously recorded options */ 4160 mi_free((char *)udp->udp_ip_rcv_options); 4161 udp->udp_ip_rcv_options = NULL; 4162 udp->udp_ip_rcv_options_len = 0; 4163 } 4164 } 4165 4166 /* ARGSUSED2 */ 4167 static void 4168 udp_input(void *arg1, mblk_t *mp, void *arg2) 4169 { 4170 conn_t *connp = (conn_t *)arg1; 4171 struct T_unitdata_ind *tudi; 4172 uchar_t *rptr; /* Pointer to IP header */ 4173 int hdr_length; /* Length of IP+UDP headers */ 4174 int opt_len; 4175 int udi_size; /* Size of T_unitdata_ind */ 4176 int mp_len; 4177 udp_t *udp; 4178 udpha_t *udpha; 4179 int ipversion; 4180 ip6_pkt_t ipp; 4181 ip6_t *ip6h; 4182 ip6i_t *ip6i; 4183 mblk_t *mp1; 4184 mblk_t *options_mp = NULL; 4185 ip_pktinfo_t *pinfo = NULL; 4186 cred_t *cr = NULL; 4187 pid_t cpid; 4188 uint32_t udp_ip_rcv_options_len; 4189 udp_bits_t udp_bits; 4190 cred_t *rcr = connp->conn_cred; 4191 udp_stack_t *us; 4192 4193 ASSERT(connp->conn_flags & IPCL_UDPCONN); 4194 4195 udp = connp->conn_udp; 4196 us = udp->udp_us; 4197 rptr = mp->b_rptr; 4198 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4199 ASSERT(OK_32PTR(rptr)); 4200 4201 /* 4202 * IP should have prepended the options data in an M_CTL 4203 * Check M_CTL "type" to make sure are not here bcos of 4204 * a valid ICMP message 4205 */ 4206 if (DB_TYPE(mp) == M_CTL) { 4207 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 4208 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 4209 IN_PKTINFO) { 4210 /* 4211 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 4212 * has been prepended to the packet by IP. We need to 4213 * extract the mblk and adjust the rptr 4214 */ 4215 pinfo = (ip_pktinfo_t *)mp->b_rptr; 4216 options_mp = mp; 4217 mp = mp->b_cont; 4218 rptr = mp->b_rptr; 4219 UDP_STAT(us, udp_in_pktinfo); 4220 } else { 4221 /* 4222 * ICMP messages. 4223 */ 4224 udp_icmp_error(connp->conn_rq, mp); 4225 return; 4226 } 4227 } 4228 4229 mp_len = msgdsize(mp); 4230 /* 4231 * This is the inbound data path. 4232 * First, we check to make sure the IP version number is correct, 4233 * and then pull the IP and UDP headers into the first mblk. 
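 * A snapshot of udp_ip_rcv_options_len and the udp_bits option flags is
 * taken under udp_rwlock as a reader so that the rest of the receive
 * path can run without holding the lock.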
4234 */ 4235 4236 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4237 ipp.ipp_fields = 0; 4238 4239 ipversion = IPH_HDR_VERSION(rptr); 4240 4241 rw_enter(&udp->udp_rwlock, RW_READER); 4242 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 4243 udp_bits = udp->udp_bits; 4244 rw_exit(&udp->udp_rwlock); 4245 4246 switch (ipversion) { 4247 case IPV4_VERSION: 4248 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4249 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4250 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4251 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 4252 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 4253 udp->udp_family == AF_INET) { 4254 /* 4255 * Record/update udp_ip_rcv_options with the lock 4256 * held. Not needed for AF_INET6 sockets 4257 * since they don't support a getsockopt of IP_OPTIONS. 4258 */ 4259 rw_enter(&udp->udp_rwlock, RW_WRITER); 4260 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 4261 opt_len); 4262 rw_exit(&udp->udp_rwlock); 4263 } 4264 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 4265 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4266 udp->udp_ip_recvpktinfo) { 4267 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 4268 ipp.ipp_fields |= IPPF_IFINDEX; 4269 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 4270 } 4271 } 4272 break; 4273 case IPV6_VERSION: 4274 /* 4275 * IPv6 packets can only be received by applications 4276 * that are prepared to receive IPv6 addresses. 4277 * The IP fanout must ensure this. 4278 */ 4279 ASSERT(udp->udp_family == AF_INET6); 4280 4281 ip6h = (ip6_t *)rptr; 4282 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4283 4284 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4285 uint8_t nexthdrp; 4286 /* Look for ifindex information */ 4287 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4288 ip6i = (ip6i_t *)ip6h; 4289 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4290 goto tossit; 4291 4292 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4293 ASSERT(ip6i->ip6i_ifindex != 0); 4294 ipp.ipp_fields |= IPPF_IFINDEX; 4295 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4296 } 4297 rptr = (uchar_t *)&ip6i[1]; 4298 mp->b_rptr = rptr; 4299 if (rptr == mp->b_wptr) { 4300 mp1 = mp->b_cont; 4301 freeb(mp); 4302 mp = mp1; 4303 rptr = mp->b_rptr; 4304 } 4305 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4306 goto tossit; 4307 ip6h = (ip6_t *)rptr; 4308 mp_len = msgdsize(mp); 4309 } 4310 /* 4311 * Find any potentially interesting extension headers 4312 * as well as the length of the IPv6 + extension 4313 * headers. 4314 */ 4315 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4316 UDPH_SIZE; 4317 ASSERT(nexthdrp == IPPROTO_UDP); 4318 } else { 4319 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4320 ip6i = NULL; 4321 } 4322 break; 4323 default: 4324 ASSERT(0); 4325 } 4326 4327 /* 4328 * IP inspected the UDP header thus all of it must be in the mblk. 4329 * UDP length check is performed for IPv6 packets and IPv4 packets 4330 * to check if the size of the packet as specified 4331 * by the header is the same as the physical size of the packet. 4332 * FIXME? Didn't IP already check this? 4333 */ 4334 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4335 if ((MBLKL(mp) < hdr_length) || 4336 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4337 goto tossit; 4338 } 4339 4340 4341 /* Walk past the headers unless IP_RECVHDR was set. */ 4342 if (!udp_bits.udpb_rcvhdr) { 4343 mp->b_rptr = rptr + hdr_length; 4344 mp_len -= hdr_length; 4345 } 4346 4347 /* 4348 * This is the inbound data path. 
Packets are passed upstream as 4349 * T_UNITDATA_IND messages with full IP headers still attached. 4350 */ 4351 if (udp->udp_family == AF_INET) { 4352 sin_t *sin; 4353 4354 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4355 4356 /* 4357 * Normally only send up the source address. 4358 * If IP_RECVDSTADDR is set we include the destination IP 4359 * address as an option. With IP_RECVOPTS we include all 4360 * the IP options. 4361 */ 4362 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4363 if (udp_bits.udpb_recvdstaddr) { 4364 udi_size += sizeof (struct T_opthdr) + 4365 sizeof (struct in_addr); 4366 UDP_STAT(us, udp_in_recvdstaddr); 4367 } 4368 4369 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 4370 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4371 udi_size += sizeof (struct T_opthdr) + 4372 sizeof (struct in_pktinfo); 4373 UDP_STAT(us, udp_ip_rcvpktinfo); 4374 } 4375 4376 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 4377 udi_size += sizeof (struct T_opthdr) + opt_len; 4378 UDP_STAT(us, udp_in_recvopts); 4379 } 4380 4381 /* 4382 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4383 * space accordingly 4384 */ 4385 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4386 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4387 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4388 UDP_STAT(us, udp_in_recvif); 4389 } 4390 4391 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4392 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4393 udi_size += sizeof (struct T_opthdr) + 4394 sizeof (struct sockaddr_dl); 4395 UDP_STAT(us, udp_in_recvslla); 4396 } 4397 4398 if ((udp_bits.udpb_recvucred) && 4399 (cr = DB_CRED(mp)) != NULL) { 4400 udi_size += sizeof (struct T_opthdr) + ucredsize; 4401 cpid = DB_CPID(mp); 4402 UDP_STAT(us, udp_in_recvucred); 4403 } 4404 4405 /* 4406 * If SO_TIMESTAMP is set allocate the appropriate sized 4407 * buffer. Since gethrestime() expects a pointer aligned 4408 * argument, we allocate space necessary for extra 4409 * alignment (even though it might not be used). 4410 */ 4411 if (udp_bits.udpb_timestamp) { 4412 udi_size += sizeof (struct T_opthdr) + 4413 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4414 UDP_STAT(us, udp_in_timestamp); 4415 } 4416 4417 /* 4418 * If IP_RECVTTL is set allocate the appropriate sized buffer 4419 */ 4420 if (udp_bits.udpb_recvttl) { 4421 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4422 UDP_STAT(us, udp_in_recvttl); 4423 } 4424 4425 /* Allocate a message block for the T_UNITDATA_IND structure. 
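 * At this point udi_size is the T_unitdata_ind plus sin_t plus one
 * T_opthdr and payload for each option enabled above; the option
 * filling code that follows must consume it exactly (see the
 * ASSERT(udi_size == 0) at the end).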
*/ 4426 mp1 = allocb(udi_size, BPRI_MED); 4427 if (mp1 == NULL) { 4428 freemsg(mp); 4429 if (options_mp != NULL) 4430 freeb(options_mp); 4431 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4432 return; 4433 } 4434 mp1->b_cont = mp; 4435 mp = mp1; 4436 mp->b_datap->db_type = M_PROTO; 4437 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4438 mp->b_wptr = (uchar_t *)tudi + udi_size; 4439 tudi->PRIM_type = T_UNITDATA_IND; 4440 tudi->SRC_length = sizeof (sin_t); 4441 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4442 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4443 sizeof (sin_t); 4444 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4445 tudi->OPT_length = udi_size; 4446 sin = (sin_t *)&tudi[1]; 4447 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4448 sin->sin_port = udpha->uha_src_port; 4449 sin->sin_family = udp->udp_family; 4450 *(uint32_t *)&sin->sin_zero[0] = 0; 4451 *(uint32_t *)&sin->sin_zero[4] = 0; 4452 4453 /* 4454 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4455 * IP_RECVTTL has been set. 4456 */ 4457 if (udi_size != 0) { 4458 /* 4459 * Copy in destination address before options to avoid 4460 * any padding issues. 4461 */ 4462 char *dstopt; 4463 4464 dstopt = (char *)&sin[1]; 4465 if (udp_bits.udpb_recvdstaddr) { 4466 struct T_opthdr *toh; 4467 ipaddr_t *dstptr; 4468 4469 toh = (struct T_opthdr *)dstopt; 4470 toh->level = IPPROTO_IP; 4471 toh->name = IP_RECVDSTADDR; 4472 toh->len = sizeof (struct T_opthdr) + 4473 sizeof (ipaddr_t); 4474 toh->status = 0; 4475 dstopt += sizeof (struct T_opthdr); 4476 dstptr = (ipaddr_t *)dstopt; 4477 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4478 dstopt += sizeof (ipaddr_t); 4479 udi_size -= toh->len; 4480 } 4481 4482 if (udp_bits.udpb_recvopts && opt_len > 0) { 4483 struct T_opthdr *toh; 4484 4485 toh = (struct T_opthdr *)dstopt; 4486 toh->level = IPPROTO_IP; 4487 toh->name = IP_RECVOPTS; 4488 toh->len = sizeof (struct T_opthdr) + opt_len; 4489 toh->status = 0; 4490 dstopt += sizeof (struct T_opthdr); 4491 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4492 opt_len); 4493 dstopt += opt_len; 4494 udi_size -= toh->len; 4495 } 4496 4497 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4498 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4499 struct T_opthdr *toh; 4500 struct in_pktinfo *pktinfop; 4501 4502 toh = (struct T_opthdr *)dstopt; 4503 toh->level = IPPROTO_IP; 4504 toh->name = IP_PKTINFO; 4505 toh->len = sizeof (struct T_opthdr) + 4506 sizeof (*pktinfop); 4507 toh->status = 0; 4508 dstopt += sizeof (struct T_opthdr); 4509 pktinfop = (struct in_pktinfo *)dstopt; 4510 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4511 pktinfop->ipi_spec_dst = 4512 pinfo->ip_pkt_match_addr; 4513 pktinfop->ipi_addr.s_addr = 4514 ((ipha_t *)rptr)->ipha_dst; 4515 4516 dstopt += sizeof (struct in_pktinfo); 4517 udi_size -= toh->len; 4518 } 4519 4520 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4521 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4522 4523 struct T_opthdr *toh; 4524 struct sockaddr_dl *dstptr; 4525 4526 toh = (struct T_opthdr *)dstopt; 4527 toh->level = IPPROTO_IP; 4528 toh->name = IP_RECVSLLA; 4529 toh->len = sizeof (struct T_opthdr) + 4530 sizeof (struct sockaddr_dl); 4531 toh->status = 0; 4532 dstopt += sizeof (struct T_opthdr); 4533 dstptr = (struct sockaddr_dl *)dstopt; 4534 bcopy(&pinfo->ip_pkt_slla, dstptr, 4535 sizeof (struct sockaddr_dl)); 4536 dstopt += sizeof (struct sockaddr_dl); 4537 udi_size -= toh->len; 4538 } 4539 4540 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4541 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 
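			/*
			 * IP_RECVIF: pass up the index of the interface the
			 * datagram arrived on as a uint_t option.
			 */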
4542 4543 struct T_opthdr *toh; 4544 uint_t *dstptr; 4545 4546 toh = (struct T_opthdr *)dstopt; 4547 toh->level = IPPROTO_IP; 4548 toh->name = IP_RECVIF; 4549 toh->len = sizeof (struct T_opthdr) + 4550 sizeof (uint_t); 4551 toh->status = 0; 4552 dstopt += sizeof (struct T_opthdr); 4553 dstptr = (uint_t *)dstopt; 4554 *dstptr = pinfo->ip_pkt_ifindex; 4555 dstopt += sizeof (uint_t); 4556 udi_size -= toh->len; 4557 } 4558 4559 if (cr != NULL) { 4560 struct T_opthdr *toh; 4561 4562 toh = (struct T_opthdr *)dstopt; 4563 toh->level = SOL_SOCKET; 4564 toh->name = SCM_UCRED; 4565 toh->len = sizeof (struct T_opthdr) + ucredsize; 4566 toh->status = 0; 4567 dstopt += sizeof (struct T_opthdr); 4568 (void) cred2ucred(cr, cpid, dstopt, rcr); 4569 dstopt += ucredsize; 4570 udi_size -= toh->len; 4571 } 4572 4573 if (udp_bits.udpb_timestamp) { 4574 struct T_opthdr *toh; 4575 4576 toh = (struct T_opthdr *)dstopt; 4577 toh->level = SOL_SOCKET; 4578 toh->name = SCM_TIMESTAMP; 4579 toh->len = sizeof (struct T_opthdr) + 4580 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4581 toh->status = 0; 4582 dstopt += sizeof (struct T_opthdr); 4583 /* Align for gethrestime() */ 4584 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4585 sizeof (intptr_t)); 4586 gethrestime((timestruc_t *)dstopt); 4587 dstopt = (char *)toh + toh->len; 4588 udi_size -= toh->len; 4589 } 4590 4591 /* 4592 * CAUTION: 4593 * Due to aligment issues 4594 * Processing of IP_RECVTTL option 4595 * should always be the last. Adding 4596 * any option processing after this will 4597 * cause alignment panic. 4598 */ 4599 if (udp_bits.udpb_recvttl) { 4600 struct T_opthdr *toh; 4601 uint8_t *dstptr; 4602 4603 toh = (struct T_opthdr *)dstopt; 4604 toh->level = IPPROTO_IP; 4605 toh->name = IP_RECVTTL; 4606 toh->len = sizeof (struct T_opthdr) + 4607 sizeof (uint8_t); 4608 toh->status = 0; 4609 dstopt += sizeof (struct T_opthdr); 4610 dstptr = (uint8_t *)dstopt; 4611 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4612 dstopt += sizeof (uint8_t); 4613 udi_size -= toh->len; 4614 } 4615 4616 /* Consumed all of allocated space */ 4617 ASSERT(udi_size == 0); 4618 } 4619 } else { 4620 sin6_t *sin6; 4621 4622 /* 4623 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4624 * 4625 * Normally we only send up the address. If receiving of any 4626 * optional receive side information is enabled, we also send 4627 * that up as options. 
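 * For an IPv6 socket that can include IPV6_PKTINFO, IPV6_HOPLIMIT,
 * IPV6_TCLASS, IPV6_HOPOPTS, IPV6_RTHDR, IPV6_DSTOPTS, SCM_UCRED and
 * SCM_TIMESTAMP, each carried in its own T_opthdr.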
4628 */ 4629 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4630 4631 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4632 IPPF_RTHDR|IPPF_IFINDEX)) { 4633 if ((udp_bits.udpb_ipv6_recvhopopts) && 4634 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4635 size_t hlen; 4636 4637 UDP_STAT(us, udp_in_recvhopopts); 4638 hlen = copy_hop_opts(&ipp, NULL); 4639 if (hlen == 0) 4640 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4641 udi_size += hlen; 4642 } 4643 if (((udp_bits.udpb_ipv6_recvdstopts) || 4644 udp_bits.udpb_old_ipv6_recvdstopts) && 4645 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4646 udi_size += sizeof (struct T_opthdr) + 4647 ipp.ipp_dstoptslen; 4648 UDP_STAT(us, udp_in_recvdstopts); 4649 } 4650 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4651 udp_bits.udpb_ipv6_recvrthdr && 4652 (ipp.ipp_fields & IPPF_RTHDR)) || 4653 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4654 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4655 udi_size += sizeof (struct T_opthdr) + 4656 ipp.ipp_rtdstoptslen; 4657 UDP_STAT(us, udp_in_recvrtdstopts); 4658 } 4659 if ((udp_bits.udpb_ipv6_recvrthdr) && 4660 (ipp.ipp_fields & IPPF_RTHDR)) { 4661 udi_size += sizeof (struct T_opthdr) + 4662 ipp.ipp_rthdrlen; 4663 UDP_STAT(us, udp_in_recvrthdr); 4664 } 4665 if ((udp_bits.udpb_ip_recvpktinfo) && 4666 (ipp.ipp_fields & IPPF_IFINDEX)) { 4667 udi_size += sizeof (struct T_opthdr) + 4668 sizeof (struct in6_pktinfo); 4669 UDP_STAT(us, udp_in_recvpktinfo); 4670 } 4671 4672 } 4673 if ((udp_bits.udpb_recvucred) && 4674 (cr = DB_CRED(mp)) != NULL) { 4675 udi_size += sizeof (struct T_opthdr) + ucredsize; 4676 cpid = DB_CPID(mp); 4677 UDP_STAT(us, udp_in_recvucred); 4678 } 4679 4680 /* 4681 * If SO_TIMESTAMP is set allocate the appropriate sized 4682 * buffer. Since gethrestime() expects a pointer aligned 4683 * argument, we allocate space necessary for extra 4684 * alignment (even though it might not be used). 
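 * That is sizeof (struct T_opthdr) + sizeof (timestruc_t) +
 * _POINTER_ALIGNMENT bytes; the code that fills in the option below
 * rounds dstopt up with P2ROUNDUP before storing the timestamp.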
4685 */ 4686 if (udp_bits.udpb_timestamp) { 4687 udi_size += sizeof (struct T_opthdr) + 4688 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4689 UDP_STAT(us, udp_in_timestamp); 4690 } 4691 4692 if (udp_bits.udpb_ipv6_recvhoplimit) { 4693 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4694 UDP_STAT(us, udp_in_recvhoplimit); 4695 } 4696 4697 if (udp_bits.udpb_ipv6_recvtclass) { 4698 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4699 UDP_STAT(us, udp_in_recvtclass); 4700 } 4701 4702 mp1 = allocb(udi_size, BPRI_MED); 4703 if (mp1 == NULL) { 4704 freemsg(mp); 4705 if (options_mp != NULL) 4706 freeb(options_mp); 4707 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4708 return; 4709 } 4710 mp1->b_cont = mp; 4711 mp = mp1; 4712 mp->b_datap->db_type = M_PROTO; 4713 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4714 mp->b_wptr = (uchar_t *)tudi + udi_size; 4715 tudi->PRIM_type = T_UNITDATA_IND; 4716 tudi->SRC_length = sizeof (sin6_t); 4717 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4718 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4719 sizeof (sin6_t); 4720 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4721 tudi->OPT_length = udi_size; 4722 sin6 = (sin6_t *)&tudi[1]; 4723 if (ipversion == IPV4_VERSION) { 4724 in6_addr_t v6dst; 4725 4726 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4727 &sin6->sin6_addr); 4728 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4729 &v6dst); 4730 sin6->sin6_flowinfo = 0; 4731 sin6->sin6_scope_id = 0; 4732 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4733 connp->conn_zoneid, us->us_netstack); 4734 } else { 4735 sin6->sin6_addr = ip6h->ip6_src; 4736 /* No sin6_flowinfo per API */ 4737 sin6->sin6_flowinfo = 0; 4738 /* For link-scope source pass up scope id */ 4739 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4740 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4741 sin6->sin6_scope_id = ipp.ipp_ifindex; 4742 else 4743 sin6->sin6_scope_id = 0; 4744 sin6->__sin6_src_id = ip_srcid_find_addr( 4745 &ip6h->ip6_dst, connp->conn_zoneid, 4746 us->us_netstack); 4747 } 4748 sin6->sin6_port = udpha->uha_src_port; 4749 sin6->sin6_family = udp->udp_family; 4750 4751 if (udi_size != 0) { 4752 uchar_t *dstopt; 4753 4754 dstopt = (uchar_t *)&sin6[1]; 4755 if ((udp_bits.udpb_ip_recvpktinfo) && 4756 (ipp.ipp_fields & IPPF_IFINDEX)) { 4757 struct T_opthdr *toh; 4758 struct in6_pktinfo *pkti; 4759 4760 toh = (struct T_opthdr *)dstopt; 4761 toh->level = IPPROTO_IPV6; 4762 toh->name = IPV6_PKTINFO; 4763 toh->len = sizeof (struct T_opthdr) + 4764 sizeof (*pkti); 4765 toh->status = 0; 4766 dstopt += sizeof (struct T_opthdr); 4767 pkti = (struct in6_pktinfo *)dstopt; 4768 if (ipversion == IPV6_VERSION) 4769 pkti->ipi6_addr = ip6h->ip6_dst; 4770 else 4771 IN6_IPADDR_TO_V4MAPPED( 4772 ((ipha_t *)rptr)->ipha_dst, 4773 &pkti->ipi6_addr); 4774 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4775 dstopt += sizeof (*pkti); 4776 udi_size -= toh->len; 4777 } 4778 if (udp_bits.udpb_ipv6_recvhoplimit) { 4779 struct T_opthdr *toh; 4780 4781 toh = (struct T_opthdr *)dstopt; 4782 toh->level = IPPROTO_IPV6; 4783 toh->name = IPV6_HOPLIMIT; 4784 toh->len = sizeof (struct T_opthdr) + 4785 sizeof (uint_t); 4786 toh->status = 0; 4787 dstopt += sizeof (struct T_opthdr); 4788 if (ipversion == IPV6_VERSION) 4789 *(uint_t *)dstopt = ip6h->ip6_hops; 4790 else 4791 *(uint_t *)dstopt = 4792 ((ipha_t *)rptr)->ipha_ttl; 4793 dstopt += sizeof (uint_t); 4794 udi_size -= toh->len; 4795 } 4796 if (udp_bits.udpb_ipv6_recvtclass) { 4797 struct T_opthdr *toh; 4798 4799 toh = (struct T_opthdr *)dstopt; 4800 
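				/*
				 * IPV6_TCLASS: the traffic class is taken
				 * from ip6_flow for IPv6, or from the IPv4
				 * TOS byte for a v4-mapped packet.
				 */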
toh->level = IPPROTO_IPV6; 4801 toh->name = IPV6_TCLASS; 4802 toh->len = sizeof (struct T_opthdr) + 4803 sizeof (uint_t); 4804 toh->status = 0; 4805 dstopt += sizeof (struct T_opthdr); 4806 if (ipversion == IPV6_VERSION) { 4807 *(uint_t *)dstopt = 4808 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4809 } else { 4810 ipha_t *ipha = (ipha_t *)rptr; 4811 *(uint_t *)dstopt = 4812 ipha->ipha_type_of_service; 4813 } 4814 dstopt += sizeof (uint_t); 4815 udi_size -= toh->len; 4816 } 4817 if ((udp_bits.udpb_ipv6_recvhopopts) && 4818 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4819 size_t hlen; 4820 4821 hlen = copy_hop_opts(&ipp, dstopt); 4822 dstopt += hlen; 4823 udi_size -= hlen; 4824 } 4825 if ((udp_bits.udpb_ipv6_recvdstopts) && 4826 (udp_bits.udpb_ipv6_recvrthdr) && 4827 (ipp.ipp_fields & IPPF_RTHDR) && 4828 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4829 struct T_opthdr *toh; 4830 4831 toh = (struct T_opthdr *)dstopt; 4832 toh->level = IPPROTO_IPV6; 4833 toh->name = IPV6_DSTOPTS; 4834 toh->len = sizeof (struct T_opthdr) + 4835 ipp.ipp_rtdstoptslen; 4836 toh->status = 0; 4837 dstopt += sizeof (struct T_opthdr); 4838 bcopy(ipp.ipp_rtdstopts, dstopt, 4839 ipp.ipp_rtdstoptslen); 4840 dstopt += ipp.ipp_rtdstoptslen; 4841 udi_size -= toh->len; 4842 } 4843 if ((udp_bits.udpb_ipv6_recvrthdr) && 4844 (ipp.ipp_fields & IPPF_RTHDR)) { 4845 struct T_opthdr *toh; 4846 4847 toh = (struct T_opthdr *)dstopt; 4848 toh->level = IPPROTO_IPV6; 4849 toh->name = IPV6_RTHDR; 4850 toh->len = sizeof (struct T_opthdr) + 4851 ipp.ipp_rthdrlen; 4852 toh->status = 0; 4853 dstopt += sizeof (struct T_opthdr); 4854 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4855 dstopt += ipp.ipp_rthdrlen; 4856 udi_size -= toh->len; 4857 } 4858 if ((udp_bits.udpb_ipv6_recvdstopts) && 4859 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4860 struct T_opthdr *toh; 4861 4862 toh = (struct T_opthdr *)dstopt; 4863 toh->level = IPPROTO_IPV6; 4864 toh->name = IPV6_DSTOPTS; 4865 toh->len = sizeof (struct T_opthdr) + 4866 ipp.ipp_dstoptslen; 4867 toh->status = 0; 4868 dstopt += sizeof (struct T_opthdr); 4869 bcopy(ipp.ipp_dstopts, dstopt, 4870 ipp.ipp_dstoptslen); 4871 dstopt += ipp.ipp_dstoptslen; 4872 udi_size -= toh->len; 4873 } 4874 4875 if (cr != NULL) { 4876 struct T_opthdr *toh; 4877 4878 toh = (struct T_opthdr *)dstopt; 4879 toh->level = SOL_SOCKET; 4880 toh->name = SCM_UCRED; 4881 toh->len = sizeof (struct T_opthdr) + ucredsize; 4882 toh->status = 0; 4883 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4884 dstopt += toh->len; 4885 udi_size -= toh->len; 4886 } 4887 if (udp_bits.udpb_timestamp) { 4888 struct T_opthdr *toh; 4889 4890 toh = (struct T_opthdr *)dstopt; 4891 toh->level = SOL_SOCKET; 4892 toh->name = SCM_TIMESTAMP; 4893 toh->len = sizeof (struct T_opthdr) + 4894 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4895 toh->status = 0; 4896 dstopt += sizeof (struct T_opthdr); 4897 /* Align for gethrestime() */ 4898 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4899 sizeof (intptr_t)); 4900 gethrestime((timestruc_t *)dstopt); 4901 dstopt = (uchar_t *)toh + toh->len; 4902 udi_size -= toh->len; 4903 } 4904 4905 /* Consumed all of allocated space */ 4906 ASSERT(udi_size == 0); 4907 } 4908 #undef sin6 4909 /* No IP_RECVDSTADDR for IPv6. */ 4910 } 4911 4912 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4913 if (options_mp != NULL) 4914 freeb(options_mp); 4915 4916 if (udp_bits.udpb_direct_sockfs) { 4917 /* 4918 * There is nothing above us except for the stream head; 4919 * use the read-side synchronous stream interface in 4920 * order to reduce the time spent in interrupt thread. 
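 * The datagram is queued with udp_rcv_enqueue() for the stream head to
 * pick up directly rather than being passed up with putnext().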
4921 */ 4922 ASSERT(udp->udp_issocket); 4923 udp_rcv_enqueue(connp->conn_rq, udp, mp, mp_len); 4924 } else { 4925 /* 4926 * Use regular STREAMS interface to pass data upstream 4927 * if this is not a socket endpoint, or if we have 4928 * switched over to the slow mode due to sockmod being 4929 * popped or a module being pushed on top of us. 4930 */ 4931 putnext(connp->conn_rq, mp); 4932 } 4933 return; 4934 4935 tossit: 4936 freemsg(mp); 4937 if (options_mp != NULL) 4938 freeb(options_mp); 4939 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4940 } 4941 4942 /* 4943 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 4944 * immediately. 4945 */ 4946 static void 4947 udp_bind_result(conn_t *connp, mblk_t *mp) 4948 { 4949 struct T_error_ack *tea; 4950 4951 switch (mp->b_datap->db_type) { 4952 case M_PROTO: 4953 case M_PCPROTO: 4954 /* M_PROTO messages contain some type of TPI message. */ 4955 ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= 4956 (uintptr_t)INT_MAX); 4957 if (mp->b_wptr - mp->b_rptr < sizeof (t_scalar_t)) { 4958 freemsg(mp); 4959 return; 4960 } 4961 tea = (struct T_error_ack *)mp->b_rptr; 4962 4963 switch (tea->PRIM_type) { 4964 case T_ERROR_ACK: 4965 switch (tea->ERROR_prim) { 4966 case O_T_BIND_REQ: 4967 case T_BIND_REQ: 4968 udp_bind_error(connp, mp); 4969 return; 4970 default: 4971 break; 4972 } 4973 ASSERT(0); 4974 freemsg(mp); 4975 return; 4976 4977 case T_BIND_ACK: 4978 udp_bind_ack(connp, mp); 4979 return; 4980 4981 default: 4982 break; 4983 } 4984 freemsg(mp); 4985 return; 4986 default: 4987 /* FIXME: other cases? */ 4988 ASSERT(0); 4989 freemsg(mp); 4990 return; 4991 } 4992 } 4993 4994 /* 4995 * Process a T_BIND_ACK 4996 */ 4997 static void 4998 udp_bind_ack(conn_t *connp, mblk_t *mp) 4999 { 5000 udp_t *udp = connp->conn_udp; 5001 mblk_t *mp1; 5002 ire_t *ire; 5003 struct T_bind_ack *tba; 5004 uchar_t *addrp; 5005 ipa_conn_t *ac; 5006 ipa6_conn_t *ac6; 5007 udp_fanout_t *udpf; 5008 udp_stack_t *us = udp->udp_us; 5009 5010 ASSERT(udp->udp_pending_op != -1); 5011 rw_enter(&udp->udp_rwlock, RW_WRITER); 5012 /* 5013 * If a broadcast/multicast address was bound set 5014 * the source address to 0. 5015 * This ensures no datagrams with broadcast address 5016 * as source address are emitted (which would violate 5017 * RFC1122 - Hosts requirements) 5018 * 5019 * Note that when connecting the returned IRE is 5020 * for the destination address and we only perform 5021 * the broadcast check for the source address (it 5022 * is OK to connect to a broadcast/multicast address.) 5023 */ 5024 mp1 = mp->b_cont; 5025 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5026 ire = (ire_t *)mp1->b_rptr; 5027 5028 /* 5029 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5030 * local address. 
5031 */ 5032 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 5033 us->us_bind_fanout_size)]; 5034 if (ire->ire_type == IRE_BROADCAST && 5035 udp->udp_state != TS_DATA_XFER) { 5036 ASSERT(udp->udp_pending_op == T_BIND_REQ || 5037 udp->udp_pending_op == O_T_BIND_REQ); 5038 /* This was just a local bind to a broadcast addr */ 5039 mutex_enter(&udpf->uf_lock); 5040 V6_SET_ZERO(udp->udp_v6src); 5041 mutex_exit(&udpf->uf_lock); 5042 if (udp->udp_family == AF_INET6) 5043 (void) udp_build_hdrs(udp); 5044 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5045 /* 5046 * Local address not yet set - pick it from the 5047 * T_bind_ack 5048 */ 5049 tba = (struct T_bind_ack *)mp->b_rptr; 5050 addrp = &mp->b_rptr[tba->ADDR_offset]; 5051 switch (udp->udp_family) { 5052 case AF_INET: 5053 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5054 ac = (ipa_conn_t *)addrp; 5055 } else { 5056 ASSERT(tba->ADDR_length == 5057 sizeof (ipa_conn_x_t)); 5058 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5059 } 5060 mutex_enter(&udpf->uf_lock); 5061 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5062 &udp->udp_v6src); 5063 mutex_exit(&udpf->uf_lock); 5064 break; 5065 case AF_INET6: 5066 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5067 ac6 = (ipa6_conn_t *)addrp; 5068 } else { 5069 ASSERT(tba->ADDR_length == 5070 sizeof (ipa6_conn_x_t)); 5071 ac6 = &((ipa6_conn_x_t *) 5072 addrp)->ac6x_conn; 5073 } 5074 mutex_enter(&udpf->uf_lock); 5075 udp->udp_v6src = ac6->ac6_laddr; 5076 mutex_exit(&udpf->uf_lock); 5077 (void) udp_build_hdrs(udp); 5078 break; 5079 } 5080 } 5081 mp1 = mp1->b_cont; 5082 } 5083 udp->udp_pending_op = -1; 5084 rw_exit(&udp->udp_rwlock); 5085 /* 5086 * Look for one or more appended ACK message added by 5087 * udp_connect or udp_disconnect. 5088 * If none found just send up the T_BIND_ACK. 5089 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5090 * udp_disconnect has appended a T_OK_ACK. 5091 */ 5092 if (mp1 != NULL) { 5093 if (mp->b_cont == mp1) 5094 mp->b_cont = NULL; 5095 else { 5096 ASSERT(mp->b_cont->b_cont == mp1); 5097 mp->b_cont->b_cont = NULL; 5098 } 5099 freemsg(mp); 5100 mp = mp1; 5101 while (mp != NULL) { 5102 mp1 = mp->b_cont; 5103 mp->b_cont = NULL; 5104 putnext(connp->conn_rq, mp); 5105 mp = mp1; 5106 } 5107 return; 5108 } 5109 freemsg(mp->b_cont); 5110 mp->b_cont = NULL; 5111 putnext(connp->conn_rq, mp); 5112 } 5113 5114 static void 5115 udp_bind_error(conn_t *connp, mblk_t *mp) 5116 { 5117 udp_t *udp = connp->conn_udp; 5118 struct T_error_ack *tea; 5119 udp_fanout_t *udpf; 5120 udp_stack_t *us = udp->udp_us; 5121 5122 tea = (struct T_error_ack *)mp->b_rptr; 5123 5124 /* 5125 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5126 * clear out the associated port and source 5127 * address before passing the message 5128 * upstream. If this was caused by a T_CONN_REQ 5129 * revert back to bound state. 
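 * For a failed T_CONN_REQ that means restoring udp_v6src from
 * udp_bound_v6src and going back to TS_IDLE; for a failed bind the
 * endpoint is removed from the bind hash, its port is cleared, and it
 * returns to TS_UNBND.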
5130 */ 5131 5132 rw_enter(&udp->udp_rwlock, RW_WRITER); 5133 ASSERT(udp->udp_pending_op != -1); 5134 tea->ERROR_prim = udp->udp_pending_op; 5135 udp->udp_pending_op = -1; 5136 udpf = &us->us_bind_fanout[ 5137 UDP_BIND_HASH(udp->udp_port, 5138 us->us_bind_fanout_size)]; 5139 mutex_enter(&udpf->uf_lock); 5140 5141 switch (tea->ERROR_prim) { 5142 case T_CONN_REQ: 5143 ASSERT(udp->udp_state == TS_DATA_XFER); 5144 /* Connect failed */ 5145 /* Revert back to the bound source */ 5146 udp->udp_v6src = udp->udp_bound_v6src; 5147 udp->udp_state = TS_IDLE; 5148 mutex_exit(&udpf->uf_lock); 5149 if (udp->udp_family == AF_INET6) 5150 (void) udp_build_hdrs(udp); 5151 rw_exit(&udp->udp_rwlock); 5152 break; 5153 5154 case T_DISCON_REQ: 5155 case T_BIND_REQ: 5156 case O_T_BIND_REQ: 5157 V6_SET_ZERO(udp->udp_v6src); 5158 V6_SET_ZERO(udp->udp_bound_v6src); 5159 udp->udp_state = TS_UNBND; 5160 udp_bind_hash_remove(udp, B_TRUE); 5161 udp->udp_port = 0; 5162 mutex_exit(&udpf->uf_lock); 5163 if (udp->udp_family == AF_INET6) 5164 (void) udp_build_hdrs(udp); 5165 rw_exit(&udp->udp_rwlock); 5166 break; 5167 5168 default: 5169 mutex_exit(&udpf->uf_lock); 5170 rw_exit(&udp->udp_rwlock); 5171 (void) mi_strlog(connp->conn_rq, 1, 5172 SL_ERROR|SL_TRACE, 5173 "udp_input_other: bad ERROR_prim, " 5174 "len %d", tea->ERROR_prim); 5175 } 5176 putnext(connp->conn_rq, mp); 5177 } 5178 5179 /* 5180 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 5181 * information that can be changing beneath us. 5182 */ 5183 mblk_t * 5184 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5185 { 5186 mblk_t *mpdata; 5187 mblk_t *mp_conn_ctl; 5188 mblk_t *mp_attr_ctl; 5189 mblk_t *mp6_conn_ctl; 5190 mblk_t *mp6_attr_ctl; 5191 mblk_t *mp_conn_tail; 5192 mblk_t *mp_attr_tail; 5193 mblk_t *mp6_conn_tail; 5194 mblk_t *mp6_attr_tail; 5195 struct opthdr *optp; 5196 mib2_udpEntry_t ude; 5197 mib2_udp6Entry_t ude6; 5198 mib2_transportMLPEntry_t mlp; 5199 int state; 5200 zoneid_t zoneid; 5201 int i; 5202 connf_t *connfp; 5203 conn_t *connp = Q_TO_CONN(q); 5204 int v4_conn_idx; 5205 int v6_conn_idx; 5206 boolean_t needattr; 5207 udp_t *udp; 5208 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5209 udp_stack_t *us = connp->conn_netstack->netstack_udp; 5210 mblk_t *mp2ctl; 5211 5212 /* 5213 * make a copy of the original message 5214 */ 5215 mp2ctl = copymsg(mpctl); 5216 5217 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 5218 if (mpctl == NULL || 5219 (mpdata = mpctl->b_cont) == NULL || 5220 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5221 (mp_attr_ctl = copymsg(mpctl)) == NULL || 5222 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 5223 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 5224 freemsg(mp_conn_ctl); 5225 freemsg(mp_attr_ctl); 5226 freemsg(mp6_conn_ctl); 5227 freemsg(mpctl); 5228 freemsg(mp2ctl); 5229 return (0); 5230 } 5231 5232 zoneid = connp->conn_zoneid; 5233 5234 /* fixed length structure for IPv4 and IPv6 counters */ 5235 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5236 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5237 /* synchronize 64- and 32-bit counters */ 5238 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 5239 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 5240 5241 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5242 optp->level = MIB2_UDP; 5243 optp->name = 0; 5244 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 5245 sizeof (us->us_udp_mib)); 5246 optp->len = msgdsize(mpdata); 5247 qreply(q, mpctl); 5248 5249 
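	/*
	 * Walk the global conn hash and append a mib2_udpEntry_t or
	 * mib2_udp6Entry_t for every UDP endpoint in this zone, plus a
	 * mib2_transportMLPEntry_t for endpoints using MLP.
	 */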
mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 5250 v4_conn_idx = v6_conn_idx = 0; 5251 5252 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5253 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5254 connp = NULL; 5255 5256 while ((connp = ipcl_get_next_conn(connfp, connp, 5257 IPCL_UDPCONN))) { 5258 udp = connp->conn_udp; 5259 if (zoneid != connp->conn_zoneid) 5260 continue; 5261 5262 /* 5263 * Note that the port numbers are sent in 5264 * host byte order 5265 */ 5266 5267 if (udp->udp_state == TS_UNBND) 5268 state = MIB2_UDP_unbound; 5269 else if (udp->udp_state == TS_IDLE) 5270 state = MIB2_UDP_idle; 5271 else if (udp->udp_state == TS_DATA_XFER) 5272 state = MIB2_UDP_connected; 5273 else 5274 state = MIB2_UDP_unknown; 5275 5276 needattr = B_FALSE; 5277 bzero(&mlp, sizeof (mlp)); 5278 if (connp->conn_mlp_type != mlptSingle) { 5279 if (connp->conn_mlp_type == mlptShared || 5280 connp->conn_mlp_type == mlptBoth) 5281 mlp.tme_flags |= MIB2_TMEF_SHARED; 5282 if (connp->conn_mlp_type == mlptPrivate || 5283 connp->conn_mlp_type == mlptBoth) 5284 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 5285 needattr = B_TRUE; 5286 } 5287 5288 /* 5289 * Create an IPv4 table entry for IPv4 entries and also 5290 * any IPv6 entries which are bound to in6addr_any 5291 * (i.e. anything a IPv4 peer could connect/send to). 5292 */ 5293 if (udp->udp_ipversion == IPV4_VERSION || 5294 (udp->udp_state <= TS_IDLE && 5295 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5296 ude.udpEntryInfo.ue_state = state; 5297 /* 5298 * If in6addr_any this will set it to 5299 * INADDR_ANY 5300 */ 5301 ude.udpLocalAddress = 5302 V4_PART_OF_V6(udp->udp_v6src); 5303 ude.udpLocalPort = ntohs(udp->udp_port); 5304 if (udp->udp_state == TS_DATA_XFER) { 5305 /* 5306 * Can potentially get here for 5307 * v6 socket if another process 5308 * (say, ping) has just done a 5309 * sendto(), changing the state 5310 * from the TS_IDLE above to 5311 * TS_DATA_XFER by the time we hit 5312 * this part of the code. 5313 */ 5314 ude.udpEntryInfo.ue_RemoteAddress = 5315 V4_PART_OF_V6(udp->udp_v6dst); 5316 ude.udpEntryInfo.ue_RemotePort = 5317 ntohs(udp->udp_dstport); 5318 } else { 5319 ude.udpEntryInfo.ue_RemoteAddress = 0; 5320 ude.udpEntryInfo.ue_RemotePort = 0; 5321 } 5322 5323 /* 5324 * We make the assumption that all udp_t 5325 * structs will be created within an address 5326 * region no larger than 32-bits. 5327 */ 5328 ude.udpInstance = (uint32_t)(uintptr_t)udp; 5329 ude.udpCreationProcess = 5330 (udp->udp_open_pid < 0) ? 5331 MIB2_UNKNOWN_PROCESS : 5332 udp->udp_open_pid; 5333 ude.udpCreationTime = udp->udp_open_time; 5334 5335 (void) snmp_append_data2(mp_conn_ctl->b_cont, 5336 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5337 mlp.tme_connidx = v4_conn_idx++; 5338 if (needattr) 5339 (void) snmp_append_data2( 5340 mp_attr_ctl->b_cont, &mp_attr_tail, 5341 (char *)&mlp, sizeof (mlp)); 5342 } 5343 if (udp->udp_ipversion == IPV6_VERSION) { 5344 ude6.udp6EntryInfo.ue_state = state; 5345 ude6.udp6LocalAddress = udp->udp_v6src; 5346 ude6.udp6LocalPort = ntohs(udp->udp_port); 5347 ude6.udp6IfIndex = udp->udp_bound_if; 5348 if (udp->udp_state == TS_DATA_XFER) { 5349 ude6.udp6EntryInfo.ue_RemoteAddress = 5350 udp->udp_v6dst; 5351 ude6.udp6EntryInfo.ue_RemotePort = 5352 ntohs(udp->udp_dstport); 5353 } else { 5354 ude6.udp6EntryInfo.ue_RemoteAddress = 5355 sin6_null.sin6_addr; 5356 ude6.udp6EntryInfo.ue_RemotePort = 0; 5357 } 5358 /* 5359 * We make the assumption that all udp_t 5360 * structs will be created within an address 5361 * region no larger than 32-bits. 
5362 */ 5363 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 5364 ude6.udp6CreationProcess = 5365 (udp->udp_open_pid < 0) ? 5366 MIB2_UNKNOWN_PROCESS : 5367 udp->udp_open_pid; 5368 ude6.udp6CreationTime = udp->udp_open_time; 5369 5370 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 5371 &mp6_conn_tail, (char *)&ude6, 5372 sizeof (ude6)); 5373 mlp.tme_connidx = v6_conn_idx++; 5374 if (needattr) 5375 (void) snmp_append_data2( 5376 mp6_attr_ctl->b_cont, 5377 &mp6_attr_tail, (char *)&mlp, 5378 sizeof (mlp)); 5379 } 5380 } 5381 } 5382 5383 /* IPv4 UDP endpoints */ 5384 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5385 sizeof (struct T_optmgmt_ack)]; 5386 optp->level = MIB2_UDP; 5387 optp->name = MIB2_UDP_ENTRY; 5388 optp->len = msgdsize(mp_conn_ctl->b_cont); 5389 qreply(q, mp_conn_ctl); 5390 5391 /* table of MLP attributes... */ 5392 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 5393 sizeof (struct T_optmgmt_ack)]; 5394 optp->level = MIB2_UDP; 5395 optp->name = EXPER_XPORT_MLP; 5396 optp->len = msgdsize(mp_attr_ctl->b_cont); 5397 if (optp->len == 0) 5398 freemsg(mp_attr_ctl); 5399 else 5400 qreply(q, mp_attr_ctl); 5401 5402 /* IPv6 UDP endpoints */ 5403 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5404 sizeof (struct T_optmgmt_ack)]; 5405 optp->level = MIB2_UDP6; 5406 optp->name = MIB2_UDP6_ENTRY; 5407 optp->len = msgdsize(mp6_conn_ctl->b_cont); 5408 qreply(q, mp6_conn_ctl); 5409 5410 /* table of MLP attributes... */ 5411 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 5412 sizeof (struct T_optmgmt_ack)]; 5413 optp->level = MIB2_UDP6; 5414 optp->name = EXPER_XPORT_MLP; 5415 optp->len = msgdsize(mp6_attr_ctl->b_cont); 5416 if (optp->len == 0) 5417 freemsg(mp6_attr_ctl); 5418 else 5419 qreply(q, mp6_attr_ctl); 5420 5421 return (mp2ctl); 5422 } 5423 5424 /* 5425 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 5426 * NOTE: Per MIB-II, UDP has no writable data. 5427 * TODO: If this ever actually tries to set anything, it needs to be 5428 * to do the appropriate locking. 
5429 */ 5430 /* ARGSUSED */ 5431 int 5432 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5433 uchar_t *ptr, int len) 5434 { 5435 switch (level) { 5436 case MIB2_UDP: 5437 return (0); 5438 default: 5439 return (1); 5440 } 5441 } 5442 5443 static void 5444 udp_report_item(mblk_t *mp, udp_t *udp) 5445 { 5446 char *state; 5447 char addrbuf1[INET6_ADDRSTRLEN]; 5448 char addrbuf2[INET6_ADDRSTRLEN]; 5449 uint_t print_len, buf_len; 5450 5451 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5452 ASSERT(buf_len >= 0); 5453 if (buf_len == 0) 5454 return; 5455 5456 if (udp->udp_state == TS_UNBND) 5457 state = "UNBOUND"; 5458 else if (udp->udp_state == TS_IDLE) 5459 state = "IDLE"; 5460 else if (udp->udp_state == TS_DATA_XFER) 5461 state = "CONNECTED"; 5462 else 5463 state = "UnkState"; 5464 print_len = snprintf((char *)mp->b_wptr, buf_len, 5465 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5466 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5467 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 5468 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 5469 ntohs(udp->udp_dstport), state); 5470 if (print_len < buf_len) { 5471 mp->b_wptr += print_len; 5472 } else { 5473 mp->b_wptr += buf_len; 5474 } 5475 } 5476 5477 /* Report for ndd "udp_status" */ 5478 /* ARGSUSED */ 5479 static int 5480 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5481 { 5482 zoneid_t zoneid; 5483 connf_t *connfp; 5484 conn_t *connp = Q_TO_CONN(q); 5485 udp_t *udp = connp->conn_udp; 5486 int i; 5487 udp_stack_t *us = udp->udp_us; 5488 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5489 5490 /* 5491 * Because of the ndd constraint, at most we can have 64K buffer 5492 * to put in all UDP info. So to be more efficient, just 5493 * allocate a 64K buffer here, assuming we need that large buffer. 5494 * This may be a problem as any user can read udp_status. Therefore 5495 * we limit the rate of doing this using us_ndd_get_info_interval. 5496 * This should be OK as normal users should not do this too often. 5497 */ 5498 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 5499 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 5500 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 5501 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5502 return (0); 5503 } 5504 } 5505 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5506 /* The following may work even if we cannot get a large buf. */ 5507 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5508 return (0); 5509 } 5510 (void) mi_mpprintf(mp, 5511 "UDP " MI_COL_HDRPAD_STR 5512 /* 12345678[89ABCDEF] */ 5513 " zone lport src addr dest addr port state"); 5514 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5515 5516 zoneid = connp->conn_zoneid; 5517 5518 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5519 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5520 connp = NULL; 5521 5522 while ((connp = ipcl_get_next_conn(connfp, connp, 5523 IPCL_UDPCONN))) { 5524 udp = connp->conn_udp; 5525 if (zoneid != GLOBAL_ZONEID && 5526 zoneid != connp->conn_zoneid) 5527 continue; 5528 5529 udp_report_item(mp->b_cont, udp); 5530 } 5531 } 5532 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 5533 return (0); 5534 } 5535 5536 /* 5537 * This routine creates a T_UDERROR_IND message and passes it upstream. 5538 * The address and options are copied from the T_UNITDATA_REQ message 5539 * passed in mp. This message is freed. 
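 * If mp is an M_DATA mblk there is no T_UNITDATA_REQ to copy from, so
 * the caller must supply destaddr and destlen, and no options are
 * included in the indication.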
5540 */ 5541 static void 5542 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5543 t_scalar_t err) 5544 { 5545 struct T_unitdata_req *tudr; 5546 mblk_t *mp1; 5547 uchar_t *optaddr; 5548 t_scalar_t optlen; 5549 5550 if (DB_TYPE(mp) == M_DATA) { 5551 ASSERT(destaddr != NULL && destlen != 0); 5552 optaddr = NULL; 5553 optlen = 0; 5554 } else { 5555 if ((mp->b_wptr < mp->b_rptr) || 5556 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5557 goto done; 5558 } 5559 tudr = (struct T_unitdata_req *)mp->b_rptr; 5560 destaddr = mp->b_rptr + tudr->DEST_offset; 5561 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5562 destaddr + tudr->DEST_length < mp->b_rptr || 5563 destaddr + tudr->DEST_length > mp->b_wptr) { 5564 goto done; 5565 } 5566 optaddr = mp->b_rptr + tudr->OPT_offset; 5567 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 5568 optaddr + tudr->OPT_length < mp->b_rptr || 5569 optaddr + tudr->OPT_length > mp->b_wptr) { 5570 goto done; 5571 } 5572 destlen = tudr->DEST_length; 5573 optlen = tudr->OPT_length; 5574 } 5575 5576 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5577 (char *)optaddr, optlen, err); 5578 if (mp1 != NULL) 5579 qreply(q, mp1); 5580 5581 done: 5582 freemsg(mp); 5583 } 5584 5585 /* 5586 * This routine removes a port number association from a stream. It 5587 * is called by udp_wput to handle T_UNBIND_REQ messages. 5588 */ 5589 static void 5590 udp_unbind(queue_t *q, mblk_t *mp) 5591 { 5592 udp_t *udp = Q_TO_UDP(q); 5593 udp_fanout_t *udpf; 5594 udp_stack_t *us = udp->udp_us; 5595 5596 if (cl_inet_unbind != NULL) { 5597 /* 5598 * Running in cluster mode - register unbind information 5599 */ 5600 if (udp->udp_ipversion == IPV4_VERSION) { 5601 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 5602 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5603 (in_port_t)udp->udp_port); 5604 } else { 5605 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5606 (uint8_t *)&(udp->udp_v6src), 5607 (in_port_t)udp->udp_port); 5608 } 5609 } 5610 5611 rw_enter(&udp->udp_rwlock, RW_WRITER); 5612 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 5613 rw_exit(&udp->udp_rwlock); 5614 udp_err_ack(q, mp, TOUTSTATE, 0); 5615 return; 5616 } 5617 udp->udp_pending_op = T_UNBIND_REQ; 5618 rw_exit(&udp->udp_rwlock); 5619 5620 /* 5621 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5622 * and therefore ip_unbind must never return NULL. 5623 */ 5624 mp = ip_unbind(q, mp); 5625 ASSERT(mp != NULL); 5626 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 5627 5628 /* 5629 * Once we're unbound from IP, the pending operation may be cleared 5630 * here. 5631 */ 5632 rw_enter(&udp->udp_rwlock, RW_WRITER); 5633 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 5634 us->us_bind_fanout_size)]; 5635 mutex_enter(&udpf->uf_lock); 5636 udp_bind_hash_remove(udp, B_TRUE); 5637 V6_SET_ZERO(udp->udp_v6src); 5638 V6_SET_ZERO(udp->udp_bound_v6src); 5639 udp->udp_port = 0; 5640 mutex_exit(&udpf->uf_lock); 5641 5642 udp->udp_pending_op = -1; 5643 udp->udp_state = TS_UNBND; 5644 if (udp->udp_family == AF_INET6) 5645 (void) udp_build_hdrs(udp); 5646 rw_exit(&udp->udp_rwlock); 5647 5648 qreply(q, mp); 5649 } 5650 5651 /* 5652 * Don't let port fall into the privileged range. 5653 * Since the extra privileged ports can be arbitrary we also 5654 * ensure that we exclude those from consideration. 5655 * us->us_epriv_ports is not sorted thus we loop over it until 5656 * there are no changes. 
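 * For the random case a 16-bit random value below the smallest anon
 * port is folded back into range; e.g. with the default range of
 * 32768 to 65535, a random value of 1000 maps to
 * 32768 + 1000 % (65535 - 32768) = 33768.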
5657 */ 5658 static in_port_t 5659 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 5660 { 5661 int i; 5662 in_port_t nextport; 5663 boolean_t restart = B_FALSE; 5664 udp_stack_t *us = udp->udp_us; 5665 5666 if (random && udp_random_anon_port != 0) { 5667 (void) random_get_pseudo_bytes((uint8_t *)&port, 5668 sizeof (in_port_t)); 5669 /* 5670 * Unless changed by a sys admin, the smallest anon port 5671 * is 32768 and the largest anon port is 65535. It is 5672 * very likely (50%) for the random port to be smaller 5673 * than the smallest anon port. When that happens, 5674 * add port % (anon port range) to the smallest anon 5675 * port to get the random port. It should fall into the 5676 * valid anon port range. 5677 */ 5678 if (port < us->us_smallest_anon_port) { 5679 port = us->us_smallest_anon_port + 5680 port % (us->us_largest_anon_port - 5681 us->us_smallest_anon_port); 5682 } 5683 } 5684 5685 retry: 5686 if (port < us->us_smallest_anon_port) 5687 port = us->us_smallest_anon_port; 5688 5689 if (port > us->us_largest_anon_port) { 5690 port = us->us_smallest_anon_port; 5691 if (restart) 5692 return (0); 5693 restart = B_TRUE; 5694 } 5695 5696 if (port < us->us_smallest_nonpriv_port) 5697 port = us->us_smallest_nonpriv_port; 5698 5699 for (i = 0; i < us->us_num_epriv_ports; i++) { 5700 if (port == us->us_epriv_ports[i]) { 5701 port++; 5702 /* 5703 * Make sure that the port is in the 5704 * valid range. 5705 */ 5706 goto retry; 5707 } 5708 } 5709 5710 if (is_system_labeled() && 5711 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 5712 port, IPPROTO_UDP, B_TRUE)) != 0) { 5713 port = nextport; 5714 goto retry; 5715 } 5716 5717 return (port); 5718 } 5719 5720 static int 5721 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 5722 { 5723 int err; 5724 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 5725 udp_t *udp = Q_TO_UDP(wq); 5726 udp_stack_t *us = udp->udp_us; 5727 5728 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 5729 opt_storage, udp->udp_connp->conn_mac_exempt, 5730 us->us_netstack->netstack_ip); 5731 if (err == 0) { 5732 err = tsol_update_options(&udp->udp_ip_snd_options, 5733 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 5734 opt_storage); 5735 } 5736 if (err != 0) { 5737 DTRACE_PROBE4( 5738 tx__ip__log__info__updatelabel__udp, 5739 char *, "queue(1) failed to update options(2) on mp(3)", 5740 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5741 } else { 5742 IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst); 5743 } 5744 return (err); 5745 } 5746 5747 static mblk_t * 5748 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5749 uint_t srcid, int *error, boolean_t insert_spi) 5750 { 5751 udp_t *udp = connp->conn_udp; 5752 queue_t *q = connp->conn_wq; 5753 mblk_t *mp1 = mp; 5754 mblk_t *mp2; 5755 ipha_t *ipha; 5756 int ip_hdr_length; 5757 uint32_t ip_len; 5758 udpha_t *udpha; 5759 boolean_t lock_held = B_FALSE; 5760 in_port_t uha_src_port; 5761 udpattrs_t attrs; 5762 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 5763 uint32_t ip_snd_opt_len = 0; 5764 ip4_pkt_t pktinfo; 5765 ip4_pkt_t *pktinfop = &pktinfo; 5766 ip_opt_info_t optinfo; 5767 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5768 udp_stack_t *us = udp->udp_us; 5769 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 5770 5771 5772 *error = 0; 5773 pktinfop->ip4_ill_index = 0; 5774 pktinfop->ip4_addr = INADDR_ANY; 5775 optinfo.ip_opt_flags = 0; 5776 optinfo.ip_opt_ill_index = 0; 5777 5778 if (v4dst == INADDR_ANY) 5779 v4dst = 
htonl(INADDR_LOOPBACK); 5780 5781 /* 5782 * If options passed in, feed it for verification and handling 5783 */ 5784 attrs.udpattr_credset = B_FALSE; 5785 if (DB_TYPE(mp) != M_DATA) { 5786 mp1 = mp->b_cont; 5787 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 5788 attrs.udpattr_ipp4 = pktinfop; 5789 attrs.udpattr_mb = mp; 5790 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 5791 goto done; 5792 /* 5793 * Note: success in processing options. 5794 * mp option buffer represented by 5795 * OPT_length/offset now potentially modified 5796 * and contain option setting results 5797 */ 5798 ASSERT(*error == 0); 5799 } 5800 } 5801 5802 /* mp1 points to the M_DATA mblk carrying the packet */ 5803 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5804 5805 rw_enter(&udp->udp_rwlock, RW_READER); 5806 lock_held = B_TRUE; 5807 /* 5808 * Check if our saved options are valid; update if not. 5809 * TSOL Note: Since we are not in WRITER mode, UDP packets 5810 * to different destination may require different labels, 5811 * or worse, UDP packets to same IP address may require 5812 * different labels due to use of shared all-zones address. 5813 * We use conn_lock to ensure that lastdst, ip_snd_options, 5814 * and ip_snd_options_len are consistent for the current 5815 * destination and are updated atomically. 5816 */ 5817 mutex_enter(&connp->conn_lock); 5818 if (is_system_labeled()) { 5819 /* Using UDP MLP requires SCM_UCRED from user */ 5820 if (connp->conn_mlp_type != mlptSingle && 5821 !attrs.udpattr_credset) { 5822 mutex_exit(&connp->conn_lock); 5823 DTRACE_PROBE4( 5824 tx__ip__log__info__output__udp, 5825 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5826 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5827 *error = ECONNREFUSED; 5828 goto done; 5829 } 5830 /* 5831 * update label option for this UDP socket if 5832 * - the destination has changed, or 5833 * - the UDP socket is MLP 5834 */ 5835 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5836 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5837 connp->conn_mlp_type != mlptSingle) && 5838 (*error = udp_update_label(q, mp, v4dst)) != 0) { 5839 mutex_exit(&connp->conn_lock); 5840 goto done; 5841 } 5842 } 5843 if (udp->udp_ip_snd_options_len > 0) { 5844 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5845 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5846 } 5847 mutex_exit(&connp->conn_lock); 5848 5849 /* Add an IP header */ 5850 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5851 (insert_spi ? sizeof (uint32_t) : 0); 5852 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5853 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5854 !OK_32PTR(ipha)) { 5855 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5856 if (mp2 == NULL) { 5857 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5858 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5859 *error = ENOMEM; 5860 goto done; 5861 } 5862 mp2->b_wptr = DB_LIM(mp2); 5863 mp2->b_cont = mp1; 5864 mp1 = mp2; 5865 if (DB_TYPE(mp) != M_DATA) 5866 mp->b_cont = mp1; 5867 else 5868 mp = mp1; 5869 5870 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5871 } 5872 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? 
sizeof (uint32_t) : 0)); 5873 #ifdef _BIG_ENDIAN 5874 /* Set version, header length, and tos */ 5875 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5876 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5877 udp->udp_type_of_service); 5878 /* Set ttl and protocol */ 5879 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5880 #else 5881 /* Set version, header length, and tos */ 5882 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5883 ((udp->udp_type_of_service << 8) | 5884 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5885 /* Set ttl and protocol */ 5886 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5887 #endif 5888 if (pktinfop->ip4_addr != INADDR_ANY) { 5889 ipha->ipha_src = pktinfop->ip4_addr; 5890 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5891 } else { 5892 /* 5893 * Copy our address into the packet. If this is zero, 5894 * first look at __sin6_src_id for a hint. If we leave the 5895 * source as INADDR_ANY then ip will fill in the real source 5896 * address. 5897 */ 5898 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5899 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5900 in6_addr_t v6src; 5901 5902 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5903 us->us_netstack); 5904 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5905 } 5906 } 5907 uha_src_port = udp->udp_port; 5908 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5909 rw_exit(&udp->udp_rwlock); 5910 lock_held = B_FALSE; 5911 } 5912 5913 if (pktinfop->ip4_ill_index != 0) { 5914 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5915 } 5916 5917 ipha->ipha_fragment_offset_and_flags = 0; 5918 ipha->ipha_ident = 0; 5919 5920 mp1->b_rptr = (uchar_t *)ipha; 5921 5922 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5923 (uintptr_t)UINT_MAX); 5924 5925 /* Determine length of packet */ 5926 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5927 if ((mp2 = mp1->b_cont) != NULL) { 5928 do { 5929 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5930 ip_len += (uint32_t)MBLKL(mp2); 5931 } while ((mp2 = mp2->b_cont) != NULL); 5932 } 5933 /* 5934 * If the size of the packet is greater than the maximum allowed by 5935 * ip, return an error. Passing this down could cause panics because 5936 * the size will have wrapped and be inconsistent with the msg size. 5937 */ 5938 if (ip_len > IP_MAXPACKET) { 5939 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5940 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5941 *error = EMSGSIZE; 5942 goto done; 5943 } 5944 ipha->ipha_length = htons((uint16_t)ip_len); 5945 ip_len -= ip_hdr_length; 5946 ip_len = htons((uint16_t)ip_len); 5947 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5948 5949 /* Insert all-0s SPI now. */ 5950 if (insert_spi) 5951 *((uint32_t *)(udpha + 1)) = 0; 5952 5953 /* 5954 * Copy in the destination address 5955 */ 5956 ipha->ipha_dst = v4dst; 5957 5958 /* 5959 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5960 */ 5961 if (CLASSD(v4dst)) 5962 ipha->ipha_ttl = udp->udp_multicast_ttl; 5963 5964 udpha->uha_dst_port = port; 5965 udpha->uha_src_port = uha_src_port; 5966 5967 if (ip_snd_opt_len > 0) { 5968 uint32_t cksum; 5969 5970 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5971 lock_held = B_FALSE; 5972 rw_exit(&udp->udp_rwlock); 5973 /* 5974 * Massage source route putting first source route in ipha_dst. 5975 * Ignore the destination in T_unitdata_req. 5976 * Create a checksum adjustment for a source route, if any. 
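 * The adjustment returned by ip_massage_options() is folded to 16 bits
 * in ones-complement fashion, the rewritten ipha_dst is subtracted out
 * and the UDP length is added, so that the value later stored through
 * uha_length/uha_checksum lets IP complete the pseudo-header checksum
 * when us_do_checksum is set.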
5977 */ 5978 cksum = ip_massage_options(ipha, us->us_netstack); 5979 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5980 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5981 (ipha->ipha_dst & 0xFFFF); 5982 if ((int)cksum < 0) 5983 cksum--; 5984 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5985 /* 5986 * IP does the checksum if uha_checksum is non-zero, 5987 * We make it easy for IP to include our pseudo header 5988 * by putting our length in uha_checksum. 5989 */ 5990 cksum += ip_len; 5991 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5992 /* There might be a carry. */ 5993 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5994 #ifdef _LITTLE_ENDIAN 5995 if (us->us_do_checksum) 5996 ip_len = (cksum << 16) | ip_len; 5997 #else 5998 if (us->us_do_checksum) 5999 ip_len = (ip_len << 16) | cksum; 6000 else 6001 ip_len <<= 16; 6002 #endif 6003 } else { 6004 /* 6005 * IP does the checksum if uha_checksum is non-zero, 6006 * We make it easy for IP to include our pseudo header 6007 * by putting our length in uha_checksum. 6008 */ 6009 if (us->us_do_checksum) 6010 ip_len |= (ip_len << 16); 6011 #ifndef _LITTLE_ENDIAN 6012 else 6013 ip_len <<= 16; 6014 #endif 6015 } 6016 ASSERT(!lock_held); 6017 /* Set UDP length and checksum */ 6018 *((uint32_t *)&udpha->uha_length) = ip_len; 6019 if (DB_CRED(mp) != NULL) 6020 mblk_setcred(mp1, DB_CRED(mp)); 6021 6022 if (DB_TYPE(mp) != M_DATA) { 6023 ASSERT(mp != mp1); 6024 freeb(mp); 6025 } 6026 6027 /* mp has been consumed and we'll return success */ 6028 ASSERT(*error == 0); 6029 mp = NULL; 6030 6031 /* We're done. Pass the packet to ip. */ 6032 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6033 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6034 "udp_wput_end: q %p (%S)", q, "end"); 6035 6036 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 6037 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 6038 connp->conn_dontroute || 6039 connp->conn_nofailover_ill != NULL || 6040 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 6041 optinfo.ip_opt_ill_index != 0 || 6042 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 6043 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 6044 ipst->ips_ip_g_mrouter != NULL) { 6045 UDP_STAT(us, udp_ip_send); 6046 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 6047 &optinfo); 6048 } else { 6049 udp_send_data(udp, connp->conn_wq, mp1, ipha); 6050 } 6051 6052 done: 6053 if (lock_held) 6054 rw_exit(&udp->udp_rwlock); 6055 if (*error != 0) { 6056 ASSERT(mp != NULL); 6057 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6058 } 6059 return (mp); 6060 } 6061 6062 static void 6063 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 6064 { 6065 conn_t *connp = udp->udp_connp; 6066 ipaddr_t src, dst; 6067 ire_t *ire; 6068 ipif_t *ipif = NULL; 6069 mblk_t *ire_fp_mp; 6070 boolean_t retry_caching; 6071 udp_stack_t *us = udp->udp_us; 6072 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6073 6074 dst = ipha->ipha_dst; 6075 src = ipha->ipha_src; 6076 ASSERT(ipha->ipha_ident == 0); 6077 6078 if (CLASSD(dst)) { 6079 int err; 6080 6081 ipif = conn_get_held_ipif(connp, 6082 &connp->conn_multicast_ipif, &err); 6083 6084 if (ipif == NULL || ipif->ipif_isv6 || 6085 (ipif->ipif_ill->ill_phyint->phyint_flags & 6086 PHYI_LOOPBACK)) { 6087 if (ipif != NULL) 6088 ipif_refrele(ipif); 6089 UDP_STAT(us, udp_ip_send); 6090 ip_output(connp, mp, q, IP_WPUT); 6091 return; 6092 } 6093 } 6094 6095 retry_caching = B_FALSE; 6096 mutex_enter(&connp->conn_lock); 6097 ire = connp->conn_ire_cache; 6098 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 6099 6100 if (ire == NULL 
|| ire->ire_addr != dst || 6101 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 6102 retry_caching = B_TRUE; 6103 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 6104 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6105 6106 ASSERT(ipif != NULL); 6107 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6108 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6109 retry_caching = B_TRUE; 6110 } 6111 6112 if (!retry_caching) { 6113 ASSERT(ire != NULL); 6114 IRE_REFHOLD(ire); 6115 mutex_exit(&connp->conn_lock); 6116 } else { 6117 boolean_t cached = B_FALSE; 6118 6119 connp->conn_ire_cache = NULL; 6120 mutex_exit(&connp->conn_lock); 6121 6122 /* Release the old ire */ 6123 if (ire != NULL) { 6124 IRE_REFRELE_NOTR(ire); 6125 ire = NULL; 6126 } 6127 6128 if (CLASSD(dst)) { 6129 ASSERT(ipif != NULL); 6130 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6131 connp->conn_zoneid, MBLK_GETLABEL(mp), 6132 MATCH_IRE_ILL_GROUP, ipst); 6133 } else { 6134 ASSERT(ipif == NULL); 6135 ire = ire_cache_lookup(dst, connp->conn_zoneid, 6136 MBLK_GETLABEL(mp), ipst); 6137 } 6138 6139 if (ire == NULL) { 6140 if (ipif != NULL) 6141 ipif_refrele(ipif); 6142 UDP_STAT(us, udp_ire_null); 6143 ip_output(connp, mp, q, IP_WPUT); 6144 return; 6145 } 6146 IRE_REFHOLD_NOTR(ire); 6147 6148 mutex_enter(&connp->conn_lock); 6149 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 6150 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6151 irb_t *irb = ire->ire_bucket; 6152 6153 /* 6154 * IRE's created for non-connection oriented transports 6155 * are normally initialized with IRE_MARK_TEMPORARY set 6156 * in the ire_marks. These IRE's are preferentially 6157 * reaped when the hash chain length in the cache 6158 * bucket exceeds the maximum value specified in 6159 * ip[6]_ire_max_bucket_cnt. This can severely affect 6160 * UDP performance if IRE cache entries that we need 6161 * to reuse are continually removed. To remedy this, 6162 * when we cache the IRE in the conn_t, we remove the 6163 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 6164 * set. 6165 */ 6166 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 6167 rw_enter(&irb->irb_lock, RW_WRITER); 6168 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 6169 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 6170 irb->irb_tmp_ire_cnt--; 6171 } 6172 rw_exit(&irb->irb_lock); 6173 } 6174 connp->conn_ire_cache = ire; 6175 cached = B_TRUE; 6176 } 6177 mutex_exit(&connp->conn_lock); 6178 6179 /* 6180 * We can continue to use the ire but since it was not 6181 * cached, we should drop the extra reference. 6182 */ 6183 if (!cached) 6184 IRE_REFRELE_NOTR(ire); 6185 } 6186 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6187 ASSERT(!CLASSD(dst) || ipif != NULL); 6188 6189 /* 6190 * Check if we can take the fast-path. 
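 * (Added note: in the fast path, udp_xmit() below prepends the cached
 * link-layer header from ire->ire_nce->nce_fp_mp and hands the packet
 * to the link layer, either directly to DLD or with putnext() on
 * ire_stq, instead of going through ip_output().)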
6191 * Note that "incomplete" ire's (where the link-layer for next hop 6192 * is not resolved, or where the fast-path header in nce_fp_mp is not 6193 * available yet) are sent down the legacy (slow) path 6194 */ 6195 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6196 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 6197 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 6198 ((ire->ire_nce == NULL) || 6199 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 6200 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 6201 if (ipif != NULL) 6202 ipif_refrele(ipif); 6203 UDP_STAT(us, udp_ip_ire_send); 6204 IRE_REFRELE(ire); 6205 ip_output(connp, mp, q, IP_WPUT); 6206 return; 6207 } 6208 6209 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6210 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6211 ipha->ipha_src = ipif->ipif_src_addr; 6212 else 6213 ipha->ipha_src = ire->ire_src_addr; 6214 } 6215 6216 if (ipif != NULL) 6217 ipif_refrele(ipif); 6218 6219 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 6220 } 6221 6222 static void 6223 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 6224 { 6225 ipaddr_t src, dst; 6226 ill_t *ill; 6227 mblk_t *ire_fp_mp; 6228 uint_t ire_fp_mp_len; 6229 uint16_t *up; 6230 uint32_t cksum, hcksum_txflags; 6231 queue_t *dev_q; 6232 udp_t *udp = connp->conn_udp; 6233 ipha_t *ipha = (ipha_t *)mp->b_rptr; 6234 udp_stack_t *us = udp->udp_us; 6235 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6236 boolean_t ll_multicast = B_FALSE; 6237 6238 dev_q = ire->ire_stq->q_next; 6239 ASSERT(dev_q != NULL); 6240 6241 6242 if (DEV_Q_IS_FLOW_CTLED(dev_q)) { 6243 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 6244 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 6245 if (ipst->ips_ip_output_queue) 6246 (void) putq(connp->conn_wq, mp); 6247 else 6248 freemsg(mp); 6249 ire_refrele(ire); 6250 return; 6251 } 6252 6253 ire_fp_mp = ire->ire_nce->nce_fp_mp; 6254 ire_fp_mp_len = MBLKL(ire_fp_mp); 6255 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 6256 6257 dst = ipha->ipha_dst; 6258 src = ipha->ipha_src; 6259 6260 ill = ire_to_ill(ire); 6261 ASSERT(ill != NULL); 6262 6263 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 6264 6265 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6266 #ifndef _BIG_ENDIAN 6267 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6268 #endif 6269 6270 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6271 ASSERT(ill->ill_hcksum_capab != NULL); 6272 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6273 } else { 6274 hcksum_txflags = 0; 6275 } 6276 6277 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6278 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6279 6280 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6281 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6282 if (*up != 0) { 6283 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6284 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6285 ntohs(ipha->ipha_length), cksum); 6286 6287 /* Software checksum? 
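 * (Added note: if IP_CKSUM_XMIT_FAST could not hand the checksum off
 * to the hardware, it computed the UDP checksum in software and left
 * DB_CKSUMFLAGS(mp) clear; that is what the counters below record.)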
*/ 6288 if (DB_CKSUMFLAGS(mp) == 0) { 6289 UDP_STAT(us, udp_out_sw_cksum); 6290 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 6291 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6292 } 6293 } 6294 6295 if (!CLASSD(dst)) { 6296 ipha->ipha_fragment_offset_and_flags |= 6297 (uint32_t)htons(ire->ire_frag_flag); 6298 } 6299 6300 /* Calculate IP header checksum if hardware isn't capable */ 6301 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6302 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6303 ((uint16_t *)ipha)[4]); 6304 } 6305 6306 if (CLASSD(dst)) { 6307 boolean_t ilm_exists; 6308 6309 ILM_WALKER_HOLD(ill); 6310 ilm_exists = (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL); 6311 ILM_WALKER_RELE(ill); 6312 if (ilm_exists) { 6313 ip_multicast_loopback(q, ill, mp, 6314 connp->conn_multicast_loop ? 0 : 6315 IP_FF_NO_MCAST_LOOP, zoneid); 6316 } 6317 6318 /* If multicast TTL is 0 then we are done */ 6319 if (ipha->ipha_ttl == 0) { 6320 freemsg(mp); 6321 ire_refrele(ire); 6322 return; 6323 } 6324 ll_multicast = B_TRUE; 6325 } 6326 6327 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6328 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6329 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6330 6331 UPDATE_OB_PKT_COUNT(ire); 6332 ire->ire_last_used_time = lbolt; 6333 6334 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 6335 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 6336 ntohs(ipha->ipha_length)); 6337 6338 if (ILL_DLS_CAPABLE(ill)) { 6339 /* 6340 * Send the packet directly to DLD, where it may be queued 6341 * depending on the availability of transmit resources at 6342 * the media layer. 6343 */ 6344 IP_DLS_ILL_TX(ill, ipha, mp, ipst); 6345 } else { 6346 DTRACE_PROBE4(ip4__physical__out__start, 6347 ill_t *, NULL, ill_t *, ill, 6348 ipha_t *, ipha, mblk_t *, mp); 6349 FW_HOOKS(ipst->ips_ip4_physical_out_event, 6350 ipst->ips_ipv4firewall_physical_out, 6351 NULL, ill, ipha, mp, mp, ll_multicast, ipst); 6352 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 6353 if (mp != NULL) { 6354 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 6355 void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, 6356 ipha_t *, ipha, ip6_t *, NULL, int, 0); 6357 putnext(ire->ire_stq, mp); 6358 } 6359 } 6360 6361 IRE_REFRELE(ire); 6362 } 6363 6364 static boolean_t 6365 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 6366 { 6367 udp_t *udp = Q_TO_UDP(wq); 6368 int err; 6369 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 6370 udp_stack_t *us = udp->udp_us; 6371 6372 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 6373 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 6374 us->us_netstack->netstack_ip); 6375 if (err == 0) { 6376 err = tsol_update_sticky(&udp->udp_sticky_ipp, 6377 &udp->udp_label_len_v6, opt_storage); 6378 } 6379 if (err != 0) { 6380 DTRACE_PROBE4( 6381 tx__ip__log__drop__updatelabel__udp6, 6382 char *, "queue(1) failed to update options(2) on mp(3)", 6383 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6384 } else { 6385 udp->udp_v6lastdst = *dst; 6386 } 6387 return (err); 6388 } 6389 6390 void 6391 udp_output_connected(void *arg, mblk_t *mp) 6392 { 6393 conn_t *connp = (conn_t *)arg; 6394 udp_t *udp = connp->conn_udp; 6395 udp_stack_t *us = udp->udp_us; 6396 ipaddr_t v4dst; 6397 in_port_t dstport; 6398 boolean_t mapped_addr; 6399 struct sockaddr_storage ss; 6400 sin_t *sin; 6401 sin6_t *sin6; 6402 struct sockaddr *addr; 6403 socklen_t addrlen; 6404 int error; 6405 boolean_t insert_spi = udp->udp_nat_t_endpoint; 6406 6407 /* M_DATA for connected socket */ 6408 6409 
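	/*
	 * Sketch of the flow below (added commentary): the destination
	 * cached at connect time in udp_v6dst/udp_dstport is rebuilt into
	 * a sockaddr under conn_lock, exactly as if the application had
	 * supplied it, and the datagram is then handed to udp_output_v4()
	 * or udp_output_v6().  From userland's point of view this is
	 * roughly equivalent to:
	 *
	 *	sendto(fd, buf, len, 0,
	 *	    (struct sockaddr *)&cached_dst, cached_dstlen);
	 *
	 * where cached_dst/cached_dstlen are hypothetical names for the
	 * reconstructed address.  Errors take the same udp_ud_err() path
	 * as a failed T_UNITDATA_REQ.
	 */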
ASSERT(udp->udp_issocket); 6410 UDP_DBGSTAT(us, udp_data_conn); 6411 6412 mutex_enter(&connp->conn_lock); 6413 if (udp->udp_state != TS_DATA_XFER) { 6414 mutex_exit(&connp->conn_lock); 6415 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6416 UDP_STAT(us, udp_out_err_notconn); 6417 freemsg(mp); 6418 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6419 "udp_wput_end: connp %p (%S)", connp, 6420 "not-connected; address required"); 6421 return; 6422 } 6423 6424 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 6425 if (mapped_addr) 6426 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6427 6428 /* Initialize addr and addrlen as if they're passed in */ 6429 if (udp->udp_family == AF_INET) { 6430 sin = (sin_t *)&ss; 6431 sin->sin_family = AF_INET; 6432 dstport = sin->sin_port = udp->udp_dstport; 6433 ASSERT(mapped_addr); 6434 sin->sin_addr.s_addr = v4dst; 6435 addr = (struct sockaddr *)sin; 6436 addrlen = sizeof (*sin); 6437 } else { 6438 sin6 = (sin6_t *)&ss; 6439 sin6->sin6_family = AF_INET6; 6440 dstport = sin6->sin6_port = udp->udp_dstport; 6441 sin6->sin6_flowinfo = udp->udp_flowinfo; 6442 sin6->sin6_addr = udp->udp_v6dst; 6443 sin6->sin6_scope_id = 0; 6444 sin6->__sin6_src_id = 0; 6445 addr = (struct sockaddr *)sin6; 6446 addrlen = sizeof (*sin6); 6447 } 6448 mutex_exit(&connp->conn_lock); 6449 6450 if (mapped_addr) { 6451 /* 6452 * Handle both AF_INET and AF_INET6; the latter 6453 * for IPV4 mapped destination addresses. Note 6454 * here that both addr and addrlen point to the 6455 * corresponding struct depending on the address 6456 * family of the socket. 6457 */ 6458 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 6459 insert_spi); 6460 } else { 6461 mp = udp_output_v6(connp, mp, sin6, &error); 6462 } 6463 if (error == 0) { 6464 ASSERT(mp == NULL); 6465 return; 6466 } 6467 6468 UDP_STAT(us, udp_out_err_output); 6469 ASSERT(mp != NULL); 6470 /* mp is freed by the following routine */ 6471 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6472 (t_scalar_t)error); 6473 } 6474 6475 /* 6476 * This routine handles all messages passed downstream. It either 6477 * consumes the message or passes it downstream; it never queues a 6478 * a message. 6479 * 6480 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 6481 * is valid when we are directly beneath the stream head, and thus sockfs 6482 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6483 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 6484 * connected endpoints. 6485 */ 6486 void 6487 udp_wput(queue_t *q, mblk_t *mp) 6488 { 6489 sin6_t *sin6; 6490 sin_t *sin; 6491 ipaddr_t v4dst; 6492 uint16_t port; 6493 uint_t srcid; 6494 conn_t *connp = Q_TO_CONN(q); 6495 udp_t *udp = connp->conn_udp; 6496 int error = 0; 6497 struct sockaddr *addr; 6498 socklen_t addrlen; 6499 udp_stack_t *us = udp->udp_us; 6500 boolean_t insert_spi = udp->udp_nat_t_endpoint; 6501 6502 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6503 "udp_wput_start: queue %p mp %p", q, mp); 6504 6505 /* 6506 * We directly handle several cases here: T_UNITDATA_REQ message 6507 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 6508 * socket. 6509 */ 6510 switch (DB_TYPE(mp)) { 6511 case M_DATA: 6512 /* 6513 * Quick check for error cases. 
Checks will be done again 6514 * under the lock later on 6515 */ 6516 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6517 /* Not connected; address is required */ 6518 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6519 UDP_STAT(us, udp_out_err_notconn); 6520 freemsg(mp); 6521 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6522 "udp_wput_end: connp %p (%S)", connp, 6523 "not-connected; address required"); 6524 return; 6525 } 6526 udp_output_connected(connp, mp); 6527 return; 6528 6529 case M_PROTO: 6530 case M_PCPROTO: { 6531 struct T_unitdata_req *tudr; 6532 6533 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6534 tudr = (struct T_unitdata_req *)mp->b_rptr; 6535 6536 /* Handle valid T_UNITDATA_REQ here */ 6537 if (MBLKL(mp) >= sizeof (*tudr) && 6538 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6539 if (mp->b_cont == NULL) { 6540 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6541 "udp_wput_end: q %p (%S)", q, "badaddr"); 6542 error = EPROTO; 6543 goto ud_error; 6544 } 6545 6546 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6547 tudr->DEST_length)) { 6548 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6549 "udp_wput_end: q %p (%S)", q, "badaddr"); 6550 error = EADDRNOTAVAIL; 6551 goto ud_error; 6552 } 6553 /* 6554 * If a port has not been bound to the stream, fail. 6555 * This is not a problem when sockfs is directly 6556 * above us, because it will ensure that the socket 6557 * is first bound before allowing data to be sent. 6558 */ 6559 if (udp->udp_state == TS_UNBND) { 6560 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6561 "udp_wput_end: q %p (%S)", q, "outstate"); 6562 error = EPROTO; 6563 goto ud_error; 6564 } 6565 addr = (struct sockaddr *) 6566 &mp->b_rptr[tudr->DEST_offset]; 6567 addrlen = tudr->DEST_length; 6568 if (tudr->OPT_length != 0) 6569 UDP_STAT(us, udp_out_opt); 6570 break; 6571 } 6572 /* FALLTHRU */ 6573 } 6574 default: 6575 udp_wput_other(q, mp); 6576 return; 6577 } 6578 ASSERT(addr != NULL); 6579 6580 switch (udp->udp_family) { 6581 case AF_INET6: 6582 sin6 = (sin6_t *)addr; 6583 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 6584 (sin6->sin6_family != AF_INET6)) { 6585 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6586 "udp_wput_end: q %p (%S)", q, "badaddr"); 6587 error = EADDRNOTAVAIL; 6588 goto ud_error; 6589 } 6590 6591 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6592 /* 6593 * Destination is a non-IPv4-compatible IPv6 address. 6594 * Send out an IPv6 format packet. 6595 */ 6596 mp = udp_output_v6(connp, mp, sin6, &error); 6597 if (error != 0) 6598 goto ud_error; 6599 6600 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6601 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6602 return; 6603 } 6604 /* 6605 * If the local address is not zero or a mapped address 6606 * return an error. It would be possible to send an IPv4 6607 * packet but the response would never make it back to the 6608 * application since it is bound to a non-mapped address. 
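 * For example (added, hypothetical addresses): a socket bound to
 * 2001:db8::1 that issues a sendto() to ::ffff:192.0.2.1 is rejected
 * here with EADDRNOTAVAIL instead of emitting an IPv4 packet whose
 * replies could never reach that IPv6 source.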
6609 */ 6610 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6611 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6612 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6613 "udp_wput_end: q %p (%S)", q, "badaddr"); 6614 error = EADDRNOTAVAIL; 6615 goto ud_error; 6616 } 6617 /* Send IPv4 packet without modifying udp_ipversion */ 6618 /* Extract port and ipaddr */ 6619 port = sin6->sin6_port; 6620 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6621 srcid = sin6->__sin6_src_id; 6622 break; 6623 6624 case AF_INET: 6625 sin = (sin_t *)addr; 6626 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 6627 (sin->sin_family != AF_INET)) { 6628 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6629 "udp_wput_end: q %p (%S)", q, "badaddr"); 6630 error = EADDRNOTAVAIL; 6631 goto ud_error; 6632 } 6633 /* Extract port and ipaddr */ 6634 port = sin->sin_port; 6635 v4dst = sin->sin_addr.s_addr; 6636 srcid = 0; 6637 break; 6638 } 6639 6640 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi); 6641 if (error != 0) { 6642 ud_error: 6643 UDP_STAT(us, udp_out_err_output); 6644 ASSERT(mp != NULL); 6645 /* mp is freed by the following routine */ 6646 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6647 (t_scalar_t)error); 6648 } 6649 } 6650 6651 /* 6652 * udp_output_v6(): 6653 * Assumes that udp_wput did some sanity checking on the destination 6654 * address. 6655 */ 6656 static mblk_t * 6657 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) 6658 { 6659 ip6_t *ip6h; 6660 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6661 mblk_t *mp1 = mp; 6662 mblk_t *mp2; 6663 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6664 size_t ip_len; 6665 udpha_t *udph; 6666 udp_t *udp = connp->conn_udp; 6667 queue_t *q = connp->conn_wq; 6668 ip6_pkt_t ipp_s; /* For ancillary data options */ 6669 ip6_pkt_t *ipp = &ipp_s; 6670 ip6_pkt_t *tipp; /* temporary ipp */ 6671 uint32_t csum = 0; 6672 uint_t ignore = 0; 6673 uint_t option_exists = 0, is_sticky = 0; 6674 uint8_t *cp; 6675 uint8_t *nxthdr_ptr; 6676 in6_addr_t ip6_dst; 6677 udpattrs_t attrs; 6678 boolean_t opt_present; 6679 ip6_hbh_t *hopoptsptr = NULL; 6680 uint_t hopoptslen = 0; 6681 boolean_t is_ancillary = B_FALSE; 6682 udp_stack_t *us = udp->udp_us; 6683 size_t sth_wroff = 0; 6684 6685 *error = 0; 6686 6687 /* 6688 * If the local address is a mapped address return 6689 * an error. 6690 * It would be possible to send an IPv6 packet but the 6691 * response would never make it back to the application 6692 * since it is bound to a mapped address. 6693 */ 6694 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6695 *error = EADDRNOTAVAIL; 6696 goto done; 6697 } 6698 6699 ipp->ipp_fields = 0; 6700 ipp->ipp_sticky_ignored = 0; 6701 6702 /* 6703 * If TPI options passed in, feed it for verification and handling 6704 */ 6705 attrs.udpattr_credset = B_FALSE; 6706 opt_present = B_FALSE; 6707 if (DB_TYPE(mp) != M_DATA) { 6708 mp1 = mp->b_cont; 6709 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 6710 attrs.udpattr_ipp6 = ipp; 6711 attrs.udpattr_mb = mp; 6712 if (udp_unitdata_opt_process(q, mp, error, 6713 &attrs) < 0) { 6714 goto done; 6715 } 6716 ASSERT(*error == 0); 6717 opt_present = B_TRUE; 6718 } 6719 } 6720 rw_enter(&udp->udp_rwlock, RW_READER); 6721 ignore = ipp->ipp_sticky_ignored; 6722 6723 /* mp1 points to the M_DATA mblk carrying the packet */ 6724 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6725 6726 if (sin6->sin6_scope_id != 0 && 6727 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6728 /* 6729 * IPPF_SCOPE_ID is special. 
It's neither a sticky 6730 * option nor ancillary data. It needs to be 6731 * explicitly set in options_exists. 6732 */ 6733 option_exists |= IPPF_SCOPE_ID; 6734 } 6735 6736 /* 6737 * Compute the destination address 6738 */ 6739 ip6_dst = sin6->sin6_addr; 6740 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6741 ip6_dst = ipv6_loopback; 6742 6743 /* 6744 * If we're not going to the same destination as last time, then 6745 * recompute the label required. This is done in a separate routine to 6746 * avoid blowing up our stack here. 6747 * 6748 * TSOL Note: Since we are not in WRITER mode, UDP packets 6749 * to different destination may require different labels, 6750 * or worse, UDP packets to same IP address may require 6751 * different labels due to use of shared all-zones address. 6752 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6753 * and sticky ipp_hopoptslen are consistent for the current 6754 * destination and are updated atomically. 6755 */ 6756 mutex_enter(&connp->conn_lock); 6757 if (is_system_labeled()) { 6758 /* Using UDP MLP requires SCM_UCRED from user */ 6759 if (connp->conn_mlp_type != mlptSingle && 6760 !attrs.udpattr_credset) { 6761 DTRACE_PROBE4( 6762 tx__ip__log__info__output__udp6, 6763 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6764 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6765 *error = ECONNREFUSED; 6766 rw_exit(&udp->udp_rwlock); 6767 mutex_exit(&connp->conn_lock); 6768 goto done; 6769 } 6770 /* 6771 * update label option for this UDP socket if 6772 * - the destination has changed, or 6773 * - the UDP socket is MLP 6774 */ 6775 if ((opt_present || 6776 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6777 connp->conn_mlp_type != mlptSingle) && 6778 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 6779 rw_exit(&udp->udp_rwlock); 6780 mutex_exit(&connp->conn_lock); 6781 goto done; 6782 } 6783 } 6784 6785 /* 6786 * If there's a security label here, then we ignore any options the 6787 * user may try to set. We keep the peer's label as a hidden sticky 6788 * option. We make a private copy of this label before releasing the 6789 * lock so that label is kept consistent with the destination addr. 6790 */ 6791 if (udp->udp_label_len_v6 > 0) { 6792 ignore &= ~IPPF_HOPOPTS; 6793 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6794 } 6795 6796 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6797 /* No sticky options nor ancillary data. */ 6798 mutex_exit(&connp->conn_lock); 6799 goto no_options; 6800 } 6801 6802 /* 6803 * Go through the options figuring out where each is going to 6804 * come from and build two masks. The first mask indicates if 6805 * the option exists at all. The second mask indicates if the 6806 * option is sticky or ancillary. 
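 * Worked example (added, hypothetical state, assuming neither option
 * is in the ignore mask): an ancillary IPV6_RTHDR in ipp combined
 * with a sticky IPV6_DSTOPTS set earlier via setsockopt() leaves
 *	option_exists = IPPF_RTHDR | IPPF_DSTOPTS
 *	is_sticky     = IPPF_DSTOPTS
 * so ANCIL_OR_STICKY_PTR() later takes the routing header from ipp
 * and the destination options from udp_sticky_ipp.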
6807 */ 6808 if (!(ignore & IPPF_HOPOPTS)) { 6809 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6810 option_exists |= IPPF_HOPOPTS; 6811 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6812 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6813 option_exists |= IPPF_HOPOPTS; 6814 is_sticky |= IPPF_HOPOPTS; 6815 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6816 hopoptsptr = kmem_alloc( 6817 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6818 if (hopoptsptr == NULL) { 6819 *error = ENOMEM; 6820 mutex_exit(&connp->conn_lock); 6821 goto done; 6822 } 6823 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6824 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6825 hopoptslen); 6826 udp_ip_hdr_len += hopoptslen; 6827 } 6828 } 6829 mutex_exit(&connp->conn_lock); 6830 6831 if (!(ignore & IPPF_RTHDR)) { 6832 if (ipp->ipp_fields & IPPF_RTHDR) { 6833 option_exists |= IPPF_RTHDR; 6834 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6835 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6836 option_exists |= IPPF_RTHDR; 6837 is_sticky |= IPPF_RTHDR; 6838 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6839 } 6840 } 6841 6842 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6843 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6844 option_exists |= IPPF_RTDSTOPTS; 6845 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6846 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6847 option_exists |= IPPF_RTDSTOPTS; 6848 is_sticky |= IPPF_RTDSTOPTS; 6849 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6850 } 6851 } 6852 6853 if (!(ignore & IPPF_DSTOPTS)) { 6854 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6855 option_exists |= IPPF_DSTOPTS; 6856 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6857 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6858 option_exists |= IPPF_DSTOPTS; 6859 is_sticky |= IPPF_DSTOPTS; 6860 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6861 } 6862 } 6863 6864 if (!(ignore & IPPF_IFINDEX)) { 6865 if (ipp->ipp_fields & IPPF_IFINDEX) { 6866 option_exists |= IPPF_IFINDEX; 6867 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6868 option_exists |= IPPF_IFINDEX; 6869 is_sticky |= IPPF_IFINDEX; 6870 } 6871 } 6872 6873 if (!(ignore & IPPF_ADDR)) { 6874 if (ipp->ipp_fields & IPPF_ADDR) { 6875 option_exists |= IPPF_ADDR; 6876 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6877 option_exists |= IPPF_ADDR; 6878 is_sticky |= IPPF_ADDR; 6879 } 6880 } 6881 6882 if (!(ignore & IPPF_DONTFRAG)) { 6883 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6884 option_exists |= IPPF_DONTFRAG; 6885 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6886 option_exists |= IPPF_DONTFRAG; 6887 is_sticky |= IPPF_DONTFRAG; 6888 } 6889 } 6890 6891 if (!(ignore & IPPF_USE_MIN_MTU)) { 6892 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6893 option_exists |= IPPF_USE_MIN_MTU; 6894 } else if (udp->udp_sticky_ipp.ipp_fields & 6895 IPPF_USE_MIN_MTU) { 6896 option_exists |= IPPF_USE_MIN_MTU; 6897 is_sticky |= IPPF_USE_MIN_MTU; 6898 } 6899 } 6900 6901 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6902 option_exists |= IPPF_HOPLIMIT; 6903 /* IPV6_HOPLIMIT can never be sticky */ 6904 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6905 6906 if (!(ignore & IPPF_UNICAST_HOPS) && 6907 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6908 option_exists |= IPPF_UNICAST_HOPS; 6909 is_sticky |= IPPF_UNICAST_HOPS; 6910 } 6911 6912 if (!(ignore & IPPF_MULTICAST_HOPS) && 6913 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6914 option_exists |= 
IPPF_MULTICAST_HOPS; 6915 is_sticky |= IPPF_MULTICAST_HOPS; 6916 } 6917 6918 if (!(ignore & IPPF_TCLASS)) { 6919 if (ipp->ipp_fields & IPPF_TCLASS) { 6920 option_exists |= IPPF_TCLASS; 6921 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6922 option_exists |= IPPF_TCLASS; 6923 is_sticky |= IPPF_TCLASS; 6924 } 6925 } 6926 6927 if (!(ignore & IPPF_NEXTHOP) && 6928 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6929 option_exists |= IPPF_NEXTHOP; 6930 is_sticky |= IPPF_NEXTHOP; 6931 } 6932 6933 no_options: 6934 6935 /* 6936 * If any options carried in the ip6i_t were specified, we 6937 * need to account for the ip6i_t in the data we'll be sending 6938 * down. 6939 */ 6940 if (option_exists & IPPF_HAS_IP6I) 6941 udp_ip_hdr_len += sizeof (ip6i_t); 6942 6943 /* check/fix buffer config, setup pointers into it */ 6944 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6945 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6946 !OK_32PTR(ip6h)) { 6947 6948 /* Try to get everything in a single mblk next time */ 6949 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6950 udp->udp_max_hdr_len = udp_ip_hdr_len; 6951 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6952 } 6953 6954 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6955 if (mp2 == NULL) { 6956 *error = ENOMEM; 6957 rw_exit(&udp->udp_rwlock); 6958 goto done; 6959 } 6960 mp2->b_wptr = DB_LIM(mp2); 6961 mp2->b_cont = mp1; 6962 mp1 = mp2; 6963 if (DB_TYPE(mp) != M_DATA) 6964 mp->b_cont = mp1; 6965 else 6966 mp = mp1; 6967 6968 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6969 } 6970 mp1->b_rptr = (unsigned char *)ip6h; 6971 ip6i = (ip6i_t *)ip6h; 6972 6973 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6974 if (option_exists & IPPF_HAS_IP6I) { 6975 ip6h = (ip6_t *)&ip6i[1]; 6976 ip6i->ip6i_flags = 0; 6977 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6978 6979 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6980 if (option_exists & IPPF_SCOPE_ID) { 6981 ip6i->ip6i_flags |= IP6I_IFINDEX; 6982 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6983 } else if (option_exists & IPPF_IFINDEX) { 6984 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6985 ASSERT(tipp->ipp_ifindex != 0); 6986 ip6i->ip6i_flags |= IP6I_IFINDEX; 6987 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6988 } 6989 6990 if (option_exists & IPPF_ADDR) { 6991 /* 6992 * Enable per-packet source address verification if 6993 * IPV6_PKTINFO specified the source address. 6994 * ip6_src is set in the transport's _wput function. 6995 */ 6996 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6997 } 6998 6999 if (option_exists & IPPF_DONTFRAG) { 7000 ip6i->ip6i_flags |= IP6I_DONTFRAG; 7001 } 7002 7003 if (option_exists & IPPF_USE_MIN_MTU) { 7004 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 7005 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 7006 } 7007 7008 if (option_exists & IPPF_NEXTHOP) { 7009 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 7010 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 7011 ip6i->ip6i_flags |= IP6I_NEXTHOP; 7012 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 7013 } 7014 7015 /* 7016 * tell IP this is an ip6i_t private header 7017 */ 7018 ip6i->ip6i_nxt = IPPROTO_RAW; 7019 } 7020 7021 /* Initialize IPv6 header */ 7022 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7023 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7024 7025 /* Set the hoplimit of the outgoing packet. */ 7026 if (option_exists & IPPF_HOPLIMIT) { 7027 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
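 * (Added note: otherwise the multicast default udp_multicast_ttl or
 * the unicast default udp_ttl below is used, and IP6I_HOPLIMIT is
 * set only when a sticky IPV6_MULTICAST_HOPS or IPV6_UNICAST_HOPS
 * option requested it.)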
*/ 7028 ip6h->ip6_hops = ipp->ipp_hoplimit; 7029 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7030 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7031 ip6h->ip6_hops = udp->udp_multicast_ttl; 7032 if (option_exists & IPPF_MULTICAST_HOPS) 7033 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7034 } else { 7035 ip6h->ip6_hops = udp->udp_ttl; 7036 if (option_exists & IPPF_UNICAST_HOPS) 7037 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7038 } 7039 7040 if (option_exists & IPPF_ADDR) { 7041 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7042 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7043 ip6h->ip6_src = tipp->ipp_addr; 7044 } else { 7045 /* 7046 * The source address was not set using IPV6_PKTINFO. 7047 * First look at the bound source. 7048 * If unspecified fallback to __sin6_src_id. 7049 */ 7050 ip6h->ip6_src = udp->udp_v6src; 7051 if (sin6->__sin6_src_id != 0 && 7052 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7053 ip_srcid_find_id(sin6->__sin6_src_id, 7054 &ip6h->ip6_src, connp->conn_zoneid, 7055 us->us_netstack); 7056 } 7057 } 7058 7059 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7060 cp = (uint8_t *)&ip6h[1]; 7061 7062 /* 7063 * Here's where we have to start stringing together 7064 * any extension headers in the right order: 7065 * Hop-by-hop, destination, routing, and final destination opts. 7066 */ 7067 if (option_exists & IPPF_HOPOPTS) { 7068 /* Hop-by-hop options */ 7069 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7070 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7071 if (hopoptslen == 0) { 7072 hopoptsptr = tipp->ipp_hopopts; 7073 hopoptslen = tipp->ipp_hopoptslen; 7074 is_ancillary = B_TRUE; 7075 } 7076 7077 *nxthdr_ptr = IPPROTO_HOPOPTS; 7078 nxthdr_ptr = &hbh->ip6h_nxt; 7079 7080 bcopy(hopoptsptr, cp, hopoptslen); 7081 cp += hopoptslen; 7082 7083 if (hopoptsptr != NULL && !is_ancillary) { 7084 kmem_free(hopoptsptr, hopoptslen); 7085 hopoptsptr = NULL; 7086 hopoptslen = 0; 7087 } 7088 } 7089 /* 7090 * En-route destination options 7091 * Only do them if there's a routing header as well 7092 */ 7093 if (option_exists & IPPF_RTDSTOPTS) { 7094 ip6_dest_t *dst = (ip6_dest_t *)cp; 7095 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7096 7097 *nxthdr_ptr = IPPROTO_DSTOPTS; 7098 nxthdr_ptr = &dst->ip6d_nxt; 7099 7100 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7101 cp += tipp->ipp_rtdstoptslen; 7102 } 7103 /* 7104 * Routing header next 7105 */ 7106 if (option_exists & IPPF_RTHDR) { 7107 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7108 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7109 7110 *nxthdr_ptr = IPPROTO_ROUTING; 7111 nxthdr_ptr = &rt->ip6r_nxt; 7112 7113 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7114 cp += tipp->ipp_rthdrlen; 7115 } 7116 /* 7117 * Do ultimate destination options 7118 */ 7119 if (option_exists & IPPF_DSTOPTS) { 7120 ip6_dest_t *dest = (ip6_dest_t *)cp; 7121 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7122 7123 *nxthdr_ptr = IPPROTO_DSTOPTS; 7124 nxthdr_ptr = &dest->ip6d_nxt; 7125 7126 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7127 cp += tipp->ipp_dstoptslen; 7128 } 7129 /* 7130 * Now set the last header pointer to the proto passed in 7131 */ 7132 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7133 *nxthdr_ptr = IPPROTO_UDP; 7134 7135 /* Update UDP header */ 7136 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 7137 udph->uha_dst_port = sin6->sin6_port; 7138 udph->uha_src_port = udp->udp_port; 7139 7140 /* 7141 * Copy in the destination address 7142 */ 7143 ip6h->ip6_dst = ip6_dst; 7144 7145 ip6h->ip6_vcf = 7146 (IPV6_DEFAULT_VERS_AND_FLOW & 
IPV6_VERS_AND_FLOW_MASK) | 7147 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7148 7149 if (option_exists & IPPF_TCLASS) { 7150 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7151 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7152 tipp->ipp_tclass); 7153 } 7154 rw_exit(&udp->udp_rwlock); 7155 7156 if (option_exists & IPPF_RTHDR) { 7157 ip6_rthdr_t *rth; 7158 7159 /* 7160 * Perform any processing needed for source routing. 7161 * We know that all extension headers will be in the same mblk 7162 * as the IPv6 header. 7163 */ 7164 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7165 if (rth != NULL && rth->ip6r_segleft != 0) { 7166 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7167 /* 7168 * Drop packet - only support Type 0 routing. 7169 * Notify the application as well. 7170 */ 7171 *error = EPROTO; 7172 goto done; 7173 } 7174 7175 /* 7176 * rth->ip6r_len is twice the number of 7177 * addresses in the header. Thus it must be even. 7178 */ 7179 if (rth->ip6r_len & 0x1) { 7180 *error = EPROTO; 7181 goto done; 7182 } 7183 /* 7184 * Shuffle the routing header and ip6_dst 7185 * addresses, and get the checksum difference 7186 * between the first hop (in ip6_dst) and 7187 * the destination (in the last routing hdr entry). 7188 */ 7189 csum = ip_massage_options_v6(ip6h, rth, 7190 us->us_netstack); 7191 /* 7192 * Verify that the first hop isn't a mapped address. 7193 * Routers along the path need to do this verification 7194 * for subsequent hops. 7195 */ 7196 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7197 *error = EADDRNOTAVAIL; 7198 goto done; 7199 } 7200 7201 cp += (rth->ip6r_len + 1)*8; 7202 } 7203 } 7204 7205 /* count up length of UDP packet */ 7206 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7207 if ((mp2 = mp1->b_cont) != NULL) { 7208 do { 7209 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7210 ip_len += (uint32_t)MBLKL(mp2); 7211 } while ((mp2 = mp2->b_cont) != NULL); 7212 } 7213 7214 /* 7215 * If the size of the packet is greater than the maximum allowed by 7216 * ip, return an error. Passing this down could cause panics because 7217 * the size will have wrapped and be inconsistent with the msg size. 7218 */ 7219 if (ip_len > IP_MAXPACKET) { 7220 *error = EMSGSIZE; 7221 goto done; 7222 } 7223 7224 /* Store the UDP length. Subtract length of extension hdrs */ 7225 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7226 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7227 7228 /* 7229 * We make it easy for IP to include our pseudo header 7230 * by putting our length in uh_checksum, modified (if 7231 * we have a routing header) by the checksum difference 7232 * between the ultimate destination and first hop addresses. 7233 * Note: UDP over IPv6 must always checksum the packet. 7234 */ 7235 csum += udph->uha_length; 7236 csum = (csum & 0xFFFF) + (csum >> 16); 7237 udph->uha_checksum = (uint16_t)csum; 7238 7239 #ifdef _LITTLE_ENDIAN 7240 ip_len = htons(ip_len); 7241 #endif 7242 ip6h->ip6_plen = ip_len; 7243 if (DB_CRED(mp) != NULL) 7244 mblk_setcred(mp1, DB_CRED(mp)); 7245 7246 if (DB_TYPE(mp) != M_DATA) { 7247 ASSERT(mp != mp1); 7248 freeb(mp); 7249 } 7250 7251 /* mp has been consumed and we'll return success */ 7252 ASSERT(*error == 0); 7253 mp = NULL; 7254 7255 /* We're done. 
Pass the packet to IP */ 7256 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 7257 ip_output_v6(connp, mp1, q, IP_WPUT); 7258 7259 done: 7260 if (sth_wroff != 0) { 7261 (void) mi_set_sth_wroff(RD(q), 7262 udp->udp_max_hdr_len + us->us_wroff_extra); 7263 } 7264 if (hopoptsptr != NULL && !is_ancillary) { 7265 kmem_free(hopoptsptr, hopoptslen); 7266 hopoptsptr = NULL; 7267 } 7268 if (*error != 0) { 7269 ASSERT(mp != NULL); 7270 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 7271 } 7272 return (mp); 7273 } 7274 7275 7276 static int 7277 udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 7278 { 7279 sin_t *sin = (sin_t *)sa; 7280 sin6_t *sin6 = (sin6_t *)sa; 7281 7282 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 7283 7284 if (udp->udp_state != TS_DATA_XFER) 7285 return (ENOTCONN); 7286 7287 switch (udp->udp_family) { 7288 case AF_INET: 7289 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7290 7291 if (*salenp < sizeof (sin_t)) 7292 return (EINVAL); 7293 7294 *salenp = sizeof (sin_t); 7295 *sin = sin_null; 7296 sin->sin_family = AF_INET; 7297 sin->sin_port = udp->udp_dstport; 7298 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 7299 break; 7300 7301 case AF_INET6: 7302 if (*salenp < sizeof (sin6_t)) 7303 return (EINVAL); 7304 7305 *salenp = sizeof (sin6_t); 7306 *sin6 = sin6_null; 7307 sin6->sin6_family = AF_INET6; 7308 sin6->sin6_port = udp->udp_dstport; 7309 sin6->sin6_addr = udp->udp_v6dst; 7310 sin6->sin6_flowinfo = udp->udp_flowinfo; 7311 break; 7312 } 7313 7314 return (0); 7315 } 7316 7317 static int 7318 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 7319 { 7320 sin_t *sin = (sin_t *)sa; 7321 sin6_t *sin6 = (sin6_t *)sa; 7322 7323 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 7324 7325 switch (udp->udp_family) { 7326 case AF_INET: 7327 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7328 7329 if (*salenp < sizeof (sin_t)) 7330 return (EINVAL); 7331 7332 *salenp = sizeof (sin_t); 7333 *sin = sin_null; 7334 sin->sin_family = AF_INET; 7335 sin->sin_port = udp->udp_port; 7336 7337 /* 7338 * If udp_v6src is unspecified, we might be bound to broadcast 7339 * / multicast. Use udp_bound_v6src as local address instead 7340 * (that could also still be unspecified). 7341 */ 7342 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7343 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7344 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 7345 } else { 7346 sin->sin_addr.s_addr = 7347 V4_PART_OF_V6(udp->udp_bound_v6src); 7348 } 7349 break; 7350 7351 case AF_INET6: 7352 if (*salenp < sizeof (sin6_t)) 7353 return (EINVAL); 7354 7355 *salenp = sizeof (sin6_t); 7356 *sin6 = sin6_null; 7357 sin6->sin6_family = AF_INET6; 7358 sin6->sin6_port = udp->udp_port; 7359 sin6->sin6_flowinfo = udp->udp_flowinfo; 7360 7361 /* 7362 * If udp_v6src is unspecified, we might be bound to broadcast 7363 * / multicast. Use udp_bound_v6src as local address instead 7364 * (that could also still be unspecified). 7365 */ 7366 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 7367 sin6->sin6_addr = udp->udp_v6src; 7368 else 7369 sin6->sin6_addr = udp->udp_bound_v6src; 7370 break; 7371 } 7372 7373 return (0); 7374 } 7375 7376 /* 7377 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
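 * These arrive as M_CMD messages carrying a cmdblk_t: the requested
 * name is written straight into the attached data mblk and the result
 * is returned with qreply(), avoiding the multi-step
 * TI_GETMYNAME/TI_GETPEERNAME copyin/copyout sequence handled in
 * udp_wput_iocdata().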
7378 */ 7379 static void 7380 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 7381 { 7382 void *data; 7383 mblk_t *datamp = mp->b_cont; 7384 udp_t *udp = Q_TO_UDP(q); 7385 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 7386 7387 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 7388 cmdp->cb_error = EPROTO; 7389 qreply(q, mp); 7390 return; 7391 } 7392 data = datamp->b_rptr; 7393 7394 rw_enter(&udp->udp_rwlock, RW_READER); 7395 switch (cmdp->cb_cmd) { 7396 case TI_GETPEERNAME: 7397 cmdp->cb_error = udp_getpeername(udp, data, &cmdp->cb_len); 7398 break; 7399 case TI_GETMYNAME: 7400 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 7401 break; 7402 default: 7403 cmdp->cb_error = EINVAL; 7404 break; 7405 } 7406 rw_exit(&udp->udp_rwlock); 7407 7408 qreply(q, mp); 7409 } 7410 7411 static void 7412 udp_wput_other(queue_t *q, mblk_t *mp) 7413 { 7414 uchar_t *rptr = mp->b_rptr; 7415 struct datab *db; 7416 struct iocblk *iocp; 7417 cred_t *cr; 7418 conn_t *connp = Q_TO_CONN(q); 7419 udp_t *udp = connp->conn_udp; 7420 udp_stack_t *us; 7421 7422 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7423 "udp_wput_other_start: q %p", q); 7424 7425 us = udp->udp_us; 7426 db = mp->b_datap; 7427 7428 cr = DB_CREDDEF(mp, connp->conn_cred); 7429 7430 switch (db->db_type) { 7431 case M_CMD: 7432 udp_wput_cmdblk(q, mp); 7433 return; 7434 7435 case M_PROTO: 7436 case M_PCPROTO: 7437 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7438 freemsg(mp); 7439 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7440 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 7441 return; 7442 } 7443 switch (((t_primp_t)rptr)->type) { 7444 case T_ADDR_REQ: 7445 udp_addr_req(q, mp); 7446 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7447 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7448 return; 7449 case O_T_BIND_REQ: 7450 case T_BIND_REQ: 7451 udp_bind(q, mp); 7452 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7453 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7454 return; 7455 case T_CONN_REQ: 7456 udp_connect(q, mp); 7457 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7458 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7459 return; 7460 case T_CAPABILITY_REQ: 7461 udp_capability_req(q, mp); 7462 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7463 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7464 return; 7465 case T_INFO_REQ: 7466 udp_info_req(q, mp); 7467 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7468 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7469 return; 7470 case T_UNITDATA_REQ: 7471 /* 7472 * If a T_UNITDATA_REQ gets here, the address must 7473 * be bad. Valid T_UNITDATA_REQs are handled 7474 * in udp_wput. 
7475 */ 7476 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7477 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7478 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 7479 return; 7480 case T_UNBIND_REQ: 7481 udp_unbind(q, mp); 7482 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7483 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7484 return; 7485 case T_SVR4_OPTMGMT_REQ: 7486 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 7487 cr)) { 7488 (void) svr4_optcom_req(q, 7489 mp, cr, &udp_opt_obj, B_TRUE); 7490 } 7491 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7492 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7493 return; 7494 7495 case T_OPTMGMT_REQ: 7496 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 7497 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7498 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7499 return; 7500 7501 case T_DISCON_REQ: 7502 udp_disconnect(q, mp); 7503 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7504 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 7505 return; 7506 7507 /* The following TPI message is not supported by udp. */ 7508 case O_T_CONN_RES: 7509 case T_CONN_RES: 7510 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7511 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7512 "udp_wput_other_end: q %p (%S)", q, 7513 "connres/disconreq"); 7514 return; 7515 7516 /* The following 3 TPI messages are illegal for udp. */ 7517 case T_DATA_REQ: 7518 case T_EXDATA_REQ: 7519 case T_ORDREL_REQ: 7520 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7521 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7522 "udp_wput_other_end: q %p (%S)", q, 7523 "data/exdata/ordrel"); 7524 return; 7525 default: 7526 break; 7527 } 7528 break; 7529 case M_FLUSH: 7530 if (*rptr & FLUSHW) 7531 flushq(q, FLUSHDATA); 7532 break; 7533 case M_IOCTL: 7534 iocp = (struct iocblk *)mp->b_rptr; 7535 switch (iocp->ioc_cmd) { 7536 case TI_GETPEERNAME: 7537 if (udp->udp_state != TS_DATA_XFER) { 7538 /* 7539 * If a default destination address has not 7540 * been associated with the stream, then we 7541 * don't know the peer's name. 7542 */ 7543 iocp->ioc_error = ENOTCONN; 7544 iocp->ioc_count = 0; 7545 mp->b_datap->db_type = M_IOCACK; 7546 qreply(q, mp); 7547 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7548 "udp_wput_other_end: q %p (%S)", q, 7549 "getpeername"); 7550 return; 7551 } 7552 /* FALLTHRU */ 7553 case TI_GETMYNAME: { 7554 /* 7555 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7556 * need to copyin the user's strbuf structure. 7557 * Processing will continue in the M_IOCDATA case 7558 * below. 7559 */ 7560 mi_copyin(q, mp, NULL, 7561 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7562 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7563 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 7564 return; 7565 } 7566 case ND_SET: 7567 /* nd_getset performs the necessary checking */ 7568 case ND_GET: 7569 if (nd_getset(q, us->us_nd, mp)) { 7570 qreply(q, mp); 7571 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7572 "udp_wput_other_end: q %p (%S)", q, "get"); 7573 return; 7574 } 7575 break; 7576 case _SIOCSOCKFALLBACK: 7577 /* 7578 * Either sockmod is about to be popped and the 7579 * socket would now be treated as a plain stream, 7580 * or a module is about to be pushed so we could 7581 * no longer use read-side synchronous stream. 7582 * Drain any queued data and disable direct sockfs 7583 * interface from now on. 
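 * (Added note: after the drain below, udp_rrw() simply returns EBUSY
 * for an endpoint that has fallen back, so sockfs stops using the
 * read-side synchronous-stream entry points for it.)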
7584 */ 7585 if (!udp->udp_issocket) { 7586 DB_TYPE(mp) = M_IOCNAK; 7587 iocp->ioc_error = EINVAL; 7588 } else { 7589 udp->udp_issocket = B_FALSE; 7590 if (udp->udp_direct_sockfs) { 7591 /* 7592 * Disable read-side synchronous 7593 * stream interface and drain any 7594 * queued data. 7595 */ 7596 udp_rcv_drain(RD(q), udp, 7597 B_FALSE); 7598 ASSERT(!udp->udp_direct_sockfs); 7599 UDP_STAT(us, udp_sock_fallback); 7600 } 7601 DB_TYPE(mp) = M_IOCACK; 7602 iocp->ioc_error = 0; 7603 } 7604 iocp->ioc_count = 0; 7605 iocp->ioc_rval = 0; 7606 qreply(q, mp); 7607 return; 7608 default: 7609 break; 7610 } 7611 break; 7612 case M_IOCDATA: 7613 udp_wput_iocdata(q, mp); 7614 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7615 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7616 return; 7617 default: 7618 /* Unrecognized messages are passed through without change. */ 7619 break; 7620 } 7621 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7622 "udp_wput_other_end: q %p (%S)", q, "end"); 7623 ip_output(connp, mp, q, IP_WPUT); 7624 } 7625 7626 /* 7627 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7628 * messages. 7629 */ 7630 static void 7631 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7632 { 7633 mblk_t *mp1; 7634 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7635 STRUCT_HANDLE(strbuf, sb); 7636 udp_t *udp = Q_TO_UDP(q); 7637 int error; 7638 uint_t addrlen; 7639 7640 /* Make sure it is one of ours. */ 7641 switch (iocp->ioc_cmd) { 7642 case TI_GETMYNAME: 7643 case TI_GETPEERNAME: 7644 break; 7645 default: 7646 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7647 return; 7648 } 7649 7650 switch (mi_copy_state(q, mp, &mp1)) { 7651 case -1: 7652 return; 7653 case MI_COPY_CASE(MI_COPY_IN, 1): 7654 break; 7655 case MI_COPY_CASE(MI_COPY_OUT, 1): 7656 /* 7657 * The address has been copied out, so now 7658 * copyout the strbuf. 7659 */ 7660 mi_copyout(q, mp); 7661 return; 7662 case MI_COPY_CASE(MI_COPY_OUT, 2): 7663 /* 7664 * The address and strbuf have been copied out. 7665 * We're done, so just acknowledge the original 7666 * M_IOCTL. 7667 */ 7668 mi_copy_done(q, mp, 0); 7669 return; 7670 default: 7671 /* 7672 * Something strange has happened, so acknowledge 7673 * the original M_IOCTL with an EPROTO error. 7674 */ 7675 mi_copy_done(q, mp, EPROTO); 7676 return; 7677 } 7678 7679 /* 7680 * Now we have the strbuf structure for TI_GETMYNAME 7681 * and TI_GETPEERNAME. Next we copyout the requested 7682 * address and then we'll copyout the strbuf. 7683 */ 7684 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7685 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); 7686 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7687 mi_copy_done(q, mp, EINVAL); 7688 return; 7689 } 7690 7691 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7692 if (mp1 == NULL) 7693 return; 7694 7695 rw_enter(&udp->udp_rwlock, RW_READER); 7696 switch (iocp->ioc_cmd) { 7697 case TI_GETMYNAME: 7698 error = udp_getmyname(udp, (void *)mp1->b_rptr, &addrlen); 7699 break; 7700 case TI_GETPEERNAME: 7701 error = udp_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7702 break; 7703 } 7704 rw_exit(&udp->udp_rwlock); 7705 7706 if (error != 0) { 7707 mi_copy_done(q, mp, error); 7708 } else { 7709 mp1->b_wptr += addrlen; 7710 STRUCT_FSET(sb, len, addrlen); 7711 7712 /* Copy out the address */ 7713 mi_copyout(q, mp); 7714 } 7715 } 7716 7717 static int 7718 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7719 udpattrs_t *udpattrs) 7720 { 7721 struct T_unitdata_req *udreqp; 7722 int is_absreq_failure; 7723 cred_t *cr; 7724 conn_t *connp = Q_TO_CONN(q); 7725 7726 ASSERT(((t_primp_t)mp->b_rptr)->type); 7727 7728 cr = DB_CREDDEF(mp, connp->conn_cred); 7729 7730 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7731 7732 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7733 udreqp->OPT_offset, cr, &udp_opt_obj, 7734 udpattrs, &is_absreq_failure); 7735 7736 if (*errorp != 0) { 7737 /* 7738 * Note: No special action needed in this 7739 * module for "is_absreq_failure" 7740 */ 7741 return (-1); /* failure */ 7742 } 7743 ASSERT(is_absreq_failure == 0); 7744 return (0); /* success */ 7745 } 7746 7747 void 7748 udp_ddi_init(void) 7749 { 7750 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7751 udp_opt_obj.odb_opt_arr_cnt); 7752 7753 /* 7754 * We want to be informed each time a stack is created or 7755 * destroyed in the kernel, so we can maintain the 7756 * set of udp_stack_t's. 7757 */ 7758 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7759 } 7760 7761 void 7762 udp_ddi_destroy(void) 7763 { 7764 netstack_unregister(NS_UDP); 7765 } 7766 7767 /* 7768 * Initialize the UDP stack instance. 7769 */ 7770 static void * 7771 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7772 { 7773 udp_stack_t *us; 7774 udpparam_t *pa; 7775 int i; 7776 7777 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7778 us->us_netstack = ns; 7779 7780 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7781 us->us_epriv_ports[0] = 2049; 7782 us->us_epriv_ports[1] = 4045; 7783 7784 /* 7785 * The smallest anonymous port in the priviledged port range which UDP 7786 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7787 */ 7788 us->us_min_anonpriv_port = 512; 7789 7790 us->us_bind_fanout_size = udp_bind_fanout_size; 7791 7792 /* Roundup variable that might have been modified in /etc/system */ 7793 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7794 /* Not a power of two. 
Round up to nearest power of two */ 7795 for (i = 0; i < 31; i++) { 7796 if (us->us_bind_fanout_size < (1 << i)) 7797 break; 7798 } 7799 us->us_bind_fanout_size = 1 << i; 7800 } 7801 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7802 sizeof (udp_fanout_t), KM_SLEEP); 7803 for (i = 0; i < us->us_bind_fanout_size; i++) { 7804 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7805 NULL); 7806 } 7807 7808 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7809 7810 us->us_param_arr = pa; 7811 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7812 7813 (void) udp_param_register(&us->us_nd, 7814 us->us_param_arr, A_CNT(udp_param_arr)); 7815 7816 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7817 us->us_mibkp = udp_kstat_init(stackid); 7818 return (us); 7819 } 7820 7821 /* 7822 * Free the UDP stack instance. 7823 */ 7824 static void 7825 udp_stack_fini(netstackid_t stackid, void *arg) 7826 { 7827 udp_stack_t *us = (udp_stack_t *)arg; 7828 int i; 7829 7830 for (i = 0; i < us->us_bind_fanout_size; i++) { 7831 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7832 } 7833 7834 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7835 sizeof (udp_fanout_t)); 7836 7837 us->us_bind_fanout = NULL; 7838 7839 nd_free(&us->us_nd); 7840 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7841 us->us_param_arr = NULL; 7842 7843 udp_kstat_fini(stackid, us->us_mibkp); 7844 us->us_mibkp = NULL; 7845 7846 udp_kstat2_fini(stackid, us->us_kstat); 7847 us->us_kstat = NULL; 7848 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7849 kmem_free(us, sizeof (*us)); 7850 } 7851 7852 static void * 7853 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7854 { 7855 kstat_t *ksp; 7856 7857 udp_stat_t template = { 7858 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7859 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7860 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7861 { "udp_drain", KSTAT_DATA_UINT64 }, 7862 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7863 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7864 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7865 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7866 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7867 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7868 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7869 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7870 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7871 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7872 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7873 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7874 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7875 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7876 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7877 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7878 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7879 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7880 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7881 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7882 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7883 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7884 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7885 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7886 #ifdef DEBUG 7887 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7888 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7889 #endif 7890 }; 7891 7892 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7893 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7894 KSTAT_FLAG_VIRTUAL, stackid); 7895 7896 if (ksp == NULL) 7897 return (NULL); 7898 7899 bcopy(&template, us_statisticsp, sizeof (template)); 7900 
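	/*
	 * Added note: the kstat was created with KSTAT_FLAG_VIRTUAL, so
	 * ks_data is simply pointed at the per-stack udp_stat_t below and
	 * every UDP_STAT()/UDP_STAT_UPDATE() increment becomes visible to
	 * kstat readers without needing a ks_update callback.
	 */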
        ksp->ks_data = (void *)us_statisticsp;
        ksp->ks_private = (void *)(uintptr_t)stackid;

        kstat_install(ksp);
        return (ksp);
}

static void
udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
{
        if (ksp != NULL) {
                ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
                kstat_delete_netstack(ksp, stackid);
        }
}

static void *
udp_kstat_init(netstackid_t stackid)
{
        kstat_t *ksp;

        udp_named_kstat_t template = {
                { "inDatagrams", KSTAT_DATA_UINT64, 0 },
                { "inErrors", KSTAT_DATA_UINT32, 0 },
                { "outDatagrams", KSTAT_DATA_UINT64, 0 },
                { "entrySize", KSTAT_DATA_INT32, 0 },
                { "entry6Size", KSTAT_DATA_INT32, 0 },
                { "outErrors", KSTAT_DATA_UINT32, 0 },
        };

        ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
            KSTAT_TYPE_NAMED,
            NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);

        if (ksp == NULL || ksp->ks_data == NULL)
                return (NULL);

        template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
        template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);

        bcopy(&template, ksp->ks_data, sizeof (template));
        ksp->ks_update = udp_kstat_update;
        ksp->ks_private = (void *)(uintptr_t)stackid;

        kstat_install(ksp);
        return (ksp);
}

static void
udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
{
        if (ksp != NULL) {
                ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
                kstat_delete_netstack(ksp, stackid);
        }
}

static int
udp_kstat_update(kstat_t *kp, int rw)
{
        udp_named_kstat_t *udpkp;
        netstackid_t stackid;
        netstack_t *ns;
        udp_stack_t *us;

        if ((kp == NULL) || (kp->ks_data == NULL))
                return (EIO);

        if (rw == KSTAT_WRITE)
                return (EACCES);

        /* Only dereference kp after the NULL check above. */
        stackid = (netstackid_t)(uintptr_t)kp->ks_private;
        ns = netstack_find_by_stackid(stackid);
        if (ns == NULL)
                return (-1);
        us = ns->netstack_udp;
        if (us == NULL) {
                netstack_rele(ns);
                return (-1);
        }
        udpkp = (udp_named_kstat_t *)kp->ks_data;

        udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams;
        udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors;
        udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams;
        udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors;
        netstack_rele(ns);
        return (0);
}

/*
 * Read-side synchronous stream info entry point, called as a
 * result of handling certain STREAMS ioctl operations.
 */
static int
udp_rinfop(queue_t *q, infod_t *dp)
{
        mblk_t *mp;
        uint_t cmd = dp->d_cmd;
        int res = 0;
        int error = 0;
        udp_t *udp = Q_TO_UDP(q);
        struct stdata *stp = STREAM(q);

        mutex_enter(&udp->udp_drain_lock);
        /* If shutdown on read has happened, return nothing */
        mutex_enter(&stp->sd_lock);
        if (stp->sd_flag & STREOF) {
                mutex_exit(&stp->sd_lock);
                goto done;
        }
        mutex_exit(&stp->sd_lock);

        if ((mp = udp->udp_rcv_list_head) == NULL)
                goto done;

        ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);

        if (cmd & INFOD_COUNT) {
                /*
                 * Return the number of messages.
                 */
                dp->d_count += udp->udp_rcv_msgcnt;
                res |= INFOD_COUNT;
        }
        if (cmd & INFOD_BYTES) {
                /*
                 * Return size of all data messages.
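                 * (udp_rcv_cnt is the running byte total of queued
                 * datagrams, maintained by udp_rcv_enqueue() and
                 * udp_rrw() below.)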
                 */
                dp->d_bytes += udp->udp_rcv_cnt;
                res |= INFOD_BYTES;
        }
        if (cmd & INFOD_FIRSTBYTES) {
                /*
                 * Return size of first data message.
                 */
                dp->d_bytes = msgdsize(mp);
                res |= INFOD_FIRSTBYTES;
                dp->d_cmd &= ~INFOD_FIRSTBYTES;
        }
        if (cmd & INFOD_COPYOUT) {
                mblk_t *mp1 = mp->b_cont;
                int n;
                /*
                 * Return data contents of first message.
                 */
                ASSERT(DB_TYPE(mp1) == M_DATA);
                while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
                        n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
                        if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
                            UIO_READ, dp->d_uiop)) != 0) {
                                goto done;
                        }
                        mp1 = mp1->b_cont;
                }
                res |= INFOD_COPYOUT;
                dp->d_cmd &= ~INFOD_COPYOUT;
        }
done:
        mutex_exit(&udp->udp_drain_lock);

        dp->d_res |= res;

        return (error);
}

/*
 * Read-side synchronous stream entry point. This is called as a result
 * of recv/read operation done at sockfs, and is guaranteed to execute
 * outside of the interrupt thread context. It returns a single datagram
 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
 */
static int
udp_rrw(queue_t *q, struiod_t *dp)
{
        mblk_t *mp;
        udp_t *udp = Q_TO_UDP(q);
        udp_stack_t *us = udp->udp_us;

        /*
         * Dequeue datagram from the head of the list and return
         * it to caller; also ensure that RSLEEP sd_wakeq flag is
         * set/cleared depending on whether or not there's data
         * remaining in the list.
         */
        mutex_enter(&udp->udp_drain_lock);
        if (!udp->udp_direct_sockfs) {
                mutex_exit(&udp->udp_drain_lock);
                UDP_STAT(us, udp_rrw_busy);
                return (EBUSY);
        }
        if ((mp = udp->udp_rcv_list_head) != NULL) {
                uint_t size = msgdsize(mp);

                /* Last datagram in the list? */
                if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
                        udp->udp_rcv_list_tail = NULL;
                mp->b_next = NULL;

                udp->udp_rcv_cnt -= size;
                udp->udp_rcv_msgcnt--;
                UDP_STAT(us, udp_rrw_msgcnt);

                /* No longer flow-controlling? */
                if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
                    udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
                        udp->udp_drain_qfull = B_FALSE;
        }
        if (udp->udp_rcv_list_head == NULL) {
                /*
                 * Either we just dequeued the last datagram or
                 * we get here from sockfs and have nothing to
                 * return; in this case clear RSLEEP.
                 */
                ASSERT(udp->udp_rcv_cnt == 0);
                ASSERT(udp->udp_rcv_msgcnt == 0);
                ASSERT(udp->udp_rcv_list_tail == NULL);
                STR_WAKEUP_CLEAR(STREAM(q));
        } else {
                /*
                 * More data follows; we need udp_rrw() to be
                 * called in future to pick up the rest.
                 */
                STR_WAKEUP_SET(STREAM(q));
        }
        mutex_exit(&udp->udp_drain_lock);
        dp->d_mp = mp;
        return (0);
}

/*
 * Enqueue a completely-built T_UNITDATA_IND message into the receive
 * list; this is typically executed within the interrupt thread context
 * and so we do things as quickly as possible.
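 * The byte and message counts updated here feed the udp_rcv_hiwat
 * checks that raise udp_drain_qfull for flow control.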
 */
static void
udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
{
        ASSERT(q == RD(q));
        ASSERT(pkt_len == msgdsize(mp));
        ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
        ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
        ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));

        mutex_enter(&udp->udp_drain_lock);
        /*
         * Wake up and signal the receiving app; it is okay to do this
         * before enqueueing the mp because we are holding the drain lock.
         * One of the advantages of a synchronous stream is the ability for
         * us to find out when the application performs a read on the
         * socket by way of the udp_rrw() entry point being called. We need
         * to generate SIGPOLL/SIGIO for each received datagram in the case
         * of an asynchronous socket just as in the strrput() case. However,
         * we only wake the application up when necessary, i.e. during the
         * first enqueue. When udp_rrw() is called, we send up a single
         * datagram upstream and call STR_WAKEUP_SET() again when there
         * are still data remaining in our receive queue.
         */
        STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head);
        if (udp->udp_rcv_list_head == NULL)
                udp->udp_rcv_list_head = mp;
        else
                udp->udp_rcv_list_tail->b_next = mp;
        udp->udp_rcv_list_tail = mp;
        udp->udp_rcv_cnt += pkt_len;
        udp->udp_rcv_msgcnt++;

        /* Need to flow-control? */
        if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
            udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
                udp->udp_drain_qfull = B_TRUE;

        mutex_exit(&udp->udp_drain_lock);
}

/*
 * Drain the contents of the receive list to the module upstream; we do
 * this during close or when we fall back to the slow mode due to
 * sockmod being popped or a module being pushed on top of us.
 */
static void
udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
{
        mblk_t *mp;
        udp_stack_t *us = udp->udp_us;

        ASSERT(q == RD(q));

        mutex_enter(&udp->udp_drain_lock);
        /*
         * There is no race with a concurrent udp_input() sending
         * up packets using putnext() after we have cleared the
         * udp_direct_sockfs flag but before we have completed
         * sending up the packets in udp_rcv_list, since we are
         * either a writer or we have quiesced the conn.
         */
        udp->udp_direct_sockfs = B_FALSE;
        mutex_exit(&udp->udp_drain_lock);

        if (udp->udp_rcv_list_head != NULL)
                UDP_STAT(us, udp_drain);

        /*
         * Send up everything via putnext(); note here that we
         * don't need the udp_drain_lock to protect us since
         * nothing can enter udp_rrw() and that we currently
         * have exclusive access to this udp.
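         * (udp_rrw() bails out with EBUSY once udp_direct_sockfs has
         * been cleared above.)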
         */
        while ((mp = udp->udp_rcv_list_head) != NULL) {
                udp->udp_rcv_list_head = mp->b_next;
                mp->b_next = NULL;
                udp->udp_rcv_cnt -= msgdsize(mp);
                udp->udp_rcv_msgcnt--;
                if (closing) {
                        freemsg(mp);
                } else {
                        putnext(q, mp);
                }
        }
        ASSERT(udp->udp_rcv_cnt == 0);
        ASSERT(udp->udp_rcv_msgcnt == 0);
        ASSERT(udp->udp_rcv_list_head == NULL);
        udp->udp_rcv_list_tail = NULL;
        udp->udp_drain_qfull = B_FALSE;
}

static size_t
udp_set_rcv_hiwat(udp_t *udp, size_t size)
{
        udp_stack_t *us = udp->udp_us;

        /* We add a bit of extra buffering */
        size += size >> 1;
        if (size > us->us_max_buf)
                size = us->us_max_buf;

        udp->udp_rcv_hiwat = size;
        return (size);
}

/*
 * For the lower queue so that UDP can be a dummy mux.
 * Nobody should be sending packets up this stream.
 */
static void
udp_lrput(queue_t *q, mblk_t *mp)
{
        mblk_t *mp1;

        switch (mp->b_datap->db_type) {
        case M_FLUSH:
                /* Turn around */
                if (*mp->b_rptr & FLUSHW) {
                        *mp->b_rptr &= ~FLUSHR;
                        qreply(q, mp);
                        return;
                }
                break;
        }
        /* Could receive messages that passed through ar_rput */
        for (mp1 = mp; mp1; mp1 = mp1->b_cont)
                mp1->b_prev = mp1->b_next = NULL;
        freemsg(mp);
}

/*
 * For the lower queue so that UDP can be a dummy mux.
 * Nobody should be sending packets down this stream.
 */
/* ARGSUSED */
void
udp_lwput(queue_t *q, mblk_t *mp)
{
        freemsg(mp);
}
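/*
 * A minimal userland sketch, assuming libkstat(3KSTAT), of how the
 * per-stack "udpstat" kstat created by udp_kstat2_init() might be read;
 * the instance number is wildcarded since it depends on the netstack:
 *
 *	#include <kstat.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		kstat_ctl_t *kc = kstat_open();
 *		kstat_t *ksp;
 *		kstat_named_t *kn;
 *
 *		if (kc == NULL)
 *			return (1);
 *		ksp = kstat_lookup(kc, "udp", -1, "udpstat");
 *		if (ksp != NULL && kstat_read(kc, ksp, NULL) != -1 &&
 *		    (kn = kstat_data_lookup(ksp, "udp_ip_send")) != NULL)
 *			(void) printf("udp_ip_send = %llu\n",
 *			    (u_longlong_t)kn->value.ui64);
 *		(void) kstat_close(kc);
 *		return (0);
 *	}
 *
 * Compile with -lkstat; kstat(1M) shows the same counters via
 * "kstat -m udp -n udpstat".
 */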