1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/sdt.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 67 #include <inet/common.h> 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_if.h> 73 #include <inet/ip_multi.h> 74 #include <inet/ip_ndp.h> 75 #include <inet/mi.h> 76 #include <inet/mib2.h> 77 #include <inet/nd.h> 78 #include <inet/optcom.h> 79 #include <inet/snmpcom.h> 80 #include <inet/kstatcom.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipclassifier.h> 83 #include <inet/ipsec_impl.h> 84 #include <inet/ipp_common.h> 85 86 /* 87 * The ipsec_info.h header file is here since it has the definition for the 88 * M_CTL message types used by IP to convey information to the ULP. The 89 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
90 */ 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 94 #include <sys/tsol/label.h> 95 #include <sys/tsol/tnet.h> 96 #include <rpc/pmap_prot.h> 97 98 /* 99 * Synchronization notes: 100 * 101 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 102 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 103 * We also use conn_lock when updating things that affect the IP classifier 104 * lookup. 105 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 106 * 107 * The fanout lock uf_lock: 108 * When a UDP endpoint is bound to a local port, it is inserted into 109 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 110 * The size of the array is controlled by the udp_bind_fanout_size variable. 111 * This variable can be changed in /etc/system if the default value is 112 * not large enough. Each bind hash bucket is protected by a per bucket 113 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 114 * structure and a few other fields in the udp_t. A UDP endpoint is removed 115 * from the bind hash list only when it is being unbound or being closed. 116 * The per bucket lock also protects a UDP endpoint's state changes. 117 * 118 * The udp_rwlock: 119 * This protects most of the other fields in the udp_t. The exact list of 120 * fields which are protected by each of the above locks is documented in 121 * the udp_t structure definition. 122 * 123 * Plumbing notes: 124 * UDP is always a device driver. For compatibility with mibopen() code 125 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 126 * dummy module. 127 * 128 * The above implies that we don't support any intermediate module to 129 * reside in between /dev/ip and udp -- in fact, we never supported such 130 * scenario in the past as the inter-layer communication semantics have 131 * always been private. 
*/

/*
 * For /etc/system control.
 * Sizes the array of bind-hash buckets (udp_fanout_t) described in the
 * synchronization notes above.
 */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;

/*
 * Canned replies used by the ndd report routines in this file:
 * NDD_TOO_QUICK_MSG is printed when a non-privileged caller asks for a
 * report again before the configured interval has elapsed, and
 * NDD_OUT_OF_BUF_MSG is printed when the report buffer cannot be allocated.
 */
#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"

/*
 * Option processing attrs.
 * The union carries either the IPv6 or the IPv4 per-packet attribute
 * pointer; the two #defines inside the structure let code name the union
 * members directly (attrs->udpattr_ipp6 / attrs->udpattr_ipp4).
 */
typedef struct udpattrs_s {
	union {
		ip6_pkt_t	*udpattr_ipp6;	/* For V6 */
		ip4_pkt_t	*udpattr_ipp4;	/* For V4 */
	} udpattr_ippu;
#define	udpattr_ipp6 udpattr_ippu.udpattr_ipp6
#define	udpattr_ipp4 udpattr_ippu.udpattr_ipp4
	mblk_t		*udpattr_mb;
	boolean_t	udpattr_credset;
} udpattrs_t;

/* Forward declarations of the file-local routines. */
static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static void	udp_bind_result(conn_t *, mblk_t *);
static void	udp_bind_ack(conn_t *, mblk_t *mp);
static void	udp_bind_error(conn_t *, mblk_t *mp);
static int	udp_build_hdrs(udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_close(queue_t *q);
static void	udp_connect(queue_t *q, mblk_t *mp);
static void	udp_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(queue_t *q, mblk_t *mp);
static void	udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static void	udp_input(void *, mblk_t *, void *);
static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
		    t_scalar_t addr_length);
static void	udp_lrput(queue_t *, mblk_t *);
static void	udp_lwput(queue_t *, mblk_t *);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp, boolean_t isv6);
static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
		    cred_t *cr);
static void	udp_report_item(mblk_t *mp, udp_t *udp);
static int	udp_rinfop(queue_t *q, infod_t *dp);
static int	udp_rrw(queue_t *q, struiod_t *dp);
static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	udp_send_data(udp_t *, queue_t *, mblk_t *, ipha_t *);
static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
		    t_scalar_t destlen, t_scalar_t err);
static void	udp_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
    boolean_t random);
static mblk_t	*udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t,
    int *, boolean_t);
static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
		    int *error);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	udp_stack_fini(netstackid_t stackid, void *arg);

static void	*udp_kstat_init(netstackid_t stackid);
static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
static void	udp_kstat2_fini(netstackid_t, kstat_t *);
static int	udp_kstat_update(kstat_t *kp, int rw);

static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
		    uint_t pkt_len);
static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void	udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t);

/*
 * Default receive/transmit water marks, in bytes.  The receive pair seeds
 * udp_mod_info below; all four also appear as min/default values of the
 * corresponding entries in udp_param_arr[].
 */
#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024

/*
 * STREAMS module information shared by every qinit below.
 * NOTE(review): positional initializer; per struct module_info in
 * <sys/stream.h> the order is id, name, min packet size, max packet size,
 * high water mark, low water mark -- confirm against the header.
 */
static struct module_info udp_mod_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 * The two read-side qinits differ only in that open routine; the write
 * side (udp_winit) is shared by both devices.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_close, NULL,
	&udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_winit = {
	(pfi_t)udp_wput, NULL, NULL, NULL, NULL,
	&udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 * Both lower-side qinits reuse udp_openv4/udp_close and only differ in
 * their put routine.
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_close, NULL,
	&udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_close, NULL,
	&udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};

/*
 * All-zero socket addresses (static storage, never written here); assigned
 * wholesale to clear a sin_t/sin6_t before individual fields are set.
 */
static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

/* Largest UDP payload over IPv4: max IP packet less UDP and IPv4 headers. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest UDP payload over IPv6: max IP packet less UDP and IPv6 headers. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of udp_g_t_info_ack_ipv4; only the sizes differ. */
static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 * Each row is { minimum, maximum, default value, ndd variable name }.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /*min		max		value		name */
 { 0L,		256,		32,		"udp_wroff_extra" },
 { 1L,		255,		255,		"udp_ipv4_ttl" },
 { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
 { 0,		1,		1,		"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
 { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
 { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
 { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL
 *
 * NOTE(review): the first parameter is spelled uchar_t for cl_inet_bind but
 * uint8_t for cl_inet_unbind; presumably the same underlying type, but the
 * two declarations could be made to agree.
 */

void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

/* Shorthand for a pointer to the TPI primitive union. */
typedef union T_primitives *t_primp_t;

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
363 * 364 * Trusted Extension (TX) notes: TX allows administrator to mark or 365 * reserve ports as Multilevel ports (MLP). MLP has special function 366 * on TX systems. Once a port is made MLP, it's not available as 367 * ordinary port. This creates "holes" in the port name space. It 368 * may be necessary to skip the "holes" find a suitable anon port. 369 */ 370 static in_port_t 371 udp_get_next_priv_port(udp_t *udp) 372 { 373 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 374 in_port_t nextport; 375 boolean_t restart = B_FALSE; 376 udp_stack_t *us = udp->udp_us; 377 378 retry: 379 if (next_priv_port < us->us_min_anonpriv_port || 380 next_priv_port >= IPPORT_RESERVED) { 381 next_priv_port = IPPORT_RESERVED - 1; 382 if (restart) 383 return (0); 384 restart = B_TRUE; 385 } 386 387 if (is_system_labeled() && 388 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 389 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 390 next_priv_port = nextport; 391 goto retry; 392 } 393 394 return (next_priv_port--); 395 } 396 397 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 398 /* ARGSUSED */ 399 static int 400 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 401 { 402 udp_fanout_t *udpf; 403 int i; 404 zoneid_t zoneid; 405 conn_t *connp; 406 udp_t *udp; 407 udp_stack_t *us; 408 409 connp = Q_TO_CONN(q); 410 udp = connp->conn_udp; 411 us = udp->udp_us; 412 413 /* Refer to comments in udp_status_report(). */ 414 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 415 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 416 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 417 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 418 return (0); 419 } 420 } 421 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 422 /* The following may work even if we cannot get a large buf. 
*/ 423 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 424 return (0); 425 } 426 427 (void) mi_mpprintf(mp, 428 "UDP " MI_COL_HDRPAD_STR 429 /* 12345678[89ABCDEF] */ 430 " zone lport src addr dest addr port state"); 431 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 432 433 zoneid = connp->conn_zoneid; 434 435 for (i = 0; i < us->us_bind_fanout_size; i++) { 436 udpf = &us->us_bind_fanout[i]; 437 mutex_enter(&udpf->uf_lock); 438 439 /* Print the hash index. */ 440 udp = udpf->uf_udp; 441 if (zoneid != GLOBAL_ZONEID) { 442 /* skip to first entry in this zone; might be none */ 443 while (udp != NULL && 444 udp->udp_connp->conn_zoneid != zoneid) 445 udp = udp->udp_bind_hash; 446 } 447 if (udp != NULL) { 448 uint_t print_len, buf_len; 449 450 buf_len = mp->b_cont->b_datap->db_lim - 451 mp->b_cont->b_wptr; 452 print_len = snprintf((char *)mp->b_cont->b_wptr, 453 buf_len, "%d\n", i); 454 if (print_len < buf_len) { 455 mp->b_cont->b_wptr += print_len; 456 } else { 457 mp->b_cont->b_wptr += buf_len; 458 } 459 for (; udp != NULL; udp = udp->udp_bind_hash) { 460 if (zoneid == GLOBAL_ZONEID || 461 zoneid == udp->udp_connp->conn_zoneid) 462 udp_report_item(mp->b_cont, udp); 463 } 464 } 465 mutex_exit(&udpf->uf_lock); 466 } 467 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 468 return (0); 469 } 470 471 /* 472 * Hash list removal routine for udp_t structures. 473 */ 474 static void 475 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 476 { 477 udp_t *udpnext; 478 kmutex_t *lockp; 479 udp_stack_t *us = udp->udp_us; 480 481 if (udp->udp_ptpbhn == NULL) 482 return; 483 484 /* 485 * Extract the lock pointer in case there are concurrent 486 * hash_remove's for this instance. 
487 */ 488 ASSERT(udp->udp_port != 0); 489 if (!caller_holds_lock) { 490 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 491 us->us_bind_fanout_size)].uf_lock; 492 ASSERT(lockp != NULL); 493 mutex_enter(lockp); 494 } 495 if (udp->udp_ptpbhn != NULL) { 496 udpnext = udp->udp_bind_hash; 497 if (udpnext != NULL) { 498 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 499 udp->udp_bind_hash = NULL; 500 } 501 *udp->udp_ptpbhn = udpnext; 502 udp->udp_ptpbhn = NULL; 503 } 504 if (!caller_holds_lock) { 505 mutex_exit(lockp); 506 } 507 } 508 509 static void 510 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 511 { 512 udp_t **udpp; 513 udp_t *udpnext; 514 515 ASSERT(MUTEX_HELD(&uf->uf_lock)); 516 ASSERT(udp->udp_ptpbhn == NULL); 517 udpp = &uf->uf_udp; 518 udpnext = udpp[0]; 519 if (udpnext != NULL) { 520 /* 521 * If the new udp bound to the INADDR_ANY address 522 * and the first one in the list is not bound to 523 * INADDR_ANY we skip all entries until we find the 524 * first one bound to INADDR_ANY. 525 * This makes sure that applications binding to a 526 * specific address get preference over those binding to 527 * INADDR_ANY. 528 */ 529 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 530 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 531 while ((udpnext = udpp[0]) != NULL && 532 !V6_OR_V4_INADDR_ANY( 533 udpnext->udp_bound_v6src)) { 534 udpp = &(udpnext->udp_bind_hash); 535 } 536 if (udpnext != NULL) 537 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 538 } else { 539 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 540 } 541 } 542 udp->udp_bind_hash = udpnext; 543 udp->udp_ptpbhn = udpp; 544 udpp[0] = udp; 545 } 546 547 /* 548 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 549 * passed to udp_wput. 550 * It associates a port number and local address with the stream. 551 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 552 * protocol type (IPPROTO_UDP) placed in the message following the address. 
553 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 554 * (Called as writer.) 555 * 556 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 557 * without setting SO_REUSEADDR. This is needed so that they 558 * can be viewed as two independent transport protocols. 559 * However, anonymouns ports are allocated from the same range to avoid 560 * duplicating the us->us_next_port_to_try. 561 */ 562 static void 563 udp_bind(queue_t *q, mblk_t *mp) 564 { 565 sin_t *sin; 566 sin6_t *sin6; 567 mblk_t *mp1; 568 in_port_t port; /* Host byte order */ 569 in_port_t requested_port; /* Host byte order */ 570 struct T_bind_req *tbr; 571 int count; 572 in6_addr_t v6src; 573 boolean_t bind_to_req_port_only; 574 int loopmax; 575 udp_fanout_t *udpf; 576 in_port_t lport; /* Network byte order */ 577 zoneid_t zoneid; 578 conn_t *connp; 579 udp_t *udp; 580 boolean_t is_inaddr_any; 581 mlp_type_t addrtype, mlptype; 582 udp_stack_t *us; 583 584 connp = Q_TO_CONN(q); 585 udp = connp->conn_udp; 586 us = udp->udp_us; 587 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 588 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 589 "udp_bind: bad req, len %u", 590 (uint_t)(mp->b_wptr - mp->b_rptr)); 591 udp_err_ack(q, mp, TPROTO, 0); 592 return; 593 } 594 if (udp->udp_state != TS_UNBND) { 595 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 596 "udp_bind: bad state, %u", udp->udp_state); 597 udp_err_ack(q, mp, TOUTSTATE, 0); 598 return; 599 } 600 /* 601 * Reallocate the message to make sure we have enough room for an 602 * address and the protocol type. 
603 */ 604 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 605 if (!mp1) { 606 udp_err_ack(q, mp, TSYSERR, ENOMEM); 607 return; 608 } 609 610 mp = mp1; 611 tbr = (struct T_bind_req *)mp->b_rptr; 612 switch (tbr->ADDR_length) { 613 case 0: /* Request for a generic port */ 614 tbr->ADDR_offset = sizeof (struct T_bind_req); 615 if (udp->udp_family == AF_INET) { 616 tbr->ADDR_length = sizeof (sin_t); 617 sin = (sin_t *)&tbr[1]; 618 *sin = sin_null; 619 sin->sin_family = AF_INET; 620 mp->b_wptr = (uchar_t *)&sin[1]; 621 } else { 622 ASSERT(udp->udp_family == AF_INET6); 623 tbr->ADDR_length = sizeof (sin6_t); 624 sin6 = (sin6_t *)&tbr[1]; 625 *sin6 = sin6_null; 626 sin6->sin6_family = AF_INET6; 627 mp->b_wptr = (uchar_t *)&sin6[1]; 628 } 629 port = 0; 630 break; 631 632 case sizeof (sin_t): /* Complete IPv4 address */ 633 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 634 sizeof (sin_t)); 635 if (sin == NULL || !OK_32PTR((char *)sin)) { 636 udp_err_ack(q, mp, TSYSERR, EINVAL); 637 return; 638 } 639 if (udp->udp_family != AF_INET || 640 sin->sin_family != AF_INET) { 641 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 642 return; 643 } 644 port = ntohs(sin->sin_port); 645 break; 646 647 case sizeof (sin6_t): /* complete IPv6 address */ 648 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 649 sizeof (sin6_t)); 650 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 651 udp_err_ack(q, mp, TSYSERR, EINVAL); 652 return; 653 } 654 if (udp->udp_family != AF_INET6 || 655 sin6->sin6_family != AF_INET6) { 656 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 657 return; 658 } 659 port = ntohs(sin6->sin6_port); 660 break; 661 662 default: /* Invalid request */ 663 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 664 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 665 udp_err_ack(q, mp, TBADADDR, 0); 666 return; 667 } 668 669 requested_port = port; 670 671 if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ) 672 bind_to_req_port_only = B_FALSE; 673 else 
/* T_BIND_REQ and requested_port != 0 */ 674 bind_to_req_port_only = B_TRUE; 675 676 if (requested_port == 0) { 677 /* 678 * If the application passed in zero for the port number, it 679 * doesn't care which port number we bind to. Get one in the 680 * valid range. 681 */ 682 if (udp->udp_anon_priv_bind) { 683 port = udp_get_next_priv_port(udp); 684 } else { 685 port = udp_update_next_port(udp, 686 us->us_next_port_to_try, B_TRUE); 687 } 688 } else { 689 /* 690 * If the port is in the well-known privileged range, 691 * make sure the caller was privileged. 692 */ 693 int i; 694 boolean_t priv = B_FALSE; 695 696 if (port < us->us_smallest_nonpriv_port) { 697 priv = B_TRUE; 698 } else { 699 for (i = 0; i < us->us_num_epriv_ports; i++) { 700 if (port == us->us_epriv_ports[i]) { 701 priv = B_TRUE; 702 break; 703 } 704 } 705 } 706 707 if (priv) { 708 cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); 709 710 if (secpolicy_net_privaddr(cr, port) != 0) { 711 udp_err_ack(q, mp, TACCES, 0); 712 return; 713 } 714 } 715 } 716 717 if (port == 0) { 718 udp_err_ack(q, mp, TNOADDR, 0); 719 return; 720 } 721 722 /* 723 * The state must be TS_UNBND. TPI mandates that users must send 724 * TPI primitives only 1 at a time and wait for the response before 725 * sending the next primitive. 726 */ 727 rw_enter(&udp->udp_rwlock, RW_WRITER); 728 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 729 rw_exit(&udp->udp_rwlock); 730 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 731 "udp_bind: bad state, %u", udp->udp_state); 732 udp_err_ack(q, mp, TOUTSTATE, 0); 733 return; 734 } 735 udp->udp_pending_op = tbr->PRIM_type; 736 /* 737 * Copy the source address into our udp structure. This address 738 * may still be zero; if so, IP will fill in the correct address 739 * each time an outbound packet is passed to it. 
Since the udp is 740 * not yet in the bind hash list, we don't grab the uf_lock to 741 * change udp_ipversion 742 */ 743 if (udp->udp_family == AF_INET) { 744 ASSERT(sin != NULL); 745 ASSERT(udp->udp_ipversion == IPV4_VERSION); 746 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 747 udp->udp_ip_snd_options_len; 748 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 749 } else { 750 ASSERT(sin6 != NULL); 751 v6src = sin6->sin6_addr; 752 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 753 /* 754 * no need to hold the uf_lock to set the udp_ipversion 755 * since we are not yet in the fanout list 756 */ 757 udp->udp_ipversion = IPV4_VERSION; 758 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 759 UDPH_SIZE + udp->udp_ip_snd_options_len; 760 } else { 761 udp->udp_ipversion = IPV6_VERSION; 762 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 763 } 764 } 765 766 /* 767 * If udp_reuseaddr is not set, then we have to make sure that 768 * the IP address and port number the application requested 769 * (or we selected for the application) is not being used by 770 * another stream. If another stream is already using the 771 * requested IP address and port, the behavior depends on 772 * "bind_to_req_port_only". If set the bind fails; otherwise we 773 * search for any an unused port to bind to the the stream. 774 * 775 * As per the BSD semantics, as modified by the Deering multicast 776 * changes, if udp_reuseaddr is set, then we allow multiple binds 777 * to the same port independent of the local IP address. 778 * 779 * This is slightly different than in SunOS 4.X which did not 780 * support IP multicast. Note that the change implemented by the 781 * Deering multicast code effects all binds - not only binding 782 * to IP multicast addresses. 783 * 784 * Note that when binding to port zero we ignore SO_REUSEADDR in 785 * order to guarantee a unique port. 
786 */ 787 788 count = 0; 789 if (udp->udp_anon_priv_bind) { 790 /* 791 * loopmax = (IPPORT_RESERVED-1) - 792 * us->us_min_anonpriv_port + 1 793 */ 794 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 795 } else { 796 loopmax = us->us_largest_anon_port - 797 us->us_smallest_anon_port + 1; 798 } 799 800 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 801 zoneid = connp->conn_zoneid; 802 803 for (;;) { 804 udp_t *udp1; 805 boolean_t found_exclbind = B_FALSE; 806 807 /* 808 * Walk through the list of udp streams bound to 809 * requested port with the same IP address. 810 */ 811 lport = htons(port); 812 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 813 us->us_bind_fanout_size)]; 814 mutex_enter(&udpf->uf_lock); 815 for (udp1 = udpf->uf_udp; udp1 != NULL; 816 udp1 = udp1->udp_bind_hash) { 817 if (lport != udp1->udp_port) 818 continue; 819 820 /* 821 * On a labeled system, we must treat bindings to ports 822 * on shared IP addresses by sockets with MAC exemption 823 * privilege as being in all zones, as there's 824 * otherwise no way to identify the right receiver. 825 */ 826 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 827 IPCL_ZONE_MATCH(connp, 828 udp1->udp_connp->conn_zoneid)) && 829 !udp->udp_mac_exempt && !udp1->udp_mac_exempt) 830 continue; 831 832 /* 833 * If UDP_EXCLBIND is set for either the bound or 834 * binding endpoint, the semantics of bind 835 * is changed according to the following chart. 836 * 837 * spec = specified address (v4 or v6) 838 * unspec = unspecified address (v4 or v6) 839 * A = specified addresses are different for endpoints 840 * 841 * bound bind to allowed? 842 * ------------------------------------- 843 * unspec unspec no 844 * unspec spec no 845 * spec unspec no 846 * spec spec yes if A 847 * 848 * For labeled systems, SO_MAC_EXEMPT behaves the same 849 * as UDP_EXCLBIND, except that zoneid is ignored. 
850 */ 851 if (udp1->udp_exclbind || udp->udp_exclbind || 852 udp1->udp_mac_exempt || udp->udp_mac_exempt) { 853 if (V6_OR_V4_INADDR_ANY( 854 udp1->udp_bound_v6src) || 855 is_inaddr_any || 856 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 857 &v6src)) { 858 found_exclbind = B_TRUE; 859 break; 860 } 861 continue; 862 } 863 864 /* 865 * Check ipversion to allow IPv4 and IPv6 sockets to 866 * have disjoint port number spaces. 867 */ 868 if (udp->udp_ipversion != udp1->udp_ipversion) { 869 870 /* 871 * On the first time through the loop, if the 872 * the user intentionally specified a 873 * particular port number, then ignore any 874 * bindings of the other protocol that may 875 * conflict. This allows the user to bind IPv6 876 * alone and get both v4 and v6, or bind both 877 * both and get each seperately. On subsequent 878 * times through the loop, we're checking a 879 * port that we chose (not the user) and thus 880 * we do not allow casual duplicate bindings. 881 */ 882 if (count == 0 && requested_port != 0) 883 continue; 884 } 885 886 /* 887 * No difference depending on SO_REUSEADDR. 888 * 889 * If existing port is bound to a 890 * non-wildcard IP address and 891 * the requesting stream is bound to 892 * a distinct different IP addresses 893 * (non-wildcard, also), keep going. 894 */ 895 if (!is_inaddr_any && 896 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 897 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 898 &v6src)) { 899 continue; 900 } 901 break; 902 } 903 904 if (!found_exclbind && 905 (udp->udp_reuseaddr && requested_port != 0)) { 906 break; 907 } 908 909 if (udp1 == NULL) { 910 /* 911 * No other stream has this IP address 912 * and port number. We can use it. 913 */ 914 break; 915 } 916 mutex_exit(&udpf->uf_lock); 917 if (bind_to_req_port_only) { 918 /* 919 * We get here only when requested port 920 * is bound (and only first of the for() 921 * loop iteration). 
922 * 923 * The semantics of this bind request 924 * require it to fail so we return from 925 * the routine (and exit the loop). 926 * 927 */ 928 udp->udp_pending_op = -1; 929 rw_exit(&udp->udp_rwlock); 930 udp_err_ack(q, mp, TADDRBUSY, 0); 931 return; 932 } 933 934 if (udp->udp_anon_priv_bind) { 935 port = udp_get_next_priv_port(udp); 936 } else { 937 if ((count == 0) && (requested_port != 0)) { 938 /* 939 * If the application wants us to find 940 * a port, get one to start with. Set 941 * requested_port to 0, so that we will 942 * update us->us_next_port_to_try below. 943 */ 944 port = udp_update_next_port(udp, 945 us->us_next_port_to_try, B_TRUE); 946 requested_port = 0; 947 } else { 948 port = udp_update_next_port(udp, port + 1, 949 B_FALSE); 950 } 951 } 952 953 if (port == 0 || ++count >= loopmax) { 954 /* 955 * We've tried every possible port number and 956 * there are none available, so send an error 957 * to the user. 958 */ 959 udp->udp_pending_op = -1; 960 rw_exit(&udp->udp_rwlock); 961 udp_err_ack(q, mp, TNOADDR, 0); 962 return; 963 } 964 } 965 966 /* 967 * Copy the source address into our udp structure. This address 968 * may still be zero; if so, ip will fill in the correct address 969 * each time an outbound packet is passed to it. 970 * If we are binding to a broadcast or multicast address then 971 * udp_bind_ack will clear the source address when it receives 972 * the T_BIND_ACK. 973 */ 974 udp->udp_v6src = udp->udp_bound_v6src = v6src; 975 udp->udp_port = lport; 976 /* 977 * Now reset the the next anonymous port if the application requested 978 * an anonymous port, or we handed out the next anonymous port. 979 */ 980 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 981 us->us_next_port_to_try = port + 1; 982 } 983 984 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. 
*/ 985 if (udp->udp_family == AF_INET) { 986 sin->sin_port = udp->udp_port; 987 } else { 988 int error; 989 990 sin6->sin6_port = udp->udp_port; 991 /* Rebuild the header template */ 992 error = udp_build_hdrs(udp); 993 if (error != 0) { 994 udp->udp_pending_op = -1; 995 rw_exit(&udp->udp_rwlock); 996 mutex_exit(&udpf->uf_lock); 997 udp_err_ack(q, mp, TSYSERR, error); 998 return; 999 } 1000 } 1001 udp->udp_state = TS_IDLE; 1002 udp_bind_hash_insert(udpf, udp); 1003 mutex_exit(&udpf->uf_lock); 1004 rw_exit(&udp->udp_rwlock); 1005 1006 if (cl_inet_bind) { 1007 /* 1008 * Running in cluster mode - register bind information 1009 */ 1010 if (udp->udp_ipversion == IPV4_VERSION) { 1011 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1012 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1013 (in_port_t)udp->udp_port); 1014 } else { 1015 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1016 (uint8_t *)&(udp->udp_v6src), 1017 (in_port_t)udp->udp_port); 1018 } 1019 1020 } 1021 1022 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 1023 if (is_system_labeled() && (!connp->conn_anon_port || 1024 connp->conn_anon_mlp)) { 1025 uint16_t mlpport; 1026 cred_t *cr = connp->conn_cred; 1027 zone_t *zone; 1028 1029 zone = crgetzone(cr); 1030 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 1031 mlptSingle; 1032 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 1033 &v6src, us->us_netstack->netstack_ip); 1034 if (addrtype == mlptSingle) { 1035 rw_enter(&udp->udp_rwlock, RW_WRITER); 1036 udp->udp_pending_op = -1; 1037 rw_exit(&udp->udp_rwlock); 1038 udp_err_ack(q, mp, TNOADDR, 0); 1039 connp->conn_anon_port = B_FALSE; 1040 connp->conn_mlp_type = mlptSingle; 1041 return; 1042 } 1043 mlpport = connp->conn_anon_port ? 
PMAPPORT : port; 1044 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 1045 addrtype); 1046 if (mlptype != mlptSingle && 1047 (connp->conn_mlp_type == mlptSingle || 1048 secpolicy_net_bindmlp(cr) != 0)) { 1049 if (udp->udp_debug) { 1050 (void) strlog(UDP_MOD_ID, 0, 1, 1051 SL_ERROR|SL_TRACE, 1052 "udp_bind: no priv for multilevel port %d", 1053 mlpport); 1054 } 1055 rw_enter(&udp->udp_rwlock, RW_WRITER); 1056 udp->udp_pending_op = -1; 1057 rw_exit(&udp->udp_rwlock); 1058 udp_err_ack(q, mp, TACCES, 0); 1059 connp->conn_anon_port = B_FALSE; 1060 connp->conn_mlp_type = mlptSingle; 1061 return; 1062 } 1063 1064 /* 1065 * If we're specifically binding a shared IP address and the 1066 * port is MLP on shared addresses, then check to see if this 1067 * zone actually owns the MLP. Reject if not. 1068 */ 1069 if (mlptype == mlptShared && addrtype == mlptShared) { 1070 /* 1071 * No need to handle exclusive-stack zones since 1072 * ALL_ZONES only applies to the shared stack. 1073 */ 1074 zoneid_t mlpzone; 1075 1076 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 1077 htons(mlpport)); 1078 if (connp->conn_zoneid != mlpzone) { 1079 if (udp->udp_debug) { 1080 (void) strlog(UDP_MOD_ID, 0, 1, 1081 SL_ERROR|SL_TRACE, 1082 "udp_bind: attempt to bind port " 1083 "%d on shared addr in zone %d " 1084 "(should be %d)", 1085 mlpport, connp->conn_zoneid, 1086 mlpzone); 1087 } 1088 rw_enter(&udp->udp_rwlock, RW_WRITER); 1089 udp->udp_pending_op = -1; 1090 rw_exit(&udp->udp_rwlock); 1091 udp_err_ack(q, mp, TACCES, 0); 1092 connp->conn_anon_port = B_FALSE; 1093 connp->conn_mlp_type = mlptSingle; 1094 return; 1095 } 1096 } 1097 if (connp->conn_anon_port) { 1098 int error; 1099 1100 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 1101 port, B_TRUE); 1102 if (error != 0) { 1103 if (udp->udp_debug) { 1104 (void) strlog(UDP_MOD_ID, 0, 1, 1105 SL_ERROR|SL_TRACE, 1106 "udp_bind: cannot establish anon " 1107 "MLP for port %d", port); 1108 } 1109 rw_enter(&udp->udp_rwlock, RW_WRITER); 1110 
udp->udp_pending_op = -1; 1111 rw_exit(&udp->udp_rwlock); 1112 udp_err_ack(q, mp, TACCES, 0); 1113 connp->conn_anon_port = B_FALSE; 1114 connp->conn_mlp_type = mlptSingle; 1115 return; 1116 } 1117 } 1118 connp->conn_mlp_type = mlptype; 1119 } 1120 1121 /* Pass the protocol number in the message following the address. */ 1122 *mp->b_wptr++ = IPPROTO_UDP; 1123 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1124 /* 1125 * Append a request for an IRE if udp_v6src not 1126 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1127 */ 1128 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1129 if (!mp->b_cont) { 1130 rw_enter(&udp->udp_rwlock, RW_WRITER); 1131 udp->udp_pending_op = -1; 1132 rw_exit(&udp->udp_rwlock); 1133 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1134 return; 1135 } 1136 mp->b_cont->b_wptr += sizeof (ire_t); 1137 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1138 } 1139 if (udp->udp_family == AF_INET6) 1140 mp = ip_bind_v6(q, mp, connp, NULL); 1141 else 1142 mp = ip_bind_v4(q, mp, connp); 1143 1144 /* The above return NULL if the bind needs to be deferred */ 1145 if (mp != NULL) 1146 udp_bind_result(connp, mp); 1147 else 1148 CONN_INC_REF(connp); 1149 } 1150 1151 /* 1152 * This is called from ip_wput_nondata to handle the results of a 1153 * deferred UDP bind. It is called once the bind has been completed. 1154 */ 1155 void 1156 udp_resume_bind(conn_t *connp, mblk_t *mp) 1157 { 1158 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1159 1160 udp_bind_result(connp, mp); 1161 1162 CONN_OPER_PENDING_DONE(connp); 1163 } 1164 1165 /* 1166 * This routine handles each T_CONN_REQ message passed to udp. It 1167 * associates a default destination address with the stream. 
1168 * 1169 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1170 * T_BIND_REQ - specifying local and remote address/port 1171 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 1172 * T_OK_ACK - for the T_CONN_REQ 1173 * T_CONN_CON - to keep the TPI user happy 1174 * 1175 * The connect completes in udp_bind_result. 1176 * When a T_BIND_ACK is received information is extracted from the IRE 1177 * and the two appended messages are sent to the TPI user. 1178 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 1179 * convert it to an error ack for the appropriate primitive. 1180 */ 1181 static void 1182 udp_connect(queue_t *q, mblk_t *mp) 1183 { 1184 sin6_t *sin6; 1185 sin_t *sin; 1186 struct T_conn_req *tcr; 1187 in6_addr_t v6dst; 1188 ipaddr_t v4dst; 1189 uint16_t dstport; 1190 uint32_t flowinfo; 1191 mblk_t *mp1, *mp2; 1192 udp_fanout_t *udpf; 1193 udp_t *udp, *udp1; 1194 ushort_t ipversion; 1195 udp_stack_t *us; 1196 conn_t *connp = Q_TO_CONN(q); 1197 1198 udp = connp->conn_udp; 1199 tcr = (struct T_conn_req *)mp->b_rptr; 1200 us = udp->udp_us; 1201 1202 /* A bit of sanity checking */ 1203 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 1204 udp_err_ack(q, mp, TPROTO, 0); 1205 return; 1206 } 1207 1208 if (tcr->OPT_length != 0) { 1209 udp_err_ack(q, mp, TBADOPT, 0); 1210 return; 1211 } 1212 1213 /* 1214 * Determine packet type based on type of address passed in 1215 * the request should contain an IPv4 or IPv6 address. 
1216 * Make sure that address family matches the type of 1217 * family of the the address passed down 1218 */ 1219 switch (tcr->DEST_length) { 1220 default: 1221 udp_err_ack(q, mp, TBADADDR, 0); 1222 return; 1223 1224 case sizeof (sin_t): 1225 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 1226 sizeof (sin_t)); 1227 if (sin == NULL || !OK_32PTR((char *)sin)) { 1228 udp_err_ack(q, mp, TSYSERR, EINVAL); 1229 return; 1230 } 1231 if (udp->udp_family != AF_INET || 1232 sin->sin_family != AF_INET) { 1233 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1234 return; 1235 } 1236 v4dst = sin->sin_addr.s_addr; 1237 dstport = sin->sin_port; 1238 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1239 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1240 ipversion = IPV4_VERSION; 1241 break; 1242 1243 case sizeof (sin6_t): 1244 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 1245 sizeof (sin6_t)); 1246 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1247 udp_err_ack(q, mp, TSYSERR, EINVAL); 1248 return; 1249 } 1250 if (udp->udp_family != AF_INET6 || 1251 sin6->sin6_family != AF_INET6) { 1252 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1253 return; 1254 } 1255 v6dst = sin6->sin6_addr; 1256 dstport = sin6->sin6_port; 1257 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 1258 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 1259 ipversion = IPV4_VERSION; 1260 flowinfo = 0; 1261 } else { 1262 ipversion = IPV6_VERSION; 1263 flowinfo = sin6->sin6_flowinfo; 1264 } 1265 break; 1266 } 1267 if (dstport == 0) { 1268 udp_err_ack(q, mp, TBADADDR, 0); 1269 return; 1270 } 1271 1272 rw_enter(&udp->udp_rwlock, RW_WRITER); 1273 1274 /* 1275 * This UDP must have bound to a port already before doing a connect. 1276 * TPI mandates that users must send TPI primitives only 1 at a time 1277 * and wait for the response before sending the next primitive. 
1278 */ 1279 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 1280 rw_exit(&udp->udp_rwlock); 1281 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1282 "udp_connect: bad state, %u", udp->udp_state); 1283 udp_err_ack(q, mp, TOUTSTATE, 0); 1284 return; 1285 } 1286 udp->udp_pending_op = T_CONN_REQ; 1287 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 1288 1289 if (ipversion == IPV4_VERSION) { 1290 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1291 udp->udp_ip_snd_options_len; 1292 } else { 1293 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1294 } 1295 1296 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 1297 us->us_bind_fanout_size)]; 1298 1299 mutex_enter(&udpf->uf_lock); 1300 if (udp->udp_state == TS_DATA_XFER) { 1301 /* Already connected - clear out state */ 1302 udp->udp_v6src = udp->udp_bound_v6src; 1303 udp->udp_state = TS_IDLE; 1304 } 1305 1306 /* 1307 * Create a default IP header with no IP options. 1308 */ 1309 udp->udp_dstport = dstport; 1310 udp->udp_ipversion = ipversion; 1311 if (ipversion == IPV4_VERSION) { 1312 /* 1313 * Interpret a zero destination to mean loopback. 1314 * Update the T_CONN_REQ (sin/sin6) since it is used to 1315 * generate the T_CONN_CON. 1316 */ 1317 if (v4dst == INADDR_ANY) { 1318 v4dst = htonl(INADDR_LOOPBACK); 1319 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1320 if (udp->udp_family == AF_INET) { 1321 sin->sin_addr.s_addr = v4dst; 1322 } else { 1323 sin6->sin6_addr = v6dst; 1324 } 1325 } 1326 udp->udp_v6dst = v6dst; 1327 udp->udp_flowinfo = 0; 1328 1329 /* 1330 * If the destination address is multicast and 1331 * an outgoing multicast interface has been set, 1332 * use the address of that interface as our 1333 * source address if no source address has been set. 
1334 */ 1335 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 1336 CLASSD(v4dst) && 1337 udp->udp_multicast_if_addr != INADDR_ANY) { 1338 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 1339 &udp->udp_v6src); 1340 } 1341 } else { 1342 ASSERT(udp->udp_ipversion == IPV6_VERSION); 1343 /* 1344 * Interpret a zero destination to mean loopback. 1345 * Update the T_CONN_REQ (sin/sin6) since it is used to 1346 * generate the T_CONN_CON. 1347 */ 1348 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 1349 v6dst = ipv6_loopback; 1350 sin6->sin6_addr = v6dst; 1351 } 1352 udp->udp_v6dst = v6dst; 1353 udp->udp_flowinfo = flowinfo; 1354 /* 1355 * If the destination address is multicast and 1356 * an outgoing multicast interface has been set, 1357 * then the ip bind logic will pick the correct source 1358 * address (i.e. matching the outgoing multicast interface). 1359 */ 1360 } 1361 1362 /* 1363 * Verify that the src/port/dst/port is unique for all 1364 * connections in TS_DATA_XFER 1365 */ 1366 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 1367 if (udp1->udp_state != TS_DATA_XFER) 1368 continue; 1369 if (udp->udp_port != udp1->udp_port || 1370 udp->udp_ipversion != udp1->udp_ipversion || 1371 dstport != udp1->udp_dstport || 1372 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 1373 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 1374 !(IPCL_ZONE_MATCH(udp->udp_connp, 1375 udp1->udp_connp->conn_zoneid) || 1376 IPCL_ZONE_MATCH(udp1->udp_connp, 1377 udp->udp_connp->conn_zoneid))) 1378 continue; 1379 mutex_exit(&udpf->uf_lock); 1380 udp->udp_pending_op = -1; 1381 rw_exit(&udp->udp_rwlock); 1382 udp_err_ack(q, mp, TBADADDR, 0); 1383 return; 1384 } 1385 udp->udp_state = TS_DATA_XFER; 1386 mutex_exit(&udpf->uf_lock); 1387 1388 /* 1389 * Send down bind to IP to verify that there is a route 1390 * and to determine the source address. 1391 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 
1392 */ 1393 if (udp->udp_family == AF_INET) 1394 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t)); 1395 else 1396 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); 1397 if (mp1 == NULL) { 1398 bind_failed: 1399 mutex_enter(&udpf->uf_lock); 1400 udp->udp_state = TS_IDLE; 1401 udp->udp_pending_op = -1; 1402 mutex_exit(&udpf->uf_lock); 1403 rw_exit(&udp->udp_rwlock); 1404 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1405 return; 1406 } 1407 1408 rw_exit(&udp->udp_rwlock); 1409 /* 1410 * We also have to send a connection confirmation to 1411 * keep TLI happy. Prepare it for udp_bind_result. 1412 */ 1413 if (udp->udp_family == AF_INET) 1414 mp2 = mi_tpi_conn_con(NULL, (char *)sin, 1415 sizeof (*sin), NULL, 0); 1416 else 1417 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, 1418 sizeof (*sin6), NULL, 0); 1419 if (mp2 == NULL) { 1420 freemsg(mp1); 1421 rw_enter(&udp->udp_rwlock, RW_WRITER); 1422 goto bind_failed; 1423 } 1424 1425 mp = mi_tpi_ok_ack_alloc(mp); 1426 if (mp == NULL) { 1427 /* Unable to reuse the T_CONN_REQ for the ack. */ 1428 freemsg(mp2); 1429 rw_enter(&udp->udp_rwlock, RW_WRITER); 1430 mutex_enter(&udpf->uf_lock); 1431 udp->udp_state = TS_IDLE; 1432 udp->udp_pending_op = -1; 1433 mutex_exit(&udpf->uf_lock); 1434 rw_exit(&udp->udp_rwlock); 1435 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 1436 return; 1437 } 1438 1439 /* Hang onto the T_OK_ACK and T_CONN_CON for later. 
*/ 1440 linkb(mp1, mp); 1441 linkb(mp1, mp2); 1442 1443 mblk_setcred(mp1, connp->conn_cred); 1444 if (udp->udp_family == AF_INET) 1445 mp1 = ip_bind_v4(q, mp1, connp); 1446 else 1447 mp1 = ip_bind_v6(q, mp1, connp, NULL); 1448 1449 /* The above return NULL if the bind needs to be deferred */ 1450 if (mp1 != NULL) 1451 udp_bind_result(connp, mp1); 1452 else 1453 CONN_INC_REF(connp); 1454 } 1455 1456 static int 1457 udp_close(queue_t *q) 1458 { 1459 conn_t *connp = (conn_t *)q->q_ptr; 1460 udp_t *udp; 1461 1462 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1463 udp = connp->conn_udp; 1464 1465 udp_quiesce_conn(connp); 1466 ip_quiesce_conn(connp); 1467 /* 1468 * Disable read-side synchronous stream 1469 * interface and drain any queued data. 1470 */ 1471 udp_rcv_drain(q, udp, B_TRUE); 1472 ASSERT(!udp->udp_direct_sockfs); 1473 1474 qprocsoff(q); 1475 1476 ASSERT(udp->udp_rcv_cnt == 0); 1477 ASSERT(udp->udp_rcv_msgcnt == 0); 1478 ASSERT(udp->udp_rcv_list_head == NULL); 1479 ASSERT(udp->udp_rcv_list_tail == NULL); 1480 1481 udp_close_free(connp); 1482 1483 /* 1484 * Now we are truly single threaded on this stream, and can 1485 * delete the things hanging off the connp, and finally the connp. 1486 * We removed this connp from the fanout list, it cannot be 1487 * accessed thru the fanouts, and we already waited for the 1488 * conn_ref to drop to 0. We are already in close, so 1489 * there cannot be any other thread from the top. qprocsoff 1490 * has completed, and service has completed or won't run in 1491 * future. 
1492 */ 1493 ASSERT(connp->conn_ref == 1); 1494 1495 inet_minor_free(ip_minor_arena, connp->conn_dev); 1496 1497 connp->conn_ref--; 1498 ipcl_conn_destroy(connp); 1499 1500 q->q_ptr = WR(q)->q_ptr = NULL; 1501 return (0); 1502 } 1503 1504 /* 1505 * Called in the close path to quiesce the conn 1506 */ 1507 void 1508 udp_quiesce_conn(conn_t *connp) 1509 { 1510 udp_t *udp = connp->conn_udp; 1511 1512 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 1513 /* 1514 * Running in cluster mode - register unbind information 1515 */ 1516 if (udp->udp_ipversion == IPV4_VERSION) { 1517 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 1518 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 1519 (in_port_t)udp->udp_port); 1520 } else { 1521 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 1522 (uint8_t *)(&(udp->udp_v6src)), 1523 (in_port_t)udp->udp_port); 1524 } 1525 } 1526 1527 udp_bind_hash_remove(udp, B_FALSE); 1528 1529 } 1530 1531 void 1532 udp_close_free(conn_t *connp) 1533 { 1534 udp_t *udp = connp->conn_udp; 1535 1536 /* If there are any options associated with the stream, free them. */ 1537 if (udp->udp_ip_snd_options) { 1538 mi_free((char *)udp->udp_ip_snd_options); 1539 udp->udp_ip_snd_options = NULL; 1540 } 1541 1542 if (udp->udp_ip_rcv_options) { 1543 mi_free((char *)udp->udp_ip_rcv_options); 1544 udp->udp_ip_rcv_options = NULL; 1545 } 1546 1547 /* Free memory associated with sticky options */ 1548 if (udp->udp_sticky_hdrs_len != 0) { 1549 kmem_free(udp->udp_sticky_hdrs, 1550 udp->udp_sticky_hdrs_len); 1551 udp->udp_sticky_hdrs = NULL; 1552 udp->udp_sticky_hdrs_len = 0; 1553 } 1554 1555 ip6_pkt_free(&udp->udp_sticky_ipp); 1556 } 1557 1558 /* 1559 * This routine handles each T_DISCON_REQ message passed to udp 1560 * as an indicating that UDP is no longer connected. This results 1561 * in sending a T_BIND_REQ to IP to restore the binding to just 1562 * the local address/port. 
1563 * 1564 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1565 * T_BIND_REQ - specifying just the local address/port 1566 * T_OK_ACK - for the T_DISCON_REQ 1567 * 1568 * The disconnect completes in udp_bind_result. 1569 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 1570 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 1571 * convert it to an error ack for the appropriate primitive. 1572 */ 1573 static void 1574 udp_disconnect(queue_t *q, mblk_t *mp) 1575 { 1576 udp_t *udp; 1577 mblk_t *mp1; 1578 udp_fanout_t *udpf; 1579 udp_stack_t *us; 1580 conn_t *connp = Q_TO_CONN(q); 1581 1582 udp = connp->conn_udp; 1583 us = udp->udp_us; 1584 rw_enter(&udp->udp_rwlock, RW_WRITER); 1585 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 1586 rw_exit(&udp->udp_rwlock); 1587 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1588 "udp_disconnect: bad state, %u", udp->udp_state); 1589 udp_err_ack(q, mp, TOUTSTATE, 0); 1590 return; 1591 } 1592 udp->udp_pending_op = T_DISCON_REQ; 1593 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 1594 us->us_bind_fanout_size)]; 1595 mutex_enter(&udpf->uf_lock); 1596 udp->udp_v6src = udp->udp_bound_v6src; 1597 udp->udp_state = TS_IDLE; 1598 mutex_exit(&udpf->uf_lock); 1599 1600 /* 1601 * Send down bind to IP to remove the full binding and revert 1602 * to the local address binding. 1603 */ 1604 if (udp->udp_family == AF_INET) 1605 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 1606 else 1607 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 1608 if (mp1 == NULL) { 1609 udp->udp_pending_op = -1; 1610 rw_exit(&udp->udp_rwlock); 1611 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1612 return; 1613 } 1614 mp = mi_tpi_ok_ack_alloc(mp); 1615 if (mp == NULL) { 1616 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 1617 udp->udp_pending_op = -1; 1618 rw_exit(&udp->udp_rwlock); 1619 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 1620 return; 1621 } 1622 1623 if (udp->udp_family == AF_INET6) { 1624 int error; 1625 1626 /* Rebuild the header template */ 1627 error = udp_build_hdrs(udp); 1628 if (error != 0) { 1629 udp->udp_pending_op = -1; 1630 rw_exit(&udp->udp_rwlock); 1631 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 1632 freemsg(mp1); 1633 return; 1634 } 1635 } 1636 1637 rw_exit(&udp->udp_rwlock); 1638 /* Append the T_OK_ACK to the T_BIND_REQ for udp_bind_ack */ 1639 linkb(mp1, mp); 1640 1641 if (udp->udp_family == AF_INET6) 1642 mp1 = ip_bind_v6(q, mp1, connp, NULL); 1643 else 1644 mp1 = ip_bind_v4(q, mp1, connp); 1645 1646 /* The above return NULL if the bind needs to be deferred */ 1647 if (mp1 != NULL) 1648 udp_bind_result(connp, mp1); 1649 else 1650 CONN_INC_REF(connp); 1651 } 1652 1653 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1654 static void 1655 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1656 { 1657 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1658 qreply(q, mp); 1659 } 1660 1661 /* Shorthand to generate and send TPI error acks to our client */ 1662 static void 1663 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1664 int sys_error) 1665 { 1666 struct T_error_ack *teackp; 1667 1668 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1669 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1670 teackp = (struct T_error_ack *)mp->b_rptr; 1671 teackp->ERROR_prim = primitive; 1672 teackp->TLI_error = t_error; 1673 teackp->UNIX_error = sys_error; 1674 qreply(q, mp); 1675 } 1676 } 1677 1678 /*ARGSUSED*/ 1679 static int 1680 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1681 { 1682 int i; 1683 udp_t *udp = Q_TO_UDP(q); 1684 udp_stack_t *us = udp->udp_us; 1685 1686 for (i = 0; i < us->us_num_epriv_ports; i++) { 1687 if 
(us->us_epriv_ports[i] != 0) 1688 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1689 } 1690 return (0); 1691 } 1692 1693 /* ARGSUSED */ 1694 static int 1695 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1696 cred_t *cr) 1697 { 1698 long new_value; 1699 int i; 1700 udp_t *udp = Q_TO_UDP(q); 1701 udp_stack_t *us = udp->udp_us; 1702 1703 /* 1704 * Fail the request if the new value does not lie within the 1705 * port number limits. 1706 */ 1707 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1708 new_value <= 0 || new_value >= 65536) { 1709 return (EINVAL); 1710 } 1711 1712 /* Check if the value is already in the list */ 1713 for (i = 0; i < us->us_num_epriv_ports; i++) { 1714 if (new_value == us->us_epriv_ports[i]) { 1715 return (EEXIST); 1716 } 1717 } 1718 /* Find an empty slot */ 1719 for (i = 0; i < us->us_num_epriv_ports; i++) { 1720 if (us->us_epriv_ports[i] == 0) 1721 break; 1722 } 1723 if (i == us->us_num_epriv_ports) { 1724 return (EOVERFLOW); 1725 } 1726 1727 /* Set the new value */ 1728 us->us_epriv_ports[i] = (in_port_t)new_value; 1729 return (0); 1730 } 1731 1732 /* ARGSUSED */ 1733 static int 1734 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1735 cred_t *cr) 1736 { 1737 long new_value; 1738 int i; 1739 udp_t *udp = Q_TO_UDP(q); 1740 udp_stack_t *us = udp->udp_us; 1741 1742 /* 1743 * Fail the request if the new value does not lie within the 1744 * port number limits. 
1745 */ 1746 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1747 new_value <= 0 || new_value >= 65536) { 1748 return (EINVAL); 1749 } 1750 1751 /* Check that the value is already in the list */ 1752 for (i = 0; i < us->us_num_epriv_ports; i++) { 1753 if (us->us_epriv_ports[i] == new_value) 1754 break; 1755 } 1756 if (i == us->us_num_epriv_ports) { 1757 return (ESRCH); 1758 } 1759 1760 /* Clear the value */ 1761 us->us_epriv_ports[i] = 0; 1762 return (0); 1763 } 1764 1765 /* At minimum we need 4 bytes of UDP header */ 1766 #define ICMP_MIN_UDP_HDR 4 1767 1768 /* 1769 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1770 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1771 * Assumes that IP has pulled up everything up to and including the ICMP header. 1772 */ 1773 static void 1774 udp_icmp_error(queue_t *q, mblk_t *mp) 1775 { 1776 icmph_t *icmph; 1777 ipha_t *ipha; 1778 int iph_hdr_length; 1779 udpha_t *udpha; 1780 sin_t sin; 1781 sin6_t sin6; 1782 mblk_t *mp1; 1783 int error = 0; 1784 udp_t *udp = Q_TO_UDP(q); 1785 1786 ipha = (ipha_t *)mp->b_rptr; 1787 1788 ASSERT(OK_32PTR(mp->b_rptr)); 1789 1790 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1791 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1792 udp_icmp_error_ipv6(q, mp); 1793 return; 1794 } 1795 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1796 1797 /* Skip past the outer IP and ICMP headers */ 1798 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1799 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1800 ipha = (ipha_t *)&icmph[1]; 1801 1802 /* Skip past the inner IP and find the ULP header */ 1803 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1804 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1805 1806 switch (icmph->icmph_type) { 1807 case ICMP_DEST_UNREACHABLE: 1808 switch (icmph->icmph_code) { 1809 case ICMP_FRAGMENTATION_NEEDED: 1810 /* 1811 * IP has already adjusted the path MTU. 
1812 */ 1813 break; 1814 case ICMP_PORT_UNREACHABLE: 1815 case ICMP_PROTOCOL_UNREACHABLE: 1816 error = ECONNREFUSED; 1817 break; 1818 default: 1819 /* Transient errors */ 1820 break; 1821 } 1822 break; 1823 default: 1824 /* Transient errors */ 1825 break; 1826 } 1827 if (error == 0) { 1828 freemsg(mp); 1829 return; 1830 } 1831 1832 /* 1833 * Deliver T_UDERROR_IND when the application has asked for it. 1834 * The socket layer enables this automatically when connected. 1835 */ 1836 if (!udp->udp_dgram_errind) { 1837 freemsg(mp); 1838 return; 1839 } 1840 1841 switch (udp->udp_family) { 1842 case AF_INET: 1843 sin = sin_null; 1844 sin.sin_family = AF_INET; 1845 sin.sin_addr.s_addr = ipha->ipha_dst; 1846 sin.sin_port = udpha->uha_dst_port; 1847 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 1848 error); 1849 break; 1850 case AF_INET6: 1851 sin6 = sin6_null; 1852 sin6.sin6_family = AF_INET6; 1853 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1854 sin6.sin6_port = udpha->uha_dst_port; 1855 1856 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1857 NULL, 0, error); 1858 break; 1859 } 1860 if (mp1) 1861 putnext(q, mp1); 1862 freemsg(mp); 1863 } 1864 1865 /* 1866 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1867 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1868 * Assumes that IP has pulled up all the extension headers as well as the 1869 * ICMPv6 header. 
1870 */ 1871 static void 1872 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 1873 { 1874 icmp6_t *icmp6; 1875 ip6_t *ip6h, *outer_ip6h; 1876 uint16_t iph_hdr_length; 1877 uint8_t *nexthdrp; 1878 udpha_t *udpha; 1879 sin6_t sin6; 1880 mblk_t *mp1; 1881 int error = 0; 1882 udp_t *udp = Q_TO_UDP(q); 1883 udp_stack_t *us = udp->udp_us; 1884 1885 outer_ip6h = (ip6_t *)mp->b_rptr; 1886 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1887 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1888 else 1889 iph_hdr_length = IPV6_HDR_LEN; 1890 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1891 ip6h = (ip6_t *)&icmp6[1]; 1892 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1893 freemsg(mp); 1894 return; 1895 } 1896 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1897 1898 switch (icmp6->icmp6_type) { 1899 case ICMP6_DST_UNREACH: 1900 switch (icmp6->icmp6_code) { 1901 case ICMP6_DST_UNREACH_NOPORT: 1902 error = ECONNREFUSED; 1903 break; 1904 case ICMP6_DST_UNREACH_ADMIN: 1905 case ICMP6_DST_UNREACH_NOROUTE: 1906 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1907 case ICMP6_DST_UNREACH_ADDR: 1908 /* Transient errors */ 1909 break; 1910 default: 1911 break; 1912 } 1913 break; 1914 case ICMP6_PACKET_TOO_BIG: { 1915 struct T_unitdata_ind *tudi; 1916 struct T_opthdr *toh; 1917 size_t udi_size; 1918 mblk_t *newmp; 1919 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1920 sizeof (struct ip6_mtuinfo); 1921 sin6_t *sin6; 1922 struct ip6_mtuinfo *mtuinfo; 1923 1924 /* 1925 * If the application has requested to receive path mtu 1926 * information, send up an empty message containing an 1927 * IPV6_PATHMTU ancillary data item. 1928 */ 1929 if (!udp->udp_ipv6_recvpathmtu) 1930 break; 1931 1932 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1933 opt_length; 1934 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1935 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1936 break; 1937 } 1938 1939 /* 1940 * newmp->b_cont is left to NULL on purpose. 
This is an 1941 * empty message containing only ancillary data. 1942 */ 1943 newmp->b_datap->db_type = M_PROTO; 1944 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1945 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1946 tudi->PRIM_type = T_UNITDATA_IND; 1947 tudi->SRC_length = sizeof (sin6_t); 1948 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1949 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1950 tudi->OPT_length = opt_length; 1951 1952 sin6 = (sin6_t *)&tudi[1]; 1953 bzero(sin6, sizeof (sin6_t)); 1954 sin6->sin6_family = AF_INET6; 1955 sin6->sin6_addr = udp->udp_v6dst; 1956 1957 toh = (struct T_opthdr *)&sin6[1]; 1958 toh->level = IPPROTO_IPV6; 1959 toh->name = IPV6_PATHMTU; 1960 toh->len = opt_length; 1961 toh->status = 0; 1962 1963 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1964 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1965 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1966 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1967 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1968 /* 1969 * We've consumed everything we need from the original 1970 * message. Free it, then send our empty message. 1971 */ 1972 freemsg(mp); 1973 putnext(q, newmp); 1974 return; 1975 } 1976 case ICMP6_TIME_EXCEEDED: 1977 /* Transient errors */ 1978 break; 1979 case ICMP6_PARAM_PROB: 1980 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1981 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1982 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1983 (uchar_t *)nexthdrp) { 1984 error = ECONNREFUSED; 1985 break; 1986 } 1987 break; 1988 } 1989 if (error == 0) { 1990 freemsg(mp); 1991 return; 1992 } 1993 1994 /* 1995 * Deliver T_UDERROR_IND when the application has asked for it. 1996 * The socket layer enables this automatically when connected. 
1997 */ 1998 if (!udp->udp_dgram_errind) { 1999 freemsg(mp); 2000 return; 2001 } 2002 2003 sin6 = sin6_null; 2004 sin6.sin6_family = AF_INET6; 2005 sin6.sin6_addr = ip6h->ip6_dst; 2006 sin6.sin6_port = udpha->uha_dst_port; 2007 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2008 2009 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2010 error); 2011 if (mp1) 2012 putnext(q, mp1); 2013 freemsg(mp); 2014 } 2015 2016 /* 2017 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 2018 * The local address is filled in if endpoint is bound. The remote address 2019 * is filled in if remote address has been precified ("connected endpoint") 2020 * (The concept of connected CLTS sockets is alien to published TPI 2021 * but we support it anyway). 2022 */ 2023 static void 2024 udp_addr_req(queue_t *q, mblk_t *mp) 2025 { 2026 sin_t *sin; 2027 sin6_t *sin6; 2028 mblk_t *ackmp; 2029 struct T_addr_ack *taa; 2030 udp_t *udp = Q_TO_UDP(q); 2031 2032 /* Make it large enough for worst case */ 2033 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2034 2 * sizeof (sin6_t), 1); 2035 if (ackmp == NULL) { 2036 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2037 return; 2038 } 2039 taa = (struct T_addr_ack *)ackmp->b_rptr; 2040 2041 bzero(taa, sizeof (struct T_addr_ack)); 2042 ackmp->b_wptr = (uchar_t *)&taa[1]; 2043 2044 taa->PRIM_type = T_ADDR_ACK; 2045 ackmp->b_datap->db_type = M_PCPROTO; 2046 rw_enter(&udp->udp_rwlock, RW_READER); 2047 /* 2048 * Note: Following code assumes 32 bit alignment of basic 2049 * data structures like sin_t and struct T_addr_ack. 
2050 */ 2051 if (udp->udp_state != TS_UNBND) { 2052 /* 2053 * Fill in local address first 2054 */ 2055 taa->LOCADDR_offset = sizeof (*taa); 2056 if (udp->udp_family == AF_INET) { 2057 taa->LOCADDR_length = sizeof (sin_t); 2058 sin = (sin_t *)&taa[1]; 2059 /* Fill zeroes and then initialize non-zero fields */ 2060 *sin = sin_null; 2061 sin->sin_family = AF_INET; 2062 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2063 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2064 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2065 sin->sin_addr.s_addr); 2066 } else { 2067 /* 2068 * INADDR_ANY 2069 * udp_v6src is not set, we might be bound to 2070 * broadcast/multicast. Use udp_bound_v6src as 2071 * local address instead (that could 2072 * also still be INADDR_ANY) 2073 */ 2074 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2075 sin->sin_addr.s_addr); 2076 } 2077 sin->sin_port = udp->udp_port; 2078 ackmp->b_wptr = (uchar_t *)&sin[1]; 2079 if (udp->udp_state == TS_DATA_XFER) { 2080 /* 2081 * connected, fill remote address too 2082 */ 2083 taa->REMADDR_length = sizeof (sin_t); 2084 /* assumed 32-bit alignment */ 2085 taa->REMADDR_offset = taa->LOCADDR_offset + 2086 taa->LOCADDR_length; 2087 2088 sin = (sin_t *)(ackmp->b_rptr + 2089 taa->REMADDR_offset); 2090 /* initialize */ 2091 *sin = sin_null; 2092 sin->sin_family = AF_INET; 2093 sin->sin_addr.s_addr = 2094 V4_PART_OF_V6(udp->udp_v6dst); 2095 sin->sin_port = udp->udp_dstport; 2096 ackmp->b_wptr = (uchar_t *)&sin[1]; 2097 } 2098 } else { 2099 taa->LOCADDR_length = sizeof (sin6_t); 2100 sin6 = (sin6_t *)&taa[1]; 2101 /* Fill zeroes and then initialize non-zero fields */ 2102 *sin6 = sin6_null; 2103 sin6->sin6_family = AF_INET6; 2104 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2105 sin6->sin6_addr = udp->udp_v6src; 2106 } else { 2107 /* 2108 * UNSPECIFIED 2109 * udp_v6src is not set, we might be bound to 2110 * broadcast/multicast. 
Use udp_bound_v6src as 2111 * local address instead (that could 2112 * also still be UNSPECIFIED) 2113 */ 2114 sin6->sin6_addr = 2115 udp->udp_bound_v6src; 2116 } 2117 sin6->sin6_port = udp->udp_port; 2118 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2119 if (udp->udp_state == TS_DATA_XFER) { 2120 /* 2121 * connected, fill remote address too 2122 */ 2123 taa->REMADDR_length = sizeof (sin6_t); 2124 /* assumed 32-bit alignment */ 2125 taa->REMADDR_offset = taa->LOCADDR_offset + 2126 taa->LOCADDR_length; 2127 2128 sin6 = (sin6_t *)(ackmp->b_rptr + 2129 taa->REMADDR_offset); 2130 /* initialize */ 2131 *sin6 = sin6_null; 2132 sin6->sin6_family = AF_INET6; 2133 sin6->sin6_addr = udp->udp_v6dst; 2134 sin6->sin6_port = udp->udp_dstport; 2135 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2136 } 2137 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2138 } 2139 } 2140 rw_exit(&udp->udp_rwlock); 2141 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2142 qreply(q, ackmp); 2143 } 2144 2145 static void 2146 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2147 { 2148 if (udp->udp_family == AF_INET) { 2149 *tap = udp_g_t_info_ack_ipv4; 2150 } else { 2151 *tap = udp_g_t_info_ack_ipv6; 2152 } 2153 tap->CURRENT_state = udp->udp_state; 2154 tap->OPT_size = udp_max_optsize; 2155 } 2156 2157 /* 2158 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2159 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2160 * udp_g_t_info_ack. The current state of the stream is copied from 2161 * udp_state. 
2162 */ 2163 static void 2164 udp_capability_req(queue_t *q, mblk_t *mp) 2165 { 2166 t_uscalar_t cap_bits1; 2167 struct T_capability_ack *tcap; 2168 udp_t *udp = Q_TO_UDP(q); 2169 2170 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2171 2172 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2173 mp->b_datap->db_type, T_CAPABILITY_ACK); 2174 if (!mp) 2175 return; 2176 2177 tcap = (struct T_capability_ack *)mp->b_rptr; 2178 tcap->CAP_bits1 = 0; 2179 2180 if (cap_bits1 & TC1_INFO) { 2181 udp_copy_info(&tcap->INFO_ack, udp); 2182 tcap->CAP_bits1 |= TC1_INFO; 2183 } 2184 2185 qreply(q, mp); 2186 } 2187 2188 /* 2189 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2190 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2191 * The current state of the stream is copied from udp_state. 2192 */ 2193 static void 2194 udp_info_req(queue_t *q, mblk_t *mp) 2195 { 2196 udp_t *udp = Q_TO_UDP(q); 2197 2198 /* Create a T_INFO_ACK message. */ 2199 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2200 T_INFO_ACK); 2201 if (!mp) 2202 return; 2203 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2204 qreply(q, mp); 2205 } 2206 2207 /* 2208 * IP recognizes seven kinds of bind requests: 2209 * 2210 * - A zero-length address binds only to the protocol number. 2211 * 2212 * - A 4-byte address is treated as a request to 2213 * validate that the address is a valid local IPv4 2214 * address, appropriate for an application to bind to. 2215 * IP does the verification, but does not make any note 2216 * of the address at this time. 2217 * 2218 * - A 16-byte address contains is treated as a request 2219 * to validate a local IPv6 address, as the 4-byte 2220 * address case above. 2221 * 2222 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2223 * use it for the inbound fanout of packets. 
2224 * 2225 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2226 * use it for the inbound fanout of packets. 2227 * 2228 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2229 * information consisting of local and remote addresses 2230 * and ports. In this case, the addresses are both 2231 * validated as appropriate for this operation, and, if 2232 * so, the information is retained for use in the 2233 * inbound fanout. 2234 * 2235 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2236 * fanout information, like the 12-byte case above. 2237 * 2238 * IP will also fill in the IRE request mblk with information 2239 * regarding our peer. In all cases, we notify IP of our protocol 2240 * type by appending a single protocol byte to the bind request. 2241 */ 2242 static mblk_t * 2243 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2244 { 2245 char *cp; 2246 mblk_t *mp; 2247 struct T_bind_req *tbr; 2248 ipa_conn_t *ac; 2249 ipa6_conn_t *ac6; 2250 sin_t *sin; 2251 sin6_t *sin6; 2252 2253 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2254 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 2255 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2256 if (!mp) 2257 return (mp); 2258 mp->b_datap->db_type = M_PROTO; 2259 tbr = (struct T_bind_req *)mp->b_rptr; 2260 tbr->PRIM_type = bind_prim; 2261 tbr->ADDR_offset = sizeof (*tbr); 2262 tbr->CONIND_number = 0; 2263 tbr->ADDR_length = addr_length; 2264 cp = (char *)&tbr[1]; 2265 switch (addr_length) { 2266 case sizeof (ipa_conn_t): 2267 ASSERT(udp->udp_family == AF_INET); 2268 /* Append a request for an IRE */ 2269 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2270 if (!mp->b_cont) { 2271 freemsg(mp); 2272 return (NULL); 2273 } 2274 mp->b_cont->b_wptr += sizeof (ire_t); 2275 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2276 2277 /* cp known to be 32 bit aligned */ 2278 ac = (ipa_conn_t *)cp; 2279 ac->ac_laddr = 
V4_PART_OF_V6(udp->udp_v6src); 2280 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 2281 ac->ac_fport = udp->udp_dstport; 2282 ac->ac_lport = udp->udp_port; 2283 break; 2284 2285 case sizeof (ipa6_conn_t): 2286 ASSERT(udp->udp_family == AF_INET6); 2287 /* Append a request for an IRE */ 2288 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2289 if (!mp->b_cont) { 2290 freemsg(mp); 2291 return (NULL); 2292 } 2293 mp->b_cont->b_wptr += sizeof (ire_t); 2294 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2295 2296 /* cp known to be 32 bit aligned */ 2297 ac6 = (ipa6_conn_t *)cp; 2298 ac6->ac6_laddr = udp->udp_v6src; 2299 ac6->ac6_faddr = udp->udp_v6dst; 2300 ac6->ac6_fport = udp->udp_dstport; 2301 ac6->ac6_lport = udp->udp_port; 2302 break; 2303 2304 case sizeof (sin_t): 2305 ASSERT(udp->udp_family == AF_INET); 2306 /* Append a request for an IRE */ 2307 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2308 if (!mp->b_cont) { 2309 freemsg(mp); 2310 return (NULL); 2311 } 2312 mp->b_cont->b_wptr += sizeof (ire_t); 2313 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2314 2315 sin = (sin_t *)cp; 2316 *sin = sin_null; 2317 sin->sin_family = AF_INET; 2318 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2319 sin->sin_port = udp->udp_port; 2320 break; 2321 2322 case sizeof (sin6_t): 2323 ASSERT(udp->udp_family == AF_INET6); 2324 /* Append a request for an IRE */ 2325 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2326 if (!mp->b_cont) { 2327 freemsg(mp); 2328 return (NULL); 2329 } 2330 mp->b_cont->b_wptr += sizeof (ire_t); 2331 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2332 2333 sin6 = (sin6_t *)cp; 2334 *sin6 = sin6_null; 2335 sin6->sin6_family = AF_INET6; 2336 sin6->sin6_addr = udp->udp_bound_v6src; 2337 sin6->sin6_port = udp->udp_port; 2338 break; 2339 } 2340 /* Add protocol number to end */ 2341 cp[addr_length] = (char)IPPROTO_UDP; 2342 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2343 return (mp); 2344 } 2345 2346 /* For /dev/udp aka AF_INET open */ 2347 static 
int 2348 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2349 { 2350 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 2351 } 2352 2353 /* For /dev/udp6 aka AF_INET6 open */ 2354 static int 2355 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2356 { 2357 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 2358 } 2359 2360 /* 2361 * This is the open routine for udp. It allocates a udp_t structure for 2362 * the stream and, on the first open of the module, creates an ND table. 2363 */ 2364 /*ARGSUSED2*/ 2365 static int 2366 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 2367 boolean_t isv6) 2368 { 2369 int err; 2370 udp_t *udp; 2371 conn_t *connp; 2372 dev_t conn_dev; 2373 zoneid_t zoneid; 2374 netstack_t *ns; 2375 udp_stack_t *us; 2376 2377 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2378 2379 /* If the stream is already open, return immediately. */ 2380 if (q->q_ptr != NULL) 2381 return (0); 2382 2383 if (sflag == MODOPEN) 2384 return (EINVAL); 2385 2386 ns = netstack_find_by_cred(credp); 2387 ASSERT(ns != NULL); 2388 us = ns->netstack_udp; 2389 ASSERT(us != NULL); 2390 2391 /* 2392 * For exclusive stacks we set the zoneid to zero 2393 * to make UDP operate as if in the global zone. 2394 */ 2395 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 2396 zoneid = GLOBAL_ZONEID; 2397 else 2398 zoneid = crgetzoneid(credp); 2399 2400 if ((conn_dev = inet_minor_alloc(ip_minor_arena)) == 0) { 2401 netstack_rele(ns); 2402 return (EBUSY); 2403 } 2404 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 2405 2406 connp = ipcl_conn_create(IPCL_UDPCONN, KM_SLEEP, ns); 2407 connp->conn_dev = conn_dev; 2408 udp = connp->conn_udp; 2409 2410 /* 2411 * ipcl_conn_create did a netstack_hold. Undo the hold that was 2412 * done by netstack_find_by_cred() 2413 */ 2414 netstack_rele(ns); 2415 2416 /* 2417 * Initialize the udp_t structure for this stream. 
2418 */ 2419 q->q_ptr = connp; 2420 WR(q)->q_ptr = connp; 2421 connp->conn_rq = q; 2422 connp->conn_wq = WR(q); 2423 2424 rw_enter(&udp->udp_rwlock, RW_WRITER); 2425 ASSERT(connp->conn_ulp == IPPROTO_UDP); 2426 ASSERT(connp->conn_udp == udp); 2427 ASSERT(udp->udp_connp == connp); 2428 2429 /* Set the initial state of the stream and the privilege status. */ 2430 udp->udp_state = TS_UNBND; 2431 if (isv6) { 2432 udp->udp_family = AF_INET6; 2433 udp->udp_ipversion = IPV6_VERSION; 2434 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2435 udp->udp_ttl = us->us_ipv6_hoplimit; 2436 connp->conn_af_isv6 = B_TRUE; 2437 connp->conn_flags |= IPCL_ISV6; 2438 } else { 2439 udp->udp_family = AF_INET; 2440 udp->udp_ipversion = IPV4_VERSION; 2441 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2442 udp->udp_ttl = us->us_ipv4_ttl; 2443 connp->conn_af_isv6 = B_FALSE; 2444 connp->conn_flags &= ~IPCL_ISV6; 2445 } 2446 2447 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2448 udp->udp_pending_op = -1; 2449 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2450 connp->conn_zoneid = zoneid; 2451 2452 udp->udp_open_time = lbolt64; 2453 udp->udp_open_pid = curproc->p_pid; 2454 2455 /* 2456 * If the caller has the process-wide flag set, then default to MAC 2457 * exempt mode. This allows read-down to unlabeled hosts. 
2458 */ 2459 if (getpflags(NET_MAC_AWARE, credp) != 0) 2460 udp->udp_mac_exempt = B_TRUE; 2461 2462 if (flag & SO_SOCKSTR) { 2463 connp->conn_flags |= IPCL_SOCKET; 2464 udp->udp_issocket = B_TRUE; 2465 udp->udp_direct_sockfs = B_TRUE; 2466 } 2467 2468 connp->conn_ulp_labeled = is_system_labeled(); 2469 2470 udp->udp_us = us; 2471 2472 q->q_hiwat = us->us_recv_hiwat; 2473 WR(q)->q_hiwat = us->us_xmit_hiwat; 2474 WR(q)->q_lowat = us->us_xmit_lowat; 2475 2476 connp->conn_recv = udp_input; 2477 crhold(credp); 2478 connp->conn_cred = credp; 2479 2480 mutex_enter(&connp->conn_lock); 2481 connp->conn_state_flags &= ~CONN_INCIPIENT; 2482 mutex_exit(&connp->conn_lock); 2483 2484 qprocson(q); 2485 2486 if (udp->udp_family == AF_INET6) { 2487 /* Build initial header template for transmit */ 2488 if ((err = udp_build_hdrs(udp)) != 0) { 2489 rw_exit(&udp->udp_rwlock); 2490 qprocsoff(q); 2491 ipcl_conn_destroy(connp); 2492 return (err); 2493 } 2494 } 2495 rw_exit(&udp->udp_rwlock); 2496 2497 /* Set the Stream head write offset and high watermark. */ 2498 (void) mi_set_sth_wroff(q, 2499 udp->udp_max_hdr_len + us->us_wroff_extra); 2500 (void) mi_set_sth_hiwat(q, udp_set_rcv_hiwat(udp, q->q_hiwat)); 2501 2502 return (0); 2503 } 2504 2505 /* 2506 * Which UDP options OK to set through T_UNITDATA_REQ... 
2507 */ 2508 /* ARGSUSED */ 2509 static boolean_t 2510 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2511 { 2512 return (B_TRUE); 2513 } 2514 2515 /* 2516 * This routine gets default values of certain options whose default 2517 * values are maintained by protcol specific code 2518 */ 2519 /* ARGSUSED */ 2520 int 2521 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2522 { 2523 udp_t *udp = Q_TO_UDP(q); 2524 udp_stack_t *us = udp->udp_us; 2525 int *i1 = (int *)ptr; 2526 2527 switch (level) { 2528 case IPPROTO_IP: 2529 switch (name) { 2530 case IP_MULTICAST_TTL: 2531 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2532 return (sizeof (uchar_t)); 2533 case IP_MULTICAST_LOOP: 2534 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2535 return (sizeof (uchar_t)); 2536 } 2537 break; 2538 case IPPROTO_IPV6: 2539 switch (name) { 2540 case IPV6_MULTICAST_HOPS: 2541 *i1 = IP_DEFAULT_MULTICAST_TTL; 2542 return (sizeof (int)); 2543 case IPV6_MULTICAST_LOOP: 2544 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2545 return (sizeof (int)); 2546 case IPV6_UNICAST_HOPS: 2547 *i1 = us->us_ipv6_hoplimit; 2548 return (sizeof (int)); 2549 } 2550 break; 2551 } 2552 return (-1); 2553 } 2554 2555 /* 2556 * This routine retrieves the current status of socket options. 2557 * It returns the size of the option retrieved. 
2558 */ 2559 int 2560 udp_opt_get_locked(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2561 { 2562 int *i1 = (int *)ptr; 2563 conn_t *connp; 2564 udp_t *udp; 2565 ip6_pkt_t *ipp; 2566 int len; 2567 udp_stack_t *us; 2568 2569 connp = Q_TO_CONN(q); 2570 udp = connp->conn_udp; 2571 ipp = &udp->udp_sticky_ipp; 2572 us = udp->udp_us; 2573 2574 switch (level) { 2575 case SOL_SOCKET: 2576 switch (name) { 2577 case SO_DEBUG: 2578 *i1 = udp->udp_debug; 2579 break; /* goto sizeof (int) option return */ 2580 case SO_REUSEADDR: 2581 *i1 = udp->udp_reuseaddr; 2582 break; /* goto sizeof (int) option return */ 2583 case SO_TYPE: 2584 *i1 = SOCK_DGRAM; 2585 break; /* goto sizeof (int) option return */ 2586 2587 /* 2588 * The following three items are available here, 2589 * but are only meaningful to IP. 2590 */ 2591 case SO_DONTROUTE: 2592 *i1 = udp->udp_dontroute; 2593 break; /* goto sizeof (int) option return */ 2594 case SO_USELOOPBACK: 2595 *i1 = udp->udp_useloopback; 2596 break; /* goto sizeof (int) option return */ 2597 case SO_BROADCAST: 2598 *i1 = udp->udp_broadcast; 2599 break; /* goto sizeof (int) option return */ 2600 2601 case SO_SNDBUF: 2602 *i1 = q->q_hiwat; 2603 break; /* goto sizeof (int) option return */ 2604 case SO_RCVBUF: 2605 *i1 = RD(q)->q_hiwat; 2606 break; /* goto sizeof (int) option return */ 2607 case SO_DGRAM_ERRIND: 2608 *i1 = udp->udp_dgram_errind; 2609 break; /* goto sizeof (int) option return */ 2610 case SO_RECVUCRED: 2611 *i1 = udp->udp_recvucred; 2612 break; /* goto sizeof (int) option return */ 2613 case SO_TIMESTAMP: 2614 *i1 = udp->udp_timestamp; 2615 break; /* goto sizeof (int) option return */ 2616 case SO_ANON_MLP: 2617 *i1 = udp->udp_anon_mlp; 2618 break; /* goto sizeof (int) option return */ 2619 case SO_MAC_EXEMPT: 2620 *i1 = udp->udp_mac_exempt; 2621 break; /* goto sizeof (int) option return */ 2622 case SO_ALLZONES: 2623 *i1 = connp->conn_allzones; 2624 break; /* goto sizeof (int) option return */ 2625 case SO_EXCLBIND: 
2626 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 2627 break; 2628 case SO_PROTOTYPE: 2629 *i1 = IPPROTO_UDP; 2630 break; 2631 case SO_DOMAIN: 2632 *i1 = udp->udp_family; 2633 break; 2634 default: 2635 return (-1); 2636 } 2637 break; 2638 case IPPROTO_IP: 2639 if (udp->udp_family != AF_INET) 2640 return (-1); 2641 switch (name) { 2642 case IP_OPTIONS: 2643 case T_IP_OPTIONS: 2644 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2645 if (len > 0) { 2646 bcopy(udp->udp_ip_rcv_options + 2647 udp->udp_label_len, ptr, len); 2648 } 2649 return (len); 2650 case IP_TOS: 2651 case T_IP_TOS: 2652 *i1 = (int)udp->udp_type_of_service; 2653 break; /* goto sizeof (int) option return */ 2654 case IP_TTL: 2655 *i1 = (int)udp->udp_ttl; 2656 break; /* goto sizeof (int) option return */ 2657 case IP_NEXTHOP: 2658 case IP_RECVPKTINFO: 2659 /* 2660 * This also handles IP_PKTINFO. 2661 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2662 * Differentiation is based on the size of the argument 2663 * passed in. 2664 * This option is handled in IP which will return an 2665 * error for IP_PKTINFO as it's not supported as a 2666 * sticky option. 
2667 */ 2668 return (-EINVAL); 2669 case IP_MULTICAST_IF: 2670 /* 0 address if not set */ 2671 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2672 return (sizeof (ipaddr_t)); 2673 case IP_MULTICAST_TTL: 2674 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2675 return (sizeof (uchar_t)); 2676 case IP_MULTICAST_LOOP: 2677 *ptr = connp->conn_multicast_loop; 2678 return (sizeof (uint8_t)); 2679 case IP_RECVOPTS: 2680 *i1 = udp->udp_recvopts; 2681 break; /* goto sizeof (int) option return */ 2682 case IP_RECVDSTADDR: 2683 *i1 = udp->udp_recvdstaddr; 2684 break; /* goto sizeof (int) option return */ 2685 case IP_RECVIF: 2686 *i1 = udp->udp_recvif; 2687 break; /* goto sizeof (int) option return */ 2688 case IP_RECVSLLA: 2689 *i1 = udp->udp_recvslla; 2690 break; /* goto sizeof (int) option return */ 2691 case IP_RECVTTL: 2692 *i1 = udp->udp_recvttl; 2693 break; /* goto sizeof (int) option return */ 2694 case IP_ADD_MEMBERSHIP: 2695 case IP_DROP_MEMBERSHIP: 2696 case IP_BLOCK_SOURCE: 2697 case IP_UNBLOCK_SOURCE: 2698 case IP_ADD_SOURCE_MEMBERSHIP: 2699 case IP_DROP_SOURCE_MEMBERSHIP: 2700 case MCAST_JOIN_GROUP: 2701 case MCAST_LEAVE_GROUP: 2702 case MCAST_BLOCK_SOURCE: 2703 case MCAST_UNBLOCK_SOURCE: 2704 case MCAST_JOIN_SOURCE_GROUP: 2705 case MCAST_LEAVE_SOURCE_GROUP: 2706 case IP_DONTFAILOVER_IF: 2707 /* cannot "get" the value for these */ 2708 return (-1); 2709 case IP_BOUND_IF: 2710 /* Zero if not set */ 2711 *i1 = udp->udp_bound_if; 2712 break; /* goto sizeof (int) option return */ 2713 case IP_UNSPEC_SRC: 2714 *i1 = udp->udp_unspec_source; 2715 break; /* goto sizeof (int) option return */ 2716 case IP_XMIT_IF: 2717 *i1 = udp->udp_xmit_if; 2718 break; /* goto sizeof (int) option return */ 2719 default: 2720 return (-1); 2721 } 2722 break; 2723 case IPPROTO_IPV6: 2724 if (udp->udp_family != AF_INET6) 2725 return (-1); 2726 switch (name) { 2727 case IPV6_UNICAST_HOPS: 2728 *i1 = (unsigned int)udp->udp_ttl; 2729 break; /* goto sizeof (int) option return */ 2730 case 
IPV6_MULTICAST_IF: 2731 /* 0 index if not set */ 2732 *i1 = udp->udp_multicast_if_index; 2733 break; /* goto sizeof (int) option return */ 2734 case IPV6_MULTICAST_HOPS: 2735 *i1 = udp->udp_multicast_ttl; 2736 break; /* goto sizeof (int) option return */ 2737 case IPV6_MULTICAST_LOOP: 2738 *i1 = connp->conn_multicast_loop; 2739 break; /* goto sizeof (int) option return */ 2740 case IPV6_JOIN_GROUP: 2741 case IPV6_LEAVE_GROUP: 2742 case MCAST_JOIN_GROUP: 2743 case MCAST_LEAVE_GROUP: 2744 case MCAST_BLOCK_SOURCE: 2745 case MCAST_UNBLOCK_SOURCE: 2746 case MCAST_JOIN_SOURCE_GROUP: 2747 case MCAST_LEAVE_SOURCE_GROUP: 2748 /* cannot "get" the value for these */ 2749 return (-1); 2750 case IPV6_BOUND_IF: 2751 /* Zero if not set */ 2752 *i1 = udp->udp_bound_if; 2753 break; /* goto sizeof (int) option return */ 2754 case IPV6_UNSPEC_SRC: 2755 *i1 = udp->udp_unspec_source; 2756 break; /* goto sizeof (int) option return */ 2757 case IPV6_RECVPKTINFO: 2758 *i1 = udp->udp_ip_recvpktinfo; 2759 break; /* goto sizeof (int) option return */ 2760 case IPV6_RECVTCLASS: 2761 *i1 = udp->udp_ipv6_recvtclass; 2762 break; /* goto sizeof (int) option return */ 2763 case IPV6_RECVPATHMTU: 2764 *i1 = udp->udp_ipv6_recvpathmtu; 2765 break; /* goto sizeof (int) option return */ 2766 case IPV6_RECVHOPLIMIT: 2767 *i1 = udp->udp_ipv6_recvhoplimit; 2768 break; /* goto sizeof (int) option return */ 2769 case IPV6_RECVHOPOPTS: 2770 *i1 = udp->udp_ipv6_recvhopopts; 2771 break; /* goto sizeof (int) option return */ 2772 case IPV6_RECVDSTOPTS: 2773 *i1 = udp->udp_ipv6_recvdstopts; 2774 break; /* goto sizeof (int) option return */ 2775 case _OLD_IPV6_RECVDSTOPTS: 2776 *i1 = udp->udp_old_ipv6_recvdstopts; 2777 break; /* goto sizeof (int) option return */ 2778 case IPV6_RECVRTHDRDSTOPTS: 2779 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2780 break; /* goto sizeof (int) option return */ 2781 case IPV6_RECVRTHDR: 2782 *i1 = udp->udp_ipv6_recvrthdr; 2783 break; /* goto sizeof (int) option return */ 2784 case 
IPV6_PKTINFO: { 2785 /* XXX assumes that caller has room for max size! */ 2786 struct in6_pktinfo *pkti; 2787 2788 pkti = (struct in6_pktinfo *)ptr; 2789 if (ipp->ipp_fields & IPPF_IFINDEX) 2790 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2791 else 2792 pkti->ipi6_ifindex = 0; 2793 if (ipp->ipp_fields & IPPF_ADDR) 2794 pkti->ipi6_addr = ipp->ipp_addr; 2795 else 2796 pkti->ipi6_addr = ipv6_all_zeros; 2797 return (sizeof (struct in6_pktinfo)); 2798 } 2799 case IPV6_TCLASS: 2800 if (ipp->ipp_fields & IPPF_TCLASS) 2801 *i1 = ipp->ipp_tclass; 2802 else 2803 *i1 = IPV6_FLOW_TCLASS( 2804 IPV6_DEFAULT_VERS_AND_FLOW); 2805 break; /* goto sizeof (int) option return */ 2806 case IPV6_NEXTHOP: { 2807 sin6_t *sin6 = (sin6_t *)ptr; 2808 2809 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2810 return (0); 2811 *sin6 = sin6_null; 2812 sin6->sin6_family = AF_INET6; 2813 sin6->sin6_addr = ipp->ipp_nexthop; 2814 return (sizeof (sin6_t)); 2815 } 2816 case IPV6_HOPOPTS: 2817 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2818 return (0); 2819 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2820 return (0); 2821 /* 2822 * The cipso/label option is added by kernel. 2823 * User is not usually aware of this option. 2824 * We copy out the hbh opt after the label option. 
2825 */ 2826 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2827 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2828 if (udp->udp_label_len_v6 > 0) { 2829 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2830 ptr[1] = (ipp->ipp_hopoptslen - 2831 udp->udp_label_len_v6 + 7) / 8 - 1; 2832 } 2833 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2834 case IPV6_RTHDRDSTOPTS: 2835 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2836 return (0); 2837 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2838 return (ipp->ipp_rtdstoptslen); 2839 case IPV6_RTHDR: 2840 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2841 return (0); 2842 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2843 return (ipp->ipp_rthdrlen); 2844 case IPV6_DSTOPTS: 2845 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2846 return (0); 2847 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2848 return (ipp->ipp_dstoptslen); 2849 case IPV6_PATHMTU: 2850 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2851 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2852 us->us_netstack)); 2853 default: 2854 return (-1); 2855 } 2856 break; 2857 case IPPROTO_UDP: 2858 switch (name) { 2859 case UDP_ANONPRIVBIND: 2860 *i1 = udp->udp_anon_priv_bind; 2861 break; 2862 case UDP_EXCLBIND: 2863 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2864 break; 2865 case UDP_RCVHDR: 2866 *i1 = udp->udp_rcvhdr ? 1 : 0; 2867 break; 2868 case UDP_NAT_T_ENDPOINT: 2869 *i1 = udp->udp_nat_t_endpoint; 2870 break; 2871 default: 2872 return (-1); 2873 } 2874 break; 2875 default: 2876 return (-1); 2877 } 2878 return (sizeof (int)); 2879 } 2880 2881 int 2882 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2883 { 2884 udp_t *udp; 2885 int err; 2886 2887 udp = Q_TO_UDP(q); 2888 2889 rw_enter(&udp->udp_rwlock, RW_READER); 2890 err = udp_opt_get_locked(q, level, name, ptr); 2891 rw_exit(&udp->udp_rwlock); 2892 return (err); 2893 } 2894 2895 /* 2896 * This routine sets socket options. 
2897 */ 2898 /* ARGSUSED */ 2899 int 2900 udp_opt_set_locked(queue_t *q, uint_t optset_context, int level, 2901 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 2902 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2903 { 2904 udpattrs_t *attrs = thisdg_attrs; 2905 int *i1 = (int *)invalp; 2906 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2907 boolean_t checkonly; 2908 int error; 2909 conn_t *connp; 2910 udp_t *udp; 2911 uint_t newlen; 2912 udp_stack_t *us; 2913 size_t sth_wroff; 2914 2915 connp = Q_TO_CONN(q); 2916 udp = connp->conn_udp; 2917 us = udp->udp_us; 2918 2919 switch (optset_context) { 2920 case SETFN_OPTCOM_CHECKONLY: 2921 checkonly = B_TRUE; 2922 /* 2923 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2924 * inlen != 0 implies value supplied and 2925 * we have to "pretend" to set it. 2926 * inlen == 0 implies that there is no 2927 * value part in T_CHECK request and just validation 2928 * done elsewhere should be enough, we just return here. 2929 */ 2930 if (inlen == 0) { 2931 *outlenp = 0; 2932 return (0); 2933 } 2934 break; 2935 case SETFN_OPTCOM_NEGOTIATE: 2936 checkonly = B_FALSE; 2937 break; 2938 case SETFN_UD_NEGOTIATE: 2939 case SETFN_CONN_NEGOTIATE: 2940 checkonly = B_FALSE; 2941 /* 2942 * Negotiating local and "association-related" options 2943 * through T_UNITDATA_REQ. 2944 * 2945 * Following routine can filter out ones we do not 2946 * want to be "set" this way. 2947 */ 2948 if (!udp_opt_allow_udr_set(level, name)) { 2949 *outlenp = 0; 2950 return (EINVAL); 2951 } 2952 break; 2953 default: 2954 /* 2955 * We should never get here 2956 */ 2957 *outlenp = 0; 2958 return (EINVAL); 2959 } 2960 2961 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2962 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2963 2964 /* 2965 * For fixed length options, no sanity check 2966 * of passed in length is done. It is assumed *_optcom_req() 2967 * routines do the right thing. 
2968 */ 2969 2970 switch (level) { 2971 case SOL_SOCKET: 2972 switch (name) { 2973 case SO_REUSEADDR: 2974 if (!checkonly) 2975 udp->udp_reuseaddr = onoff; 2976 break; 2977 case SO_DEBUG: 2978 if (!checkonly) 2979 udp->udp_debug = onoff; 2980 break; 2981 /* 2982 * The following three items are available here, 2983 * but are only meaningful to IP. 2984 */ 2985 case SO_DONTROUTE: 2986 if (!checkonly) 2987 udp->udp_dontroute = onoff; 2988 break; 2989 case SO_USELOOPBACK: 2990 if (!checkonly) 2991 udp->udp_useloopback = onoff; 2992 break; 2993 case SO_BROADCAST: 2994 if (!checkonly) 2995 udp->udp_broadcast = onoff; 2996 break; 2997 2998 case SO_SNDBUF: 2999 if (*i1 > us->us_max_buf) { 3000 *outlenp = 0; 3001 return (ENOBUFS); 3002 } 3003 if (!checkonly) { 3004 q->q_hiwat = *i1; 3005 } 3006 break; 3007 case SO_RCVBUF: 3008 if (*i1 > us->us_max_buf) { 3009 *outlenp = 0; 3010 return (ENOBUFS); 3011 } 3012 if (!checkonly) { 3013 RD(q)->q_hiwat = *i1; 3014 rw_exit(&udp->udp_rwlock); 3015 (void) mi_set_sth_hiwat(RD(q), 3016 udp_set_rcv_hiwat(udp, *i1)); 3017 rw_enter(&udp->udp_rwlock, RW_WRITER); 3018 } 3019 break; 3020 case SO_DGRAM_ERRIND: 3021 if (!checkonly) 3022 udp->udp_dgram_errind = onoff; 3023 break; 3024 case SO_RECVUCRED: 3025 if (!checkonly) 3026 udp->udp_recvucred = onoff; 3027 break; 3028 case SO_ALLZONES: 3029 /* 3030 * "soft" error (negative) 3031 * option not handled at this level 3032 * Do not modify *outlenp. 
3033 */ 3034 return (-EINVAL); 3035 case SO_TIMESTAMP: 3036 if (!checkonly) 3037 udp->udp_timestamp = onoff; 3038 break; 3039 case SO_ANON_MLP: 3040 if (!checkonly) 3041 udp->udp_anon_mlp = onoff; 3042 break; 3043 case SO_MAC_EXEMPT: 3044 if (secpolicy_net_mac_aware(cr) != 0 || 3045 udp->udp_state != TS_UNBND) 3046 return (EACCES); 3047 if (!checkonly) 3048 udp->udp_mac_exempt = onoff; 3049 break; 3050 case SCM_UCRED: { 3051 struct ucred_s *ucr; 3052 cred_t *cr, *newcr; 3053 ts_label_t *tsl; 3054 3055 /* 3056 * Only sockets that have proper privileges and are 3057 * bound to MLPs will have any other value here, so 3058 * this implicitly tests for privilege to set label. 3059 */ 3060 if (connp->conn_mlp_type == mlptSingle) 3061 break; 3062 ucr = (struct ucred_s *)invalp; 3063 if (inlen != ucredsize || 3064 ucr->uc_labeloff < sizeof (*ucr) || 3065 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 3066 return (EINVAL); 3067 if (!checkonly) { 3068 mblk_t *mb; 3069 3070 if (attrs == NULL || 3071 (mb = attrs->udpattr_mb) == NULL) 3072 return (EINVAL); 3073 if ((cr = DB_CRED(mb)) == NULL) 3074 cr = udp->udp_connp->conn_cred; 3075 ASSERT(cr != NULL); 3076 if ((tsl = crgetlabel(cr)) == NULL) 3077 return (EINVAL); 3078 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 3079 tsl->tsl_doi, KM_NOSLEEP); 3080 if (newcr == NULL) 3081 return (ENOSR); 3082 mblk_setcred(mb, newcr); 3083 attrs->udpattr_credset = B_TRUE; 3084 crfree(newcr); 3085 } 3086 break; 3087 } 3088 case SO_EXCLBIND: 3089 if (!checkonly) 3090 udp->udp_exclbind = onoff; 3091 break; 3092 default: 3093 *outlenp = 0; 3094 return (EINVAL); 3095 } 3096 break; 3097 case IPPROTO_IP: 3098 if (udp->udp_family != AF_INET) { 3099 *outlenp = 0; 3100 return (ENOPROTOOPT); 3101 } 3102 switch (name) { 3103 case IP_OPTIONS: 3104 case T_IP_OPTIONS: 3105 /* Save options for use by IP. 
*/ 3106 newlen = inlen + udp->udp_label_len; 3107 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 3108 *outlenp = 0; 3109 return (EINVAL); 3110 } 3111 if (checkonly) 3112 break; 3113 3114 /* 3115 * Update the stored options taking into account 3116 * any CIPSO option which we should not overwrite. 3117 */ 3118 if (!tsol_option_set(&udp->udp_ip_snd_options, 3119 &udp->udp_ip_snd_options_len, 3120 udp->udp_label_len, invalp, inlen)) { 3121 *outlenp = 0; 3122 return (ENOMEM); 3123 } 3124 3125 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3126 UDPH_SIZE + udp->udp_ip_snd_options_len; 3127 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3128 rw_exit(&udp->udp_rwlock); 3129 (void) mi_set_sth_wroff(RD(q), sth_wroff); 3130 rw_enter(&udp->udp_rwlock, RW_WRITER); 3131 break; 3132 3133 case IP_TTL: 3134 if (!checkonly) { 3135 udp->udp_ttl = (uchar_t)*i1; 3136 } 3137 break; 3138 case IP_TOS: 3139 case T_IP_TOS: 3140 if (!checkonly) { 3141 udp->udp_type_of_service = (uchar_t)*i1; 3142 } 3143 break; 3144 case IP_MULTICAST_IF: { 3145 /* 3146 * TODO should check OPTMGMT reply and undo this if 3147 * there is an error. 
3148 */ 3149 struct in_addr *inap = (struct in_addr *)invalp; 3150 if (!checkonly) { 3151 udp->udp_multicast_if_addr = 3152 inap->s_addr; 3153 } 3154 break; 3155 } 3156 case IP_MULTICAST_TTL: 3157 if (!checkonly) 3158 udp->udp_multicast_ttl = *invalp; 3159 break; 3160 case IP_MULTICAST_LOOP: 3161 if (!checkonly) 3162 connp->conn_multicast_loop = *invalp; 3163 break; 3164 case IP_RECVOPTS: 3165 if (!checkonly) 3166 udp->udp_recvopts = onoff; 3167 break; 3168 case IP_RECVDSTADDR: 3169 if (!checkonly) 3170 udp->udp_recvdstaddr = onoff; 3171 break; 3172 case IP_RECVIF: 3173 if (!checkonly) 3174 udp->udp_recvif = onoff; 3175 break; 3176 case IP_RECVSLLA: 3177 if (!checkonly) 3178 udp->udp_recvslla = onoff; 3179 break; 3180 case IP_RECVTTL: 3181 if (!checkonly) 3182 udp->udp_recvttl = onoff; 3183 break; 3184 case IP_PKTINFO: { 3185 /* 3186 * This also handles IP_RECVPKTINFO. 3187 * IP_PKTINFO and IP_RECVPKTINFO have same value. 3188 * Differentiation is based on the size of the 3189 * argument passed in. 3190 */ 3191 struct in_pktinfo *pktinfop; 3192 ip4_pkt_t *attr_pktinfop; 3193 3194 if (checkonly) 3195 break; 3196 3197 if (inlen == sizeof (int)) { 3198 /* 3199 * This is IP_RECVPKTINFO option. 3200 * Keep a local copy of whether this option is 3201 * set or not and pass it down to IP for 3202 * processing. 3203 */ 3204 3205 udp->udp_ip_recvpktinfo = onoff; 3206 return (-EINVAL); 3207 } 3208 3209 if (attrs == NULL || 3210 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 3211 /* 3212 * sticky option or no buffer to return 3213 * the results. 
3214 */ 3215 return (EINVAL); 3216 } 3217 3218 if (inlen != sizeof (struct in_pktinfo)) 3219 return (EINVAL); 3220 3221 pktinfop = (struct in_pktinfo *)invalp; 3222 3223 /* 3224 * At least one of the values should be specified 3225 */ 3226 if (pktinfop->ipi_ifindex == 0 && 3227 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 3228 return (EINVAL); 3229 } 3230 3231 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 3232 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 3233 3234 break; 3235 } 3236 case IP_ADD_MEMBERSHIP: 3237 case IP_DROP_MEMBERSHIP: 3238 case IP_BLOCK_SOURCE: 3239 case IP_UNBLOCK_SOURCE: 3240 case IP_ADD_SOURCE_MEMBERSHIP: 3241 case IP_DROP_SOURCE_MEMBERSHIP: 3242 case MCAST_JOIN_GROUP: 3243 case MCAST_LEAVE_GROUP: 3244 case MCAST_BLOCK_SOURCE: 3245 case MCAST_UNBLOCK_SOURCE: 3246 case MCAST_JOIN_SOURCE_GROUP: 3247 case MCAST_LEAVE_SOURCE_GROUP: 3248 case IP_SEC_OPT: 3249 case IP_NEXTHOP: 3250 /* 3251 * "soft" error (negative) 3252 * option not handled at this level 3253 * Do not modify *outlenp. 
3254 */ 3255 return (-EINVAL); 3256 case IP_BOUND_IF: 3257 if (!checkonly) 3258 udp->udp_bound_if = *i1; 3259 break; 3260 case IP_UNSPEC_SRC: 3261 if (!checkonly) 3262 udp->udp_unspec_source = onoff; 3263 break; 3264 case IP_XMIT_IF: 3265 if (!checkonly) 3266 udp->udp_xmit_if = *i1; 3267 break; 3268 default: 3269 *outlenp = 0; 3270 return (EINVAL); 3271 } 3272 break; 3273 case IPPROTO_IPV6: { 3274 ip6_pkt_t *ipp; 3275 boolean_t sticky; 3276 3277 if (udp->udp_family != AF_INET6) { 3278 *outlenp = 0; 3279 return (ENOPROTOOPT); 3280 } 3281 /* 3282 * Deal with both sticky options and ancillary data 3283 */ 3284 sticky = B_FALSE; 3285 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 3286 NULL) { 3287 /* sticky options, or none */ 3288 ipp = &udp->udp_sticky_ipp; 3289 sticky = B_TRUE; 3290 } 3291 3292 switch (name) { 3293 case IPV6_MULTICAST_IF: 3294 if (!checkonly) 3295 udp->udp_multicast_if_index = *i1; 3296 break; 3297 case IPV6_UNICAST_HOPS: 3298 /* -1 means use default */ 3299 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3300 *outlenp = 0; 3301 return (EINVAL); 3302 } 3303 if (!checkonly) { 3304 if (*i1 == -1) { 3305 udp->udp_ttl = ipp->ipp_unicast_hops = 3306 us->us_ipv6_hoplimit; 3307 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3308 /* Pass modified value to IP. */ 3309 *i1 = udp->udp_ttl; 3310 } else { 3311 udp->udp_ttl = ipp->ipp_unicast_hops = 3312 (uint8_t)*i1; 3313 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3314 } 3315 /* Rebuild the header template */ 3316 error = udp_build_hdrs(udp); 3317 if (error != 0) { 3318 *outlenp = 0; 3319 return (error); 3320 } 3321 } 3322 break; 3323 case IPV6_MULTICAST_HOPS: 3324 /* -1 means use default */ 3325 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3326 *outlenp = 0; 3327 return (EINVAL); 3328 } 3329 if (!checkonly) { 3330 if (*i1 == -1) { 3331 udp->udp_multicast_ttl = 3332 ipp->ipp_multicast_hops = 3333 IP_DEFAULT_MULTICAST_TTL; 3334 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3335 /* Pass modified value to IP. 
*/ 3336 *i1 = udp->udp_multicast_ttl; 3337 } else { 3338 udp->udp_multicast_ttl = 3339 ipp->ipp_multicast_hops = 3340 (uint8_t)*i1; 3341 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3342 } 3343 } 3344 break; 3345 case IPV6_MULTICAST_LOOP: 3346 if (*i1 != 0 && *i1 != 1) { 3347 *outlenp = 0; 3348 return (EINVAL); 3349 } 3350 if (!checkonly) 3351 connp->conn_multicast_loop = *i1; 3352 break; 3353 case IPV6_JOIN_GROUP: 3354 case IPV6_LEAVE_GROUP: 3355 case MCAST_JOIN_GROUP: 3356 case MCAST_LEAVE_GROUP: 3357 case MCAST_BLOCK_SOURCE: 3358 case MCAST_UNBLOCK_SOURCE: 3359 case MCAST_JOIN_SOURCE_GROUP: 3360 case MCAST_LEAVE_SOURCE_GROUP: 3361 /* 3362 * "soft" error (negative) 3363 * option not handled at this level 3364 * Note: Do not modify *outlenp 3365 */ 3366 return (-EINVAL); 3367 case IPV6_BOUND_IF: 3368 if (!checkonly) 3369 udp->udp_bound_if = *i1; 3370 break; 3371 case IPV6_UNSPEC_SRC: 3372 if (!checkonly) 3373 udp->udp_unspec_source = onoff; 3374 break; 3375 /* 3376 * Set boolean switches for ancillary data delivery 3377 */ 3378 case IPV6_RECVPKTINFO: 3379 if (!checkonly) 3380 udp->udp_ip_recvpktinfo = onoff; 3381 break; 3382 case IPV6_RECVTCLASS: 3383 if (!checkonly) { 3384 udp->udp_ipv6_recvtclass = onoff; 3385 } 3386 break; 3387 case IPV6_RECVPATHMTU: 3388 if (!checkonly) { 3389 udp->udp_ipv6_recvpathmtu = onoff; 3390 } 3391 break; 3392 case IPV6_RECVHOPLIMIT: 3393 if (!checkonly) 3394 udp->udp_ipv6_recvhoplimit = onoff; 3395 break; 3396 case IPV6_RECVHOPOPTS: 3397 if (!checkonly) 3398 udp->udp_ipv6_recvhopopts = onoff; 3399 break; 3400 case IPV6_RECVDSTOPTS: 3401 if (!checkonly) 3402 udp->udp_ipv6_recvdstopts = onoff; 3403 break; 3404 case _OLD_IPV6_RECVDSTOPTS: 3405 if (!checkonly) 3406 udp->udp_old_ipv6_recvdstopts = onoff; 3407 break; 3408 case IPV6_RECVRTHDRDSTOPTS: 3409 if (!checkonly) 3410 udp->udp_ipv6_recvrthdrdstopts = onoff; 3411 break; 3412 case IPV6_RECVRTHDR: 3413 if (!checkonly) 3414 udp->udp_ipv6_recvrthdr = onoff; 3415 break; 3416 /* 3417 * Set 
sticky options or ancillary data. 3418 * If sticky options, (re)build any extension headers 3419 * that might be needed as a result. 3420 */ 3421 case IPV6_PKTINFO: 3422 /* 3423 * The source address and ifindex are verified 3424 * in ip_opt_set(). For ancillary data the 3425 * source address is checked in ip_wput_v6. 3426 */ 3427 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3428 return (EINVAL); 3429 if (checkonly) 3430 break; 3431 3432 if (inlen == 0) { 3433 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3434 ipp->ipp_sticky_ignored |= 3435 (IPPF_IFINDEX|IPPF_ADDR); 3436 } else { 3437 struct in6_pktinfo *pkti; 3438 3439 pkti = (struct in6_pktinfo *)invalp; 3440 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3441 ipp->ipp_addr = pkti->ipi6_addr; 3442 if (ipp->ipp_ifindex != 0) 3443 ipp->ipp_fields |= IPPF_IFINDEX; 3444 else 3445 ipp->ipp_fields &= ~IPPF_IFINDEX; 3446 if (!IN6_IS_ADDR_UNSPECIFIED( 3447 &ipp->ipp_addr)) 3448 ipp->ipp_fields |= IPPF_ADDR; 3449 else 3450 ipp->ipp_fields &= ~IPPF_ADDR; 3451 } 3452 if (sticky) { 3453 error = udp_build_hdrs(udp); 3454 if (error != 0) 3455 return (error); 3456 } 3457 break; 3458 case IPV6_HOPLIMIT: 3459 if (sticky) 3460 return (EINVAL); 3461 if (inlen != 0 && inlen != sizeof (int)) 3462 return (EINVAL); 3463 if (checkonly) 3464 break; 3465 3466 if (inlen == 0) { 3467 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3468 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3469 } else { 3470 if (*i1 > 255 || *i1 < -1) 3471 return (EINVAL); 3472 if (*i1 == -1) 3473 ipp->ipp_hoplimit = 3474 us->us_ipv6_hoplimit; 3475 else 3476 ipp->ipp_hoplimit = *i1; 3477 ipp->ipp_fields |= IPPF_HOPLIMIT; 3478 } 3479 break; 3480 case IPV6_TCLASS: 3481 if (inlen != 0 && inlen != sizeof (int)) 3482 return (EINVAL); 3483 if (checkonly) 3484 break; 3485 3486 if (inlen == 0) { 3487 ipp->ipp_fields &= ~IPPF_TCLASS; 3488 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3489 } else { 3490 if (*i1 > 255 || *i1 < -1) 3491 return (EINVAL); 3492 if (*i1 == -1) 3493 ipp->ipp_tclass = 
0; 3494 else 3495 ipp->ipp_tclass = *i1; 3496 ipp->ipp_fields |= IPPF_TCLASS; 3497 } 3498 if (sticky) { 3499 error = udp_build_hdrs(udp); 3500 if (error != 0) 3501 return (error); 3502 } 3503 break; 3504 case IPV6_NEXTHOP: 3505 /* 3506 * IP will verify that the nexthop is reachable 3507 * and fail for sticky options. 3508 */ 3509 if (inlen != 0 && inlen != sizeof (sin6_t)) 3510 return (EINVAL); 3511 if (checkonly) 3512 break; 3513 3514 if (inlen == 0) { 3515 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3516 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3517 } else { 3518 sin6_t *sin6 = (sin6_t *)invalp; 3519 3520 if (sin6->sin6_family != AF_INET6) 3521 return (EAFNOSUPPORT); 3522 if (IN6_IS_ADDR_V4MAPPED( 3523 &sin6->sin6_addr)) 3524 return (EADDRNOTAVAIL); 3525 ipp->ipp_nexthop = sin6->sin6_addr; 3526 if (!IN6_IS_ADDR_UNSPECIFIED( 3527 &ipp->ipp_nexthop)) 3528 ipp->ipp_fields |= IPPF_NEXTHOP; 3529 else 3530 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3531 } 3532 if (sticky) { 3533 error = udp_build_hdrs(udp); 3534 if (error != 0) 3535 return (error); 3536 } 3537 break; 3538 case IPV6_HOPOPTS: { 3539 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3540 /* 3541 * Sanity checks - minimum size, size a multiple of 3542 * eight bytes, and matching size passed in. 3543 */ 3544 if (inlen != 0 && 3545 inlen != (8 * (hopts->ip6h_len + 1))) 3546 return (EINVAL); 3547 3548 if (checkonly) 3549 break; 3550 3551 error = optcom_pkt_set(invalp, inlen, sticky, 3552 (uchar_t **)&ipp->ipp_hopopts, 3553 &ipp->ipp_hopoptslen, 3554 sticky ? 
udp->udp_label_len_v6 : 0); 3555 if (error != 0) 3556 return (error); 3557 if (ipp->ipp_hopoptslen == 0) { 3558 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3559 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3560 } else { 3561 ipp->ipp_fields |= IPPF_HOPOPTS; 3562 } 3563 if (sticky) { 3564 error = udp_build_hdrs(udp); 3565 if (error != 0) 3566 return (error); 3567 } 3568 break; 3569 } 3570 case IPV6_RTHDRDSTOPTS: { 3571 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3572 3573 /* 3574 * Sanity checks - minimum size, size a multiple of 3575 * eight bytes, and matching size passed in. 3576 */ 3577 if (inlen != 0 && 3578 inlen != (8 * (dopts->ip6d_len + 1))) 3579 return (EINVAL); 3580 3581 if (checkonly) 3582 break; 3583 3584 if (inlen == 0) { 3585 if (sticky && 3586 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3587 kmem_free(ipp->ipp_rtdstopts, 3588 ipp->ipp_rtdstoptslen); 3589 ipp->ipp_rtdstopts = NULL; 3590 ipp->ipp_rtdstoptslen = 0; 3591 } 3592 3593 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3594 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3595 } else { 3596 error = optcom_pkt_set(invalp, inlen, sticky, 3597 (uchar_t **)&ipp->ipp_rtdstopts, 3598 &ipp->ipp_rtdstoptslen, 0); 3599 if (error != 0) 3600 return (error); 3601 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3602 } 3603 if (sticky) { 3604 error = udp_build_hdrs(udp); 3605 if (error != 0) 3606 return (error); 3607 } 3608 break; 3609 } 3610 case IPV6_DSTOPTS: { 3611 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3612 3613 /* 3614 * Sanity checks - minimum size, size a multiple of 3615 * eight bytes, and matching size passed in. 
3616 */ 3617 if (inlen != 0 && 3618 inlen != (8 * (dopts->ip6d_len + 1))) 3619 return (EINVAL); 3620 3621 if (checkonly) 3622 break; 3623 3624 if (inlen == 0) { 3625 if (sticky && 3626 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3627 kmem_free(ipp->ipp_dstopts, 3628 ipp->ipp_dstoptslen); 3629 ipp->ipp_dstopts = NULL; 3630 ipp->ipp_dstoptslen = 0; 3631 } 3632 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3633 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3634 } else { 3635 error = optcom_pkt_set(invalp, inlen, sticky, 3636 (uchar_t **)&ipp->ipp_dstopts, 3637 &ipp->ipp_dstoptslen, 0); 3638 if (error != 0) 3639 return (error); 3640 ipp->ipp_fields |= IPPF_DSTOPTS; 3641 } 3642 if (sticky) { 3643 error = udp_build_hdrs(udp); 3644 if (error != 0) 3645 return (error); 3646 } 3647 break; 3648 } 3649 case IPV6_RTHDR: { 3650 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3651 3652 /* 3653 * Sanity checks - minimum size, size a multiple of 3654 * eight bytes, and matching size passed in. 3655 */ 3656 if (inlen != 0 && 3657 inlen != (8 * (rt->ip6r_len + 1))) 3658 return (EINVAL); 3659 3660 if (checkonly) 3661 break; 3662 3663 if (inlen == 0) { 3664 if (sticky && 3665 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3666 kmem_free(ipp->ipp_rthdr, 3667 ipp->ipp_rthdrlen); 3668 ipp->ipp_rthdr = NULL; 3669 ipp->ipp_rthdrlen = 0; 3670 } 3671 ipp->ipp_fields &= ~IPPF_RTHDR; 3672 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3673 } else { 3674 error = optcom_pkt_set(invalp, inlen, sticky, 3675 (uchar_t **)&ipp->ipp_rthdr, 3676 &ipp->ipp_rthdrlen, 0); 3677 if (error != 0) 3678 return (error); 3679 ipp->ipp_fields |= IPPF_RTHDR; 3680 } 3681 if (sticky) { 3682 error = udp_build_hdrs(udp); 3683 if (error != 0) 3684 return (error); 3685 } 3686 break; 3687 } 3688 3689 case IPV6_DONTFRAG: 3690 if (checkonly) 3691 break; 3692 3693 if (onoff) { 3694 ipp->ipp_fields |= IPPF_DONTFRAG; 3695 } else { 3696 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3697 } 3698 break; 3699 3700 case IPV6_USE_MIN_MTU: 3701 if (inlen != sizeof (int)) 3702 return 
(EINVAL); 3703 3704 if (*i1 < -1 || *i1 > 1) 3705 return (EINVAL); 3706 3707 if (checkonly) 3708 break; 3709 3710 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3711 ipp->ipp_use_min_mtu = *i1; 3712 break; 3713 3714 case IPV6_BOUND_PIF: 3715 case IPV6_SEC_OPT: 3716 case IPV6_DONTFAILOVER_IF: 3717 case IPV6_SRC_PREFERENCES: 3718 case IPV6_V6ONLY: 3719 /* Handled at the IP level */ 3720 return (-EINVAL); 3721 default: 3722 *outlenp = 0; 3723 return (EINVAL); 3724 } 3725 break; 3726 } /* end IPPROTO_IPV6 */ 3727 case IPPROTO_UDP: 3728 switch (name) { 3729 case UDP_ANONPRIVBIND: 3730 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 3731 *outlenp = 0; 3732 return (error); 3733 } 3734 if (!checkonly) { 3735 udp->udp_anon_priv_bind = onoff; 3736 } 3737 break; 3738 case UDP_EXCLBIND: 3739 if (!checkonly) 3740 udp->udp_exclbind = onoff; 3741 break; 3742 case UDP_RCVHDR: 3743 if (!checkonly) 3744 udp->udp_rcvhdr = onoff; 3745 break; 3746 case UDP_NAT_T_ENDPOINT: 3747 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 3748 *outlenp = 0; 3749 return (error); 3750 } 3751 3752 /* 3753 * Use udp_family instead so we can avoid ambiguitites 3754 * with AF_INET6 sockets that may switch from IPv4 3755 * to IPv6. 3756 */ 3757 if (udp->udp_family != AF_INET) { 3758 *outlenp = 0; 3759 return (EAFNOSUPPORT); 3760 } 3761 3762 if (!checkonly) { 3763 udp->udp_nat_t_endpoint = onoff; 3764 3765 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3766 UDPH_SIZE + udp->udp_ip_snd_options_len; 3767 3768 /* Also, adjust wroff */ 3769 if (onoff) { 3770 udp->udp_max_hdr_len += 3771 sizeof (uint32_t); 3772 } 3773 (void) mi_set_sth_wroff(RD(q), 3774 udp->udp_max_hdr_len + us->us_wroff_extra); 3775 } 3776 break; 3777 default: 3778 *outlenp = 0; 3779 return (EINVAL); 3780 } 3781 break; 3782 default: 3783 *outlenp = 0; 3784 return (EINVAL); 3785 } 3786 /* 3787 * Common case of OK return with outval same as inval. 
3788 */ 3789 if (invalp != outvalp) { 3790 /* don't trust bcopy for identical src/dst */ 3791 (void) bcopy(invalp, outvalp, inlen); 3792 } 3793 *outlenp = inlen; 3794 return (0); 3795 } 3796 3797 int 3798 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3799 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3800 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3801 { 3802 udp_t *udp; 3803 int err; 3804 3805 udp = Q_TO_UDP(q); 3806 3807 rw_enter(&udp->udp_rwlock, RW_WRITER); 3808 err = udp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 3809 outlenp, outvalp, thisdg_attrs, cr, mblk); 3810 rw_exit(&udp->udp_rwlock); 3811 return (err); 3812 } 3813 3814 /* 3815 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3816 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3817 * headers, and the udp header. 3818 * Returns failure if can't allocate memory. 3819 */ 3820 static int 3821 udp_build_hdrs(udp_t *udp) 3822 { 3823 udp_stack_t *us = udp->udp_us; 3824 uchar_t *hdrs; 3825 uint_t hdrs_len; 3826 ip6_t *ip6h; 3827 ip6i_t *ip6i; 3828 udpha_t *udpha; 3829 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3830 size_t sth_wroff; 3831 3832 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3833 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3834 ASSERT(hdrs_len != 0); 3835 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3836 /* Need to reallocate */ 3837 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3838 if (hdrs == NULL) 3839 return (ENOMEM); 3840 3841 if (udp->udp_sticky_hdrs_len != 0) { 3842 kmem_free(udp->udp_sticky_hdrs, 3843 udp->udp_sticky_hdrs_len); 3844 } 3845 udp->udp_sticky_hdrs = hdrs; 3846 udp->udp_sticky_hdrs_len = hdrs_len; 3847 } 3848 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3849 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3850 3851 /* Set header fields not in ipp */ 3852 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3853 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3854 ip6h = (ip6_t *)&ip6i[1]; 3855 } 
else { 3856 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3857 } 3858 3859 if (!(ipp->ipp_fields & IPPF_ADDR)) 3860 ip6h->ip6_src = udp->udp_v6src; 3861 3862 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3863 udpha->uha_src_port = udp->udp_port; 3864 3865 /* Try to get everything in a single mblk */ 3866 if (hdrs_len > udp->udp_max_hdr_len) { 3867 udp->udp_max_hdr_len = hdrs_len; 3868 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3869 rw_exit(&udp->udp_rwlock); 3870 (void) mi_set_sth_wroff(udp->udp_connp->conn_rq, sth_wroff); 3871 rw_enter(&udp->udp_rwlock, RW_WRITER); 3872 } 3873 return (0); 3874 } 3875 3876 /* 3877 * This routine retrieves the value of an ND variable in a udpparam_t 3878 * structure. It is called through nd_getset when a user reads the 3879 * variable. 3880 */ 3881 /* ARGSUSED */ 3882 static int 3883 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3884 { 3885 udpparam_t *udppa = (udpparam_t *)cp; 3886 3887 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3888 return (0); 3889 } 3890 3891 /* 3892 * Walk through the param array specified registering each element with the 3893 * named dispatch (ND) handler. 
3894 */ 3895 static boolean_t 3896 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3897 { 3898 for (; cnt-- > 0; udppa++) { 3899 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3900 if (!nd_load(ndp, udppa->udp_param_name, 3901 udp_param_get, udp_param_set, 3902 (caddr_t)udppa)) { 3903 nd_free(ndp); 3904 return (B_FALSE); 3905 } 3906 } 3907 } 3908 if (!nd_load(ndp, "udp_extra_priv_ports", 3909 udp_extra_priv_ports_get, NULL, NULL)) { 3910 nd_free(ndp); 3911 return (B_FALSE); 3912 } 3913 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3914 NULL, udp_extra_priv_ports_add, NULL)) { 3915 nd_free(ndp); 3916 return (B_FALSE); 3917 } 3918 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3919 NULL, udp_extra_priv_ports_del, NULL)) { 3920 nd_free(ndp); 3921 return (B_FALSE); 3922 } 3923 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3924 NULL)) { 3925 nd_free(ndp); 3926 return (B_FALSE); 3927 } 3928 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3929 NULL)) { 3930 nd_free(ndp); 3931 return (B_FALSE); 3932 } 3933 return (B_TRUE); 3934 } 3935 3936 /* This routine sets an ND variable in a udpparam_t structure. */ 3937 /* ARGSUSED */ 3938 static int 3939 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3940 { 3941 long new_value; 3942 udpparam_t *udppa = (udpparam_t *)cp; 3943 3944 /* 3945 * Fail the request if the new value does not lie within the 3946 * required bounds. 3947 */ 3948 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3949 new_value < udppa->udp_param_min || 3950 new_value > udppa->udp_param_max) { 3951 return (EINVAL); 3952 } 3953 3954 /* Set the new value */ 3955 udppa->udp_param_value = new_value; 3956 return (0); 3957 } 3958 3959 /* 3960 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3961 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3962 * just count the length needed for allocation. 
If 'dbuf' is non-NULL, 3963 * then it's assumed to be allocated to be large enough. 3964 * 3965 * Returns zero if trimming of the security option causes all options to go 3966 * away. 3967 */ 3968 static size_t 3969 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3970 { 3971 struct T_opthdr *toh; 3972 size_t hol = ipp->ipp_hopoptslen; 3973 ip6_hbh_t *dstopt = NULL; 3974 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3975 size_t tlen, olen, plen; 3976 boolean_t deleting; 3977 const struct ip6_opt *sopt, *lastpad; 3978 struct ip6_opt *dopt; 3979 3980 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 3981 toh->level = IPPROTO_IPV6; 3982 toh->name = IPV6_HOPOPTS; 3983 toh->status = 0; 3984 dstopt = (ip6_hbh_t *)(toh + 1); 3985 } 3986 3987 /* 3988 * If labeling is enabled, then skip the label option 3989 * but get other options if there are any. 3990 */ 3991 if (is_system_labeled()) { 3992 dopt = NULL; 3993 if (dstopt != NULL) { 3994 /* will fill in ip6h_len later */ 3995 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 3996 dopt = (struct ip6_opt *)(dstopt + 1); 3997 } 3998 sopt = (const struct ip6_opt *)(srcopt + 1); 3999 hol -= sizeof (*srcopt); 4000 tlen = sizeof (*dstopt); 4001 lastpad = NULL; 4002 deleting = B_FALSE; 4003 /* 4004 * This loop finds the first (lastpad pointer) of any number of 4005 * pads that preceeds the security option, then treats the 4006 * security option as though it were a pad, and then finds the 4007 * next non-pad option (or end of list). 4008 * 4009 * It then treats the entire block as one big pad. To preserve 4010 * alignment of any options that follow, or just the end of the 4011 * list, it computes a minimal new padding size that keeps the 4012 * same alignment for the next option. 4013 * 4014 * If it encounters just a sequence of pads with no security 4015 * option, those are copied as-is rather than collapsed. 4016 * 4017 * Note that to handle the end of list case, the code makes one 4018 * loop with 'hol' set to zero. 
4019 */ 4020 for (;;) { 4021 if (hol > 0) { 4022 if (sopt->ip6o_type == IP6OPT_PAD1) { 4023 if (lastpad == NULL) 4024 lastpad = sopt; 4025 sopt = (const struct ip6_opt *) 4026 &sopt->ip6o_len; 4027 hol--; 4028 continue; 4029 } 4030 olen = sopt->ip6o_len + sizeof (*sopt); 4031 if (olen > hol) 4032 olen = hol; 4033 if (sopt->ip6o_type == IP6OPT_PADN || 4034 sopt->ip6o_type == ip6opt_ls) { 4035 if (sopt->ip6o_type == ip6opt_ls) 4036 deleting = B_TRUE; 4037 if (lastpad == NULL) 4038 lastpad = sopt; 4039 sopt = (const struct ip6_opt *) 4040 ((const char *)sopt + olen); 4041 hol -= olen; 4042 continue; 4043 } 4044 } else { 4045 /* if nothing was copied at all, then delete */ 4046 if (tlen == sizeof (*dstopt)) 4047 return (0); 4048 /* last pass; pick up any trailing padding */ 4049 olen = 0; 4050 } 4051 if (deleting) { 4052 /* 4053 * compute aligning effect of deleted material 4054 * to reproduce with pad. 4055 */ 4056 plen = ((const char *)sopt - 4057 (const char *)lastpad) & 7; 4058 tlen += plen; 4059 if (dopt != NULL) { 4060 if (plen == 1) { 4061 dopt->ip6o_type = IP6OPT_PAD1; 4062 } else if (plen > 1) { 4063 plen -= sizeof (*dopt); 4064 dopt->ip6o_type = IP6OPT_PADN; 4065 dopt->ip6o_len = plen; 4066 if (plen > 0) 4067 bzero(dopt + 1, plen); 4068 } 4069 dopt = (struct ip6_opt *) 4070 ((char *)dopt + plen); 4071 } 4072 deleting = B_FALSE; 4073 lastpad = NULL; 4074 } 4075 /* if there's uncopied padding, then copy that now */ 4076 if (lastpad != NULL) { 4077 olen += (const char *)sopt - 4078 (const char *)lastpad; 4079 sopt = lastpad; 4080 lastpad = NULL; 4081 } 4082 if (dopt != NULL && olen > 0) { 4083 bcopy(sopt, dopt, olen); 4084 dopt = (struct ip6_opt *)((char *)dopt + olen); 4085 } 4086 if (hol == 0) 4087 break; 4088 tlen += olen; 4089 sopt = (const struct ip6_opt *) 4090 ((const char *)sopt + olen); 4091 hol -= olen; 4092 } 4093 /* go back and patch up the length value, rounded upward */ 4094 if (dstopt != NULL) 4095 dstopt->ip6h_len = (tlen - 1) >> 3; 4096 } else { 
4097 tlen = hol; 4098 if (dstopt != NULL) 4099 bcopy(srcopt, dstopt, hol); 4100 } 4101 4102 tlen += sizeof (*toh); 4103 if (toh != NULL) 4104 toh->len = tlen; 4105 4106 return (tlen); 4107 } 4108 4109 /* 4110 * Update udp_rcv_opt_len from the packet. 4111 * Called when options received, and when no options received but 4112 * udp_ip_recv_opt_len has previously recorded options. 4113 */ 4114 static void 4115 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 4116 { 4117 /* Save the options if any */ 4118 if (opt_len > 0) { 4119 if (opt_len > udp->udp_ip_rcv_options_len) { 4120 /* Need to allocate larger buffer */ 4121 if (udp->udp_ip_rcv_options_len != 0) 4122 mi_free((char *)udp->udp_ip_rcv_options); 4123 udp->udp_ip_rcv_options_len = 0; 4124 udp->udp_ip_rcv_options = 4125 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 4126 if (udp->udp_ip_rcv_options != NULL) 4127 udp->udp_ip_rcv_options_len = opt_len; 4128 } 4129 if (udp->udp_ip_rcv_options_len != 0) { 4130 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 4131 /* Adjust length if we are resusing the space */ 4132 udp->udp_ip_rcv_options_len = opt_len; 4133 } 4134 } else if (udp->udp_ip_rcv_options_len != 0) { 4135 /* Clear out previously recorded options */ 4136 mi_free((char *)udp->udp_ip_rcv_options); 4137 udp->udp_ip_rcv_options = NULL; 4138 udp->udp_ip_rcv_options_len = 0; 4139 } 4140 } 4141 4142 /* ARGSUSED2 */ 4143 static void 4144 udp_input(void *arg1, mblk_t *mp, void *arg2) 4145 { 4146 conn_t *connp = (conn_t *)arg1; 4147 struct T_unitdata_ind *tudi; 4148 uchar_t *rptr; /* Pointer to IP header */ 4149 int hdr_length; /* Length of IP+UDP headers */ 4150 int opt_len; 4151 int udi_size; /* Size of T_unitdata_ind */ 4152 int mp_len; 4153 udp_t *udp; 4154 udpha_t *udpha; 4155 int ipversion; 4156 ip6_pkt_t ipp; 4157 ip6_t *ip6h; 4158 ip6i_t *ip6i; 4159 mblk_t *mp1; 4160 mblk_t *options_mp = NULL; 4161 ip_pktinfo_t *pinfo = NULL; 4162 cred_t *cr = NULL; 4163 pid_t cpid; 4164 uint32_t udp_ip_rcv_options_len; 4165 
udp_bits_t udp_bits; 4166 cred_t *rcr = connp->conn_cred; 4167 udp_stack_t *us; 4168 4169 ASSERT(connp->conn_flags & IPCL_UDPCONN); 4170 4171 udp = connp->conn_udp; 4172 us = udp->udp_us; 4173 rptr = mp->b_rptr; 4174 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4175 ASSERT(OK_32PTR(rptr)); 4176 4177 /* 4178 * IP should have prepended the options data in an M_CTL 4179 * Check M_CTL "type" to make sure are not here bcos of 4180 * a valid ICMP message 4181 */ 4182 if (DB_TYPE(mp) == M_CTL) { 4183 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 4184 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 4185 IN_PKTINFO) { 4186 /* 4187 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 4188 * has been prepended to the packet by IP. We need to 4189 * extract the mblk and adjust the rptr 4190 */ 4191 pinfo = (ip_pktinfo_t *)mp->b_rptr; 4192 options_mp = mp; 4193 mp = mp->b_cont; 4194 rptr = mp->b_rptr; 4195 UDP_STAT(us, udp_in_pktinfo); 4196 } else { 4197 /* 4198 * ICMP messages. 4199 */ 4200 udp_icmp_error(connp->conn_rq, mp); 4201 return; 4202 } 4203 } 4204 4205 mp_len = msgdsize(mp); 4206 /* 4207 * This is the inbound data path. 4208 * First, we check to make sure the IP version number is correct, 4209 * and then pull the IP and UDP headers into the first mblk. 
4210 */ 4211 4212 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4213 ipp.ipp_fields = 0; 4214 4215 ipversion = IPH_HDR_VERSION(rptr); 4216 4217 rw_enter(&udp->udp_rwlock, RW_READER); 4218 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 4219 udp_bits = udp->udp_bits; 4220 rw_exit(&udp->udp_rwlock); 4221 4222 switch (ipversion) { 4223 case IPV4_VERSION: 4224 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4225 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4226 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4227 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 4228 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 4229 udp->udp_family == AF_INET) { 4230 /* 4231 * Record/update udp_ip_rcv_options with the lock 4232 * held. Not needed for AF_INET6 sockets 4233 * since they don't support a getsockopt of IP_OPTIONS. 4234 */ 4235 rw_enter(&udp->udp_rwlock, RW_WRITER); 4236 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 4237 opt_len); 4238 rw_exit(&udp->udp_rwlock); 4239 } 4240 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 4241 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4242 udp->udp_ip_recvpktinfo) { 4243 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 4244 ipp.ipp_fields |= IPPF_IFINDEX; 4245 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 4246 } 4247 } 4248 break; 4249 case IPV6_VERSION: 4250 /* 4251 * IPv6 packets can only be received by applications 4252 * that are prepared to receive IPv6 addresses. 4253 * The IP fanout must ensure this. 
4254 */ 4255 ASSERT(udp->udp_family == AF_INET6); 4256 4257 ip6h = (ip6_t *)rptr; 4258 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4259 4260 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4261 uint8_t nexthdrp; 4262 /* Look for ifindex information */ 4263 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4264 ip6i = (ip6i_t *)ip6h; 4265 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4266 goto tossit; 4267 4268 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4269 ASSERT(ip6i->ip6i_ifindex != 0); 4270 ipp.ipp_fields |= IPPF_IFINDEX; 4271 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4272 } 4273 rptr = (uchar_t *)&ip6i[1]; 4274 mp->b_rptr = rptr; 4275 if (rptr == mp->b_wptr) { 4276 mp1 = mp->b_cont; 4277 freeb(mp); 4278 mp = mp1; 4279 rptr = mp->b_rptr; 4280 } 4281 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4282 goto tossit; 4283 ip6h = (ip6_t *)rptr; 4284 mp_len = msgdsize(mp); 4285 } 4286 /* 4287 * Find any potentially interesting extension headers 4288 * as well as the length of the IPv6 + extension 4289 * headers. 4290 */ 4291 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4292 UDPH_SIZE; 4293 ASSERT(nexthdrp == IPPROTO_UDP); 4294 } else { 4295 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4296 ip6i = NULL; 4297 } 4298 break; 4299 default: 4300 ASSERT(0); 4301 } 4302 4303 /* 4304 * IP inspected the UDP header thus all of it must be in the mblk. 4305 * UDP length check is performed for IPv6 packets and IPv4 packets 4306 * to check if the size of the packet as specified 4307 * by the header is the same as the physical size of the packet. 4308 * FIXME? Didn't IP already check this? 4309 */ 4310 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4311 if ((MBLKL(mp) < hdr_length) || 4312 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4313 goto tossit; 4314 } 4315 4316 4317 /* Walk past the headers unless IP_RECVHDR was set. */ 4318 if (!udp_bits.udpb_rcvhdr) { 4319 mp->b_rptr = rptr + hdr_length; 4320 mp_len -= hdr_length; 4321 } 4322 4323 /* 4324 * This is the inbound data path. 
Packets are passed upstream as 4325 * T_UNITDATA_IND messages with full IP headers still attached. 4326 */ 4327 if (udp->udp_family == AF_INET) { 4328 sin_t *sin; 4329 4330 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4331 4332 /* 4333 * Normally only send up the source address. 4334 * If IP_RECVDSTADDR is set we include the destination IP 4335 * address as an option. With IP_RECVOPTS we include all 4336 * the IP options. 4337 */ 4338 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4339 if (udp_bits.udpb_recvdstaddr) { 4340 udi_size += sizeof (struct T_opthdr) + 4341 sizeof (struct in_addr); 4342 UDP_STAT(us, udp_in_recvdstaddr); 4343 } 4344 4345 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 4346 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4347 udi_size += sizeof (struct T_opthdr) + 4348 sizeof (struct in_pktinfo); 4349 UDP_STAT(us, udp_ip_rcvpktinfo); 4350 } 4351 4352 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 4353 udi_size += sizeof (struct T_opthdr) + opt_len; 4354 UDP_STAT(us, udp_in_recvopts); 4355 } 4356 4357 /* 4358 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4359 * space accordingly 4360 */ 4361 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4362 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4363 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4364 UDP_STAT(us, udp_in_recvif); 4365 } 4366 4367 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4368 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4369 udi_size += sizeof (struct T_opthdr) + 4370 sizeof (struct sockaddr_dl); 4371 UDP_STAT(us, udp_in_recvslla); 4372 } 4373 4374 if ((udp_bits.udpb_recvucred) && 4375 (cr = DB_CRED(mp)) != NULL) { 4376 udi_size += sizeof (struct T_opthdr) + ucredsize; 4377 cpid = DB_CPID(mp); 4378 UDP_STAT(us, udp_in_recvucred); 4379 } 4380 4381 /* XXX FIXME: apply to AF_INET6 as well */ 4382 /* 4383 * If SO_TIMESTAMP is set allocate the appropriate sized 4384 * buffer. 
Since gethrestime() expects a pointer aligned 4385 * argument, we allocate space necessary for extra 4386 * alignment (even though it might not be used). 4387 */ 4388 if (udp_bits.udpb_timestamp) { 4389 udi_size += sizeof (struct T_opthdr) + 4390 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4391 UDP_STAT(us, udp_in_timestamp); 4392 } 4393 4394 /* 4395 * If IP_RECVTTL is set allocate the appropriate sized buffer 4396 */ 4397 if (udp_bits.udpb_recvttl) { 4398 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4399 UDP_STAT(us, udp_in_recvttl); 4400 } 4401 4402 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4403 mp1 = allocb(udi_size, BPRI_MED); 4404 if (mp1 == NULL) { 4405 freemsg(mp); 4406 if (options_mp != NULL) 4407 freeb(options_mp); 4408 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4409 return; 4410 } 4411 mp1->b_cont = mp; 4412 mp = mp1; 4413 mp->b_datap->db_type = M_PROTO; 4414 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4415 mp->b_wptr = (uchar_t *)tudi + udi_size; 4416 tudi->PRIM_type = T_UNITDATA_IND; 4417 tudi->SRC_length = sizeof (sin_t); 4418 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4419 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4420 sizeof (sin_t); 4421 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4422 tudi->OPT_length = udi_size; 4423 sin = (sin_t *)&tudi[1]; 4424 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4425 sin->sin_port = udpha->uha_src_port; 4426 sin->sin_family = udp->udp_family; 4427 *(uint32_t *)&sin->sin_zero[0] = 0; 4428 *(uint32_t *)&sin->sin_zero[4] = 0; 4429 4430 /* 4431 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4432 * IP_RECVTTL has been set. 4433 */ 4434 if (udi_size != 0) { 4435 /* 4436 * Copy in destination address before options to avoid 4437 * any padding issues. 
4438 */ 4439 char *dstopt; 4440 4441 dstopt = (char *)&sin[1]; 4442 if (udp_bits.udpb_recvdstaddr) { 4443 struct T_opthdr *toh; 4444 ipaddr_t *dstptr; 4445 4446 toh = (struct T_opthdr *)dstopt; 4447 toh->level = IPPROTO_IP; 4448 toh->name = IP_RECVDSTADDR; 4449 toh->len = sizeof (struct T_opthdr) + 4450 sizeof (ipaddr_t); 4451 toh->status = 0; 4452 dstopt += sizeof (struct T_opthdr); 4453 dstptr = (ipaddr_t *)dstopt; 4454 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4455 dstopt += sizeof (ipaddr_t); 4456 udi_size -= toh->len; 4457 } 4458 4459 if (udp_bits.udpb_recvopts && opt_len > 0) { 4460 struct T_opthdr *toh; 4461 4462 toh = (struct T_opthdr *)dstopt; 4463 toh->level = IPPROTO_IP; 4464 toh->name = IP_RECVOPTS; 4465 toh->len = sizeof (struct T_opthdr) + opt_len; 4466 toh->status = 0; 4467 dstopt += sizeof (struct T_opthdr); 4468 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4469 opt_len); 4470 dstopt += opt_len; 4471 udi_size -= toh->len; 4472 } 4473 4474 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4475 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4476 struct T_opthdr *toh; 4477 struct in_pktinfo *pktinfop; 4478 4479 toh = (struct T_opthdr *)dstopt; 4480 toh->level = IPPROTO_IP; 4481 toh->name = IP_PKTINFO; 4482 toh->len = sizeof (struct T_opthdr) + 4483 sizeof (*pktinfop); 4484 toh->status = 0; 4485 dstopt += sizeof (struct T_opthdr); 4486 pktinfop = (struct in_pktinfo *)dstopt; 4487 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4488 pktinfop->ipi_spec_dst = 4489 pinfo->ip_pkt_match_addr; 4490 pktinfop->ipi_addr.s_addr = 4491 ((ipha_t *)rptr)->ipha_dst; 4492 4493 dstopt += sizeof (struct in_pktinfo); 4494 udi_size -= toh->len; 4495 } 4496 4497 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4498 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4499 4500 struct T_opthdr *toh; 4501 struct sockaddr_dl *dstptr; 4502 4503 toh = (struct T_opthdr *)dstopt; 4504 toh->level = IPPROTO_IP; 4505 toh->name = IP_RECVSLLA; 4506 toh->len = sizeof (struct T_opthdr) + 4507 sizeof 
(struct sockaddr_dl); 4508 toh->status = 0; 4509 dstopt += sizeof (struct T_opthdr); 4510 dstptr = (struct sockaddr_dl *)dstopt; 4511 bcopy(&pinfo->ip_pkt_slla, dstptr, 4512 sizeof (struct sockaddr_dl)); 4513 dstopt += sizeof (struct sockaddr_dl); 4514 udi_size -= toh->len; 4515 } 4516 4517 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4518 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4519 4520 struct T_opthdr *toh; 4521 uint_t *dstptr; 4522 4523 toh = (struct T_opthdr *)dstopt; 4524 toh->level = IPPROTO_IP; 4525 toh->name = IP_RECVIF; 4526 toh->len = sizeof (struct T_opthdr) + 4527 sizeof (uint_t); 4528 toh->status = 0; 4529 dstopt += sizeof (struct T_opthdr); 4530 dstptr = (uint_t *)dstopt; 4531 *dstptr = pinfo->ip_pkt_ifindex; 4532 dstopt += sizeof (uint_t); 4533 udi_size -= toh->len; 4534 } 4535 4536 if (cr != NULL) { 4537 struct T_opthdr *toh; 4538 4539 toh = (struct T_opthdr *)dstopt; 4540 toh->level = SOL_SOCKET; 4541 toh->name = SCM_UCRED; 4542 toh->len = sizeof (struct T_opthdr) + ucredsize; 4543 toh->status = 0; 4544 dstopt += sizeof (struct T_opthdr); 4545 (void) cred2ucred(cr, cpid, dstopt, rcr); 4546 dstopt += ucredsize; 4547 udi_size -= toh->len; 4548 } 4549 4550 if (udp_bits.udpb_timestamp) { 4551 struct T_opthdr *toh; 4552 4553 toh = (struct T_opthdr *)dstopt; 4554 toh->level = SOL_SOCKET; 4555 toh->name = SCM_TIMESTAMP; 4556 toh->len = sizeof (struct T_opthdr) + 4557 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4558 toh->status = 0; 4559 dstopt += sizeof (struct T_opthdr); 4560 /* Align for gethrestime() */ 4561 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4562 sizeof (intptr_t)); 4563 gethrestime((timestruc_t *)dstopt); 4564 dstopt = (char *)toh + toh->len; 4565 udi_size -= toh->len; 4566 } 4567 4568 /* 4569 * CAUTION: 4570 * Due to aligment issues 4571 * Processing of IP_RECVTTL option 4572 * should always be the last. Adding 4573 * any option processing after this will 4574 * cause alignment panic. 
4575 */ 4576 if (udp_bits.udpb_recvttl) { 4577 struct T_opthdr *toh; 4578 uint8_t *dstptr; 4579 4580 toh = (struct T_opthdr *)dstopt; 4581 toh->level = IPPROTO_IP; 4582 toh->name = IP_RECVTTL; 4583 toh->len = sizeof (struct T_opthdr) + 4584 sizeof (uint8_t); 4585 toh->status = 0; 4586 dstopt += sizeof (struct T_opthdr); 4587 dstptr = (uint8_t *)dstopt; 4588 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4589 dstopt += sizeof (uint8_t); 4590 udi_size -= toh->len; 4591 } 4592 4593 /* Consumed all of allocated space */ 4594 ASSERT(udi_size == 0); 4595 } 4596 } else { 4597 sin6_t *sin6; 4598 4599 /* 4600 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4601 * 4602 * Normally we only send up the address. If receiving of any 4603 * optional receive side information is enabled, we also send 4604 * that up as options. 4605 */ 4606 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4607 4608 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4609 IPPF_RTHDR|IPPF_IFINDEX)) { 4610 if ((udp_bits.udpb_ipv6_recvhopopts) && 4611 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4612 size_t hlen; 4613 4614 UDP_STAT(us, udp_in_recvhopopts); 4615 hlen = copy_hop_opts(&ipp, NULL); 4616 if (hlen == 0) 4617 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4618 udi_size += hlen; 4619 } 4620 if (((udp_bits.udpb_ipv6_recvdstopts) || 4621 udp_bits.udpb_old_ipv6_recvdstopts) && 4622 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4623 udi_size += sizeof (struct T_opthdr) + 4624 ipp.ipp_dstoptslen; 4625 UDP_STAT(us, udp_in_recvdstopts); 4626 } 4627 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4628 udp_bits.udpb_ipv6_recvrthdr && 4629 (ipp.ipp_fields & IPPF_RTHDR)) || 4630 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4631 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4632 udi_size += sizeof (struct T_opthdr) + 4633 ipp.ipp_rtdstoptslen; 4634 UDP_STAT(us, udp_in_recvrtdstopts); 4635 } 4636 if ((udp_bits.udpb_ipv6_recvrthdr) && 4637 (ipp.ipp_fields & IPPF_RTHDR)) { 4638 udi_size += sizeof (struct T_opthdr) + 4639 ipp.ipp_rthdrlen; 
4640 UDP_STAT(us, udp_in_recvrthdr); 4641 } 4642 if ((udp_bits.udpb_ip_recvpktinfo) && 4643 (ipp.ipp_fields & IPPF_IFINDEX)) { 4644 udi_size += sizeof (struct T_opthdr) + 4645 sizeof (struct in6_pktinfo); 4646 UDP_STAT(us, udp_in_recvpktinfo); 4647 } 4648 4649 } 4650 if ((udp_bits.udpb_recvucred) && 4651 (cr = DB_CRED(mp)) != NULL) { 4652 udi_size += sizeof (struct T_opthdr) + ucredsize; 4653 cpid = DB_CPID(mp); 4654 UDP_STAT(us, udp_in_recvucred); 4655 } 4656 4657 if (udp_bits.udpb_ipv6_recvhoplimit) { 4658 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4659 UDP_STAT(us, udp_in_recvhoplimit); 4660 } 4661 4662 if (udp_bits.udpb_ipv6_recvtclass) { 4663 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4664 UDP_STAT(us, udp_in_recvtclass); 4665 } 4666 4667 mp1 = allocb(udi_size, BPRI_MED); 4668 if (mp1 == NULL) { 4669 freemsg(mp); 4670 if (options_mp != NULL) 4671 freeb(options_mp); 4672 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4673 return; 4674 } 4675 mp1->b_cont = mp; 4676 mp = mp1; 4677 mp->b_datap->db_type = M_PROTO; 4678 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4679 mp->b_wptr = (uchar_t *)tudi + udi_size; 4680 tudi->PRIM_type = T_UNITDATA_IND; 4681 tudi->SRC_length = sizeof (sin6_t); 4682 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4683 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4684 sizeof (sin6_t); 4685 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4686 tudi->OPT_length = udi_size; 4687 sin6 = (sin6_t *)&tudi[1]; 4688 if (ipversion == IPV4_VERSION) { 4689 in6_addr_t v6dst; 4690 4691 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4692 &sin6->sin6_addr); 4693 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4694 &v6dst); 4695 sin6->sin6_flowinfo = 0; 4696 sin6->sin6_scope_id = 0; 4697 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4698 connp->conn_zoneid, us->us_netstack); 4699 } else { 4700 sin6->sin6_addr = ip6h->ip6_src; 4701 /* No sin6_flowinfo per API */ 4702 sin6->sin6_flowinfo = 0; 4703 /* For 
link-scope source pass up scope id */ 4704 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4705 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4706 sin6->sin6_scope_id = ipp.ipp_ifindex; 4707 else 4708 sin6->sin6_scope_id = 0; 4709 sin6->__sin6_src_id = ip_srcid_find_addr( 4710 &ip6h->ip6_dst, connp->conn_zoneid, 4711 us->us_netstack); 4712 } 4713 sin6->sin6_port = udpha->uha_src_port; 4714 sin6->sin6_family = udp->udp_family; 4715 4716 if (udi_size != 0) { 4717 uchar_t *dstopt; 4718 4719 dstopt = (uchar_t *)&sin6[1]; 4720 if ((udp_bits.udpb_ip_recvpktinfo) && 4721 (ipp.ipp_fields & IPPF_IFINDEX)) { 4722 struct T_opthdr *toh; 4723 struct in6_pktinfo *pkti; 4724 4725 toh = (struct T_opthdr *)dstopt; 4726 toh->level = IPPROTO_IPV6; 4727 toh->name = IPV6_PKTINFO; 4728 toh->len = sizeof (struct T_opthdr) + 4729 sizeof (*pkti); 4730 toh->status = 0; 4731 dstopt += sizeof (struct T_opthdr); 4732 pkti = (struct in6_pktinfo *)dstopt; 4733 if (ipversion == IPV6_VERSION) 4734 pkti->ipi6_addr = ip6h->ip6_dst; 4735 else 4736 IN6_IPADDR_TO_V4MAPPED( 4737 ((ipha_t *)rptr)->ipha_dst, 4738 &pkti->ipi6_addr); 4739 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4740 dstopt += sizeof (*pkti); 4741 udi_size -= toh->len; 4742 } 4743 if (udp_bits.udpb_ipv6_recvhoplimit) { 4744 struct T_opthdr *toh; 4745 4746 toh = (struct T_opthdr *)dstopt; 4747 toh->level = IPPROTO_IPV6; 4748 toh->name = IPV6_HOPLIMIT; 4749 toh->len = sizeof (struct T_opthdr) + 4750 sizeof (uint_t); 4751 toh->status = 0; 4752 dstopt += sizeof (struct T_opthdr); 4753 if (ipversion == IPV6_VERSION) 4754 *(uint_t *)dstopt = ip6h->ip6_hops; 4755 else 4756 *(uint_t *)dstopt = 4757 ((ipha_t *)rptr)->ipha_ttl; 4758 dstopt += sizeof (uint_t); 4759 udi_size -= toh->len; 4760 } 4761 if (udp_bits.udpb_ipv6_recvtclass) { 4762 struct T_opthdr *toh; 4763 4764 toh = (struct T_opthdr *)dstopt; 4765 toh->level = IPPROTO_IPV6; 4766 toh->name = IPV6_TCLASS; 4767 toh->len = sizeof (struct T_opthdr) + 4768 sizeof (uint_t); 4769 toh->status = 0; 4770 dstopt += 
sizeof (struct T_opthdr); 4771 if (ipversion == IPV6_VERSION) { 4772 *(uint_t *)dstopt = 4773 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4774 } else { 4775 ipha_t *ipha = (ipha_t *)rptr; 4776 *(uint_t *)dstopt = 4777 ipha->ipha_type_of_service; 4778 } 4779 dstopt += sizeof (uint_t); 4780 udi_size -= toh->len; 4781 } 4782 if ((udp_bits.udpb_ipv6_recvhopopts) && 4783 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4784 size_t hlen; 4785 4786 hlen = copy_hop_opts(&ipp, dstopt); 4787 dstopt += hlen; 4788 udi_size -= hlen; 4789 } 4790 if ((udp_bits.udpb_ipv6_recvdstopts) && 4791 (udp_bits.udpb_ipv6_recvrthdr) && 4792 (ipp.ipp_fields & IPPF_RTHDR) && 4793 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4794 struct T_opthdr *toh; 4795 4796 toh = (struct T_opthdr *)dstopt; 4797 toh->level = IPPROTO_IPV6; 4798 toh->name = IPV6_DSTOPTS; 4799 toh->len = sizeof (struct T_opthdr) + 4800 ipp.ipp_rtdstoptslen; 4801 toh->status = 0; 4802 dstopt += sizeof (struct T_opthdr); 4803 bcopy(ipp.ipp_rtdstopts, dstopt, 4804 ipp.ipp_rtdstoptslen); 4805 dstopt += ipp.ipp_rtdstoptslen; 4806 udi_size -= toh->len; 4807 } 4808 if ((udp_bits.udpb_ipv6_recvrthdr) && 4809 (ipp.ipp_fields & IPPF_RTHDR)) { 4810 struct T_opthdr *toh; 4811 4812 toh = (struct T_opthdr *)dstopt; 4813 toh->level = IPPROTO_IPV6; 4814 toh->name = IPV6_RTHDR; 4815 toh->len = sizeof (struct T_opthdr) + 4816 ipp.ipp_rthdrlen; 4817 toh->status = 0; 4818 dstopt += sizeof (struct T_opthdr); 4819 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4820 dstopt += ipp.ipp_rthdrlen; 4821 udi_size -= toh->len; 4822 } 4823 if ((udp_bits.udpb_ipv6_recvdstopts) && 4824 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4825 struct T_opthdr *toh; 4826 4827 toh = (struct T_opthdr *)dstopt; 4828 toh->level = IPPROTO_IPV6; 4829 toh->name = IPV6_DSTOPTS; 4830 toh->len = sizeof (struct T_opthdr) + 4831 ipp.ipp_dstoptslen; 4832 toh->status = 0; 4833 dstopt += sizeof (struct T_opthdr); 4834 bcopy(ipp.ipp_dstopts, dstopt, 4835 ipp.ipp_dstoptslen); 4836 dstopt += ipp.ipp_dstoptslen; 4837 udi_size -= 
toh->len; 4838 } 4839 4840 if (cr != NULL) { 4841 struct T_opthdr *toh; 4842 4843 toh = (struct T_opthdr *)dstopt; 4844 toh->level = SOL_SOCKET; 4845 toh->name = SCM_UCRED; 4846 toh->len = sizeof (struct T_opthdr) + ucredsize; 4847 toh->status = 0; 4848 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4849 dstopt += toh->len; 4850 udi_size -= toh->len; 4851 } 4852 /* Consumed all of allocated space */ 4853 ASSERT(udi_size == 0); 4854 } 4855 #undef sin6 4856 /* No IP_RECVDSTADDR for IPv6. */ 4857 } 4858 4859 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4860 if (options_mp != NULL) 4861 freeb(options_mp); 4862 4863 if (udp_bits.udpb_direct_sockfs) { 4864 /* 4865 * There is nothing above us except for the stream head; 4866 * use the read-side synchronous stream interface in 4867 * order to reduce the time spent in interrupt thread. 4868 */ 4869 ASSERT(udp->udp_issocket); 4870 udp_rcv_enqueue(connp->conn_rq, udp, mp, mp_len); 4871 } else { 4872 /* 4873 * Use regular STREAMS interface to pass data upstream 4874 * if this is not a socket endpoint, or if we have 4875 * switched over to the slow mode due to sockmod being 4876 * popped or a module being pushed on top of us. 4877 */ 4878 putnext(connp->conn_rq, mp); 4879 } 4880 return; 4881 4882 tossit: 4883 freemsg(mp); 4884 if (options_mp != NULL) 4885 freeb(options_mp); 4886 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4887 } 4888 4889 /* 4890 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 4891 * immediately. 4892 */ 4893 static void 4894 udp_bind_result(conn_t *connp, mblk_t *mp) 4895 { 4896 struct T_error_ack *tea; 4897 4898 switch (mp->b_datap->db_type) { 4899 case M_PROTO: 4900 case M_PCPROTO: 4901 /* M_PROTO messages contain some type of TPI message. 
*/ 4902 ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= 4903 (uintptr_t)INT_MAX); 4904 if (mp->b_wptr - mp->b_rptr < sizeof (t_scalar_t)) { 4905 freemsg(mp); 4906 return; 4907 } 4908 tea = (struct T_error_ack *)mp->b_rptr; 4909 4910 switch (tea->PRIM_type) { 4911 case T_ERROR_ACK: 4912 switch (tea->ERROR_prim) { 4913 case O_T_BIND_REQ: 4914 case T_BIND_REQ: 4915 udp_bind_error(connp, mp); 4916 return; 4917 default: 4918 break; 4919 } 4920 ASSERT(0); 4921 freemsg(mp); 4922 return; 4923 4924 case T_BIND_ACK: 4925 udp_bind_ack(connp, mp); 4926 return; 4927 4928 default: 4929 break; 4930 } 4931 freemsg(mp); 4932 return; 4933 default: 4934 /* FIXME: other cases? */ 4935 ASSERT(0); 4936 freemsg(mp); 4937 return; 4938 } 4939 } 4940 4941 /* 4942 * Process a T_BIND_ACK 4943 */ 4944 static void 4945 udp_bind_ack(conn_t *connp, mblk_t *mp) 4946 { 4947 udp_t *udp = connp->conn_udp; 4948 mblk_t *mp1; 4949 ire_t *ire; 4950 struct T_bind_ack *tba; 4951 uchar_t *addrp; 4952 ipa_conn_t *ac; 4953 ipa6_conn_t *ac6; 4954 udp_fanout_t *udpf; 4955 udp_stack_t *us = udp->udp_us; 4956 4957 ASSERT(udp->udp_pending_op != -1); 4958 rw_enter(&udp->udp_rwlock, RW_WRITER); 4959 /* 4960 * If a broadcast/multicast address was bound set 4961 * the source address to 0. 4962 * This ensures no datagrams with broadcast address 4963 * as source address are emitted (which would violate 4964 * RFC1122 - Hosts requirements) 4965 * 4966 * Note that when connecting the returned IRE is 4967 * for the destination address and we only perform 4968 * the broadcast check for the source address (it 4969 * is OK to connect to a broadcast/multicast address.) 4970 */ 4971 mp1 = mp->b_cont; 4972 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 4973 ire = (ire_t *)mp1->b_rptr; 4974 4975 /* 4976 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 4977 * local address. 
4978 */ 4979 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 4980 us->us_bind_fanout_size)]; 4981 if (ire->ire_type == IRE_BROADCAST && 4982 udp->udp_state != TS_DATA_XFER) { 4983 ASSERT(udp->udp_pending_op == T_BIND_REQ || 4984 udp->udp_pending_op == O_T_BIND_REQ); 4985 /* This was just a local bind to a broadcast addr */ 4986 mutex_enter(&udpf->uf_lock); 4987 V6_SET_ZERO(udp->udp_v6src); 4988 mutex_exit(&udpf->uf_lock); 4989 if (udp->udp_family == AF_INET6) 4990 (void) udp_build_hdrs(udp); 4991 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 4992 /* 4993 * Local address not yet set - pick it from the 4994 * T_bind_ack 4995 */ 4996 tba = (struct T_bind_ack *)mp->b_rptr; 4997 addrp = &mp->b_rptr[tba->ADDR_offset]; 4998 switch (udp->udp_family) { 4999 case AF_INET: 5000 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5001 ac = (ipa_conn_t *)addrp; 5002 } else { 5003 ASSERT(tba->ADDR_length == 5004 sizeof (ipa_conn_x_t)); 5005 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5006 } 5007 mutex_enter(&udpf->uf_lock); 5008 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5009 &udp->udp_v6src); 5010 mutex_exit(&udpf->uf_lock); 5011 break; 5012 case AF_INET6: 5013 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5014 ac6 = (ipa6_conn_t *)addrp; 5015 } else { 5016 ASSERT(tba->ADDR_length == 5017 sizeof (ipa6_conn_x_t)); 5018 ac6 = &((ipa6_conn_x_t *) 5019 addrp)->ac6x_conn; 5020 } 5021 mutex_enter(&udpf->uf_lock); 5022 udp->udp_v6src = ac6->ac6_laddr; 5023 mutex_exit(&udpf->uf_lock); 5024 (void) udp_build_hdrs(udp); 5025 break; 5026 } 5027 } 5028 mp1 = mp1->b_cont; 5029 } 5030 udp->udp_pending_op = -1; 5031 rw_exit(&udp->udp_rwlock); 5032 /* 5033 * Look for one or more appended ACK message added by 5034 * udp_connect or udp_disconnect. 5035 * If none found just send up the T_BIND_ACK. 5036 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5037 * udp_disconnect has appended a T_OK_ACK. 
5038 */ 5039 if (mp1 != NULL) { 5040 if (mp->b_cont == mp1) 5041 mp->b_cont = NULL; 5042 else { 5043 ASSERT(mp->b_cont->b_cont == mp1); 5044 mp->b_cont->b_cont = NULL; 5045 } 5046 freemsg(mp); 5047 mp = mp1; 5048 while (mp != NULL) { 5049 mp1 = mp->b_cont; 5050 mp->b_cont = NULL; 5051 putnext(connp->conn_rq, mp); 5052 mp = mp1; 5053 } 5054 return; 5055 } 5056 freemsg(mp->b_cont); 5057 mp->b_cont = NULL; 5058 putnext(connp->conn_rq, mp); 5059 } 5060 5061 static void 5062 udp_bind_error(conn_t *connp, mblk_t *mp) 5063 { 5064 udp_t *udp = connp->conn_udp; 5065 struct T_error_ack *tea; 5066 udp_fanout_t *udpf; 5067 udp_stack_t *us = udp->udp_us; 5068 5069 tea = (struct T_error_ack *)mp->b_rptr; 5070 5071 /* 5072 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5073 * clear out the associated port and source 5074 * address before passing the message 5075 * upstream. If this was caused by a T_CONN_REQ 5076 * revert back to bound state. 5077 */ 5078 5079 rw_enter(&udp->udp_rwlock, RW_WRITER); 5080 ASSERT(udp->udp_pending_op != -1); 5081 tea->ERROR_prim = udp->udp_pending_op; 5082 udp->udp_pending_op = -1; 5083 udpf = &us->us_bind_fanout[ 5084 UDP_BIND_HASH(udp->udp_port, 5085 us->us_bind_fanout_size)]; 5086 mutex_enter(&udpf->uf_lock); 5087 5088 switch (tea->ERROR_prim) { 5089 case T_CONN_REQ: 5090 ASSERT(udp->udp_state == TS_DATA_XFER); 5091 /* Connect failed */ 5092 /* Revert back to the bound source */ 5093 udp->udp_v6src = udp->udp_bound_v6src; 5094 udp->udp_state = TS_IDLE; 5095 mutex_exit(&udpf->uf_lock); 5096 if (udp->udp_family == AF_INET6) 5097 (void) udp_build_hdrs(udp); 5098 rw_exit(&udp->udp_rwlock); 5099 break; 5100 5101 case T_DISCON_REQ: 5102 case T_BIND_REQ: 5103 case O_T_BIND_REQ: 5104 V6_SET_ZERO(udp->udp_v6src); 5105 V6_SET_ZERO(udp->udp_bound_v6src); 5106 udp->udp_state = TS_UNBND; 5107 udp_bind_hash_remove(udp, B_TRUE); 5108 udp->udp_port = 0; 5109 mutex_exit(&udpf->uf_lock); 5110 if (udp->udp_family == AF_INET6) 5111 (void) udp_build_hdrs(udp); 5112 
rw_exit(&udp->udp_rwlock); 5113 break; 5114 5115 default: 5116 mutex_exit(&udpf->uf_lock); 5117 rw_exit(&udp->udp_rwlock); 5118 (void) mi_strlog(connp->conn_rq, 1, 5119 SL_ERROR|SL_TRACE, 5120 "udp_input_other: bad ERROR_prim, " 5121 "len %d", tea->ERROR_prim); 5122 } 5123 putnext(connp->conn_rq, mp); 5124 } 5125 5126 /* 5127 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 5128 * information that can be changing beneath us. 5129 */ 5130 mblk_t * 5131 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5132 { 5133 mblk_t *mpdata; 5134 mblk_t *mp_conn_ctl; 5135 mblk_t *mp_attr_ctl; 5136 mblk_t *mp6_conn_ctl; 5137 mblk_t *mp6_attr_ctl; 5138 mblk_t *mp_conn_tail; 5139 mblk_t *mp_attr_tail; 5140 mblk_t *mp6_conn_tail; 5141 mblk_t *mp6_attr_tail; 5142 struct opthdr *optp; 5143 mib2_udpEntry_t ude; 5144 mib2_udp6Entry_t ude6; 5145 mib2_transportMLPEntry_t mlp; 5146 int state; 5147 zoneid_t zoneid; 5148 int i; 5149 connf_t *connfp; 5150 conn_t *connp = Q_TO_CONN(q); 5151 int v4_conn_idx; 5152 int v6_conn_idx; 5153 boolean_t needattr; 5154 udp_t *udp; 5155 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5156 udp_stack_t *us = connp->conn_netstack->netstack_udp; 5157 mblk_t *mp2ctl; 5158 5159 /* 5160 * make a copy of the original message 5161 */ 5162 mp2ctl = copymsg(mpctl); 5163 5164 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 5165 if (mpctl == NULL || 5166 (mpdata = mpctl->b_cont) == NULL || 5167 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5168 (mp_attr_ctl = copymsg(mpctl)) == NULL || 5169 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 5170 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 5171 freemsg(mp_conn_ctl); 5172 freemsg(mp_attr_ctl); 5173 freemsg(mp6_conn_ctl); 5174 freemsg(mpctl); 5175 freemsg(mp2ctl); 5176 return (0); 5177 } 5178 5179 zoneid = connp->conn_zoneid; 5180 5181 /* fixed length structure for IPv4 and IPv6 counters */ 5182 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5183 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof 
(mib2_udp6Entry_t)); 5184 /* synchronize 64- and 32-bit counters */ 5185 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 5186 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 5187 5188 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5189 optp->level = MIB2_UDP; 5190 optp->name = 0; 5191 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 5192 sizeof (us->us_udp_mib)); 5193 optp->len = msgdsize(mpdata); 5194 qreply(q, mpctl); 5195 5196 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 5197 v4_conn_idx = v6_conn_idx = 0; 5198 5199 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5200 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5201 connp = NULL; 5202 5203 while ((connp = ipcl_get_next_conn(connfp, connp, 5204 IPCL_UDPCONN))) { 5205 udp = connp->conn_udp; 5206 if (zoneid != connp->conn_zoneid) 5207 continue; 5208 5209 /* 5210 * Note that the port numbers are sent in 5211 * host byte order 5212 */ 5213 5214 if (udp->udp_state == TS_UNBND) 5215 state = MIB2_UDP_unbound; 5216 else if (udp->udp_state == TS_IDLE) 5217 state = MIB2_UDP_idle; 5218 else if (udp->udp_state == TS_DATA_XFER) 5219 state = MIB2_UDP_connected; 5220 else 5221 state = MIB2_UDP_unknown; 5222 5223 needattr = B_FALSE; 5224 bzero(&mlp, sizeof (mlp)); 5225 if (connp->conn_mlp_type != mlptSingle) { 5226 if (connp->conn_mlp_type == mlptShared || 5227 connp->conn_mlp_type == mlptBoth) 5228 mlp.tme_flags |= MIB2_TMEF_SHARED; 5229 if (connp->conn_mlp_type == mlptPrivate || 5230 connp->conn_mlp_type == mlptBoth) 5231 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 5232 needattr = B_TRUE; 5233 } 5234 5235 /* 5236 * Create an IPv4 table entry for IPv4 entries and also 5237 * any IPv6 entries which are bound to in6addr_any 5238 * (i.e. anything a IPv4 peer could connect/send to). 
5239 */ 5240 if (udp->udp_ipversion == IPV4_VERSION || 5241 (udp->udp_state <= TS_IDLE && 5242 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5243 ude.udpEntryInfo.ue_state = state; 5244 /* 5245 * If in6addr_any this will set it to 5246 * INADDR_ANY 5247 */ 5248 ude.udpLocalAddress = 5249 V4_PART_OF_V6(udp->udp_v6src); 5250 ude.udpLocalPort = ntohs(udp->udp_port); 5251 if (udp->udp_state == TS_DATA_XFER) { 5252 /* 5253 * Can potentially get here for 5254 * v6 socket if another process 5255 * (say, ping) has just done a 5256 * sendto(), changing the state 5257 * from the TS_IDLE above to 5258 * TS_DATA_XFER by the time we hit 5259 * this part of the code. 5260 */ 5261 ude.udpEntryInfo.ue_RemoteAddress = 5262 V4_PART_OF_V6(udp->udp_v6dst); 5263 ude.udpEntryInfo.ue_RemotePort = 5264 ntohs(udp->udp_dstport); 5265 } else { 5266 ude.udpEntryInfo.ue_RemoteAddress = 0; 5267 ude.udpEntryInfo.ue_RemotePort = 0; 5268 } 5269 5270 /* 5271 * We make the assumption that all udp_t 5272 * structs will be created within an address 5273 * region no larger than 32-bits. 5274 */ 5275 ude.udpInstance = (uint32_t)(uintptr_t)udp; 5276 ude.udpCreationProcess = 5277 (udp->udp_open_pid < 0) ? 
5278 MIB2_UNKNOWN_PROCESS : 5279 udp->udp_open_pid; 5280 ude.udpCreationTime = udp->udp_open_time; 5281 5282 (void) snmp_append_data2(mp_conn_ctl->b_cont, 5283 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5284 mlp.tme_connidx = v4_conn_idx++; 5285 if (needattr) 5286 (void) snmp_append_data2( 5287 mp_attr_ctl->b_cont, &mp_attr_tail, 5288 (char *)&mlp, sizeof (mlp)); 5289 } 5290 if (udp->udp_ipversion == IPV6_VERSION) { 5291 ude6.udp6EntryInfo.ue_state = state; 5292 ude6.udp6LocalAddress = udp->udp_v6src; 5293 ude6.udp6LocalPort = ntohs(udp->udp_port); 5294 ude6.udp6IfIndex = udp->udp_bound_if; 5295 if (udp->udp_state == TS_DATA_XFER) { 5296 ude6.udp6EntryInfo.ue_RemoteAddress = 5297 udp->udp_v6dst; 5298 ude6.udp6EntryInfo.ue_RemotePort = 5299 ntohs(udp->udp_dstport); 5300 } else { 5301 ude6.udp6EntryInfo.ue_RemoteAddress = 5302 sin6_null.sin6_addr; 5303 ude6.udp6EntryInfo.ue_RemotePort = 0; 5304 } 5305 /* 5306 * We make the assumption that all udp_t 5307 * structs will be created within an address 5308 * region no larger than 32-bits. 5309 */ 5310 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 5311 ude6.udp6CreationProcess = 5312 (udp->udp_open_pid < 0) ? 5313 MIB2_UNKNOWN_PROCESS : 5314 udp->udp_open_pid; 5315 ude6.udp6CreationTime = udp->udp_open_time; 5316 5317 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 5318 &mp6_conn_tail, (char *)&ude6, 5319 sizeof (ude6)); 5320 mlp.tme_connidx = v6_conn_idx++; 5321 if (needattr) 5322 (void) snmp_append_data2( 5323 mp6_attr_ctl->b_cont, 5324 &mp6_attr_tail, (char *)&mlp, 5325 sizeof (mlp)); 5326 } 5327 } 5328 } 5329 5330 /* IPv4 UDP endpoints */ 5331 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5332 sizeof (struct T_optmgmt_ack)]; 5333 optp->level = MIB2_UDP; 5334 optp->name = MIB2_UDP_ENTRY; 5335 optp->len = msgdsize(mp_conn_ctl->b_cont); 5336 qreply(q, mp_conn_ctl); 5337 5338 /* table of MLP attributes... 
*/ 5339 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 5340 sizeof (struct T_optmgmt_ack)]; 5341 optp->level = MIB2_UDP; 5342 optp->name = EXPER_XPORT_MLP; 5343 optp->len = msgdsize(mp_attr_ctl->b_cont); 5344 if (optp->len == 0) 5345 freemsg(mp_attr_ctl); 5346 else 5347 qreply(q, mp_attr_ctl); 5348 5349 /* IPv6 UDP endpoints */ 5350 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5351 sizeof (struct T_optmgmt_ack)]; 5352 optp->level = MIB2_UDP6; 5353 optp->name = MIB2_UDP6_ENTRY; 5354 optp->len = msgdsize(mp6_conn_ctl->b_cont); 5355 qreply(q, mp6_conn_ctl); 5356 5357 /* table of MLP attributes... */ 5358 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 5359 sizeof (struct T_optmgmt_ack)]; 5360 optp->level = MIB2_UDP6; 5361 optp->name = EXPER_XPORT_MLP; 5362 optp->len = msgdsize(mp6_attr_ctl->b_cont); 5363 if (optp->len == 0) 5364 freemsg(mp6_attr_ctl); 5365 else 5366 qreply(q, mp6_attr_ctl); 5367 5368 return (mp2ctl); 5369 } 5370 5371 /* 5372 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 5373 * NOTE: Per MIB-II, UDP has no writable data. 5374 * TODO: If this ever actually tries to set anything, it needs to be 5375 * to do the appropriate locking. 
5376 */ 5377 /* ARGSUSED */ 5378 int 5379 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5380 uchar_t *ptr, int len) 5381 { 5382 switch (level) { 5383 case MIB2_UDP: 5384 return (0); 5385 default: 5386 return (1); 5387 } 5388 } 5389 5390 static void 5391 udp_report_item(mblk_t *mp, udp_t *udp) 5392 { 5393 char *state; 5394 char addrbuf1[INET6_ADDRSTRLEN]; 5395 char addrbuf2[INET6_ADDRSTRLEN]; 5396 uint_t print_len, buf_len; 5397 5398 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5399 ASSERT(buf_len >= 0); 5400 if (buf_len == 0) 5401 return; 5402 5403 if (udp->udp_state == TS_UNBND) 5404 state = "UNBOUND"; 5405 else if (udp->udp_state == TS_IDLE) 5406 state = "IDLE"; 5407 else if (udp->udp_state == TS_DATA_XFER) 5408 state = "CONNECTED"; 5409 else 5410 state = "UnkState"; 5411 print_len = snprintf((char *)mp->b_wptr, buf_len, 5412 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5413 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5414 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 5415 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 5416 ntohs(udp->udp_dstport), state); 5417 if (print_len < buf_len) { 5418 mp->b_wptr += print_len; 5419 } else { 5420 mp->b_wptr += buf_len; 5421 } 5422 } 5423 5424 /* Report for ndd "udp_status" */ 5425 /* ARGSUSED */ 5426 static int 5427 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5428 { 5429 zoneid_t zoneid; 5430 connf_t *connfp; 5431 conn_t *connp = Q_TO_CONN(q); 5432 udp_t *udp = connp->conn_udp; 5433 int i; 5434 udp_stack_t *us = udp->udp_us; 5435 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5436 5437 /* 5438 * Because of the ndd constraint, at most we can have 64K buffer 5439 * to put in all UDP info. So to be more efficient, just 5440 * allocate a 64K buffer here, assuming we need that large buffer. 5441 * This may be a problem as any user can read udp_status. 
Therefore 5442 * we limit the rate of doing this using us_ndd_get_info_interval. 5443 * This should be OK as normal users should not do this too often. 5444 */ 5445 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 5446 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 5447 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 5448 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5449 return (0); 5450 } 5451 } 5452 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5453 /* The following may work even if we cannot get a large buf. */ 5454 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5455 return (0); 5456 } 5457 (void) mi_mpprintf(mp, 5458 "UDP " MI_COL_HDRPAD_STR 5459 /* 12345678[89ABCDEF] */ 5460 " zone lport src addr dest addr port state"); 5461 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5462 5463 zoneid = connp->conn_zoneid; 5464 5465 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5466 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5467 connp = NULL; 5468 5469 while ((connp = ipcl_get_next_conn(connfp, connp, 5470 IPCL_UDPCONN))) { 5471 udp = connp->conn_udp; 5472 if (zoneid != GLOBAL_ZONEID && 5473 zoneid != connp->conn_zoneid) 5474 continue; 5475 5476 udp_report_item(mp->b_cont, udp); 5477 } 5478 } 5479 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 5480 return (0); 5481 } 5482 5483 /* 5484 * This routine creates a T_UDERROR_IND message and passes it upstream. 5485 * The address and options are copied from the T_UNITDATA_REQ message 5486 * passed in mp. This message is freed. 
5487 */ 5488 static void 5489 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5490 t_scalar_t err) 5491 { 5492 struct T_unitdata_req *tudr; 5493 mblk_t *mp1; 5494 uchar_t *optaddr; 5495 t_scalar_t optlen; 5496 5497 if (DB_TYPE(mp) == M_DATA) { 5498 ASSERT(destaddr != NULL && destlen != 0); 5499 optaddr = NULL; 5500 optlen = 0; 5501 } else { 5502 if ((mp->b_wptr < mp->b_rptr) || 5503 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5504 goto done; 5505 } 5506 tudr = (struct T_unitdata_req *)mp->b_rptr; 5507 destaddr = mp->b_rptr + tudr->DEST_offset; 5508 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5509 destaddr + tudr->DEST_length < mp->b_rptr || 5510 destaddr + tudr->DEST_length > mp->b_wptr) { 5511 goto done; 5512 } 5513 optaddr = mp->b_rptr + tudr->OPT_offset; 5514 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 5515 optaddr + tudr->OPT_length < mp->b_rptr || 5516 optaddr + tudr->OPT_length > mp->b_wptr) { 5517 goto done; 5518 } 5519 destlen = tudr->DEST_length; 5520 optlen = tudr->OPT_length; 5521 } 5522 5523 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5524 (char *)optaddr, optlen, err); 5525 if (mp1 != NULL) 5526 qreply(q, mp1); 5527 5528 done: 5529 freemsg(mp); 5530 } 5531 5532 /* 5533 * This routine removes a port number association from a stream. It 5534 * is called by udp_wput to handle T_UNBIND_REQ messages. 
5535 */ 5536 static void 5537 udp_unbind(queue_t *q, mblk_t *mp) 5538 { 5539 udp_t *udp = Q_TO_UDP(q); 5540 udp_fanout_t *udpf; 5541 udp_stack_t *us = udp->udp_us; 5542 5543 if (cl_inet_unbind != NULL) { 5544 /* 5545 * Running in cluster mode - register unbind information 5546 */ 5547 if (udp->udp_ipversion == IPV4_VERSION) { 5548 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 5549 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5550 (in_port_t)udp->udp_port); 5551 } else { 5552 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5553 (uint8_t *)&(udp->udp_v6src), 5554 (in_port_t)udp->udp_port); 5555 } 5556 } 5557 5558 rw_enter(&udp->udp_rwlock, RW_WRITER); 5559 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 5560 rw_exit(&udp->udp_rwlock); 5561 udp_err_ack(q, mp, TOUTSTATE, 0); 5562 return; 5563 } 5564 udp->udp_pending_op = T_UNBIND_REQ; 5565 rw_exit(&udp->udp_rwlock); 5566 5567 /* 5568 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5569 * and therefore ip_unbind must never return NULL. 5570 */ 5571 mp = ip_unbind(q, mp); 5572 ASSERT(mp != NULL); 5573 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 5574 5575 /* 5576 * Once we're unbound from IP, the pending operation may be cleared 5577 * here. 5578 */ 5579 rw_enter(&udp->udp_rwlock, RW_WRITER); 5580 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 5581 us->us_bind_fanout_size)]; 5582 mutex_enter(&udpf->uf_lock); 5583 udp_bind_hash_remove(udp, B_TRUE); 5584 V6_SET_ZERO(udp->udp_v6src); 5585 V6_SET_ZERO(udp->udp_bound_v6src); 5586 udp->udp_port = 0; 5587 mutex_exit(&udpf->uf_lock); 5588 5589 udp->udp_pending_op = -1; 5590 udp->udp_state = TS_UNBND; 5591 if (udp->udp_family == AF_INET6) 5592 (void) udp_build_hdrs(udp); 5593 rw_exit(&udp->udp_rwlock); 5594 5595 qreply(q, mp); 5596 } 5597 5598 /* 5599 * Don't let port fall into the privileged range. 5600 * Since the extra privileged ports can be arbitrary we also 5601 * ensure that we exclude those from consideration. 
5602 * us->us_epriv_ports is not sorted thus we loop over it until 5603 * there are no changes. 5604 */ 5605 static in_port_t 5606 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 5607 { 5608 int i; 5609 in_port_t nextport; 5610 boolean_t restart = B_FALSE; 5611 udp_stack_t *us = udp->udp_us; 5612 5613 if (random && udp_random_anon_port != 0) { 5614 (void) random_get_pseudo_bytes((uint8_t *)&port, 5615 sizeof (in_port_t)); 5616 /* 5617 * Unless changed by a sys admin, the smallest anon port 5618 * is 32768 and the largest anon port is 65535. It is 5619 * very likely (50%) for the random port to be smaller 5620 * than the smallest anon port. When that happens, 5621 * add port % (anon port range) to the smallest anon 5622 * port to get the random port. It should fall into the 5623 * valid anon port range. 5624 */ 5625 if (port < us->us_smallest_anon_port) { 5626 port = us->us_smallest_anon_port + 5627 port % (us->us_largest_anon_port - 5628 us->us_smallest_anon_port); 5629 } 5630 } 5631 5632 retry: 5633 if (port < us->us_smallest_anon_port) 5634 port = us->us_smallest_anon_port; 5635 5636 if (port > us->us_largest_anon_port) { 5637 port = us->us_smallest_anon_port; 5638 if (restart) 5639 return (0); 5640 restart = B_TRUE; 5641 } 5642 5643 if (port < us->us_smallest_nonpriv_port) 5644 port = us->us_smallest_nonpriv_port; 5645 5646 for (i = 0; i < us->us_num_epriv_ports; i++) { 5647 if (port == us->us_epriv_ports[i]) { 5648 port++; 5649 /* 5650 * Make sure that the port is in the 5651 * valid range. 
5652 */ 5653 goto retry; 5654 } 5655 } 5656 5657 if (is_system_labeled() && 5658 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 5659 port, IPPROTO_UDP, B_TRUE)) != 0) { 5660 port = nextport; 5661 goto retry; 5662 } 5663 5664 return (port); 5665 } 5666 5667 static int 5668 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 5669 { 5670 int err; 5671 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 5672 udp_t *udp = Q_TO_UDP(wq); 5673 udp_stack_t *us = udp->udp_us; 5674 5675 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 5676 opt_storage, udp->udp_mac_exempt, 5677 us->us_netstack->netstack_ip); 5678 if (err == 0) { 5679 err = tsol_update_options(&udp->udp_ip_snd_options, 5680 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 5681 opt_storage); 5682 } 5683 if (err != 0) { 5684 DTRACE_PROBE4( 5685 tx__ip__log__info__updatelabel__udp, 5686 char *, "queue(1) failed to update options(2) on mp(3)", 5687 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5688 } else { 5689 IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst); 5690 } 5691 return (err); 5692 } 5693 5694 static mblk_t * 5695 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5696 uint_t srcid, int *error, boolean_t insert_spi) 5697 { 5698 udp_t *udp = connp->conn_udp; 5699 queue_t *q = connp->conn_wq; 5700 mblk_t *mp1 = mp; 5701 mblk_t *mp2; 5702 ipha_t *ipha; 5703 int ip_hdr_length; 5704 uint32_t ip_len; 5705 udpha_t *udpha; 5706 boolean_t lock_held = B_FALSE; 5707 in_port_t uha_src_port; 5708 udpattrs_t attrs; 5709 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 5710 uint32_t ip_snd_opt_len = 0; 5711 ip4_pkt_t pktinfo; 5712 ip4_pkt_t *pktinfop = &pktinfo; 5713 ip_opt_info_t optinfo; 5714 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5715 udp_stack_t *us = udp->udp_us; 5716 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 5717 5718 5719 *error = 0; 5720 pktinfop->ip4_ill_index = 0; 5721 pktinfop->ip4_addr = INADDR_ANY; 5722 optinfo.ip_opt_flags 
= 0; 5723 optinfo.ip_opt_ill_index = 0; 5724 5725 if (v4dst == INADDR_ANY) 5726 v4dst = htonl(INADDR_LOOPBACK); 5727 5728 /* 5729 * If options passed in, feed it for verification and handling 5730 */ 5731 attrs.udpattr_credset = B_FALSE; 5732 if (DB_TYPE(mp) != M_DATA) { 5733 mp1 = mp->b_cont; 5734 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 5735 attrs.udpattr_ipp4 = pktinfop; 5736 attrs.udpattr_mb = mp; 5737 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 5738 goto done; 5739 /* 5740 * Note: success in processing options. 5741 * mp option buffer represented by 5742 * OPT_length/offset now potentially modified 5743 * and contain option setting results 5744 */ 5745 ASSERT(*error == 0); 5746 } 5747 } 5748 5749 /* mp1 points to the M_DATA mblk carrying the packet */ 5750 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5751 5752 rw_enter(&udp->udp_rwlock, RW_READER); 5753 lock_held = B_TRUE; 5754 /* 5755 * Check if our saved options are valid; update if not. 5756 * TSOL Note: Since we are not in WRITER mode, UDP packets 5757 * to different destination may require different labels, 5758 * or worse, UDP packets to same IP address may require 5759 * different labels due to use of shared all-zones address. 5760 * We use conn_lock to ensure that lastdst, ip_snd_options, 5761 * and ip_snd_options_len are consistent for the current 5762 * destination and are updated atomically. 
5763 */ 5764 mutex_enter(&connp->conn_lock); 5765 if (is_system_labeled()) { 5766 /* Using UDP MLP requires SCM_UCRED from user */ 5767 if (connp->conn_mlp_type != mlptSingle && 5768 !attrs.udpattr_credset) { 5769 mutex_exit(&connp->conn_lock); 5770 DTRACE_PROBE4( 5771 tx__ip__log__info__output__udp, 5772 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5773 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5774 *error = ECONNREFUSED; 5775 goto done; 5776 } 5777 /* 5778 * update label option for this UDP socket if 5779 * - the destination has changed, or 5780 * - the UDP socket is MLP 5781 */ 5782 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5783 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5784 connp->conn_mlp_type != mlptSingle) && 5785 (*error = udp_update_label(q, mp, v4dst)) != 0) { 5786 mutex_exit(&connp->conn_lock); 5787 goto done; 5788 } 5789 } 5790 if (udp->udp_ip_snd_options_len > 0) { 5791 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5792 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5793 } 5794 mutex_exit(&connp->conn_lock); 5795 5796 /* Add an IP header */ 5797 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5798 (insert_spi ? sizeof (uint32_t) : 0); 5799 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5800 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5801 !OK_32PTR(ipha)) { 5802 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5803 if (mp2 == NULL) { 5804 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5805 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5806 *error = ENOMEM; 5807 goto done; 5808 } 5809 mp2->b_wptr = DB_LIM(mp2); 5810 mp2->b_cont = mp1; 5811 mp1 = mp2; 5812 if (DB_TYPE(mp) != M_DATA) 5813 mp->b_cont = mp1; 5814 else 5815 mp = mp1; 5816 5817 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5818 } 5819 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? 
sizeof (uint32_t) : 0)); 5820 #ifdef _BIG_ENDIAN 5821 /* Set version, header length, and tos */ 5822 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5823 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5824 udp->udp_type_of_service); 5825 /* Set ttl and protocol */ 5826 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5827 #else 5828 /* Set version, header length, and tos */ 5829 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5830 ((udp->udp_type_of_service << 8) | 5831 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5832 /* Set ttl and protocol */ 5833 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5834 #endif 5835 if (pktinfop->ip4_addr != INADDR_ANY) { 5836 ipha->ipha_src = pktinfop->ip4_addr; 5837 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5838 } else { 5839 /* 5840 * Copy our address into the packet. If this is zero, 5841 * first look at __sin6_src_id for a hint. If we leave the 5842 * source as INADDR_ANY then ip will fill in the real source 5843 * address. 
5844 */ 5845 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5846 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5847 in6_addr_t v6src; 5848 5849 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5850 us->us_netstack); 5851 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5852 } 5853 } 5854 uha_src_port = udp->udp_port; 5855 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5856 rw_exit(&udp->udp_rwlock); 5857 lock_held = B_FALSE; 5858 } 5859 5860 if (pktinfop->ip4_ill_index != 0) { 5861 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5862 } 5863 5864 ipha->ipha_fragment_offset_and_flags = 0; 5865 ipha->ipha_ident = 0; 5866 5867 mp1->b_rptr = (uchar_t *)ipha; 5868 5869 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5870 (uintptr_t)UINT_MAX); 5871 5872 /* Determine length of packet */ 5873 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5874 if ((mp2 = mp1->b_cont) != NULL) { 5875 do { 5876 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5877 ip_len += (uint32_t)MBLKL(mp2); 5878 } while ((mp2 = mp2->b_cont) != NULL); 5879 } 5880 /* 5881 * If the size of the packet is greater than the maximum allowed by 5882 * ip, return an error. Passing this down could cause panics because 5883 * the size will have wrapped and be inconsistent with the msg size. 5884 */ 5885 if (ip_len > IP_MAXPACKET) { 5886 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5887 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5888 *error = EMSGSIZE; 5889 goto done; 5890 } 5891 ipha->ipha_length = htons((uint16_t)ip_len); 5892 ip_len -= ip_hdr_length; 5893 ip_len = htons((uint16_t)ip_len); 5894 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5895 5896 /* Insert all-0s SPI now. */ 5897 if (insert_spi) 5898 *((uint32_t *)(udpha + 1)) = 0; 5899 5900 /* 5901 * Copy in the destination address 5902 */ 5903 ipha->ipha_dst = v4dst; 5904 5905 /* 5906 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 
5907 */ 5908 if (CLASSD(v4dst)) 5909 ipha->ipha_ttl = udp->udp_multicast_ttl; 5910 5911 udpha->uha_dst_port = port; 5912 udpha->uha_src_port = uha_src_port; 5913 5914 if (ip_snd_opt_len > 0) { 5915 uint32_t cksum; 5916 5917 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5918 lock_held = B_FALSE; 5919 rw_exit(&udp->udp_rwlock); 5920 /* 5921 * Massage source route putting first source route in ipha_dst. 5922 * Ignore the destination in T_unitdata_req. 5923 * Create a checksum adjustment for a source route, if any. 5924 */ 5925 cksum = ip_massage_options(ipha, us->us_netstack); 5926 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5927 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5928 (ipha->ipha_dst & 0xFFFF); 5929 if ((int)cksum < 0) 5930 cksum--; 5931 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5932 /* 5933 * IP does the checksum if uha_checksum is non-zero, 5934 * We make it easy for IP to include our pseudo header 5935 * by putting our length in uha_checksum. 5936 */ 5937 cksum += ip_len; 5938 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5939 /* There might be a carry. */ 5940 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5941 #ifdef _LITTLE_ENDIAN 5942 if (us->us_do_checksum) 5943 ip_len = (cksum << 16) | ip_len; 5944 #else 5945 if (us->us_do_checksum) 5946 ip_len = (ip_len << 16) | cksum; 5947 else 5948 ip_len <<= 16; 5949 #endif 5950 } else { 5951 /* 5952 * IP does the checksum if uha_checksum is non-zero, 5953 * We make it easy for IP to include our pseudo header 5954 * by putting our length in uha_checksum. 
5955 */ 5956 if (us->us_do_checksum) 5957 ip_len |= (ip_len << 16); 5958 #ifndef _LITTLE_ENDIAN 5959 else 5960 ip_len <<= 16; 5961 #endif 5962 } 5963 ASSERT(!lock_held); 5964 /* Set UDP length and checksum */ 5965 *((uint32_t *)&udpha->uha_length) = ip_len; 5966 if (DB_CRED(mp) != NULL) 5967 mblk_setcred(mp1, DB_CRED(mp)); 5968 5969 if (DB_TYPE(mp) != M_DATA) { 5970 ASSERT(mp != mp1); 5971 freeb(mp); 5972 } 5973 5974 /* mp has been consumed and we'll return success */ 5975 ASSERT(*error == 0); 5976 mp = NULL; 5977 5978 /* We're done. Pass the packet to ip. */ 5979 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 5980 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5981 "udp_wput_end: q %p (%S)", q, "end"); 5982 5983 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5984 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 5985 connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || 5986 connp->conn_nofailover_ill != NULL || 5987 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 5988 optinfo.ip_opt_ill_index != 0 || 5989 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5990 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 5991 ipst->ips_ip_g_mrouter != NULL) { 5992 UDP_STAT(us, udp_ip_send); 5993 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 5994 &optinfo); 5995 } else { 5996 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5997 } 5998 5999 done: 6000 if (lock_held) 6001 rw_exit(&udp->udp_rwlock); 6002 if (*error != 0) { 6003 ASSERT(mp != NULL); 6004 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6005 } 6006 return (mp); 6007 } 6008 6009 static void 6010 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 6011 { 6012 conn_t *connp = udp->udp_connp; 6013 ipaddr_t src, dst; 6014 ire_t *ire; 6015 ipif_t *ipif = NULL; 6016 mblk_t *ire_fp_mp; 6017 boolean_t retry_caching; 6018 udp_stack_t *us = udp->udp_us; 6019 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6020 6021 dst = ipha->ipha_dst; 6022 src = ipha->ipha_src; 6023 ASSERT(ipha->ipha_ident == 0); 
6024 6025 if (CLASSD(dst)) { 6026 int err; 6027 6028 ipif = conn_get_held_ipif(connp, 6029 &connp->conn_multicast_ipif, &err); 6030 6031 if (ipif == NULL || ipif->ipif_isv6 || 6032 (ipif->ipif_ill->ill_phyint->phyint_flags & 6033 PHYI_LOOPBACK)) { 6034 if (ipif != NULL) 6035 ipif_refrele(ipif); 6036 UDP_STAT(us, udp_ip_send); 6037 ip_output(connp, mp, q, IP_WPUT); 6038 return; 6039 } 6040 } 6041 6042 retry_caching = B_FALSE; 6043 mutex_enter(&connp->conn_lock); 6044 ire = connp->conn_ire_cache; 6045 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 6046 6047 if (ire == NULL || ire->ire_addr != dst || 6048 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 6049 retry_caching = B_TRUE; 6050 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 6051 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6052 6053 ASSERT(ipif != NULL); 6054 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6055 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6056 retry_caching = B_TRUE; 6057 } 6058 6059 if (!retry_caching) { 6060 ASSERT(ire != NULL); 6061 IRE_REFHOLD(ire); 6062 mutex_exit(&connp->conn_lock); 6063 } else { 6064 boolean_t cached = B_FALSE; 6065 6066 connp->conn_ire_cache = NULL; 6067 mutex_exit(&connp->conn_lock); 6068 6069 /* Release the old ire */ 6070 if (ire != NULL) { 6071 IRE_REFRELE_NOTR(ire); 6072 ire = NULL; 6073 } 6074 6075 if (CLASSD(dst)) { 6076 ASSERT(ipif != NULL); 6077 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6078 connp->conn_zoneid, MBLK_GETLABEL(mp), 6079 MATCH_IRE_ILL_GROUP, ipst); 6080 } else { 6081 ASSERT(ipif == NULL); 6082 ire = ire_cache_lookup(dst, connp->conn_zoneid, 6083 MBLK_GETLABEL(mp), ipst); 6084 } 6085 6086 if (ire == NULL) { 6087 if (ipif != NULL) 6088 ipif_refrele(ipif); 6089 UDP_STAT(us, udp_ire_null); 6090 ip_output(connp, mp, q, IP_WPUT); 6091 return; 6092 } 6093 IRE_REFHOLD_NOTR(ire); 6094 6095 mutex_enter(&connp->conn_lock); 6096 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL) { 6097 
rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6098 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6099 connp->conn_ire_cache = ire; 6100 cached = B_TRUE; 6101 } 6102 rw_exit(&ire->ire_bucket->irb_lock); 6103 } 6104 mutex_exit(&connp->conn_lock); 6105 6106 /* 6107 * We can continue to use the ire but since it was not 6108 * cached, we should drop the extra reference. 6109 */ 6110 if (!cached) 6111 IRE_REFRELE_NOTR(ire); 6112 } 6113 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6114 ASSERT(!CLASSD(dst) || ipif != NULL); 6115 6116 /* 6117 * Check if we can take the fast-path. 6118 * Note that "incomplete" ire's (where the link-layer for next hop 6119 * is not resolved, or where the fast-path header in nce_fp_mp is not 6120 * available yet) are sent down the legacy (slow) path 6121 */ 6122 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6123 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 6124 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 6125 ((ire->ire_nce == NULL) || 6126 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 6127 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 6128 if (ipif != NULL) 6129 ipif_refrele(ipif); 6130 UDP_STAT(us, udp_ip_ire_send); 6131 IRE_REFRELE(ire); 6132 ip_output(connp, mp, q, IP_WPUT); 6133 return; 6134 } 6135 6136 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6137 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6138 ipha->ipha_src = ipif->ipif_src_addr; 6139 else 6140 ipha->ipha_src = ire->ire_src_addr; 6141 } 6142 6143 if (ipif != NULL) 6144 ipif_refrele(ipif); 6145 6146 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 6147 } 6148 6149 static void 6150 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 6151 { 6152 ipaddr_t src, dst; 6153 ill_t *ill; 6154 mblk_t *ire_fp_mp; 6155 uint_t ire_fp_mp_len; 6156 uint16_t *up; 6157 uint32_t cksum, hcksum_txflags; 6158 queue_t *dev_q; 6159 udp_t *udp = connp->conn_udp; 6160 ipha_t 
*ipha = (ipha_t *)mp->b_rptr; 6161 udp_stack_t *us = udp->udp_us; 6162 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6163 6164 dev_q = ire->ire_stq->q_next; 6165 ASSERT(dev_q != NULL); 6166 6167 6168 if (DEV_Q_IS_FLOW_CTLED(dev_q)) { 6169 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 6170 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 6171 ire_refrele(ire); 6172 return; 6173 } 6174 6175 ire_fp_mp = ire->ire_nce->nce_fp_mp; 6176 ire_fp_mp_len = MBLKL(ire_fp_mp); 6177 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 6178 6179 dst = ipha->ipha_dst; 6180 src = ipha->ipha_src; 6181 6182 ill = ire_to_ill(ire); 6183 ASSERT(ill != NULL); 6184 6185 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 6186 6187 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6188 #ifndef _BIG_ENDIAN 6189 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6190 #endif 6191 6192 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6193 ASSERT(ill->ill_hcksum_capab != NULL); 6194 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6195 } else { 6196 hcksum_txflags = 0; 6197 } 6198 6199 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6200 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6201 6202 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6203 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6204 if (*up != 0) { 6205 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6206 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6207 ntohs(ipha->ipha_length), cksum); 6208 6209 /* Software checksum? 
*/ 6210 if (DB_CKSUMFLAGS(mp) == 0) { 6211 UDP_STAT(us, udp_out_sw_cksum); 6212 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 6213 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6214 } 6215 } 6216 6217 if (!CLASSD(dst)) { 6218 ipha->ipha_fragment_offset_and_flags |= 6219 (uint32_t)htons(ire->ire_frag_flag); 6220 } 6221 6222 /* Calculate IP header checksum if hardware isn't capable */ 6223 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6224 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6225 ((uint16_t *)ipha)[4]); 6226 } 6227 6228 if (CLASSD(dst)) { 6229 ilm_t *ilm; 6230 6231 ILM_WALKER_HOLD(ill); 6232 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6233 ILM_WALKER_RELE(ill); 6234 if (ilm != NULL) { 6235 ip_multicast_loopback(q, ill, mp, 6236 connp->conn_multicast_loop ? 0 : 6237 IP_FF_NO_MCAST_LOOP, zoneid); 6238 } 6239 6240 /* If multicast TTL is 0 then we are done */ 6241 if (ipha->ipha_ttl == 0) { 6242 freemsg(mp); 6243 ire_refrele(ire); 6244 return; 6245 } 6246 } 6247 6248 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6249 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6250 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6251 6252 UPDATE_OB_PKT_COUNT(ire); 6253 ire->ire_last_used_time = lbolt; 6254 6255 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 6256 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 6257 ntohs(ipha->ipha_length)); 6258 6259 if (ILL_DLS_CAPABLE(ill)) { 6260 /* 6261 * Send the packet directly to DLD, where it may be queued 6262 * depending on the availability of transmit resources at 6263 * the media layer. 
6264 */ 6265 IP_DLS_ILL_TX(ill, ipha, mp, ipst); 6266 } else { 6267 DTRACE_PROBE4(ip4__physical__out__start, 6268 ill_t *, NULL, ill_t *, ill, 6269 ipha_t *, ipha, mblk_t *, mp); 6270 FW_HOOKS(ipst->ips_ip4_physical_out_event, 6271 ipst->ips_ipv4firewall_physical_out, 6272 NULL, ill, ipha, mp, mp, ipst); 6273 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 6274 if (mp != NULL) 6275 putnext(ire->ire_stq, mp); 6276 } 6277 6278 IRE_REFRELE(ire); 6279 } 6280 6281 static boolean_t 6282 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 6283 { 6284 udp_t *udp = Q_TO_UDP(wq); 6285 int err; 6286 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 6287 udp_stack_t *us = udp->udp_us; 6288 6289 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 6290 dst, opt_storage, udp->udp_mac_exempt, 6291 us->us_netstack->netstack_ip); 6292 if (err == 0) { 6293 err = tsol_update_sticky(&udp->udp_sticky_ipp, 6294 &udp->udp_label_len_v6, opt_storage); 6295 } 6296 if (err != 0) { 6297 DTRACE_PROBE4( 6298 tx__ip__log__drop__updatelabel__udp6, 6299 char *, "queue(1) failed to update options(2) on mp(3)", 6300 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6301 } else { 6302 udp->udp_v6lastdst = *dst; 6303 } 6304 return (err); 6305 } 6306 6307 void 6308 udp_output_connected(void *arg, mblk_t *mp) 6309 { 6310 conn_t *connp = (conn_t *)arg; 6311 udp_t *udp = connp->conn_udp; 6312 udp_stack_t *us = udp->udp_us; 6313 ipaddr_t v4dst; 6314 in_port_t dstport; 6315 boolean_t mapped_addr; 6316 struct sockaddr_storage ss; 6317 sin_t *sin; 6318 sin6_t *sin6; 6319 struct sockaddr *addr; 6320 socklen_t addrlen; 6321 int error; 6322 boolean_t insert_spi = udp->udp_nat_t_endpoint; 6323 6324 /* M_DATA for connected socket */ 6325 6326 ASSERT(udp->udp_issocket); 6327 UDP_DBGSTAT(us, udp_data_conn); 6328 6329 mutex_enter(&connp->conn_lock); 6330 if (udp->udp_state != TS_DATA_XFER) { 6331 mutex_exit(&connp->conn_lock); 6332 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6333 UDP_STAT(us, 
udp_out_err_notconn); 6334 freemsg(mp); 6335 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6336 "udp_wput_end: connp %p (%S)", connp, 6337 "not-connected; address required"); 6338 return; 6339 } 6340 6341 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 6342 if (mapped_addr) 6343 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6344 6345 /* Initialize addr and addrlen as if they're passed in */ 6346 if (udp->udp_family == AF_INET) { 6347 sin = (sin_t *)&ss; 6348 sin->sin_family = AF_INET; 6349 dstport = sin->sin_port = udp->udp_dstport; 6350 ASSERT(mapped_addr); 6351 sin->sin_addr.s_addr = v4dst; 6352 addr = (struct sockaddr *)sin; 6353 addrlen = sizeof (*sin); 6354 } else { 6355 sin6 = (sin6_t *)&ss; 6356 sin6->sin6_family = AF_INET6; 6357 dstport = sin6->sin6_port = udp->udp_dstport; 6358 sin6->sin6_flowinfo = udp->udp_flowinfo; 6359 sin6->sin6_addr = udp->udp_v6dst; 6360 sin6->sin6_scope_id = 0; 6361 sin6->__sin6_src_id = 0; 6362 addr = (struct sockaddr *)sin6; 6363 addrlen = sizeof (*sin6); 6364 } 6365 mutex_exit(&connp->conn_lock); 6366 6367 if (mapped_addr) { 6368 /* 6369 * Handle both AF_INET and AF_INET6; the latter 6370 * for IPV4 mapped destination addresses. Note 6371 * here that both addr and addrlen point to the 6372 * corresponding struct depending on the address 6373 * family of the socket. 6374 */ 6375 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 6376 insert_spi); 6377 } else { 6378 mp = udp_output_v6(connp, mp, sin6, &error); 6379 } 6380 if (error == 0) { 6381 ASSERT(mp == NULL); 6382 return; 6383 } 6384 6385 UDP_STAT(us, udp_out_err_output); 6386 ASSERT(mp != NULL); 6387 /* mp is freed by the following routine */ 6388 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6389 (t_scalar_t)error); 6390 } 6391 6392 /* 6393 * This routine handles all messages passed downstream. It either 6394 * consumes the message or passes it downstream; it never queues a 6395 * a message. 
6396 * 6397 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 6398 * is valid when we are directly beneath the stream head, and thus sockfs 6399 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6400 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 6401 * connected endpoints. 6402 */ 6403 void 6404 udp_wput(queue_t *q, mblk_t *mp) 6405 { 6406 sin6_t *sin6; 6407 sin_t *sin; 6408 ipaddr_t v4dst; 6409 uint16_t port; 6410 uint_t srcid; 6411 conn_t *connp = Q_TO_CONN(q); 6412 udp_t *udp = connp->conn_udp; 6413 int error = 0; 6414 struct sockaddr *addr; 6415 socklen_t addrlen; 6416 udp_stack_t *us = udp->udp_us; 6417 boolean_t insert_spi = udp->udp_nat_t_endpoint; 6418 6419 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6420 "udp_wput_start: queue %p mp %p", q, mp); 6421 6422 /* 6423 * We directly handle several cases here: T_UNITDATA_REQ message 6424 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 6425 * socket. 6426 */ 6427 switch (DB_TYPE(mp)) { 6428 case M_DATA: 6429 /* 6430 * Quick check for error cases. 
Checks will be done again 6431 * under the lock later on 6432 */ 6433 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6434 /* Not connected; address is required */ 6435 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6436 UDP_STAT(us, udp_out_err_notconn); 6437 freemsg(mp); 6438 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6439 "udp_wput_end: connp %p (%S)", connp, 6440 "not-connected; address required"); 6441 return; 6442 } 6443 udp_output_connected(connp, mp); 6444 return; 6445 6446 case M_PROTO: 6447 case M_PCPROTO: { 6448 struct T_unitdata_req *tudr; 6449 6450 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6451 tudr = (struct T_unitdata_req *)mp->b_rptr; 6452 6453 /* Handle valid T_UNITDATA_REQ here */ 6454 if (MBLKL(mp) >= sizeof (*tudr) && 6455 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6456 if (mp->b_cont == NULL) { 6457 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6458 "udp_wput_end: q %p (%S)", q, "badaddr"); 6459 error = EPROTO; 6460 goto ud_error; 6461 } 6462 6463 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6464 tudr->DEST_length)) { 6465 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6466 "udp_wput_end: q %p (%S)", q, "badaddr"); 6467 error = EADDRNOTAVAIL; 6468 goto ud_error; 6469 } 6470 /* 6471 * If a port has not been bound to the stream, fail. 6472 * This is not a problem when sockfs is directly 6473 * above us, because it will ensure that the socket 6474 * is first bound before allowing data to be sent. 
6475 */ 6476 if (udp->udp_state == TS_UNBND) { 6477 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6478 "udp_wput_end: q %p (%S)", q, "outstate"); 6479 error = EPROTO; 6480 goto ud_error; 6481 } 6482 addr = (struct sockaddr *) 6483 &mp->b_rptr[tudr->DEST_offset]; 6484 addrlen = tudr->DEST_length; 6485 if (tudr->OPT_length != 0) 6486 UDP_STAT(us, udp_out_opt); 6487 break; 6488 } 6489 /* FALLTHRU */ 6490 } 6491 default: 6492 udp_wput_other(q, mp); 6493 return; 6494 } 6495 ASSERT(addr != NULL); 6496 6497 switch (udp->udp_family) { 6498 case AF_INET6: 6499 sin6 = (sin6_t *)addr; 6500 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 6501 (sin6->sin6_family != AF_INET6)) { 6502 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6503 "udp_wput_end: q %p (%S)", q, "badaddr"); 6504 error = EADDRNOTAVAIL; 6505 goto ud_error; 6506 } 6507 6508 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6509 /* 6510 * Destination is a non-IPv4-compatible IPv6 address. 6511 * Send out an IPv6 format packet. 6512 */ 6513 mp = udp_output_v6(connp, mp, sin6, &error); 6514 if (error != 0) 6515 goto ud_error; 6516 6517 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6518 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6519 return; 6520 } 6521 /* 6522 * If the local address is not zero or a mapped address 6523 * return an error. It would be possible to send an IPv4 6524 * packet but the response would never make it back to the 6525 * application since it is bound to a non-mapped address. 
6526 */ 6527 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6528 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6529 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6530 "udp_wput_end: q %p (%S)", q, "badaddr"); 6531 error = EADDRNOTAVAIL; 6532 goto ud_error; 6533 } 6534 /* Send IPv4 packet without modifying udp_ipversion */ 6535 /* Extract port and ipaddr */ 6536 port = sin6->sin6_port; 6537 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6538 srcid = sin6->__sin6_src_id; 6539 break; 6540 6541 case AF_INET: 6542 sin = (sin_t *)addr; 6543 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 6544 (sin->sin_family != AF_INET)) { 6545 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6546 "udp_wput_end: q %p (%S)", q, "badaddr"); 6547 error = EADDRNOTAVAIL; 6548 goto ud_error; 6549 } 6550 /* Extract port and ipaddr */ 6551 port = sin->sin_port; 6552 v4dst = sin->sin_addr.s_addr; 6553 srcid = 0; 6554 break; 6555 } 6556 6557 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi); 6558 if (error != 0) { 6559 ud_error: 6560 UDP_STAT(us, udp_out_err_output); 6561 ASSERT(mp != NULL); 6562 /* mp is freed by the following routine */ 6563 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6564 (t_scalar_t)error); 6565 } 6566 } 6567 6568 /* 6569 * udp_output_v6(): 6570 * Assumes that udp_wput did some sanity checking on the destination 6571 * address. 
6572 */ 6573 static mblk_t * 6574 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) 6575 { 6576 ip6_t *ip6h; 6577 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6578 mblk_t *mp1 = mp; 6579 mblk_t *mp2; 6580 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6581 size_t ip_len; 6582 udpha_t *udph; 6583 udp_t *udp = connp->conn_udp; 6584 queue_t *q = connp->conn_wq; 6585 ip6_pkt_t ipp_s; /* For ancillary data options */ 6586 ip6_pkt_t *ipp = &ipp_s; 6587 ip6_pkt_t *tipp; /* temporary ipp */ 6588 uint32_t csum = 0; 6589 uint_t ignore = 0; 6590 uint_t option_exists = 0, is_sticky = 0; 6591 uint8_t *cp; 6592 uint8_t *nxthdr_ptr; 6593 in6_addr_t ip6_dst; 6594 udpattrs_t attrs; 6595 boolean_t opt_present; 6596 ip6_hbh_t *hopoptsptr = NULL; 6597 uint_t hopoptslen = 0; 6598 boolean_t is_ancillary = B_FALSE; 6599 udp_stack_t *us = udp->udp_us; 6600 size_t sth_wroff = 0; 6601 6602 *error = 0; 6603 6604 /* 6605 * If the local address is a mapped address return 6606 * an error. 6607 * It would be possible to send an IPv6 packet but the 6608 * response would never make it back to the application 6609 * since it is bound to a mapped address. 
6610 */ 6611 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6612 *error = EADDRNOTAVAIL; 6613 goto done; 6614 } 6615 6616 ipp->ipp_fields = 0; 6617 ipp->ipp_sticky_ignored = 0; 6618 6619 /* 6620 * If TPI options passed in, feed it for verification and handling 6621 */ 6622 attrs.udpattr_credset = B_FALSE; 6623 opt_present = B_FALSE; 6624 if (DB_TYPE(mp) != M_DATA) { 6625 mp1 = mp->b_cont; 6626 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 6627 attrs.udpattr_ipp6 = ipp; 6628 attrs.udpattr_mb = mp; 6629 if (udp_unitdata_opt_process(q, mp, error, 6630 &attrs) < 0) { 6631 goto done; 6632 } 6633 ASSERT(*error == 0); 6634 opt_present = B_TRUE; 6635 } 6636 } 6637 rw_enter(&udp->udp_rwlock, RW_READER); 6638 ignore = ipp->ipp_sticky_ignored; 6639 6640 /* mp1 points to the M_DATA mblk carrying the packet */ 6641 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6642 6643 if (sin6->sin6_scope_id != 0 && 6644 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6645 /* 6646 * IPPF_SCOPE_ID is special. It's neither a sticky 6647 * option nor ancillary data. It needs to be 6648 * explicitly set in options_exists. 6649 */ 6650 option_exists |= IPPF_SCOPE_ID; 6651 } 6652 6653 /* 6654 * Compute the destination address 6655 */ 6656 ip6_dst = sin6->sin6_addr; 6657 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6658 ip6_dst = ipv6_loopback; 6659 6660 /* 6661 * If we're not going to the same destination as last time, then 6662 * recompute the label required. This is done in a separate routine to 6663 * avoid blowing up our stack here. 6664 * 6665 * TSOL Note: Since we are not in WRITER mode, UDP packets 6666 * to different destination may require different labels, 6667 * or worse, UDP packets to same IP address may require 6668 * different labels due to use of shared all-zones address. 6669 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6670 * and sticky ipp_hopoptslen are consistent for the current 6671 * destination and are updated atomically. 
6672 */ 6673 mutex_enter(&connp->conn_lock); 6674 if (is_system_labeled()) { 6675 /* Using UDP MLP requires SCM_UCRED from user */ 6676 if (connp->conn_mlp_type != mlptSingle && 6677 !attrs.udpattr_credset) { 6678 DTRACE_PROBE4( 6679 tx__ip__log__info__output__udp6, 6680 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6681 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6682 *error = ECONNREFUSED; 6683 rw_exit(&udp->udp_rwlock); 6684 mutex_exit(&connp->conn_lock); 6685 goto done; 6686 } 6687 /* 6688 * update label option for this UDP socket if 6689 * - the destination has changed, or 6690 * - the UDP socket is MLP 6691 */ 6692 if ((opt_present || 6693 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6694 connp->conn_mlp_type != mlptSingle) && 6695 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 6696 rw_exit(&udp->udp_rwlock); 6697 mutex_exit(&connp->conn_lock); 6698 goto done; 6699 } 6700 } 6701 6702 /* 6703 * If there's a security label here, then we ignore any options the 6704 * user may try to set. We keep the peer's label as a hidden sticky 6705 * option. We make a private copy of this label before releasing the 6706 * lock so that label is kept consistent with the destination addr. 6707 */ 6708 if (udp->udp_label_len_v6 > 0) { 6709 ignore &= ~IPPF_HOPOPTS; 6710 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6711 } 6712 6713 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6714 /* No sticky options nor ancillary data. */ 6715 mutex_exit(&connp->conn_lock); 6716 goto no_options; 6717 } 6718 6719 /* 6720 * Go through the options figuring out where each is going to 6721 * come from and build two masks. The first mask indicates if 6722 * the option exists at all. The second mask indicates if the 6723 * option is sticky or ancillary. 
6724 */ 6725 if (!(ignore & IPPF_HOPOPTS)) { 6726 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6727 option_exists |= IPPF_HOPOPTS; 6728 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6729 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6730 option_exists |= IPPF_HOPOPTS; 6731 is_sticky |= IPPF_HOPOPTS; 6732 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6733 hopoptsptr = kmem_alloc( 6734 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6735 if (hopoptsptr == NULL) { 6736 *error = ENOMEM; 6737 mutex_exit(&connp->conn_lock); 6738 goto done; 6739 } 6740 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6741 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6742 hopoptslen); 6743 udp_ip_hdr_len += hopoptslen; 6744 } 6745 } 6746 mutex_exit(&connp->conn_lock); 6747 6748 if (!(ignore & IPPF_RTHDR)) { 6749 if (ipp->ipp_fields & IPPF_RTHDR) { 6750 option_exists |= IPPF_RTHDR; 6751 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6752 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6753 option_exists |= IPPF_RTHDR; 6754 is_sticky |= IPPF_RTHDR; 6755 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6756 } 6757 } 6758 6759 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6760 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6761 option_exists |= IPPF_RTDSTOPTS; 6762 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6763 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6764 option_exists |= IPPF_RTDSTOPTS; 6765 is_sticky |= IPPF_RTDSTOPTS; 6766 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6767 } 6768 } 6769 6770 if (!(ignore & IPPF_DSTOPTS)) { 6771 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6772 option_exists |= IPPF_DSTOPTS; 6773 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6774 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6775 option_exists |= IPPF_DSTOPTS; 6776 is_sticky |= IPPF_DSTOPTS; 6777 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6778 } 6779 } 6780 6781 if (!(ignore & IPPF_IFINDEX)) { 6782 if (ipp->ipp_fields & IPPF_IFINDEX) { 6783 
option_exists |= IPPF_IFINDEX; 6784 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6785 option_exists |= IPPF_IFINDEX; 6786 is_sticky |= IPPF_IFINDEX; 6787 } 6788 } 6789 6790 if (!(ignore & IPPF_ADDR)) { 6791 if (ipp->ipp_fields & IPPF_ADDR) { 6792 option_exists |= IPPF_ADDR; 6793 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6794 option_exists |= IPPF_ADDR; 6795 is_sticky |= IPPF_ADDR; 6796 } 6797 } 6798 6799 if (!(ignore & IPPF_DONTFRAG)) { 6800 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6801 option_exists |= IPPF_DONTFRAG; 6802 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6803 option_exists |= IPPF_DONTFRAG; 6804 is_sticky |= IPPF_DONTFRAG; 6805 } 6806 } 6807 6808 if (!(ignore & IPPF_USE_MIN_MTU)) { 6809 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6810 option_exists |= IPPF_USE_MIN_MTU; 6811 } else if (udp->udp_sticky_ipp.ipp_fields & 6812 IPPF_USE_MIN_MTU) { 6813 option_exists |= IPPF_USE_MIN_MTU; 6814 is_sticky |= IPPF_USE_MIN_MTU; 6815 } 6816 } 6817 6818 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6819 option_exists |= IPPF_HOPLIMIT; 6820 /* IPV6_HOPLIMIT can never be sticky */ 6821 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6822 6823 if (!(ignore & IPPF_UNICAST_HOPS) && 6824 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6825 option_exists |= IPPF_UNICAST_HOPS; 6826 is_sticky |= IPPF_UNICAST_HOPS; 6827 } 6828 6829 if (!(ignore & IPPF_MULTICAST_HOPS) && 6830 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6831 option_exists |= IPPF_MULTICAST_HOPS; 6832 is_sticky |= IPPF_MULTICAST_HOPS; 6833 } 6834 6835 if (!(ignore & IPPF_TCLASS)) { 6836 if (ipp->ipp_fields & IPPF_TCLASS) { 6837 option_exists |= IPPF_TCLASS; 6838 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6839 option_exists |= IPPF_TCLASS; 6840 is_sticky |= IPPF_TCLASS; 6841 } 6842 } 6843 6844 if (!(ignore & IPPF_NEXTHOP) && 6845 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6846 option_exists |= 
IPPF_NEXTHOP; 6847 is_sticky |= IPPF_NEXTHOP; 6848 } 6849 6850 no_options: 6851 6852 /* 6853 * If any options carried in the ip6i_t were specified, we 6854 * need to account for the ip6i_t in the data we'll be sending 6855 * down. 6856 */ 6857 if (option_exists & IPPF_HAS_IP6I) 6858 udp_ip_hdr_len += sizeof (ip6i_t); 6859 6860 /* check/fix buffer config, setup pointers into it */ 6861 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6862 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6863 !OK_32PTR(ip6h)) { 6864 6865 /* Try to get everything in a single mblk next time */ 6866 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6867 udp->udp_max_hdr_len = udp_ip_hdr_len; 6868 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6869 } 6870 6871 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6872 if (mp2 == NULL) { 6873 *error = ENOMEM; 6874 rw_exit(&udp->udp_rwlock); 6875 goto done; 6876 } 6877 mp2->b_wptr = DB_LIM(mp2); 6878 mp2->b_cont = mp1; 6879 mp1 = mp2; 6880 if (DB_TYPE(mp) != M_DATA) 6881 mp->b_cont = mp1; 6882 else 6883 mp = mp1; 6884 6885 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6886 } 6887 mp1->b_rptr = (unsigned char *)ip6h; 6888 ip6i = (ip6i_t *)ip6h; 6889 6890 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 6891 if (option_exists & IPPF_HAS_IP6I) { 6892 ip6h = (ip6_t *)&ip6i[1]; 6893 ip6i->ip6i_flags = 0; 6894 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6895 6896 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6897 if (option_exists & IPPF_SCOPE_ID) { 6898 ip6i->ip6i_flags |= IP6I_IFINDEX; 6899 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6900 } else if (option_exists & IPPF_IFINDEX) { 6901 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6902 ASSERT(tipp->ipp_ifindex != 0); 6903 ip6i->ip6i_flags |= IP6I_IFINDEX; 6904 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6905 } 6906 6907 if (option_exists & IPPF_ADDR) { 6908 /* 6909 * Enable per-packet source address verification if 6910 * IPV6_PKTINFO specified the source address. 6911 * ip6_src is set in the transport's _wput function. 6912 */ 6913 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6914 } 6915 6916 if (option_exists & IPPF_DONTFRAG) { 6917 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6918 } 6919 6920 if (option_exists & IPPF_USE_MIN_MTU) { 6921 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6922 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6923 } 6924 6925 if (option_exists & IPPF_NEXTHOP) { 6926 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6927 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6928 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6929 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6930 } 6931 6932 /* 6933 * tell IP this is an ip6i_t private header 6934 */ 6935 ip6i->ip6i_nxt = IPPROTO_RAW; 6936 } 6937 6938 /* Initialize IPv6 header */ 6939 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6940 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6941 6942 /* Set the hoplimit of the outgoing packet. */ 6943 if (option_exists & IPPF_HOPLIMIT) { 6944 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 6945 ip6h->ip6_hops = ipp->ipp_hoplimit; 6946 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6947 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6948 ip6h->ip6_hops = udp->udp_multicast_ttl; 6949 if (option_exists & IPPF_MULTICAST_HOPS) 6950 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6951 } else { 6952 ip6h->ip6_hops = udp->udp_ttl; 6953 if (option_exists & IPPF_UNICAST_HOPS) 6954 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6955 } 6956 6957 if (option_exists & IPPF_ADDR) { 6958 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6959 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6960 ip6h->ip6_src = tipp->ipp_addr; 6961 } else { 6962 /* 6963 * The source address was not set using IPV6_PKTINFO. 6964 * First look at the bound source. 6965 * If unspecified fallback to __sin6_src_id. 6966 */ 6967 ip6h->ip6_src = udp->udp_v6src; 6968 if (sin6->__sin6_src_id != 0 && 6969 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6970 ip_srcid_find_id(sin6->__sin6_src_id, 6971 &ip6h->ip6_src, connp->conn_zoneid, 6972 us->us_netstack); 6973 } 6974 } 6975 6976 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6977 cp = (uint8_t *)&ip6h[1]; 6978 6979 /* 6980 * Here's where we have to start stringing together 6981 * any extension headers in the right order: 6982 * Hop-by-hop, destination, routing, and final destination opts. 
6983 */ 6984 if (option_exists & IPPF_HOPOPTS) { 6985 /* Hop-by-hop options */ 6986 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6987 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6988 if (hopoptslen == 0) { 6989 hopoptsptr = tipp->ipp_hopopts; 6990 hopoptslen = tipp->ipp_hopoptslen; 6991 is_ancillary = B_TRUE; 6992 } 6993 6994 *nxthdr_ptr = IPPROTO_HOPOPTS; 6995 nxthdr_ptr = &hbh->ip6h_nxt; 6996 6997 bcopy(hopoptsptr, cp, hopoptslen); 6998 cp += hopoptslen; 6999 7000 if (hopoptsptr != NULL && !is_ancillary) { 7001 kmem_free(hopoptsptr, hopoptslen); 7002 hopoptsptr = NULL; 7003 hopoptslen = 0; 7004 } 7005 } 7006 /* 7007 * En-route destination options 7008 * Only do them if there's a routing header as well 7009 */ 7010 if (option_exists & IPPF_RTDSTOPTS) { 7011 ip6_dest_t *dst = (ip6_dest_t *)cp; 7012 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7013 7014 *nxthdr_ptr = IPPROTO_DSTOPTS; 7015 nxthdr_ptr = &dst->ip6d_nxt; 7016 7017 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7018 cp += tipp->ipp_rtdstoptslen; 7019 } 7020 /* 7021 * Routing header next 7022 */ 7023 if (option_exists & IPPF_RTHDR) { 7024 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7025 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7026 7027 *nxthdr_ptr = IPPROTO_ROUTING; 7028 nxthdr_ptr = &rt->ip6r_nxt; 7029 7030 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7031 cp += tipp->ipp_rthdrlen; 7032 } 7033 /* 7034 * Do ultimate destination options 7035 */ 7036 if (option_exists & IPPF_DSTOPTS) { 7037 ip6_dest_t *dest = (ip6_dest_t *)cp; 7038 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7039 7040 *nxthdr_ptr = IPPROTO_DSTOPTS; 7041 nxthdr_ptr = &dest->ip6d_nxt; 7042 7043 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7044 cp += tipp->ipp_dstoptslen; 7045 } 7046 /* 7047 * Now set the last header pointer to the proto passed in 7048 */ 7049 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7050 *nxthdr_ptr = IPPROTO_UDP; 7051 7052 /* Update UDP header */ 7053 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 7054 udph->uha_dst_port = sin6->sin6_port; 7055 udph->uha_src_port = udp->udp_port; 7056 7057 /* 7058 * Copy in the destination address 7059 */ 7060 ip6h->ip6_dst = ip6_dst; 7061 7062 ip6h->ip6_vcf = 7063 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 7064 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7065 7066 if (option_exists & IPPF_TCLASS) { 7067 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7068 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7069 tipp->ipp_tclass); 7070 } 7071 rw_exit(&udp->udp_rwlock); 7072 7073 if (option_exists & IPPF_RTHDR) { 7074 ip6_rthdr_t *rth; 7075 7076 /* 7077 * Perform any processing needed for source routing. 7078 * We know that all extension headers will be in the same mblk 7079 * as the IPv6 header. 7080 */ 7081 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7082 if (rth != NULL && rth->ip6r_segleft != 0) { 7083 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7084 /* 7085 * Drop packet - only support Type 0 routing. 7086 * Notify the application as well. 7087 */ 7088 *error = EPROTO; 7089 goto done; 7090 } 7091 7092 /* 7093 * rth->ip6r_len is twice the number of 7094 * addresses in the header. Thus it must be even. 7095 */ 7096 if (rth->ip6r_len & 0x1) { 7097 *error = EPROTO; 7098 goto done; 7099 } 7100 /* 7101 * Shuffle the routing header and ip6_dst 7102 * addresses, and get the checksum difference 7103 * between the first hop (in ip6_dst) and 7104 * the destination (in the last routing hdr entry). 7105 */ 7106 csum = ip_massage_options_v6(ip6h, rth, 7107 us->us_netstack); 7108 /* 7109 * Verify that the first hop isn't a mapped address. 7110 * Routers along the path need to do this verification 7111 * for subsequent hops. 
7112 */ 7113 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7114 *error = EADDRNOTAVAIL; 7115 goto done; 7116 } 7117 7118 cp += (rth->ip6r_len + 1)*8; 7119 } 7120 } 7121 7122 /* count up length of UDP packet */ 7123 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7124 if ((mp2 = mp1->b_cont) != NULL) { 7125 do { 7126 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7127 ip_len += (uint32_t)MBLKL(mp2); 7128 } while ((mp2 = mp2->b_cont) != NULL); 7129 } 7130 7131 /* 7132 * If the size of the packet is greater than the maximum allowed by 7133 * ip, return an error. Passing this down could cause panics because 7134 * the size will have wrapped and be inconsistent with the msg size. 7135 */ 7136 if (ip_len > IP_MAXPACKET) { 7137 *error = EMSGSIZE; 7138 goto done; 7139 } 7140 7141 /* Store the UDP length. Subtract length of extension hdrs */ 7142 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7143 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7144 7145 /* 7146 * We make it easy for IP to include our pseudo header 7147 * by putting our length in uh_checksum, modified (if 7148 * we have a routing header) by the checksum difference 7149 * between the ultimate destination and first hop addresses. 7150 * Note: UDP over IPv6 must always checksum the packet. 7151 */ 7152 csum += udph->uha_length; 7153 csum = (csum & 0xFFFF) + (csum >> 16); 7154 udph->uha_checksum = (uint16_t)csum; 7155 7156 #ifdef _LITTLE_ENDIAN 7157 ip_len = htons(ip_len); 7158 #endif 7159 ip6h->ip6_plen = ip_len; 7160 if (DB_CRED(mp) != NULL) 7161 mblk_setcred(mp1, DB_CRED(mp)); 7162 7163 if (DB_TYPE(mp) != M_DATA) { 7164 ASSERT(mp != mp1); 7165 freeb(mp); 7166 } 7167 7168 /* mp has been consumed and we'll return success */ 7169 ASSERT(*error == 0); 7170 mp = NULL; 7171 7172 /* We're done. 
Pass the packet to IP */ 7173 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 7174 ip_output_v6(connp, mp1, q, IP_WPUT); 7175 7176 done: 7177 if (sth_wroff != 0) { 7178 (void) mi_set_sth_wroff(RD(q), 7179 udp->udp_max_hdr_len + us->us_wroff_extra); 7180 } 7181 if (hopoptsptr != NULL && !is_ancillary) { 7182 kmem_free(hopoptsptr, hopoptslen); 7183 hopoptsptr = NULL; 7184 } 7185 if (*error != 0) { 7186 ASSERT(mp != NULL); 7187 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 7188 } 7189 return (mp); 7190 } 7191 7192 static void 7193 udp_wput_other(queue_t *q, mblk_t *mp) 7194 { 7195 uchar_t *rptr = mp->b_rptr; 7196 struct datab *db; 7197 struct iocblk *iocp; 7198 cred_t *cr; 7199 conn_t *connp = Q_TO_CONN(q); 7200 udp_t *udp = connp->conn_udp; 7201 udp_stack_t *us; 7202 7203 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7204 "udp_wput_other_start: q %p", q); 7205 7206 us = udp->udp_us; 7207 db = mp->b_datap; 7208 7209 cr = DB_CREDDEF(mp, connp->conn_cred); 7210 7211 switch (db->db_type) { 7212 case M_PROTO: 7213 case M_PCPROTO: 7214 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7215 freemsg(mp); 7216 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7217 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 7218 return; 7219 } 7220 switch (((t_primp_t)rptr)->type) { 7221 case T_ADDR_REQ: 7222 udp_addr_req(q, mp); 7223 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7224 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7225 return; 7226 case O_T_BIND_REQ: 7227 case T_BIND_REQ: 7228 udp_bind(q, mp); 7229 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7230 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7231 return; 7232 case T_CONN_REQ: 7233 udp_connect(q, mp); 7234 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7235 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7236 return; 7237 case T_CAPABILITY_REQ: 7238 udp_capability_req(q, mp); 7239 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7240 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7241 return; 7242 case T_INFO_REQ: 7243 udp_info_req(q, mp); 7244 
TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7245 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7246 return; 7247 case T_UNITDATA_REQ: 7248 /* 7249 * If a T_UNITDATA_REQ gets here, the address must 7250 * be bad. Valid T_UNITDATA_REQs are handled 7251 * in udp_wput. 7252 */ 7253 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7254 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7255 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 7256 return; 7257 case T_UNBIND_REQ: 7258 udp_unbind(q, mp); 7259 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7260 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7261 return; 7262 case T_SVR4_OPTMGMT_REQ: 7263 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 7264 cr)) { 7265 (void) svr4_optcom_req(q, 7266 mp, cr, &udp_opt_obj, B_TRUE); 7267 } 7268 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7269 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7270 return; 7271 7272 case T_OPTMGMT_REQ: 7273 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 7274 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7275 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7276 return; 7277 7278 case T_DISCON_REQ: 7279 udp_disconnect(q, mp); 7280 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7281 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 7282 return; 7283 7284 /* The following TPI message is not supported by udp. */ 7285 case O_T_CONN_RES: 7286 case T_CONN_RES: 7287 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7288 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7289 "udp_wput_other_end: q %p (%S)", q, 7290 "connres/disconreq"); 7291 return; 7292 7293 /* The following 3 TPI messages are illegal for udp. 
*/ 7294 case T_DATA_REQ: 7295 case T_EXDATA_REQ: 7296 case T_ORDREL_REQ: 7297 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7298 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7299 "udp_wput_other_end: q %p (%S)", q, 7300 "data/exdata/ordrel"); 7301 return; 7302 default: 7303 break; 7304 } 7305 break; 7306 case M_FLUSH: 7307 if (*rptr & FLUSHW) 7308 flushq(q, FLUSHDATA); 7309 break; 7310 case M_IOCTL: 7311 iocp = (struct iocblk *)mp->b_rptr; 7312 switch (iocp->ioc_cmd) { 7313 case TI_GETPEERNAME: 7314 if (udp->udp_state != TS_DATA_XFER) { 7315 /* 7316 * If a default destination address has not 7317 * been associated with the stream, then we 7318 * don't know the peer's name. 7319 */ 7320 iocp->ioc_error = ENOTCONN; 7321 iocp->ioc_count = 0; 7322 mp->b_datap->db_type = M_IOCACK; 7323 qreply(q, mp); 7324 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7325 "udp_wput_other_end: q %p (%S)", q, 7326 "getpeername"); 7327 return; 7328 } 7329 /* FALLTHRU */ 7330 case TI_GETMYNAME: { 7331 /* 7332 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7333 * need to copyin the user's strbuf structure. 7334 * Processing will continue in the M_IOCDATA case 7335 * below. 7336 */ 7337 mi_copyin(q, mp, NULL, 7338 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7339 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7340 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 7341 return; 7342 } 7343 case ND_SET: 7344 /* nd_getset performs the necessary checking */ 7345 case ND_GET: 7346 if (nd_getset(q, us->us_nd, mp)) { 7347 qreply(q, mp); 7348 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7349 "udp_wput_other_end: q %p (%S)", q, "get"); 7350 return; 7351 } 7352 break; 7353 case _SIOCSOCKFALLBACK: 7354 /* 7355 * Either sockmod is about to be popped and the 7356 * socket would now be treated as a plain stream, 7357 * or a module is about to be pushed so we could 7358 * no longer use read-side synchronous stream. 7359 * Drain any queued data and disable direct sockfs 7360 * interface from now on. 
7361 */ 7362 if (!udp->udp_issocket) { 7363 DB_TYPE(mp) = M_IOCNAK; 7364 iocp->ioc_error = EINVAL; 7365 } else { 7366 udp->udp_issocket = B_FALSE; 7367 if (udp->udp_direct_sockfs) { 7368 /* 7369 * Disable read-side synchronous 7370 * stream interface and drain any 7371 * queued data. 7372 */ 7373 udp_rcv_drain(RD(q), udp, 7374 B_FALSE); 7375 ASSERT(!udp->udp_direct_sockfs); 7376 UDP_STAT(us, udp_sock_fallback); 7377 } 7378 DB_TYPE(mp) = M_IOCACK; 7379 iocp->ioc_error = 0; 7380 } 7381 iocp->ioc_count = 0; 7382 iocp->ioc_rval = 0; 7383 qreply(q, mp); 7384 return; 7385 default: 7386 break; 7387 } 7388 break; 7389 case M_IOCDATA: 7390 udp_wput_iocdata(q, mp); 7391 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7392 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7393 return; 7394 default: 7395 /* Unrecognized messages are passed through without change. */ 7396 break; 7397 } 7398 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7399 "udp_wput_other_end: q %p (%S)", q, "end"); 7400 ip_output(connp, mp, q, IP_WPUT); 7401 } 7402 7403 /* 7404 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7405 * messages. 7406 */ 7407 static void 7408 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7409 { 7410 mblk_t *mp1; 7411 STRUCT_HANDLE(strbuf, sb); 7412 uint16_t port; 7413 in6_addr_t v6addr; 7414 ipaddr_t v4addr; 7415 uint32_t flowinfo = 0; 7416 int addrlen; 7417 udp_t *udp = Q_TO_UDP(q); 7418 7419 /* Make sure it is one of ours. */ 7420 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7421 case TI_GETMYNAME: 7422 case TI_GETPEERNAME: 7423 break; 7424 default: 7425 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7426 return; 7427 } 7428 7429 switch (mi_copy_state(q, mp, &mp1)) { 7430 case -1: 7431 return; 7432 case MI_COPY_CASE(MI_COPY_IN, 1): 7433 break; 7434 case MI_COPY_CASE(MI_COPY_OUT, 1): 7435 /* 7436 * The address has been copied out, so now 7437 * copyout the strbuf. 
7438 */ 7439 mi_copyout(q, mp); 7440 return; 7441 case MI_COPY_CASE(MI_COPY_OUT, 2): 7442 /* 7443 * The address and strbuf have been copied out. 7444 * We're done, so just acknowledge the original 7445 * M_IOCTL. 7446 */ 7447 mi_copy_done(q, mp, 0); 7448 return; 7449 default: 7450 /* 7451 * Something strange has happened, so acknowledge 7452 * the original M_IOCTL with an EPROTO error. 7453 */ 7454 mi_copy_done(q, mp, EPROTO); 7455 return; 7456 } 7457 7458 /* 7459 * Now we have the strbuf structure for TI_GETMYNAME 7460 * and TI_GETPEERNAME. Next we copyout the requested 7461 * address and then we'll copyout the strbuf. 7462 */ 7463 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 7464 (void *)mp1->b_rptr); 7465 if (udp->udp_family == AF_INET) 7466 addrlen = sizeof (sin_t); 7467 else 7468 addrlen = sizeof (sin6_t); 7469 7470 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7471 mi_copy_done(q, mp, EINVAL); 7472 return; 7473 } 7474 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7475 case TI_GETMYNAME: 7476 if (udp->udp_family == AF_INET) { 7477 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7478 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7479 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7480 v4addr = V4_PART_OF_V6(udp->udp_v6src); 7481 } else { 7482 /* 7483 * INADDR_ANY 7484 * udp_v6src is not set, we might be bound to 7485 * broadcast/multicast. Use udp_bound_v6src as 7486 * local address instead (that could 7487 * also still be INADDR_ANY) 7488 */ 7489 v4addr = V4_PART_OF_V6(udp->udp_bound_v6src); 7490 } 7491 } else { 7492 /* udp->udp_family == AF_INET6 */ 7493 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7494 v6addr = udp->udp_v6src; 7495 } else { 7496 /* 7497 * UNSPECIFIED 7498 * udp_v6src is not set, we might be bound to 7499 * broadcast/multicast. 
Use udp_bound_v6src as 7500 * local address instead (that could 7501 * also still be UNSPECIFIED) 7502 */ 7503 v6addr = udp->udp_bound_v6src; 7504 } 7505 } 7506 port = udp->udp_port; 7507 break; 7508 case TI_GETPEERNAME: 7509 if (udp->udp_state != TS_DATA_XFER) { 7510 mi_copy_done(q, mp, ENOTCONN); 7511 return; 7512 } 7513 if (udp->udp_family == AF_INET) { 7514 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7515 v4addr = V4_PART_OF_V6(udp->udp_v6dst); 7516 } else { 7517 /* udp->udp_family == AF_INET6) */ 7518 v6addr = udp->udp_v6dst; 7519 flowinfo = udp->udp_flowinfo; 7520 } 7521 port = udp->udp_dstport; 7522 break; 7523 default: 7524 mi_copy_done(q, mp, EPROTO); 7525 return; 7526 } 7527 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7528 if (!mp1) 7529 return; 7530 7531 if (udp->udp_family == AF_INET) { 7532 sin_t *sin; 7533 7534 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 7535 sin = (sin_t *)mp1->b_rptr; 7536 mp1->b_wptr = (uchar_t *)&sin[1]; 7537 *sin = sin_null; 7538 sin->sin_family = AF_INET; 7539 sin->sin_addr.s_addr = v4addr; 7540 sin->sin_port = port; 7541 } else { 7542 /* udp->udp_family == AF_INET6 */ 7543 sin6_t *sin6; 7544 7545 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 7546 sin6 = (sin6_t *)mp1->b_rptr; 7547 mp1->b_wptr = (uchar_t *)&sin6[1]; 7548 *sin6 = sin6_null; 7549 sin6->sin6_family = AF_INET6; 7550 sin6->sin6_flowinfo = flowinfo; 7551 sin6->sin6_addr = v6addr; 7552 sin6->sin6_port = port; 7553 } 7554 /* Copy out the address */ 7555 mi_copyout(q, mp); 7556 } 7557 7558 7559 static int 7560 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7561 udpattrs_t *udpattrs) 7562 { 7563 struct T_unitdata_req *udreqp; 7564 int is_absreq_failure; 7565 cred_t *cr; 7566 conn_t *connp = Q_TO_CONN(q); 7567 7568 ASSERT(((t_primp_t)mp->b_rptr)->type); 7569 7570 cr = DB_CREDDEF(mp, connp->conn_cred); 7571 7572 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7573 7574 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7575 
udreqp->OPT_offset, cr, &udp_opt_obj, 7576 udpattrs, &is_absreq_failure); 7577 7578 if (*errorp != 0) { 7579 /* 7580 * Note: No special action needed in this 7581 * module for "is_absreq_failure" 7582 */ 7583 return (-1); /* failure */ 7584 } 7585 ASSERT(is_absreq_failure == 0); 7586 return (0); /* success */ 7587 } 7588 7589 void 7590 udp_ddi_init(void) 7591 { 7592 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7593 udp_opt_obj.odb_opt_arr_cnt); 7594 7595 /* 7596 * We want to be informed each time a stack is created or 7597 * destroyed in the kernel, so we can maintain the 7598 * set of udp_stack_t's. 7599 */ 7600 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7601 } 7602 7603 void 7604 udp_ddi_destroy(void) 7605 { 7606 netstack_unregister(NS_UDP); 7607 } 7608 7609 /* 7610 * Initialize the UDP stack instance. 7611 */ 7612 static void * 7613 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7614 { 7615 udp_stack_t *us; 7616 udpparam_t *pa; 7617 int i; 7618 7619 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7620 us->us_netstack = ns; 7621 7622 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7623 us->us_epriv_ports[0] = 2049; 7624 us->us_epriv_ports[1] = 4045; 7625 7626 /* 7627 * The smallest anonymous port in the priviledged port range which UDP 7628 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7629 */ 7630 us->us_min_anonpriv_port = 512; 7631 7632 us->us_bind_fanout_size = udp_bind_fanout_size; 7633 7634 /* Roundup variable that might have been modified in /etc/system */ 7635 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7636 /* Not a power of two. 
Round up to nearest power of two */ 7637 for (i = 0; i < 31; i++) { 7638 if (us->us_bind_fanout_size < (1 << i)) 7639 break; 7640 } 7641 us->us_bind_fanout_size = 1 << i; 7642 } 7643 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7644 sizeof (udp_fanout_t), KM_SLEEP); 7645 for (i = 0; i < us->us_bind_fanout_size; i++) { 7646 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7647 NULL); 7648 } 7649 7650 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7651 7652 us->us_param_arr = pa; 7653 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7654 7655 (void) udp_param_register(&us->us_nd, 7656 us->us_param_arr, A_CNT(udp_param_arr)); 7657 7658 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7659 us->us_mibkp = udp_kstat_init(stackid); 7660 return (us); 7661 } 7662 7663 /* 7664 * Free the UDP stack instance. 7665 */ 7666 static void 7667 udp_stack_fini(netstackid_t stackid, void *arg) 7668 { 7669 udp_stack_t *us = (udp_stack_t *)arg; 7670 int i; 7671 7672 for (i = 0; i < us->us_bind_fanout_size; i++) { 7673 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7674 } 7675 7676 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7677 sizeof (udp_fanout_t)); 7678 7679 us->us_bind_fanout = NULL; 7680 7681 nd_free(&us->us_nd); 7682 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7683 us->us_param_arr = NULL; 7684 7685 udp_kstat_fini(stackid, us->us_mibkp); 7686 us->us_mibkp = NULL; 7687 7688 udp_kstat2_fini(stackid, us->us_kstat); 7689 us->us_kstat = NULL; 7690 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7691 kmem_free(us, sizeof (*us)); 7692 } 7693 7694 static void * 7695 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7696 { 7697 kstat_t *ksp; 7698 7699 udp_stat_t template = { 7700 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7701 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7702 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7703 { "udp_drain", KSTAT_DATA_UINT64 }, 7704 { 
"udp_sock_fallback", KSTAT_DATA_UINT64 }, 7705 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7706 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7707 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7708 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7709 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7710 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7711 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7712 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7713 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7714 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7715 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7716 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7717 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7718 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7719 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7720 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7721 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7722 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7723 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7724 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7725 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7726 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7727 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7728 #ifdef DEBUG 7729 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7730 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7731 #endif 7732 }; 7733 7734 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7735 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7736 KSTAT_FLAG_VIRTUAL, stackid); 7737 7738 if (ksp == NULL) 7739 return (NULL); 7740 7741 bcopy(&template, us_statisticsp, sizeof (template)); 7742 ksp->ks_data = (void *)us_statisticsp; 7743 ksp->ks_private = (void *)(uintptr_t)stackid; 7744 7745 kstat_install(ksp); 7746 return (ksp); 7747 } 7748 7749 static void 7750 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp) 7751 { 7752 if (ksp != NULL) { 7753 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7754 kstat_delete_netstack(ksp, stackid); 7755 } 7756 } 7757 7758 static void * 7759 
udp_kstat_init(netstackid_t stackid) 7760 { 7761 kstat_t *ksp; 7762 7763 udp_named_kstat_t template = { 7764 { "inDatagrams", KSTAT_DATA_UINT64, 0 }, 7765 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7766 { "outDatagrams", KSTAT_DATA_UINT64, 0 }, 7767 { "entrySize", KSTAT_DATA_INT32, 0 }, 7768 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7769 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7770 }; 7771 7772 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2", 7773 KSTAT_TYPE_NAMED, 7774 NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid); 7775 7776 if (ksp == NULL || ksp->ks_data == NULL) 7777 return (NULL); 7778 7779 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7780 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7781 7782 bcopy(&template, ksp->ks_data, sizeof (template)); 7783 ksp->ks_update = udp_kstat_update; 7784 ksp->ks_private = (void *)(uintptr_t)stackid; 7785 7786 kstat_install(ksp); 7787 return (ksp); 7788 } 7789 7790 static void 7791 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp) 7792 { 7793 if (ksp != NULL) { 7794 ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private); 7795 kstat_delete_netstack(ksp, stackid); 7796 } 7797 } 7798 7799 static int 7800 udp_kstat_update(kstat_t *kp, int rw) 7801 { 7802 udp_named_kstat_t *udpkp; 7803 netstackid_t stackid = (netstackid_t)(uintptr_t)kp->ks_private; 7804 netstack_t *ns; 7805 udp_stack_t *us; 7806 7807 if ((kp == NULL) || (kp->ks_data == NULL)) 7808 return (EIO); 7809 7810 if (rw == KSTAT_WRITE) 7811 return (EACCES); 7812 7813 ns = netstack_find_by_stackid(stackid); 7814 if (ns == NULL) 7815 return (-1); 7816 us = ns->netstack_udp; 7817 if (us == NULL) { 7818 netstack_rele(ns); 7819 return (-1); 7820 } 7821 udpkp = (udp_named_kstat_t *)kp->ks_data; 7822 7823 udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams; 7824 udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors; 7825 udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams; 7826 udpkp->outErrors.value.ui32 = 
us->us_udp_mib.udpOutErrors; 7827 netstack_rele(ns); 7828 return (0); 7829 } 7830 7831 /* 7832 * Read-side synchronous stream info entry point, called as a 7833 * result of handling certain STREAMS ioctl operations. 7834 */ 7835 static int 7836 udp_rinfop(queue_t *q, infod_t *dp) 7837 { 7838 mblk_t *mp; 7839 uint_t cmd = dp->d_cmd; 7840 int res = 0; 7841 int error = 0; 7842 udp_t *udp = Q_TO_UDP(q); 7843 struct stdata *stp = STREAM(q); 7844 7845 mutex_enter(&udp->udp_drain_lock); 7846 /* If shutdown on read has happened, return nothing */ 7847 mutex_enter(&stp->sd_lock); 7848 if (stp->sd_flag & STREOF) { 7849 mutex_exit(&stp->sd_lock); 7850 goto done; 7851 } 7852 mutex_exit(&stp->sd_lock); 7853 7854 if ((mp = udp->udp_rcv_list_head) == NULL) 7855 goto done; 7856 7857 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7858 7859 if (cmd & INFOD_COUNT) { 7860 /* 7861 * Return the number of messages. 7862 */ 7863 dp->d_count += udp->udp_rcv_msgcnt; 7864 res |= INFOD_COUNT; 7865 } 7866 if (cmd & INFOD_BYTES) { 7867 /* 7868 * Return size of all data messages. 7869 */ 7870 dp->d_bytes += udp->udp_rcv_cnt; 7871 res |= INFOD_BYTES; 7872 } 7873 if (cmd & INFOD_FIRSTBYTES) { 7874 /* 7875 * Return size of first data message. 7876 */ 7877 dp->d_bytes = msgdsize(mp); 7878 res |= INFOD_FIRSTBYTES; 7879 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7880 } 7881 if (cmd & INFOD_COPYOUT) { 7882 mblk_t *mp1 = mp->b_cont; 7883 int n; 7884 /* 7885 * Return data contents of first message. 
7886 */ 7887 ASSERT(DB_TYPE(mp1) == M_DATA); 7888 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7889 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7890 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7891 UIO_READ, dp->d_uiop)) != 0) { 7892 goto done; 7893 } 7894 mp1 = mp1->b_cont; 7895 } 7896 res |= INFOD_COPYOUT; 7897 dp->d_cmd &= ~INFOD_COPYOUT; 7898 } 7899 done: 7900 mutex_exit(&udp->udp_drain_lock); 7901 7902 dp->d_res |= res; 7903 7904 return (error); 7905 } 7906 7907 /* 7908 * Read-side synchronous stream entry point. This is called as a result 7909 * of recv/read operation done at sockfs, and is guaranteed to execute 7910 * outside of the interrupt thread context. It returns a single datagram 7911 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7912 */ 7913 static int 7914 udp_rrw(queue_t *q, struiod_t *dp) 7915 { 7916 mblk_t *mp; 7917 udp_t *udp = Q_TO_UDP(q); 7918 udp_stack_t *us = udp->udp_us; 7919 7920 /* 7921 * Dequeue datagram from the head of the list and return 7922 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7923 * set/cleared depending on whether or not there's data 7924 * remaining in the list. 7925 */ 7926 mutex_enter(&udp->udp_drain_lock); 7927 if (!udp->udp_direct_sockfs) { 7928 mutex_exit(&udp->udp_drain_lock); 7929 UDP_STAT(us, udp_rrw_busy); 7930 return (EBUSY); 7931 } 7932 if ((mp = udp->udp_rcv_list_head) != NULL) { 7933 uint_t size = msgdsize(mp); 7934 7935 /* Last datagram in the list? */ 7936 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7937 udp->udp_rcv_list_tail = NULL; 7938 mp->b_next = NULL; 7939 7940 udp->udp_rcv_cnt -= size; 7941 udp->udp_rcv_msgcnt--; 7942 UDP_STAT(us, udp_rrw_msgcnt); 7943 7944 /* No longer flow-controlling? 
*/ 7945 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7946 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7947 udp->udp_drain_qfull = B_FALSE; 7948 } 7949 if (udp->udp_rcv_list_head == NULL) { 7950 /* 7951 * Either we just dequeued the last datagram or 7952 * we get here from sockfs and have nothing to 7953 * return; in this case clear RSLEEP. 7954 */ 7955 ASSERT(udp->udp_rcv_cnt == 0); 7956 ASSERT(udp->udp_rcv_msgcnt == 0); 7957 ASSERT(udp->udp_rcv_list_tail == NULL); 7958 STR_WAKEUP_CLEAR(STREAM(q)); 7959 } else { 7960 /* 7961 * More data follows; we need udp_rrw() to be 7962 * called in future to pick up the rest. 7963 */ 7964 STR_WAKEUP_SET(STREAM(q)); 7965 } 7966 mutex_exit(&udp->udp_drain_lock); 7967 dp->d_mp = mp; 7968 return (0); 7969 } 7970 7971 /* 7972 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7973 * list; this is typically executed within the interrupt thread context 7974 * and so we do things as quickly as possible. 7975 */ 7976 static void 7977 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7978 { 7979 ASSERT(q == RD(q)); 7980 ASSERT(pkt_len == msgdsize(mp)); 7981 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7982 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7983 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7984 7985 mutex_enter(&udp->udp_drain_lock); 7986 /* 7987 * Wake up and signal the receiving app; it is okay to do this 7988 * before enqueueing the mp because we are holding the drain lock. 7989 * One of the advantages of synchronous stream is the ability for 7990 * us to find out when the application performs a read on the 7991 * socket by way of udp_rrw() entry point being called. We need 7992 * to generate SIGPOLL/SIGIO for each received data in the case 7993 * of asynchronous socket just as in the strrput() case. However, 7994 * we only wake the application up when necessary, i.e. during the 7995 * first enqueue. 
When udp_rrw() is called, we send up a single 7996 * datagram upstream and call STR_WAKEUP_SET() again when there 7997 * are still data remaining in our receive queue. 7998 */ 7999 if (udp->udp_rcv_list_head == NULL) { 8000 STR_WAKEUP_SET(STREAM(q)); 8001 udp->udp_rcv_list_head = mp; 8002 } else { 8003 udp->udp_rcv_list_tail->b_next = mp; 8004 } 8005 udp->udp_rcv_list_tail = mp; 8006 udp->udp_rcv_cnt += pkt_len; 8007 udp->udp_rcv_msgcnt++; 8008 8009 /* Need to flow-control? */ 8010 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 8011 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 8012 udp->udp_drain_qfull = B_TRUE; 8013 8014 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 8015 STR_SENDSIG(STREAM(q)); 8016 mutex_exit(&udp->udp_drain_lock); 8017 } 8018 8019 /* 8020 * Drain the contents of receive list to the module upstream; we do 8021 * this during close or when we fallback to the slow mode due to 8022 * sockmod being popped or a module being pushed on top of us. 8023 */ 8024 static void 8025 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 8026 { 8027 mblk_t *mp; 8028 udp_stack_t *us = udp->udp_us; 8029 8030 ASSERT(q == RD(q)); 8031 8032 mutex_enter(&udp->udp_drain_lock); 8033 /* 8034 * There is no race with a concurrent udp_input() sending 8035 * up packets using putnext() after we have cleared the 8036 * udp_direct_sockfs flag but before we have completed 8037 * sending up the packets in udp_rcv_list, since we are 8038 * either a writer or we have quiesced the conn. 8039 */ 8040 udp->udp_direct_sockfs = B_FALSE; 8041 mutex_exit(&udp->udp_drain_lock); 8042 8043 if (udp->udp_rcv_list_head != NULL) 8044 UDP_STAT(us, udp_drain); 8045 8046 /* 8047 * Send up everything via putnext(); note here that we 8048 * don't need the udp_drain_lock to protect us since 8049 * nothing can enter udp_rrw() and that we currently 8050 * have exclusive access to this udp. 
8051 */ 8052 while ((mp = udp->udp_rcv_list_head) != NULL) { 8053 udp->udp_rcv_list_head = mp->b_next; 8054 mp->b_next = NULL; 8055 udp->udp_rcv_cnt -= msgdsize(mp); 8056 udp->udp_rcv_msgcnt--; 8057 if (closing) { 8058 freemsg(mp); 8059 } else { 8060 putnext(q, mp); 8061 } 8062 } 8063 ASSERT(udp->udp_rcv_cnt == 0); 8064 ASSERT(udp->udp_rcv_msgcnt == 0); 8065 ASSERT(udp->udp_rcv_list_head == NULL); 8066 udp->udp_rcv_list_tail = NULL; 8067 udp->udp_drain_qfull = B_FALSE; 8068 } 8069 8070 static size_t 8071 udp_set_rcv_hiwat(udp_t *udp, size_t size) 8072 { 8073 udp_stack_t *us = udp->udp_us; 8074 8075 /* We add a bit of extra buffering */ 8076 size += size >> 1; 8077 if (size > us->us_max_buf) 8078 size = us->us_max_buf; 8079 8080 udp->udp_rcv_hiwat = size; 8081 return (size); 8082 } 8083 8084 /* 8085 * For the lower queue so that UDP can be a dummy mux. 8086 * Nobody should be sending 8087 * packets up this stream 8088 */ 8089 static void 8090 udp_lrput(queue_t *q, mblk_t *mp) 8091 { 8092 mblk_t *mp1; 8093 8094 switch (mp->b_datap->db_type) { 8095 case M_FLUSH: 8096 /* Turn around */ 8097 if (*mp->b_rptr & FLUSHW) { 8098 *mp->b_rptr &= ~FLUSHR; 8099 qreply(q, mp); 8100 return; 8101 } 8102 break; 8103 } 8104 /* Could receive messages that passed through ar_rput */ 8105 for (mp1 = mp; mp1; mp1 = mp1->b_cont) 8106 mp1->b_prev = mp1->b_next = NULL; 8107 freemsg(mp); 8108 } 8109 8110 /* 8111 * For the lower queue so that UDP can be a dummy mux. 8112 * Nobody should be sending packets down this stream. 8113 */ 8114 /* ARGSUSED */ 8115 void 8116 udp_lwput(queue_t *q, mblk_t *mp) 8117 { 8118 freemsg(mp); 8119 } 8120