1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/cmn_err.h> 44 #include <sys/kmem.h> 45 #include <sys/policy.h> 46 #include <sys/ucred.h> 47 #include <sys/zone.h> 48 49 #include <sys/socket.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/mi.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <inet/ipnet.h> 82 83 /* 84 * The ipsec_info.h header file is here since it has the definition for the 85 * M_CTL message types used by IP to convey information to the ULP. The 86 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 87 */ 88 #include <net/pfkeyv2.h> 89 #include <inet/ipsec_info.h> 90 91 #include <sys/tsol/label.h> 92 #include <sys/tsol/tnet.h> 93 #include <rpc/pmap_prot.h> 94 95 /* 96 * Synchronization notes: 97 * 98 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 99 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 100 * We also use conn_lock when updating things that affect the IP classifier 101 * lookup. 102 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 103 * 104 * The fanout lock uf_lock: 105 * When a UDP endpoint is bound to a local port, it is inserted into 106 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 107 * The size of the array is controlled by the udp_bind_fanout_size variable. 
108 * This variable can be changed in /etc/system if the default value is 109 * not large enough. Each bind hash bucket is protected by a per bucket 110 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 111 * structure and a few other fields in the udp_t. A UDP endpoint is removed 112 * from the bind hash list only when it is being unbound or being closed. 113 * The per bucket lock also protects a UDP endpoint's state changes. 114 * 115 * The udp_rwlock: 116 * This protects most of the other fields in the udp_t. The exact list of 117 * fields which are protected by each of the above locks is documented in 118 * the udp_t structure definition. 119 * 120 * Plumbing notes: 121 * UDP is always a device driver. For compatibility with mibopen() code 122 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 123 * dummy module. 124 * 125 * The above implies that we don't support any intermediate module to 126 * reside in between /dev/ip and udp -- in fact, we never supported such 127 * scenario in the past as the inter-layer communication semantics have 128 * always been private. 129 */ 130 131 /* For /etc/system control */ 132 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 133 134 #define NDD_TOO_QUICK_MSG \ 135 "ndd get info rate too high for non-privileged users, try again " \ 136 "later.\n" 137 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 138 139 /* Option processing attrs */ 140 typedef struct udpattrs_s { 141 union { 142 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 143 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 144 } udpattr_ippu; 145 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 146 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 147 mblk_t *udpattr_mb; 148 boolean_t udpattr_credset; 149 } udpattrs_t; 150 151 static void udp_addr_req(queue_t *q, mblk_t *mp); 152 static void udp_bind(queue_t *q, mblk_t *mp); 153 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 154 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 155 static void udp_bind_result(conn_t *, mblk_t *); 156 static void udp_bind_ack(conn_t *, mblk_t *mp); 157 static void udp_bind_error(conn_t *, mblk_t *mp); 158 static int udp_build_hdrs(udp_t *udp); 159 static void udp_capability_req(queue_t *q, mblk_t *mp); 160 static int udp_close(queue_t *q); 161 static void udp_connect(queue_t *q, mblk_t *mp); 162 static void udp_disconnect(queue_t *q, mblk_t *mp); 163 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 164 int sys_error); 165 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 166 t_scalar_t tlierr, int unixerr); 167 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 168 cred_t *cr); 169 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 170 char *value, caddr_t cp, cred_t *cr); 171 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 172 char *value, caddr_t cp, cred_t *cr); 173 static void udp_icmp_error(queue_t *q, mblk_t *mp); 174 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 175 static void udp_info_req(queue_t *q, mblk_t *mp); 176 static void udp_input(void *, mblk_t *, void *); 177 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 178 t_scalar_t addr_length); 179 static void udp_lrput(queue_t *, mblk_t *); 180 static void udp_lwput(queue_t *, mblk_t *); 181 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 182 cred_t *credp, boolean_t isv6); 183 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 184 cred_t 
*credp); 185 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 186 cred_t *credp); 187 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 188 int *errorp, udpattrs_t *udpattrs); 189 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 190 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 191 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 192 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 193 cred_t *cr); 194 static void udp_report_item(mblk_t *mp, udp_t *udp); 195 static int udp_rinfop(queue_t *q, infod_t *dp); 196 static int udp_rrw(queue_t *q, struiod_t *dp); 197 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 198 cred_t *cr); 199 static void udp_send_data(udp_t *, queue_t *, mblk_t *, ipha_t *); 200 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 201 t_scalar_t destlen, t_scalar_t err); 202 static void udp_unbind(queue_t *q, mblk_t *mp); 203 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 204 boolean_t random); 205 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 206 int *, boolean_t); 207 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 208 int *error); 209 static void udp_wput_other(queue_t *q, mblk_t *mp); 210 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 211 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 212 213 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 214 static void udp_stack_fini(netstackid_t stackid, void *arg); 215 216 static void *udp_kstat_init(netstackid_t stackid); 217 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 218 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 219 static void udp_kstat2_fini(netstackid_t, kstat_t *); 220 static int udp_kstat_update(kstat_t *kp, int rw); 221 222 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 223 uint_t pkt_len); 224 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 225 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 226 227 #define UDP_RECV_HIWATER (56 * 1024) 228 #define UDP_RECV_LOWATER 128 229 #define UDP_XMIT_HIWATER (56 * 1024) 230 #define UDP_XMIT_LOWATER 1024 231 232 static struct module_info udp_mod_info = { 233 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 234 }; 235 236 /* 237 * Entry points for UDP as a device. 238 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 239 */ 240 static struct qinit udp_rinitv4 = { 241 NULL, NULL, udp_openv4, udp_close, NULL, 242 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 243 }; 244 245 static struct qinit udp_rinitv6 = { 246 NULL, NULL, udp_openv6, udp_close, NULL, 247 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 248 }; 249 250 static struct qinit udp_winit = { 251 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, 252 &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE 253 }; 254 255 /* 256 * UDP needs to handle I_LINK and I_PLINK since ifconfig 257 * likes to use it as a place to hang the various streams. 
258 */ 259 static struct qinit udp_lrinit = { 260 (pfi_t)udp_lrput, NULL, udp_openv4, udp_close, NULL, 261 &udp_mod_info 262 }; 263 264 static struct qinit udp_lwinit = { 265 (pfi_t)udp_lwput, NULL, udp_openv4, udp_close, NULL, 266 &udp_mod_info 267 }; 268 269 /* For AF_INET aka /dev/udp */ 270 struct streamtab udpinfov4 = { 271 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 272 }; 273 274 /* For AF_INET6 aka /dev/udp6 */ 275 struct streamtab udpinfov6 = { 276 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 277 }; 278 279 static sin_t sin_null; /* Zero address for quick clears */ 280 static sin6_t sin6_null; /* Zero address for quick clears */ 281 282 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 283 284 /* Default structure copied into T_INFO_ACK messages */ 285 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 286 T_INFO_ACK, 287 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 288 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 289 T_INVALID, /* CDATA_size. udp does not support connect data. */ 290 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 291 sizeof (sin_t), /* ADDR_size. */ 292 0, /* OPT_size - not initialized here */ 293 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 294 T_CLTS, /* SERV_type. udp supports connection-less. */ 295 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 296 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 297 }; 298 299 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 300 301 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 302 T_INFO_ACK, 303 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 304 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 305 T_INVALID, /* CDATA_size. udp does not support connect data. */ 306 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 307 sizeof (sin6_t), /* ADDR_size. */ 308 0, /* OPT_size - not initialized here */ 309 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 310 T_CLTS, /* SERV_type. udp supports connection-less. */ 311 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 312 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 313 }; 314 315 /* largest UDP port number */ 316 #define UDP_MAX_PORT 65535 317 318 /* 319 * Table of ND variables supported by udp. These are loaded into us_nd 320 * in udp_open. 321 * All of these are alterable, within the min/max values given, at run time. 322 */ 323 /* BEGIN CSTYLED */ 324 udpparam_t udp_param_arr[] = { 325 /*min max value name */ 326 { 0L, 256, 32, "udp_wroff_extra" }, 327 { 1L, 255, 255, "udp_ipv4_ttl" }, 328 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 329 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 330 { 0, 1, 1, "udp_do_checksum" }, 331 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 332 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 333 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 334 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 335 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 336 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 337 { 100, 60000, 1000, "udp_ndd_get_info_interval"}, 338 }; 339 /* END CSTYLED */ 340 341 /* Setable in /etc/system */ 342 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 343 uint32_t udp_random_anon_port = 1; 344 345 /* 346 * Hook functions to enable cluster networking. 
347 * On non-clustered systems these vectors must always be NULL 348 */ 349 350 void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family, 351 uint8_t *laddrp, in_port_t lport) = NULL; 352 void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, 353 uint8_t *laddrp, in_port_t lport) = NULL; 354 355 typedef union T_primitives *t_primp_t; 356 357 /* 358 * Return the next anonymous port in the privileged port range for 359 * bind checking. 360 * 361 * Trusted Extension (TX) notes: TX allows administrator to mark or 362 * reserve ports as Multilevel ports (MLP). MLP has special function 363 * on TX systems. Once a port is made MLP, it's not available as 364 * ordinary port. This creates "holes" in the port name space. It 365 * may be necessary to skip the "holes" find a suitable anon port. 366 */ 367 static in_port_t 368 udp_get_next_priv_port(udp_t *udp) 369 { 370 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 371 in_port_t nextport; 372 boolean_t restart = B_FALSE; 373 udp_stack_t *us = udp->udp_us; 374 375 retry: 376 if (next_priv_port < us->us_min_anonpriv_port || 377 next_priv_port >= IPPORT_RESERVED) { 378 next_priv_port = IPPORT_RESERVED - 1; 379 if (restart) 380 return (0); 381 restart = B_TRUE; 382 } 383 384 if (is_system_labeled() && 385 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 386 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 387 next_priv_port = nextport; 388 goto retry; 389 } 390 391 return (next_priv_port--); 392 } 393 394 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 395 /* ARGSUSED */ 396 static int 397 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 398 { 399 udp_fanout_t *udpf; 400 int i; 401 zoneid_t zoneid; 402 conn_t *connp; 403 udp_t *udp; 404 udp_stack_t *us; 405 406 connp = Q_TO_CONN(q); 407 udp = connp->conn_udp; 408 us = udp->udp_us; 409 410 /* Refer to comments in udp_status_report(). */ 411 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 412 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 413 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 414 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 415 return (0); 416 } 417 } 418 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 419 /* The following may work even if we cannot get a large buf. */ 420 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 421 return (0); 422 } 423 424 (void) mi_mpprintf(mp, 425 "UDP " MI_COL_HDRPAD_STR 426 /* 12345678[89ABCDEF] */ 427 " zone lport src addr dest addr port state"); 428 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 429 430 zoneid = connp->conn_zoneid; 431 432 for (i = 0; i < us->us_bind_fanout_size; i++) { 433 udpf = &us->us_bind_fanout[i]; 434 mutex_enter(&udpf->uf_lock); 435 436 /* Print the hash index. 
*/ 437 udp = udpf->uf_udp; 438 if (zoneid != GLOBAL_ZONEID) { 439 /* skip to first entry in this zone; might be none */ 440 while (udp != NULL && 441 udp->udp_connp->conn_zoneid != zoneid) 442 udp = udp->udp_bind_hash; 443 } 444 if (udp != NULL) { 445 uint_t print_len, buf_len; 446 447 buf_len = mp->b_cont->b_datap->db_lim - 448 mp->b_cont->b_wptr; 449 print_len = snprintf((char *)mp->b_cont->b_wptr, 450 buf_len, "%d\n", i); 451 if (print_len < buf_len) { 452 mp->b_cont->b_wptr += print_len; 453 } else { 454 mp->b_cont->b_wptr += buf_len; 455 } 456 for (; udp != NULL; udp = udp->udp_bind_hash) { 457 if (zoneid == GLOBAL_ZONEID || 458 zoneid == udp->udp_connp->conn_zoneid) 459 udp_report_item(mp->b_cont, udp); 460 } 461 } 462 mutex_exit(&udpf->uf_lock); 463 } 464 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 465 return (0); 466 } 467 468 /* 469 * Hash list removal routine for udp_t structures. 470 */ 471 static void 472 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 473 { 474 udp_t *udpnext; 475 kmutex_t *lockp; 476 udp_stack_t *us = udp->udp_us; 477 478 if (udp->udp_ptpbhn == NULL) 479 return; 480 481 /* 482 * Extract the lock pointer in case there are concurrent 483 * hash_remove's for this instance. 484 */ 485 ASSERT(udp->udp_port != 0); 486 if (!caller_holds_lock) { 487 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 488 us->us_bind_fanout_size)].uf_lock; 489 ASSERT(lockp != NULL); 490 mutex_enter(lockp); 491 } 492 if (udp->udp_ptpbhn != NULL) { 493 udpnext = udp->udp_bind_hash; 494 if (udpnext != NULL) { 495 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 496 udp->udp_bind_hash = NULL; 497 } 498 *udp->udp_ptpbhn = udpnext; 499 udp->udp_ptpbhn = NULL; 500 } 501 if (!caller_holds_lock) { 502 mutex_exit(lockp); 503 } 504 } 505 506 static void 507 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 508 { 509 udp_t **udpp; 510 udp_t *udpnext; 511 512 ASSERT(MUTEX_HELD(&uf->uf_lock)); 513 ASSERT(udp->udp_ptpbhn == NULL); 514 udpp = &uf->uf_udp; 515 udpnext = udpp[0]; 516 if (udpnext != NULL) { 517 /* 518 * If the new udp bound to the INADDR_ANY address 519 * and the first one in the list is not bound to 520 * INADDR_ANY we skip all entries until we find the 521 * first one bound to INADDR_ANY. 522 * This makes sure that applications binding to a 523 * specific address get preference over those binding to 524 * INADDR_ANY. 525 */ 526 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 527 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 528 while ((udpnext = udpp[0]) != NULL && 529 !V6_OR_V4_INADDR_ANY( 530 udpnext->udp_bound_v6src)) { 531 udpp = &(udpnext->udp_bind_hash); 532 } 533 if (udpnext != NULL) 534 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 535 } else { 536 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 537 } 538 } 539 udp->udp_bind_hash = udpnext; 540 udp->udp_ptpbhn = udpp; 541 udpp[0] = udp; 542 } 543 544 /* 545 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 546 * passed to udp_wput. 547 * It associates a port number and local address with the stream. 548 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 549 * protocol type (IPPROTO_UDP) placed in the message following the address. 550 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 551 * (Called as writer.) 552 * 553 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 554 * without setting SO_REUSEADDR. This is needed so that they 555 * can be viewed as two independent transport protocols. 
556 * However, anonymouns ports are allocated from the same range to avoid 557 * duplicating the us->us_next_port_to_try. 558 */ 559 static void 560 udp_bind(queue_t *q, mblk_t *mp) 561 { 562 sin_t *sin; 563 sin6_t *sin6; 564 mblk_t *mp1; 565 in_port_t port; /* Host byte order */ 566 in_port_t requested_port; /* Host byte order */ 567 struct T_bind_req *tbr; 568 int count; 569 in6_addr_t v6src; 570 boolean_t bind_to_req_port_only; 571 int loopmax; 572 udp_fanout_t *udpf; 573 in_port_t lport; /* Network byte order */ 574 zoneid_t zoneid; 575 conn_t *connp; 576 udp_t *udp; 577 boolean_t is_inaddr_any; 578 mlp_type_t addrtype, mlptype; 579 udp_stack_t *us; 580 581 connp = Q_TO_CONN(q); 582 udp = connp->conn_udp; 583 us = udp->udp_us; 584 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 585 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 586 "udp_bind: bad req, len %u", 587 (uint_t)(mp->b_wptr - mp->b_rptr)); 588 udp_err_ack(q, mp, TPROTO, 0); 589 return; 590 } 591 if (udp->udp_state != TS_UNBND) { 592 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 593 "udp_bind: bad state, %u", udp->udp_state); 594 udp_err_ack(q, mp, TOUTSTATE, 0); 595 return; 596 } 597 /* 598 * Reallocate the message to make sure we have enough room for an 599 * address and the protocol type. 600 */ 601 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 602 if (!mp1) { 603 udp_err_ack(q, mp, TSYSERR, ENOMEM); 604 return; 605 } 606 607 mp = mp1; 608 tbr = (struct T_bind_req *)mp->b_rptr; 609 switch (tbr->ADDR_length) { 610 case 0: /* Request for a generic port */ 611 tbr->ADDR_offset = sizeof (struct T_bind_req); 612 if (udp->udp_family == AF_INET) { 613 tbr->ADDR_length = sizeof (sin_t); 614 sin = (sin_t *)&tbr[1]; 615 *sin = sin_null; 616 sin->sin_family = AF_INET; 617 mp->b_wptr = (uchar_t *)&sin[1]; 618 } else { 619 ASSERT(udp->udp_family == AF_INET6); 620 tbr->ADDR_length = sizeof (sin6_t); 621 sin6 = (sin6_t *)&tbr[1]; 622 *sin6 = sin6_null; 623 sin6->sin6_family = AF_INET6; 624 mp->b_wptr = (uchar_t *)&sin6[1]; 625 } 626 port = 0; 627 break; 628 629 case sizeof (sin_t): /* Complete IPv4 address */ 630 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 631 sizeof (sin_t)); 632 if (sin == NULL || !OK_32PTR((char *)sin)) { 633 udp_err_ack(q, mp, TSYSERR, EINVAL); 634 return; 635 } 636 if (udp->udp_family != AF_INET || 637 sin->sin_family != AF_INET) { 638 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 639 return; 640 } 641 port = ntohs(sin->sin_port); 642 break; 643 644 case sizeof (sin6_t): /* complete IPv6 address */ 645 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 646 sizeof (sin6_t)); 647 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 648 udp_err_ack(q, mp, TSYSERR, EINVAL); 649 return; 650 } 651 if (udp->udp_family != AF_INET6 || 652 sin6->sin6_family != AF_INET6) { 653 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 654 return; 655 } 656 port = ntohs(sin6->sin6_port); 657 break; 658 659 default: /* Invalid request */ 660 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 661 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 662 udp_err_ack(q, mp, TBADADDR, 0); 663 return; 664 } 665 666 requested_port = port; 667 668 if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ) 669 bind_to_req_port_only = B_FALSE; 670 else /* T_BIND_REQ and requested_port != 0 */ 671 bind_to_req_port_only = B_TRUE; 672 673 if (requested_port == 0) { 674 /* 675 * If the application passed in zero for the port number, it 676 * doesn't care which port number we bind to. Get one in the 677 * valid range. 
678 */ 679 if (udp->udp_anon_priv_bind) { 680 port = udp_get_next_priv_port(udp); 681 } else { 682 port = udp_update_next_port(udp, 683 us->us_next_port_to_try, B_TRUE); 684 } 685 } else { 686 /* 687 * If the port is in the well-known privileged range, 688 * make sure the caller was privileged. 689 */ 690 int i; 691 boolean_t priv = B_FALSE; 692 693 if (port < us->us_smallest_nonpriv_port) { 694 priv = B_TRUE; 695 } else { 696 for (i = 0; i < us->us_num_epriv_ports; i++) { 697 if (port == us->us_epriv_ports[i]) { 698 priv = B_TRUE; 699 break; 700 } 701 } 702 } 703 704 if (priv) { 705 cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); 706 707 if (secpolicy_net_privaddr(cr, port, 708 IPPROTO_UDP) != 0) { 709 udp_err_ack(q, mp, TACCES, 0); 710 return; 711 } 712 } 713 } 714 715 if (port == 0) { 716 udp_err_ack(q, mp, TNOADDR, 0); 717 return; 718 } 719 720 /* 721 * The state must be TS_UNBND. TPI mandates that users must send 722 * TPI primitives only 1 at a time and wait for the response before 723 * sending the next primitive. 724 */ 725 rw_enter(&udp->udp_rwlock, RW_WRITER); 726 if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) { 727 rw_exit(&udp->udp_rwlock); 728 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 729 "udp_bind: bad state, %u", udp->udp_state); 730 udp_err_ack(q, mp, TOUTSTATE, 0); 731 return; 732 } 733 udp->udp_pending_op = tbr->PRIM_type; 734 /* 735 * Copy the source address into our udp structure. This address 736 * may still be zero; if so, IP will fill in the correct address 737 * each time an outbound packet is passed to it. Since the udp is 738 * not yet in the bind hash list, we don't grab the uf_lock to 739 * change udp_ipversion 740 */ 741 if (udp->udp_family == AF_INET) { 742 ASSERT(sin != NULL); 743 ASSERT(udp->udp_ipversion == IPV4_VERSION); 744 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 745 udp->udp_ip_snd_options_len; 746 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 747 } else { 748 ASSERT(sin6 != NULL); 749 v6src = sin6->sin6_addr; 750 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 751 /* 752 * no need to hold the uf_lock to set the udp_ipversion 753 * since we are not yet in the fanout list 754 */ 755 udp->udp_ipversion = IPV4_VERSION; 756 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 757 UDPH_SIZE + udp->udp_ip_snd_options_len; 758 } else { 759 udp->udp_ipversion = IPV6_VERSION; 760 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 761 } 762 } 763 764 /* 765 * If udp_reuseaddr is not set, then we have to make sure that 766 * the IP address and port number the application requested 767 * (or we selected for the application) is not being used by 768 * another stream. If another stream is already using the 769 * requested IP address and port, the behavior depends on 770 * "bind_to_req_port_only". If set the bind fails; otherwise we 771 * search for any an unused port to bind to the the stream. 772 * 773 * As per the BSD semantics, as modified by the Deering multicast 774 * changes, if udp_reuseaddr is set, then we allow multiple binds 775 * to the same port independent of the local IP address. 776 * 777 * This is slightly different than in SunOS 4.X which did not 778 * support IP multicast. Note that the change implemented by the 779 * Deering multicast code effects all binds - not only binding 780 * to IP multicast addresses. 781 * 782 * Note that when binding to port zero we ignore SO_REUSEADDR in 783 * order to guarantee a unique port. 
784 */ 785 786 count = 0; 787 if (udp->udp_anon_priv_bind) { 788 /* 789 * loopmax = (IPPORT_RESERVED-1) - 790 * us->us_min_anonpriv_port + 1 791 */ 792 loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port; 793 } else { 794 loopmax = us->us_largest_anon_port - 795 us->us_smallest_anon_port + 1; 796 } 797 798 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 799 zoneid = connp->conn_zoneid; 800 801 for (;;) { 802 udp_t *udp1; 803 boolean_t found_exclbind = B_FALSE; 804 805 /* 806 * Walk through the list of udp streams bound to 807 * requested port with the same IP address. 808 */ 809 lport = htons(port); 810 udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport, 811 us->us_bind_fanout_size)]; 812 mutex_enter(&udpf->uf_lock); 813 for (udp1 = udpf->uf_udp; udp1 != NULL; 814 udp1 = udp1->udp_bind_hash) { 815 if (lport != udp1->udp_port) 816 continue; 817 818 /* 819 * On a labeled system, we must treat bindings to ports 820 * on shared IP addresses by sockets with MAC exemption 821 * privilege as being in all zones, as there's 822 * otherwise no way to identify the right receiver. 823 */ 824 if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) || 825 IPCL_ZONE_MATCH(connp, 826 udp1->udp_connp->conn_zoneid)) && 827 !connp->conn_mac_exempt && \ 828 !udp1->udp_connp->conn_mac_exempt) 829 continue; 830 831 /* 832 * If UDP_EXCLBIND is set for either the bound or 833 * binding endpoint, the semantics of bind 834 * is changed according to the following chart. 835 * 836 * spec = specified address (v4 or v6) 837 * unspec = unspecified address (v4 or v6) 838 * A = specified addresses are different for endpoints 839 * 840 * bound bind to allowed? 841 * ------------------------------------- 842 * unspec unspec no 843 * unspec spec no 844 * spec unspec no 845 * spec spec yes if A 846 * 847 * For labeled systems, SO_MAC_EXEMPT behaves the same 848 * as UDP_EXCLBIND, except that zoneid is ignored. 849 */ 850 if (udp1->udp_exclbind || udp->udp_exclbind || 851 udp1->udp_connp->conn_mac_exempt || 852 connp->conn_mac_exempt) { 853 if (V6_OR_V4_INADDR_ANY( 854 udp1->udp_bound_v6src) || 855 is_inaddr_any || 856 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 857 &v6src)) { 858 found_exclbind = B_TRUE; 859 break; 860 } 861 continue; 862 } 863 864 /* 865 * Check ipversion to allow IPv4 and IPv6 sockets to 866 * have disjoint port number spaces. 867 */ 868 if (udp->udp_ipversion != udp1->udp_ipversion) { 869 870 /* 871 * On the first time through the loop, if the 872 * the user intentionally specified a 873 * particular port number, then ignore any 874 * bindings of the other protocol that may 875 * conflict. This allows the user to bind IPv6 876 * alone and get both v4 and v6, or bind both 877 * both and get each seperately. On subsequent 878 * times through the loop, we're checking a 879 * port that we chose (not the user) and thus 880 * we do not allow casual duplicate bindings. 881 */ 882 if (count == 0 && requested_port != 0) 883 continue; 884 } 885 886 /* 887 * No difference depending on SO_REUSEADDR. 888 * 889 * If existing port is bound to a 890 * non-wildcard IP address and 891 * the requesting stream is bound to 892 * a distinct different IP addresses 893 * (non-wildcard, also), keep going. 
894 */ 895 if (!is_inaddr_any && 896 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 897 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 898 &v6src)) { 899 continue; 900 } 901 break; 902 } 903 904 if (!found_exclbind && 905 (udp->udp_reuseaddr && requested_port != 0)) { 906 break; 907 } 908 909 if (udp1 == NULL) { 910 /* 911 * No other stream has this IP address 912 * and port number. We can use it. 913 */ 914 break; 915 } 916 mutex_exit(&udpf->uf_lock); 917 if (bind_to_req_port_only) { 918 /* 919 * We get here only when requested port 920 * is bound (and only first of the for() 921 * loop iteration). 922 * 923 * The semantics of this bind request 924 * require it to fail so we return from 925 * the routine (and exit the loop). 926 * 927 */ 928 udp->udp_pending_op = -1; 929 rw_exit(&udp->udp_rwlock); 930 udp_err_ack(q, mp, TADDRBUSY, 0); 931 return; 932 } 933 934 if (udp->udp_anon_priv_bind) { 935 port = udp_get_next_priv_port(udp); 936 } else { 937 if ((count == 0) && (requested_port != 0)) { 938 /* 939 * If the application wants us to find 940 * a port, get one to start with. Set 941 * requested_port to 0, so that we will 942 * update us->us_next_port_to_try below. 943 */ 944 port = udp_update_next_port(udp, 945 us->us_next_port_to_try, B_TRUE); 946 requested_port = 0; 947 } else { 948 port = udp_update_next_port(udp, port + 1, 949 B_FALSE); 950 } 951 } 952 953 if (port == 0 || ++count >= loopmax) { 954 /* 955 * We've tried every possible port number and 956 * there are none available, so send an error 957 * to the user. 958 */ 959 udp->udp_pending_op = -1; 960 rw_exit(&udp->udp_rwlock); 961 udp_err_ack(q, mp, TNOADDR, 0); 962 return; 963 } 964 } 965 966 /* 967 * Copy the source address into our udp structure. This address 968 * may still be zero; if so, ip will fill in the correct address 969 * each time an outbound packet is passed to it. 970 * If we are binding to a broadcast or multicast address then 971 * udp_bind_ack will clear the source address when it receives 972 * the T_BIND_ACK. 973 */ 974 udp->udp_v6src = udp->udp_bound_v6src = v6src; 975 udp->udp_port = lport; 976 /* 977 * Now reset the the next anonymous port if the application requested 978 * an anonymous port, or we handed out the next anonymous port. 979 */ 980 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 981 us->us_next_port_to_try = port + 1; 982 } 983 984 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. 
*/ 985 if (udp->udp_family == AF_INET) { 986 sin->sin_port = udp->udp_port; 987 } else { 988 int error; 989 990 sin6->sin6_port = udp->udp_port; 991 /* Rebuild the header template */ 992 error = udp_build_hdrs(udp); 993 if (error != 0) { 994 udp->udp_pending_op = -1; 995 rw_exit(&udp->udp_rwlock); 996 mutex_exit(&udpf->uf_lock); 997 udp_err_ack(q, mp, TSYSERR, error); 998 return; 999 } 1000 } 1001 udp->udp_state = TS_IDLE; 1002 udp_bind_hash_insert(udpf, udp); 1003 mutex_exit(&udpf->uf_lock); 1004 rw_exit(&udp->udp_rwlock); 1005 1006 if (cl_inet_bind) { 1007 /* 1008 * Running in cluster mode - register bind information 1009 */ 1010 if (udp->udp_ipversion == IPV4_VERSION) { 1011 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1012 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1013 (in_port_t)udp->udp_port); 1014 } else { 1015 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1016 (uint8_t *)&(udp->udp_v6src), 1017 (in_port_t)udp->udp_port); 1018 } 1019 1020 } 1021 1022 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 1023 if (is_system_labeled() && (!connp->conn_anon_port || 1024 connp->conn_anon_mlp)) { 1025 uint16_t mlpport; 1026 cred_t *cr = connp->conn_cred; 1027 zone_t *zone; 1028 1029 zone = crgetzone(cr); 1030 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 1031 mlptSingle; 1032 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 1033 &v6src, us->us_netstack->netstack_ip); 1034 if (addrtype == mlptSingle) { 1035 rw_enter(&udp->udp_rwlock, RW_WRITER); 1036 udp->udp_pending_op = -1; 1037 rw_exit(&udp->udp_rwlock); 1038 udp_err_ack(q, mp, TNOADDR, 0); 1039 connp->conn_anon_port = B_FALSE; 1040 connp->conn_mlp_type = mlptSingle; 1041 return; 1042 } 1043 mlpport = connp->conn_anon_port ? PMAPPORT : port; 1044 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 1045 addrtype); 1046 if (mlptype != mlptSingle && 1047 (connp->conn_mlp_type == mlptSingle || 1048 secpolicy_net_bindmlp(cr) != 0)) { 1049 if (udp->udp_debug) { 1050 (void) strlog(UDP_MOD_ID, 0, 1, 1051 SL_ERROR|SL_TRACE, 1052 "udp_bind: no priv for multilevel port %d", 1053 mlpport); 1054 } 1055 rw_enter(&udp->udp_rwlock, RW_WRITER); 1056 udp->udp_pending_op = -1; 1057 rw_exit(&udp->udp_rwlock); 1058 udp_err_ack(q, mp, TACCES, 0); 1059 connp->conn_anon_port = B_FALSE; 1060 connp->conn_mlp_type = mlptSingle; 1061 return; 1062 } 1063 1064 /* 1065 * If we're specifically binding a shared IP address and the 1066 * port is MLP on shared addresses, then check to see if this 1067 * zone actually owns the MLP. Reject if not. 1068 */ 1069 if (mlptype == mlptShared && addrtype == mlptShared) { 1070 /* 1071 * No need to handle exclusive-stack zones since 1072 * ALL_ZONES only applies to the shared stack. 
1073 */ 1074 zoneid_t mlpzone; 1075 1076 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 1077 htons(mlpport)); 1078 if (connp->conn_zoneid != mlpzone) { 1079 if (udp->udp_debug) { 1080 (void) strlog(UDP_MOD_ID, 0, 1, 1081 SL_ERROR|SL_TRACE, 1082 "udp_bind: attempt to bind port " 1083 "%d on shared addr in zone %d " 1084 "(should be %d)", 1085 mlpport, connp->conn_zoneid, 1086 mlpzone); 1087 } 1088 rw_enter(&udp->udp_rwlock, RW_WRITER); 1089 udp->udp_pending_op = -1; 1090 rw_exit(&udp->udp_rwlock); 1091 udp_err_ack(q, mp, TACCES, 0); 1092 connp->conn_anon_port = B_FALSE; 1093 connp->conn_mlp_type = mlptSingle; 1094 return; 1095 } 1096 } 1097 if (connp->conn_anon_port) { 1098 int error; 1099 1100 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 1101 port, B_TRUE); 1102 if (error != 0) { 1103 if (udp->udp_debug) { 1104 (void) strlog(UDP_MOD_ID, 0, 1, 1105 SL_ERROR|SL_TRACE, 1106 "udp_bind: cannot establish anon " 1107 "MLP for port %d", port); 1108 } 1109 rw_enter(&udp->udp_rwlock, RW_WRITER); 1110 udp->udp_pending_op = -1; 1111 rw_exit(&udp->udp_rwlock); 1112 udp_err_ack(q, mp, TACCES, 0); 1113 connp->conn_anon_port = B_FALSE; 1114 connp->conn_mlp_type = mlptSingle; 1115 return; 1116 } 1117 } 1118 connp->conn_mlp_type = mlptype; 1119 } 1120 1121 /* Pass the protocol number in the message following the address. */ 1122 *mp->b_wptr++ = IPPROTO_UDP; 1123 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1124 /* 1125 * Append a request for an IRE if udp_v6src not 1126 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1127 */ 1128 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1129 if (!mp->b_cont) { 1130 rw_enter(&udp->udp_rwlock, RW_WRITER); 1131 udp->udp_pending_op = -1; 1132 rw_exit(&udp->udp_rwlock); 1133 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1134 return; 1135 } 1136 mp->b_cont->b_wptr += sizeof (ire_t); 1137 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1138 } 1139 if (udp->udp_family == AF_INET6) 1140 mp = ip_bind_v6(q, mp, connp, NULL); 1141 else 1142 mp = ip_bind_v4(q, mp, connp); 1143 1144 /* The above return NULL if the bind needs to be deferred */ 1145 if (mp != NULL) 1146 udp_bind_result(connp, mp); 1147 else 1148 CONN_INC_REF(connp); 1149 } 1150 1151 /* 1152 * This is called from ip_wput_nondata to handle the results of a 1153 * deferred UDP bind. It is called once the bind has been completed. 1154 */ 1155 void 1156 udp_resume_bind(conn_t *connp, mblk_t *mp) 1157 { 1158 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1159 1160 udp_bind_result(connp, mp); 1161 1162 CONN_OPER_PENDING_DONE(connp); 1163 } 1164 1165 /* 1166 * This routine handles each T_CONN_REQ message passed to udp. It 1167 * associates a default destination address with the stream. 1168 * 1169 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1170 * T_BIND_REQ - specifying local and remote address/port 1171 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 1172 * T_OK_ACK - for the T_CONN_REQ 1173 * T_CONN_CON - to keep the TPI user happy 1174 * 1175 * The connect completes in udp_bind_result. 1176 * When a T_BIND_ACK is received information is extracted from the IRE 1177 * and the two appended messages are sent to the TPI user. 1178 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 1179 * convert it to an error ack for the appropriate primitive. 
1180 */ 1181 static void 1182 udp_connect(queue_t *q, mblk_t *mp) 1183 { 1184 sin6_t *sin6; 1185 sin_t *sin; 1186 struct T_conn_req *tcr; 1187 in6_addr_t v6dst; 1188 ipaddr_t v4dst; 1189 uint16_t dstport; 1190 uint32_t flowinfo; 1191 mblk_t *mp1, *mp2; 1192 udp_fanout_t *udpf; 1193 udp_t *udp, *udp1; 1194 ushort_t ipversion; 1195 udp_stack_t *us; 1196 conn_t *connp = Q_TO_CONN(q); 1197 1198 udp = connp->conn_udp; 1199 tcr = (struct T_conn_req *)mp->b_rptr; 1200 us = udp->udp_us; 1201 1202 /* A bit of sanity checking */ 1203 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 1204 udp_err_ack(q, mp, TPROTO, 0); 1205 return; 1206 } 1207 1208 if (tcr->OPT_length != 0) { 1209 udp_err_ack(q, mp, TBADOPT, 0); 1210 return; 1211 } 1212 1213 /* 1214 * Determine packet type based on type of address passed in 1215 * the request should contain an IPv4 or IPv6 address. 1216 * Make sure that address family matches the type of 1217 * family of the the address passed down 1218 */ 1219 switch (tcr->DEST_length) { 1220 default: 1221 udp_err_ack(q, mp, TBADADDR, 0); 1222 return; 1223 1224 case sizeof (sin_t): 1225 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 1226 sizeof (sin_t)); 1227 if (sin == NULL || !OK_32PTR((char *)sin)) { 1228 udp_err_ack(q, mp, TSYSERR, EINVAL); 1229 return; 1230 } 1231 if (udp->udp_family != AF_INET || 1232 sin->sin_family != AF_INET) { 1233 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1234 return; 1235 } 1236 v4dst = sin->sin_addr.s_addr; 1237 dstport = sin->sin_port; 1238 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1239 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1240 ipversion = IPV4_VERSION; 1241 break; 1242 1243 case sizeof (sin6_t): 1244 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 1245 sizeof (sin6_t)); 1246 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1247 udp_err_ack(q, mp, TSYSERR, EINVAL); 1248 return; 1249 } 1250 if (udp->udp_family != AF_INET6 || 1251 sin6->sin6_family != AF_INET6) { 1252 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1253 return; 1254 } 1255 v6dst = sin6->sin6_addr; 1256 dstport = sin6->sin6_port; 1257 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 1258 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 1259 ipversion = IPV4_VERSION; 1260 flowinfo = 0; 1261 } else { 1262 ipversion = IPV6_VERSION; 1263 flowinfo = sin6->sin6_flowinfo; 1264 } 1265 break; 1266 } 1267 if (dstport == 0) { 1268 udp_err_ack(q, mp, TBADADDR, 0); 1269 return; 1270 } 1271 1272 rw_enter(&udp->udp_rwlock, RW_WRITER); 1273 1274 /* 1275 * This UDP must have bound to a port already before doing a connect. 1276 * TPI mandates that users must send TPI primitives only 1 at a time 1277 * and wait for the response before sending the next primitive. 
1278 */ 1279 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 1280 rw_exit(&udp->udp_rwlock); 1281 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1282 "udp_connect: bad state, %u", udp->udp_state); 1283 udp_err_ack(q, mp, TOUTSTATE, 0); 1284 return; 1285 } 1286 udp->udp_pending_op = T_CONN_REQ; 1287 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 1288 1289 if (ipversion == IPV4_VERSION) { 1290 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1291 udp->udp_ip_snd_options_len; 1292 } else { 1293 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1294 } 1295 1296 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 1297 us->us_bind_fanout_size)]; 1298 1299 mutex_enter(&udpf->uf_lock); 1300 if (udp->udp_state == TS_DATA_XFER) { 1301 /* Already connected - clear out state */ 1302 udp->udp_v6src = udp->udp_bound_v6src; 1303 udp->udp_state = TS_IDLE; 1304 } 1305 1306 /* 1307 * Create a default IP header with no IP options. 1308 */ 1309 udp->udp_dstport = dstport; 1310 udp->udp_ipversion = ipversion; 1311 if (ipversion == IPV4_VERSION) { 1312 /* 1313 * Interpret a zero destination to mean loopback. 1314 * Update the T_CONN_REQ (sin/sin6) since it is used to 1315 * generate the T_CONN_CON. 1316 */ 1317 if (v4dst == INADDR_ANY) { 1318 v4dst = htonl(INADDR_LOOPBACK); 1319 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1320 if (udp->udp_family == AF_INET) { 1321 sin->sin_addr.s_addr = v4dst; 1322 } else { 1323 sin6->sin6_addr = v6dst; 1324 } 1325 } 1326 udp->udp_v6dst = v6dst; 1327 udp->udp_flowinfo = 0; 1328 1329 /* 1330 * If the destination address is multicast and 1331 * an outgoing multicast interface has been set, 1332 * use the address of that interface as our 1333 * source address if no source address has been set. 1334 */ 1335 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 1336 CLASSD(v4dst) && 1337 udp->udp_multicast_if_addr != INADDR_ANY) { 1338 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 1339 &udp->udp_v6src); 1340 } 1341 } else { 1342 ASSERT(udp->udp_ipversion == IPV6_VERSION); 1343 /* 1344 * Interpret a zero destination to mean loopback. 1345 * Update the T_CONN_REQ (sin/sin6) since it is used to 1346 * generate the T_CONN_CON. 1347 */ 1348 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 1349 v6dst = ipv6_loopback; 1350 sin6->sin6_addr = v6dst; 1351 } 1352 udp->udp_v6dst = v6dst; 1353 udp->udp_flowinfo = flowinfo; 1354 /* 1355 * If the destination address is multicast and 1356 * an outgoing multicast interface has been set, 1357 * then the ip bind logic will pick the correct source 1358 * address (i.e. matching the outgoing multicast interface). 
1359 */ 1360 } 1361 1362 /* 1363 * Verify that the src/port/dst/port is unique for all 1364 * connections in TS_DATA_XFER 1365 */ 1366 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 1367 if (udp1->udp_state != TS_DATA_XFER) 1368 continue; 1369 if (udp->udp_port != udp1->udp_port || 1370 udp->udp_ipversion != udp1->udp_ipversion || 1371 dstport != udp1->udp_dstport || 1372 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 1373 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 1374 !(IPCL_ZONE_MATCH(udp->udp_connp, 1375 udp1->udp_connp->conn_zoneid) || 1376 IPCL_ZONE_MATCH(udp1->udp_connp, 1377 udp->udp_connp->conn_zoneid))) 1378 continue; 1379 mutex_exit(&udpf->uf_lock); 1380 udp->udp_pending_op = -1; 1381 rw_exit(&udp->udp_rwlock); 1382 udp_err_ack(q, mp, TBADADDR, 0); 1383 return; 1384 } 1385 udp->udp_state = TS_DATA_XFER; 1386 mutex_exit(&udpf->uf_lock); 1387 1388 /* 1389 * Send down bind to IP to verify that there is a route 1390 * and to determine the source address. 1391 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 1392 */ 1393 if (udp->udp_family == AF_INET) 1394 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t)); 1395 else 1396 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); 1397 if (mp1 == NULL) { 1398 bind_failed: 1399 mutex_enter(&udpf->uf_lock); 1400 udp->udp_state = TS_IDLE; 1401 udp->udp_pending_op = -1; 1402 mutex_exit(&udpf->uf_lock); 1403 rw_exit(&udp->udp_rwlock); 1404 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1405 return; 1406 } 1407 1408 rw_exit(&udp->udp_rwlock); 1409 /* 1410 * We also have to send a connection confirmation to 1411 * keep TLI happy. Prepare it for udp_bind_result. 1412 */ 1413 if (udp->udp_family == AF_INET) 1414 mp2 = mi_tpi_conn_con(NULL, (char *)sin, 1415 sizeof (*sin), NULL, 0); 1416 else 1417 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, 1418 sizeof (*sin6), NULL, 0); 1419 if (mp2 == NULL) { 1420 freemsg(mp1); 1421 rw_enter(&udp->udp_rwlock, RW_WRITER); 1422 goto bind_failed; 1423 } 1424 1425 mp = mi_tpi_ok_ack_alloc(mp); 1426 if (mp == NULL) { 1427 /* Unable to reuse the T_CONN_REQ for the ack. */ 1428 freemsg(mp2); 1429 rw_enter(&udp->udp_rwlock, RW_WRITER); 1430 mutex_enter(&udpf->uf_lock); 1431 udp->udp_state = TS_IDLE; 1432 udp->udp_pending_op = -1; 1433 mutex_exit(&udpf->uf_lock); 1434 rw_exit(&udp->udp_rwlock); 1435 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 1436 return; 1437 } 1438 1439 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 1440 linkb(mp1, mp); 1441 linkb(mp1, mp2); 1442 1443 mblk_setcred(mp1, connp->conn_cred); 1444 if (udp->udp_family == AF_INET) 1445 mp1 = ip_bind_v4(q, mp1, connp); 1446 else 1447 mp1 = ip_bind_v6(q, mp1, connp, NULL); 1448 1449 /* The above return NULL if the bind needs to be deferred */ 1450 if (mp1 != NULL) 1451 udp_bind_result(connp, mp1); 1452 else 1453 CONN_INC_REF(connp); 1454 } 1455 1456 static int 1457 udp_close(queue_t *q) 1458 { 1459 conn_t *connp = (conn_t *)q->q_ptr; 1460 udp_t *udp; 1461 1462 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1463 udp = connp->conn_udp; 1464 1465 udp_quiesce_conn(connp); 1466 ip_quiesce_conn(connp); 1467 /* 1468 * Disable read-side synchronous stream 1469 * interface and drain any queued data. 
1470 */ 1471 udp_rcv_drain(q, udp, B_TRUE); 1472 ASSERT(!udp->udp_direct_sockfs); 1473 1474 qprocsoff(q); 1475 1476 ASSERT(udp->udp_rcv_cnt == 0); 1477 ASSERT(udp->udp_rcv_msgcnt == 0); 1478 ASSERT(udp->udp_rcv_list_head == NULL); 1479 ASSERT(udp->udp_rcv_list_tail == NULL); 1480 1481 udp_close_free(connp); 1482 1483 /* 1484 * Now we are truly single threaded on this stream, and can 1485 * delete the things hanging off the connp, and finally the connp. 1486 * We removed this connp from the fanout list, it cannot be 1487 * accessed thru the fanouts, and we already waited for the 1488 * conn_ref to drop to 0. We are already in close, so 1489 * there cannot be any other thread from the top. qprocsoff 1490 * has completed, and service has completed or won't run in 1491 * future. 1492 */ 1493 ASSERT(connp->conn_ref == 1); 1494 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 1495 connp->conn_ref--; 1496 ipcl_conn_destroy(connp); 1497 1498 q->q_ptr = WR(q)->q_ptr = NULL; 1499 return (0); 1500 } 1501 1502 /* 1503 * Called in the close path to quiesce the conn 1504 */ 1505 void 1506 udp_quiesce_conn(conn_t *connp) 1507 { 1508 udp_t *udp = connp->conn_udp; 1509 1510 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 1511 /* 1512 * Running in cluster mode - register unbind information 1513 */ 1514 if (udp->udp_ipversion == IPV4_VERSION) { 1515 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 1516 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 1517 (in_port_t)udp->udp_port); 1518 } else { 1519 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 1520 (uint8_t *)(&(udp->udp_v6src)), 1521 (in_port_t)udp->udp_port); 1522 } 1523 } 1524 1525 udp_bind_hash_remove(udp, B_FALSE); 1526 1527 } 1528 1529 void 1530 udp_close_free(conn_t *connp) 1531 { 1532 udp_t *udp = connp->conn_udp; 1533 1534 /* If there are any options associated with the stream, free them. */ 1535 if (udp->udp_ip_snd_options != NULL) { 1536 mi_free((char *)udp->udp_ip_snd_options); 1537 udp->udp_ip_snd_options = NULL; 1538 udp->udp_ip_snd_options_len = 0; 1539 } 1540 1541 if (udp->udp_ip_rcv_options != NULL) { 1542 mi_free((char *)udp->udp_ip_rcv_options); 1543 udp->udp_ip_rcv_options = NULL; 1544 udp->udp_ip_rcv_options_len = 0; 1545 } 1546 1547 /* Free memory associated with sticky options */ 1548 if (udp->udp_sticky_hdrs_len != 0) { 1549 kmem_free(udp->udp_sticky_hdrs, 1550 udp->udp_sticky_hdrs_len); 1551 udp->udp_sticky_hdrs = NULL; 1552 udp->udp_sticky_hdrs_len = 0; 1553 } 1554 1555 ip6_pkt_free(&udp->udp_sticky_ipp); 1556 1557 /* 1558 * Clear any fields which the kmem_cache constructor clears. 1559 * Only udp_connp needs to be preserved. 1560 * TBD: We should make this more efficient to avoid clearing 1561 * everything. 1562 */ 1563 ASSERT(udp->udp_connp == connp); 1564 bzero(udp, sizeof (udp_t)); 1565 udp->udp_connp = connp; 1566 } 1567 1568 /* 1569 * This routine handles each T_DISCON_REQ message passed to udp 1570 * as an indicating that UDP is no longer connected. This results 1571 * in sending a T_BIND_REQ to IP to restore the binding to just 1572 * the local address/port. 1573 * 1574 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1575 * T_BIND_REQ - specifying just the local address/port 1576 * T_OK_ACK - for the T_DISCON_REQ 1577 * 1578 * The disconnect completes in udp_bind_result. 1579 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 1580 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 1581 * convert it to an error ack for the appropriate primitive. 
1582 */ 1583 static void 1584 udp_disconnect(queue_t *q, mblk_t *mp) 1585 { 1586 udp_t *udp; 1587 mblk_t *mp1; 1588 udp_fanout_t *udpf; 1589 udp_stack_t *us; 1590 conn_t *connp = Q_TO_CONN(q); 1591 1592 udp = connp->conn_udp; 1593 us = udp->udp_us; 1594 rw_enter(&udp->udp_rwlock, RW_WRITER); 1595 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 1596 rw_exit(&udp->udp_rwlock); 1597 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1598 "udp_disconnect: bad state, %u", udp->udp_state); 1599 udp_err_ack(q, mp, TOUTSTATE, 0); 1600 return; 1601 } 1602 udp->udp_pending_op = T_DISCON_REQ; 1603 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 1604 us->us_bind_fanout_size)]; 1605 mutex_enter(&udpf->uf_lock); 1606 udp->udp_v6src = udp->udp_bound_v6src; 1607 udp->udp_state = TS_IDLE; 1608 mutex_exit(&udpf->uf_lock); 1609 1610 /* 1611 * Send down bind to IP to remove the full binding and revert 1612 * to the local address binding. 1613 */ 1614 if (udp->udp_family == AF_INET) 1615 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 1616 else 1617 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 1618 if (mp1 == NULL) { 1619 udp->udp_pending_op = -1; 1620 rw_exit(&udp->udp_rwlock); 1621 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1622 return; 1623 } 1624 mp = mi_tpi_ok_ack_alloc(mp); 1625 if (mp == NULL) { 1626 /* Unable to reuse the T_DISCON_REQ for the ack. */ 1627 udp->udp_pending_op = -1; 1628 rw_exit(&udp->udp_rwlock); 1629 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 1630 return; 1631 } 1632 1633 if (udp->udp_family == AF_INET6) { 1634 int error; 1635 1636 /* Rebuild the header template */ 1637 error = udp_build_hdrs(udp); 1638 if (error != 0) { 1639 udp->udp_pending_op = -1; 1640 rw_exit(&udp->udp_rwlock); 1641 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 1642 freemsg(mp1); 1643 return; 1644 } 1645 } 1646 1647 rw_exit(&udp->udp_rwlock); 1648 /* Append the T_OK_ACK to the T_BIND_REQ for udp_bind_ack */ 1649 linkb(mp1, mp); 1650 1651 if (udp->udp_family == AF_INET6) 1652 mp1 = ip_bind_v6(q, mp1, connp, NULL); 1653 else 1654 mp1 = ip_bind_v4(q, mp1, connp); 1655 1656 /* The above return NULL if the bind needs to be deferred */ 1657 if (mp1 != NULL) 1658 udp_bind_result(connp, mp1); 1659 else 1660 CONN_INC_REF(connp); 1661 } 1662 1663 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 1664 static void 1665 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1666 { 1667 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1668 qreply(q, mp); 1669 } 1670 1671 /* Shorthand to generate and send TPI error acks to our client */ 1672 static void 1673 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1674 int sys_error) 1675 { 1676 struct T_error_ack *teackp; 1677 1678 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1679 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1680 teackp = (struct T_error_ack *)mp->b_rptr; 1681 teackp->ERROR_prim = primitive; 1682 teackp->TLI_error = t_error; 1683 teackp->UNIX_error = sys_error; 1684 qreply(q, mp); 1685 } 1686 } 1687 1688 /*ARGSUSED*/ 1689 static int 1690 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1691 { 1692 int i; 1693 udp_t *udp = Q_TO_UDP(q); 1694 udp_stack_t *us = udp->udp_us; 1695 1696 for (i = 0; i < us->us_num_epriv_ports; i++) { 1697 if (us->us_epriv_ports[i] != 0) 1698 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1699 } 1700 return (0); 1701 } 1702 1703 /* ARGSUSED */ 1704 static int 1705 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1706 cred_t *cr) 1707 { 1708 long new_value; 1709 int i; 1710 udp_t *udp = Q_TO_UDP(q); 1711 udp_stack_t *us = udp->udp_us; 1712 1713 /* 1714 * Fail the request if the new value does not lie within the 1715 * port number limits. 1716 */ 1717 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1718 new_value <= 0 || new_value >= 65536) { 1719 return (EINVAL); 1720 } 1721 1722 /* Check if the value is already in the list */ 1723 for (i = 0; i < us->us_num_epriv_ports; i++) { 1724 if (new_value == us->us_epriv_ports[i]) { 1725 return (EEXIST); 1726 } 1727 } 1728 /* Find an empty slot */ 1729 for (i = 0; i < us->us_num_epriv_ports; i++) { 1730 if (us->us_epriv_ports[i] == 0) 1731 break; 1732 } 1733 if (i == us->us_num_epriv_ports) { 1734 return (EOVERFLOW); 1735 } 1736 1737 /* Set the new value */ 1738 us->us_epriv_ports[i] = (in_port_t)new_value; 1739 return (0); 1740 } 1741 1742 /* ARGSUSED */ 1743 static int 1744 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1745 cred_t *cr) 1746 { 1747 long new_value; 1748 int i; 1749 udp_t *udp = Q_TO_UDP(q); 1750 udp_stack_t *us = udp->udp_us; 1751 1752 /* 1753 * Fail the request if the new value does not lie within the 1754 * port number limits. 1755 */ 1756 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1757 new_value <= 0 || new_value >= 65536) { 1758 return (EINVAL); 1759 } 1760 1761 /* Check that the value is already in the list */ 1762 for (i = 0; i < us->us_num_epriv_ports; i++) { 1763 if (us->us_epriv_ports[i] == new_value) 1764 break; 1765 } 1766 if (i == us->us_num_epriv_ports) { 1767 return (ESRCH); 1768 } 1769 1770 /* Clear the value */ 1771 us->us_epriv_ports[i] = 0; 1772 return (0); 1773 } 1774 1775 /* At minimum we need 4 bytes of UDP header */ 1776 #define ICMP_MIN_UDP_HDR 4 1777 1778 /* 1779 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1780 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1781 * Assumes that IP has pulled up everything up to and including the ICMP header. 
1782 */ 1783 static void 1784 udp_icmp_error(queue_t *q, mblk_t *mp) 1785 { 1786 icmph_t *icmph; 1787 ipha_t *ipha; 1788 int iph_hdr_length; 1789 udpha_t *udpha; 1790 sin_t sin; 1791 sin6_t sin6; 1792 mblk_t *mp1; 1793 int error = 0; 1794 udp_t *udp = Q_TO_UDP(q); 1795 1796 ipha = (ipha_t *)mp->b_rptr; 1797 1798 ASSERT(OK_32PTR(mp->b_rptr)); 1799 1800 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1801 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1802 udp_icmp_error_ipv6(q, mp); 1803 return; 1804 } 1805 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1806 1807 /* Skip past the outer IP and ICMP headers */ 1808 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1809 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1810 ipha = (ipha_t *)&icmph[1]; 1811 1812 /* Skip past the inner IP and find the ULP header */ 1813 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1814 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1815 1816 switch (icmph->icmph_type) { 1817 case ICMP_DEST_UNREACHABLE: 1818 switch (icmph->icmph_code) { 1819 case ICMP_FRAGMENTATION_NEEDED: 1820 /* 1821 * IP has already adjusted the path MTU. 1822 */ 1823 break; 1824 case ICMP_PORT_UNREACHABLE: 1825 case ICMP_PROTOCOL_UNREACHABLE: 1826 error = ECONNREFUSED; 1827 break; 1828 default: 1829 /* Transient errors */ 1830 break; 1831 } 1832 break; 1833 default: 1834 /* Transient errors */ 1835 break; 1836 } 1837 if (error == 0) { 1838 freemsg(mp); 1839 return; 1840 } 1841 1842 /* 1843 * Deliver T_UDERROR_IND when the application has asked for it. 1844 * The socket layer enables this automatically when connected. 1845 */ 1846 if (!udp->udp_dgram_errind) { 1847 freemsg(mp); 1848 return; 1849 } 1850 1851 switch (udp->udp_family) { 1852 case AF_INET: 1853 sin = sin_null; 1854 sin.sin_family = AF_INET; 1855 sin.sin_addr.s_addr = ipha->ipha_dst; 1856 sin.sin_port = udpha->uha_dst_port; 1857 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 1858 error); 1859 break; 1860 case AF_INET6: 1861 sin6 = sin6_null; 1862 sin6.sin6_family = AF_INET6; 1863 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1864 sin6.sin6_port = udpha->uha_dst_port; 1865 1866 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1867 NULL, 0, error); 1868 break; 1869 } 1870 if (mp1) 1871 putnext(q, mp1); 1872 freemsg(mp); 1873 } 1874 1875 /* 1876 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1877 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1878 * Assumes that IP has pulled up all the extension headers as well as the 1879 * ICMPv6 header. 
1880 */ 1881 static void 1882 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 1883 { 1884 icmp6_t *icmp6; 1885 ip6_t *ip6h, *outer_ip6h; 1886 uint16_t iph_hdr_length; 1887 uint8_t *nexthdrp; 1888 udpha_t *udpha; 1889 sin6_t sin6; 1890 mblk_t *mp1; 1891 int error = 0; 1892 udp_t *udp = Q_TO_UDP(q); 1893 udp_stack_t *us = udp->udp_us; 1894 1895 outer_ip6h = (ip6_t *)mp->b_rptr; 1896 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1897 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1898 else 1899 iph_hdr_length = IPV6_HDR_LEN; 1900 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1901 ip6h = (ip6_t *)&icmp6[1]; 1902 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1903 freemsg(mp); 1904 return; 1905 } 1906 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1907 1908 switch (icmp6->icmp6_type) { 1909 case ICMP6_DST_UNREACH: 1910 switch (icmp6->icmp6_code) { 1911 case ICMP6_DST_UNREACH_NOPORT: 1912 error = ECONNREFUSED; 1913 break; 1914 case ICMP6_DST_UNREACH_ADMIN: 1915 case ICMP6_DST_UNREACH_NOROUTE: 1916 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1917 case ICMP6_DST_UNREACH_ADDR: 1918 /* Transient errors */ 1919 break; 1920 default: 1921 break; 1922 } 1923 break; 1924 case ICMP6_PACKET_TOO_BIG: { 1925 struct T_unitdata_ind *tudi; 1926 struct T_opthdr *toh; 1927 size_t udi_size; 1928 mblk_t *newmp; 1929 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1930 sizeof (struct ip6_mtuinfo); 1931 sin6_t *sin6; 1932 struct ip6_mtuinfo *mtuinfo; 1933 1934 /* 1935 * If the application has requested to receive path mtu 1936 * information, send up an empty message containing an 1937 * IPV6_PATHMTU ancillary data item. 1938 */ 1939 if (!udp->udp_ipv6_recvpathmtu) 1940 break; 1941 1942 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1943 opt_length; 1944 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1945 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1946 break; 1947 } 1948 1949 /* 1950 * newmp->b_cont is left to NULL on purpose. This is an 1951 * empty message containing only ancillary data. 1952 */ 1953 newmp->b_datap->db_type = M_PROTO; 1954 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1955 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1956 tudi->PRIM_type = T_UNITDATA_IND; 1957 tudi->SRC_length = sizeof (sin6_t); 1958 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1959 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1960 tudi->OPT_length = opt_length; 1961 1962 sin6 = (sin6_t *)&tudi[1]; 1963 bzero(sin6, sizeof (sin6_t)); 1964 sin6->sin6_family = AF_INET6; 1965 sin6->sin6_addr = udp->udp_v6dst; 1966 1967 toh = (struct T_opthdr *)&sin6[1]; 1968 toh->level = IPPROTO_IPV6; 1969 toh->name = IPV6_PATHMTU; 1970 toh->len = opt_length; 1971 toh->status = 0; 1972 1973 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1974 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1975 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1976 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1977 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1978 /* 1979 * We've consumed everything we need from the original 1980 * message. Free it, then send our empty message. 
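			 *
			 * Consumer-side sketch (illustrative, assuming the
			 * RFC 3542 interfaces; not code from this file): the
			 * application opts in and then sees the MTU only as
			 * ancillary data,
			 *
			 *	int on = 1;
			 *	setsockopt(s, IPPROTO_IPV6, IPV6_RECVPATHMTU,
			 *	    &on, sizeof (on));
			 *
			 * after which recvmsg() returns zero data bytes with
			 * a cmsg of level IPPROTO_IPV6, type IPV6_PATHMTU
			 * whose payload is the struct ip6_mtuinfo filled in
			 * below.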
1981 */ 1982 freemsg(mp); 1983 putnext(q, newmp); 1984 return; 1985 } 1986 case ICMP6_TIME_EXCEEDED: 1987 /* Transient errors */ 1988 break; 1989 case ICMP6_PARAM_PROB: 1990 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1991 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1992 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1993 (uchar_t *)nexthdrp) { 1994 error = ECONNREFUSED; 1995 break; 1996 } 1997 break; 1998 } 1999 if (error == 0) { 2000 freemsg(mp); 2001 return; 2002 } 2003 2004 /* 2005 * Deliver T_UDERROR_IND when the application has asked for it. 2006 * The socket layer enables this automatically when connected. 2007 */ 2008 if (!udp->udp_dgram_errind) { 2009 freemsg(mp); 2010 return; 2011 } 2012 2013 sin6 = sin6_null; 2014 sin6.sin6_family = AF_INET6; 2015 sin6.sin6_addr = ip6h->ip6_dst; 2016 sin6.sin6_port = udpha->uha_dst_port; 2017 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2018 2019 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2020 error); 2021 if (mp1) 2022 putnext(q, mp1); 2023 freemsg(mp); 2024 } 2025 2026 /* 2027 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 2028 * The local address is filled in if endpoint is bound. The remote address 2029 * is filled in if remote address has been precified ("connected endpoint") 2030 * (The concept of connected CLTS sockets is alien to published TPI 2031 * but we support it anyway). 2032 */ 2033 static void 2034 udp_addr_req(queue_t *q, mblk_t *mp) 2035 { 2036 sin_t *sin; 2037 sin6_t *sin6; 2038 mblk_t *ackmp; 2039 struct T_addr_ack *taa; 2040 udp_t *udp = Q_TO_UDP(q); 2041 2042 /* Make it large enough for worst case */ 2043 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2044 2 * sizeof (sin6_t), 1); 2045 if (ackmp == NULL) { 2046 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2047 return; 2048 } 2049 taa = (struct T_addr_ack *)ackmp->b_rptr; 2050 2051 bzero(taa, sizeof (struct T_addr_ack)); 2052 ackmp->b_wptr = (uchar_t *)&taa[1]; 2053 2054 taa->PRIM_type = T_ADDR_ACK; 2055 ackmp->b_datap->db_type = M_PCPROTO; 2056 rw_enter(&udp->udp_rwlock, RW_READER); 2057 /* 2058 * Note: Following code assumes 32 bit alignment of basic 2059 * data structures like sin_t and struct T_addr_ack. 2060 */ 2061 if (udp->udp_state != TS_UNBND) { 2062 /* 2063 * Fill in local address first 2064 */ 2065 taa->LOCADDR_offset = sizeof (*taa); 2066 if (udp->udp_family == AF_INET) { 2067 taa->LOCADDR_length = sizeof (sin_t); 2068 sin = (sin_t *)&taa[1]; 2069 /* Fill zeroes and then initialize non-zero fields */ 2070 *sin = sin_null; 2071 sin->sin_family = AF_INET; 2072 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2073 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2074 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2075 sin->sin_addr.s_addr); 2076 } else { 2077 /* 2078 * INADDR_ANY 2079 * udp_v6src is not set, we might be bound to 2080 * broadcast/multicast. 
Use udp_bound_v6src as 2081 * local address instead (that could 2082 * also still be INADDR_ANY) 2083 */ 2084 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2085 sin->sin_addr.s_addr); 2086 } 2087 sin->sin_port = udp->udp_port; 2088 ackmp->b_wptr = (uchar_t *)&sin[1]; 2089 if (udp->udp_state == TS_DATA_XFER) { 2090 /* 2091 * connected, fill remote address too 2092 */ 2093 taa->REMADDR_length = sizeof (sin_t); 2094 /* assumed 32-bit alignment */ 2095 taa->REMADDR_offset = taa->LOCADDR_offset + 2096 taa->LOCADDR_length; 2097 2098 sin = (sin_t *)(ackmp->b_rptr + 2099 taa->REMADDR_offset); 2100 /* initialize */ 2101 *sin = sin_null; 2102 sin->sin_family = AF_INET; 2103 sin->sin_addr.s_addr = 2104 V4_PART_OF_V6(udp->udp_v6dst); 2105 sin->sin_port = udp->udp_dstport; 2106 ackmp->b_wptr = (uchar_t *)&sin[1]; 2107 } 2108 } else { 2109 taa->LOCADDR_length = sizeof (sin6_t); 2110 sin6 = (sin6_t *)&taa[1]; 2111 /* Fill zeroes and then initialize non-zero fields */ 2112 *sin6 = sin6_null; 2113 sin6->sin6_family = AF_INET6; 2114 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2115 sin6->sin6_addr = udp->udp_v6src; 2116 } else { 2117 /* 2118 * UNSPECIFIED 2119 * udp_v6src is not set, we might be bound to 2120 * broadcast/multicast. Use udp_bound_v6src as 2121 * local address instead (that could 2122 * also still be UNSPECIFIED) 2123 */ 2124 sin6->sin6_addr = 2125 udp->udp_bound_v6src; 2126 } 2127 sin6->sin6_port = udp->udp_port; 2128 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2129 if (udp->udp_state == TS_DATA_XFER) { 2130 /* 2131 * connected, fill remote address too 2132 */ 2133 taa->REMADDR_length = sizeof (sin6_t); 2134 /* assumed 32-bit alignment */ 2135 taa->REMADDR_offset = taa->LOCADDR_offset + 2136 taa->LOCADDR_length; 2137 2138 sin6 = (sin6_t *)(ackmp->b_rptr + 2139 taa->REMADDR_offset); 2140 /* initialize */ 2141 *sin6 = sin6_null; 2142 sin6->sin6_family = AF_INET6; 2143 sin6->sin6_addr = udp->udp_v6dst; 2144 sin6->sin6_port = udp->udp_dstport; 2145 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2146 } 2147 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2148 } 2149 } 2150 rw_exit(&udp->udp_rwlock); 2151 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2152 qreply(q, ackmp); 2153 } 2154 2155 static void 2156 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2157 { 2158 if (udp->udp_family == AF_INET) { 2159 *tap = udp_g_t_info_ack_ipv4; 2160 } else { 2161 *tap = udp_g_t_info_ack_ipv6; 2162 } 2163 tap->CURRENT_state = udp->udp_state; 2164 tap->OPT_size = udp_max_optsize; 2165 } 2166 2167 /* 2168 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2169 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2170 * udp_g_t_info_ack. The current state of the stream is copied from 2171 * udp_state. 2172 */ 2173 static void 2174 udp_capability_req(queue_t *q, mblk_t *mp) 2175 { 2176 t_uscalar_t cap_bits1; 2177 struct T_capability_ack *tcap; 2178 udp_t *udp = Q_TO_UDP(q); 2179 2180 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2181 2182 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2183 mp->b_datap->db_type, T_CAPABILITY_ACK); 2184 if (!mp) 2185 return; 2186 2187 tcap = (struct T_capability_ack *)mp->b_rptr; 2188 tcap->CAP_bits1 = 0; 2189 2190 if (cap_bits1 & TC1_INFO) { 2191 udp_copy_info(&tcap->INFO_ack, udp); 2192 tcap->CAP_bits1 |= TC1_INFO; 2193 } 2194 2195 qreply(q, mp); 2196 } 2197 2198 /* 2199 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2200 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 
2201 * The current state of the stream is copied from udp_state. 2202 */ 2203 static void 2204 udp_info_req(queue_t *q, mblk_t *mp) 2205 { 2206 udp_t *udp = Q_TO_UDP(q); 2207 2208 /* Create a T_INFO_ACK message. */ 2209 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2210 T_INFO_ACK); 2211 if (!mp) 2212 return; 2213 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2214 qreply(q, mp); 2215 } 2216 2217 /* 2218 * IP recognizes seven kinds of bind requests: 2219 * 2220 * - A zero-length address binds only to the protocol number. 2221 * 2222 * - A 4-byte address is treated as a request to 2223 * validate that the address is a valid local IPv4 2224 * address, appropriate for an application to bind to. 2225 * IP does the verification, but does not make any note 2226 * of the address at this time. 2227 * 2228 * - A 16-byte address is treated as a request 2229 * to validate a local IPv6 address, as in the 4-byte 2230 * address case above. 2231 * 2232 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2233 * use it for the inbound fanout of packets. 2234 * 2235 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2236 * use it for the inbound fanout of packets. 2237 * 2238 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2239 * information consisting of local and remote addresses 2240 * and ports. In this case, the addresses are both 2241 * validated as appropriate for this operation, and, if 2242 * so, the information is retained for use in the 2243 * inbound fanout. 2244 * 2245 * - A 36-byte address (ipa6_conn_t) containing complete IPv6 2246 * fanout information, like the 12-byte case above. 2247 * 2248 * IP will also fill in the IRE request mblk with information 2249 * regarding our peer. In all cases, we notify IP of our protocol 2250 * type by appending a single protocol byte to the bind request.
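 *
 * As a concrete illustration (a sketch of what udp_ip_bind_mp() below
 * builds, not an additional interface): for a connected AF_INET endpoint,
 * addr_length == sizeof (ipa_conn_t) and the request looks roughly like
 *
 *	mp:		[ struct T_bind_req ][ ipa_conn_t ][ IPPROTO_UDP ]
 *	mp->b_cont:	a sizeof (ire_t) buffer of db_type IRE_DB_REQ_TYPE,
 *			which IP fills in with information about our peer.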
2251 */ 2252 static mblk_t * 2253 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2254 { 2255 char *cp; 2256 mblk_t *mp; 2257 struct T_bind_req *tbr; 2258 ipa_conn_t *ac; 2259 ipa6_conn_t *ac6; 2260 sin_t *sin; 2261 sin6_t *sin6; 2262 2263 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2264 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 2265 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2266 if (!mp) 2267 return (mp); 2268 mp->b_datap->db_type = M_PROTO; 2269 tbr = (struct T_bind_req *)mp->b_rptr; 2270 tbr->PRIM_type = bind_prim; 2271 tbr->ADDR_offset = sizeof (*tbr); 2272 tbr->CONIND_number = 0; 2273 tbr->ADDR_length = addr_length; 2274 cp = (char *)&tbr[1]; 2275 switch (addr_length) { 2276 case sizeof (ipa_conn_t): 2277 ASSERT(udp->udp_family == AF_INET); 2278 /* Append a request for an IRE */ 2279 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2280 if (!mp->b_cont) { 2281 freemsg(mp); 2282 return (NULL); 2283 } 2284 mp->b_cont->b_wptr += sizeof (ire_t); 2285 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2286 2287 /* cp known to be 32 bit aligned */ 2288 ac = (ipa_conn_t *)cp; 2289 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2290 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 2291 ac->ac_fport = udp->udp_dstport; 2292 ac->ac_lport = udp->udp_port; 2293 break; 2294 2295 case sizeof (ipa6_conn_t): 2296 ASSERT(udp->udp_family == AF_INET6); 2297 /* Append a request for an IRE */ 2298 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2299 if (!mp->b_cont) { 2300 freemsg(mp); 2301 return (NULL); 2302 } 2303 mp->b_cont->b_wptr += sizeof (ire_t); 2304 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2305 2306 /* cp known to be 32 bit aligned */ 2307 ac6 = (ipa6_conn_t *)cp; 2308 ac6->ac6_laddr = udp->udp_v6src; 2309 ac6->ac6_faddr = udp->udp_v6dst; 2310 ac6->ac6_fport = udp->udp_dstport; 2311 ac6->ac6_lport = udp->udp_port; 2312 break; 2313 2314 case sizeof (sin_t): 2315 ASSERT(udp->udp_family == AF_INET); 2316 /* Append a request for an IRE */ 2317 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2318 if (!mp->b_cont) { 2319 freemsg(mp); 2320 return (NULL); 2321 } 2322 mp->b_cont->b_wptr += sizeof (ire_t); 2323 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2324 2325 sin = (sin_t *)cp; 2326 *sin = sin_null; 2327 sin->sin_family = AF_INET; 2328 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2329 sin->sin_port = udp->udp_port; 2330 break; 2331 2332 case sizeof (sin6_t): 2333 ASSERT(udp->udp_family == AF_INET6); 2334 /* Append a request for an IRE */ 2335 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2336 if (!mp->b_cont) { 2337 freemsg(mp); 2338 return (NULL); 2339 } 2340 mp->b_cont->b_wptr += sizeof (ire_t); 2341 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2342 2343 sin6 = (sin6_t *)cp; 2344 *sin6 = sin6_null; 2345 sin6->sin6_family = AF_INET6; 2346 sin6->sin6_addr = udp->udp_bound_v6src; 2347 sin6->sin6_port = udp->udp_port; 2348 break; 2349 } 2350 /* Add protocol number to end */ 2351 cp[addr_length] = (char)IPPROTO_UDP; 2352 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2353 return (mp); 2354 } 2355 2356 /* For /dev/udp aka AF_INET open */ 2357 static int 2358 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2359 { 2360 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 2361 } 2362 2363 /* For /dev/udp6 aka AF_INET6 open */ 2364 static int 2365 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2366 { 2367 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 2368 } 2369 2370 /* 2371 * 
This is the open routine for udp. It allocates a udp_t structure for 2372 * the stream and, on the first open of the module, creates an ND table. 2373 */ 2374 /*ARGSUSED2*/ 2375 static int 2376 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 2377 boolean_t isv6) 2378 { 2379 int err; 2380 udp_t *udp; 2381 conn_t *connp; 2382 dev_t conn_dev; 2383 zoneid_t zoneid; 2384 netstack_t *ns; 2385 udp_stack_t *us; 2386 vmem_t *minor_arena; 2387 2388 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2389 2390 /* If the stream is already open, return immediately. */ 2391 if (q->q_ptr != NULL) 2392 return (0); 2393 2394 if (sflag == MODOPEN) 2395 return (EINVAL); 2396 2397 ns = netstack_find_by_cred(credp); 2398 ASSERT(ns != NULL); 2399 us = ns->netstack_udp; 2400 ASSERT(us != NULL); 2401 2402 /* 2403 * For exclusive stacks we set the zoneid to zero 2404 * to make UDP operate as if in the global zone. 2405 */ 2406 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 2407 zoneid = GLOBAL_ZONEID; 2408 else 2409 zoneid = crgetzoneid(credp); 2410 2411 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 2412 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 2413 minor_arena = ip_minor_arena_la; 2414 } else { 2415 /* 2416 * Either minor numbers in the large arena were exhausted 2417 * or a non socket application is doing the open. 2418 * Try to allocate from the small arena. 2419 */ 2420 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 2421 netstack_rele(ns); 2422 return (EBUSY); 2423 } 2424 minor_arena = ip_minor_arena_sa; 2425 } 2426 2427 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 2428 2429 connp = ipcl_conn_create(IPCL_UDPCONN, KM_SLEEP, ns); 2430 connp->conn_dev = conn_dev; 2431 connp->conn_minor_arena = minor_arena; 2432 udp = connp->conn_udp; 2433 2434 /* 2435 * ipcl_conn_create did a netstack_hold. Undo the hold that was 2436 * done by netstack_find_by_cred() 2437 */ 2438 netstack_rele(ns); 2439 2440 /* 2441 * Initialize the udp_t structure for this stream. 2442 */ 2443 q->q_ptr = connp; 2444 WR(q)->q_ptr = connp; 2445 connp->conn_rq = q; 2446 connp->conn_wq = WR(q); 2447 2448 rw_enter(&udp->udp_rwlock, RW_WRITER); 2449 ASSERT(connp->conn_ulp == IPPROTO_UDP); 2450 ASSERT(connp->conn_udp == udp); 2451 ASSERT(udp->udp_connp == connp); 2452 2453 /* Set the initial state of the stream and the privilege status. */ 2454 udp->udp_state = TS_UNBND; 2455 if (isv6) { 2456 udp->udp_family = AF_INET6; 2457 udp->udp_ipversion = IPV6_VERSION; 2458 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2459 udp->udp_ttl = us->us_ipv6_hoplimit; 2460 connp->conn_af_isv6 = B_TRUE; 2461 connp->conn_flags |= IPCL_ISV6; 2462 } else { 2463 udp->udp_family = AF_INET; 2464 udp->udp_ipversion = IPV4_VERSION; 2465 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2466 udp->udp_ttl = us->us_ipv4_ttl; 2467 connp->conn_af_isv6 = B_FALSE; 2468 connp->conn_flags &= ~IPCL_ISV6; 2469 } 2470 2471 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2472 udp->udp_pending_op = -1; 2473 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2474 connp->conn_zoneid = zoneid; 2475 2476 udp->udp_open_time = lbolt64; 2477 udp->udp_open_pid = curproc->p_pid; 2478 2479 /* 2480 * If the caller has the process-wide flag set, then default to MAC 2481 * exempt mode. This allows read-down to unlabeled hosts. 
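 *
 * (Illustrative only, and an assumption about the usual arrangement rather
 * than anything this file defines: a sufficiently privileged process
 * typically sets the flag before opening the endpoint with something like
 *
 *	if (setpflags(NET_MAC_AWARE, 1) != 0)
 *		(requires the net_mac_aware privilege)
 *
 * so the getpflags() check below simply inherits that per-process setting.)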
2482 */ 2483 if (getpflags(NET_MAC_AWARE, credp) != 0) 2484 connp->conn_mac_exempt = B_TRUE; 2485 2486 if (flag & SO_SOCKSTR) { 2487 connp->conn_flags |= IPCL_SOCKET; 2488 udp->udp_issocket = B_TRUE; 2489 udp->udp_direct_sockfs = B_TRUE; 2490 } 2491 2492 connp->conn_ulp_labeled = is_system_labeled(); 2493 2494 udp->udp_us = us; 2495 2496 q->q_hiwat = us->us_recv_hiwat; 2497 WR(q)->q_hiwat = us->us_xmit_hiwat; 2498 WR(q)->q_lowat = us->us_xmit_lowat; 2499 2500 connp->conn_recv = udp_input; 2501 crhold(credp); 2502 connp->conn_cred = credp; 2503 2504 mutex_enter(&connp->conn_lock); 2505 connp->conn_state_flags &= ~CONN_INCIPIENT; 2506 mutex_exit(&connp->conn_lock); 2507 2508 qprocson(q); 2509 2510 if (udp->udp_family == AF_INET6) { 2511 /* Build initial header template for transmit */ 2512 if ((err = udp_build_hdrs(udp)) != 0) { 2513 rw_exit(&udp->udp_rwlock); 2514 qprocsoff(q); 2515 ipcl_conn_destroy(connp); 2516 return (err); 2517 } 2518 } 2519 rw_exit(&udp->udp_rwlock); 2520 2521 /* Set the Stream head write offset and high watermark. */ 2522 (void) mi_set_sth_wroff(q, 2523 udp->udp_max_hdr_len + us->us_wroff_extra); 2524 (void) mi_set_sth_hiwat(q, udp_set_rcv_hiwat(udp, q->q_hiwat)); 2525 2526 return (0); 2527 } 2528 2529 /* 2530 * Which UDP options OK to set through T_UNITDATA_REQ... 2531 */ 2532 /* ARGSUSED */ 2533 static boolean_t 2534 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2535 { 2536 return (B_TRUE); 2537 } 2538 2539 /* 2540 * This routine gets default values of certain options whose default 2541 * values are maintained by protcol specific code 2542 */ 2543 /* ARGSUSED */ 2544 int 2545 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2546 { 2547 udp_t *udp = Q_TO_UDP(q); 2548 udp_stack_t *us = udp->udp_us; 2549 int *i1 = (int *)ptr; 2550 2551 switch (level) { 2552 case IPPROTO_IP: 2553 switch (name) { 2554 case IP_MULTICAST_TTL: 2555 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2556 return (sizeof (uchar_t)); 2557 case IP_MULTICAST_LOOP: 2558 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2559 return (sizeof (uchar_t)); 2560 } 2561 break; 2562 case IPPROTO_IPV6: 2563 switch (name) { 2564 case IPV6_MULTICAST_HOPS: 2565 *i1 = IP_DEFAULT_MULTICAST_TTL; 2566 return (sizeof (int)); 2567 case IPV6_MULTICAST_LOOP: 2568 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2569 return (sizeof (int)); 2570 case IPV6_UNICAST_HOPS: 2571 *i1 = us->us_ipv6_hoplimit; 2572 return (sizeof (int)); 2573 } 2574 break; 2575 } 2576 return (-1); 2577 } 2578 2579 /* 2580 * This routine retrieves the current status of socket options. 2581 * It returns the size of the option retrieved. 2582 */ 2583 int 2584 udp_opt_get_locked(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2585 { 2586 int *i1 = (int *)ptr; 2587 conn_t *connp; 2588 udp_t *udp; 2589 ip6_pkt_t *ipp; 2590 int len; 2591 udp_stack_t *us; 2592 2593 connp = Q_TO_CONN(q); 2594 udp = connp->conn_udp; 2595 ipp = &udp->udp_sticky_ipp; 2596 us = udp->udp_us; 2597 2598 switch (level) { 2599 case SOL_SOCKET: 2600 switch (name) { 2601 case SO_DEBUG: 2602 *i1 = udp->udp_debug; 2603 break; /* goto sizeof (int) option return */ 2604 case SO_REUSEADDR: 2605 *i1 = udp->udp_reuseaddr; 2606 break; /* goto sizeof (int) option return */ 2607 case SO_TYPE: 2608 *i1 = SOCK_DGRAM; 2609 break; /* goto sizeof (int) option return */ 2610 2611 /* 2612 * The following three items are available here, 2613 * but are only meaningful to IP. 
2614 */ 2615 case SO_DONTROUTE: 2616 *i1 = udp->udp_dontroute; 2617 break; /* goto sizeof (int) option return */ 2618 case SO_USELOOPBACK: 2619 *i1 = udp->udp_useloopback; 2620 break; /* goto sizeof (int) option return */ 2621 case SO_BROADCAST: 2622 *i1 = udp->udp_broadcast; 2623 break; /* goto sizeof (int) option return */ 2624 2625 case SO_SNDBUF: 2626 *i1 = q->q_hiwat; 2627 break; /* goto sizeof (int) option return */ 2628 case SO_RCVBUF: 2629 *i1 = RD(q)->q_hiwat; 2630 break; /* goto sizeof (int) option return */ 2631 case SO_DGRAM_ERRIND: 2632 *i1 = udp->udp_dgram_errind; 2633 break; /* goto sizeof (int) option return */ 2634 case SO_RECVUCRED: 2635 *i1 = udp->udp_recvucred; 2636 break; /* goto sizeof (int) option return */ 2637 case SO_TIMESTAMP: 2638 *i1 = udp->udp_timestamp; 2639 break; /* goto sizeof (int) option return */ 2640 case SO_ANON_MLP: 2641 *i1 = connp->conn_anon_mlp; 2642 break; /* goto sizeof (int) option return */ 2643 case SO_MAC_EXEMPT: 2644 *i1 = connp->conn_mac_exempt; 2645 break; /* goto sizeof (int) option return */ 2646 case SO_ALLZONES: 2647 *i1 = connp->conn_allzones; 2648 break; /* goto sizeof (int) option return */ 2649 case SO_EXCLBIND: 2650 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 2651 break; 2652 case SO_PROTOTYPE: 2653 *i1 = IPPROTO_UDP; 2654 break; 2655 case SO_DOMAIN: 2656 *i1 = udp->udp_family; 2657 break; 2658 default: 2659 return (-1); 2660 } 2661 break; 2662 case IPPROTO_IP: 2663 if (udp->udp_family != AF_INET) 2664 return (-1); 2665 switch (name) { 2666 case IP_OPTIONS: 2667 case T_IP_OPTIONS: 2668 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2669 if (len > 0) { 2670 bcopy(udp->udp_ip_rcv_options + 2671 udp->udp_label_len, ptr, len); 2672 } 2673 return (len); 2674 case IP_TOS: 2675 case T_IP_TOS: 2676 *i1 = (int)udp->udp_type_of_service; 2677 break; /* goto sizeof (int) option return */ 2678 case IP_TTL: 2679 *i1 = (int)udp->udp_ttl; 2680 break; /* goto sizeof (int) option return */ 2681 case IP_DHCPINIT_IF: 2682 return (-EINVAL); 2683 case IP_NEXTHOP: 2684 case IP_RECVPKTINFO: 2685 /* 2686 * This also handles IP_PKTINFO. 2687 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2688 * Differentiation is based on the size of the argument 2689 * passed in. 2690 * This option is handled in IP which will return an 2691 * error for IP_PKTINFO as it's not supported as a 2692 * sticky option. 
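		 *
		 * (Sketch of the distinction, for reference only: delivery
		 * is toggled with an int,
		 *
		 *	setsockopt(s, IPPROTO_IP, IP_RECVPKTINFO, &onoff,
		 *	    sizeof (int));
		 *
		 * whereas IP_PKTINFO is supplied per datagram as a
		 * struct in_pktinfo control message on sendmsg(); since the
		 * two share one option number, only the length passed in
		 * tells them apart.)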
2693 */ 2694 return (-EINVAL); 2695 case IP_MULTICAST_IF: 2696 /* 0 address if not set */ 2697 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2698 return (sizeof (ipaddr_t)); 2699 case IP_MULTICAST_TTL: 2700 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2701 return (sizeof (uchar_t)); 2702 case IP_MULTICAST_LOOP: 2703 *ptr = connp->conn_multicast_loop; 2704 return (sizeof (uint8_t)); 2705 case IP_RECVOPTS: 2706 *i1 = udp->udp_recvopts; 2707 break; /* goto sizeof (int) option return */ 2708 case IP_RECVDSTADDR: 2709 *i1 = udp->udp_recvdstaddr; 2710 break; /* goto sizeof (int) option return */ 2711 case IP_RECVIF: 2712 *i1 = udp->udp_recvif; 2713 break; /* goto sizeof (int) option return */ 2714 case IP_RECVSLLA: 2715 *i1 = udp->udp_recvslla; 2716 break; /* goto sizeof (int) option return */ 2717 case IP_RECVTTL: 2718 *i1 = udp->udp_recvttl; 2719 break; /* goto sizeof (int) option return */ 2720 case IP_ADD_MEMBERSHIP: 2721 case IP_DROP_MEMBERSHIP: 2722 case IP_BLOCK_SOURCE: 2723 case IP_UNBLOCK_SOURCE: 2724 case IP_ADD_SOURCE_MEMBERSHIP: 2725 case IP_DROP_SOURCE_MEMBERSHIP: 2726 case MCAST_JOIN_GROUP: 2727 case MCAST_LEAVE_GROUP: 2728 case MCAST_BLOCK_SOURCE: 2729 case MCAST_UNBLOCK_SOURCE: 2730 case MCAST_JOIN_SOURCE_GROUP: 2731 case MCAST_LEAVE_SOURCE_GROUP: 2732 case IP_DONTFAILOVER_IF: 2733 /* cannot "get" the value for these */ 2734 return (-1); 2735 case IP_BOUND_IF: 2736 /* Zero if not set */ 2737 *i1 = udp->udp_bound_if; 2738 break; /* goto sizeof (int) option return */ 2739 case IP_UNSPEC_SRC: 2740 *i1 = udp->udp_unspec_source; 2741 break; /* goto sizeof (int) option return */ 2742 case IP_BROADCAST_TTL: 2743 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2744 return (sizeof (uchar_t)); 2745 default: 2746 return (-1); 2747 } 2748 break; 2749 case IPPROTO_IPV6: 2750 if (udp->udp_family != AF_INET6) 2751 return (-1); 2752 switch (name) { 2753 case IPV6_UNICAST_HOPS: 2754 *i1 = (unsigned int)udp->udp_ttl; 2755 break; /* goto sizeof (int) option return */ 2756 case IPV6_MULTICAST_IF: 2757 /* 0 index if not set */ 2758 *i1 = udp->udp_multicast_if_index; 2759 break; /* goto sizeof (int) option return */ 2760 case IPV6_MULTICAST_HOPS: 2761 *i1 = udp->udp_multicast_ttl; 2762 break; /* goto sizeof (int) option return */ 2763 case IPV6_MULTICAST_LOOP: 2764 *i1 = connp->conn_multicast_loop; 2765 break; /* goto sizeof (int) option return */ 2766 case IPV6_JOIN_GROUP: 2767 case IPV6_LEAVE_GROUP: 2768 case MCAST_JOIN_GROUP: 2769 case MCAST_LEAVE_GROUP: 2770 case MCAST_BLOCK_SOURCE: 2771 case MCAST_UNBLOCK_SOURCE: 2772 case MCAST_JOIN_SOURCE_GROUP: 2773 case MCAST_LEAVE_SOURCE_GROUP: 2774 /* cannot "get" the value for these */ 2775 return (-1); 2776 case IPV6_BOUND_IF: 2777 /* Zero if not set */ 2778 *i1 = udp->udp_bound_if; 2779 break; /* goto sizeof (int) option return */ 2780 case IPV6_UNSPEC_SRC: 2781 *i1 = udp->udp_unspec_source; 2782 break; /* goto sizeof (int) option return */ 2783 case IPV6_RECVPKTINFO: 2784 *i1 = udp->udp_ip_recvpktinfo; 2785 break; /* goto sizeof (int) option return */ 2786 case IPV6_RECVTCLASS: 2787 *i1 = udp->udp_ipv6_recvtclass; 2788 break; /* goto sizeof (int) option return */ 2789 case IPV6_RECVPATHMTU: 2790 *i1 = udp->udp_ipv6_recvpathmtu; 2791 break; /* goto sizeof (int) option return */ 2792 case IPV6_RECVHOPLIMIT: 2793 *i1 = udp->udp_ipv6_recvhoplimit; 2794 break; /* goto sizeof (int) option return */ 2795 case IPV6_RECVHOPOPTS: 2796 *i1 = udp->udp_ipv6_recvhopopts; 2797 break; /* goto sizeof (int) option return */ 2798 case IPV6_RECVDSTOPTS: 2799 *i1 = 
udp->udp_ipv6_recvdstopts; 2800 break; /* goto sizeof (int) option return */ 2801 case _OLD_IPV6_RECVDSTOPTS: 2802 *i1 = udp->udp_old_ipv6_recvdstopts; 2803 break; /* goto sizeof (int) option return */ 2804 case IPV6_RECVRTHDRDSTOPTS: 2805 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2806 break; /* goto sizeof (int) option return */ 2807 case IPV6_RECVRTHDR: 2808 *i1 = udp->udp_ipv6_recvrthdr; 2809 break; /* goto sizeof (int) option return */ 2810 case IPV6_PKTINFO: { 2811 /* XXX assumes that caller has room for max size! */ 2812 struct in6_pktinfo *pkti; 2813 2814 pkti = (struct in6_pktinfo *)ptr; 2815 if (ipp->ipp_fields & IPPF_IFINDEX) 2816 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2817 else 2818 pkti->ipi6_ifindex = 0; 2819 if (ipp->ipp_fields & IPPF_ADDR) 2820 pkti->ipi6_addr = ipp->ipp_addr; 2821 else 2822 pkti->ipi6_addr = ipv6_all_zeros; 2823 return (sizeof (struct in6_pktinfo)); 2824 } 2825 case IPV6_TCLASS: 2826 if (ipp->ipp_fields & IPPF_TCLASS) 2827 *i1 = ipp->ipp_tclass; 2828 else 2829 *i1 = IPV6_FLOW_TCLASS( 2830 IPV6_DEFAULT_VERS_AND_FLOW); 2831 break; /* goto sizeof (int) option return */ 2832 case IPV6_NEXTHOP: { 2833 sin6_t *sin6 = (sin6_t *)ptr; 2834 2835 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2836 return (0); 2837 *sin6 = sin6_null; 2838 sin6->sin6_family = AF_INET6; 2839 sin6->sin6_addr = ipp->ipp_nexthop; 2840 return (sizeof (sin6_t)); 2841 } 2842 case IPV6_HOPOPTS: 2843 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2844 return (0); 2845 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2846 return (0); 2847 /* 2848 * The cipso/label option is added by kernel. 2849 * User is not usually aware of this option. 2850 * We copy out the hbh opt after the label option. 2851 */ 2852 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2853 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2854 if (udp->udp_label_len_v6 > 0) { 2855 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2856 ptr[1] = (ipp->ipp_hopoptslen - 2857 udp->udp_label_len_v6 + 7) / 8 - 1; 2858 } 2859 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2860 case IPV6_RTHDRDSTOPTS: 2861 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2862 return (0); 2863 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2864 return (ipp->ipp_rtdstoptslen); 2865 case IPV6_RTHDR: 2866 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2867 return (0); 2868 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2869 return (ipp->ipp_rthdrlen); 2870 case IPV6_DSTOPTS: 2871 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2872 return (0); 2873 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2874 return (ipp->ipp_dstoptslen); 2875 case IPV6_PATHMTU: 2876 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2877 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2878 us->us_netstack)); 2879 default: 2880 return (-1); 2881 } 2882 break; 2883 case IPPROTO_UDP: 2884 switch (name) { 2885 case UDP_ANONPRIVBIND: 2886 *i1 = udp->udp_anon_priv_bind; 2887 break; 2888 case UDP_EXCLBIND: 2889 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2890 break; 2891 case UDP_RCVHDR: 2892 *i1 = udp->udp_rcvhdr ? 
1 : 0; 2893 break; 2894 case UDP_NAT_T_ENDPOINT: 2895 *i1 = udp->udp_nat_t_endpoint; 2896 break; 2897 default: 2898 return (-1); 2899 } 2900 break; 2901 default: 2902 return (-1); 2903 } 2904 return (sizeof (int)); 2905 } 2906 2907 int 2908 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2909 { 2910 udp_t *udp; 2911 int err; 2912 2913 udp = Q_TO_UDP(q); 2914 2915 rw_enter(&udp->udp_rwlock, RW_READER); 2916 err = udp_opt_get_locked(q, level, name, ptr); 2917 rw_exit(&udp->udp_rwlock); 2918 return (err); 2919 } 2920 2921 /* 2922 * This routine sets socket options. 2923 */ 2924 /* ARGSUSED */ 2925 int 2926 udp_opt_set_locked(queue_t *q, uint_t optset_context, int level, 2927 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 2928 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2929 { 2930 udpattrs_t *attrs = thisdg_attrs; 2931 int *i1 = (int *)invalp; 2932 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2933 boolean_t checkonly; 2934 int error; 2935 conn_t *connp; 2936 udp_t *udp; 2937 uint_t newlen; 2938 udp_stack_t *us; 2939 size_t sth_wroff; 2940 2941 connp = Q_TO_CONN(q); 2942 udp = connp->conn_udp; 2943 us = udp->udp_us; 2944 2945 switch (optset_context) { 2946 case SETFN_OPTCOM_CHECKONLY: 2947 checkonly = B_TRUE; 2948 /* 2949 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2950 * inlen != 0 implies value supplied and 2951 * we have to "pretend" to set it. 2952 * inlen == 0 implies that there is no 2953 * value part in T_CHECK request and just validation 2954 * done elsewhere should be enough, we just return here. 2955 */ 2956 if (inlen == 0) { 2957 *outlenp = 0; 2958 return (0); 2959 } 2960 break; 2961 case SETFN_OPTCOM_NEGOTIATE: 2962 checkonly = B_FALSE; 2963 break; 2964 case SETFN_UD_NEGOTIATE: 2965 case SETFN_CONN_NEGOTIATE: 2966 checkonly = B_FALSE; 2967 /* 2968 * Negotiating local and "association-related" options 2969 * through T_UNITDATA_REQ. 2970 * 2971 * Following routine can filter out ones we do not 2972 * want to be "set" this way. 2973 */ 2974 if (!udp_opt_allow_udr_set(level, name)) { 2975 *outlenp = 0; 2976 return (EINVAL); 2977 } 2978 break; 2979 default: 2980 /* 2981 * We should never get here 2982 */ 2983 *outlenp = 0; 2984 return (EINVAL); 2985 } 2986 2987 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2988 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2989 2990 /* 2991 * For fixed length options, no sanity check 2992 * of passed in length is done. It is assumed *_optcom_req() 2993 * routines do the right thing. 2994 */ 2995 2996 switch (level) { 2997 case SOL_SOCKET: 2998 switch (name) { 2999 case SO_REUSEADDR: 3000 if (!checkonly) 3001 udp->udp_reuseaddr = onoff; 3002 break; 3003 case SO_DEBUG: 3004 if (!checkonly) 3005 udp->udp_debug = onoff; 3006 break; 3007 /* 3008 * The following three items are available here, 3009 * but are only meaningful to IP. 
3010 */ 3011 case SO_DONTROUTE: 3012 if (!checkonly) 3013 udp->udp_dontroute = onoff; 3014 break; 3015 case SO_USELOOPBACK: 3016 if (!checkonly) 3017 udp->udp_useloopback = onoff; 3018 break; 3019 case SO_BROADCAST: 3020 if (!checkonly) 3021 udp->udp_broadcast = onoff; 3022 break; 3023 3024 case SO_SNDBUF: 3025 if (*i1 > us->us_max_buf) { 3026 *outlenp = 0; 3027 return (ENOBUFS); 3028 } 3029 if (!checkonly) { 3030 q->q_hiwat = *i1; 3031 } 3032 break; 3033 case SO_RCVBUF: 3034 if (*i1 > us->us_max_buf) { 3035 *outlenp = 0; 3036 return (ENOBUFS); 3037 } 3038 if (!checkonly) { 3039 RD(q)->q_hiwat = *i1; 3040 rw_exit(&udp->udp_rwlock); 3041 (void) mi_set_sth_hiwat(RD(q), 3042 udp_set_rcv_hiwat(udp, *i1)); 3043 rw_enter(&udp->udp_rwlock, RW_WRITER); 3044 } 3045 break; 3046 case SO_DGRAM_ERRIND: 3047 if (!checkonly) 3048 udp->udp_dgram_errind = onoff; 3049 break; 3050 case SO_RECVUCRED: 3051 if (!checkonly) 3052 udp->udp_recvucred = onoff; 3053 break; 3054 case SO_ALLZONES: 3055 /* 3056 * "soft" error (negative) 3057 * option not handled at this level 3058 * Do not modify *outlenp. 3059 */ 3060 return (-EINVAL); 3061 case SO_TIMESTAMP: 3062 if (!checkonly) 3063 udp->udp_timestamp = onoff; 3064 break; 3065 case SO_ANON_MLP: 3066 /* Pass option along to IP level for handling */ 3067 return (-EINVAL); 3068 case SO_MAC_EXEMPT: 3069 /* Pass option along to IP level for handling */ 3070 return (-EINVAL); 3071 case SCM_UCRED: { 3072 struct ucred_s *ucr; 3073 cred_t *cr, *newcr; 3074 ts_label_t *tsl; 3075 3076 /* 3077 * Only sockets that have proper privileges and are 3078 * bound to MLPs will have any other value here, so 3079 * this implicitly tests for privilege to set label. 3080 */ 3081 if (connp->conn_mlp_type == mlptSingle) 3082 break; 3083 ucr = (struct ucred_s *)invalp; 3084 if (inlen != ucredsize || 3085 ucr->uc_labeloff < sizeof (*ucr) || 3086 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 3087 return (EINVAL); 3088 if (!checkonly) { 3089 mblk_t *mb; 3090 3091 if (attrs == NULL || 3092 (mb = attrs->udpattr_mb) == NULL) 3093 return (EINVAL); 3094 if ((cr = DB_CRED(mb)) == NULL) 3095 cr = udp->udp_connp->conn_cred; 3096 ASSERT(cr != NULL); 3097 if ((tsl = crgetlabel(cr)) == NULL) 3098 return (EINVAL); 3099 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 3100 tsl->tsl_doi, KM_NOSLEEP); 3101 if (newcr == NULL) 3102 return (ENOSR); 3103 mblk_setcred(mb, newcr); 3104 attrs->udpattr_credset = B_TRUE; 3105 crfree(newcr); 3106 } 3107 break; 3108 } 3109 case SO_EXCLBIND: 3110 if (!checkonly) 3111 udp->udp_exclbind = onoff; 3112 break; 3113 default: 3114 *outlenp = 0; 3115 return (EINVAL); 3116 } 3117 break; 3118 case IPPROTO_IP: 3119 if (udp->udp_family != AF_INET) { 3120 *outlenp = 0; 3121 return (ENOPROTOOPT); 3122 } 3123 switch (name) { 3124 case IP_OPTIONS: 3125 case T_IP_OPTIONS: 3126 /* Save options for use by IP. */ 3127 newlen = inlen + udp->udp_label_len; 3128 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 3129 *outlenp = 0; 3130 return (EINVAL); 3131 } 3132 if (checkonly) 3133 break; 3134 3135 /* 3136 * Update the stored options taking into account 3137 * any CIPSO option which we should not overwrite. 
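		 *
		 * (Caller-side sketch, illustrative rather than anything
		 * defined here: the application hands in raw IP option
		 * bytes, padded to a multiple of four,
		 *
		 *	uchar_t opts[OPTLEN];	(OPTLEN a hypothetical name,
		 *				 a multiple of 4)
		 *	setsockopt(s, IPPROTO_IP, IP_OPTIONS, opts,
		 *	    sizeof (opts));
		 *
		 * and never sees the label bytes that the kernel keeps in
		 * front of them.)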
3138 */ 3139 if (!tsol_option_set(&udp->udp_ip_snd_options, 3140 &udp->udp_ip_snd_options_len, 3141 udp->udp_label_len, invalp, inlen)) { 3142 *outlenp = 0; 3143 return (ENOMEM); 3144 } 3145 3146 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3147 UDPH_SIZE + udp->udp_ip_snd_options_len; 3148 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3149 rw_exit(&udp->udp_rwlock); 3150 (void) mi_set_sth_wroff(RD(q), sth_wroff); 3151 rw_enter(&udp->udp_rwlock, RW_WRITER); 3152 break; 3153 3154 case IP_TTL: 3155 if (!checkonly) { 3156 udp->udp_ttl = (uchar_t)*i1; 3157 } 3158 break; 3159 case IP_TOS: 3160 case T_IP_TOS: 3161 if (!checkonly) { 3162 udp->udp_type_of_service = (uchar_t)*i1; 3163 } 3164 break; 3165 case IP_MULTICAST_IF: { 3166 /* 3167 * TODO should check OPTMGMT reply and undo this if 3168 * there is an error. 3169 */ 3170 struct in_addr *inap = (struct in_addr *)invalp; 3171 if (!checkonly) { 3172 udp->udp_multicast_if_addr = 3173 inap->s_addr; 3174 } 3175 break; 3176 } 3177 case IP_MULTICAST_TTL: 3178 if (!checkonly) 3179 udp->udp_multicast_ttl = *invalp; 3180 break; 3181 case IP_MULTICAST_LOOP: 3182 if (!checkonly) 3183 connp->conn_multicast_loop = *invalp; 3184 break; 3185 case IP_RECVOPTS: 3186 if (!checkonly) 3187 udp->udp_recvopts = onoff; 3188 break; 3189 case IP_RECVDSTADDR: 3190 if (!checkonly) 3191 udp->udp_recvdstaddr = onoff; 3192 break; 3193 case IP_RECVIF: 3194 if (!checkonly) 3195 udp->udp_recvif = onoff; 3196 break; 3197 case IP_RECVSLLA: 3198 if (!checkonly) 3199 udp->udp_recvslla = onoff; 3200 break; 3201 case IP_RECVTTL: 3202 if (!checkonly) 3203 udp->udp_recvttl = onoff; 3204 break; 3205 case IP_PKTINFO: { 3206 /* 3207 * This also handles IP_RECVPKTINFO. 3208 * IP_PKTINFO and IP_RECVPKTINFO have same value. 3209 * Differentiation is based on the size of the 3210 * argument passed in. 3211 */ 3212 struct in_pktinfo *pktinfop; 3213 ip4_pkt_t *attr_pktinfop; 3214 3215 if (checkonly) 3216 break; 3217 3218 if (inlen == sizeof (int)) { 3219 /* 3220 * This is IP_RECVPKTINFO option. 3221 * Keep a local copy of whether this option is 3222 * set or not and pass it down to IP for 3223 * processing. 3224 */ 3225 3226 udp->udp_ip_recvpktinfo = onoff; 3227 return (-EINVAL); 3228 } 3229 3230 if (attrs == NULL || 3231 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 3232 /* 3233 * sticky option or no buffer to return 3234 * the results. 3235 */ 3236 return (EINVAL); 3237 } 3238 3239 if (inlen != sizeof (struct in_pktinfo)) 3240 return (EINVAL); 3241 3242 pktinfop = (struct in_pktinfo *)invalp; 3243 3244 /* 3245 * At least one of the values should be specified 3246 */ 3247 if (pktinfop->ipi_ifindex == 0 && 3248 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 3249 return (EINVAL); 3250 } 3251 3252 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 3253 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 3254 3255 break; 3256 } 3257 case IP_ADD_MEMBERSHIP: 3258 case IP_DROP_MEMBERSHIP: 3259 case IP_BLOCK_SOURCE: 3260 case IP_UNBLOCK_SOURCE: 3261 case IP_ADD_SOURCE_MEMBERSHIP: 3262 case IP_DROP_SOURCE_MEMBERSHIP: 3263 case MCAST_JOIN_GROUP: 3264 case MCAST_LEAVE_GROUP: 3265 case MCAST_BLOCK_SOURCE: 3266 case MCAST_UNBLOCK_SOURCE: 3267 case MCAST_JOIN_SOURCE_GROUP: 3268 case MCAST_LEAVE_SOURCE_GROUP: 3269 case IP_SEC_OPT: 3270 case IP_NEXTHOP: 3271 case IP_DHCPINIT_IF: 3272 /* 3273 * "soft" error (negative) 3274 * option not handled at this level 3275 * Do not modify *outlenp. 
3276 */ 3277 return (-EINVAL); 3278 case IP_BOUND_IF: 3279 if (!checkonly) 3280 udp->udp_bound_if = *i1; 3281 break; 3282 case IP_UNSPEC_SRC: 3283 if (!checkonly) 3284 udp->udp_unspec_source = onoff; 3285 break; 3286 case IP_BROADCAST_TTL: 3287 if (!checkonly) 3288 connp->conn_broadcast_ttl = *invalp; 3289 break; 3290 default: 3291 *outlenp = 0; 3292 return (EINVAL); 3293 } 3294 break; 3295 case IPPROTO_IPV6: { 3296 ip6_pkt_t *ipp; 3297 boolean_t sticky; 3298 3299 if (udp->udp_family != AF_INET6) { 3300 *outlenp = 0; 3301 return (ENOPROTOOPT); 3302 } 3303 /* 3304 * Deal with both sticky options and ancillary data 3305 */ 3306 sticky = B_FALSE; 3307 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 3308 NULL) { 3309 /* sticky options, or none */ 3310 ipp = &udp->udp_sticky_ipp; 3311 sticky = B_TRUE; 3312 } 3313 3314 switch (name) { 3315 case IPV6_MULTICAST_IF: 3316 if (!checkonly) 3317 udp->udp_multicast_if_index = *i1; 3318 break; 3319 case IPV6_UNICAST_HOPS: 3320 /* -1 means use default */ 3321 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3322 *outlenp = 0; 3323 return (EINVAL); 3324 } 3325 if (!checkonly) { 3326 if (*i1 == -1) { 3327 udp->udp_ttl = ipp->ipp_unicast_hops = 3328 us->us_ipv6_hoplimit; 3329 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3330 /* Pass modified value to IP. */ 3331 *i1 = udp->udp_ttl; 3332 } else { 3333 udp->udp_ttl = ipp->ipp_unicast_hops = 3334 (uint8_t)*i1; 3335 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3336 } 3337 /* Rebuild the header template */ 3338 error = udp_build_hdrs(udp); 3339 if (error != 0) { 3340 *outlenp = 0; 3341 return (error); 3342 } 3343 } 3344 break; 3345 case IPV6_MULTICAST_HOPS: 3346 /* -1 means use default */ 3347 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3348 *outlenp = 0; 3349 return (EINVAL); 3350 } 3351 if (!checkonly) { 3352 if (*i1 == -1) { 3353 udp->udp_multicast_ttl = 3354 ipp->ipp_multicast_hops = 3355 IP_DEFAULT_MULTICAST_TTL; 3356 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3357 /* Pass modified value to IP. 
*/ 3358 *i1 = udp->udp_multicast_ttl; 3359 } else { 3360 udp->udp_multicast_ttl = 3361 ipp->ipp_multicast_hops = 3362 (uint8_t)*i1; 3363 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3364 } 3365 } 3366 break; 3367 case IPV6_MULTICAST_LOOP: 3368 if (*i1 != 0 && *i1 != 1) { 3369 *outlenp = 0; 3370 return (EINVAL); 3371 } 3372 if (!checkonly) 3373 connp->conn_multicast_loop = *i1; 3374 break; 3375 case IPV6_JOIN_GROUP: 3376 case IPV6_LEAVE_GROUP: 3377 case MCAST_JOIN_GROUP: 3378 case MCAST_LEAVE_GROUP: 3379 case MCAST_BLOCK_SOURCE: 3380 case MCAST_UNBLOCK_SOURCE: 3381 case MCAST_JOIN_SOURCE_GROUP: 3382 case MCAST_LEAVE_SOURCE_GROUP: 3383 /* 3384 * "soft" error (negative) 3385 * option not handled at this level 3386 * Note: Do not modify *outlenp 3387 */ 3388 return (-EINVAL); 3389 case IPV6_BOUND_IF: 3390 if (!checkonly) 3391 udp->udp_bound_if = *i1; 3392 break; 3393 case IPV6_UNSPEC_SRC: 3394 if (!checkonly) 3395 udp->udp_unspec_source = onoff; 3396 break; 3397 /* 3398 * Set boolean switches for ancillary data delivery 3399 */ 3400 case IPV6_RECVPKTINFO: 3401 if (!checkonly) 3402 udp->udp_ip_recvpktinfo = onoff; 3403 break; 3404 case IPV6_RECVTCLASS: 3405 if (!checkonly) { 3406 udp->udp_ipv6_recvtclass = onoff; 3407 } 3408 break; 3409 case IPV6_RECVPATHMTU: 3410 if (!checkonly) { 3411 udp->udp_ipv6_recvpathmtu = onoff; 3412 } 3413 break; 3414 case IPV6_RECVHOPLIMIT: 3415 if (!checkonly) 3416 udp->udp_ipv6_recvhoplimit = onoff; 3417 break; 3418 case IPV6_RECVHOPOPTS: 3419 if (!checkonly) 3420 udp->udp_ipv6_recvhopopts = onoff; 3421 break; 3422 case IPV6_RECVDSTOPTS: 3423 if (!checkonly) 3424 udp->udp_ipv6_recvdstopts = onoff; 3425 break; 3426 case _OLD_IPV6_RECVDSTOPTS: 3427 if (!checkonly) 3428 udp->udp_old_ipv6_recvdstopts = onoff; 3429 break; 3430 case IPV6_RECVRTHDRDSTOPTS: 3431 if (!checkonly) 3432 udp->udp_ipv6_recvrthdrdstopts = onoff; 3433 break; 3434 case IPV6_RECVRTHDR: 3435 if (!checkonly) 3436 udp->udp_ipv6_recvrthdr = onoff; 3437 break; 3438 /* 3439 * Set sticky options or ancillary data. 3440 * If sticky options, (re)build any extension headers 3441 * that might be needed as a result. 3442 */ 3443 case IPV6_PKTINFO: 3444 /* 3445 * The source address and ifindex are verified 3446 * in ip_opt_set(). For ancillary data the 3447 * source address is checked in ip_wput_v6. 
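		 *
		 * (For reference, a hedged sketch of the two ways a value
		 * reaches this case: as a sticky option,
		 *
		 *	struct in6_pktinfo pi;
		 *	pi.ipi6_addr = src;
		 *	pi.ipi6_ifindex = ifindex;
		 *	setsockopt(s, IPPROTO_IPV6, IPV6_PKTINFO, &pi,
		 *	    sizeof (pi));
		 *
		 * or per datagram as an IPV6_PKTINFO cmsg on sendmsg(), in
		 * which case sticky is B_FALSE below and the value lands in
		 * the per-send udpattrs rather than in udp_sticky_ipp.)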
3448 */ 3449 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3450 return (EINVAL); 3451 if (checkonly) 3452 break; 3453 3454 if (inlen == 0) { 3455 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3456 ipp->ipp_sticky_ignored |= 3457 (IPPF_IFINDEX|IPPF_ADDR); 3458 } else { 3459 struct in6_pktinfo *pkti; 3460 3461 pkti = (struct in6_pktinfo *)invalp; 3462 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3463 ipp->ipp_addr = pkti->ipi6_addr; 3464 if (ipp->ipp_ifindex != 0) 3465 ipp->ipp_fields |= IPPF_IFINDEX; 3466 else 3467 ipp->ipp_fields &= ~IPPF_IFINDEX; 3468 if (!IN6_IS_ADDR_UNSPECIFIED( 3469 &ipp->ipp_addr)) 3470 ipp->ipp_fields |= IPPF_ADDR; 3471 else 3472 ipp->ipp_fields &= ~IPPF_ADDR; 3473 } 3474 if (sticky) { 3475 error = udp_build_hdrs(udp); 3476 if (error != 0) 3477 return (error); 3478 } 3479 break; 3480 case IPV6_HOPLIMIT: 3481 if (sticky) 3482 return (EINVAL); 3483 if (inlen != 0 && inlen != sizeof (int)) 3484 return (EINVAL); 3485 if (checkonly) 3486 break; 3487 3488 if (inlen == 0) { 3489 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3490 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3491 } else { 3492 if (*i1 > 255 || *i1 < -1) 3493 return (EINVAL); 3494 if (*i1 == -1) 3495 ipp->ipp_hoplimit = 3496 us->us_ipv6_hoplimit; 3497 else 3498 ipp->ipp_hoplimit = *i1; 3499 ipp->ipp_fields |= IPPF_HOPLIMIT; 3500 } 3501 break; 3502 case IPV6_TCLASS: 3503 if (inlen != 0 && inlen != sizeof (int)) 3504 return (EINVAL); 3505 if (checkonly) 3506 break; 3507 3508 if (inlen == 0) { 3509 ipp->ipp_fields &= ~IPPF_TCLASS; 3510 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3511 } else { 3512 if (*i1 > 255 || *i1 < -1) 3513 return (EINVAL); 3514 if (*i1 == -1) 3515 ipp->ipp_tclass = 0; 3516 else 3517 ipp->ipp_tclass = *i1; 3518 ipp->ipp_fields |= IPPF_TCLASS; 3519 } 3520 if (sticky) { 3521 error = udp_build_hdrs(udp); 3522 if (error != 0) 3523 return (error); 3524 } 3525 break; 3526 case IPV6_NEXTHOP: 3527 /* 3528 * IP will verify that the nexthop is reachable 3529 * and fail for sticky options. 3530 */ 3531 if (inlen != 0 && inlen != sizeof (sin6_t)) 3532 return (EINVAL); 3533 if (checkonly) 3534 break; 3535 3536 if (inlen == 0) { 3537 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3538 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3539 } else { 3540 sin6_t *sin6 = (sin6_t *)invalp; 3541 3542 if (sin6->sin6_family != AF_INET6) 3543 return (EAFNOSUPPORT); 3544 if (IN6_IS_ADDR_V4MAPPED( 3545 &sin6->sin6_addr)) 3546 return (EADDRNOTAVAIL); 3547 ipp->ipp_nexthop = sin6->sin6_addr; 3548 if (!IN6_IS_ADDR_UNSPECIFIED( 3549 &ipp->ipp_nexthop)) 3550 ipp->ipp_fields |= IPPF_NEXTHOP; 3551 else 3552 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3553 } 3554 if (sticky) { 3555 error = udp_build_hdrs(udp); 3556 if (error != 0) 3557 return (error); 3558 } 3559 break; 3560 case IPV6_HOPOPTS: { 3561 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3562 /* 3563 * Sanity checks - minimum size, size a multiple of 3564 * eight bytes, and matching size passed in. 3565 */ 3566 if (inlen != 0 && 3567 inlen != (8 * (hopts->ip6h_len + 1))) 3568 return (EINVAL); 3569 3570 if (checkonly) 3571 break; 3572 3573 error = optcom_pkt_set(invalp, inlen, sticky, 3574 (uchar_t **)&ipp->ipp_hopopts, 3575 &ipp->ipp_hopoptslen, 3576 sticky ? 
udp->udp_label_len_v6 : 0); 3577 if (error != 0) 3578 return (error); 3579 if (ipp->ipp_hopoptslen == 0) { 3580 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3581 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3582 } else { 3583 ipp->ipp_fields |= IPPF_HOPOPTS; 3584 } 3585 if (sticky) { 3586 error = udp_build_hdrs(udp); 3587 if (error != 0) 3588 return (error); 3589 } 3590 break; 3591 } 3592 case IPV6_RTHDRDSTOPTS: { 3593 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3594 3595 /* 3596 * Sanity checks - minimum size, size a multiple of 3597 * eight bytes, and matching size passed in. 3598 */ 3599 if (inlen != 0 && 3600 inlen != (8 * (dopts->ip6d_len + 1))) 3601 return (EINVAL); 3602 3603 if (checkonly) 3604 break; 3605 3606 if (inlen == 0) { 3607 if (sticky && 3608 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3609 kmem_free(ipp->ipp_rtdstopts, 3610 ipp->ipp_rtdstoptslen); 3611 ipp->ipp_rtdstopts = NULL; 3612 ipp->ipp_rtdstoptslen = 0; 3613 } 3614 3615 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3616 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3617 } else { 3618 error = optcom_pkt_set(invalp, inlen, sticky, 3619 (uchar_t **)&ipp->ipp_rtdstopts, 3620 &ipp->ipp_rtdstoptslen, 0); 3621 if (error != 0) 3622 return (error); 3623 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3624 } 3625 if (sticky) { 3626 error = udp_build_hdrs(udp); 3627 if (error != 0) 3628 return (error); 3629 } 3630 break; 3631 } 3632 case IPV6_DSTOPTS: { 3633 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3634 3635 /* 3636 * Sanity checks - minimum size, size a multiple of 3637 * eight bytes, and matching size passed in. 3638 */ 3639 if (inlen != 0 && 3640 inlen != (8 * (dopts->ip6d_len + 1))) 3641 return (EINVAL); 3642 3643 if (checkonly) 3644 break; 3645 3646 if (inlen == 0) { 3647 if (sticky && 3648 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3649 kmem_free(ipp->ipp_dstopts, 3650 ipp->ipp_dstoptslen); 3651 ipp->ipp_dstopts = NULL; 3652 ipp->ipp_dstoptslen = 0; 3653 } 3654 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3655 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3656 } else { 3657 error = optcom_pkt_set(invalp, inlen, sticky, 3658 (uchar_t **)&ipp->ipp_dstopts, 3659 &ipp->ipp_dstoptslen, 0); 3660 if (error != 0) 3661 return (error); 3662 ipp->ipp_fields |= IPPF_DSTOPTS; 3663 } 3664 if (sticky) { 3665 error = udp_build_hdrs(udp); 3666 if (error != 0) 3667 return (error); 3668 } 3669 break; 3670 } 3671 case IPV6_RTHDR: { 3672 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3673 3674 /* 3675 * Sanity checks - minimum size, size a multiple of 3676 * eight bytes, and matching size passed in. 
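		 *
		 * (Worked example of the length rule, for reference: a type 0
		 * routing header carrying two IPv6 addresses has
		 * ip6r_len == 4, so inlen must be 8 * (4 + 1) == 40 bytes,
		 * i.e. the 8-byte fixed part plus 2 * 16 bytes of addresses.)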
3677 */ 3678 if (inlen != 0 && 3679 inlen != (8 * (rt->ip6r_len + 1))) 3680 return (EINVAL); 3681 3682 if (checkonly) 3683 break; 3684 3685 if (inlen == 0) { 3686 if (sticky && 3687 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3688 kmem_free(ipp->ipp_rthdr, 3689 ipp->ipp_rthdrlen); 3690 ipp->ipp_rthdr = NULL; 3691 ipp->ipp_rthdrlen = 0; 3692 } 3693 ipp->ipp_fields &= ~IPPF_RTHDR; 3694 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3695 } else { 3696 error = optcom_pkt_set(invalp, inlen, sticky, 3697 (uchar_t **)&ipp->ipp_rthdr, 3698 &ipp->ipp_rthdrlen, 0); 3699 if (error != 0) 3700 return (error); 3701 ipp->ipp_fields |= IPPF_RTHDR; 3702 } 3703 if (sticky) { 3704 error = udp_build_hdrs(udp); 3705 if (error != 0) 3706 return (error); 3707 } 3708 break; 3709 } 3710 3711 case IPV6_DONTFRAG: 3712 if (checkonly) 3713 break; 3714 3715 if (onoff) { 3716 ipp->ipp_fields |= IPPF_DONTFRAG; 3717 } else { 3718 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3719 } 3720 break; 3721 3722 case IPV6_USE_MIN_MTU: 3723 if (inlen != sizeof (int)) 3724 return (EINVAL); 3725 3726 if (*i1 < -1 || *i1 > 1) 3727 return (EINVAL); 3728 3729 if (checkonly) 3730 break; 3731 3732 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3733 ipp->ipp_use_min_mtu = *i1; 3734 break; 3735 3736 case IPV6_BOUND_PIF: 3737 case IPV6_SEC_OPT: 3738 case IPV6_DONTFAILOVER_IF: 3739 case IPV6_SRC_PREFERENCES: 3740 case IPV6_V6ONLY: 3741 /* Handled at the IP level */ 3742 return (-EINVAL); 3743 default: 3744 *outlenp = 0; 3745 return (EINVAL); 3746 } 3747 break; 3748 } /* end IPPROTO_IPV6 */ 3749 case IPPROTO_UDP: 3750 switch (name) { 3751 case UDP_ANONPRIVBIND: 3752 if ((error = secpolicy_net_privaddr(cr, 0, 3753 IPPROTO_UDP)) != 0) { 3754 *outlenp = 0; 3755 return (error); 3756 } 3757 if (!checkonly) { 3758 udp->udp_anon_priv_bind = onoff; 3759 } 3760 break; 3761 case UDP_EXCLBIND: 3762 if (!checkonly) 3763 udp->udp_exclbind = onoff; 3764 break; 3765 case UDP_RCVHDR: 3766 if (!checkonly) 3767 udp->udp_rcvhdr = onoff; 3768 break; 3769 case UDP_NAT_T_ENDPOINT: 3770 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 3771 *outlenp = 0; 3772 return (error); 3773 } 3774 3775 /* 3776 * Use udp_family instead so we can avoid ambiguitites 3777 * with AF_INET6 sockets that may switch from IPv4 3778 * to IPv6. 3779 */ 3780 if (udp->udp_family != AF_INET) { 3781 *outlenp = 0; 3782 return (EAFNOSUPPORT); 3783 } 3784 3785 if (!checkonly) { 3786 udp->udp_nat_t_endpoint = onoff; 3787 3788 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3789 UDPH_SIZE + udp->udp_ip_snd_options_len; 3790 3791 /* Also, adjust wroff */ 3792 if (onoff) { 3793 udp->udp_max_hdr_len += 3794 sizeof (uint32_t); 3795 } 3796 (void) mi_set_sth_wroff(RD(q), 3797 udp->udp_max_hdr_len + us->us_wroff_extra); 3798 } 3799 break; 3800 default: 3801 *outlenp = 0; 3802 return (EINVAL); 3803 } 3804 break; 3805 default: 3806 *outlenp = 0; 3807 return (EINVAL); 3808 } 3809 /* 3810 * Common case of OK return with outval same as inval. 
3811 */ 3812 if (invalp != outvalp) { 3813 /* don't trust bcopy for identical src/dst */ 3814 (void) bcopy(invalp, outvalp, inlen); 3815 } 3816 *outlenp = inlen; 3817 return (0); 3818 } 3819 3820 int 3821 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3822 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3823 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3824 { 3825 udp_t *udp; 3826 int err; 3827 3828 udp = Q_TO_UDP(q); 3829 3830 rw_enter(&udp->udp_rwlock, RW_WRITER); 3831 err = udp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 3832 outlenp, outvalp, thisdg_attrs, cr, mblk); 3833 rw_exit(&udp->udp_rwlock); 3834 return (err); 3835 } 3836 3837 /* 3838 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3839 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3840 * headers, and the udp header. 3841 * Returns failure if can't allocate memory. 3842 */ 3843 static int 3844 udp_build_hdrs(udp_t *udp) 3845 { 3846 udp_stack_t *us = udp->udp_us; 3847 uchar_t *hdrs; 3848 uint_t hdrs_len; 3849 ip6_t *ip6h; 3850 ip6i_t *ip6i; 3851 udpha_t *udpha; 3852 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3853 size_t sth_wroff; 3854 3855 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3856 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3857 ASSERT(hdrs_len != 0); 3858 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3859 /* Need to reallocate */ 3860 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3861 if (hdrs == NULL) 3862 return (ENOMEM); 3863 3864 if (udp->udp_sticky_hdrs_len != 0) { 3865 kmem_free(udp->udp_sticky_hdrs, 3866 udp->udp_sticky_hdrs_len); 3867 } 3868 udp->udp_sticky_hdrs = hdrs; 3869 udp->udp_sticky_hdrs_len = hdrs_len; 3870 } 3871 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3872 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3873 3874 /* Set header fields not in ipp */ 3875 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3876 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3877 ip6h = (ip6_t *)&ip6i[1]; 3878 } else { 3879 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3880 } 3881 3882 if (!(ipp->ipp_fields & IPPF_ADDR)) 3883 ip6h->ip6_src = udp->udp_v6src; 3884 3885 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3886 udpha->uha_src_port = udp->udp_port; 3887 3888 /* Try to get everything in a single mblk */ 3889 if (hdrs_len > udp->udp_max_hdr_len) { 3890 udp->udp_max_hdr_len = hdrs_len; 3891 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3892 rw_exit(&udp->udp_rwlock); 3893 (void) mi_set_sth_wroff(udp->udp_connp->conn_rq, sth_wroff); 3894 rw_enter(&udp->udp_rwlock, RW_WRITER); 3895 } 3896 return (0); 3897 } 3898 3899 /* 3900 * This routine retrieves the value of an ND variable in a udpparam_t 3901 * structure. It is called through nd_getset when a user reads the 3902 * variable. 3903 */ 3904 /* ARGSUSED */ 3905 static int 3906 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3907 { 3908 udpparam_t *udppa = (udpparam_t *)cp; 3909 3910 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3911 return (0); 3912 } 3913 3914 /* 3915 * Walk through the param array specified registering each element with the 3916 * named dispatch (ND) handler. 
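 *
 * Once registered, these are reachable through the usual ND ioctls, e.g.
 * (illustrative command lines, not part of this file):
 *
 *	ndd -get /dev/udp udp_extra_priv_ports
 *	ndd -set /dev/udp udp_extra_priv_ports_add 2049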
3917 */ 3918 static boolean_t 3919 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3920 { 3921 for (; cnt-- > 0; udppa++) { 3922 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3923 if (!nd_load(ndp, udppa->udp_param_name, 3924 udp_param_get, udp_param_set, 3925 (caddr_t)udppa)) { 3926 nd_free(ndp); 3927 return (B_FALSE); 3928 } 3929 } 3930 } 3931 if (!nd_load(ndp, "udp_extra_priv_ports", 3932 udp_extra_priv_ports_get, NULL, NULL)) { 3933 nd_free(ndp); 3934 return (B_FALSE); 3935 } 3936 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3937 NULL, udp_extra_priv_ports_add, NULL)) { 3938 nd_free(ndp); 3939 return (B_FALSE); 3940 } 3941 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3942 NULL, udp_extra_priv_ports_del, NULL)) { 3943 nd_free(ndp); 3944 return (B_FALSE); 3945 } 3946 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3947 NULL)) { 3948 nd_free(ndp); 3949 return (B_FALSE); 3950 } 3951 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3952 NULL)) { 3953 nd_free(ndp); 3954 return (B_FALSE); 3955 } 3956 return (B_TRUE); 3957 } 3958 3959 /* This routine sets an ND variable in a udpparam_t structure. */ 3960 /* ARGSUSED */ 3961 static int 3962 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3963 { 3964 long new_value; 3965 udpparam_t *udppa = (udpparam_t *)cp; 3966 3967 /* 3968 * Fail the request if the new value does not lie within the 3969 * required bounds. 3970 */ 3971 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3972 new_value < udppa->udp_param_min || 3973 new_value > udppa->udp_param_max) { 3974 return (EINVAL); 3975 } 3976 3977 /* Set the new value */ 3978 udppa->udp_param_value = new_value; 3979 return (0); 3980 } 3981 3982 /* 3983 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3984 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3985 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3986 * then it's assumed to be allocated to be large enough. 3987 * 3988 * Returns zero if trimming of the security option causes all options to go 3989 * away. 3990 */ 3991 static size_t 3992 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3993 { 3994 struct T_opthdr *toh; 3995 size_t hol = ipp->ipp_hopoptslen; 3996 ip6_hbh_t *dstopt = NULL; 3997 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 3998 size_t tlen, olen, plen; 3999 boolean_t deleting; 4000 const struct ip6_opt *sopt, *lastpad; 4001 struct ip6_opt *dopt; 4002 4003 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 4004 toh->level = IPPROTO_IPV6; 4005 toh->name = IPV6_HOPOPTS; 4006 toh->status = 0; 4007 dstopt = (ip6_hbh_t *)(toh + 1); 4008 } 4009 4010 /* 4011 * If labeling is enabled, then skip the label option 4012 * but get other options if there are any. 4013 */ 4014 if (is_system_labeled()) { 4015 dopt = NULL; 4016 if (dstopt != NULL) { 4017 /* will fill in ip6h_len later */ 4018 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 4019 dopt = (struct ip6_opt *)(dstopt + 1); 4020 } 4021 sopt = (const struct ip6_opt *)(srcopt + 1); 4022 hol -= sizeof (*srcopt); 4023 tlen = sizeof (*dstopt); 4024 lastpad = NULL; 4025 deleting = B_FALSE; 4026 /* 4027 * This loop finds the first (lastpad pointer) of any number of 4028 * pads that preceeds the security option, then treats the 4029 * security option as though it were a pad, and then finds the 4030 * next non-pad option (or end of list). 4031 * 4032 * It then treats the entire block as one big pad. 
To preserve 4033 * alignment of any options that follow, or just the end of the 4034 * list, it computes a minimal new padding size that keeps the 4035 * same alignment for the next option. 4036 * 4037 * If it encounters just a sequence of pads with no security 4038 * option, those are copied as-is rather than collapsed. 4039 * 4040 * Note that to handle the end of list case, the code makes one 4041 * loop with 'hol' set to zero. 4042 */ 4043 for (;;) { 4044 if (hol > 0) { 4045 if (sopt->ip6o_type == IP6OPT_PAD1) { 4046 if (lastpad == NULL) 4047 lastpad = sopt; 4048 sopt = (const struct ip6_opt *) 4049 &sopt->ip6o_len; 4050 hol--; 4051 continue; 4052 } 4053 olen = sopt->ip6o_len + sizeof (*sopt); 4054 if (olen > hol) 4055 olen = hol; 4056 if (sopt->ip6o_type == IP6OPT_PADN || 4057 sopt->ip6o_type == ip6opt_ls) { 4058 if (sopt->ip6o_type == ip6opt_ls) 4059 deleting = B_TRUE; 4060 if (lastpad == NULL) 4061 lastpad = sopt; 4062 sopt = (const struct ip6_opt *) 4063 ((const char *)sopt + olen); 4064 hol -= olen; 4065 continue; 4066 } 4067 } else { 4068 /* if nothing was copied at all, then delete */ 4069 if (tlen == sizeof (*dstopt)) 4070 return (0); 4071 /* last pass; pick up any trailing padding */ 4072 olen = 0; 4073 } 4074 if (deleting) { 4075 /* 4076 * compute aligning effect of deleted material 4077 * to reproduce with pad. 4078 */ 4079 plen = ((const char *)sopt - 4080 (const char *)lastpad) & 7; 4081 tlen += plen; 4082 if (dopt != NULL) { 4083 if (plen == 1) { 4084 dopt->ip6o_type = IP6OPT_PAD1; 4085 } else if (plen > 1) { 4086 plen -= sizeof (*dopt); 4087 dopt->ip6o_type = IP6OPT_PADN; 4088 dopt->ip6o_len = plen; 4089 if (plen > 0) 4090 bzero(dopt + 1, plen); 4091 } 4092 dopt = (struct ip6_opt *) 4093 ((char *)dopt + plen); 4094 } 4095 deleting = B_FALSE; 4096 lastpad = NULL; 4097 } 4098 /* if there's uncopied padding, then copy that now */ 4099 if (lastpad != NULL) { 4100 olen += (const char *)sopt - 4101 (const char *)lastpad; 4102 sopt = lastpad; 4103 lastpad = NULL; 4104 } 4105 if (dopt != NULL && olen > 0) { 4106 bcopy(sopt, dopt, olen); 4107 dopt = (struct ip6_opt *)((char *)dopt + olen); 4108 } 4109 if (hol == 0) 4110 break; 4111 tlen += olen; 4112 sopt = (const struct ip6_opt *) 4113 ((const char *)sopt + olen); 4114 hol -= olen; 4115 } 4116 /* go back and patch up the length value, rounded upward */ 4117 if (dstopt != NULL) 4118 dstopt->ip6h_len = (tlen - 1) >> 3; 4119 } else { 4120 tlen = hol; 4121 if (dstopt != NULL) 4122 bcopy(srcopt, dstopt, hol); 4123 } 4124 4125 tlen += sizeof (*toh); 4126 if (toh != NULL) 4127 toh->len = tlen; 4128 4129 return (tlen); 4130 } 4131 4132 /* 4133 * Update udp_rcv_opt_len from the packet. 4134 * Called when options received, and when no options received but 4135 * udp_ip_recv_opt_len has previously recorded options. 
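 * The saved data lives in udp_ip_rcv_options/udp_ip_rcv_options_len and
 * is what an AF_INET application would later retrieve with, roughly
 * (illustrative sketch):
 *
 *	uchar_t opts[IP_MAX_OPT_LENGTH];
 *	socklen_t optlen = sizeof (opts);
 *	getsockopt(fd, IPPROTO_IP, IP_OPTIONS, opts, &optlen);
 *
 * A zero opt_len simply releases any previously saved buffer.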
 */
static void
udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len)
{
    /* Save the options if any */
    if (opt_len > 0) {
        if (opt_len > udp->udp_ip_rcv_options_len) {
            /* Need to allocate larger buffer */
            if (udp->udp_ip_rcv_options_len != 0)
                mi_free((char *)udp->udp_ip_rcv_options);
            udp->udp_ip_rcv_options_len = 0;
            udp->udp_ip_rcv_options =
                (uchar_t *)mi_alloc(opt_len, BPRI_HI);
            if (udp->udp_ip_rcv_options != NULL)
                udp->udp_ip_rcv_options_len = opt_len;
        }
        if (udp->udp_ip_rcv_options_len != 0) {
            bcopy(opt, udp->udp_ip_rcv_options, opt_len);
            /* Adjust length if we are reusing the space */
            udp->udp_ip_rcv_options_len = opt_len;
        }
    } else if (udp->udp_ip_rcv_options_len != 0) {
        /* Clear out previously recorded options */
        mi_free((char *)udp->udp_ip_rcv_options);
        udp->udp_ip_rcv_options = NULL;
        udp->udp_ip_rcv_options_len = 0;
    }
}

/* ARGSUSED2 */
static void
udp_input(void *arg1, mblk_t *mp, void *arg2)
{
    conn_t          *connp = (conn_t *)arg1;
    struct T_unitdata_ind *tudi;
    uchar_t         *rptr;          /* Pointer to IP header */
    int             hdr_length;     /* Length of IP+UDP headers */
    int             opt_len;
    int             udi_size;       /* Size of T_unitdata_ind */
    int             mp_len;
    udp_t           *udp;
    udpha_t         *udpha;
    int             ipversion;
    ip6_pkt_t       ipp;
    ip6_t           *ip6h;
    ip6i_t          *ip6i;
    mblk_t          *mp1;
    mblk_t          *options_mp = NULL;
    ip_pktinfo_t    *pinfo = NULL;
    cred_t          *cr = NULL;
    pid_t           cpid;
    uint32_t        udp_ip_rcv_options_len;
    udp_bits_t      udp_bits;
    cred_t          *rcr = connp->conn_cred;
    udp_stack_t     *us;

    ASSERT(connp->conn_flags & IPCL_UDPCONN);

    udp = connp->conn_udp;
    us = udp->udp_us;
    rptr = mp->b_rptr;
    ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
    ASSERT(OK_32PTR(rptr));

    /*
     * IP should have prepended the options data in an M_CTL.
     * Check the M_CTL "type" to make sure we are not here because of
     * a valid ICMP message.
     */
    if (DB_TYPE(mp) == M_CTL) {
        if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
            ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
            IN_PKTINFO) {
            /*
             * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
             * has been prepended to the packet by IP. We need to
             * extract the mblk and adjust the rptr
             */
            pinfo = (ip_pktinfo_t *)mp->b_rptr;
            options_mp = mp;
            mp = mp->b_cont;
            rptr = mp->b_rptr;
            UDP_STAT(us, udp_in_pktinfo);
        } else {
            /*
             * ICMP messages.
             */
            udp_icmp_error(connp->conn_rq, mp);
            return;
        }
    }

    mp_len = msgdsize(mp);
    /*
     * This is the inbound data path.
     * First, we check to make sure the IP version number is correct,
     * and then pull the IP and UDP headers into the first mblk.
4233 */ 4234 4235 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4236 ipp.ipp_fields = 0; 4237 4238 ipversion = IPH_HDR_VERSION(rptr); 4239 4240 rw_enter(&udp->udp_rwlock, RW_READER); 4241 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 4242 udp_bits = udp->udp_bits; 4243 rw_exit(&udp->udp_rwlock); 4244 4245 switch (ipversion) { 4246 case IPV4_VERSION: 4247 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4248 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4249 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4250 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 4251 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 4252 udp->udp_family == AF_INET) { 4253 /* 4254 * Record/update udp_ip_rcv_options with the lock 4255 * held. Not needed for AF_INET6 sockets 4256 * since they don't support a getsockopt of IP_OPTIONS. 4257 */ 4258 rw_enter(&udp->udp_rwlock, RW_WRITER); 4259 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 4260 opt_len); 4261 rw_exit(&udp->udp_rwlock); 4262 } 4263 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 4264 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4265 udp->udp_ip_recvpktinfo) { 4266 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 4267 ipp.ipp_fields |= IPPF_IFINDEX; 4268 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 4269 } 4270 } 4271 break; 4272 case IPV6_VERSION: 4273 /* 4274 * IPv6 packets can only be received by applications 4275 * that are prepared to receive IPv6 addresses. 4276 * The IP fanout must ensure this. 4277 */ 4278 ASSERT(udp->udp_family == AF_INET6); 4279 4280 ip6h = (ip6_t *)rptr; 4281 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4282 4283 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4284 uint8_t nexthdrp; 4285 /* Look for ifindex information */ 4286 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4287 ip6i = (ip6i_t *)ip6h; 4288 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4289 goto tossit; 4290 4291 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4292 ASSERT(ip6i->ip6i_ifindex != 0); 4293 ipp.ipp_fields |= IPPF_IFINDEX; 4294 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4295 } 4296 rptr = (uchar_t *)&ip6i[1]; 4297 mp->b_rptr = rptr; 4298 if (rptr == mp->b_wptr) { 4299 mp1 = mp->b_cont; 4300 freeb(mp); 4301 mp = mp1; 4302 rptr = mp->b_rptr; 4303 } 4304 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4305 goto tossit; 4306 ip6h = (ip6_t *)rptr; 4307 mp_len = msgdsize(mp); 4308 } 4309 /* 4310 * Find any potentially interesting extension headers 4311 * as well as the length of the IPv6 + extension 4312 * headers. 4313 */ 4314 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4315 UDPH_SIZE; 4316 ASSERT(nexthdrp == IPPROTO_UDP); 4317 } else { 4318 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4319 ip6i = NULL; 4320 } 4321 break; 4322 default: 4323 ASSERT(0); 4324 } 4325 4326 /* 4327 * IP inspected the UDP header thus all of it must be in the mblk. 4328 * UDP length check is performed for IPv6 packets and IPv4 packets 4329 * to check if the size of the packet as specified 4330 * by the header is the same as the physical size of the packet. 4331 * FIXME? Didn't IP already check this? 4332 */ 4333 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4334 if ((MBLKL(mp) < hdr_length) || 4335 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4336 goto tossit; 4337 } 4338 4339 4340 /* Walk past the headers unless IP_RECVHDR was set. */ 4341 if (!udp_bits.udpb_rcvhdr) { 4342 mp->b_rptr = rptr + hdr_length; 4343 mp_len -= hdr_length; 4344 } 4345 4346 /* 4347 * This is the inbound data path. 
Packets are passed upstream as 4348 * T_UNITDATA_IND messages with full IP headers still attached. 4349 */ 4350 if (udp->udp_family == AF_INET) { 4351 sin_t *sin; 4352 4353 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4354 4355 /* 4356 * Normally only send up the source address. 4357 * If IP_RECVDSTADDR is set we include the destination IP 4358 * address as an option. With IP_RECVOPTS we include all 4359 * the IP options. 4360 */ 4361 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4362 if (udp_bits.udpb_recvdstaddr) { 4363 udi_size += sizeof (struct T_opthdr) + 4364 sizeof (struct in_addr); 4365 UDP_STAT(us, udp_in_recvdstaddr); 4366 } 4367 4368 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 4369 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4370 udi_size += sizeof (struct T_opthdr) + 4371 sizeof (struct in_pktinfo); 4372 UDP_STAT(us, udp_ip_rcvpktinfo); 4373 } 4374 4375 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 4376 udi_size += sizeof (struct T_opthdr) + opt_len; 4377 UDP_STAT(us, udp_in_recvopts); 4378 } 4379 4380 /* 4381 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4382 * space accordingly 4383 */ 4384 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4385 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4386 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4387 UDP_STAT(us, udp_in_recvif); 4388 } 4389 4390 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4391 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4392 udi_size += sizeof (struct T_opthdr) + 4393 sizeof (struct sockaddr_dl); 4394 UDP_STAT(us, udp_in_recvslla); 4395 } 4396 4397 if ((udp_bits.udpb_recvucred) && 4398 (cr = DB_CRED(mp)) != NULL) { 4399 udi_size += sizeof (struct T_opthdr) + ucredsize; 4400 cpid = DB_CPID(mp); 4401 UDP_STAT(us, udp_in_recvucred); 4402 } 4403 4404 /* 4405 * If SO_TIMESTAMP is set allocate the appropriate sized 4406 * buffer. Since gethrestime() expects a pointer aligned 4407 * argument, we allocate space necessary for extra 4408 * alignment (even though it might not be used). 4409 */ 4410 if (udp_bits.udpb_timestamp) { 4411 udi_size += sizeof (struct T_opthdr) + 4412 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4413 UDP_STAT(us, udp_in_timestamp); 4414 } 4415 4416 /* 4417 * If IP_RECVTTL is set allocate the appropriate sized buffer 4418 */ 4419 if (udp_bits.udpb_recvttl) { 4420 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4421 UDP_STAT(us, udp_in_recvttl); 4422 } 4423 4424 /* Allocate a message block for the T_UNITDATA_IND structure. 
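         * The M_PROTO mblk built below is laid out, in order, as
         *
         *	struct T_unitdata_ind	(PRIM_type, SRC_*, OPT_*)
         *	sin_t			peer address and port
         *	T_opthdr + value	one pair per option counted above
         *
         * and udi_size was computed to cover exactly that set, which is
         * why the code asserts udi_size == 0 once all of the options
         * have been appended.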
*/ 4425 mp1 = allocb(udi_size, BPRI_MED); 4426 if (mp1 == NULL) { 4427 freemsg(mp); 4428 if (options_mp != NULL) 4429 freeb(options_mp); 4430 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4431 return; 4432 } 4433 mp1->b_cont = mp; 4434 mp = mp1; 4435 mp->b_datap->db_type = M_PROTO; 4436 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4437 mp->b_wptr = (uchar_t *)tudi + udi_size; 4438 tudi->PRIM_type = T_UNITDATA_IND; 4439 tudi->SRC_length = sizeof (sin_t); 4440 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4441 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4442 sizeof (sin_t); 4443 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4444 tudi->OPT_length = udi_size; 4445 sin = (sin_t *)&tudi[1]; 4446 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4447 sin->sin_port = udpha->uha_src_port; 4448 sin->sin_family = udp->udp_family; 4449 *(uint32_t *)&sin->sin_zero[0] = 0; 4450 *(uint32_t *)&sin->sin_zero[4] = 0; 4451 4452 /* 4453 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4454 * IP_RECVTTL has been set. 4455 */ 4456 if (udi_size != 0) { 4457 /* 4458 * Copy in destination address before options to avoid 4459 * any padding issues. 4460 */ 4461 char *dstopt; 4462 4463 dstopt = (char *)&sin[1]; 4464 if (udp_bits.udpb_recvdstaddr) { 4465 struct T_opthdr *toh; 4466 ipaddr_t *dstptr; 4467 4468 toh = (struct T_opthdr *)dstopt; 4469 toh->level = IPPROTO_IP; 4470 toh->name = IP_RECVDSTADDR; 4471 toh->len = sizeof (struct T_opthdr) + 4472 sizeof (ipaddr_t); 4473 toh->status = 0; 4474 dstopt += sizeof (struct T_opthdr); 4475 dstptr = (ipaddr_t *)dstopt; 4476 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4477 dstopt += sizeof (ipaddr_t); 4478 udi_size -= toh->len; 4479 } 4480 4481 if (udp_bits.udpb_recvopts && opt_len > 0) { 4482 struct T_opthdr *toh; 4483 4484 toh = (struct T_opthdr *)dstopt; 4485 toh->level = IPPROTO_IP; 4486 toh->name = IP_RECVOPTS; 4487 toh->len = sizeof (struct T_opthdr) + opt_len; 4488 toh->status = 0; 4489 dstopt += sizeof (struct T_opthdr); 4490 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4491 opt_len); 4492 dstopt += opt_len; 4493 udi_size -= toh->len; 4494 } 4495 4496 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4497 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4498 struct T_opthdr *toh; 4499 struct in_pktinfo *pktinfop; 4500 4501 toh = (struct T_opthdr *)dstopt; 4502 toh->level = IPPROTO_IP; 4503 toh->name = IP_PKTINFO; 4504 toh->len = sizeof (struct T_opthdr) + 4505 sizeof (*pktinfop); 4506 toh->status = 0; 4507 dstopt += sizeof (struct T_opthdr); 4508 pktinfop = (struct in_pktinfo *)dstopt; 4509 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4510 pktinfop->ipi_spec_dst = 4511 pinfo->ip_pkt_match_addr; 4512 pktinfop->ipi_addr.s_addr = 4513 ((ipha_t *)rptr)->ipha_dst; 4514 4515 dstopt += sizeof (struct in_pktinfo); 4516 udi_size -= toh->len; 4517 } 4518 4519 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4520 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4521 4522 struct T_opthdr *toh; 4523 struct sockaddr_dl *dstptr; 4524 4525 toh = (struct T_opthdr *)dstopt; 4526 toh->level = IPPROTO_IP; 4527 toh->name = IP_RECVSLLA; 4528 toh->len = sizeof (struct T_opthdr) + 4529 sizeof (struct sockaddr_dl); 4530 toh->status = 0; 4531 dstopt += sizeof (struct T_opthdr); 4532 dstptr = (struct sockaddr_dl *)dstopt; 4533 bcopy(&pinfo->ip_pkt_slla, dstptr, 4534 sizeof (struct sockaddr_dl)); 4535 dstopt += sizeof (struct sockaddr_dl); 4536 udi_size -= toh->len; 4537 } 4538 4539 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4540 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 

                struct T_opthdr *toh;
                uint_t          *dstptr;

                toh = (struct T_opthdr *)dstopt;
                toh->level = IPPROTO_IP;
                toh->name = IP_RECVIF;
                toh->len = sizeof (struct T_opthdr) +
                    sizeof (uint_t);
                toh->status = 0;
                dstopt += sizeof (struct T_opthdr);
                dstptr = (uint_t *)dstopt;
                *dstptr = pinfo->ip_pkt_ifindex;
                dstopt += sizeof (uint_t);
                udi_size -= toh->len;
            }

            if (cr != NULL) {
                struct T_opthdr *toh;

                toh = (struct T_opthdr *)dstopt;
                toh->level = SOL_SOCKET;
                toh->name = SCM_UCRED;
                toh->len = sizeof (struct T_opthdr) + ucredsize;
                toh->status = 0;
                dstopt += sizeof (struct T_opthdr);
                (void) cred2ucred(cr, cpid, dstopt, rcr);
                dstopt += ucredsize;
                udi_size -= toh->len;
            }

            if (udp_bits.udpb_timestamp) {
                struct T_opthdr *toh;

                toh = (struct T_opthdr *)dstopt;
                toh->level = SOL_SOCKET;
                toh->name = SCM_TIMESTAMP;
                toh->len = sizeof (struct T_opthdr) +
                    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
                toh->status = 0;
                dstopt += sizeof (struct T_opthdr);
                /* Align for gethrestime() */
                dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
                    sizeof (intptr_t));
                gethrestime((timestruc_t *)dstopt);
                dstopt = (char *)toh + toh->len;
                udi_size -= toh->len;
            }

            /*
             * CAUTION:
             * Due to alignment issues, processing of the IP_RECVTTL
             * option must always come last.  Adding any option
             * processing after this point will cause an alignment
             * panic.
             */
            if (udp_bits.udpb_recvttl) {
                struct T_opthdr *toh;
                uint8_t         *dstptr;

                toh = (struct T_opthdr *)dstopt;
                toh->level = IPPROTO_IP;
                toh->name = IP_RECVTTL;
                toh->len = sizeof (struct T_opthdr) +
                    sizeof (uint8_t);
                toh->status = 0;
                dstopt += sizeof (struct T_opthdr);
                dstptr = (uint8_t *)dstopt;
                *dstptr = ((ipha_t *)rptr)->ipha_ttl;
                dstopt += sizeof (uint8_t);
                udi_size -= toh->len;
            }

            /* Consumed all of allocated space */
            ASSERT(udi_size == 0);
        }
    } else {
        sin6_t *sin6;

        /*
         * Handle both IPv4 and IPv6 packets for IPv6 sockets.
         *
         * Normally we only send up the address. If receiving of any
         * optional receive side information is enabled, we also send
         * that up as options.
4627 */ 4628 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4629 4630 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4631 IPPF_RTHDR|IPPF_IFINDEX)) { 4632 if ((udp_bits.udpb_ipv6_recvhopopts) && 4633 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4634 size_t hlen; 4635 4636 UDP_STAT(us, udp_in_recvhopopts); 4637 hlen = copy_hop_opts(&ipp, NULL); 4638 if (hlen == 0) 4639 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4640 udi_size += hlen; 4641 } 4642 if (((udp_bits.udpb_ipv6_recvdstopts) || 4643 udp_bits.udpb_old_ipv6_recvdstopts) && 4644 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4645 udi_size += sizeof (struct T_opthdr) + 4646 ipp.ipp_dstoptslen; 4647 UDP_STAT(us, udp_in_recvdstopts); 4648 } 4649 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4650 udp_bits.udpb_ipv6_recvrthdr && 4651 (ipp.ipp_fields & IPPF_RTHDR)) || 4652 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4653 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4654 udi_size += sizeof (struct T_opthdr) + 4655 ipp.ipp_rtdstoptslen; 4656 UDP_STAT(us, udp_in_recvrtdstopts); 4657 } 4658 if ((udp_bits.udpb_ipv6_recvrthdr) && 4659 (ipp.ipp_fields & IPPF_RTHDR)) { 4660 udi_size += sizeof (struct T_opthdr) + 4661 ipp.ipp_rthdrlen; 4662 UDP_STAT(us, udp_in_recvrthdr); 4663 } 4664 if ((udp_bits.udpb_ip_recvpktinfo) && 4665 (ipp.ipp_fields & IPPF_IFINDEX)) { 4666 udi_size += sizeof (struct T_opthdr) + 4667 sizeof (struct in6_pktinfo); 4668 UDP_STAT(us, udp_in_recvpktinfo); 4669 } 4670 4671 } 4672 if ((udp_bits.udpb_recvucred) && 4673 (cr = DB_CRED(mp)) != NULL) { 4674 udi_size += sizeof (struct T_opthdr) + ucredsize; 4675 cpid = DB_CPID(mp); 4676 UDP_STAT(us, udp_in_recvucred); 4677 } 4678 4679 /* 4680 * If SO_TIMESTAMP is set allocate the appropriate sized 4681 * buffer. Since gethrestime() expects a pointer aligned 4682 * argument, we allocate space necessary for extra 4683 * alignment (even though it might not be used). 
4684 */ 4685 if (udp_bits.udpb_timestamp) { 4686 udi_size += sizeof (struct T_opthdr) + 4687 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4688 UDP_STAT(us, udp_in_timestamp); 4689 } 4690 4691 if (udp_bits.udpb_ipv6_recvhoplimit) { 4692 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4693 UDP_STAT(us, udp_in_recvhoplimit); 4694 } 4695 4696 if (udp_bits.udpb_ipv6_recvtclass) { 4697 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4698 UDP_STAT(us, udp_in_recvtclass); 4699 } 4700 4701 mp1 = allocb(udi_size, BPRI_MED); 4702 if (mp1 == NULL) { 4703 freemsg(mp); 4704 if (options_mp != NULL) 4705 freeb(options_mp); 4706 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4707 return; 4708 } 4709 mp1->b_cont = mp; 4710 mp = mp1; 4711 mp->b_datap->db_type = M_PROTO; 4712 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4713 mp->b_wptr = (uchar_t *)tudi + udi_size; 4714 tudi->PRIM_type = T_UNITDATA_IND; 4715 tudi->SRC_length = sizeof (sin6_t); 4716 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4717 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4718 sizeof (sin6_t); 4719 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4720 tudi->OPT_length = udi_size; 4721 sin6 = (sin6_t *)&tudi[1]; 4722 if (ipversion == IPV4_VERSION) { 4723 in6_addr_t v6dst; 4724 4725 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4726 &sin6->sin6_addr); 4727 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4728 &v6dst); 4729 sin6->sin6_flowinfo = 0; 4730 sin6->sin6_scope_id = 0; 4731 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4732 connp->conn_zoneid, us->us_netstack); 4733 } else { 4734 sin6->sin6_addr = ip6h->ip6_src; 4735 /* No sin6_flowinfo per API */ 4736 sin6->sin6_flowinfo = 0; 4737 /* For link-scope source pass up scope id */ 4738 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4739 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4740 sin6->sin6_scope_id = ipp.ipp_ifindex; 4741 else 4742 sin6->sin6_scope_id = 0; 4743 sin6->__sin6_src_id = ip_srcid_find_addr( 4744 &ip6h->ip6_dst, connp->conn_zoneid, 4745 us->us_netstack); 4746 } 4747 sin6->sin6_port = udpha->uha_src_port; 4748 sin6->sin6_family = udp->udp_family; 4749 4750 if (udi_size != 0) { 4751 uchar_t *dstopt; 4752 4753 dstopt = (uchar_t *)&sin6[1]; 4754 if ((udp_bits.udpb_ip_recvpktinfo) && 4755 (ipp.ipp_fields & IPPF_IFINDEX)) { 4756 struct T_opthdr *toh; 4757 struct in6_pktinfo *pkti; 4758 4759 toh = (struct T_opthdr *)dstopt; 4760 toh->level = IPPROTO_IPV6; 4761 toh->name = IPV6_PKTINFO; 4762 toh->len = sizeof (struct T_opthdr) + 4763 sizeof (*pkti); 4764 toh->status = 0; 4765 dstopt += sizeof (struct T_opthdr); 4766 pkti = (struct in6_pktinfo *)dstopt; 4767 if (ipversion == IPV6_VERSION) 4768 pkti->ipi6_addr = ip6h->ip6_dst; 4769 else 4770 IN6_IPADDR_TO_V4MAPPED( 4771 ((ipha_t *)rptr)->ipha_dst, 4772 &pkti->ipi6_addr); 4773 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4774 dstopt += sizeof (*pkti); 4775 udi_size -= toh->len; 4776 } 4777 if (udp_bits.udpb_ipv6_recvhoplimit) { 4778 struct T_opthdr *toh; 4779 4780 toh = (struct T_opthdr *)dstopt; 4781 toh->level = IPPROTO_IPV6; 4782 toh->name = IPV6_HOPLIMIT; 4783 toh->len = sizeof (struct T_opthdr) + 4784 sizeof (uint_t); 4785 toh->status = 0; 4786 dstopt += sizeof (struct T_opthdr); 4787 if (ipversion == IPV6_VERSION) 4788 *(uint_t *)dstopt = ip6h->ip6_hops; 4789 else 4790 *(uint_t *)dstopt = 4791 ((ipha_t *)rptr)->ipha_ttl; 4792 dstopt += sizeof (uint_t); 4793 udi_size -= toh->len; 4794 } 4795 if (udp_bits.udpb_ipv6_recvtclass) { 4796 struct T_opthdr *toh; 4797 4798 toh = (struct T_opthdr *)dstopt; 4799 
toh->level = IPPROTO_IPV6; 4800 toh->name = IPV6_TCLASS; 4801 toh->len = sizeof (struct T_opthdr) + 4802 sizeof (uint_t); 4803 toh->status = 0; 4804 dstopt += sizeof (struct T_opthdr); 4805 if (ipversion == IPV6_VERSION) { 4806 *(uint_t *)dstopt = 4807 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4808 } else { 4809 ipha_t *ipha = (ipha_t *)rptr; 4810 *(uint_t *)dstopt = 4811 ipha->ipha_type_of_service; 4812 } 4813 dstopt += sizeof (uint_t); 4814 udi_size -= toh->len; 4815 } 4816 if ((udp_bits.udpb_ipv6_recvhopopts) && 4817 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4818 size_t hlen; 4819 4820 hlen = copy_hop_opts(&ipp, dstopt); 4821 dstopt += hlen; 4822 udi_size -= hlen; 4823 } 4824 if ((udp_bits.udpb_ipv6_recvdstopts) && 4825 (udp_bits.udpb_ipv6_recvrthdr) && 4826 (ipp.ipp_fields & IPPF_RTHDR) && 4827 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4828 struct T_opthdr *toh; 4829 4830 toh = (struct T_opthdr *)dstopt; 4831 toh->level = IPPROTO_IPV6; 4832 toh->name = IPV6_DSTOPTS; 4833 toh->len = sizeof (struct T_opthdr) + 4834 ipp.ipp_rtdstoptslen; 4835 toh->status = 0; 4836 dstopt += sizeof (struct T_opthdr); 4837 bcopy(ipp.ipp_rtdstopts, dstopt, 4838 ipp.ipp_rtdstoptslen); 4839 dstopt += ipp.ipp_rtdstoptslen; 4840 udi_size -= toh->len; 4841 } 4842 if ((udp_bits.udpb_ipv6_recvrthdr) && 4843 (ipp.ipp_fields & IPPF_RTHDR)) { 4844 struct T_opthdr *toh; 4845 4846 toh = (struct T_opthdr *)dstopt; 4847 toh->level = IPPROTO_IPV6; 4848 toh->name = IPV6_RTHDR; 4849 toh->len = sizeof (struct T_opthdr) + 4850 ipp.ipp_rthdrlen; 4851 toh->status = 0; 4852 dstopt += sizeof (struct T_opthdr); 4853 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4854 dstopt += ipp.ipp_rthdrlen; 4855 udi_size -= toh->len; 4856 } 4857 if ((udp_bits.udpb_ipv6_recvdstopts) && 4858 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4859 struct T_opthdr *toh; 4860 4861 toh = (struct T_opthdr *)dstopt; 4862 toh->level = IPPROTO_IPV6; 4863 toh->name = IPV6_DSTOPTS; 4864 toh->len = sizeof (struct T_opthdr) + 4865 ipp.ipp_dstoptslen; 4866 toh->status = 0; 4867 dstopt += sizeof (struct T_opthdr); 4868 bcopy(ipp.ipp_dstopts, dstopt, 4869 ipp.ipp_dstoptslen); 4870 dstopt += ipp.ipp_dstoptslen; 4871 udi_size -= toh->len; 4872 } 4873 4874 if (cr != NULL) { 4875 struct T_opthdr *toh; 4876 4877 toh = (struct T_opthdr *)dstopt; 4878 toh->level = SOL_SOCKET; 4879 toh->name = SCM_UCRED; 4880 toh->len = sizeof (struct T_opthdr) + ucredsize; 4881 toh->status = 0; 4882 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4883 dstopt += toh->len; 4884 udi_size -= toh->len; 4885 } 4886 if (udp_bits.udpb_timestamp) { 4887 struct T_opthdr *toh; 4888 4889 toh = (struct T_opthdr *)dstopt; 4890 toh->level = SOL_SOCKET; 4891 toh->name = SCM_TIMESTAMP; 4892 toh->len = sizeof (struct T_opthdr) + 4893 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4894 toh->status = 0; 4895 dstopt += sizeof (struct T_opthdr); 4896 /* Align for gethrestime() */ 4897 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4898 sizeof (intptr_t)); 4899 gethrestime((timestruc_t *)dstopt); 4900 dstopt = (uchar_t *)toh + toh->len; 4901 udi_size -= toh->len; 4902 } 4903 4904 /* Consumed all of allocated space */ 4905 ASSERT(udi_size == 0); 4906 } 4907 #undef sin6 4908 /* No IP_RECVDSTADDR for IPv6. */ 4909 } 4910 4911 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4912 if (options_mp != NULL) 4913 freeb(options_mp); 4914 4915 if (udp_bits.udpb_direct_sockfs) { 4916 /* 4917 * There is nothing above us except for the stream head; 4918 * use the read-side synchronous stream interface in 4919 * order to reduce the time spent in interrupt thread. 
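         * (udp_rcv_enqueue() parks the datagram on the endpoint's own
         * receive list, where sockfs pulls it off through the
         * synchronous-streams read entry point instead of going through
         * a putnext()/stream head wakeup.)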
4920 */ 4921 ASSERT(udp->udp_issocket); 4922 udp_rcv_enqueue(connp->conn_rq, udp, mp, mp_len); 4923 } else { 4924 /* 4925 * Use regular STREAMS interface to pass data upstream 4926 * if this is not a socket endpoint, or if we have 4927 * switched over to the slow mode due to sockmod being 4928 * popped or a module being pushed on top of us. 4929 */ 4930 putnext(connp->conn_rq, mp); 4931 } 4932 return; 4933 4934 tossit: 4935 freemsg(mp); 4936 if (options_mp != NULL) 4937 freeb(options_mp); 4938 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4939 } 4940 4941 /* 4942 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 4943 * immediately. 4944 */ 4945 static void 4946 udp_bind_result(conn_t *connp, mblk_t *mp) 4947 { 4948 struct T_error_ack *tea; 4949 4950 switch (mp->b_datap->db_type) { 4951 case M_PROTO: 4952 case M_PCPROTO: 4953 /* M_PROTO messages contain some type of TPI message. */ 4954 ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= 4955 (uintptr_t)INT_MAX); 4956 if (mp->b_wptr - mp->b_rptr < sizeof (t_scalar_t)) { 4957 freemsg(mp); 4958 return; 4959 } 4960 tea = (struct T_error_ack *)mp->b_rptr; 4961 4962 switch (tea->PRIM_type) { 4963 case T_ERROR_ACK: 4964 switch (tea->ERROR_prim) { 4965 case O_T_BIND_REQ: 4966 case T_BIND_REQ: 4967 udp_bind_error(connp, mp); 4968 return; 4969 default: 4970 break; 4971 } 4972 ASSERT(0); 4973 freemsg(mp); 4974 return; 4975 4976 case T_BIND_ACK: 4977 udp_bind_ack(connp, mp); 4978 return; 4979 4980 default: 4981 break; 4982 } 4983 freemsg(mp); 4984 return; 4985 default: 4986 /* FIXME: other cases? */ 4987 ASSERT(0); 4988 freemsg(mp); 4989 return; 4990 } 4991 } 4992 4993 /* 4994 * Process a T_BIND_ACK 4995 */ 4996 static void 4997 udp_bind_ack(conn_t *connp, mblk_t *mp) 4998 { 4999 udp_t *udp = connp->conn_udp; 5000 mblk_t *mp1; 5001 ire_t *ire; 5002 struct T_bind_ack *tba; 5003 uchar_t *addrp; 5004 ipa_conn_t *ac; 5005 ipa6_conn_t *ac6; 5006 udp_fanout_t *udpf; 5007 udp_stack_t *us = udp->udp_us; 5008 5009 ASSERT(udp->udp_pending_op != -1); 5010 rw_enter(&udp->udp_rwlock, RW_WRITER); 5011 /* 5012 * If a broadcast/multicast address was bound set 5013 * the source address to 0. 5014 * This ensures no datagrams with broadcast address 5015 * as source address are emitted (which would violate 5016 * RFC1122 - Hosts requirements) 5017 * 5018 * Note that when connecting the returned IRE is 5019 * for the destination address and we only perform 5020 * the broadcast check for the source address (it 5021 * is OK to connect to a broadcast/multicast address.) 5022 */ 5023 mp1 = mp->b_cont; 5024 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5025 ire = (ire_t *)mp1->b_rptr; 5026 5027 /* 5028 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5029 * local address. 
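         * In practice this means a bind() to, say, 255.255.255.255 or
         * to a multicast group leaves udp_v6src unspecified, and IP will
         * pick a proper unicast source address for each datagram sent
         * from the endpoint.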
5030 */ 5031 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 5032 us->us_bind_fanout_size)]; 5033 if (ire->ire_type == IRE_BROADCAST && 5034 udp->udp_state != TS_DATA_XFER) { 5035 ASSERT(udp->udp_pending_op == T_BIND_REQ || 5036 udp->udp_pending_op == O_T_BIND_REQ); 5037 /* This was just a local bind to a broadcast addr */ 5038 mutex_enter(&udpf->uf_lock); 5039 V6_SET_ZERO(udp->udp_v6src); 5040 mutex_exit(&udpf->uf_lock); 5041 if (udp->udp_family == AF_INET6) 5042 (void) udp_build_hdrs(udp); 5043 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5044 /* 5045 * Local address not yet set - pick it from the 5046 * T_bind_ack 5047 */ 5048 tba = (struct T_bind_ack *)mp->b_rptr; 5049 addrp = &mp->b_rptr[tba->ADDR_offset]; 5050 switch (udp->udp_family) { 5051 case AF_INET: 5052 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5053 ac = (ipa_conn_t *)addrp; 5054 } else { 5055 ASSERT(tba->ADDR_length == 5056 sizeof (ipa_conn_x_t)); 5057 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5058 } 5059 mutex_enter(&udpf->uf_lock); 5060 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5061 &udp->udp_v6src); 5062 mutex_exit(&udpf->uf_lock); 5063 break; 5064 case AF_INET6: 5065 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5066 ac6 = (ipa6_conn_t *)addrp; 5067 } else { 5068 ASSERT(tba->ADDR_length == 5069 sizeof (ipa6_conn_x_t)); 5070 ac6 = &((ipa6_conn_x_t *) 5071 addrp)->ac6x_conn; 5072 } 5073 mutex_enter(&udpf->uf_lock); 5074 udp->udp_v6src = ac6->ac6_laddr; 5075 mutex_exit(&udpf->uf_lock); 5076 (void) udp_build_hdrs(udp); 5077 break; 5078 } 5079 } 5080 mp1 = mp1->b_cont; 5081 } 5082 udp->udp_pending_op = -1; 5083 rw_exit(&udp->udp_rwlock); 5084 /* 5085 * Look for one or more appended ACK message added by 5086 * udp_connect or udp_disconnect. 5087 * If none found just send up the T_BIND_ACK. 5088 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5089 * udp_disconnect has appended a T_OK_ACK. 5090 */ 5091 if (mp1 != NULL) { 5092 if (mp->b_cont == mp1) 5093 mp->b_cont = NULL; 5094 else { 5095 ASSERT(mp->b_cont->b_cont == mp1); 5096 mp->b_cont->b_cont = NULL; 5097 } 5098 freemsg(mp); 5099 mp = mp1; 5100 while (mp != NULL) { 5101 mp1 = mp->b_cont; 5102 mp->b_cont = NULL; 5103 putnext(connp->conn_rq, mp); 5104 mp = mp1; 5105 } 5106 return; 5107 } 5108 freemsg(mp->b_cont); 5109 mp->b_cont = NULL; 5110 putnext(connp->conn_rq, mp); 5111 } 5112 5113 static void 5114 udp_bind_error(conn_t *connp, mblk_t *mp) 5115 { 5116 udp_t *udp = connp->conn_udp; 5117 struct T_error_ack *tea; 5118 udp_fanout_t *udpf; 5119 udp_stack_t *us = udp->udp_us; 5120 5121 tea = (struct T_error_ack *)mp->b_rptr; 5122 5123 /* 5124 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5125 * clear out the associated port and source 5126 * address before passing the message 5127 * upstream. If this was caused by a T_CONN_REQ 5128 * revert back to bound state. 
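     * Concretely: a failed connect only falls back to TS_IDLE with the
     * previously bound source address restored, while a failed bind or
     * disconnect request leaves the endpoint fully unbound (TS_UNBND,
     * port 0, source addresses cleared).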
5129 */ 5130 5131 rw_enter(&udp->udp_rwlock, RW_WRITER); 5132 ASSERT(udp->udp_pending_op != -1); 5133 tea->ERROR_prim = udp->udp_pending_op; 5134 udp->udp_pending_op = -1; 5135 udpf = &us->us_bind_fanout[ 5136 UDP_BIND_HASH(udp->udp_port, 5137 us->us_bind_fanout_size)]; 5138 mutex_enter(&udpf->uf_lock); 5139 5140 switch (tea->ERROR_prim) { 5141 case T_CONN_REQ: 5142 ASSERT(udp->udp_state == TS_DATA_XFER); 5143 /* Connect failed */ 5144 /* Revert back to the bound source */ 5145 udp->udp_v6src = udp->udp_bound_v6src; 5146 udp->udp_state = TS_IDLE; 5147 mutex_exit(&udpf->uf_lock); 5148 if (udp->udp_family == AF_INET6) 5149 (void) udp_build_hdrs(udp); 5150 rw_exit(&udp->udp_rwlock); 5151 break; 5152 5153 case T_DISCON_REQ: 5154 case T_BIND_REQ: 5155 case O_T_BIND_REQ: 5156 V6_SET_ZERO(udp->udp_v6src); 5157 V6_SET_ZERO(udp->udp_bound_v6src); 5158 udp->udp_state = TS_UNBND; 5159 udp_bind_hash_remove(udp, B_TRUE); 5160 udp->udp_port = 0; 5161 mutex_exit(&udpf->uf_lock); 5162 if (udp->udp_family == AF_INET6) 5163 (void) udp_build_hdrs(udp); 5164 rw_exit(&udp->udp_rwlock); 5165 break; 5166 5167 default: 5168 mutex_exit(&udpf->uf_lock); 5169 rw_exit(&udp->udp_rwlock); 5170 (void) mi_strlog(connp->conn_rq, 1, 5171 SL_ERROR|SL_TRACE, 5172 "udp_input_other: bad ERROR_prim, " 5173 "len %d", tea->ERROR_prim); 5174 } 5175 putnext(connp->conn_rq, mp); 5176 } 5177 5178 /* 5179 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 5180 * information that can be changing beneath us. 5181 */ 5182 mblk_t * 5183 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5184 { 5185 mblk_t *mpdata; 5186 mblk_t *mp_conn_ctl; 5187 mblk_t *mp_attr_ctl; 5188 mblk_t *mp6_conn_ctl; 5189 mblk_t *mp6_attr_ctl; 5190 mblk_t *mp_conn_tail; 5191 mblk_t *mp_attr_tail; 5192 mblk_t *mp6_conn_tail; 5193 mblk_t *mp6_attr_tail; 5194 struct opthdr *optp; 5195 mib2_udpEntry_t ude; 5196 mib2_udp6Entry_t ude6; 5197 mib2_transportMLPEntry_t mlp; 5198 int state; 5199 zoneid_t zoneid; 5200 int i; 5201 connf_t *connfp; 5202 conn_t *connp = Q_TO_CONN(q); 5203 int v4_conn_idx; 5204 int v6_conn_idx; 5205 boolean_t needattr; 5206 udp_t *udp; 5207 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5208 udp_stack_t *us = connp->conn_netstack->netstack_udp; 5209 mblk_t *mp2ctl; 5210 5211 /* 5212 * make a copy of the original message 5213 */ 5214 mp2ctl = copymsg(mpctl); 5215 5216 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 5217 if (mpctl == NULL || 5218 (mpdata = mpctl->b_cont) == NULL || 5219 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5220 (mp_attr_ctl = copymsg(mpctl)) == NULL || 5221 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 5222 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 5223 freemsg(mp_conn_ctl); 5224 freemsg(mp_attr_ctl); 5225 freemsg(mp6_conn_ctl); 5226 freemsg(mpctl); 5227 freemsg(mp2ctl); 5228 return (0); 5229 } 5230 5231 zoneid = connp->conn_zoneid; 5232 5233 /* fixed length structure for IPv4 and IPv6 counters */ 5234 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5235 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5236 /* synchronize 64- and 32-bit counters */ 5237 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 5238 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 5239 5240 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5241 optp->level = MIB2_UDP; 5242 optp->name = 0; 5243 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 5244 sizeof (us->us_udp_mib)); 5245 optp->len = msgdsize(mpdata); 5246 qreply(q, mpctl); 5247 5248 
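    /*
     * Now walk every UDP conn_t in the zone via the global conn hash
     * and append one MIB2_UDP_ENTRY and/or MIB2_UDP6_ENTRY record per
     * endpoint, plus an EXPER_XPORT_MLP attribute record for labeled
     * (MLP) endpoints.
     */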
mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 5249 v4_conn_idx = v6_conn_idx = 0; 5250 5251 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5252 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5253 connp = NULL; 5254 5255 while ((connp = ipcl_get_next_conn(connfp, connp, 5256 IPCL_UDPCONN))) { 5257 udp = connp->conn_udp; 5258 if (zoneid != connp->conn_zoneid) 5259 continue; 5260 5261 /* 5262 * Note that the port numbers are sent in 5263 * host byte order 5264 */ 5265 5266 if (udp->udp_state == TS_UNBND) 5267 state = MIB2_UDP_unbound; 5268 else if (udp->udp_state == TS_IDLE) 5269 state = MIB2_UDP_idle; 5270 else if (udp->udp_state == TS_DATA_XFER) 5271 state = MIB2_UDP_connected; 5272 else 5273 state = MIB2_UDP_unknown; 5274 5275 needattr = B_FALSE; 5276 bzero(&mlp, sizeof (mlp)); 5277 if (connp->conn_mlp_type != mlptSingle) { 5278 if (connp->conn_mlp_type == mlptShared || 5279 connp->conn_mlp_type == mlptBoth) 5280 mlp.tme_flags |= MIB2_TMEF_SHARED; 5281 if (connp->conn_mlp_type == mlptPrivate || 5282 connp->conn_mlp_type == mlptBoth) 5283 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 5284 needattr = B_TRUE; 5285 } 5286 5287 /* 5288 * Create an IPv4 table entry for IPv4 entries and also 5289 * any IPv6 entries which are bound to in6addr_any 5290 * (i.e. anything a IPv4 peer could connect/send to). 5291 */ 5292 if (udp->udp_ipversion == IPV4_VERSION || 5293 (udp->udp_state <= TS_IDLE && 5294 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5295 ude.udpEntryInfo.ue_state = state; 5296 /* 5297 * If in6addr_any this will set it to 5298 * INADDR_ANY 5299 */ 5300 ude.udpLocalAddress = 5301 V4_PART_OF_V6(udp->udp_v6src); 5302 ude.udpLocalPort = ntohs(udp->udp_port); 5303 if (udp->udp_state == TS_DATA_XFER) { 5304 /* 5305 * Can potentially get here for 5306 * v6 socket if another process 5307 * (say, ping) has just done a 5308 * sendto(), changing the state 5309 * from the TS_IDLE above to 5310 * TS_DATA_XFER by the time we hit 5311 * this part of the code. 5312 */ 5313 ude.udpEntryInfo.ue_RemoteAddress = 5314 V4_PART_OF_V6(udp->udp_v6dst); 5315 ude.udpEntryInfo.ue_RemotePort = 5316 ntohs(udp->udp_dstport); 5317 } else { 5318 ude.udpEntryInfo.ue_RemoteAddress = 0; 5319 ude.udpEntryInfo.ue_RemotePort = 0; 5320 } 5321 5322 /* 5323 * We make the assumption that all udp_t 5324 * structs will be created within an address 5325 * region no larger than 32-bits. 5326 */ 5327 ude.udpInstance = (uint32_t)(uintptr_t)udp; 5328 ude.udpCreationProcess = 5329 (udp->udp_open_pid < 0) ? 5330 MIB2_UNKNOWN_PROCESS : 5331 udp->udp_open_pid; 5332 ude.udpCreationTime = udp->udp_open_time; 5333 5334 (void) snmp_append_data2(mp_conn_ctl->b_cont, 5335 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5336 mlp.tme_connidx = v4_conn_idx++; 5337 if (needattr) 5338 (void) snmp_append_data2( 5339 mp_attr_ctl->b_cont, &mp_attr_tail, 5340 (char *)&mlp, sizeof (mlp)); 5341 } 5342 if (udp->udp_ipversion == IPV6_VERSION) { 5343 ude6.udp6EntryInfo.ue_state = state; 5344 ude6.udp6LocalAddress = udp->udp_v6src; 5345 ude6.udp6LocalPort = ntohs(udp->udp_port); 5346 ude6.udp6IfIndex = udp->udp_bound_if; 5347 if (udp->udp_state == TS_DATA_XFER) { 5348 ude6.udp6EntryInfo.ue_RemoteAddress = 5349 udp->udp_v6dst; 5350 ude6.udp6EntryInfo.ue_RemotePort = 5351 ntohs(udp->udp_dstport); 5352 } else { 5353 ude6.udp6EntryInfo.ue_RemoteAddress = 5354 sin6_null.sin6_addr; 5355 ude6.udp6EntryInfo.ue_RemotePort = 0; 5356 } 5357 /* 5358 * We make the assumption that all udp_t 5359 * structs will be created within an address 5360 * region no larger than 32-bits. 
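                 * (The instance value is exported as a 32-bit quantity,
                 * so the pointer is simply truncated; it only needs to
                 * give each endpoint a reasonably unique identifier in
                 * the table and is not meant to be dereferenced.)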
5361 */ 5362 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 5363 ude6.udp6CreationProcess = 5364 (udp->udp_open_pid < 0) ? 5365 MIB2_UNKNOWN_PROCESS : 5366 udp->udp_open_pid; 5367 ude6.udp6CreationTime = udp->udp_open_time; 5368 5369 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 5370 &mp6_conn_tail, (char *)&ude6, 5371 sizeof (ude6)); 5372 mlp.tme_connidx = v6_conn_idx++; 5373 if (needattr) 5374 (void) snmp_append_data2( 5375 mp6_attr_ctl->b_cont, 5376 &mp6_attr_tail, (char *)&mlp, 5377 sizeof (mlp)); 5378 } 5379 } 5380 } 5381 5382 /* IPv4 UDP endpoints */ 5383 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5384 sizeof (struct T_optmgmt_ack)]; 5385 optp->level = MIB2_UDP; 5386 optp->name = MIB2_UDP_ENTRY; 5387 optp->len = msgdsize(mp_conn_ctl->b_cont); 5388 qreply(q, mp_conn_ctl); 5389 5390 /* table of MLP attributes... */ 5391 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 5392 sizeof (struct T_optmgmt_ack)]; 5393 optp->level = MIB2_UDP; 5394 optp->name = EXPER_XPORT_MLP; 5395 optp->len = msgdsize(mp_attr_ctl->b_cont); 5396 if (optp->len == 0) 5397 freemsg(mp_attr_ctl); 5398 else 5399 qreply(q, mp_attr_ctl); 5400 5401 /* IPv6 UDP endpoints */ 5402 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5403 sizeof (struct T_optmgmt_ack)]; 5404 optp->level = MIB2_UDP6; 5405 optp->name = MIB2_UDP6_ENTRY; 5406 optp->len = msgdsize(mp6_conn_ctl->b_cont); 5407 qreply(q, mp6_conn_ctl); 5408 5409 /* table of MLP attributes... */ 5410 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 5411 sizeof (struct T_optmgmt_ack)]; 5412 optp->level = MIB2_UDP6; 5413 optp->name = EXPER_XPORT_MLP; 5414 optp->len = msgdsize(mp6_attr_ctl->b_cont); 5415 if (optp->len == 0) 5416 freemsg(mp6_attr_ctl); 5417 else 5418 qreply(q, mp6_attr_ctl); 5419 5420 return (mp2ctl); 5421 } 5422 5423 /* 5424 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 5425 * NOTE: Per MIB-II, UDP has no writable data. 5426 * TODO: If this ever actually tries to set anything, it needs to be 5427 * to do the appropriate locking. 
5428 */ 5429 /* ARGSUSED */ 5430 int 5431 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5432 uchar_t *ptr, int len) 5433 { 5434 switch (level) { 5435 case MIB2_UDP: 5436 return (0); 5437 default: 5438 return (1); 5439 } 5440 } 5441 5442 static void 5443 udp_report_item(mblk_t *mp, udp_t *udp) 5444 { 5445 char *state; 5446 char addrbuf1[INET6_ADDRSTRLEN]; 5447 char addrbuf2[INET6_ADDRSTRLEN]; 5448 uint_t print_len, buf_len; 5449 5450 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5451 ASSERT(buf_len >= 0); 5452 if (buf_len == 0) 5453 return; 5454 5455 if (udp->udp_state == TS_UNBND) 5456 state = "UNBOUND"; 5457 else if (udp->udp_state == TS_IDLE) 5458 state = "IDLE"; 5459 else if (udp->udp_state == TS_DATA_XFER) 5460 state = "CONNECTED"; 5461 else 5462 state = "UnkState"; 5463 print_len = snprintf((char *)mp->b_wptr, buf_len, 5464 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5465 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5466 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 5467 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 5468 ntohs(udp->udp_dstport), state); 5469 if (print_len < buf_len) { 5470 mp->b_wptr += print_len; 5471 } else { 5472 mp->b_wptr += buf_len; 5473 } 5474 } 5475 5476 /* Report for ndd "udp_status" */ 5477 /* ARGSUSED */ 5478 static int 5479 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5480 { 5481 zoneid_t zoneid; 5482 connf_t *connfp; 5483 conn_t *connp = Q_TO_CONN(q); 5484 udp_t *udp = connp->conn_udp; 5485 int i; 5486 udp_stack_t *us = udp->udp_us; 5487 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5488 5489 /* 5490 * Because of the ndd constraint, at most we can have 64K buffer 5491 * to put in all UDP info. So to be more efficient, just 5492 * allocate a 64K buffer here, assuming we need that large buffer. 5493 * This may be a problem as any user can read udp_status. Therefore 5494 * we limit the rate of doing this using us_ndd_get_info_interval. 5495 * This should be OK as normal users should not do this too often. 5496 */ 5497 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 5498 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 5499 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 5500 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5501 return (0); 5502 } 5503 } 5504 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5505 /* The following may work even if we cannot get a large buf. */ 5506 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5507 return (0); 5508 } 5509 (void) mi_mpprintf(mp, 5510 "UDP " MI_COL_HDRPAD_STR 5511 /* 12345678[89ABCDEF] */ 5512 " zone lport src addr dest addr port state"); 5513 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5514 5515 zoneid = connp->conn_zoneid; 5516 5517 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5518 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5519 connp = NULL; 5520 5521 while ((connp = ipcl_get_next_conn(connfp, connp, 5522 IPCL_UDPCONN))) { 5523 udp = connp->conn_udp; 5524 if (zoneid != GLOBAL_ZONEID && 5525 zoneid != connp->conn_zoneid) 5526 continue; 5527 5528 udp_report_item(mp->b_cont, udp); 5529 } 5530 } 5531 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 5532 return (0); 5533 } 5534 5535 /* 5536 * This routine creates a T_UDERROR_IND message and passes it upstream. 5537 * The address and options are copied from the T_UNITDATA_REQ message 5538 * passed in mp. This message is freed. 
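 * A T_UDERROR_IND reports a per-datagram failure (for example a bad
 * destination or option in a T_UNITDATA_REQ) without affecting the
 * state of the endpoint itself; the original request mblk is consumed
 * in all cases.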
5539 */ 5540 static void 5541 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5542 t_scalar_t err) 5543 { 5544 struct T_unitdata_req *tudr; 5545 mblk_t *mp1; 5546 uchar_t *optaddr; 5547 t_scalar_t optlen; 5548 5549 if (DB_TYPE(mp) == M_DATA) { 5550 ASSERT(destaddr != NULL && destlen != 0); 5551 optaddr = NULL; 5552 optlen = 0; 5553 } else { 5554 if ((mp->b_wptr < mp->b_rptr) || 5555 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5556 goto done; 5557 } 5558 tudr = (struct T_unitdata_req *)mp->b_rptr; 5559 destaddr = mp->b_rptr + tudr->DEST_offset; 5560 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5561 destaddr + tudr->DEST_length < mp->b_rptr || 5562 destaddr + tudr->DEST_length > mp->b_wptr) { 5563 goto done; 5564 } 5565 optaddr = mp->b_rptr + tudr->OPT_offset; 5566 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 5567 optaddr + tudr->OPT_length < mp->b_rptr || 5568 optaddr + tudr->OPT_length > mp->b_wptr) { 5569 goto done; 5570 } 5571 destlen = tudr->DEST_length; 5572 optlen = tudr->OPT_length; 5573 } 5574 5575 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5576 (char *)optaddr, optlen, err); 5577 if (mp1 != NULL) 5578 qreply(q, mp1); 5579 5580 done: 5581 freemsg(mp); 5582 } 5583 5584 /* 5585 * This routine removes a port number association from a stream. It 5586 * is called by udp_wput to handle T_UNBIND_REQ messages. 5587 */ 5588 static void 5589 udp_unbind(queue_t *q, mblk_t *mp) 5590 { 5591 udp_t *udp = Q_TO_UDP(q); 5592 udp_fanout_t *udpf; 5593 udp_stack_t *us = udp->udp_us; 5594 5595 if (cl_inet_unbind != NULL) { 5596 /* 5597 * Running in cluster mode - register unbind information 5598 */ 5599 if (udp->udp_ipversion == IPV4_VERSION) { 5600 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 5601 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5602 (in_port_t)udp->udp_port); 5603 } else { 5604 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5605 (uint8_t *)&(udp->udp_v6src), 5606 (in_port_t)udp->udp_port); 5607 } 5608 } 5609 5610 rw_enter(&udp->udp_rwlock, RW_WRITER); 5611 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 5612 rw_exit(&udp->udp_rwlock); 5613 udp_err_ack(q, mp, TOUTSTATE, 0); 5614 return; 5615 } 5616 udp->udp_pending_op = T_UNBIND_REQ; 5617 rw_exit(&udp->udp_rwlock); 5618 5619 /* 5620 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5621 * and therefore ip_unbind must never return NULL. 5622 */ 5623 mp = ip_unbind(q, mp); 5624 ASSERT(mp != NULL); 5625 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 5626 5627 /* 5628 * Once we're unbound from IP, the pending operation may be cleared 5629 * here. 5630 */ 5631 rw_enter(&udp->udp_rwlock, RW_WRITER); 5632 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 5633 us->us_bind_fanout_size)]; 5634 mutex_enter(&udpf->uf_lock); 5635 udp_bind_hash_remove(udp, B_TRUE); 5636 V6_SET_ZERO(udp->udp_v6src); 5637 V6_SET_ZERO(udp->udp_bound_v6src); 5638 udp->udp_port = 0; 5639 mutex_exit(&udpf->uf_lock); 5640 5641 udp->udp_pending_op = -1; 5642 udp->udp_state = TS_UNBND; 5643 if (udp->udp_family == AF_INET6) 5644 (void) udp_build_hdrs(udp); 5645 rw_exit(&udp->udp_rwlock); 5646 5647 qreply(q, mp); 5648 } 5649 5650 /* 5651 * Don't let port fall into the privileged range. 5652 * Since the extra privileged ports can be arbitrary we also 5653 * ensure that we exclude those from consideration. 5654 * us->us_epriv_ports is not sorted thus we loop over it until 5655 * there are no changes. 
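 * For example, with the default anonymous range noted below
 * (32768 - 65535), a random value of 1234 falls below the smallest
 * anonymous port and is remapped to
 *
 *	32768 + (1234 % (65535 - 32768)) == 34002
 *
 * which is then subject to the usual privileged/extra-privileged port
 * checks in the retry loop.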
5656 */ 5657 static in_port_t 5658 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 5659 { 5660 int i; 5661 in_port_t nextport; 5662 boolean_t restart = B_FALSE; 5663 udp_stack_t *us = udp->udp_us; 5664 5665 if (random && udp_random_anon_port != 0) { 5666 (void) random_get_pseudo_bytes((uint8_t *)&port, 5667 sizeof (in_port_t)); 5668 /* 5669 * Unless changed by a sys admin, the smallest anon port 5670 * is 32768 and the largest anon port is 65535. It is 5671 * very likely (50%) for the random port to be smaller 5672 * than the smallest anon port. When that happens, 5673 * add port % (anon port range) to the smallest anon 5674 * port to get the random port. It should fall into the 5675 * valid anon port range. 5676 */ 5677 if (port < us->us_smallest_anon_port) { 5678 port = us->us_smallest_anon_port + 5679 port % (us->us_largest_anon_port - 5680 us->us_smallest_anon_port); 5681 } 5682 } 5683 5684 retry: 5685 if (port < us->us_smallest_anon_port) 5686 port = us->us_smallest_anon_port; 5687 5688 if (port > us->us_largest_anon_port) { 5689 port = us->us_smallest_anon_port; 5690 if (restart) 5691 return (0); 5692 restart = B_TRUE; 5693 } 5694 5695 if (port < us->us_smallest_nonpriv_port) 5696 port = us->us_smallest_nonpriv_port; 5697 5698 for (i = 0; i < us->us_num_epriv_ports; i++) { 5699 if (port == us->us_epriv_ports[i]) { 5700 port++; 5701 /* 5702 * Make sure that the port is in the 5703 * valid range. 5704 */ 5705 goto retry; 5706 } 5707 } 5708 5709 if (is_system_labeled() && 5710 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 5711 port, IPPROTO_UDP, B_TRUE)) != 0) { 5712 port = nextport; 5713 goto retry; 5714 } 5715 5716 return (port); 5717 } 5718 5719 static int 5720 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 5721 { 5722 int err; 5723 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 5724 udp_t *udp = Q_TO_UDP(wq); 5725 udp_stack_t *us = udp->udp_us; 5726 5727 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 5728 opt_storage, udp->udp_connp->conn_mac_exempt, 5729 us->us_netstack->netstack_ip); 5730 if (err == 0) { 5731 err = tsol_update_options(&udp->udp_ip_snd_options, 5732 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 5733 opt_storage); 5734 } 5735 if (err != 0) { 5736 DTRACE_PROBE4( 5737 tx__ip__log__info__updatelabel__udp, 5738 char *, "queue(1) failed to update options(2) on mp(3)", 5739 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5740 } else { 5741 IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst); 5742 } 5743 return (err); 5744 } 5745 5746 static mblk_t * 5747 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5748 uint_t srcid, int *error, boolean_t insert_spi) 5749 { 5750 udp_t *udp = connp->conn_udp; 5751 queue_t *q = connp->conn_wq; 5752 mblk_t *mp1 = mp; 5753 mblk_t *mp2; 5754 ipha_t *ipha; 5755 int ip_hdr_length; 5756 uint32_t ip_len; 5757 udpha_t *udpha; 5758 boolean_t lock_held = B_FALSE; 5759 in_port_t uha_src_port; 5760 udpattrs_t attrs; 5761 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 5762 uint32_t ip_snd_opt_len = 0; 5763 ip4_pkt_t pktinfo; 5764 ip4_pkt_t *pktinfop = &pktinfo; 5765 ip_opt_info_t optinfo; 5766 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5767 udp_stack_t *us = udp->udp_us; 5768 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 5769 5770 5771 *error = 0; 5772 pktinfop->ip4_ill_index = 0; 5773 pktinfop->ip4_addr = INADDR_ANY; 5774 optinfo.ip_opt_flags = 0; 5775 optinfo.ip_opt_ill_index = 0; 5776 5777 if (v4dst == INADDR_ANY) 5778 v4dst = 
htonl(INADDR_LOOPBACK); 5779 5780 /* 5781 * If options passed in, feed it for verification and handling 5782 */ 5783 attrs.udpattr_credset = B_FALSE; 5784 if (DB_TYPE(mp) != M_DATA) { 5785 mp1 = mp->b_cont; 5786 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 5787 attrs.udpattr_ipp4 = pktinfop; 5788 attrs.udpattr_mb = mp; 5789 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 5790 goto done; 5791 /* 5792 * Note: success in processing options. 5793 * mp option buffer represented by 5794 * OPT_length/offset now potentially modified 5795 * and contain option setting results 5796 */ 5797 ASSERT(*error == 0); 5798 } 5799 } 5800 5801 /* mp1 points to the M_DATA mblk carrying the packet */ 5802 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5803 5804 rw_enter(&udp->udp_rwlock, RW_READER); 5805 lock_held = B_TRUE; 5806 /* 5807 * Check if our saved options are valid; update if not. 5808 * TSOL Note: Since we are not in WRITER mode, UDP packets 5809 * to different destination may require different labels, 5810 * or worse, UDP packets to same IP address may require 5811 * different labels due to use of shared all-zones address. 5812 * We use conn_lock to ensure that lastdst, ip_snd_options, 5813 * and ip_snd_options_len are consistent for the current 5814 * destination and are updated atomically. 5815 */ 5816 mutex_enter(&connp->conn_lock); 5817 if (is_system_labeled()) { 5818 /* Using UDP MLP requires SCM_UCRED from user */ 5819 if (connp->conn_mlp_type != mlptSingle && 5820 !attrs.udpattr_credset) { 5821 mutex_exit(&connp->conn_lock); 5822 DTRACE_PROBE4( 5823 tx__ip__log__info__output__udp, 5824 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5825 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5826 *error = ECONNREFUSED; 5827 goto done; 5828 } 5829 /* 5830 * update label option for this UDP socket if 5831 * - the destination has changed, or 5832 * - the UDP socket is MLP 5833 */ 5834 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5835 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5836 connp->conn_mlp_type != mlptSingle) && 5837 (*error = udp_update_label(q, mp, v4dst)) != 0) { 5838 mutex_exit(&connp->conn_lock); 5839 goto done; 5840 } 5841 } 5842 if (udp->udp_ip_snd_options_len > 0) { 5843 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5844 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5845 } 5846 mutex_exit(&connp->conn_lock); 5847 5848 /* Add an IP header */ 5849 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5850 (insert_spi ? sizeof (uint32_t) : 0); 5851 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5852 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5853 !OK_32PTR(ipha)) { 5854 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5855 if (mp2 == NULL) { 5856 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5857 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5858 *error = ENOMEM; 5859 goto done; 5860 } 5861 mp2->b_wptr = DB_LIM(mp2); 5862 mp2->b_cont = mp1; 5863 mp1 = mp2; 5864 if (DB_TYPE(mp) != M_DATA) 5865 mp->b_cont = mp1; 5866 else 5867 mp = mp1; 5868 5869 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5870 } 5871 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? 
sizeof (uint32_t) : 0)); 5872 #ifdef _BIG_ENDIAN 5873 /* Set version, header length, and tos */ 5874 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5875 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5876 udp->udp_type_of_service); 5877 /* Set ttl and protocol */ 5878 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5879 #else 5880 /* Set version, header length, and tos */ 5881 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5882 ((udp->udp_type_of_service << 8) | 5883 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5884 /* Set ttl and protocol */ 5885 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5886 #endif 5887 if (pktinfop->ip4_addr != INADDR_ANY) { 5888 ipha->ipha_src = pktinfop->ip4_addr; 5889 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5890 } else { 5891 /* 5892 * Copy our address into the packet. If this is zero, 5893 * first look at __sin6_src_id for a hint. If we leave the 5894 * source as INADDR_ANY then ip will fill in the real source 5895 * address. 5896 */ 5897 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5898 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5899 in6_addr_t v6src; 5900 5901 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5902 us->us_netstack); 5903 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5904 } 5905 } 5906 uha_src_port = udp->udp_port; 5907 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5908 rw_exit(&udp->udp_rwlock); 5909 lock_held = B_FALSE; 5910 } 5911 5912 if (pktinfop->ip4_ill_index != 0) { 5913 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5914 } 5915 5916 ipha->ipha_fragment_offset_and_flags = 0; 5917 ipha->ipha_ident = 0; 5918 5919 mp1->b_rptr = (uchar_t *)ipha; 5920 5921 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5922 (uintptr_t)UINT_MAX); 5923 5924 /* Determine length of packet */ 5925 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5926 if ((mp2 = mp1->b_cont) != NULL) { 5927 do { 5928 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5929 ip_len += (uint32_t)MBLKL(mp2); 5930 } while ((mp2 = mp2->b_cont) != NULL); 5931 } 5932 /* 5933 * If the size of the packet is greater than the maximum allowed by 5934 * ip, return an error. Passing this down could cause panics because 5935 * the size will have wrapped and be inconsistent with the msg size. 5936 */ 5937 if (ip_len > IP_MAXPACKET) { 5938 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5939 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5940 *error = EMSGSIZE; 5941 goto done; 5942 } 5943 ipha->ipha_length = htons((uint16_t)ip_len); 5944 ip_len -= ip_hdr_length; 5945 ip_len = htons((uint16_t)ip_len); 5946 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5947 5948 /* Insert all-0s SPI now. */ 5949 if (insert_spi) 5950 *((uint32_t *)(udpha + 1)) = 0; 5951 5952 /* 5953 * Copy in the destination address 5954 */ 5955 ipha->ipha_dst = v4dst; 5956 5957 /* 5958 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5959 */ 5960 if (CLASSD(v4dst)) 5961 ipha->ipha_ttl = udp->udp_multicast_ttl; 5962 5963 udpha->uha_dst_port = port; 5964 udpha->uha_src_port = uha_src_port; 5965 5966 if (ip_snd_opt_len > 0) { 5967 uint32_t cksum; 5968 5969 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5970 lock_held = B_FALSE; 5971 rw_exit(&udp->udp_rwlock); 5972 /* 5973 * Massage source route putting first source route in ipha_dst. 5974 * Ignore the destination in T_unitdata_req. 5975 * Create a checksum adjustment for a source route, if any. 
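         * The adjustment is plain one's-complement arithmetic: the
         * partial sum from ip_massage_options() is repeatedly folded
         * with
         *	cksum = (cksum & 0xFFFF) + (cksum >> 16);
         * the two 16-bit halves of ipha_dst are backed out, and the UDP
         * length is added so that the combined length/checksum word
         * written through uha_length lets IP finish the pseudo-header
         * checksum.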
5976 */ 5977 cksum = ip_massage_options(ipha, us->us_netstack); 5978 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5979 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5980 (ipha->ipha_dst & 0xFFFF); 5981 if ((int)cksum < 0) 5982 cksum--; 5983 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5984 /* 5985 * IP does the checksum if uha_checksum is non-zero, 5986 * We make it easy for IP to include our pseudo header 5987 * by putting our length in uha_checksum. 5988 */ 5989 cksum += ip_len; 5990 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5991 /* There might be a carry. */ 5992 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5993 #ifdef _LITTLE_ENDIAN 5994 if (us->us_do_checksum) 5995 ip_len = (cksum << 16) | ip_len; 5996 #else 5997 if (us->us_do_checksum) 5998 ip_len = (ip_len << 16) | cksum; 5999 else 6000 ip_len <<= 16; 6001 #endif 6002 } else { 6003 /* 6004 * IP does the checksum if uha_checksum is non-zero, 6005 * We make it easy for IP to include our pseudo header 6006 * by putting our length in uha_checksum. 6007 */ 6008 if (us->us_do_checksum) 6009 ip_len |= (ip_len << 16); 6010 #ifndef _LITTLE_ENDIAN 6011 else 6012 ip_len <<= 16; 6013 #endif 6014 } 6015 ASSERT(!lock_held); 6016 /* Set UDP length and checksum */ 6017 *((uint32_t *)&udpha->uha_length) = ip_len; 6018 if (DB_CRED(mp) != NULL) 6019 mblk_setcred(mp1, DB_CRED(mp)); 6020 6021 if (DB_TYPE(mp) != M_DATA) { 6022 ASSERT(mp != mp1); 6023 freeb(mp); 6024 } 6025 6026 /* mp has been consumed and we'll return success */ 6027 ASSERT(*error == 0); 6028 mp = NULL; 6029 6030 /* We're done. Pass the packet to ip. */ 6031 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6032 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6033 "udp_wput_end: q %p (%S)", q, "end"); 6034 6035 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 6036 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 6037 connp->conn_dontroute || 6038 connp->conn_nofailover_ill != NULL || 6039 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 6040 optinfo.ip_opt_ill_index != 0 || 6041 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 6042 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 6043 ipst->ips_ip_g_mrouter != NULL) { 6044 UDP_STAT(us, udp_ip_send); 6045 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 6046 &optinfo); 6047 } else { 6048 udp_send_data(udp, connp->conn_wq, mp1, ipha); 6049 } 6050 6051 done: 6052 if (lock_held) 6053 rw_exit(&udp->udp_rwlock); 6054 if (*error != 0) { 6055 ASSERT(mp != NULL); 6056 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6057 } 6058 return (mp); 6059 } 6060 6061 static void 6062 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 6063 { 6064 conn_t *connp = udp->udp_connp; 6065 ipaddr_t src, dst; 6066 ire_t *ire; 6067 ipif_t *ipif = NULL; 6068 mblk_t *ire_fp_mp; 6069 boolean_t retry_caching; 6070 udp_stack_t *us = udp->udp_us; 6071 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6072 6073 dst = ipha->ipha_dst; 6074 src = ipha->ipha_src; 6075 ASSERT(ipha->ipha_ident == 0); 6076 6077 if (CLASSD(dst)) { 6078 int err; 6079 6080 ipif = conn_get_held_ipif(connp, 6081 &connp->conn_multicast_ipif, &err); 6082 6083 if (ipif == NULL || ipif->ipif_isv6 || 6084 (ipif->ipif_ill->ill_phyint->phyint_flags & 6085 PHYI_LOOPBACK)) { 6086 if (ipif != NULL) 6087 ipif_refrele(ipif); 6088 UDP_STAT(us, udp_ip_send); 6089 ip_output(connp, mp, q, IP_WPUT); 6090 return; 6091 } 6092 } 6093 6094 retry_caching = B_FALSE; 6095 mutex_enter(&connp->conn_lock); 6096 ire = connp->conn_ire_cache; 6097 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 6098 6099 if (ire == NULL 
|| ire->ire_addr != dst || 6100 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 6101 retry_caching = B_TRUE; 6102 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 6103 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6104 6105 ASSERT(ipif != NULL); 6106 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6107 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6108 retry_caching = B_TRUE; 6109 } 6110 6111 if (!retry_caching) { 6112 ASSERT(ire != NULL); 6113 IRE_REFHOLD(ire); 6114 mutex_exit(&connp->conn_lock); 6115 } else { 6116 boolean_t cached = B_FALSE; 6117 6118 connp->conn_ire_cache = NULL; 6119 mutex_exit(&connp->conn_lock); 6120 6121 /* Release the old ire */ 6122 if (ire != NULL) { 6123 IRE_REFRELE_NOTR(ire); 6124 ire = NULL; 6125 } 6126 6127 if (CLASSD(dst)) { 6128 ASSERT(ipif != NULL); 6129 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6130 connp->conn_zoneid, MBLK_GETLABEL(mp), 6131 MATCH_IRE_ILL_GROUP, ipst); 6132 } else { 6133 ASSERT(ipif == NULL); 6134 ire = ire_cache_lookup(dst, connp->conn_zoneid, 6135 MBLK_GETLABEL(mp), ipst); 6136 } 6137 6138 if (ire == NULL) { 6139 if (ipif != NULL) 6140 ipif_refrele(ipif); 6141 UDP_STAT(us, udp_ire_null); 6142 ip_output(connp, mp, q, IP_WPUT); 6143 return; 6144 } 6145 IRE_REFHOLD_NOTR(ire); 6146 6147 mutex_enter(&connp->conn_lock); 6148 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 6149 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6150 irb_t *irb = ire->ire_bucket; 6151 6152 /* 6153 * IRE's created for non-connection oriented transports 6154 * are normally initialized with IRE_MARK_TEMPORARY set 6155 * in the ire_marks. These IRE's are preferentially 6156 * reaped when the hash chain length in the cache 6157 * bucket exceeds the maximum value specified in 6158 * ip[6]_ire_max_bucket_cnt. This can severely affect 6159 * UDP performance if IRE cache entries that we need 6160 * to reuse are continually removed. To remedy this, 6161 * when we cache the IRE in the conn_t, we remove the 6162 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 6163 * set. 6164 */ 6165 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 6166 rw_enter(&irb->irb_lock, RW_WRITER); 6167 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 6168 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 6169 irb->irb_tmp_ire_cnt--; 6170 } 6171 rw_exit(&irb->irb_lock); 6172 } 6173 connp->conn_ire_cache = ire; 6174 cached = B_TRUE; 6175 } 6176 mutex_exit(&connp->conn_lock); 6177 6178 /* 6179 * We can continue to use the ire but since it was not 6180 * cached, we should drop the extra reference. 6181 */ 6182 if (!cached) 6183 IRE_REFRELE_NOTR(ire); 6184 } 6185 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6186 ASSERT(!CLASSD(dst) || ipif != NULL); 6187 6188 /* 6189 * Check if we can take the fast-path. 
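 * The fast path is only taken when the packet fits within
 * ire_max_frag, there is enough mblk headroom to prepend the
 * fast-path header, and no IP_NEXTHOP override is in effect;
 * broadcast, local, loopback and multirouted destinations always
 * go through ip_output().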
6190 * Note that "incomplete" ire's (where the link-layer for next hop 6191 * is not resolved, or where the fast-path header in nce_fp_mp is not 6192 * available yet) are sent down the legacy (slow) path 6193 */ 6194 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6195 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 6196 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 6197 ((ire->ire_nce == NULL) || 6198 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 6199 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 6200 if (ipif != NULL) 6201 ipif_refrele(ipif); 6202 UDP_STAT(us, udp_ip_ire_send); 6203 IRE_REFRELE(ire); 6204 ip_output(connp, mp, q, IP_WPUT); 6205 return; 6206 } 6207 6208 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6209 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6210 ipha->ipha_src = ipif->ipif_src_addr; 6211 else 6212 ipha->ipha_src = ire->ire_src_addr; 6213 } 6214 6215 if (ipif != NULL) 6216 ipif_refrele(ipif); 6217 6218 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 6219 } 6220 6221 static void 6222 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 6223 { 6224 ipaddr_t src, dst; 6225 ill_t *ill; 6226 mblk_t *ire_fp_mp; 6227 uint_t ire_fp_mp_len; 6228 uint16_t *up; 6229 uint32_t cksum, hcksum_txflags; 6230 queue_t *dev_q; 6231 udp_t *udp = connp->conn_udp; 6232 ipha_t *ipha = (ipha_t *)mp->b_rptr; 6233 udp_stack_t *us = udp->udp_us; 6234 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6235 boolean_t ll_multicast = B_FALSE; 6236 6237 dev_q = ire->ire_stq->q_next; 6238 ASSERT(dev_q != NULL); 6239 6240 6241 if (DEV_Q_IS_FLOW_CTLED(dev_q)) { 6242 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 6243 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 6244 if (ipst->ips_ip_output_queue) 6245 (void) putq(connp->conn_wq, mp); 6246 else 6247 freemsg(mp); 6248 ire_refrele(ire); 6249 return; 6250 } 6251 6252 ire_fp_mp = ire->ire_nce->nce_fp_mp; 6253 ire_fp_mp_len = MBLKL(ire_fp_mp); 6254 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 6255 6256 dst = ipha->ipha_dst; 6257 src = ipha->ipha_src; 6258 6259 ill = ire_to_ill(ire); 6260 ASSERT(ill != NULL); 6261 6262 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 6263 6264 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6265 #ifndef _BIG_ENDIAN 6266 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6267 #endif 6268 6269 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6270 ASSERT(ill->ill_hcksum_capab != NULL); 6271 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6272 } else { 6273 hcksum_txflags = 0; 6274 } 6275 6276 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6277 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6278 6279 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6280 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6281 if (*up != 0) { 6282 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6283 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6284 ntohs(ipha->ipha_length), cksum); 6285 6286 /* Software checksum? 
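 * If IP_CKSUM_XMIT_FAST left no hardware-offload flags on the
 * mblk, the UDP checksum was computed in software above, so it
 * is charged to the software-checksum statistics here.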
*/ 6287 if (DB_CKSUMFLAGS(mp) == 0) { 6288 UDP_STAT(us, udp_out_sw_cksum); 6289 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 6290 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6291 } 6292 } 6293 6294 if (!CLASSD(dst)) { 6295 ipha->ipha_fragment_offset_and_flags |= 6296 (uint32_t)htons(ire->ire_frag_flag); 6297 } 6298 6299 /* Calculate IP header checksum if hardware isn't capable */ 6300 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6301 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6302 ((uint16_t *)ipha)[4]); 6303 } 6304 6305 if (CLASSD(dst)) { 6306 boolean_t ilm_exists; 6307 6308 ILM_WALKER_HOLD(ill); 6309 ilm_exists = (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL); 6310 ILM_WALKER_RELE(ill); 6311 if (ilm_exists) { 6312 ip_multicast_loopback(q, ill, mp, 6313 connp->conn_multicast_loop ? 0 : 6314 IP_FF_NO_MCAST_LOOP, zoneid); 6315 } 6316 6317 /* If multicast TTL is 0 then we are done */ 6318 if (ipha->ipha_ttl == 0) { 6319 freemsg(mp); 6320 ire_refrele(ire); 6321 return; 6322 } 6323 ll_multicast = B_TRUE; 6324 } 6325 6326 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6327 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6328 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6329 6330 UPDATE_OB_PKT_COUNT(ire); 6331 ire->ire_last_used_time = lbolt; 6332 6333 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 6334 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 6335 ntohs(ipha->ipha_length)); 6336 6337 if (ILL_DLS_CAPABLE(ill)) { 6338 /* 6339 * Send the packet directly to DLD, where it may be queued 6340 * depending on the availability of transmit resources at 6341 * the media layer. 6342 */ 6343 IP_DLS_ILL_TX(ill, ipha, mp, ipst, ire_fp_mp_len); 6344 } else { 6345 DTRACE_PROBE4(ip4__physical__out__start, 6346 ill_t *, NULL, ill_t *, ill, 6347 ipha_t *, ipha, mblk_t *, mp); 6348 FW_HOOKS(ipst->ips_ip4_physical_out_event, 6349 ipst->ips_ipv4firewall_physical_out, 6350 NULL, ill, ipha, mp, mp, ll_multicast, ipst); 6351 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 6352 if (mp != NULL) { 6353 if (ipst->ips_ipobs_enabled) { 6354 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, 6355 IP_REAL_ZONEID(connp->conn_zoneid, ipst), 6356 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, 6357 ipst); 6358 } 6359 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 6360 void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, 6361 ipha_t *, ipha, ip6_t *, NULL, int, 0); 6362 putnext(ire->ire_stq, mp); 6363 } 6364 } 6365 IRE_REFRELE(ire); 6366 } 6367 6368 static boolean_t 6369 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 6370 { 6371 udp_t *udp = Q_TO_UDP(wq); 6372 int err; 6373 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 6374 udp_stack_t *us = udp->udp_us; 6375 6376 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 6377 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 6378 us->us_netstack->netstack_ip); 6379 if (err == 0) { 6380 err = tsol_update_sticky(&udp->udp_sticky_ipp, 6381 &udp->udp_label_len_v6, opt_storage); 6382 } 6383 if (err != 0) { 6384 DTRACE_PROBE4( 6385 tx__ip__log__drop__updatelabel__udp6, 6386 char *, "queue(1) failed to update options(2) on mp(3)", 6387 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6388 } else { 6389 udp->udp_v6lastdst = *dst; 6390 } 6391 return (err); 6392 } 6393 6394 void 6395 udp_output_connected(void *arg, mblk_t *mp) 6396 { 6397 conn_t *connp = (conn_t *)arg; 6398 udp_t *udp = connp->conn_udp; 6399 udp_stack_t *us = udp->udp_us; 6400 ipaddr_t v4dst; 6401 in_port_t dstport; 6402 boolean_t mapped_addr; 6403 struct sockaddr_storage ss; 6404 sin_t *sin; 
6405 sin6_t *sin6; 6406 struct sockaddr *addr; 6407 socklen_t addrlen; 6408 int error; 6409 boolean_t insert_spi = udp->udp_nat_t_endpoint; 6410 6411 /* M_DATA for connected socket */ 6412 6413 ASSERT(udp->udp_issocket); 6414 UDP_DBGSTAT(us, udp_data_conn); 6415 6416 mutex_enter(&connp->conn_lock); 6417 if (udp->udp_state != TS_DATA_XFER) { 6418 mutex_exit(&connp->conn_lock); 6419 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6420 UDP_STAT(us, udp_out_err_notconn); 6421 freemsg(mp); 6422 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6423 "udp_wput_end: connp %p (%S)", connp, 6424 "not-connected; address required"); 6425 return; 6426 } 6427 6428 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 6429 if (mapped_addr) 6430 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6431 6432 /* Initialize addr and addrlen as if they're passed in */ 6433 if (udp->udp_family == AF_INET) { 6434 sin = (sin_t *)&ss; 6435 sin->sin_family = AF_INET; 6436 dstport = sin->sin_port = udp->udp_dstport; 6437 ASSERT(mapped_addr); 6438 sin->sin_addr.s_addr = v4dst; 6439 addr = (struct sockaddr *)sin; 6440 addrlen = sizeof (*sin); 6441 } else { 6442 sin6 = (sin6_t *)&ss; 6443 sin6->sin6_family = AF_INET6; 6444 dstport = sin6->sin6_port = udp->udp_dstport; 6445 sin6->sin6_flowinfo = udp->udp_flowinfo; 6446 sin6->sin6_addr = udp->udp_v6dst; 6447 sin6->sin6_scope_id = 0; 6448 sin6->__sin6_src_id = 0; 6449 addr = (struct sockaddr *)sin6; 6450 addrlen = sizeof (*sin6); 6451 } 6452 mutex_exit(&connp->conn_lock); 6453 6454 if (mapped_addr) { 6455 /* 6456 * Handle both AF_INET and AF_INET6; the latter 6457 * for IPV4 mapped destination addresses. Note 6458 * here that both addr and addrlen point to the 6459 * corresponding struct depending on the address 6460 * family of the socket. 6461 */ 6462 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 6463 insert_spi); 6464 } else { 6465 mp = udp_output_v6(connp, mp, sin6, &error); 6466 } 6467 if (error == 0) { 6468 ASSERT(mp == NULL); 6469 return; 6470 } 6471 6472 UDP_STAT(us, udp_out_err_output); 6473 ASSERT(mp != NULL); 6474 /* mp is freed by the following routine */ 6475 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6476 (t_scalar_t)error); 6477 } 6478 6479 /* 6480 * This routine handles all messages passed downstream. It either 6481 * consumes the message or passes it downstream; it never queues a 6482 * a message. 6483 * 6484 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 6485 * is valid when we are directly beneath the stream head, and thus sockfs 6486 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6487 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 6488 * connected endpoints. 6489 */ 6490 void 6491 udp_wput(queue_t *q, mblk_t *mp) 6492 { 6493 sin6_t *sin6; 6494 sin_t *sin; 6495 ipaddr_t v4dst; 6496 uint16_t port; 6497 uint_t srcid; 6498 conn_t *connp = Q_TO_CONN(q); 6499 udp_t *udp = connp->conn_udp; 6500 int error = 0; 6501 struct sockaddr *addr; 6502 socklen_t addrlen; 6503 udp_stack_t *us = udp->udp_us; 6504 boolean_t insert_spi = udp->udp_nat_t_endpoint; 6505 6506 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6507 "udp_wput_start: queue %p mp %p", q, mp); 6508 6509 /* 6510 * We directly handle several cases here: T_UNITDATA_REQ message 6511 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 6512 * socket. 6513 */ 6514 switch (DB_TYPE(mp)) { 6515 case M_DATA: 6516 /* 6517 * Quick check for error cases. 
Checks will be done again 6518 * under the lock later on 6519 */ 6520 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6521 /* Not connected; address is required */ 6522 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6523 UDP_STAT(us, udp_out_err_notconn); 6524 freemsg(mp); 6525 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6526 "udp_wput_end: connp %p (%S)", connp, 6527 "not-connected; address required"); 6528 return; 6529 } 6530 udp_output_connected(connp, mp); 6531 return; 6532 6533 case M_PROTO: 6534 case M_PCPROTO: { 6535 struct T_unitdata_req *tudr; 6536 6537 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6538 tudr = (struct T_unitdata_req *)mp->b_rptr; 6539 6540 /* Handle valid T_UNITDATA_REQ here */ 6541 if (MBLKL(mp) >= sizeof (*tudr) && 6542 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6543 if (mp->b_cont == NULL) { 6544 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6545 "udp_wput_end: q %p (%S)", q, "badaddr"); 6546 error = EPROTO; 6547 goto ud_error; 6548 } 6549 6550 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6551 tudr->DEST_length)) { 6552 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6553 "udp_wput_end: q %p (%S)", q, "badaddr"); 6554 error = EADDRNOTAVAIL; 6555 goto ud_error; 6556 } 6557 /* 6558 * If a port has not been bound to the stream, fail. 6559 * This is not a problem when sockfs is directly 6560 * above us, because it will ensure that the socket 6561 * is first bound before allowing data to be sent. 6562 */ 6563 if (udp->udp_state == TS_UNBND) { 6564 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6565 "udp_wput_end: q %p (%S)", q, "outstate"); 6566 error = EPROTO; 6567 goto ud_error; 6568 } 6569 addr = (struct sockaddr *) 6570 &mp->b_rptr[tudr->DEST_offset]; 6571 addrlen = tudr->DEST_length; 6572 if (tudr->OPT_length != 0) 6573 UDP_STAT(us, udp_out_opt); 6574 break; 6575 } 6576 /* FALLTHRU */ 6577 } 6578 default: 6579 udp_wput_other(q, mp); 6580 return; 6581 } 6582 ASSERT(addr != NULL); 6583 6584 switch (udp->udp_family) { 6585 case AF_INET6: 6586 sin6 = (sin6_t *)addr; 6587 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 6588 (sin6->sin6_family != AF_INET6)) { 6589 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6590 "udp_wput_end: q %p (%S)", q, "badaddr"); 6591 error = EADDRNOTAVAIL; 6592 goto ud_error; 6593 } 6594 6595 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6596 /* 6597 * Destination is a non-IPv4-compatible IPv6 address. 6598 * Send out an IPv6 format packet. 6599 */ 6600 mp = udp_output_v6(connp, mp, sin6, &error); 6601 if (error != 0) 6602 goto ud_error; 6603 6604 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6605 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6606 return; 6607 } 6608 /* 6609 * If the local address is not zero or a mapped address 6610 * return an error. It would be possible to send an IPv4 6611 * packet but the response would never make it back to the 6612 * application since it is bound to a non-mapped address. 
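 * For example, a socket bound to a native (non-mapped) IPv6
 * address but sending to an IPv4-mapped destination would elicit
 * IPv4 replies that could never match its local binding.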
6613 */ 6614 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6615 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6616 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6617 "udp_wput_end: q %p (%S)", q, "badaddr"); 6618 error = EADDRNOTAVAIL; 6619 goto ud_error; 6620 } 6621 /* Send IPv4 packet without modifying udp_ipversion */ 6622 /* Extract port and ipaddr */ 6623 port = sin6->sin6_port; 6624 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6625 srcid = sin6->__sin6_src_id; 6626 break; 6627 6628 case AF_INET: 6629 sin = (sin_t *)addr; 6630 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 6631 (sin->sin_family != AF_INET)) { 6632 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6633 "udp_wput_end: q %p (%S)", q, "badaddr"); 6634 error = EADDRNOTAVAIL; 6635 goto ud_error; 6636 } 6637 /* Extract port and ipaddr */ 6638 port = sin->sin_port; 6639 v4dst = sin->sin_addr.s_addr; 6640 srcid = 0; 6641 break; 6642 } 6643 6644 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi); 6645 if (error != 0) { 6646 ud_error: 6647 UDP_STAT(us, udp_out_err_output); 6648 ASSERT(mp != NULL); 6649 /* mp is freed by the following routine */ 6650 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6651 (t_scalar_t)error); 6652 } 6653 } 6654 6655 /* 6656 * udp_output_v6(): 6657 * Assumes that udp_wput did some sanity checking on the destination 6658 * address. 6659 */ 6660 static mblk_t * 6661 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) 6662 { 6663 ip6_t *ip6h; 6664 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6665 mblk_t *mp1 = mp; 6666 mblk_t *mp2; 6667 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6668 size_t ip_len; 6669 udpha_t *udph; 6670 udp_t *udp = connp->conn_udp; 6671 queue_t *q = connp->conn_wq; 6672 ip6_pkt_t ipp_s; /* For ancillary data options */ 6673 ip6_pkt_t *ipp = &ipp_s; 6674 ip6_pkt_t *tipp; /* temporary ipp */ 6675 uint32_t csum = 0; 6676 uint_t ignore = 0; 6677 uint_t option_exists = 0, is_sticky = 0; 6678 uint8_t *cp; 6679 uint8_t *nxthdr_ptr; 6680 in6_addr_t ip6_dst; 6681 udpattrs_t attrs; 6682 boolean_t opt_present; 6683 ip6_hbh_t *hopoptsptr = NULL; 6684 uint_t hopoptslen = 0; 6685 boolean_t is_ancillary = B_FALSE; 6686 udp_stack_t *us = udp->udp_us; 6687 size_t sth_wroff = 0; 6688 6689 *error = 0; 6690 6691 /* 6692 * If the local address is a mapped address return 6693 * an error. 6694 * It would be possible to send an IPv6 packet but the 6695 * response would never make it back to the application 6696 * since it is bound to a mapped address. 6697 */ 6698 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6699 *error = EADDRNOTAVAIL; 6700 goto done; 6701 } 6702 6703 ipp->ipp_fields = 0; 6704 ipp->ipp_sticky_ignored = 0; 6705 6706 /* 6707 * If TPI options passed in, feed it for verification and handling 6708 */ 6709 attrs.udpattr_credset = B_FALSE; 6710 opt_present = B_FALSE; 6711 if (DB_TYPE(mp) != M_DATA) { 6712 mp1 = mp->b_cont; 6713 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 6714 attrs.udpattr_ipp6 = ipp; 6715 attrs.udpattr_mb = mp; 6716 if (udp_unitdata_opt_process(q, mp, error, 6717 &attrs) < 0) { 6718 goto done; 6719 } 6720 ASSERT(*error == 0); 6721 opt_present = B_TRUE; 6722 } 6723 } 6724 rw_enter(&udp->udp_rwlock, RW_READER); 6725 ignore = ipp->ipp_sticky_ignored; 6726 6727 /* mp1 points to the M_DATA mblk carrying the packet */ 6728 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6729 6730 if (sin6->sin6_scope_id != 0 && 6731 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6732 /* 6733 * IPPF_SCOPE_ID is special. 
It's neither a sticky 6734 * option nor ancillary data. It needs to be 6735 * explicitly set in options_exists. 6736 */ 6737 option_exists |= IPPF_SCOPE_ID; 6738 } 6739 6740 /* 6741 * Compute the destination address 6742 */ 6743 ip6_dst = sin6->sin6_addr; 6744 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6745 ip6_dst = ipv6_loopback; 6746 6747 /* 6748 * If we're not going to the same destination as last time, then 6749 * recompute the label required. This is done in a separate routine to 6750 * avoid blowing up our stack here. 6751 * 6752 * TSOL Note: Since we are not in WRITER mode, UDP packets 6753 * to different destination may require different labels, 6754 * or worse, UDP packets to same IP address may require 6755 * different labels due to use of shared all-zones address. 6756 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6757 * and sticky ipp_hopoptslen are consistent for the current 6758 * destination and are updated atomically. 6759 */ 6760 mutex_enter(&connp->conn_lock); 6761 if (is_system_labeled()) { 6762 /* Using UDP MLP requires SCM_UCRED from user */ 6763 if (connp->conn_mlp_type != mlptSingle && 6764 !attrs.udpattr_credset) { 6765 DTRACE_PROBE4( 6766 tx__ip__log__info__output__udp6, 6767 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6768 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6769 *error = ECONNREFUSED; 6770 rw_exit(&udp->udp_rwlock); 6771 mutex_exit(&connp->conn_lock); 6772 goto done; 6773 } 6774 /* 6775 * update label option for this UDP socket if 6776 * - the destination has changed, or 6777 * - the UDP socket is MLP 6778 */ 6779 if ((opt_present || 6780 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6781 connp->conn_mlp_type != mlptSingle) && 6782 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 6783 rw_exit(&udp->udp_rwlock); 6784 mutex_exit(&connp->conn_lock); 6785 goto done; 6786 } 6787 } 6788 6789 /* 6790 * If there's a security label here, then we ignore any options the 6791 * user may try to set. We keep the peer's label as a hidden sticky 6792 * option. We make a private copy of this label before releasing the 6793 * lock so that label is kept consistent with the destination addr. 6794 */ 6795 if (udp->udp_label_len_v6 > 0) { 6796 ignore &= ~IPPF_HOPOPTS; 6797 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6798 } 6799 6800 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6801 /* No sticky options nor ancillary data. */ 6802 mutex_exit(&connp->conn_lock); 6803 goto no_options; 6804 } 6805 6806 /* 6807 * Go through the options figuring out where each is going to 6808 * come from and build two masks. The first mask indicates if 6809 * the option exists at all. The second mask indicates if the 6810 * option is sticky or ancillary. 
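 * Ancillary data in ipp always takes precedence over the
 * corresponding sticky option in udp_sticky_ipp: a bit is set in
 * is_sticky only when the option comes from the sticky copy, and
 * ANCIL_OR_STICKY_PTR() later uses that mask to decide which
 * ip6_pkt_t each option is read from.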
6811 */ 6812 if (!(ignore & IPPF_HOPOPTS)) { 6813 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6814 option_exists |= IPPF_HOPOPTS; 6815 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6816 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6817 option_exists |= IPPF_HOPOPTS; 6818 is_sticky |= IPPF_HOPOPTS; 6819 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6820 hopoptsptr = kmem_alloc( 6821 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6822 if (hopoptsptr == NULL) { 6823 *error = ENOMEM; 6824 mutex_exit(&connp->conn_lock); 6825 goto done; 6826 } 6827 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6828 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6829 hopoptslen); 6830 udp_ip_hdr_len += hopoptslen; 6831 } 6832 } 6833 mutex_exit(&connp->conn_lock); 6834 6835 if (!(ignore & IPPF_RTHDR)) { 6836 if (ipp->ipp_fields & IPPF_RTHDR) { 6837 option_exists |= IPPF_RTHDR; 6838 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6839 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6840 option_exists |= IPPF_RTHDR; 6841 is_sticky |= IPPF_RTHDR; 6842 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6843 } 6844 } 6845 6846 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6847 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6848 option_exists |= IPPF_RTDSTOPTS; 6849 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6850 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6851 option_exists |= IPPF_RTDSTOPTS; 6852 is_sticky |= IPPF_RTDSTOPTS; 6853 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6854 } 6855 } 6856 6857 if (!(ignore & IPPF_DSTOPTS)) { 6858 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6859 option_exists |= IPPF_DSTOPTS; 6860 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6861 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6862 option_exists |= IPPF_DSTOPTS; 6863 is_sticky |= IPPF_DSTOPTS; 6864 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6865 } 6866 } 6867 6868 if (!(ignore & IPPF_IFINDEX)) { 6869 if (ipp->ipp_fields & IPPF_IFINDEX) { 6870 option_exists |= IPPF_IFINDEX; 6871 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6872 option_exists |= IPPF_IFINDEX; 6873 is_sticky |= IPPF_IFINDEX; 6874 } 6875 } 6876 6877 if (!(ignore & IPPF_ADDR)) { 6878 if (ipp->ipp_fields & IPPF_ADDR) { 6879 option_exists |= IPPF_ADDR; 6880 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6881 option_exists |= IPPF_ADDR; 6882 is_sticky |= IPPF_ADDR; 6883 } 6884 } 6885 6886 if (!(ignore & IPPF_DONTFRAG)) { 6887 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6888 option_exists |= IPPF_DONTFRAG; 6889 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6890 option_exists |= IPPF_DONTFRAG; 6891 is_sticky |= IPPF_DONTFRAG; 6892 } 6893 } 6894 6895 if (!(ignore & IPPF_USE_MIN_MTU)) { 6896 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6897 option_exists |= IPPF_USE_MIN_MTU; 6898 } else if (udp->udp_sticky_ipp.ipp_fields & 6899 IPPF_USE_MIN_MTU) { 6900 option_exists |= IPPF_USE_MIN_MTU; 6901 is_sticky |= IPPF_USE_MIN_MTU; 6902 } 6903 } 6904 6905 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6906 option_exists |= IPPF_HOPLIMIT; 6907 /* IPV6_HOPLIMIT can never be sticky */ 6908 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6909 6910 if (!(ignore & IPPF_UNICAST_HOPS) && 6911 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6912 option_exists |= IPPF_UNICAST_HOPS; 6913 is_sticky |= IPPF_UNICAST_HOPS; 6914 } 6915 6916 if (!(ignore & IPPF_MULTICAST_HOPS) && 6917 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6918 option_exists |= 
IPPF_MULTICAST_HOPS; 6919 is_sticky |= IPPF_MULTICAST_HOPS; 6920 } 6921 6922 if (!(ignore & IPPF_TCLASS)) { 6923 if (ipp->ipp_fields & IPPF_TCLASS) { 6924 option_exists |= IPPF_TCLASS; 6925 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6926 option_exists |= IPPF_TCLASS; 6927 is_sticky |= IPPF_TCLASS; 6928 } 6929 } 6930 6931 if (!(ignore & IPPF_NEXTHOP) && 6932 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6933 option_exists |= IPPF_NEXTHOP; 6934 is_sticky |= IPPF_NEXTHOP; 6935 } 6936 6937 no_options: 6938 6939 /* 6940 * If any options carried in the ip6i_t were specified, we 6941 * need to account for the ip6i_t in the data we'll be sending 6942 * down. 6943 */ 6944 if (option_exists & IPPF_HAS_IP6I) 6945 udp_ip_hdr_len += sizeof (ip6i_t); 6946 6947 /* check/fix buffer config, setup pointers into it */ 6948 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6949 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6950 !OK_32PTR(ip6h)) { 6951 6952 /* Try to get everything in a single mblk next time */ 6953 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6954 udp->udp_max_hdr_len = udp_ip_hdr_len; 6955 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6956 } 6957 6958 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6959 if (mp2 == NULL) { 6960 *error = ENOMEM; 6961 rw_exit(&udp->udp_rwlock); 6962 goto done; 6963 } 6964 mp2->b_wptr = DB_LIM(mp2); 6965 mp2->b_cont = mp1; 6966 mp1 = mp2; 6967 if (DB_TYPE(mp) != M_DATA) 6968 mp->b_cont = mp1; 6969 else 6970 mp = mp1; 6971 6972 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6973 } 6974 mp1->b_rptr = (unsigned char *)ip6h; 6975 ip6i = (ip6i_t *)ip6h; 6976 6977 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6978 if (option_exists & IPPF_HAS_IP6I) { 6979 ip6h = (ip6_t *)&ip6i[1]; 6980 ip6i->ip6i_flags = 0; 6981 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6982 6983 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6984 if (option_exists & IPPF_SCOPE_ID) { 6985 ip6i->ip6i_flags |= IP6I_IFINDEX; 6986 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6987 } else if (option_exists & IPPF_IFINDEX) { 6988 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6989 ASSERT(tipp->ipp_ifindex != 0); 6990 ip6i->ip6i_flags |= IP6I_IFINDEX; 6991 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6992 } 6993 6994 if (option_exists & IPPF_ADDR) { 6995 /* 6996 * Enable per-packet source address verification if 6997 * IPV6_PKTINFO specified the source address. 6998 * ip6_src is set in the transport's _wput function. 6999 */ 7000 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 7001 } 7002 7003 if (option_exists & IPPF_DONTFRAG) { 7004 ip6i->ip6i_flags |= IP6I_DONTFRAG; 7005 } 7006 7007 if (option_exists & IPPF_USE_MIN_MTU) { 7008 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 7009 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 7010 } 7011 7012 if (option_exists & IPPF_NEXTHOP) { 7013 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 7014 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 7015 ip6i->ip6i_flags |= IP6I_NEXTHOP; 7016 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 7017 } 7018 7019 /* 7020 * tell IP this is an ip6i_t private header 7021 */ 7022 ip6i->ip6i_nxt = IPPROTO_RAW; 7023 } 7024 7025 /* Initialize IPv6 header */ 7026 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7027 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7028 7029 /* Set the hoplimit of the outgoing packet. */ 7030 if (option_exists & IPPF_HOPLIMIT) { 7031 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
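 * Otherwise multicast destinations use udp_multicast_ttl and
 * unicast destinations use udp_ttl, and IP6I_HOPLIMIT is set only
 * when the matching IPV6_MULTICAST_HOPS or IPV6_UNICAST_HOPS
 * sticky option was specified.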
*/ 7032 ip6h->ip6_hops = ipp->ipp_hoplimit; 7033 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7034 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7035 ip6h->ip6_hops = udp->udp_multicast_ttl; 7036 if (option_exists & IPPF_MULTICAST_HOPS) 7037 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7038 } else { 7039 ip6h->ip6_hops = udp->udp_ttl; 7040 if (option_exists & IPPF_UNICAST_HOPS) 7041 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7042 } 7043 7044 if (option_exists & IPPF_ADDR) { 7045 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7046 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7047 ip6h->ip6_src = tipp->ipp_addr; 7048 } else { 7049 /* 7050 * The source address was not set using IPV6_PKTINFO. 7051 * First look at the bound source. 7052 * If unspecified fallback to __sin6_src_id. 7053 */ 7054 ip6h->ip6_src = udp->udp_v6src; 7055 if (sin6->__sin6_src_id != 0 && 7056 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7057 ip_srcid_find_id(sin6->__sin6_src_id, 7058 &ip6h->ip6_src, connp->conn_zoneid, 7059 us->us_netstack); 7060 } 7061 } 7062 7063 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7064 cp = (uint8_t *)&ip6h[1]; 7065 7066 /* 7067 * Here's where we have to start stringing together 7068 * any extension headers in the right order: 7069 * Hop-by-hop, destination, routing, and final destination opts. 7070 */ 7071 if (option_exists & IPPF_HOPOPTS) { 7072 /* Hop-by-hop options */ 7073 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7074 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7075 if (hopoptslen == 0) { 7076 hopoptsptr = tipp->ipp_hopopts; 7077 hopoptslen = tipp->ipp_hopoptslen; 7078 is_ancillary = B_TRUE; 7079 } 7080 7081 *nxthdr_ptr = IPPROTO_HOPOPTS; 7082 nxthdr_ptr = &hbh->ip6h_nxt; 7083 7084 bcopy(hopoptsptr, cp, hopoptslen); 7085 cp += hopoptslen; 7086 7087 if (hopoptsptr != NULL && !is_ancillary) { 7088 kmem_free(hopoptsptr, hopoptslen); 7089 hopoptsptr = NULL; 7090 hopoptslen = 0; 7091 } 7092 } 7093 /* 7094 * En-route destination options 7095 * Only do them if there's a routing header as well 7096 */ 7097 if (option_exists & IPPF_RTDSTOPTS) { 7098 ip6_dest_t *dst = (ip6_dest_t *)cp; 7099 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7100 7101 *nxthdr_ptr = IPPROTO_DSTOPTS; 7102 nxthdr_ptr = &dst->ip6d_nxt; 7103 7104 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7105 cp += tipp->ipp_rtdstoptslen; 7106 } 7107 /* 7108 * Routing header next 7109 */ 7110 if (option_exists & IPPF_RTHDR) { 7111 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7112 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7113 7114 *nxthdr_ptr = IPPROTO_ROUTING; 7115 nxthdr_ptr = &rt->ip6r_nxt; 7116 7117 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7118 cp += tipp->ipp_rthdrlen; 7119 } 7120 /* 7121 * Do ultimate destination options 7122 */ 7123 if (option_exists & IPPF_DSTOPTS) { 7124 ip6_dest_t *dest = (ip6_dest_t *)cp; 7125 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7126 7127 *nxthdr_ptr = IPPROTO_DSTOPTS; 7128 nxthdr_ptr = &dest->ip6d_nxt; 7129 7130 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7131 cp += tipp->ipp_dstoptslen; 7132 } 7133 /* 7134 * Now set the last header pointer to the proto passed in 7135 */ 7136 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7137 *nxthdr_ptr = IPPROTO_UDP; 7138 7139 /* Update UDP header */ 7140 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 7141 udph->uha_dst_port = sin6->sin6_port; 7142 udph->uha_src_port = udp->udp_port; 7143 7144 /* 7145 * Copy in the destination address 7146 */ 7147 ip6h->ip6_dst = ip6_dst; 7148 7149 ip6h->ip6_vcf = 7150 (IPV6_DEFAULT_VERS_AND_FLOW & 
IPV6_VERS_AND_FLOW_MASK) | 7151 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7152 7153 if (option_exists & IPPF_TCLASS) { 7154 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7155 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7156 tipp->ipp_tclass); 7157 } 7158 rw_exit(&udp->udp_rwlock); 7159 7160 if (option_exists & IPPF_RTHDR) { 7161 ip6_rthdr_t *rth; 7162 7163 /* 7164 * Perform any processing needed for source routing. 7165 * We know that all extension headers will be in the same mblk 7166 * as the IPv6 header. 7167 */ 7168 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7169 if (rth != NULL && rth->ip6r_segleft != 0) { 7170 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7171 /* 7172 * Drop packet - only support Type 0 routing. 7173 * Notify the application as well. 7174 */ 7175 *error = EPROTO; 7176 goto done; 7177 } 7178 7179 /* 7180 * rth->ip6r_len is twice the number of 7181 * addresses in the header. Thus it must be even. 7182 */ 7183 if (rth->ip6r_len & 0x1) { 7184 *error = EPROTO; 7185 goto done; 7186 } 7187 /* 7188 * Shuffle the routing header and ip6_dst 7189 * addresses, and get the checksum difference 7190 * between the first hop (in ip6_dst) and 7191 * the destination (in the last routing hdr entry). 7192 */ 7193 csum = ip_massage_options_v6(ip6h, rth, 7194 us->us_netstack); 7195 /* 7196 * Verify that the first hop isn't a mapped address. 7197 * Routers along the path need to do this verification 7198 * for subsequent hops. 7199 */ 7200 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7201 *error = EADDRNOTAVAIL; 7202 goto done; 7203 } 7204 7205 cp += (rth->ip6r_len + 1)*8; 7206 } 7207 } 7208 7209 /* count up length of UDP packet */ 7210 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7211 if ((mp2 = mp1->b_cont) != NULL) { 7212 do { 7213 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7214 ip_len += (uint32_t)MBLKL(mp2); 7215 } while ((mp2 = mp2->b_cont) != NULL); 7216 } 7217 7218 /* 7219 * If the size of the packet is greater than the maximum allowed by 7220 * ip, return an error. Passing this down could cause panics because 7221 * the size will have wrapped and be inconsistent with the msg size. 7222 */ 7223 if (ip_len > IP_MAXPACKET) { 7224 *error = EMSGSIZE; 7225 goto done; 7226 } 7227 7228 /* Store the UDP length. Subtract length of extension hdrs */ 7229 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7230 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7231 7232 /* 7233 * We make it easy for IP to include our pseudo header 7234 * by putting our length in uh_checksum, modified (if 7235 * we have a routing header) by the checksum difference 7236 * between the ultimate destination and first hop addresses. 7237 * Note: UDP over IPv6 must always checksum the packet. 7238 */ 7239 csum += udph->uha_length; 7240 csum = (csum & 0xFFFF) + (csum >> 16); 7241 udph->uha_checksum = (uint16_t)csum; 7242 7243 #ifdef _LITTLE_ENDIAN 7244 ip_len = htons(ip_len); 7245 #endif 7246 ip6h->ip6_plen = ip_len; 7247 if (DB_CRED(mp) != NULL) 7248 mblk_setcred(mp1, DB_CRED(mp)); 7249 7250 if (DB_TYPE(mp) != M_DATA) { 7251 ASSERT(mp != mp1); 7252 freeb(mp); 7253 } 7254 7255 /* mp has been consumed and we'll return success */ 7256 ASSERT(*error == 0); 7257 mp = NULL; 7258 7259 /* We're done. 
Pass the packet to IP */ 7260 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 7261 ip_output_v6(connp, mp1, q, IP_WPUT); 7262 7263 done: 7264 if (sth_wroff != 0) { 7265 (void) mi_set_sth_wroff(RD(q), 7266 udp->udp_max_hdr_len + us->us_wroff_extra); 7267 } 7268 if (hopoptsptr != NULL && !is_ancillary) { 7269 kmem_free(hopoptsptr, hopoptslen); 7270 hopoptsptr = NULL; 7271 } 7272 if (*error != 0) { 7273 ASSERT(mp != NULL); 7274 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 7275 } 7276 return (mp); 7277 } 7278 7279 7280 static int 7281 udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 7282 { 7283 sin_t *sin = (sin_t *)sa; 7284 sin6_t *sin6 = (sin6_t *)sa; 7285 7286 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 7287 7288 if (udp->udp_state != TS_DATA_XFER) 7289 return (ENOTCONN); 7290 7291 switch (udp->udp_family) { 7292 case AF_INET: 7293 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7294 7295 if (*salenp < sizeof (sin_t)) 7296 return (EINVAL); 7297 7298 *salenp = sizeof (sin_t); 7299 *sin = sin_null; 7300 sin->sin_family = AF_INET; 7301 sin->sin_port = udp->udp_dstport; 7302 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 7303 break; 7304 7305 case AF_INET6: 7306 if (*salenp < sizeof (sin6_t)) 7307 return (EINVAL); 7308 7309 *salenp = sizeof (sin6_t); 7310 *sin6 = sin6_null; 7311 sin6->sin6_family = AF_INET6; 7312 sin6->sin6_port = udp->udp_dstport; 7313 sin6->sin6_addr = udp->udp_v6dst; 7314 sin6->sin6_flowinfo = udp->udp_flowinfo; 7315 break; 7316 } 7317 7318 return (0); 7319 } 7320 7321 static int 7322 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 7323 { 7324 sin_t *sin = (sin_t *)sa; 7325 sin6_t *sin6 = (sin6_t *)sa; 7326 7327 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 7328 7329 switch (udp->udp_family) { 7330 case AF_INET: 7331 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7332 7333 if (*salenp < sizeof (sin_t)) 7334 return (EINVAL); 7335 7336 *salenp = sizeof (sin_t); 7337 *sin = sin_null; 7338 sin->sin_family = AF_INET; 7339 sin->sin_port = udp->udp_port; 7340 7341 /* 7342 * If udp_v6src is unspecified, we might be bound to broadcast 7343 * / multicast. Use udp_bound_v6src as local address instead 7344 * (that could also still be unspecified). 7345 */ 7346 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7347 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7348 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 7349 } else { 7350 sin->sin_addr.s_addr = 7351 V4_PART_OF_V6(udp->udp_bound_v6src); 7352 } 7353 break; 7354 7355 case AF_INET6: 7356 if (*salenp < sizeof (sin6_t)) 7357 return (EINVAL); 7358 7359 *salenp = sizeof (sin6_t); 7360 *sin6 = sin6_null; 7361 sin6->sin6_family = AF_INET6; 7362 sin6->sin6_port = udp->udp_port; 7363 sin6->sin6_flowinfo = udp->udp_flowinfo; 7364 7365 /* 7366 * If udp_v6src is unspecified, we might be bound to broadcast 7367 * / multicast. Use udp_bound_v6src as local address instead 7368 * (that could also still be unspecified). 7369 */ 7370 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 7371 sin6->sin6_addr = udp->udp_v6src; 7372 else 7373 sin6->sin6_addr = udp->udp_bound_v6src; 7374 break; 7375 } 7376 7377 return (0); 7378 } 7379 7380 /* 7381 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
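 * The request arrives as an M_CMD message: the cmdblk_t at b_rptr
 * names the command (TI_GETPEERNAME or TI_GETMYNAME) and b_cont
 * supplies the buffer the address is written into; cb_error and
 * cb_len are updated and the message is turned around with qreply().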
7382 */ 7383 static void 7384 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 7385 { 7386 void *data; 7387 mblk_t *datamp = mp->b_cont; 7388 udp_t *udp = Q_TO_UDP(q); 7389 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 7390 7391 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 7392 cmdp->cb_error = EPROTO; 7393 qreply(q, mp); 7394 return; 7395 } 7396 data = datamp->b_rptr; 7397 7398 rw_enter(&udp->udp_rwlock, RW_READER); 7399 switch (cmdp->cb_cmd) { 7400 case TI_GETPEERNAME: 7401 cmdp->cb_error = udp_getpeername(udp, data, &cmdp->cb_len); 7402 break; 7403 case TI_GETMYNAME: 7404 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 7405 break; 7406 default: 7407 cmdp->cb_error = EINVAL; 7408 break; 7409 } 7410 rw_exit(&udp->udp_rwlock); 7411 7412 qreply(q, mp); 7413 } 7414 7415 static void 7416 udp_wput_other(queue_t *q, mblk_t *mp) 7417 { 7418 uchar_t *rptr = mp->b_rptr; 7419 struct datab *db; 7420 struct iocblk *iocp; 7421 cred_t *cr; 7422 conn_t *connp = Q_TO_CONN(q); 7423 udp_t *udp = connp->conn_udp; 7424 udp_stack_t *us; 7425 7426 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7427 "udp_wput_other_start: q %p", q); 7428 7429 us = udp->udp_us; 7430 db = mp->b_datap; 7431 7432 cr = DB_CREDDEF(mp, connp->conn_cred); 7433 7434 switch (db->db_type) { 7435 case M_CMD: 7436 udp_wput_cmdblk(q, mp); 7437 return; 7438 7439 case M_PROTO: 7440 case M_PCPROTO: 7441 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7442 freemsg(mp); 7443 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7444 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 7445 return; 7446 } 7447 switch (((t_primp_t)rptr)->type) { 7448 case T_ADDR_REQ: 7449 udp_addr_req(q, mp); 7450 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7451 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7452 return; 7453 case O_T_BIND_REQ: 7454 case T_BIND_REQ: 7455 udp_bind(q, mp); 7456 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7457 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7458 return; 7459 case T_CONN_REQ: 7460 udp_connect(q, mp); 7461 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7462 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7463 return; 7464 case T_CAPABILITY_REQ: 7465 udp_capability_req(q, mp); 7466 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7467 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7468 return; 7469 case T_INFO_REQ: 7470 udp_info_req(q, mp); 7471 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7472 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7473 return; 7474 case T_UNITDATA_REQ: 7475 /* 7476 * If a T_UNITDATA_REQ gets here, the address must 7477 * be bad. Valid T_UNITDATA_REQs are handled 7478 * in udp_wput. 
7479 */ 7480 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7481 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7482 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 7483 return; 7484 case T_UNBIND_REQ: 7485 udp_unbind(q, mp); 7486 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7487 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7488 return; 7489 case T_SVR4_OPTMGMT_REQ: 7490 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 7491 cr)) { 7492 (void) svr4_optcom_req(q, 7493 mp, cr, &udp_opt_obj, B_TRUE); 7494 } 7495 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7496 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7497 return; 7498 7499 case T_OPTMGMT_REQ: 7500 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 7501 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7502 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7503 return; 7504 7505 case T_DISCON_REQ: 7506 udp_disconnect(q, mp); 7507 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7508 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 7509 return; 7510 7511 /* The following TPI message is not supported by udp. */ 7512 case O_T_CONN_RES: 7513 case T_CONN_RES: 7514 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7515 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7516 "udp_wput_other_end: q %p (%S)", q, 7517 "connres/disconreq"); 7518 return; 7519 7520 /* The following 3 TPI messages are illegal for udp. */ 7521 case T_DATA_REQ: 7522 case T_EXDATA_REQ: 7523 case T_ORDREL_REQ: 7524 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7525 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7526 "udp_wput_other_end: q %p (%S)", q, 7527 "data/exdata/ordrel"); 7528 return; 7529 default: 7530 break; 7531 } 7532 break; 7533 case M_FLUSH: 7534 if (*rptr & FLUSHW) 7535 flushq(q, FLUSHDATA); 7536 break; 7537 case M_IOCTL: 7538 iocp = (struct iocblk *)mp->b_rptr; 7539 switch (iocp->ioc_cmd) { 7540 case TI_GETPEERNAME: 7541 if (udp->udp_state != TS_DATA_XFER) { 7542 /* 7543 * If a default destination address has not 7544 * been associated with the stream, then we 7545 * don't know the peer's name. 7546 */ 7547 iocp->ioc_error = ENOTCONN; 7548 iocp->ioc_count = 0; 7549 mp->b_datap->db_type = M_IOCACK; 7550 qreply(q, mp); 7551 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7552 "udp_wput_other_end: q %p (%S)", q, 7553 "getpeername"); 7554 return; 7555 } 7556 /* FALLTHRU */ 7557 case TI_GETMYNAME: { 7558 /* 7559 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7560 * need to copyin the user's strbuf structure. 7561 * Processing will continue in the M_IOCDATA case 7562 * below. 7563 */ 7564 mi_copyin(q, mp, NULL, 7565 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7566 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7567 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 7568 return; 7569 } 7570 case ND_SET: 7571 /* nd_getset performs the necessary checking */ 7572 case ND_GET: 7573 if (nd_getset(q, us->us_nd, mp)) { 7574 qreply(q, mp); 7575 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7576 "udp_wput_other_end: q %p (%S)", q, "get"); 7577 return; 7578 } 7579 break; 7580 case _SIOCSOCKFALLBACK: 7581 /* 7582 * Either sockmod is about to be popped and the 7583 * socket would now be treated as a plain stream, 7584 * or a module is about to be pushed so we could 7585 * no longer use read-side synchronous stream. 7586 * Drain any queued data and disable direct sockfs 7587 * interface from now on. 
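 * Once the fallback happens, udp_rcv_drain() pushes anything on
 * the receive list upstream and udp_direct_sockfs is left clear
 * (the ASSERT below relies on this), so udp_rrw() is no longer
 * used and traffic follows the normal STREAMS path.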
7588 */ 7589 if (!udp->udp_issocket) { 7590 DB_TYPE(mp) = M_IOCNAK; 7591 iocp->ioc_error = EINVAL; 7592 } else { 7593 udp->udp_issocket = B_FALSE; 7594 if (udp->udp_direct_sockfs) { 7595 /* 7596 * Disable read-side synchronous 7597 * stream interface and drain any 7598 * queued data. 7599 */ 7600 udp_rcv_drain(RD(q), udp, 7601 B_FALSE); 7602 ASSERT(!udp->udp_direct_sockfs); 7603 UDP_STAT(us, udp_sock_fallback); 7604 } 7605 DB_TYPE(mp) = M_IOCACK; 7606 iocp->ioc_error = 0; 7607 } 7608 iocp->ioc_count = 0; 7609 iocp->ioc_rval = 0; 7610 qreply(q, mp); 7611 return; 7612 default: 7613 break; 7614 } 7615 break; 7616 case M_IOCDATA: 7617 udp_wput_iocdata(q, mp); 7618 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7619 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7620 return; 7621 default: 7622 /* Unrecognized messages are passed through without change. */ 7623 break; 7624 } 7625 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7626 "udp_wput_other_end: q %p (%S)", q, "end"); 7627 ip_output(connp, mp, q, IP_WPUT); 7628 } 7629 7630 /* 7631 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7632 * messages. 7633 */ 7634 static void 7635 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7636 { 7637 mblk_t *mp1; 7638 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7639 STRUCT_HANDLE(strbuf, sb); 7640 udp_t *udp = Q_TO_UDP(q); 7641 int error; 7642 uint_t addrlen; 7643 7644 /* Make sure it is one of ours. */ 7645 switch (iocp->ioc_cmd) { 7646 case TI_GETMYNAME: 7647 case TI_GETPEERNAME: 7648 break; 7649 default: 7650 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7651 return; 7652 } 7653 7654 switch (mi_copy_state(q, mp, &mp1)) { 7655 case -1: 7656 return; 7657 case MI_COPY_CASE(MI_COPY_IN, 1): 7658 break; 7659 case MI_COPY_CASE(MI_COPY_OUT, 1): 7660 /* 7661 * The address has been copied out, so now 7662 * copyout the strbuf. 7663 */ 7664 mi_copyout(q, mp); 7665 return; 7666 case MI_COPY_CASE(MI_COPY_OUT, 2): 7667 /* 7668 * The address and strbuf have been copied out. 7669 * We're done, so just acknowledge the original 7670 * M_IOCTL. 7671 */ 7672 mi_copy_done(q, mp, 0); 7673 return; 7674 default: 7675 /* 7676 * Something strange has happened, so acknowledge 7677 * the original M_IOCTL with an EPROTO error. 7678 */ 7679 mi_copy_done(q, mp, EPROTO); 7680 return; 7681 } 7682 7683 /* 7684 * Now we have the strbuf structure for TI_GETMYNAME 7685 * and TI_GETPEERNAME. Next we copyout the requested 7686 * address and then we'll copyout the strbuf. 7687 */ 7688 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7689 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t); 7690 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7691 mi_copy_done(q, mp, EINVAL); 7692 return; 7693 } 7694 7695 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7696 if (mp1 == NULL) 7697 return; 7698 7699 rw_enter(&udp->udp_rwlock, RW_READER); 7700 switch (iocp->ioc_cmd) { 7701 case TI_GETMYNAME: 7702 error = udp_getmyname(udp, (void *)mp1->b_rptr, &addrlen); 7703 break; 7704 case TI_GETPEERNAME: 7705 error = udp_getpeername(udp, (void *)mp1->b_rptr, &addrlen); 7706 break; 7707 } 7708 rw_exit(&udp->udp_rwlock); 7709 7710 if (error != 0) { 7711 mi_copy_done(q, mp, error); 7712 } else { 7713 mp1->b_wptr += addrlen; 7714 STRUCT_FSET(sb, len, addrlen); 7715 7716 /* Copy out the address */ 7717 mi_copyout(q, mp); 7718 } 7719 } 7720 7721 static int 7722 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7723 udpattrs_t *udpattrs) 7724 { 7725 struct T_unitdata_req *udreqp; 7726 int is_absreq_failure; 7727 cred_t *cr; 7728 conn_t *connp = Q_TO_CONN(q); 7729 7730 ASSERT(((t_primp_t)mp->b_rptr)->type); 7731 7732 cr = DB_CREDDEF(mp, connp->conn_cred); 7733 7734 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7735 7736 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length, 7737 udreqp->OPT_offset, cr, &udp_opt_obj, 7738 udpattrs, &is_absreq_failure); 7739 7740 if (*errorp != 0) { 7741 /* 7742 * Note: No special action needed in this 7743 * module for "is_absreq_failure" 7744 */ 7745 return (-1); /* failure */ 7746 } 7747 ASSERT(is_absreq_failure == 0); 7748 return (0); /* success */ 7749 } 7750 7751 void 7752 udp_ddi_init(void) 7753 { 7754 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7755 udp_opt_obj.odb_opt_arr_cnt); 7756 7757 /* 7758 * We want to be informed each time a stack is created or 7759 * destroyed in the kernel, so we can maintain the 7760 * set of udp_stack_t's. 7761 */ 7762 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini); 7763 } 7764 7765 void 7766 udp_ddi_destroy(void) 7767 { 7768 netstack_unregister(NS_UDP); 7769 } 7770 7771 /* 7772 * Initialize the UDP stack instance. 7773 */ 7774 static void * 7775 udp_stack_init(netstackid_t stackid, netstack_t *ns) 7776 { 7777 udp_stack_t *us; 7778 udpparam_t *pa; 7779 int i; 7780 7781 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP); 7782 us->us_netstack = ns; 7783 7784 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 7785 us->us_epriv_ports[0] = 2049; 7786 us->us_epriv_ports[1] = 4045; 7787 7788 /* 7789 * The smallest anonymous port in the priviledged port range which UDP 7790 * looks for free port. Use in the option UDP_ANONPRIVBIND. 7791 */ 7792 us->us_min_anonpriv_port = 512; 7793 7794 us->us_bind_fanout_size = udp_bind_fanout_size; 7795 7796 /* Roundup variable that might have been modified in /etc/system */ 7797 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) { 7798 /* Not a power of two. 
Round up to nearest power of two */ 7799 for (i = 0; i < 31; i++) { 7800 if (us->us_bind_fanout_size < (1 << i)) 7801 break; 7802 } 7803 us->us_bind_fanout_size = 1 << i; 7804 } 7805 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7806 sizeof (udp_fanout_t), KM_SLEEP); 7807 for (i = 0; i < us->us_bind_fanout_size; i++) { 7808 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7809 NULL); 7810 } 7811 7812 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7813 7814 us->us_param_arr = pa; 7815 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7816 7817 (void) udp_param_register(&us->us_nd, 7818 us->us_param_arr, A_CNT(udp_param_arr)); 7819 7820 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7821 us->us_mibkp = udp_kstat_init(stackid); 7822 return (us); 7823 } 7824 7825 /* 7826 * Free the UDP stack instance. 7827 */ 7828 static void 7829 udp_stack_fini(netstackid_t stackid, void *arg) 7830 { 7831 udp_stack_t *us = (udp_stack_t *)arg; 7832 int i; 7833 7834 for (i = 0; i < us->us_bind_fanout_size; i++) { 7835 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7836 } 7837 7838 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7839 sizeof (udp_fanout_t)); 7840 7841 us->us_bind_fanout = NULL; 7842 7843 nd_free(&us->us_nd); 7844 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7845 us->us_param_arr = NULL; 7846 7847 udp_kstat_fini(stackid, us->us_mibkp); 7848 us->us_mibkp = NULL; 7849 7850 udp_kstat2_fini(stackid, us->us_kstat); 7851 us->us_kstat = NULL; 7852 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7853 kmem_free(us, sizeof (*us)); 7854 } 7855 7856 static void * 7857 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7858 { 7859 kstat_t *ksp; 7860 7861 udp_stat_t template = { 7862 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7863 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7864 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7865 { "udp_drain", KSTAT_DATA_UINT64 }, 7866 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7867 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7868 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7869 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7870 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7871 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7872 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7873 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7874 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7875 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7876 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7877 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7878 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7879 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7880 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7881 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7882 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7883 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7884 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7885 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7886 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7887 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7888 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7889 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7890 #ifdef DEBUG 7891 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7892 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7893 #endif 7894 }; 7895 7896 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7897 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7898 KSTAT_FLAG_VIRTUAL, stackid); 7899 7900 if (ksp == NULL) 7901 return (NULL); 7902 7903 bcopy(&template, us_statisticsp, sizeof (template)); 7904 
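/*
 * The kstat was created with KSTAT_FLAG_VIRTUAL, so instead of
 * copying counters at snapshot time we point ks_data directly at
 * the per-stack udp_stat_t that the UDP_STAT() macros update.
 */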
ksp->ks_data = (void *)us_statisticsp;
	ksp->ks_private = (void *)(uintptr_t)stackid;

	kstat_install(ksp);
	return (ksp);
}

static void
udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
{
	if (ksp != NULL) {
		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
		kstat_delete_netstack(ksp, stackid);
	}
}

static void *
udp_kstat_init(netstackid_t stackid)
{
	kstat_t *ksp;

	udp_named_kstat_t template = {
		{ "inDatagrams", KSTAT_DATA_UINT64, 0 },
		{ "inErrors", KSTAT_DATA_UINT32, 0 },
		{ "outDatagrams", KSTAT_DATA_UINT64, 0 },
		{ "entrySize", KSTAT_DATA_INT32, 0 },
		{ "entry6Size", KSTAT_DATA_INT32, 0 },
		{ "outErrors", KSTAT_DATA_UINT32, 0 },
	};

	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
	    KSTAT_TYPE_NAMED,
	    NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);

	if (ksp == NULL || ksp->ks_data == NULL)
		return (NULL);

	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);

	bcopy(&template, ksp->ks_data, sizeof (template));
	ksp->ks_update = udp_kstat_update;
	ksp->ks_private = (void *)(uintptr_t)stackid;

	kstat_install(ksp);
	return (ksp);
}

static void
udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
{
	if (ksp != NULL) {
		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
		kstat_delete_netstack(ksp, stackid);
	}
}

static int
udp_kstat_update(kstat_t *kp, int rw)
{
	udp_named_kstat_t *udpkp;
	netstackid_t stackid;
	netstack_t *ns;
	udp_stack_t *us;

	if ((kp == NULL) || (kp->ks_data == NULL))
		return (EIO);

	if (rw == KSTAT_WRITE)
		return (EACCES);

	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
	ns = netstack_find_by_stackid(stackid);
	if (ns == NULL)
		return (-1);
	us = ns->netstack_udp;
	if (us == NULL) {
		netstack_rele(ns);
		return (-1);
	}
	udpkp = (udp_named_kstat_t *)kp->ks_data;

	udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams;
	udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors;
	udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams;
	udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors;
	netstack_rele(ns);
	return (0);
}

/*
 * Read-side synchronous stream info entry point, called as a
 * result of handling certain STREAMS ioctl operations.
 */
static int
udp_rinfop(queue_t *q, infod_t *dp)
{
	mblk_t *mp;
	uint_t cmd = dp->d_cmd;
	int res = 0;
	int error = 0;
	udp_t *udp = Q_TO_UDP(q);
	struct stdata *stp = STREAM(q);

	mutex_enter(&udp->udp_drain_lock);
	/* If shutdown on read has happened, return nothing */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_flag & STREOF) {
		mutex_exit(&stp->sd_lock);
		goto done;
	}
	mutex_exit(&stp->sd_lock);

	if ((mp = udp->udp_rcv_list_head) == NULL)
		goto done;

	ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);

	if (cmd & INFOD_COUNT) {
		/*
		 * Return the number of messages.
		 */
		dp->d_count += udp->udp_rcv_msgcnt;
		res |= INFOD_COUNT;
	}
	if (cmd & INFOD_BYTES) {
		/*
		 * Return size of all data messages.
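		 * This is the running byte total kept in udp_rcv_cnt by
		 * udp_rcv_enqueue() and udp_rrw(), so no list walk is
		 * needed here.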
		 */
		dp->d_bytes += udp->udp_rcv_cnt;
		res |= INFOD_BYTES;
	}
	if (cmd & INFOD_FIRSTBYTES) {
		/*
		 * Return size of first data message.
		 */
		dp->d_bytes = msgdsize(mp);
		res |= INFOD_FIRSTBYTES;
		dp->d_cmd &= ~INFOD_FIRSTBYTES;
	}
	if (cmd & INFOD_COPYOUT) {
		mblk_t *mp1 = mp->b_cont;
		int n;
		/*
		 * Return data contents of first message.
		 */
		ASSERT(DB_TYPE(mp1) == M_DATA);
		while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
			n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
			if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
			    UIO_READ, dp->d_uiop)) != 0) {
				goto done;
			}
			mp1 = mp1->b_cont;
		}
		res |= INFOD_COPYOUT;
		dp->d_cmd &= ~INFOD_COPYOUT;
	}
done:
	mutex_exit(&udp->udp_drain_lock);

	dp->d_res |= res;

	return (error);
}

/*
 * Read-side synchronous stream entry point.  This is called as a result
 * of a recv/read operation at sockfs, and is guaranteed to execute
 * outside of the interrupt thread context.  It returns a single datagram
 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
 */
static int
udp_rrw(queue_t *q, struiod_t *dp)
{
	mblk_t *mp;
	udp_t *udp = Q_TO_UDP(q);
	udp_stack_t *us = udp->udp_us;

	/*
	 * Dequeue the datagram at the head of the list and return
	 * it to the caller; also ensure that the RSLEEP sd_wakeq flag
	 * is set/cleared depending on whether or not there is data
	 * remaining in the list.
	 */
	mutex_enter(&udp->udp_drain_lock);
	if (!udp->udp_direct_sockfs) {
		mutex_exit(&udp->udp_drain_lock);
		UDP_STAT(us, udp_rrw_busy);
		return (EBUSY);
	}
	if ((mp = udp->udp_rcv_list_head) != NULL) {
		uint_t size = msgdsize(mp);

		/* Last datagram in the list? */
		if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
			udp->udp_rcv_list_tail = NULL;
		mp->b_next = NULL;

		udp->udp_rcv_cnt -= size;
		udp->udp_rcv_msgcnt--;
		UDP_STAT(us, udp_rrw_msgcnt);

		/* No longer flow-controlling? */
		if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
		    udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
			udp->udp_drain_qfull = B_FALSE;
	}
	if (udp->udp_rcv_list_head == NULL) {
		/*
		 * Either we just dequeued the last datagram or
		 * we got here from sockfs and have nothing to
		 * return; in this case clear RSLEEP.
		 */
		ASSERT(udp->udp_rcv_cnt == 0);
		ASSERT(udp->udp_rcv_msgcnt == 0);
		ASSERT(udp->udp_rcv_list_tail == NULL);
		STR_WAKEUP_CLEAR(STREAM(q));
	} else {
		/*
		 * More data follows; we need udp_rrw() to be
		 * called again in the future to pick up the rest.
		 */
		STR_WAKEUP_SET(STREAM(q));
	}
	mutex_exit(&udp->udp_drain_lock);
	dp->d_mp = mp;
	return (0);
}

/*
 * Enqueue a completely-built T_UNITDATA_IND message into the receive
 * list; this is typically executed within the interrupt thread context
 * and so we do things as quickly as possible.
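 * The only lock taken is udp_drain_lock, which pairs with the consumer
 * side in udp_rrw(); callers hand us a fully-formed T_UNITDATA_IND so
 * that udp_rrw() can later pass it to sockfs as-is.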
 */
static void
udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
{
	ASSERT(q == RD(q));
	ASSERT(pkt_len == msgdsize(mp));
	ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
	ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
	ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));

	mutex_enter(&udp->udp_drain_lock);
	/*
	 * Wake up and signal the receiving app; it is okay to do this
	 * before enqueueing the mp because we are holding the drain lock.
	 * One of the advantages of synchronous streams is the ability to
	 * find out when the application performs a read on the socket,
	 * by way of the udp_rrw() entry point being called.  We need to
	 * generate SIGPOLL/SIGIO for each received datagram on an
	 * asynchronous socket, just as in the strrput() case.  However,
	 * we only wake the application up when necessary, i.e. during
	 * the first enqueue.  When udp_rrw() is called, it sends a single
	 * datagram upstream and calls STR_WAKEUP_SET() again if there is
	 * still data remaining in our receive queue.
	 */
	STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head);
	if (udp->udp_rcv_list_head == NULL)
		udp->udp_rcv_list_head = mp;
	else
		udp->udp_rcv_list_tail->b_next = mp;
	udp->udp_rcv_list_tail = mp;
	udp->udp_rcv_cnt += pkt_len;
	udp->udp_rcv_msgcnt++;

	/* Need to flow-control? */
	if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
	    udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
		udp->udp_drain_qfull = B_TRUE;

	mutex_exit(&udp->udp_drain_lock);
}

/*
 * Drain the contents of the receive list to the module upstream; we do
 * this during close or when we fall back to the slow mode because
 * sockmod has been popped or a module has been pushed on top of us.
 */
static void
udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
{
	mblk_t *mp;
	udp_stack_t *us = udp->udp_us;

	ASSERT(q == RD(q));

	mutex_enter(&udp->udp_drain_lock);
	/*
	 * There is no race with a concurrent udp_input() sending
	 * up packets using putnext() after we have cleared the
	 * udp_direct_sockfs flag but before we have completed
	 * sending up the packets in udp_rcv_list, since we are
	 * either a writer or we have quiesced the conn.
	 */
	udp->udp_direct_sockfs = B_FALSE;
	mutex_exit(&udp->udp_drain_lock);

	if (udp->udp_rcv_list_head != NULL)
		UDP_STAT(us, udp_drain);

	/*
	 * Send up everything via putnext(); note here that we
	 * don't need the udp_drain_lock to protect us since
	 * nothing can enter udp_rrw() and we currently
	 * have exclusive access to this udp.
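	 * When we are closing, the queued messages are freed below
	 * rather than being passed upstream.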
	 */
	while ((mp = udp->udp_rcv_list_head) != NULL) {
		udp->udp_rcv_list_head = mp->b_next;
		mp->b_next = NULL;
		udp->udp_rcv_cnt -= msgdsize(mp);
		udp->udp_rcv_msgcnt--;
		if (closing) {
			freemsg(mp);
		} else {
			putnext(q, mp);
		}
	}
	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	udp->udp_rcv_list_tail = NULL;
	udp->udp_drain_qfull = B_FALSE;
}

static size_t
udp_set_rcv_hiwat(udp_t *udp, size_t size)
{
	udp_stack_t *us = udp->udp_us;

	/* Add 50% extra buffering, capped at us_max_buf */
	size += size >> 1;
	if (size > us->us_max_buf)
		size = us->us_max_buf;

	udp->udp_rcv_hiwat = size;
	return (size);
}

/*
 * For the lower queue so that UDP can be a dummy mux.
 * Nobody should be sending packets up this stream.
 */
static void
udp_lrput(queue_t *q, mblk_t *mp)
{
	mblk_t *mp1;

	switch (mp->b_datap->db_type) {
	case M_FLUSH:
		/* Turn around */
		if (*mp->b_rptr & FLUSHW) {
			*mp->b_rptr &= ~FLUSHR;
			qreply(q, mp);
			return;
		}
		break;
	}
	/* Could receive messages that passed through ar_rput */
	for (mp1 = mp; mp1; mp1 = mp1->b_cont)
		mp1->b_prev = mp1->b_next = NULL;
	freemsg(mp);
}

/*
 * For the lower queue so that UDP can be a dummy mux.
 * Nobody should be sending packets down this stream.
 */
/* ARGSUSED */
void
udp_lwput(queue_t *q, mblk_t *mp)
{
	freemsg(mp);
}