1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/pattr.h> 31 #include <sys/stropts.h> 32 #include <sys/strlog.h> 33 #include <sys/strsun.h> 34 #include <sys/time.h> 35 #define _SUN_TPI_VERSION 2 36 #include <sys/tihdr.h> 37 #include <sys/timod.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/strsubr.h> 41 #include <sys/suntpi.h> 42 #include <sys/xti_inet.h> 43 #include <sys/cmn_err.h> 44 #include <sys/kmem.h> 45 #include <sys/policy.h> 46 #include <sys/ucred.h> 47 #include <sys/zone.h> 48 49 #include <sys/socket.h> 50 #include <sys/sockio.h> 51 #include <sys/vtrace.h> 52 #include <sys/sdt.h> 53 #include <sys/debug.h> 54 #include <sys/isa_defs.h> 55 #include <sys/random.h> 56 #include <netinet/in.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 #include <netinet/udp.h> 60 #include <net/if.h> 61 #include <net/route.h> 62 63 #include <inet/common.h> 64 #include <inet/ip.h> 65 #include <inet/ip_impl.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_ire.h> 68 #include <inet/ip_if.h> 69 #include <inet/ip_multi.h> 70 #include <inet/ip_ndp.h> 71 #include <inet/mi.h> 72 #include <inet/mib2.h> 73 #include <inet/nd.h> 74 #include <inet/optcom.h> 75 #include <inet/snmpcom.h> 76 #include <inet/kstatcom.h> 77 #include <inet/udp_impl.h> 78 #include <inet/ipclassifier.h> 79 #include <inet/ipsec_impl.h> 80 #include <inet/ipp_common.h> 81 #include <sys/squeue_impl.h> 82 #include <inet/ipnet.h> 83 84 /* 85 * The ipsec_info.h header file is here since it has the definition for the 86 * M_CTL message types used by IP to convey information to the ULP. The 87 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 88 */ 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 92 #include <sys/tsol/label.h> 93 #include <sys/tsol/tnet.h> 94 #include <rpc/pmap_prot.h> 95 96 /* 97 * Synchronization notes: 98 * 99 * UDP is MT and uses the usual kernel synchronization primitives. There are 2 100 * locks, the fanout lock (uf_lock) and the udp endpoint lock udp_rwlock. 101 * We also use conn_lock when updating things that affect the IP classifier 102 * lookup. 103 * The lock order is udp_rwlock -> uf_lock and is udp_rwlock -> conn_lock. 104 * 105 * The fanout lock uf_lock: 106 * When a UDP endpoint is bound to a local port, it is inserted into 107 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 108 * The size of the array is controlled by the udp_bind_fanout_size variable. 
109 * This variable can be changed in /etc/system if the default value is 110 * not large enough. Each bind hash bucket is protected by a per bucket 111 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 112 * structure and a few other fields in the udp_t. A UDP endpoint is removed 113 * from the bind hash list only when it is being unbound or being closed. 114 * The per bucket lock also protects a UDP endpoint's state changes. 115 * 116 * The udp_rwlock: 117 * This protects most of the other fields in the udp_t. The exact list of 118 * fields which are protected by each of the above locks is documented in 119 * the udp_t structure definition. 120 * 121 * Plumbing notes: 122 * UDP is always a device driver. For compatibility with mibopen() code 123 * it is possible to I_PUSH "udp", but that results in pushing a passthrough 124 * dummy module. 125 * 126 * The above implies that we don't support any intermediate module to 127 * reside in between /dev/ip and udp -- in fact, we never supported such 128 * scenario in the past as the inter-layer communication semantics have 129 * always been private. 130 */ 131 132 /* For /etc/system control */ 133 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 134 135 #define NDD_TOO_QUICK_MSG \ 136 "ndd get info rate too high for non-privileged users, try again " \ 137 "later.\n" 138 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 139 140 /* Option processing attrs */ 141 typedef struct udpattrs_s { 142 union { 143 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 144 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 145 } udpattr_ippu; 146 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 147 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 148 mblk_t *udpattr_mb; 149 boolean_t udpattr_credset; 150 } udpattrs_t; 151 152 static void udp_addr_req(queue_t *q, mblk_t *mp); 153 static void udp_bind(queue_t *q, mblk_t *mp); 154 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 155 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 156 static void udp_bind_result(conn_t *, mblk_t *); 157 static void udp_bind_ack(conn_t *, mblk_t *mp); 158 static void udp_bind_error(conn_t *, mblk_t *mp); 159 static int udp_build_hdrs(udp_t *udp); 160 static void udp_capability_req(queue_t *q, mblk_t *mp); 161 static int udp_close(queue_t *q); 162 static void udp_connect(queue_t *q, mblk_t *mp); 163 static void udp_disconnect(queue_t *q, mblk_t *mp); 164 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 165 int sys_error); 166 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 167 t_scalar_t tlierr, int unixerr); 168 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 169 cred_t *cr); 170 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 171 char *value, caddr_t cp, cred_t *cr); 172 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 173 char *value, caddr_t cp, cred_t *cr); 174 static void udp_icmp_error(queue_t *q, mblk_t *mp); 175 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 176 static void udp_info_req(queue_t *q, mblk_t *mp); 177 static void udp_input(void *, mblk_t *, void *); 178 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 179 t_scalar_t addr_length); 180 static void udp_lrput(queue_t *, mblk_t *); 181 static void udp_lwput(queue_t *, mblk_t *); 182 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 183 cred_t *credp, boolean_t isv6); 184 static int udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, 185 cred_t 
*credp); 186 static int udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, 187 cred_t *credp); 188 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 189 int *errorp, udpattrs_t *udpattrs); 190 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 191 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 192 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt); 193 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 194 cred_t *cr); 195 static void udp_report_item(mblk_t *mp, udp_t *udp); 196 static int udp_rinfop(queue_t *q, infod_t *dp); 197 static int udp_rrw(queue_t *q, struiod_t *dp); 198 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 199 cred_t *cr); 200 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, 201 ipha_t *ipha); 202 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 203 t_scalar_t destlen, t_scalar_t err); 204 static void udp_unbind(queue_t *q, mblk_t *mp); 205 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 206 boolean_t random); 207 static mblk_t *udp_output_v4(conn_t *, mblk_t *, ipaddr_t, uint16_t, uint_t, 208 int *, boolean_t); 209 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 210 int *error); 211 static void udp_wput_other(queue_t *q, mblk_t *mp); 212 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 213 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 214 215 static void *udp_stack_init(netstackid_t stackid, netstack_t *ns); 216 static void udp_stack_fini(netstackid_t stackid, void *arg); 217 218 static void *udp_kstat_init(netstackid_t stackid); 219 static void udp_kstat_fini(netstackid_t stackid, kstat_t *ksp); 220 static void *udp_kstat2_init(netstackid_t, udp_stat_t *); 221 static void udp_kstat2_fini(netstackid_t, kstat_t *); 222 static int udp_kstat_update(kstat_t *kp, int rw); 223 224 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 225 uint_t pkt_len); 226 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 227 static void udp_xmit(queue_t *, mblk_t *, ire_t *ire, conn_t *, zoneid_t); 228 229 #define UDP_RECV_HIWATER (56 * 1024) 230 #define UDP_RECV_LOWATER 128 231 #define UDP_XMIT_HIWATER (56 * 1024) 232 #define UDP_XMIT_LOWATER 1024 233 234 static struct module_info udp_mod_info = { 235 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 236 }; 237 238 /* 239 * Entry points for UDP as a device. 240 * We have separate open functions for the /dev/udp and /dev/udp6 devices. 241 */ 242 static struct qinit udp_rinitv4 = { 243 NULL, NULL, udp_openv4, udp_close, NULL, 244 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 245 }; 246 247 static struct qinit udp_rinitv6 = { 248 NULL, NULL, udp_openv6, udp_close, NULL, 249 &udp_mod_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 250 }; 251 252 static struct qinit udp_winit = { 253 (pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, 254 &udp_mod_info, NULL, NULL, NULL, STRUIOT_NONE 255 }; 256 257 /* 258 * UDP needs to handle I_LINK and I_PLINK since ifconfig 259 * likes to use it as a place to hang the various streams. 
260 */ 261 static struct qinit udp_lrinit = { 262 (pfi_t)udp_lrput, NULL, udp_openv4, udp_close, NULL, 263 &udp_mod_info 264 }; 265 266 static struct qinit udp_lwinit = { 267 (pfi_t)udp_lwput, NULL, udp_openv4, udp_close, NULL, 268 &udp_mod_info 269 }; 270 271 /* For AF_INET aka /dev/udp */ 272 struct streamtab udpinfov4 = { 273 &udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit 274 }; 275 276 /* For AF_INET6 aka /dev/udp6 */ 277 struct streamtab udpinfov6 = { 278 &udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit 279 }; 280 281 static sin_t sin_null; /* Zero address for quick clears */ 282 static sin6_t sin6_null; /* Zero address for quick clears */ 283 284 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 285 286 /* Default structure copied into T_INFO_ACK messages */ 287 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 288 T_INFO_ACK, 289 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 290 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 291 T_INVALID, /* CDATA_size. udp does not support connect data. */ 292 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 293 sizeof (sin_t), /* ADDR_size. */ 294 0, /* OPT_size - not initialized here */ 295 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 296 T_CLTS, /* SERV_type. udp supports connection-less. */ 297 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 298 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 299 }; 300 301 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 302 303 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 304 T_INFO_ACK, 305 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 306 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 307 T_INVALID, /* CDATA_size. udp does not support connect data. */ 308 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 309 sizeof (sin6_t), /* ADDR_size. */ 310 0, /* OPT_size - not initialized here */ 311 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 312 T_CLTS, /* SERV_type. udp supports connection-less. */ 313 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 314 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 315 }; 316 317 /* largest UDP port number */ 318 #define UDP_MAX_PORT 65535 319 320 /* 321 * Table of ND variables supported by udp. These are loaded into us_nd 322 * in udp_open. 323 * All of these are alterable, within the min/max values given, at run time. 324 */ 325 /* BEGIN CSTYLED */ 326 udpparam_t udp_param_arr[] = { 327 /*min max value name */ 328 { 0L, 256, 32, "udp_wroff_extra" }, 329 { 1L, 255, 255, "udp_ipv4_ttl" }, 330 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 331 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 332 { 0, 1, 1, "udp_do_checksum" }, 333 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 334 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 335 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 336 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 337 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 338 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 339 { 100, 60000, 1000, "udp_ndd_get_info_interval"}, 340 }; 341 /* END CSTYLED */ 342 343 /* Setable in /etc/system */ 344 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 345 uint32_t udp_random_anon_port = 1; 346 347 /* 348 * Hook functions to enable cluster networking. 
349 * On non-clustered systems these vectors must always be NULL 350 */ 351 352 void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family, 353 uint8_t *laddrp, in_port_t lport) = NULL; 354 void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, 355 uint8_t *laddrp, in_port_t lport) = NULL; 356 357 typedef union T_primitives *t_primp_t; 358 359 /* 360 * Return the next anonymous port in the privileged port range for 361 * bind checking. 362 * 363 * Trusted Extension (TX) notes: TX allows administrator to mark or 364 * reserve ports as Multilevel ports (MLP). MLP has special function 365 * on TX systems. Once a port is made MLP, it's not available as 366 * ordinary port. This creates "holes" in the port name space. It 367 * may be necessary to skip the "holes" find a suitable anon port. 368 */ 369 static in_port_t 370 udp_get_next_priv_port(udp_t *udp) 371 { 372 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 373 in_port_t nextport; 374 boolean_t restart = B_FALSE; 375 udp_stack_t *us = udp->udp_us; 376 377 retry: 378 if (next_priv_port < us->us_min_anonpriv_port || 379 next_priv_port >= IPPORT_RESERVED) { 380 next_priv_port = IPPORT_RESERVED - 1; 381 if (restart) 382 return (0); 383 restart = B_TRUE; 384 } 385 386 if (is_system_labeled() && 387 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 388 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 389 next_priv_port = nextport; 390 goto retry; 391 } 392 393 return (next_priv_port--); 394 } 395 396 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 397 /* ARGSUSED */ 398 static int 399 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 400 { 401 udp_fanout_t *udpf; 402 int i; 403 zoneid_t zoneid; 404 conn_t *connp; 405 udp_t *udp; 406 udp_stack_t *us; 407 408 connp = Q_TO_CONN(q); 409 udp = connp->conn_udp; 410 us = udp->udp_us; 411 412 /* Refer to comments in udp_status_report(). */ 413 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 414 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 415 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 416 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 417 return (0); 418 } 419 } 420 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 421 /* The following may work even if we cannot get a large buf. */ 422 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 423 return (0); 424 } 425 426 (void) mi_mpprintf(mp, 427 "UDP " MI_COL_HDRPAD_STR 428 /* 12345678[89ABCDEF] */ 429 " zone lport src addr dest addr port state"); 430 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 431 432 zoneid = connp->conn_zoneid; 433 434 for (i = 0; i < us->us_bind_fanout_size; i++) { 435 udpf = &us->us_bind_fanout[i]; 436 mutex_enter(&udpf->uf_lock); 437 438 /* Print the hash index. 
*/ 439 udp = udpf->uf_udp; 440 if (zoneid != GLOBAL_ZONEID) { 441 /* skip to first entry in this zone; might be none */ 442 while (udp != NULL && 443 udp->udp_connp->conn_zoneid != zoneid) 444 udp = udp->udp_bind_hash; 445 } 446 if (udp != NULL) { 447 uint_t print_len, buf_len; 448 449 buf_len = mp->b_cont->b_datap->db_lim - 450 mp->b_cont->b_wptr; 451 print_len = snprintf((char *)mp->b_cont->b_wptr, 452 buf_len, "%d\n", i); 453 if (print_len < buf_len) { 454 mp->b_cont->b_wptr += print_len; 455 } else { 456 mp->b_cont->b_wptr += buf_len; 457 } 458 for (; udp != NULL; udp = udp->udp_bind_hash) { 459 if (zoneid == GLOBAL_ZONEID || 460 zoneid == udp->udp_connp->conn_zoneid) 461 udp_report_item(mp->b_cont, udp); 462 } 463 } 464 mutex_exit(&udpf->uf_lock); 465 } 466 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 467 return (0); 468 } 469 470 /* 471 * Hash list removal routine for udp_t structures. 472 */ 473 static void 474 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 475 { 476 udp_t *udpnext; 477 kmutex_t *lockp; 478 udp_stack_t *us = udp->udp_us; 479 480 if (udp->udp_ptpbhn == NULL) 481 return; 482 483 /* 484 * Extract the lock pointer in case there are concurrent 485 * hash_remove's for this instance. 486 */ 487 ASSERT(udp->udp_port != 0); 488 if (!caller_holds_lock) { 489 lockp = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 490 us->us_bind_fanout_size)].uf_lock; 491 ASSERT(lockp != NULL); 492 mutex_enter(lockp); 493 } 494 if (udp->udp_ptpbhn != NULL) { 495 udpnext = udp->udp_bind_hash; 496 if (udpnext != NULL) { 497 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 498 udp->udp_bind_hash = NULL; 499 } 500 *udp->udp_ptpbhn = udpnext; 501 udp->udp_ptpbhn = NULL; 502 } 503 if (!caller_holds_lock) { 504 mutex_exit(lockp); 505 } 506 } 507 508 static void 509 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 510 { 511 udp_t **udpp; 512 udp_t *udpnext; 513 514 ASSERT(MUTEX_HELD(&uf->uf_lock)); 515 ASSERT(udp->udp_ptpbhn == NULL); 516 udpp = &uf->uf_udp; 517 udpnext = udpp[0]; 518 if (udpnext != NULL) { 519 /* 520 * If the new udp bound to the INADDR_ANY address 521 * and the first one in the list is not bound to 522 * INADDR_ANY we skip all entries until we find the 523 * first one bound to INADDR_ANY. 524 * This makes sure that applications binding to a 525 * specific address get preference over those binding to 526 * INADDR_ANY. 527 */ 528 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 529 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 530 while ((udpnext = udpp[0]) != NULL && 531 !V6_OR_V4_INADDR_ANY( 532 udpnext->udp_bound_v6src)) { 533 udpp = &(udpnext->udp_bind_hash); 534 } 535 if (udpnext != NULL) 536 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 537 } else { 538 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 539 } 540 } 541 udp->udp_bind_hash = udpnext; 542 udp->udp_ptpbhn = udpp; 543 udpp[0] = udp; 544 } 545 546 /* 547 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 548 * passed to udp_wput. 549 * It associates a port number and local address with the stream. 550 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 551 * protocol type (IPPROTO_UDP) placed in the message following the address. 552 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 553 * (Called as writer.) 554 * 555 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 556 * without setting SO_REUSEADDR. This is needed so that they 557 * can be viewed as two independent transport protocols. 
558 * However, anonymouns ports are allocated from the same range to avoid 559 * duplicating the us->us_next_port_to_try. 560 */ 561 static void 562 udp_bind(queue_t *q, mblk_t *mp) 563 { 564 sin_t *sin; 565 sin6_t *sin6; 566 mblk_t *mp1; 567 in_port_t port; /* Host byte order */ 568 in_port_t requested_port; /* Host byte order */ 569 struct T_bind_req *tbr; 570 int count; 571 in6_addr_t v6src; 572 boolean_t bind_to_req_port_only; 573 int loopmax; 574 udp_fanout_t *udpf; 575 in_port_t lport; /* Network byte order */ 576 zoneid_t zoneid; 577 conn_t *connp; 578 udp_t *udp; 579 boolean_t is_inaddr_any; 580 mlp_type_t addrtype, mlptype; 581 udp_stack_t *us; 582 583 connp = Q_TO_CONN(q); 584 udp = connp->conn_udp; 585 us = udp->udp_us; 586 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 587 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 588 "udp_bind: bad req, len %u", 589 (uint_t)(mp->b_wptr - mp->b_rptr)); 590 udp_err_ack(q, mp, TPROTO, 0); 591 return; 592 } 593 if (udp->udp_state != TS_UNBND) { 594 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 595 "udp_bind: bad state, %u", udp->udp_state); 596 udp_err_ack(q, mp, TOUTSTATE, 0); 597 return; 598 } 599 /* 600 * Reallocate the message to make sure we have enough room for an 601 * address and the protocol type. 602 */ 603 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 604 if (!mp1) { 605 udp_err_ack(q, mp, TSYSERR, ENOMEM); 606 return; 607 } 608 609 mp = mp1; 610 tbr = (struct T_bind_req *)mp->b_rptr; 611 switch (tbr->ADDR_length) { 612 case 0: /* Request for a generic port */ 613 tbr->ADDR_offset = sizeof (struct T_bind_req); 614 if (udp->udp_family == AF_INET) { 615 tbr->ADDR_length = sizeof (sin_t); 616 sin = (sin_t *)&tbr[1]; 617 *sin = sin_null; 618 sin->sin_family = AF_INET; 619 mp->b_wptr = (uchar_t *)&sin[1]; 620 } else { 621 ASSERT(udp->udp_family == AF_INET6); 622 tbr->ADDR_length = sizeof (sin6_t); 623 sin6 = (sin6_t *)&tbr[1]; 624 *sin6 = sin6_null; 625 sin6->sin6_family = AF_INET6; 626 mp->b_wptr = (uchar_t *)&sin6[1]; 627 } 628 port = 0; 629 break; 630 631 case sizeof (sin_t): /* Complete IPv4 address */ 632 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 633 sizeof (sin_t)); 634 if (sin == NULL || !OK_32PTR((char *)sin)) { 635 udp_err_ack(q, mp, TSYSERR, EINVAL); 636 return; 637 } 638 if (udp->udp_family != AF_INET || 639 sin->sin_family != AF_INET) { 640 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 641 return; 642 } 643 port = ntohs(sin->sin_port); 644 break; 645 646 case sizeof (sin6_t): /* complete IPv6 address */ 647 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 648 sizeof (sin6_t)); 649 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 650 udp_err_ack(q, mp, TSYSERR, EINVAL); 651 return; 652 } 653 if (udp->udp_family != AF_INET6 || 654 sin6->sin6_family != AF_INET6) { 655 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 656 return; 657 } 658 port = ntohs(sin6->sin6_port); 659 break; 660 661 default: /* Invalid request */ 662 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 663 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 664 udp_err_ack(q, mp, TBADADDR, 0); 665 return; 666 } 667 668 requested_port = port; 669 670 if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ) 671 bind_to_req_port_only = B_FALSE; 672 else /* T_BIND_REQ and requested_port != 0 */ 673 bind_to_req_port_only = B_TRUE; 674 675 if (requested_port == 0) { 676 /* 677 * If the application passed in zero for the port number, it 678 * doesn't care which port number we bind to. Get one in the 679 * valid range. 
		 */
		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			port = udp_update_next_port(udp,
			    us->us_next_port_to_try, B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < us->us_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			for (i = 0; i < us->us_num_epriv_ports; i++) {
				if (port == us->us_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);

			if (secpolicy_net_privaddr(cr, port,
			    IPPROTO_UDP) != 0) {
				udp_err_ack(q, mp, TACCES, 0);
				return;
			}
		}
	}

	if (port == 0) {
		udp_err_ack(q, mp, TNOADDR, 0);
		return;
	}

	/*
	 * The state must be TS_UNBND. TPI mandates that users must send
	 * TPI primitives only 1 at a time and wait for the response before
	 * sending the next primitive.
	 */
	rw_enter(&udp->udp_rwlock, RW_WRITER);
	if (udp->udp_state != TS_UNBND || udp->udp_pending_op != -1) {
		rw_exit(&udp->udp_rwlock);
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	udp->udp_pending_op = tbr->PRIM_type;
	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it. Since the udp is
	 * not yet in the bind hash list, we don't grab the uf_lock to
	 * change udp_ipversion.
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			/*
			 * No need to hold the uf_lock to set the
			 * udp_ipversion since we are not yet in the
			 * fanout list.
			 */
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream. If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set, the bind fails; otherwise we
	 * search for an unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different than in SunOS 4.X, which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
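	 *
	 * As a rough user-level illustration of the SO_REUSEADDR behavior
	 * described above (a hypothetical, stand-alone sketch using the
	 * standard sockets API; it is not part of this module and the
	 * port number is arbitrary):
	 *
	 *	#include <sys/socket.h>
	 *	#include <netinet/in.h>
	 *	#include <arpa/inet.h>
	 *	#include <string.h>
	 *
	 *	int
	 *	main(void)
	 *	{
	 *		int one = 1;
	 *		int s1 = socket(AF_INET, SOCK_DGRAM, 0);
	 *		int s2 = socket(AF_INET, SOCK_DGRAM, 0);
	 *		struct sockaddr_in sin;
	 *
	 *		(void) memset(&sin, 0, sizeof (sin));
	 *		sin.sin_family = AF_INET;
	 *		sin.sin_port = htons(5000);
	 *		sin.sin_addr.s_addr = htonl(INADDR_ANY);
	 *
	 *		(void) setsockopt(s1, SOL_SOCKET, SO_REUSEADDR,
	 *		    &one, sizeof (one));
	 *		(void) setsockopt(s2, SOL_SOCKET, SO_REUSEADDR,
	 *		    &one, sizeof (one));
	 *
	 *		(void) bind(s1, (struct sockaddr *)&sin, sizeof (sin));
	 *		return (bind(s2, (struct sockaddr *)&sin, sizeof (sin)));
	 *	}
	 *
	 * With SO_REUSEADDR set and a nonzero port requested, the second
	 * bind is expected to succeed under the rules above; binding to
	 * port zero would ignore SO_REUSEADDR as noted.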
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/*
		 * loopmax = (IPPORT_RESERVED-1) -
		 *	us->us_min_anonpriv_port + 1
		 */
		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
	} else {
		loopmax = us->us_largest_anon_port -
		    us->us_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
	zoneid = connp->conn_zoneid;

	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;

		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
		    us->us_bind_fanout_size)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (!(IPCL_ZONE_MATCH(udp1->udp_connp, zoneid) ||
			    IPCL_ZONE_MATCH(connp,
			    udp1->udp_connp->conn_zoneid)) &&
			    !connp->conn_mac_exempt &&
			    !udp1->udp_connp->conn_mac_exempt)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * are changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind ||
			    udp1->udp_connp->conn_mac_exempt ||
			    connp->conn_mac_exempt) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion) {

				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a
				 * particular port number, then ignore any
				 * bindings of the other protocol that may
				 * conflict. This allows the user to bind IPv6
				 * alone and get both v4 and v6, or bind both
				 * and get each separately. On subsequent
				 * times through the loop, we're checking a
				 * port that we chose (not the user) and thus
				 * we do not allow casual duplicate bindings.
				 */
				if (count == 0 && requested_port != 0)
					continue;
			}

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If the existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct (also non-wildcard)
			 * IP address, keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			break;
		}

		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when the requested port
			 * is bound (and only on the first iteration
			 * of the for() loop).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 */
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			udp_err_ack(q, mp, TADDRBUSY, 0);
			return;
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update us->us_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    us->us_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp->udp_pending_op = -1;
			rw_exit(&udp->udp_rwlock);
			udp_err_ack(q, mp, TNOADDR, 0);
			return;
		}
	}

	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address then
	 * udp_bind_ack will clear the source address when it receives
	 * the T_BIND_ACK.
	 */
	udp->udp_v6src = udp->udp_bound_v6src = v6src;
	udp->udp_port = lport;
	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
		us->us_next_port_to_try = port + 1;
	}

	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip.
*/ 987 if (udp->udp_family == AF_INET) { 988 sin->sin_port = udp->udp_port; 989 } else { 990 int error; 991 992 sin6->sin6_port = udp->udp_port; 993 /* Rebuild the header template */ 994 error = udp_build_hdrs(udp); 995 if (error != 0) { 996 udp->udp_pending_op = -1; 997 rw_exit(&udp->udp_rwlock); 998 mutex_exit(&udpf->uf_lock); 999 udp_err_ack(q, mp, TSYSERR, error); 1000 return; 1001 } 1002 } 1003 udp->udp_state = TS_IDLE; 1004 udp_bind_hash_insert(udpf, udp); 1005 mutex_exit(&udpf->uf_lock); 1006 rw_exit(&udp->udp_rwlock); 1007 1008 if (cl_inet_bind) { 1009 /* 1010 * Running in cluster mode - register bind information 1011 */ 1012 if (udp->udp_ipversion == IPV4_VERSION) { 1013 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1014 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1015 (in_port_t)udp->udp_port); 1016 } else { 1017 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1018 (uint8_t *)&(udp->udp_v6src), 1019 (in_port_t)udp->udp_port); 1020 } 1021 1022 } 1023 1024 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 1025 if (is_system_labeled() && (!connp->conn_anon_port || 1026 connp->conn_anon_mlp)) { 1027 uint16_t mlpport; 1028 cred_t *cr = connp->conn_cred; 1029 zone_t *zone; 1030 1031 zone = crgetzone(cr); 1032 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 1033 mlptSingle; 1034 addrtype = tsol_mlp_addr_type(zone->zone_id, IPV6_VERSION, 1035 &v6src, us->us_netstack->netstack_ip); 1036 if (addrtype == mlptSingle) { 1037 rw_enter(&udp->udp_rwlock, RW_WRITER); 1038 udp->udp_pending_op = -1; 1039 rw_exit(&udp->udp_rwlock); 1040 udp_err_ack(q, mp, TNOADDR, 0); 1041 connp->conn_anon_port = B_FALSE; 1042 connp->conn_mlp_type = mlptSingle; 1043 return; 1044 } 1045 mlpport = connp->conn_anon_port ? PMAPPORT : port; 1046 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 1047 addrtype); 1048 if (mlptype != mlptSingle && 1049 (connp->conn_mlp_type == mlptSingle || 1050 secpolicy_net_bindmlp(cr) != 0)) { 1051 if (udp->udp_debug) { 1052 (void) strlog(UDP_MOD_ID, 0, 1, 1053 SL_ERROR|SL_TRACE, 1054 "udp_bind: no priv for multilevel port %d", 1055 mlpport); 1056 } 1057 rw_enter(&udp->udp_rwlock, RW_WRITER); 1058 udp->udp_pending_op = -1; 1059 rw_exit(&udp->udp_rwlock); 1060 udp_err_ack(q, mp, TACCES, 0); 1061 connp->conn_anon_port = B_FALSE; 1062 connp->conn_mlp_type = mlptSingle; 1063 return; 1064 } 1065 1066 /* 1067 * If we're specifically binding a shared IP address and the 1068 * port is MLP on shared addresses, then check to see if this 1069 * zone actually owns the MLP. Reject if not. 1070 */ 1071 if (mlptype == mlptShared && addrtype == mlptShared) { 1072 /* 1073 * No need to handle exclusive-stack zones since 1074 * ALL_ZONES only applies to the shared stack. 
1075 */ 1076 zoneid_t mlpzone; 1077 1078 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 1079 htons(mlpport)); 1080 if (connp->conn_zoneid != mlpzone) { 1081 if (udp->udp_debug) { 1082 (void) strlog(UDP_MOD_ID, 0, 1, 1083 SL_ERROR|SL_TRACE, 1084 "udp_bind: attempt to bind port " 1085 "%d on shared addr in zone %d " 1086 "(should be %d)", 1087 mlpport, connp->conn_zoneid, 1088 mlpzone); 1089 } 1090 rw_enter(&udp->udp_rwlock, RW_WRITER); 1091 udp->udp_pending_op = -1; 1092 rw_exit(&udp->udp_rwlock); 1093 udp_err_ack(q, mp, TACCES, 0); 1094 connp->conn_anon_port = B_FALSE; 1095 connp->conn_mlp_type = mlptSingle; 1096 return; 1097 } 1098 } 1099 if (connp->conn_anon_port) { 1100 int error; 1101 1102 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 1103 port, B_TRUE); 1104 if (error != 0) { 1105 if (udp->udp_debug) { 1106 (void) strlog(UDP_MOD_ID, 0, 1, 1107 SL_ERROR|SL_TRACE, 1108 "udp_bind: cannot establish anon " 1109 "MLP for port %d", port); 1110 } 1111 rw_enter(&udp->udp_rwlock, RW_WRITER); 1112 udp->udp_pending_op = -1; 1113 rw_exit(&udp->udp_rwlock); 1114 udp_err_ack(q, mp, TACCES, 0); 1115 connp->conn_anon_port = B_FALSE; 1116 connp->conn_mlp_type = mlptSingle; 1117 return; 1118 } 1119 } 1120 connp->conn_mlp_type = mlptype; 1121 } 1122 1123 /* Pass the protocol number in the message following the address. */ 1124 *mp->b_wptr++ = IPPROTO_UDP; 1125 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1126 /* 1127 * Append a request for an IRE if udp_v6src not 1128 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1129 */ 1130 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1131 if (!mp->b_cont) { 1132 rw_enter(&udp->udp_rwlock, RW_WRITER); 1133 udp->udp_pending_op = -1; 1134 rw_exit(&udp->udp_rwlock); 1135 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1136 return; 1137 } 1138 mp->b_cont->b_wptr += sizeof (ire_t); 1139 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1140 } 1141 if (udp->udp_family == AF_INET6) 1142 mp = ip_bind_v6(q, mp, connp, NULL); 1143 else 1144 mp = ip_bind_v4(q, mp, connp); 1145 1146 /* The above return NULL if the bind needs to be deferred */ 1147 if (mp != NULL) 1148 udp_bind_result(connp, mp); 1149 else 1150 CONN_INC_REF(connp); 1151 } 1152 1153 /* 1154 * This is called from ip_wput_nondata to handle the results of a 1155 * deferred UDP bind. It is called once the bind has been completed. 1156 */ 1157 void 1158 udp_resume_bind(conn_t *connp, mblk_t *mp) 1159 { 1160 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1161 1162 udp_bind_result(connp, mp); 1163 1164 CONN_OPER_PENDING_DONE(connp); 1165 } 1166 1167 /* 1168 * This routine handles each T_CONN_REQ message passed to udp. It 1169 * associates a default destination address with the stream. 1170 * 1171 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1172 * T_BIND_REQ - specifying local and remote address/port 1173 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 1174 * T_OK_ACK - for the T_CONN_REQ 1175 * T_CONN_CON - to keep the TPI user happy 1176 * 1177 * The connect completes in udp_bind_result. 1178 * When a T_BIND_ACK is received information is extracted from the IRE 1179 * and the two appended messages are sent to the TPI user. 1180 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 1181 * convert it to an error ack for the appropriate primitive. 
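 *
 * As an illustrative sketch (it simply mirrors the code below and is not
 * meant to be read as separate logic), the chain handed to
 * ip_bind_v4()/ip_bind_v6() is assembled roughly as:
 *
 *	mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, ...);	T_BIND_REQ (plus the
 *							IRE_DB_REQ_TYPE mblk)
 *	mp  = mi_tpi_ok_ack_alloc(mp);			T_OK_ACK
 *	mp2 = mi_tpi_conn_con(NULL, ..., NULL, 0);	T_CONN_CON
 *	linkb(mp1, mp);
 *	linkb(mp1, mp2);
 *
 * so that when the T_BIND_ACK comes back, udp_bind_result() can extract
 * the IRE information and send the two trailing messages to the TPI user.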
 */
static void
udp_connect(queue_t *q, mblk_t *mp)
{
	sin6_t	*sin6;
	sin_t	*sin;
	struct T_conn_req	*tcr;
	in6_addr_t	v6dst;
	ipaddr_t	v4dst;
	uint16_t	dstport;
	uint32_t	flowinfo;
	mblk_t	*mp1, *mp2;
	udp_fanout_t	*udpf;
	udp_t	*udp, *udp1;
	ushort_t	ipversion;
	udp_stack_t	*us;
	conn_t	*connp = Q_TO_CONN(q);

	udp = connp->conn_udp;
	tcr = (struct T_conn_req *)mp->b_rptr;
	us = udp->udp_us;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine the packet type based on the type of address passed in;
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that the address family matches the family of
	 * the address passed down.
	 */
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		ipversion = IPV4_VERSION;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v6dst = sin6->sin6_addr;
		dstport = sin6->sin6_port;
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			ipversion = IPV4_VERSION;
			flowinfo = 0;
		} else {
			ipversion = IPV6_VERSION;
			flowinfo = sin6->sin6_flowinfo;
		}
		break;
	}
	if (dstport == 0) {
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	rw_enter(&udp->udp_rwlock, RW_WRITER);

	/*
	 * This UDP must have bound to a port already before doing a connect.
	 * TPI mandates that users must send TPI primitives only 1 at a time
	 * and wait for the response before sending the next primitive.
1280 */ 1281 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 1282 rw_exit(&udp->udp_rwlock); 1283 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1284 "udp_connect: bad state, %u", udp->udp_state); 1285 udp_err_ack(q, mp, TOUTSTATE, 0); 1286 return; 1287 } 1288 udp->udp_pending_op = T_CONN_REQ; 1289 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 1290 1291 if (ipversion == IPV4_VERSION) { 1292 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1293 udp->udp_ip_snd_options_len; 1294 } else { 1295 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1296 } 1297 1298 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 1299 us->us_bind_fanout_size)]; 1300 1301 mutex_enter(&udpf->uf_lock); 1302 if (udp->udp_state == TS_DATA_XFER) { 1303 /* Already connected - clear out state */ 1304 udp->udp_v6src = udp->udp_bound_v6src; 1305 udp->udp_state = TS_IDLE; 1306 } 1307 1308 /* 1309 * Create a default IP header with no IP options. 1310 */ 1311 udp->udp_dstport = dstport; 1312 udp->udp_ipversion = ipversion; 1313 if (ipversion == IPV4_VERSION) { 1314 /* 1315 * Interpret a zero destination to mean loopback. 1316 * Update the T_CONN_REQ (sin/sin6) since it is used to 1317 * generate the T_CONN_CON. 1318 */ 1319 if (v4dst == INADDR_ANY) { 1320 v4dst = htonl(INADDR_LOOPBACK); 1321 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1322 if (udp->udp_family == AF_INET) { 1323 sin->sin_addr.s_addr = v4dst; 1324 } else { 1325 sin6->sin6_addr = v6dst; 1326 } 1327 } 1328 udp->udp_v6dst = v6dst; 1329 udp->udp_flowinfo = 0; 1330 1331 /* 1332 * If the destination address is multicast and 1333 * an outgoing multicast interface has been set, 1334 * use the address of that interface as our 1335 * source address if no source address has been set. 1336 */ 1337 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 1338 CLASSD(v4dst) && 1339 udp->udp_multicast_if_addr != INADDR_ANY) { 1340 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 1341 &udp->udp_v6src); 1342 } 1343 } else { 1344 ASSERT(udp->udp_ipversion == IPV6_VERSION); 1345 /* 1346 * Interpret a zero destination to mean loopback. 1347 * Update the T_CONN_REQ (sin/sin6) since it is used to 1348 * generate the T_CONN_CON. 1349 */ 1350 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 1351 v6dst = ipv6_loopback; 1352 sin6->sin6_addr = v6dst; 1353 } 1354 udp->udp_v6dst = v6dst; 1355 udp->udp_flowinfo = flowinfo; 1356 /* 1357 * If the destination address is multicast and 1358 * an outgoing multicast interface has been set, 1359 * then the ip bind logic will pick the correct source 1360 * address (i.e. matching the outgoing multicast interface). 
1361 */ 1362 } 1363 1364 /* 1365 * Verify that the src/port/dst/port is unique for all 1366 * connections in TS_DATA_XFER 1367 */ 1368 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 1369 if (udp1->udp_state != TS_DATA_XFER) 1370 continue; 1371 if (udp->udp_port != udp1->udp_port || 1372 udp->udp_ipversion != udp1->udp_ipversion || 1373 dstport != udp1->udp_dstport || 1374 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 1375 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst) || 1376 !(IPCL_ZONE_MATCH(udp->udp_connp, 1377 udp1->udp_connp->conn_zoneid) || 1378 IPCL_ZONE_MATCH(udp1->udp_connp, 1379 udp->udp_connp->conn_zoneid))) 1380 continue; 1381 mutex_exit(&udpf->uf_lock); 1382 udp->udp_pending_op = -1; 1383 rw_exit(&udp->udp_rwlock); 1384 udp_err_ack(q, mp, TBADADDR, 0); 1385 return; 1386 } 1387 udp->udp_state = TS_DATA_XFER; 1388 mutex_exit(&udpf->uf_lock); 1389 1390 /* 1391 * Send down bind to IP to verify that there is a route 1392 * and to determine the source address. 1393 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 1394 */ 1395 if (udp->udp_family == AF_INET) 1396 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t)); 1397 else 1398 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); 1399 if (mp1 == NULL) { 1400 bind_failed: 1401 mutex_enter(&udpf->uf_lock); 1402 udp->udp_state = TS_IDLE; 1403 udp->udp_pending_op = -1; 1404 mutex_exit(&udpf->uf_lock); 1405 rw_exit(&udp->udp_rwlock); 1406 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1407 return; 1408 } 1409 1410 rw_exit(&udp->udp_rwlock); 1411 /* 1412 * We also have to send a connection confirmation to 1413 * keep TLI happy. Prepare it for udp_bind_result. 1414 */ 1415 if (udp->udp_family == AF_INET) 1416 mp2 = mi_tpi_conn_con(NULL, (char *)sin, 1417 sizeof (*sin), NULL, 0); 1418 else 1419 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, 1420 sizeof (*sin6), NULL, 0); 1421 if (mp2 == NULL) { 1422 freemsg(mp1); 1423 rw_enter(&udp->udp_rwlock, RW_WRITER); 1424 goto bind_failed; 1425 } 1426 1427 mp = mi_tpi_ok_ack_alloc(mp); 1428 if (mp == NULL) { 1429 /* Unable to reuse the T_CONN_REQ for the ack. */ 1430 freemsg(mp2); 1431 rw_enter(&udp->udp_rwlock, RW_WRITER); 1432 mutex_enter(&udpf->uf_lock); 1433 udp->udp_state = TS_IDLE; 1434 udp->udp_pending_op = -1; 1435 mutex_exit(&udpf->uf_lock); 1436 rw_exit(&udp->udp_rwlock); 1437 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 1438 return; 1439 } 1440 1441 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 1442 linkb(mp1, mp); 1443 linkb(mp1, mp2); 1444 1445 mblk_setcred(mp1, connp->conn_cred); 1446 if (udp->udp_family == AF_INET) 1447 mp1 = ip_bind_v4(q, mp1, connp); 1448 else 1449 mp1 = ip_bind_v6(q, mp1, connp, NULL); 1450 1451 /* The above return NULL if the bind needs to be deferred */ 1452 if (mp1 != NULL) 1453 udp_bind_result(connp, mp1); 1454 else 1455 CONN_INC_REF(connp); 1456 } 1457 1458 static int 1459 udp_close(queue_t *q) 1460 { 1461 conn_t *connp = (conn_t *)q->q_ptr; 1462 udp_t *udp; 1463 1464 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1465 udp = connp->conn_udp; 1466 1467 udp_quiesce_conn(connp); 1468 ip_quiesce_conn(connp); 1469 /* 1470 * Disable read-side synchronous stream 1471 * interface and drain any queued data. 
1472 */ 1473 udp_rcv_drain(q, udp, B_TRUE); 1474 ASSERT(!udp->udp_direct_sockfs); 1475 1476 qprocsoff(q); 1477 1478 ASSERT(udp->udp_rcv_cnt == 0); 1479 ASSERT(udp->udp_rcv_msgcnt == 0); 1480 ASSERT(udp->udp_rcv_list_head == NULL); 1481 ASSERT(udp->udp_rcv_list_tail == NULL); 1482 1483 udp_close_free(connp); 1484 1485 /* 1486 * Now we are truly single threaded on this stream, and can 1487 * delete the things hanging off the connp, and finally the connp. 1488 * We removed this connp from the fanout list, it cannot be 1489 * accessed thru the fanouts, and we already waited for the 1490 * conn_ref to drop to 0. We are already in close, so 1491 * there cannot be any other thread from the top. qprocsoff 1492 * has completed, and service has completed or won't run in 1493 * future. 1494 */ 1495 ASSERT(connp->conn_ref == 1); 1496 inet_minor_free(connp->conn_minor_arena, connp->conn_dev); 1497 connp->conn_ref--; 1498 ipcl_conn_destroy(connp); 1499 1500 q->q_ptr = WR(q)->q_ptr = NULL; 1501 return (0); 1502 } 1503 1504 /* 1505 * Called in the close path to quiesce the conn 1506 */ 1507 void 1508 udp_quiesce_conn(conn_t *connp) 1509 { 1510 udp_t *udp = connp->conn_udp; 1511 1512 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 1513 /* 1514 * Running in cluster mode - register unbind information 1515 */ 1516 if (udp->udp_ipversion == IPV4_VERSION) { 1517 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 1518 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 1519 (in_port_t)udp->udp_port); 1520 } else { 1521 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 1522 (uint8_t *)(&(udp->udp_v6src)), 1523 (in_port_t)udp->udp_port); 1524 } 1525 } 1526 1527 udp_bind_hash_remove(udp, B_FALSE); 1528 1529 } 1530 1531 void 1532 udp_close_free(conn_t *connp) 1533 { 1534 udp_t *udp = connp->conn_udp; 1535 1536 /* If there are any options associated with the stream, free them. */ 1537 if (udp->udp_ip_snd_options != NULL) { 1538 mi_free((char *)udp->udp_ip_snd_options); 1539 udp->udp_ip_snd_options = NULL; 1540 udp->udp_ip_snd_options_len = 0; 1541 } 1542 1543 if (udp->udp_ip_rcv_options != NULL) { 1544 mi_free((char *)udp->udp_ip_rcv_options); 1545 udp->udp_ip_rcv_options = NULL; 1546 udp->udp_ip_rcv_options_len = 0; 1547 } 1548 1549 /* Free memory associated with sticky options */ 1550 if (udp->udp_sticky_hdrs_len != 0) { 1551 kmem_free(udp->udp_sticky_hdrs, 1552 udp->udp_sticky_hdrs_len); 1553 udp->udp_sticky_hdrs = NULL; 1554 udp->udp_sticky_hdrs_len = 0; 1555 } 1556 1557 ip6_pkt_free(&udp->udp_sticky_ipp); 1558 1559 /* 1560 * Clear any fields which the kmem_cache constructor clears. 1561 * Only udp_connp needs to be preserved. 1562 * TBD: We should make this more efficient to avoid clearing 1563 * everything. 1564 */ 1565 ASSERT(udp->udp_connp == connp); 1566 bzero(udp, sizeof (udp_t)); 1567 udp->udp_connp = connp; 1568 } 1569 1570 /* 1571 * This routine handles each T_DISCON_REQ message passed to udp 1572 * as an indicating that UDP is no longer connected. This results 1573 * in sending a T_BIND_REQ to IP to restore the binding to just 1574 * the local address/port. 1575 * 1576 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1577 * T_BIND_REQ - specifying just the local address/port 1578 * T_OK_ACK - for the T_DISCON_REQ 1579 * 1580 * The disconnect completes in udp_bind_result. 1581 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 1582 * Should udp_bind_result receive T_ERROR_ACK for the T_BIND_REQ it will 1583 * convert it to an error ack for the appropriate primitive. 
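 *
 * A rough sketch of what the code below does (illustrative only; it
 * mirrors the statements that follow):
 *
 *	udp->udp_v6src = udp->udp_bound_v6src;	revert to the local binding
 *	udp->udp_state = TS_IDLE;
 *	mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t) or
 *	    sizeof (sin6_t));
 *	linkb(mp1, mi_tpi_ok_ack_alloc(mp));	append the T_OK_ACK
 *	mp1 = ip_bind_v4(q, mp1, connp);	or ip_bind_v6()
 *
 * leaving only the T_OK_ACK for udp_bind_result() to forward once the
 * T_BIND_ACK is received from IP.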
1584 */ 1585 static void 1586 udp_disconnect(queue_t *q, mblk_t *mp) 1587 { 1588 udp_t *udp; 1589 mblk_t *mp1; 1590 udp_fanout_t *udpf; 1591 udp_stack_t *us; 1592 conn_t *connp = Q_TO_CONN(q); 1593 1594 udp = connp->conn_udp; 1595 us = udp->udp_us; 1596 rw_enter(&udp->udp_rwlock, RW_WRITER); 1597 if (udp->udp_state != TS_DATA_XFER || udp->udp_pending_op != -1) { 1598 rw_exit(&udp->udp_rwlock); 1599 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1600 "udp_disconnect: bad state, %u", udp->udp_state); 1601 udp_err_ack(q, mp, TOUTSTATE, 0); 1602 return; 1603 } 1604 udp->udp_pending_op = T_DISCON_REQ; 1605 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 1606 us->us_bind_fanout_size)]; 1607 mutex_enter(&udpf->uf_lock); 1608 udp->udp_v6src = udp->udp_bound_v6src; 1609 udp->udp_state = TS_IDLE; 1610 mutex_exit(&udpf->uf_lock); 1611 1612 /* 1613 * Send down bind to IP to remove the full binding and revert 1614 * to the local address binding. 1615 */ 1616 if (udp->udp_family == AF_INET) 1617 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 1618 else 1619 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 1620 if (mp1 == NULL) { 1621 udp->udp_pending_op = -1; 1622 rw_exit(&udp->udp_rwlock); 1623 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1624 return; 1625 } 1626 mp = mi_tpi_ok_ack_alloc(mp); 1627 if (mp == NULL) { 1628 /* Unable to reuse the T_DISCON_REQ for the ack. */ 1629 udp->udp_pending_op = -1; 1630 rw_exit(&udp->udp_rwlock); 1631 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 1632 return; 1633 } 1634 1635 if (udp->udp_family == AF_INET6) { 1636 int error; 1637 1638 /* Rebuild the header template */ 1639 error = udp_build_hdrs(udp); 1640 if (error != 0) { 1641 udp->udp_pending_op = -1; 1642 rw_exit(&udp->udp_rwlock); 1643 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 1644 freemsg(mp1); 1645 return; 1646 } 1647 } 1648 1649 rw_exit(&udp->udp_rwlock); 1650 /* Append the T_OK_ACK to the T_BIND_REQ for udp_bind_ack */ 1651 linkb(mp1, mp); 1652 1653 if (udp->udp_family == AF_INET6) 1654 mp1 = ip_bind_v6(q, mp1, connp, NULL); 1655 else 1656 mp1 = ip_bind_v4(q, mp1, connp); 1657 1658 /* The above return NULL if the bind needs to be deferred */ 1659 if (mp1 != NULL) 1660 udp_bind_result(connp, mp1); 1661 else 1662 CONN_INC_REF(connp); 1663 } 1664 1665 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 1666 static void 1667 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1668 { 1669 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1670 qreply(q, mp); 1671 } 1672 1673 /* Shorthand to generate and send TPI error acks to our client */ 1674 static void 1675 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1676 int sys_error) 1677 { 1678 struct T_error_ack *teackp; 1679 1680 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 1681 M_PCPROTO, T_ERROR_ACK)) != NULL) { 1682 teackp = (struct T_error_ack *)mp->b_rptr; 1683 teackp->ERROR_prim = primitive; 1684 teackp->TLI_error = t_error; 1685 teackp->UNIX_error = sys_error; 1686 qreply(q, mp); 1687 } 1688 } 1689 1690 /*ARGSUSED*/ 1691 static int 1692 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 1693 { 1694 int i; 1695 udp_t *udp = Q_TO_UDP(q); 1696 udp_stack_t *us = udp->udp_us; 1697 1698 for (i = 0; i < us->us_num_epriv_ports; i++) { 1699 if (us->us_epriv_ports[i] != 0) 1700 (void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]); 1701 } 1702 return (0); 1703 } 1704 1705 /* ARGSUSED */ 1706 static int 1707 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1708 cred_t *cr) 1709 { 1710 long new_value; 1711 int i; 1712 udp_t *udp = Q_TO_UDP(q); 1713 udp_stack_t *us = udp->udp_us; 1714 1715 /* 1716 * Fail the request if the new value does not lie within the 1717 * port number limits. 1718 */ 1719 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1720 new_value <= 0 || new_value >= 65536) { 1721 return (EINVAL); 1722 } 1723 1724 /* Check if the value is already in the list */ 1725 for (i = 0; i < us->us_num_epriv_ports; i++) { 1726 if (new_value == us->us_epriv_ports[i]) { 1727 return (EEXIST); 1728 } 1729 } 1730 /* Find an empty slot */ 1731 for (i = 0; i < us->us_num_epriv_ports; i++) { 1732 if (us->us_epriv_ports[i] == 0) 1733 break; 1734 } 1735 if (i == us->us_num_epriv_ports) { 1736 return (EOVERFLOW); 1737 } 1738 1739 /* Set the new value */ 1740 us->us_epriv_ports[i] = (in_port_t)new_value; 1741 return (0); 1742 } 1743 1744 /* ARGSUSED */ 1745 static int 1746 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 1747 cred_t *cr) 1748 { 1749 long new_value; 1750 int i; 1751 udp_t *udp = Q_TO_UDP(q); 1752 udp_stack_t *us = udp->udp_us; 1753 1754 /* 1755 * Fail the request if the new value does not lie within the 1756 * port number limits. 1757 */ 1758 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 1759 new_value <= 0 || new_value >= 65536) { 1760 return (EINVAL); 1761 } 1762 1763 /* Check that the value is already in the list */ 1764 for (i = 0; i < us->us_num_epriv_ports; i++) { 1765 if (us->us_epriv_ports[i] == new_value) 1766 break; 1767 } 1768 if (i == us->us_num_epriv_ports) { 1769 return (ESRCH); 1770 } 1771 1772 /* Clear the value */ 1773 us->us_epriv_ports[i] = 0; 1774 return (0); 1775 } 1776 1777 /* At minimum we need 4 bytes of UDP header */ 1778 #define ICMP_MIN_UDP_HDR 4 1779 1780 /* 1781 * udp_icmp_error is called by udp_input to process ICMP msgs. passed up by IP. 1782 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1783 * Assumes that IP has pulled up everything up to and including the ICMP header. 
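 *
 * The pointer arithmetic below assumes the message is laid out roughly as
 * (an illustrative sketch; header sizes vary with IP options, and at least
 * ICMP_MIN_UDP_HDR bytes of the inner UDP header are assumed present):
 *
 *	mp->b_rptr -> [ outer ipha_t ][ icmph_t ][ inner ipha_t ][ udpha_t ]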
1784 */ 1785 static void 1786 udp_icmp_error(queue_t *q, mblk_t *mp) 1787 { 1788 icmph_t *icmph; 1789 ipha_t *ipha; 1790 int iph_hdr_length; 1791 udpha_t *udpha; 1792 sin_t sin; 1793 sin6_t sin6; 1794 mblk_t *mp1; 1795 int error = 0; 1796 udp_t *udp = Q_TO_UDP(q); 1797 1798 ipha = (ipha_t *)mp->b_rptr; 1799 1800 ASSERT(OK_32PTR(mp->b_rptr)); 1801 1802 if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) { 1803 ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION); 1804 udp_icmp_error_ipv6(q, mp); 1805 return; 1806 } 1807 ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION); 1808 1809 /* Skip past the outer IP and ICMP headers */ 1810 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1811 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 1812 ipha = (ipha_t *)&icmph[1]; 1813 1814 /* Skip past the inner IP and find the ULP header */ 1815 iph_hdr_length = IPH_HDR_LENGTH(ipha); 1816 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 1817 1818 switch (icmph->icmph_type) { 1819 case ICMP_DEST_UNREACHABLE: 1820 switch (icmph->icmph_code) { 1821 case ICMP_FRAGMENTATION_NEEDED: 1822 /* 1823 * IP has already adjusted the path MTU. 1824 */ 1825 break; 1826 case ICMP_PORT_UNREACHABLE: 1827 case ICMP_PROTOCOL_UNREACHABLE: 1828 error = ECONNREFUSED; 1829 break; 1830 default: 1831 /* Transient errors */ 1832 break; 1833 } 1834 break; 1835 default: 1836 /* Transient errors */ 1837 break; 1838 } 1839 if (error == 0) { 1840 freemsg(mp); 1841 return; 1842 } 1843 1844 /* 1845 * Deliver T_UDERROR_IND when the application has asked for it. 1846 * The socket layer enables this automatically when connected. 1847 */ 1848 if (!udp->udp_dgram_errind) { 1849 freemsg(mp); 1850 return; 1851 } 1852 1853 switch (udp->udp_family) { 1854 case AF_INET: 1855 sin = sin_null; 1856 sin.sin_family = AF_INET; 1857 sin.sin_addr.s_addr = ipha->ipha_dst; 1858 sin.sin_port = udpha->uha_dst_port; 1859 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 1860 error); 1861 break; 1862 case AF_INET6: 1863 sin6 = sin6_null; 1864 sin6.sin6_family = AF_INET6; 1865 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 1866 sin6.sin6_port = udpha->uha_dst_port; 1867 1868 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 1869 NULL, 0, error); 1870 break; 1871 } 1872 if (mp1) 1873 putnext(q, mp1); 1874 freemsg(mp); 1875 } 1876 1877 /* 1878 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 1879 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 1880 * Assumes that IP has pulled up all the extension headers as well as the 1881 * ICMPv6 header. 
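 *
 * For ICMP6_PACKET_TOO_BIG, if the application has asked for path MTU
 * information (udp_ipv6_recvpathmtu), the routine builds an empty
 * T_UNITDATA_IND whose layout (sketched here; it mirrors the code below)
 * is:
 *
 *	[ struct T_unitdata_ind ][ sin6_t ][ struct T_opthdr, IPV6_PATHMTU ]
 *	    [ struct ip6_mtuinfo ]
 *
 * with b_cont left NULL, i.e. only the ancillary data is delivered.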
1882 */ 1883 static void 1884 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 1885 { 1886 icmp6_t *icmp6; 1887 ip6_t *ip6h, *outer_ip6h; 1888 uint16_t iph_hdr_length; 1889 uint8_t *nexthdrp; 1890 udpha_t *udpha; 1891 sin6_t sin6; 1892 mblk_t *mp1; 1893 int error = 0; 1894 udp_t *udp = Q_TO_UDP(q); 1895 udp_stack_t *us = udp->udp_us; 1896 1897 outer_ip6h = (ip6_t *)mp->b_rptr; 1898 if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6) 1899 iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h); 1900 else 1901 iph_hdr_length = IPV6_HDR_LEN; 1902 icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length]; 1903 ip6h = (ip6_t *)&icmp6[1]; 1904 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) { 1905 freemsg(mp); 1906 return; 1907 } 1908 udpha = (udpha_t *)((char *)ip6h + iph_hdr_length); 1909 1910 switch (icmp6->icmp6_type) { 1911 case ICMP6_DST_UNREACH: 1912 switch (icmp6->icmp6_code) { 1913 case ICMP6_DST_UNREACH_NOPORT: 1914 error = ECONNREFUSED; 1915 break; 1916 case ICMP6_DST_UNREACH_ADMIN: 1917 case ICMP6_DST_UNREACH_NOROUTE: 1918 case ICMP6_DST_UNREACH_BEYONDSCOPE: 1919 case ICMP6_DST_UNREACH_ADDR: 1920 /* Transient errors */ 1921 break; 1922 default: 1923 break; 1924 } 1925 break; 1926 case ICMP6_PACKET_TOO_BIG: { 1927 struct T_unitdata_ind *tudi; 1928 struct T_opthdr *toh; 1929 size_t udi_size; 1930 mblk_t *newmp; 1931 t_scalar_t opt_length = sizeof (struct T_opthdr) + 1932 sizeof (struct ip6_mtuinfo); 1933 sin6_t *sin6; 1934 struct ip6_mtuinfo *mtuinfo; 1935 1936 /* 1937 * If the application has requested to receive path mtu 1938 * information, send up an empty message containing an 1939 * IPV6_PATHMTU ancillary data item. 1940 */ 1941 if (!udp->udp_ipv6_recvpathmtu) 1942 break; 1943 1944 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 1945 opt_length; 1946 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 1947 BUMP_MIB(&us->us_udp_mib, udpInErrors); 1948 break; 1949 } 1950 1951 /* 1952 * newmp->b_cont is left to NULL on purpose. This is an 1953 * empty message containing only ancillary data. 1954 */ 1955 newmp->b_datap->db_type = M_PROTO; 1956 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 1957 newmp->b_wptr = (uchar_t *)tudi + udi_size; 1958 tudi->PRIM_type = T_UNITDATA_IND; 1959 tudi->SRC_length = sizeof (sin6_t); 1960 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 1961 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 1962 tudi->OPT_length = opt_length; 1963 1964 sin6 = (sin6_t *)&tudi[1]; 1965 bzero(sin6, sizeof (sin6_t)); 1966 sin6->sin6_family = AF_INET6; 1967 sin6->sin6_addr = udp->udp_v6dst; 1968 1969 toh = (struct T_opthdr *)&sin6[1]; 1970 toh->level = IPPROTO_IPV6; 1971 toh->name = IPV6_PATHMTU; 1972 toh->len = opt_length; 1973 toh->status = 0; 1974 1975 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 1976 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 1977 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 1978 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 1979 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 1980 /* 1981 * We've consumed everything we need from the original 1982 * message. Free it, then send our empty message. 
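 * For illustration, the message built above and sent upstream looks like
 * this (a sketch of the layout, not text from the original source):
 *
 *	M_PROTO mblk, b_cont left NULL:
 *	  struct T_unitdata_ind		T_UNITDATA_IND
 *	  sin6_t			address set to udp_v6dst
 *	  struct T_opthdr		{ IPPROTO_IPV6, IPV6_PATHMTU }
 *	  struct ip6_mtuinfo		ip6m_mtu = icmp6->icmp6_mtu
 *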
1983 */ 1984 freemsg(mp); 1985 putnext(q, newmp); 1986 return; 1987 } 1988 case ICMP6_TIME_EXCEEDED: 1989 /* Transient errors */ 1990 break; 1991 case ICMP6_PARAM_PROB: 1992 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 1993 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 1994 (uchar_t *)ip6h + icmp6->icmp6_pptr == 1995 (uchar_t *)nexthdrp) { 1996 error = ECONNREFUSED; 1997 break; 1998 } 1999 break; 2000 } 2001 if (error == 0) { 2002 freemsg(mp); 2003 return; 2004 } 2005 2006 /* 2007 * Deliver T_UDERROR_IND when the application has asked for it. 2008 * The socket layer enables this automatically when connected. 2009 */ 2010 if (!udp->udp_dgram_errind) { 2011 freemsg(mp); 2012 return; 2013 } 2014 2015 sin6 = sin6_null; 2016 sin6.sin6_family = AF_INET6; 2017 sin6.sin6_addr = ip6h->ip6_dst; 2018 sin6.sin6_port = udpha->uha_dst_port; 2019 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2020 2021 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2022 error); 2023 if (mp1) 2024 putnext(q, mp1); 2025 freemsg(mp); 2026 } 2027 2028 /* 2029 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 2030 * The local address is filled in if endpoint is bound. The remote address 2031 * is filled in if remote address has been precified ("connected endpoint") 2032 * (The concept of connected CLTS sockets is alien to published TPI 2033 * but we support it anyway). 2034 */ 2035 static void 2036 udp_addr_req(queue_t *q, mblk_t *mp) 2037 { 2038 sin_t *sin; 2039 sin6_t *sin6; 2040 mblk_t *ackmp; 2041 struct T_addr_ack *taa; 2042 udp_t *udp = Q_TO_UDP(q); 2043 2044 /* Make it large enough for worst case */ 2045 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2046 2 * sizeof (sin6_t), 1); 2047 if (ackmp == NULL) { 2048 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2049 return; 2050 } 2051 taa = (struct T_addr_ack *)ackmp->b_rptr; 2052 2053 bzero(taa, sizeof (struct T_addr_ack)); 2054 ackmp->b_wptr = (uchar_t *)&taa[1]; 2055 2056 taa->PRIM_type = T_ADDR_ACK; 2057 ackmp->b_datap->db_type = M_PCPROTO; 2058 rw_enter(&udp->udp_rwlock, RW_READER); 2059 /* 2060 * Note: Following code assumes 32 bit alignment of basic 2061 * data structures like sin_t and struct T_addr_ack. 2062 */ 2063 if (udp->udp_state != TS_UNBND) { 2064 /* 2065 * Fill in local address first 2066 */ 2067 taa->LOCADDR_offset = sizeof (*taa); 2068 if (udp->udp_family == AF_INET) { 2069 taa->LOCADDR_length = sizeof (sin_t); 2070 sin = (sin_t *)&taa[1]; 2071 /* Fill zeroes and then initialize non-zero fields */ 2072 *sin = sin_null; 2073 sin->sin_family = AF_INET; 2074 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2075 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2076 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2077 sin->sin_addr.s_addr); 2078 } else { 2079 /* 2080 * INADDR_ANY 2081 * udp_v6src is not set, we might be bound to 2082 * broadcast/multicast. 
Use udp_bound_v6src as 2083 * local address instead (that could 2084 * also still be INADDR_ANY) 2085 */ 2086 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2087 sin->sin_addr.s_addr); 2088 } 2089 sin->sin_port = udp->udp_port; 2090 ackmp->b_wptr = (uchar_t *)&sin[1]; 2091 if (udp->udp_state == TS_DATA_XFER) { 2092 /* 2093 * connected, fill remote address too 2094 */ 2095 taa->REMADDR_length = sizeof (sin_t); 2096 /* assumed 32-bit alignment */ 2097 taa->REMADDR_offset = taa->LOCADDR_offset + 2098 taa->LOCADDR_length; 2099 2100 sin = (sin_t *)(ackmp->b_rptr + 2101 taa->REMADDR_offset); 2102 /* initialize */ 2103 *sin = sin_null; 2104 sin->sin_family = AF_INET; 2105 sin->sin_addr.s_addr = 2106 V4_PART_OF_V6(udp->udp_v6dst); 2107 sin->sin_port = udp->udp_dstport; 2108 ackmp->b_wptr = (uchar_t *)&sin[1]; 2109 } 2110 } else { 2111 taa->LOCADDR_length = sizeof (sin6_t); 2112 sin6 = (sin6_t *)&taa[1]; 2113 /* Fill zeroes and then initialize non-zero fields */ 2114 *sin6 = sin6_null; 2115 sin6->sin6_family = AF_INET6; 2116 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2117 sin6->sin6_addr = udp->udp_v6src; 2118 } else { 2119 /* 2120 * UNSPECIFIED 2121 * udp_v6src is not set, we might be bound to 2122 * broadcast/multicast. Use udp_bound_v6src as 2123 * local address instead (that could 2124 * also still be UNSPECIFIED) 2125 */ 2126 sin6->sin6_addr = 2127 udp->udp_bound_v6src; 2128 } 2129 sin6->sin6_port = udp->udp_port; 2130 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2131 if (udp->udp_state == TS_DATA_XFER) { 2132 /* 2133 * connected, fill remote address too 2134 */ 2135 taa->REMADDR_length = sizeof (sin6_t); 2136 /* assumed 32-bit alignment */ 2137 taa->REMADDR_offset = taa->LOCADDR_offset + 2138 taa->LOCADDR_length; 2139 2140 sin6 = (sin6_t *)(ackmp->b_rptr + 2141 taa->REMADDR_offset); 2142 /* initialize */ 2143 *sin6 = sin6_null; 2144 sin6->sin6_family = AF_INET6; 2145 sin6->sin6_addr = udp->udp_v6dst; 2146 sin6->sin6_port = udp->udp_dstport; 2147 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2148 } 2149 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2150 } 2151 } 2152 rw_exit(&udp->udp_rwlock); 2153 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2154 qreply(q, ackmp); 2155 } 2156 2157 static void 2158 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2159 { 2160 if (udp->udp_family == AF_INET) { 2161 *tap = udp_g_t_info_ack_ipv4; 2162 } else { 2163 *tap = udp_g_t_info_ack_ipv6; 2164 } 2165 tap->CURRENT_state = udp->udp_state; 2166 tap->OPT_size = udp_max_optsize; 2167 } 2168 2169 /* 2170 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2171 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2172 * udp_g_t_info_ack. The current state of the stream is copied from 2173 * udp_state. 2174 */ 2175 static void 2176 udp_capability_req(queue_t *q, mblk_t *mp) 2177 { 2178 t_uscalar_t cap_bits1; 2179 struct T_capability_ack *tcap; 2180 udp_t *udp = Q_TO_UDP(q); 2181 2182 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2183 2184 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2185 mp->b_datap->db_type, T_CAPABILITY_ACK); 2186 if (!mp) 2187 return; 2188 2189 tcap = (struct T_capability_ack *)mp->b_rptr; 2190 tcap->CAP_bits1 = 0; 2191 2192 if (cap_bits1 & TC1_INFO) { 2193 udp_copy_info(&tcap->INFO_ack, udp); 2194 tcap->CAP_bits1 |= TC1_INFO; 2195 } 2196 2197 qreply(q, mp); 2198 } 2199 2200 /* 2201 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2202 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 
2203 * The current state of the stream is copied from udp_state.
2204 */
2205 static void
2206 udp_info_req(queue_t *q, mblk_t *mp)
2207 {
2208 udp_t *udp = Q_TO_UDP(q);
2209
2210 /* Create a T_INFO_ACK message. */
2211 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
2212 T_INFO_ACK);
2213 if (!mp)
2214 return;
2215 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
2216 qreply(q, mp);
2217 }
2218
2219 /*
2220 * IP recognizes seven kinds of bind requests:
2221 *
2222 * - A zero-length address binds only to the protocol number.
2223 *
2224 * - A 4-byte address is treated as a request to
2225 * validate that the address is a valid local IPv4
2226 * address, appropriate for an application to bind to.
2227 * IP does the verification, but does not make any note
2228 * of the address at this time.
2229 *
2230 * - A 16-byte address is treated as a request
2231 * to validate a local IPv6 address, as in the 4-byte
2232 * address case above.
2233 *
2234 * - A 16-byte sockaddr_in to validate the local IPv4 address and also
2235 * use it for the inbound fanout of packets.
2236 *
2237 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
2238 * use it for the inbound fanout of packets.
2239 *
2240 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
2241 * information consisting of local and remote addresses
2242 * and ports. In this case, the addresses are both
2243 * validated as appropriate for this operation, and, if
2244 * so, the information is retained for use in the
2245 * inbound fanout.
2246 *
2247 * - A 36-byte address (ipa6_conn_t) containing complete IPv6
2248 * fanout information, like the 12-byte case above.
2249 *
2250 * IP will also fill in the IRE request mblk with information
2251 * regarding our peer. In all cases, we notify IP of our protocol
2252 * type by appending a single protocol byte to the bind request.
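 *
 * For illustration (this mirrors what udp_ip_bind_mp() below builds for
 * the 12-byte ipa_conn_t case; it is a sketch, not part of the original
 * description):
 *
 *	M_PROTO mblk:	struct T_bind_req | ipa_conn_t | IPPROTO_UDP byte
 *	b_cont:		sizeof (ire_t) buffer of db_type IRE_DB_REQ_TYPE
 *			for IP to fill in with information about our peer
 *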
2253 */ 2254 static mblk_t * 2255 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2256 { 2257 char *cp; 2258 mblk_t *mp; 2259 struct T_bind_req *tbr; 2260 ipa_conn_t *ac; 2261 ipa6_conn_t *ac6; 2262 sin_t *sin; 2263 sin6_t *sin6; 2264 2265 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2266 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 2267 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2268 if (!mp) 2269 return (mp); 2270 mp->b_datap->db_type = M_PROTO; 2271 tbr = (struct T_bind_req *)mp->b_rptr; 2272 tbr->PRIM_type = bind_prim; 2273 tbr->ADDR_offset = sizeof (*tbr); 2274 tbr->CONIND_number = 0; 2275 tbr->ADDR_length = addr_length; 2276 cp = (char *)&tbr[1]; 2277 switch (addr_length) { 2278 case sizeof (ipa_conn_t): 2279 ASSERT(udp->udp_family == AF_INET); 2280 /* Append a request for an IRE */ 2281 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2282 if (!mp->b_cont) { 2283 freemsg(mp); 2284 return (NULL); 2285 } 2286 mp->b_cont->b_wptr += sizeof (ire_t); 2287 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2288 2289 /* cp known to be 32 bit aligned */ 2290 ac = (ipa_conn_t *)cp; 2291 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2292 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 2293 ac->ac_fport = udp->udp_dstport; 2294 ac->ac_lport = udp->udp_port; 2295 break; 2296 2297 case sizeof (ipa6_conn_t): 2298 ASSERT(udp->udp_family == AF_INET6); 2299 /* Append a request for an IRE */ 2300 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2301 if (!mp->b_cont) { 2302 freemsg(mp); 2303 return (NULL); 2304 } 2305 mp->b_cont->b_wptr += sizeof (ire_t); 2306 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2307 2308 /* cp known to be 32 bit aligned */ 2309 ac6 = (ipa6_conn_t *)cp; 2310 ac6->ac6_laddr = udp->udp_v6src; 2311 ac6->ac6_faddr = udp->udp_v6dst; 2312 ac6->ac6_fport = udp->udp_dstport; 2313 ac6->ac6_lport = udp->udp_port; 2314 break; 2315 2316 case sizeof (sin_t): 2317 ASSERT(udp->udp_family == AF_INET); 2318 /* Append a request for an IRE */ 2319 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2320 if (!mp->b_cont) { 2321 freemsg(mp); 2322 return (NULL); 2323 } 2324 mp->b_cont->b_wptr += sizeof (ire_t); 2325 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2326 2327 sin = (sin_t *)cp; 2328 *sin = sin_null; 2329 sin->sin_family = AF_INET; 2330 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2331 sin->sin_port = udp->udp_port; 2332 break; 2333 2334 case sizeof (sin6_t): 2335 ASSERT(udp->udp_family == AF_INET6); 2336 /* Append a request for an IRE */ 2337 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2338 if (!mp->b_cont) { 2339 freemsg(mp); 2340 return (NULL); 2341 } 2342 mp->b_cont->b_wptr += sizeof (ire_t); 2343 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2344 2345 sin6 = (sin6_t *)cp; 2346 *sin6 = sin6_null; 2347 sin6->sin6_family = AF_INET6; 2348 sin6->sin6_addr = udp->udp_bound_v6src; 2349 sin6->sin6_port = udp->udp_port; 2350 break; 2351 } 2352 /* Add protocol number to end */ 2353 cp[addr_length] = (char)IPPROTO_UDP; 2354 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2355 return (mp); 2356 } 2357 2358 /* For /dev/udp aka AF_INET open */ 2359 static int 2360 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2361 { 2362 return (udp_open(q, devp, flag, sflag, credp, B_FALSE)); 2363 } 2364 2365 /* For /dev/udp6 aka AF_INET6 open */ 2366 static int 2367 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2368 { 2369 return (udp_open(q, devp, flag, sflag, credp, B_TRUE)); 2370 } 2371 2372 /* 2373 * 
This is the open routine for udp. It allocates a udp_t structure for 2374 * the stream and, on the first open of the module, creates an ND table. 2375 */ 2376 /*ARGSUSED2*/ 2377 static int 2378 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp, 2379 boolean_t isv6) 2380 { 2381 int err; 2382 udp_t *udp; 2383 conn_t *connp; 2384 dev_t conn_dev; 2385 zoneid_t zoneid; 2386 netstack_t *ns; 2387 udp_stack_t *us; 2388 vmem_t *minor_arena; 2389 2390 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2391 2392 /* If the stream is already open, return immediately. */ 2393 if (q->q_ptr != NULL) 2394 return (0); 2395 2396 if (sflag == MODOPEN) 2397 return (EINVAL); 2398 2399 ns = netstack_find_by_cred(credp); 2400 ASSERT(ns != NULL); 2401 us = ns->netstack_udp; 2402 ASSERT(us != NULL); 2403 2404 /* 2405 * For exclusive stacks we set the zoneid to zero 2406 * to make UDP operate as if in the global zone. 2407 */ 2408 if (ns->netstack_stackid != GLOBAL_NETSTACKID) 2409 zoneid = GLOBAL_ZONEID; 2410 else 2411 zoneid = crgetzoneid(credp); 2412 2413 if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) && 2414 ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) { 2415 minor_arena = ip_minor_arena_la; 2416 } else { 2417 /* 2418 * Either minor numbers in the large arena were exhausted 2419 * or a non socket application is doing the open. 2420 * Try to allocate from the small arena. 2421 */ 2422 if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0) { 2423 netstack_rele(ns); 2424 return (EBUSY); 2425 } 2426 minor_arena = ip_minor_arena_sa; 2427 } 2428 2429 *devp = makedevice(getemajor(*devp), (minor_t)conn_dev); 2430 2431 connp = ipcl_conn_create(IPCL_UDPCONN, KM_SLEEP, ns); 2432 connp->conn_dev = conn_dev; 2433 connp->conn_minor_arena = minor_arena; 2434 udp = connp->conn_udp; 2435 2436 /* 2437 * ipcl_conn_create did a netstack_hold. Undo the hold that was 2438 * done by netstack_find_by_cred() 2439 */ 2440 netstack_rele(ns); 2441 2442 /* 2443 * Initialize the udp_t structure for this stream. 2444 */ 2445 q->q_ptr = connp; 2446 WR(q)->q_ptr = connp; 2447 connp->conn_rq = q; 2448 connp->conn_wq = WR(q); 2449 2450 rw_enter(&udp->udp_rwlock, RW_WRITER); 2451 ASSERT(connp->conn_ulp == IPPROTO_UDP); 2452 ASSERT(connp->conn_udp == udp); 2453 ASSERT(udp->udp_connp == connp); 2454 2455 /* Set the initial state of the stream and the privilege status. */ 2456 udp->udp_state = TS_UNBND; 2457 if (isv6) { 2458 udp->udp_family = AF_INET6; 2459 udp->udp_ipversion = IPV6_VERSION; 2460 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2461 udp->udp_ttl = us->us_ipv6_hoplimit; 2462 connp->conn_af_isv6 = B_TRUE; 2463 connp->conn_flags |= IPCL_ISV6; 2464 } else { 2465 udp->udp_family = AF_INET; 2466 udp->udp_ipversion = IPV4_VERSION; 2467 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2468 udp->udp_ttl = us->us_ipv4_ttl; 2469 connp->conn_af_isv6 = B_FALSE; 2470 connp->conn_flags &= ~IPCL_ISV6; 2471 } 2472 2473 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2474 udp->udp_pending_op = -1; 2475 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2476 connp->conn_zoneid = zoneid; 2477 2478 udp->udp_open_time = lbolt64; 2479 udp->udp_open_pid = curproc->p_pid; 2480 2481 /* 2482 * If the caller has the process-wide flag set, then default to MAC 2483 * exempt mode. This allows read-down to unlabeled hosts. 
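 * (Illustration, an assumption not spelled out here: a sufficiently
 * privileged process would typically have turned the flag on with
 * setpflags(NET_MAC_AWARE, 1) before opening the endpoint; the
 * getpflags() check below then carries that into conn_mac_exempt.)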
2484 */ 2485 if (getpflags(NET_MAC_AWARE, credp) != 0) 2486 connp->conn_mac_exempt = B_TRUE; 2487 2488 if (flag & SO_SOCKSTR) { 2489 connp->conn_flags |= IPCL_SOCKET; 2490 udp->udp_issocket = B_TRUE; 2491 udp->udp_direct_sockfs = B_TRUE; 2492 } 2493 2494 connp->conn_ulp_labeled = is_system_labeled(); 2495 2496 udp->udp_us = us; 2497 2498 q->q_hiwat = us->us_recv_hiwat; 2499 WR(q)->q_hiwat = us->us_xmit_hiwat; 2500 WR(q)->q_lowat = us->us_xmit_lowat; 2501 2502 connp->conn_recv = udp_input; 2503 crhold(credp); 2504 connp->conn_cred = credp; 2505 2506 mutex_enter(&connp->conn_lock); 2507 connp->conn_state_flags &= ~CONN_INCIPIENT; 2508 mutex_exit(&connp->conn_lock); 2509 2510 qprocson(q); 2511 2512 if (udp->udp_family == AF_INET6) { 2513 /* Build initial header template for transmit */ 2514 if ((err = udp_build_hdrs(udp)) != 0) { 2515 rw_exit(&udp->udp_rwlock); 2516 qprocsoff(q); 2517 ipcl_conn_destroy(connp); 2518 return (err); 2519 } 2520 } 2521 rw_exit(&udp->udp_rwlock); 2522 2523 /* Set the Stream head write offset and high watermark. */ 2524 (void) mi_set_sth_wroff(q, 2525 udp->udp_max_hdr_len + us->us_wroff_extra); 2526 (void) mi_set_sth_hiwat(q, udp_set_rcv_hiwat(udp, q->q_hiwat)); 2527 2528 return (0); 2529 } 2530 2531 /* 2532 * Which UDP options OK to set through T_UNITDATA_REQ... 2533 */ 2534 /* ARGSUSED */ 2535 static boolean_t 2536 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2537 { 2538 return (B_TRUE); 2539 } 2540 2541 /* 2542 * This routine gets default values of certain options whose default 2543 * values are maintained by protcol specific code 2544 */ 2545 /* ARGSUSED */ 2546 int 2547 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2548 { 2549 udp_t *udp = Q_TO_UDP(q); 2550 udp_stack_t *us = udp->udp_us; 2551 int *i1 = (int *)ptr; 2552 2553 switch (level) { 2554 case IPPROTO_IP: 2555 switch (name) { 2556 case IP_MULTICAST_TTL: 2557 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2558 return (sizeof (uchar_t)); 2559 case IP_MULTICAST_LOOP: 2560 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2561 return (sizeof (uchar_t)); 2562 } 2563 break; 2564 case IPPROTO_IPV6: 2565 switch (name) { 2566 case IPV6_MULTICAST_HOPS: 2567 *i1 = IP_DEFAULT_MULTICAST_TTL; 2568 return (sizeof (int)); 2569 case IPV6_MULTICAST_LOOP: 2570 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2571 return (sizeof (int)); 2572 case IPV6_UNICAST_HOPS: 2573 *i1 = us->us_ipv6_hoplimit; 2574 return (sizeof (int)); 2575 } 2576 break; 2577 } 2578 return (-1); 2579 } 2580 2581 /* 2582 * This routine retrieves the current status of socket options. 2583 * It returns the size of the option retrieved. 2584 */ 2585 int 2586 udp_opt_get_locked(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2587 { 2588 int *i1 = (int *)ptr; 2589 conn_t *connp; 2590 udp_t *udp; 2591 ip6_pkt_t *ipp; 2592 int len; 2593 udp_stack_t *us; 2594 2595 connp = Q_TO_CONN(q); 2596 udp = connp->conn_udp; 2597 ipp = &udp->udp_sticky_ipp; 2598 us = udp->udp_us; 2599 2600 switch (level) { 2601 case SOL_SOCKET: 2602 switch (name) { 2603 case SO_DEBUG: 2604 *i1 = udp->udp_debug; 2605 break; /* goto sizeof (int) option return */ 2606 case SO_REUSEADDR: 2607 *i1 = udp->udp_reuseaddr; 2608 break; /* goto sizeof (int) option return */ 2609 case SO_TYPE: 2610 *i1 = SOCK_DGRAM; 2611 break; /* goto sizeof (int) option return */ 2612 2613 /* 2614 * The following three items are available here, 2615 * but are only meaningful to IP. 
2616 */ 2617 case SO_DONTROUTE: 2618 *i1 = udp->udp_dontroute; 2619 break; /* goto sizeof (int) option return */ 2620 case SO_USELOOPBACK: 2621 *i1 = udp->udp_useloopback; 2622 break; /* goto sizeof (int) option return */ 2623 case SO_BROADCAST: 2624 *i1 = udp->udp_broadcast; 2625 break; /* goto sizeof (int) option return */ 2626 2627 case SO_SNDBUF: 2628 *i1 = q->q_hiwat; 2629 break; /* goto sizeof (int) option return */ 2630 case SO_RCVBUF: 2631 *i1 = RD(q)->q_hiwat; 2632 break; /* goto sizeof (int) option return */ 2633 case SO_DGRAM_ERRIND: 2634 *i1 = udp->udp_dgram_errind; 2635 break; /* goto sizeof (int) option return */ 2636 case SO_RECVUCRED: 2637 *i1 = udp->udp_recvucred; 2638 break; /* goto sizeof (int) option return */ 2639 case SO_TIMESTAMP: 2640 *i1 = udp->udp_timestamp; 2641 break; /* goto sizeof (int) option return */ 2642 case SO_ANON_MLP: 2643 *i1 = connp->conn_anon_mlp; 2644 break; /* goto sizeof (int) option return */ 2645 case SO_MAC_EXEMPT: 2646 *i1 = connp->conn_mac_exempt; 2647 break; /* goto sizeof (int) option return */ 2648 case SO_ALLZONES: 2649 *i1 = connp->conn_allzones; 2650 break; /* goto sizeof (int) option return */ 2651 case SO_EXCLBIND: 2652 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 2653 break; 2654 case SO_PROTOTYPE: 2655 *i1 = IPPROTO_UDP; 2656 break; 2657 case SO_DOMAIN: 2658 *i1 = udp->udp_family; 2659 break; 2660 default: 2661 return (-1); 2662 } 2663 break; 2664 case IPPROTO_IP: 2665 if (udp->udp_family != AF_INET) 2666 return (-1); 2667 switch (name) { 2668 case IP_OPTIONS: 2669 case T_IP_OPTIONS: 2670 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 2671 if (len > 0) { 2672 bcopy(udp->udp_ip_rcv_options + 2673 udp->udp_label_len, ptr, len); 2674 } 2675 return (len); 2676 case IP_TOS: 2677 case T_IP_TOS: 2678 *i1 = (int)udp->udp_type_of_service; 2679 break; /* goto sizeof (int) option return */ 2680 case IP_TTL: 2681 *i1 = (int)udp->udp_ttl; 2682 break; /* goto sizeof (int) option return */ 2683 case IP_DHCPINIT_IF: 2684 return (-EINVAL); 2685 case IP_NEXTHOP: 2686 case IP_RECVPKTINFO: 2687 /* 2688 * This also handles IP_PKTINFO. 2689 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 2690 * Differentiation is based on the size of the argument 2691 * passed in. 2692 * This option is handled in IP which will return an 2693 * error for IP_PKTINFO as it's not supported as a 2694 * sticky option. 
2695 */ 2696 return (-EINVAL); 2697 case IP_MULTICAST_IF: 2698 /* 0 address if not set */ 2699 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2700 return (sizeof (ipaddr_t)); 2701 case IP_MULTICAST_TTL: 2702 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2703 return (sizeof (uchar_t)); 2704 case IP_MULTICAST_LOOP: 2705 *ptr = connp->conn_multicast_loop; 2706 return (sizeof (uint8_t)); 2707 case IP_RECVOPTS: 2708 *i1 = udp->udp_recvopts; 2709 break; /* goto sizeof (int) option return */ 2710 case IP_RECVDSTADDR: 2711 *i1 = udp->udp_recvdstaddr; 2712 break; /* goto sizeof (int) option return */ 2713 case IP_RECVIF: 2714 *i1 = udp->udp_recvif; 2715 break; /* goto sizeof (int) option return */ 2716 case IP_RECVSLLA: 2717 *i1 = udp->udp_recvslla; 2718 break; /* goto sizeof (int) option return */ 2719 case IP_RECVTTL: 2720 *i1 = udp->udp_recvttl; 2721 break; /* goto sizeof (int) option return */ 2722 case IP_ADD_MEMBERSHIP: 2723 case IP_DROP_MEMBERSHIP: 2724 case IP_BLOCK_SOURCE: 2725 case IP_UNBLOCK_SOURCE: 2726 case IP_ADD_SOURCE_MEMBERSHIP: 2727 case IP_DROP_SOURCE_MEMBERSHIP: 2728 case MCAST_JOIN_GROUP: 2729 case MCAST_LEAVE_GROUP: 2730 case MCAST_BLOCK_SOURCE: 2731 case MCAST_UNBLOCK_SOURCE: 2732 case MCAST_JOIN_SOURCE_GROUP: 2733 case MCAST_LEAVE_SOURCE_GROUP: 2734 case IP_DONTFAILOVER_IF: 2735 /* cannot "get" the value for these */ 2736 return (-1); 2737 case IP_BOUND_IF: 2738 /* Zero if not set */ 2739 *i1 = udp->udp_bound_if; 2740 break; /* goto sizeof (int) option return */ 2741 case IP_UNSPEC_SRC: 2742 *i1 = udp->udp_unspec_source; 2743 break; /* goto sizeof (int) option return */ 2744 case IP_BROADCAST_TTL: 2745 *(uchar_t *)ptr = connp->conn_broadcast_ttl; 2746 return (sizeof (uchar_t)); 2747 default: 2748 return (-1); 2749 } 2750 break; 2751 case IPPROTO_IPV6: 2752 if (udp->udp_family != AF_INET6) 2753 return (-1); 2754 switch (name) { 2755 case IPV6_UNICAST_HOPS: 2756 *i1 = (unsigned int)udp->udp_ttl; 2757 break; /* goto sizeof (int) option return */ 2758 case IPV6_MULTICAST_IF: 2759 /* 0 index if not set */ 2760 *i1 = udp->udp_multicast_if_index; 2761 break; /* goto sizeof (int) option return */ 2762 case IPV6_MULTICAST_HOPS: 2763 *i1 = udp->udp_multicast_ttl; 2764 break; /* goto sizeof (int) option return */ 2765 case IPV6_MULTICAST_LOOP: 2766 *i1 = connp->conn_multicast_loop; 2767 break; /* goto sizeof (int) option return */ 2768 case IPV6_JOIN_GROUP: 2769 case IPV6_LEAVE_GROUP: 2770 case MCAST_JOIN_GROUP: 2771 case MCAST_LEAVE_GROUP: 2772 case MCAST_BLOCK_SOURCE: 2773 case MCAST_UNBLOCK_SOURCE: 2774 case MCAST_JOIN_SOURCE_GROUP: 2775 case MCAST_LEAVE_SOURCE_GROUP: 2776 /* cannot "get" the value for these */ 2777 return (-1); 2778 case IPV6_BOUND_IF: 2779 /* Zero if not set */ 2780 *i1 = udp->udp_bound_if; 2781 break; /* goto sizeof (int) option return */ 2782 case IPV6_UNSPEC_SRC: 2783 *i1 = udp->udp_unspec_source; 2784 break; /* goto sizeof (int) option return */ 2785 case IPV6_RECVPKTINFO: 2786 *i1 = udp->udp_ip_recvpktinfo; 2787 break; /* goto sizeof (int) option return */ 2788 case IPV6_RECVTCLASS: 2789 *i1 = udp->udp_ipv6_recvtclass; 2790 break; /* goto sizeof (int) option return */ 2791 case IPV6_RECVPATHMTU: 2792 *i1 = udp->udp_ipv6_recvpathmtu; 2793 break; /* goto sizeof (int) option return */ 2794 case IPV6_RECVHOPLIMIT: 2795 *i1 = udp->udp_ipv6_recvhoplimit; 2796 break; /* goto sizeof (int) option return */ 2797 case IPV6_RECVHOPOPTS: 2798 *i1 = udp->udp_ipv6_recvhopopts; 2799 break; /* goto sizeof (int) option return */ 2800 case IPV6_RECVDSTOPTS: 2801 *i1 = 
udp->udp_ipv6_recvdstopts; 2802 break; /* goto sizeof (int) option return */ 2803 case _OLD_IPV6_RECVDSTOPTS: 2804 *i1 = udp->udp_old_ipv6_recvdstopts; 2805 break; /* goto sizeof (int) option return */ 2806 case IPV6_RECVRTHDRDSTOPTS: 2807 *i1 = udp->udp_ipv6_recvrthdrdstopts; 2808 break; /* goto sizeof (int) option return */ 2809 case IPV6_RECVRTHDR: 2810 *i1 = udp->udp_ipv6_recvrthdr; 2811 break; /* goto sizeof (int) option return */ 2812 case IPV6_PKTINFO: { 2813 /* XXX assumes that caller has room for max size! */ 2814 struct in6_pktinfo *pkti; 2815 2816 pkti = (struct in6_pktinfo *)ptr; 2817 if (ipp->ipp_fields & IPPF_IFINDEX) 2818 pkti->ipi6_ifindex = ipp->ipp_ifindex; 2819 else 2820 pkti->ipi6_ifindex = 0; 2821 if (ipp->ipp_fields & IPPF_ADDR) 2822 pkti->ipi6_addr = ipp->ipp_addr; 2823 else 2824 pkti->ipi6_addr = ipv6_all_zeros; 2825 return (sizeof (struct in6_pktinfo)); 2826 } 2827 case IPV6_TCLASS: 2828 if (ipp->ipp_fields & IPPF_TCLASS) 2829 *i1 = ipp->ipp_tclass; 2830 else 2831 *i1 = IPV6_FLOW_TCLASS( 2832 IPV6_DEFAULT_VERS_AND_FLOW); 2833 break; /* goto sizeof (int) option return */ 2834 case IPV6_NEXTHOP: { 2835 sin6_t *sin6 = (sin6_t *)ptr; 2836 2837 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 2838 return (0); 2839 *sin6 = sin6_null; 2840 sin6->sin6_family = AF_INET6; 2841 sin6->sin6_addr = ipp->ipp_nexthop; 2842 return (sizeof (sin6_t)); 2843 } 2844 case IPV6_HOPOPTS: 2845 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 2846 return (0); 2847 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 2848 return (0); 2849 /* 2850 * The cipso/label option is added by kernel. 2851 * User is not usually aware of this option. 2852 * We copy out the hbh opt after the label option. 2853 */ 2854 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 2855 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2856 if (udp->udp_label_len_v6 > 0) { 2857 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 2858 ptr[1] = (ipp->ipp_hopoptslen - 2859 udp->udp_label_len_v6 + 7) / 8 - 1; 2860 } 2861 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 2862 case IPV6_RTHDRDSTOPTS: 2863 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 2864 return (0); 2865 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 2866 return (ipp->ipp_rtdstoptslen); 2867 case IPV6_RTHDR: 2868 if (!(ipp->ipp_fields & IPPF_RTHDR)) 2869 return (0); 2870 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 2871 return (ipp->ipp_rthdrlen); 2872 case IPV6_DSTOPTS: 2873 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 2874 return (0); 2875 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 2876 return (ipp->ipp_dstoptslen); 2877 case IPV6_PATHMTU: 2878 return (ip_fill_mtuinfo(&udp->udp_v6dst, 2879 udp->udp_dstport, (struct ip6_mtuinfo *)ptr, 2880 us->us_netstack)); 2881 default: 2882 return (-1); 2883 } 2884 break; 2885 case IPPROTO_UDP: 2886 switch (name) { 2887 case UDP_ANONPRIVBIND: 2888 *i1 = udp->udp_anon_priv_bind; 2889 break; 2890 case UDP_EXCLBIND: 2891 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 2892 break; 2893 case UDP_RCVHDR: 2894 *i1 = udp->udp_rcvhdr ? 
1 : 0; 2895 break; 2896 case UDP_NAT_T_ENDPOINT: 2897 *i1 = udp->udp_nat_t_endpoint; 2898 break; 2899 default: 2900 return (-1); 2901 } 2902 break; 2903 default: 2904 return (-1); 2905 } 2906 return (sizeof (int)); 2907 } 2908 2909 int 2910 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2911 { 2912 udp_t *udp; 2913 int err; 2914 2915 udp = Q_TO_UDP(q); 2916 2917 rw_enter(&udp->udp_rwlock, RW_READER); 2918 err = udp_opt_get_locked(q, level, name, ptr); 2919 rw_exit(&udp->udp_rwlock); 2920 return (err); 2921 } 2922 2923 /* 2924 * This routine sets socket options. 2925 */ 2926 /* ARGSUSED */ 2927 int 2928 udp_opt_set_locked(queue_t *q, uint_t optset_context, int level, 2929 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 2930 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 2931 { 2932 udpattrs_t *attrs = thisdg_attrs; 2933 int *i1 = (int *)invalp; 2934 boolean_t onoff = (*i1 == 0) ? 0 : 1; 2935 boolean_t checkonly; 2936 int error; 2937 conn_t *connp; 2938 udp_t *udp; 2939 uint_t newlen; 2940 udp_stack_t *us; 2941 size_t sth_wroff; 2942 2943 connp = Q_TO_CONN(q); 2944 udp = connp->conn_udp; 2945 us = udp->udp_us; 2946 2947 switch (optset_context) { 2948 case SETFN_OPTCOM_CHECKONLY: 2949 checkonly = B_TRUE; 2950 /* 2951 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 2952 * inlen != 0 implies value supplied and 2953 * we have to "pretend" to set it. 2954 * inlen == 0 implies that there is no 2955 * value part in T_CHECK request and just validation 2956 * done elsewhere should be enough, we just return here. 2957 */ 2958 if (inlen == 0) { 2959 *outlenp = 0; 2960 return (0); 2961 } 2962 break; 2963 case SETFN_OPTCOM_NEGOTIATE: 2964 checkonly = B_FALSE; 2965 break; 2966 case SETFN_UD_NEGOTIATE: 2967 case SETFN_CONN_NEGOTIATE: 2968 checkonly = B_FALSE; 2969 /* 2970 * Negotiating local and "association-related" options 2971 * through T_UNITDATA_REQ. 2972 * 2973 * Following routine can filter out ones we do not 2974 * want to be "set" this way. 2975 */ 2976 if (!udp_opt_allow_udr_set(level, name)) { 2977 *outlenp = 0; 2978 return (EINVAL); 2979 } 2980 break; 2981 default: 2982 /* 2983 * We should never get here 2984 */ 2985 *outlenp = 0; 2986 return (EINVAL); 2987 } 2988 2989 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 2990 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 2991 2992 /* 2993 * For fixed length options, no sanity check 2994 * of passed in length is done. It is assumed *_optcom_req() 2995 * routines do the right thing. 2996 */ 2997 2998 switch (level) { 2999 case SOL_SOCKET: 3000 switch (name) { 3001 case SO_REUSEADDR: 3002 if (!checkonly) 3003 udp->udp_reuseaddr = onoff; 3004 break; 3005 case SO_DEBUG: 3006 if (!checkonly) 3007 udp->udp_debug = onoff; 3008 break; 3009 /* 3010 * The following three items are available here, 3011 * but are only meaningful to IP. 
3012 */ 3013 case SO_DONTROUTE: 3014 if (!checkonly) 3015 udp->udp_dontroute = onoff; 3016 break; 3017 case SO_USELOOPBACK: 3018 if (!checkonly) 3019 udp->udp_useloopback = onoff; 3020 break; 3021 case SO_BROADCAST: 3022 if (!checkonly) 3023 udp->udp_broadcast = onoff; 3024 break; 3025 3026 case SO_SNDBUF: 3027 if (*i1 > us->us_max_buf) { 3028 *outlenp = 0; 3029 return (ENOBUFS); 3030 } 3031 if (!checkonly) { 3032 q->q_hiwat = *i1; 3033 } 3034 break; 3035 case SO_RCVBUF: 3036 if (*i1 > us->us_max_buf) { 3037 *outlenp = 0; 3038 return (ENOBUFS); 3039 } 3040 if (!checkonly) { 3041 RD(q)->q_hiwat = *i1; 3042 rw_exit(&udp->udp_rwlock); 3043 (void) mi_set_sth_hiwat(RD(q), 3044 udp_set_rcv_hiwat(udp, *i1)); 3045 rw_enter(&udp->udp_rwlock, RW_WRITER); 3046 } 3047 break; 3048 case SO_DGRAM_ERRIND: 3049 if (!checkonly) 3050 udp->udp_dgram_errind = onoff; 3051 break; 3052 case SO_RECVUCRED: 3053 if (!checkonly) 3054 udp->udp_recvucred = onoff; 3055 break; 3056 case SO_ALLZONES: 3057 /* 3058 * "soft" error (negative) 3059 * option not handled at this level 3060 * Do not modify *outlenp. 3061 */ 3062 return (-EINVAL); 3063 case SO_TIMESTAMP: 3064 if (!checkonly) 3065 udp->udp_timestamp = onoff; 3066 break; 3067 case SO_ANON_MLP: 3068 /* Pass option along to IP level for handling */ 3069 return (-EINVAL); 3070 case SO_MAC_EXEMPT: 3071 /* Pass option along to IP level for handling */ 3072 return (-EINVAL); 3073 case SCM_UCRED: { 3074 struct ucred_s *ucr; 3075 cred_t *cr, *newcr; 3076 ts_label_t *tsl; 3077 3078 /* 3079 * Only sockets that have proper privileges and are 3080 * bound to MLPs will have any other value here, so 3081 * this implicitly tests for privilege to set label. 3082 */ 3083 if (connp->conn_mlp_type == mlptSingle) 3084 break; 3085 ucr = (struct ucred_s *)invalp; 3086 if (inlen != ucredsize || 3087 ucr->uc_labeloff < sizeof (*ucr) || 3088 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 3089 return (EINVAL); 3090 if (!checkonly) { 3091 mblk_t *mb; 3092 3093 if (attrs == NULL || 3094 (mb = attrs->udpattr_mb) == NULL) 3095 return (EINVAL); 3096 if ((cr = DB_CRED(mb)) == NULL) 3097 cr = udp->udp_connp->conn_cred; 3098 ASSERT(cr != NULL); 3099 if ((tsl = crgetlabel(cr)) == NULL) 3100 return (EINVAL); 3101 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 3102 tsl->tsl_doi, KM_NOSLEEP); 3103 if (newcr == NULL) 3104 return (ENOSR); 3105 mblk_setcred(mb, newcr); 3106 attrs->udpattr_credset = B_TRUE; 3107 crfree(newcr); 3108 } 3109 break; 3110 } 3111 case SO_EXCLBIND: 3112 if (!checkonly) 3113 udp->udp_exclbind = onoff; 3114 break; 3115 default: 3116 *outlenp = 0; 3117 return (EINVAL); 3118 } 3119 break; 3120 case IPPROTO_IP: 3121 if (udp->udp_family != AF_INET) { 3122 *outlenp = 0; 3123 return (ENOPROTOOPT); 3124 } 3125 switch (name) { 3126 case IP_OPTIONS: 3127 case T_IP_OPTIONS: 3128 /* Save options for use by IP. */ 3129 newlen = inlen + udp->udp_label_len; 3130 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 3131 *outlenp = 0; 3132 return (EINVAL); 3133 } 3134 if (checkonly) 3135 break; 3136 3137 /* 3138 * Update the stored options taking into account 3139 * any CIPSO option which we should not overwrite. 
3140 */ 3141 if (!tsol_option_set(&udp->udp_ip_snd_options, 3142 &udp->udp_ip_snd_options_len, 3143 udp->udp_label_len, invalp, inlen)) { 3144 *outlenp = 0; 3145 return (ENOMEM); 3146 } 3147 3148 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3149 UDPH_SIZE + udp->udp_ip_snd_options_len; 3150 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3151 rw_exit(&udp->udp_rwlock); 3152 (void) mi_set_sth_wroff(RD(q), sth_wroff); 3153 rw_enter(&udp->udp_rwlock, RW_WRITER); 3154 break; 3155 3156 case IP_TTL: 3157 if (!checkonly) { 3158 udp->udp_ttl = (uchar_t)*i1; 3159 } 3160 break; 3161 case IP_TOS: 3162 case T_IP_TOS: 3163 if (!checkonly) { 3164 udp->udp_type_of_service = (uchar_t)*i1; 3165 } 3166 break; 3167 case IP_MULTICAST_IF: { 3168 /* 3169 * TODO should check OPTMGMT reply and undo this if 3170 * there is an error. 3171 */ 3172 struct in_addr *inap = (struct in_addr *)invalp; 3173 if (!checkonly) { 3174 udp->udp_multicast_if_addr = 3175 inap->s_addr; 3176 } 3177 break; 3178 } 3179 case IP_MULTICAST_TTL: 3180 if (!checkonly) 3181 udp->udp_multicast_ttl = *invalp; 3182 break; 3183 case IP_MULTICAST_LOOP: 3184 if (!checkonly) 3185 connp->conn_multicast_loop = *invalp; 3186 break; 3187 case IP_RECVOPTS: 3188 if (!checkonly) 3189 udp->udp_recvopts = onoff; 3190 break; 3191 case IP_RECVDSTADDR: 3192 if (!checkonly) 3193 udp->udp_recvdstaddr = onoff; 3194 break; 3195 case IP_RECVIF: 3196 if (!checkonly) 3197 udp->udp_recvif = onoff; 3198 break; 3199 case IP_RECVSLLA: 3200 if (!checkonly) 3201 udp->udp_recvslla = onoff; 3202 break; 3203 case IP_RECVTTL: 3204 if (!checkonly) 3205 udp->udp_recvttl = onoff; 3206 break; 3207 case IP_PKTINFO: { 3208 /* 3209 * This also handles IP_RECVPKTINFO. 3210 * IP_PKTINFO and IP_RECVPKTINFO have same value. 3211 * Differentiation is based on the size of the 3212 * argument passed in. 3213 */ 3214 struct in_pktinfo *pktinfop; 3215 ip4_pkt_t *attr_pktinfop; 3216 3217 if (checkonly) 3218 break; 3219 3220 if (inlen == sizeof (int)) { 3221 /* 3222 * This is IP_RECVPKTINFO option. 3223 * Keep a local copy of whether this option is 3224 * set or not and pass it down to IP for 3225 * processing. 3226 */ 3227 3228 udp->udp_ip_recvpktinfo = onoff; 3229 return (-EINVAL); 3230 } 3231 3232 if (attrs == NULL || 3233 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 3234 /* 3235 * sticky option or no buffer to return 3236 * the results. 3237 */ 3238 return (EINVAL); 3239 } 3240 3241 if (inlen != sizeof (struct in_pktinfo)) 3242 return (EINVAL); 3243 3244 pktinfop = (struct in_pktinfo *)invalp; 3245 3246 /* 3247 * At least one of the values should be specified 3248 */ 3249 if (pktinfop->ipi_ifindex == 0 && 3250 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 3251 return (EINVAL); 3252 } 3253 3254 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 3255 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 3256 3257 break; 3258 } 3259 case IP_ADD_MEMBERSHIP: 3260 case IP_DROP_MEMBERSHIP: 3261 case IP_BLOCK_SOURCE: 3262 case IP_UNBLOCK_SOURCE: 3263 case IP_ADD_SOURCE_MEMBERSHIP: 3264 case IP_DROP_SOURCE_MEMBERSHIP: 3265 case MCAST_JOIN_GROUP: 3266 case MCAST_LEAVE_GROUP: 3267 case MCAST_BLOCK_SOURCE: 3268 case MCAST_UNBLOCK_SOURCE: 3269 case MCAST_JOIN_SOURCE_GROUP: 3270 case MCAST_LEAVE_SOURCE_GROUP: 3271 case IP_SEC_OPT: 3272 case IP_NEXTHOP: 3273 case IP_DHCPINIT_IF: 3274 /* 3275 * "soft" error (negative) 3276 * option not handled at this level 3277 * Do not modify *outlenp. 
3278 */ 3279 return (-EINVAL); 3280 case IP_BOUND_IF: 3281 if (!checkonly) 3282 udp->udp_bound_if = *i1; 3283 break; 3284 case IP_UNSPEC_SRC: 3285 if (!checkonly) 3286 udp->udp_unspec_source = onoff; 3287 break; 3288 case IP_BROADCAST_TTL: 3289 if (!checkonly) 3290 connp->conn_broadcast_ttl = *invalp; 3291 break; 3292 default: 3293 *outlenp = 0; 3294 return (EINVAL); 3295 } 3296 break; 3297 case IPPROTO_IPV6: { 3298 ip6_pkt_t *ipp; 3299 boolean_t sticky; 3300 3301 if (udp->udp_family != AF_INET6) { 3302 *outlenp = 0; 3303 return (ENOPROTOOPT); 3304 } 3305 /* 3306 * Deal with both sticky options and ancillary data 3307 */ 3308 sticky = B_FALSE; 3309 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 3310 NULL) { 3311 /* sticky options, or none */ 3312 ipp = &udp->udp_sticky_ipp; 3313 sticky = B_TRUE; 3314 } 3315 3316 switch (name) { 3317 case IPV6_MULTICAST_IF: 3318 if (!checkonly) 3319 udp->udp_multicast_if_index = *i1; 3320 break; 3321 case IPV6_UNICAST_HOPS: 3322 /* -1 means use default */ 3323 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3324 *outlenp = 0; 3325 return (EINVAL); 3326 } 3327 if (!checkonly) { 3328 if (*i1 == -1) { 3329 udp->udp_ttl = ipp->ipp_unicast_hops = 3330 us->us_ipv6_hoplimit; 3331 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3332 /* Pass modified value to IP. */ 3333 *i1 = udp->udp_ttl; 3334 } else { 3335 udp->udp_ttl = ipp->ipp_unicast_hops = 3336 (uint8_t)*i1; 3337 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3338 } 3339 /* Rebuild the header template */ 3340 error = udp_build_hdrs(udp); 3341 if (error != 0) { 3342 *outlenp = 0; 3343 return (error); 3344 } 3345 } 3346 break; 3347 case IPV6_MULTICAST_HOPS: 3348 /* -1 means use default */ 3349 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3350 *outlenp = 0; 3351 return (EINVAL); 3352 } 3353 if (!checkonly) { 3354 if (*i1 == -1) { 3355 udp->udp_multicast_ttl = 3356 ipp->ipp_multicast_hops = 3357 IP_DEFAULT_MULTICAST_TTL; 3358 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3359 /* Pass modified value to IP. 
*/ 3360 *i1 = udp->udp_multicast_ttl; 3361 } else { 3362 udp->udp_multicast_ttl = 3363 ipp->ipp_multicast_hops = 3364 (uint8_t)*i1; 3365 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3366 } 3367 } 3368 break; 3369 case IPV6_MULTICAST_LOOP: 3370 if (*i1 != 0 && *i1 != 1) { 3371 *outlenp = 0; 3372 return (EINVAL); 3373 } 3374 if (!checkonly) 3375 connp->conn_multicast_loop = *i1; 3376 break; 3377 case IPV6_JOIN_GROUP: 3378 case IPV6_LEAVE_GROUP: 3379 case MCAST_JOIN_GROUP: 3380 case MCAST_LEAVE_GROUP: 3381 case MCAST_BLOCK_SOURCE: 3382 case MCAST_UNBLOCK_SOURCE: 3383 case MCAST_JOIN_SOURCE_GROUP: 3384 case MCAST_LEAVE_SOURCE_GROUP: 3385 /* 3386 * "soft" error (negative) 3387 * option not handled at this level 3388 * Note: Do not modify *outlenp 3389 */ 3390 return (-EINVAL); 3391 case IPV6_BOUND_IF: 3392 if (!checkonly) 3393 udp->udp_bound_if = *i1; 3394 break; 3395 case IPV6_UNSPEC_SRC: 3396 if (!checkonly) 3397 udp->udp_unspec_source = onoff; 3398 break; 3399 /* 3400 * Set boolean switches for ancillary data delivery 3401 */ 3402 case IPV6_RECVPKTINFO: 3403 if (!checkonly) 3404 udp->udp_ip_recvpktinfo = onoff; 3405 break; 3406 case IPV6_RECVTCLASS: 3407 if (!checkonly) { 3408 udp->udp_ipv6_recvtclass = onoff; 3409 } 3410 break; 3411 case IPV6_RECVPATHMTU: 3412 if (!checkonly) { 3413 udp->udp_ipv6_recvpathmtu = onoff; 3414 } 3415 break; 3416 case IPV6_RECVHOPLIMIT: 3417 if (!checkonly) 3418 udp->udp_ipv6_recvhoplimit = onoff; 3419 break; 3420 case IPV6_RECVHOPOPTS: 3421 if (!checkonly) 3422 udp->udp_ipv6_recvhopopts = onoff; 3423 break; 3424 case IPV6_RECVDSTOPTS: 3425 if (!checkonly) 3426 udp->udp_ipv6_recvdstopts = onoff; 3427 break; 3428 case _OLD_IPV6_RECVDSTOPTS: 3429 if (!checkonly) 3430 udp->udp_old_ipv6_recvdstopts = onoff; 3431 break; 3432 case IPV6_RECVRTHDRDSTOPTS: 3433 if (!checkonly) 3434 udp->udp_ipv6_recvrthdrdstopts = onoff; 3435 break; 3436 case IPV6_RECVRTHDR: 3437 if (!checkonly) 3438 udp->udp_ipv6_recvrthdr = onoff; 3439 break; 3440 /* 3441 * Set sticky options or ancillary data. 3442 * If sticky options, (re)build any extension headers 3443 * that might be needed as a result. 3444 */ 3445 case IPV6_PKTINFO: 3446 /* 3447 * The source address and ifindex are verified 3448 * in ip_opt_set(). For ancillary data the 3449 * source address is checked in ip_wput_v6. 
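 *
 * Illustrative example (assumed application usage, not taken from this
 * file): this case is reached both for a sticky
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_PKTINFO, &pkti, sizeof (pkti))
 * and for an IPV6_PKTINFO cmsg supplied with sendmsg(); a zero-length
 * value clears the sticky setting, and only the sticky form causes the
 * header template to be rebuilt below.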
3450 */ 3451 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3452 return (EINVAL); 3453 if (checkonly) 3454 break; 3455 3456 if (inlen == 0) { 3457 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3458 ipp->ipp_sticky_ignored |= 3459 (IPPF_IFINDEX|IPPF_ADDR); 3460 } else { 3461 struct in6_pktinfo *pkti; 3462 3463 pkti = (struct in6_pktinfo *)invalp; 3464 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3465 ipp->ipp_addr = pkti->ipi6_addr; 3466 if (ipp->ipp_ifindex != 0) 3467 ipp->ipp_fields |= IPPF_IFINDEX; 3468 else 3469 ipp->ipp_fields &= ~IPPF_IFINDEX; 3470 if (!IN6_IS_ADDR_UNSPECIFIED( 3471 &ipp->ipp_addr)) 3472 ipp->ipp_fields |= IPPF_ADDR; 3473 else 3474 ipp->ipp_fields &= ~IPPF_ADDR; 3475 } 3476 if (sticky) { 3477 error = udp_build_hdrs(udp); 3478 if (error != 0) 3479 return (error); 3480 } 3481 break; 3482 case IPV6_HOPLIMIT: 3483 if (sticky) 3484 return (EINVAL); 3485 if (inlen != 0 && inlen != sizeof (int)) 3486 return (EINVAL); 3487 if (checkonly) 3488 break; 3489 3490 if (inlen == 0) { 3491 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3492 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3493 } else { 3494 if (*i1 > 255 || *i1 < -1) 3495 return (EINVAL); 3496 if (*i1 == -1) 3497 ipp->ipp_hoplimit = 3498 us->us_ipv6_hoplimit; 3499 else 3500 ipp->ipp_hoplimit = *i1; 3501 ipp->ipp_fields |= IPPF_HOPLIMIT; 3502 } 3503 break; 3504 case IPV6_TCLASS: 3505 if (inlen != 0 && inlen != sizeof (int)) 3506 return (EINVAL); 3507 if (checkonly) 3508 break; 3509 3510 if (inlen == 0) { 3511 ipp->ipp_fields &= ~IPPF_TCLASS; 3512 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3513 } else { 3514 if (*i1 > 255 || *i1 < -1) 3515 return (EINVAL); 3516 if (*i1 == -1) 3517 ipp->ipp_tclass = 0; 3518 else 3519 ipp->ipp_tclass = *i1; 3520 ipp->ipp_fields |= IPPF_TCLASS; 3521 } 3522 if (sticky) { 3523 error = udp_build_hdrs(udp); 3524 if (error != 0) 3525 return (error); 3526 } 3527 break; 3528 case IPV6_NEXTHOP: 3529 /* 3530 * IP will verify that the nexthop is reachable 3531 * and fail for sticky options. 3532 */ 3533 if (inlen != 0 && inlen != sizeof (sin6_t)) 3534 return (EINVAL); 3535 if (checkonly) 3536 break; 3537 3538 if (inlen == 0) { 3539 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3540 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3541 } else { 3542 sin6_t *sin6 = (sin6_t *)invalp; 3543 3544 if (sin6->sin6_family != AF_INET6) 3545 return (EAFNOSUPPORT); 3546 if (IN6_IS_ADDR_V4MAPPED( 3547 &sin6->sin6_addr)) 3548 return (EADDRNOTAVAIL); 3549 ipp->ipp_nexthop = sin6->sin6_addr; 3550 if (!IN6_IS_ADDR_UNSPECIFIED( 3551 &ipp->ipp_nexthop)) 3552 ipp->ipp_fields |= IPPF_NEXTHOP; 3553 else 3554 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3555 } 3556 if (sticky) { 3557 error = udp_build_hdrs(udp); 3558 if (error != 0) 3559 return (error); 3560 } 3561 break; 3562 case IPV6_HOPOPTS: { 3563 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3564 /* 3565 * Sanity checks - minimum size, size a multiple of 3566 * eight bytes, and matching size passed in. 3567 */ 3568 if (inlen != 0 && 3569 inlen != (8 * (hopts->ip6h_len + 1))) 3570 return (EINVAL); 3571 3572 if (checkonly) 3573 break; 3574 3575 error = optcom_pkt_set(invalp, inlen, sticky, 3576 (uchar_t **)&ipp->ipp_hopopts, 3577 &ipp->ipp_hopoptslen, 3578 sticky ? 
udp->udp_label_len_v6 : 0); 3579 if (error != 0) 3580 return (error); 3581 if (ipp->ipp_hopoptslen == 0) { 3582 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3583 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3584 } else { 3585 ipp->ipp_fields |= IPPF_HOPOPTS; 3586 } 3587 if (sticky) { 3588 error = udp_build_hdrs(udp); 3589 if (error != 0) 3590 return (error); 3591 } 3592 break; 3593 } 3594 case IPV6_RTHDRDSTOPTS: { 3595 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3596 3597 /* 3598 * Sanity checks - minimum size, size a multiple of 3599 * eight bytes, and matching size passed in. 3600 */ 3601 if (inlen != 0 && 3602 inlen != (8 * (dopts->ip6d_len + 1))) 3603 return (EINVAL); 3604 3605 if (checkonly) 3606 break; 3607 3608 if (inlen == 0) { 3609 if (sticky && 3610 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3611 kmem_free(ipp->ipp_rtdstopts, 3612 ipp->ipp_rtdstoptslen); 3613 ipp->ipp_rtdstopts = NULL; 3614 ipp->ipp_rtdstoptslen = 0; 3615 } 3616 3617 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3618 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3619 } else { 3620 error = optcom_pkt_set(invalp, inlen, sticky, 3621 (uchar_t **)&ipp->ipp_rtdstopts, 3622 &ipp->ipp_rtdstoptslen, 0); 3623 if (error != 0) 3624 return (error); 3625 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3626 } 3627 if (sticky) { 3628 error = udp_build_hdrs(udp); 3629 if (error != 0) 3630 return (error); 3631 } 3632 break; 3633 } 3634 case IPV6_DSTOPTS: { 3635 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3636 3637 /* 3638 * Sanity checks - minimum size, size a multiple of 3639 * eight bytes, and matching size passed in. 3640 */ 3641 if (inlen != 0 && 3642 inlen != (8 * (dopts->ip6d_len + 1))) 3643 return (EINVAL); 3644 3645 if (checkonly) 3646 break; 3647 3648 if (inlen == 0) { 3649 if (sticky && 3650 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3651 kmem_free(ipp->ipp_dstopts, 3652 ipp->ipp_dstoptslen); 3653 ipp->ipp_dstopts = NULL; 3654 ipp->ipp_dstoptslen = 0; 3655 } 3656 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3657 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3658 } else { 3659 error = optcom_pkt_set(invalp, inlen, sticky, 3660 (uchar_t **)&ipp->ipp_dstopts, 3661 &ipp->ipp_dstoptslen, 0); 3662 if (error != 0) 3663 return (error); 3664 ipp->ipp_fields |= IPPF_DSTOPTS; 3665 } 3666 if (sticky) { 3667 error = udp_build_hdrs(udp); 3668 if (error != 0) 3669 return (error); 3670 } 3671 break; 3672 } 3673 case IPV6_RTHDR: { 3674 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3675 3676 /* 3677 * Sanity checks - minimum size, size a multiple of 3678 * eight bytes, and matching size passed in. 
3679 */ 3680 if (inlen != 0 && 3681 inlen != (8 * (rt->ip6r_len + 1))) 3682 return (EINVAL); 3683 3684 if (checkonly) 3685 break; 3686 3687 if (inlen == 0) { 3688 if (sticky && 3689 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3690 kmem_free(ipp->ipp_rthdr, 3691 ipp->ipp_rthdrlen); 3692 ipp->ipp_rthdr = NULL; 3693 ipp->ipp_rthdrlen = 0; 3694 } 3695 ipp->ipp_fields &= ~IPPF_RTHDR; 3696 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3697 } else { 3698 error = optcom_pkt_set(invalp, inlen, sticky, 3699 (uchar_t **)&ipp->ipp_rthdr, 3700 &ipp->ipp_rthdrlen, 0); 3701 if (error != 0) 3702 return (error); 3703 ipp->ipp_fields |= IPPF_RTHDR; 3704 } 3705 if (sticky) { 3706 error = udp_build_hdrs(udp); 3707 if (error != 0) 3708 return (error); 3709 } 3710 break; 3711 } 3712 3713 case IPV6_DONTFRAG: 3714 if (checkonly) 3715 break; 3716 3717 if (onoff) { 3718 ipp->ipp_fields |= IPPF_DONTFRAG; 3719 } else { 3720 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3721 } 3722 break; 3723 3724 case IPV6_USE_MIN_MTU: 3725 if (inlen != sizeof (int)) 3726 return (EINVAL); 3727 3728 if (*i1 < -1 || *i1 > 1) 3729 return (EINVAL); 3730 3731 if (checkonly) 3732 break; 3733 3734 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3735 ipp->ipp_use_min_mtu = *i1; 3736 break; 3737 3738 case IPV6_BOUND_PIF: 3739 case IPV6_SEC_OPT: 3740 case IPV6_DONTFAILOVER_IF: 3741 case IPV6_SRC_PREFERENCES: 3742 case IPV6_V6ONLY: 3743 /* Handled at the IP level */ 3744 return (-EINVAL); 3745 default: 3746 *outlenp = 0; 3747 return (EINVAL); 3748 } 3749 break; 3750 } /* end IPPROTO_IPV6 */ 3751 case IPPROTO_UDP: 3752 switch (name) { 3753 case UDP_ANONPRIVBIND: 3754 if ((error = secpolicy_net_privaddr(cr, 0, 3755 IPPROTO_UDP)) != 0) { 3756 *outlenp = 0; 3757 return (error); 3758 } 3759 if (!checkonly) { 3760 udp->udp_anon_priv_bind = onoff; 3761 } 3762 break; 3763 case UDP_EXCLBIND: 3764 if (!checkonly) 3765 udp->udp_exclbind = onoff; 3766 break; 3767 case UDP_RCVHDR: 3768 if (!checkonly) 3769 udp->udp_rcvhdr = onoff; 3770 break; 3771 case UDP_NAT_T_ENDPOINT: 3772 if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) { 3773 *outlenp = 0; 3774 return (error); 3775 } 3776 3777 /* 3778 * Use udp_family instead so we can avoid ambiguitites 3779 * with AF_INET6 sockets that may switch from IPv4 3780 * to IPv6. 3781 */ 3782 if (udp->udp_family != AF_INET) { 3783 *outlenp = 0; 3784 return (EAFNOSUPPORT); 3785 } 3786 3787 if (!checkonly) { 3788 udp->udp_nat_t_endpoint = onoff; 3789 3790 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3791 UDPH_SIZE + udp->udp_ip_snd_options_len; 3792 3793 /* Also, adjust wroff */ 3794 if (onoff) { 3795 udp->udp_max_hdr_len += 3796 sizeof (uint32_t); 3797 } 3798 (void) mi_set_sth_wroff(RD(q), 3799 udp->udp_max_hdr_len + us->us_wroff_extra); 3800 } 3801 break; 3802 default: 3803 *outlenp = 0; 3804 return (EINVAL); 3805 } 3806 break; 3807 default: 3808 *outlenp = 0; 3809 return (EINVAL); 3810 } 3811 /* 3812 * Common case of OK return with outval same as inval. 
3813 */ 3814 if (invalp != outvalp) { 3815 /* don't trust bcopy for identical src/dst */ 3816 (void) bcopy(invalp, outvalp, inlen); 3817 } 3818 *outlenp = inlen; 3819 return (0); 3820 } 3821 3822 int 3823 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3824 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3825 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3826 { 3827 udp_t *udp; 3828 int err; 3829 3830 udp = Q_TO_UDP(q); 3831 3832 rw_enter(&udp->udp_rwlock, RW_WRITER); 3833 err = udp_opt_set_locked(q, optset_context, level, name, inlen, invalp, 3834 outlenp, outvalp, thisdg_attrs, cr, mblk); 3835 rw_exit(&udp->udp_rwlock); 3836 return (err); 3837 } 3838 3839 /* 3840 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3841 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3842 * headers, and the udp header. 3843 * Returns failure if can't allocate memory. 3844 */ 3845 static int 3846 udp_build_hdrs(udp_t *udp) 3847 { 3848 udp_stack_t *us = udp->udp_us; 3849 uchar_t *hdrs; 3850 uint_t hdrs_len; 3851 ip6_t *ip6h; 3852 ip6i_t *ip6i; 3853 udpha_t *udpha; 3854 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3855 size_t sth_wroff; 3856 3857 ASSERT(RW_WRITE_HELD(&udp->udp_rwlock)); 3858 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3859 ASSERT(hdrs_len != 0); 3860 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3861 /* Need to reallocate */ 3862 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3863 if (hdrs == NULL) 3864 return (ENOMEM); 3865 3866 if (udp->udp_sticky_hdrs_len != 0) { 3867 kmem_free(udp->udp_sticky_hdrs, 3868 udp->udp_sticky_hdrs_len); 3869 } 3870 udp->udp_sticky_hdrs = hdrs; 3871 udp->udp_sticky_hdrs_len = hdrs_len; 3872 } 3873 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3874 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3875 3876 /* Set header fields not in ipp */ 3877 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3878 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3879 ip6h = (ip6_t *)&ip6i[1]; 3880 } else { 3881 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3882 } 3883 3884 if (!(ipp->ipp_fields & IPPF_ADDR)) 3885 ip6h->ip6_src = udp->udp_v6src; 3886 3887 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3888 udpha->uha_src_port = udp->udp_port; 3889 3890 /* Try to get everything in a single mblk */ 3891 if (hdrs_len > udp->udp_max_hdr_len) { 3892 udp->udp_max_hdr_len = hdrs_len; 3893 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 3894 rw_exit(&udp->udp_rwlock); 3895 (void) mi_set_sth_wroff(udp->udp_connp->conn_rq, sth_wroff); 3896 rw_enter(&udp->udp_rwlock, RW_WRITER); 3897 } 3898 return (0); 3899 } 3900 3901 /* 3902 * This routine retrieves the value of an ND variable in a udpparam_t 3903 * structure. It is called through nd_getset when a user reads the 3904 * variable. 3905 */ 3906 /* ARGSUSED */ 3907 static int 3908 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 3909 { 3910 udpparam_t *udppa = (udpparam_t *)cp; 3911 3912 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 3913 return (0); 3914 } 3915 3916 /* 3917 * Walk through the param array specified registering each element with the 3918 * named dispatch (ND) handler. 
3919 */ 3920 static boolean_t 3921 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt) 3922 { 3923 for (; cnt-- > 0; udppa++) { 3924 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 3925 if (!nd_load(ndp, udppa->udp_param_name, 3926 udp_param_get, udp_param_set, 3927 (caddr_t)udppa)) { 3928 nd_free(ndp); 3929 return (B_FALSE); 3930 } 3931 } 3932 } 3933 if (!nd_load(ndp, "udp_extra_priv_ports", 3934 udp_extra_priv_ports_get, NULL, NULL)) { 3935 nd_free(ndp); 3936 return (B_FALSE); 3937 } 3938 if (!nd_load(ndp, "udp_extra_priv_ports_add", 3939 NULL, udp_extra_priv_ports_add, NULL)) { 3940 nd_free(ndp); 3941 return (B_FALSE); 3942 } 3943 if (!nd_load(ndp, "udp_extra_priv_ports_del", 3944 NULL, udp_extra_priv_ports_del, NULL)) { 3945 nd_free(ndp); 3946 return (B_FALSE); 3947 } 3948 if (!nd_load(ndp, "udp_status", udp_status_report, NULL, 3949 NULL)) { 3950 nd_free(ndp); 3951 return (B_FALSE); 3952 } 3953 if (!nd_load(ndp, "udp_bind_hash", udp_bind_hash_report, NULL, 3954 NULL)) { 3955 nd_free(ndp); 3956 return (B_FALSE); 3957 } 3958 return (B_TRUE); 3959 } 3960 3961 /* This routine sets an ND variable in a udpparam_t structure. */ 3962 /* ARGSUSED */ 3963 static int 3964 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 3965 { 3966 long new_value; 3967 udpparam_t *udppa = (udpparam_t *)cp; 3968 3969 /* 3970 * Fail the request if the new value does not lie within the 3971 * required bounds. 3972 */ 3973 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 3974 new_value < udppa->udp_param_min || 3975 new_value > udppa->udp_param_max) { 3976 return (EINVAL); 3977 } 3978 3979 /* Set the new value */ 3980 udppa->udp_param_value = new_value; 3981 return (0); 3982 } 3983 3984 /* 3985 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 3986 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 3987 * just count the length needed for allocation. If 'dbuf' is non-NULL, 3988 * then it's assumed to be allocated to be large enough. 3989 * 3990 * Returns zero if trimming of the security option causes all options to go 3991 * away. 3992 */ 3993 static size_t 3994 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 3995 { 3996 struct T_opthdr *toh; 3997 size_t hol = ipp->ipp_hopoptslen; 3998 ip6_hbh_t *dstopt = NULL; 3999 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 4000 size_t tlen, olen, plen; 4001 boolean_t deleting; 4002 const struct ip6_opt *sopt, *lastpad; 4003 struct ip6_opt *dopt; 4004 4005 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 4006 toh->level = IPPROTO_IPV6; 4007 toh->name = IPV6_HOPOPTS; 4008 toh->status = 0; 4009 dstopt = (ip6_hbh_t *)(toh + 1); 4010 } 4011 4012 /* 4013 * If labeling is enabled, then skip the label option 4014 * but get other options if there are any. 4015 */ 4016 if (is_system_labeled()) { 4017 dopt = NULL; 4018 if (dstopt != NULL) { 4019 /* will fill in ip6h_len later */ 4020 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 4021 dopt = (struct ip6_opt *)(dstopt + 1); 4022 } 4023 sopt = (const struct ip6_opt *)(srcopt + 1); 4024 hol -= sizeof (*srcopt); 4025 tlen = sizeof (*dstopt); 4026 lastpad = NULL; 4027 deleting = B_FALSE; 4028 /* 4029 * This loop finds the first (lastpad pointer) of any number of 4030 * pads that preceeds the security option, then treats the 4031 * security option as though it were a pad, and then finds the 4032 * next non-pad option (or end of list). 4033 * 4034 * It then treats the entire block as one big pad. 
To preserve 4035 * alignment of any options that follow, or just the end of the 4036 * list, it computes a minimal new padding size that keeps the 4037 * same alignment for the next option. 4038 * 4039 * If it encounters just a sequence of pads with no security 4040 * option, those are copied as-is rather than collapsed. 4041 * 4042 * Note that to handle the end of list case, the code makes one 4043 * loop with 'hol' set to zero. 4044 */ 4045 for (;;) { 4046 if (hol > 0) { 4047 if (sopt->ip6o_type == IP6OPT_PAD1) { 4048 if (lastpad == NULL) 4049 lastpad = sopt; 4050 sopt = (const struct ip6_opt *) 4051 &sopt->ip6o_len; 4052 hol--; 4053 continue; 4054 } 4055 olen = sopt->ip6o_len + sizeof (*sopt); 4056 if (olen > hol) 4057 olen = hol; 4058 if (sopt->ip6o_type == IP6OPT_PADN || 4059 sopt->ip6o_type == ip6opt_ls) { 4060 if (sopt->ip6o_type == ip6opt_ls) 4061 deleting = B_TRUE; 4062 if (lastpad == NULL) 4063 lastpad = sopt; 4064 sopt = (const struct ip6_opt *) 4065 ((const char *)sopt + olen); 4066 hol -= olen; 4067 continue; 4068 } 4069 } else { 4070 /* if nothing was copied at all, then delete */ 4071 if (tlen == sizeof (*dstopt)) 4072 return (0); 4073 /* last pass; pick up any trailing padding */ 4074 olen = 0; 4075 } 4076 if (deleting) { 4077 /* 4078 * compute aligning effect of deleted material 4079 * to reproduce with pad. 4080 */ 4081 plen = ((const char *)sopt - 4082 (const char *)lastpad) & 7; 4083 tlen += plen; 4084 if (dopt != NULL) { 4085 if (plen == 1) { 4086 dopt->ip6o_type = IP6OPT_PAD1; 4087 } else if (plen > 1) { 4088 plen -= sizeof (*dopt); 4089 dopt->ip6o_type = IP6OPT_PADN; 4090 dopt->ip6o_len = plen; 4091 if (plen > 0) 4092 bzero(dopt + 1, plen); 4093 } 4094 dopt = (struct ip6_opt *) 4095 ((char *)dopt + plen); 4096 } 4097 deleting = B_FALSE; 4098 lastpad = NULL; 4099 } 4100 /* if there's uncopied padding, then copy that now */ 4101 if (lastpad != NULL) { 4102 olen += (const char *)sopt - 4103 (const char *)lastpad; 4104 sopt = lastpad; 4105 lastpad = NULL; 4106 } 4107 if (dopt != NULL && olen > 0) { 4108 bcopy(sopt, dopt, olen); 4109 dopt = (struct ip6_opt *)((char *)dopt + olen); 4110 } 4111 if (hol == 0) 4112 break; 4113 tlen += olen; 4114 sopt = (const struct ip6_opt *) 4115 ((const char *)sopt + olen); 4116 hol -= olen; 4117 } 4118 /* go back and patch up the length value, rounded upward */ 4119 if (dstopt != NULL) 4120 dstopt->ip6h_len = (tlen - 1) >> 3; 4121 } else { 4122 tlen = hol; 4123 if (dstopt != NULL) 4124 bcopy(srcopt, dstopt, hol); 4125 } 4126 4127 tlen += sizeof (*toh); 4128 if (toh != NULL) 4129 toh->len = tlen; 4130 4131 return (tlen); 4132 } 4133 4134 /* 4135 * Update udp_rcv_opt_len from the packet. 4136 * Called when options received, and when no options received but 4137 * udp_ip_recv_opt_len has previously recorded options. 
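 * The saved data is kept in udp_ip_rcv_options/udp_ip_rcv_options_len so
 * that an AF_INET socket can later retrieve it with getsockopt(IP_OPTIONS)
 * (see the caller in udp_input).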
4138 */ 4139 static void 4140 udp_save_ip_rcv_opt(udp_t *udp, void *opt, int opt_len) 4141 { 4142 /* Save the options if any */ 4143 if (opt_len > 0) { 4144 if (opt_len > udp->udp_ip_rcv_options_len) { 4145 /* Need to allocate larger buffer */ 4146 if (udp->udp_ip_rcv_options_len != 0) 4147 mi_free((char *)udp->udp_ip_rcv_options); 4148 udp->udp_ip_rcv_options_len = 0; 4149 udp->udp_ip_rcv_options = 4150 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 4151 if (udp->udp_ip_rcv_options != NULL) 4152 udp->udp_ip_rcv_options_len = opt_len; 4153 } 4154 if (udp->udp_ip_rcv_options_len != 0) { 4155 bcopy(opt, udp->udp_ip_rcv_options, opt_len); 4156 /* Adjust length if we are reusing the space */ 4157 udp->udp_ip_rcv_options_len = opt_len; 4158 } 4159 } else if (udp->udp_ip_rcv_options_len != 0) { 4160 /* Clear out previously recorded options */ 4161 mi_free((char *)udp->udp_ip_rcv_options); 4162 udp->udp_ip_rcv_options = NULL; 4163 udp->udp_ip_rcv_options_len = 0; 4164 } 4165 } 4166 4167 /* ARGSUSED2 */ 4168 static void 4169 udp_input(void *arg1, mblk_t *mp, void *arg2) 4170 { 4171 conn_t *connp = (conn_t *)arg1; 4172 struct T_unitdata_ind *tudi; 4173 uchar_t *rptr; /* Pointer to IP header */ 4174 int hdr_length; /* Length of IP+UDP headers */ 4175 int opt_len; 4176 int udi_size; /* Size of T_unitdata_ind */ 4177 int mp_len; 4178 udp_t *udp; 4179 udpha_t *udpha; 4180 int ipversion; 4181 ip6_pkt_t ipp; 4182 ip6_t *ip6h; 4183 ip6i_t *ip6i; 4184 mblk_t *mp1; 4185 mblk_t *options_mp = NULL; 4186 ip_pktinfo_t *pinfo = NULL; 4187 cred_t *cr = NULL; 4188 pid_t cpid; 4189 uint32_t udp_ip_rcv_options_len; 4190 udp_bits_t udp_bits; 4191 cred_t *rcr = connp->conn_cred; 4192 udp_stack_t *us; 4193 4194 ASSERT(connp->conn_flags & IPCL_UDPCONN); 4195 4196 udp = connp->conn_udp; 4197 us = udp->udp_us; 4198 rptr = mp->b_rptr; 4199 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4200 ASSERT(OK_32PTR(rptr)); 4201 4202 /* 4203 * IP should have prepended the options data in an M_CTL. 4204 * Check the M_CTL "type" to make sure we are not here 4205 * because of a valid ICMP message. 4206 */ 4207 if (DB_TYPE(mp) == M_CTL) { 4208 if (MBLKL(mp) == sizeof (ip_pktinfo_t) && 4209 ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type == 4210 IN_PKTINFO) { 4211 /* 4212 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information 4213 * has been prepended to the packet by IP. We need to 4214 * extract the mblk and adjust the rptr 4215 */ 4216 pinfo = (ip_pktinfo_t *)mp->b_rptr; 4217 options_mp = mp; 4218 mp = mp->b_cont; 4219 rptr = mp->b_rptr; 4220 UDP_STAT(us, udp_in_pktinfo); 4221 } else { 4222 /* 4223 * ICMP messages. 4224 */ 4225 udp_icmp_error(connp->conn_rq, mp); 4226 return; 4227 } 4228 } 4229 4230 mp_len = msgdsize(mp); 4231 /* 4232 * This is the inbound data path. 4233 * First, we check to make sure the IP version number is correct, 4234 * and then pull the IP and UDP headers into the first mblk.
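 * For example, an IPv4 datagram without IP options carries
 * IP_SIMPLE_HDR_LENGTH + UDPH_SIZE (20 + 8 = 28) bytes of header, all of
 * which must be present in the first mblk (see the checks below).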
4235 */ 4236 4237 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4238 ipp.ipp_fields = 0; 4239 4240 ipversion = IPH_HDR_VERSION(rptr); 4241 4242 rw_enter(&udp->udp_rwlock, RW_READER); 4243 udp_ip_rcv_options_len = udp->udp_ip_rcv_options_len; 4244 udp_bits = udp->udp_bits; 4245 rw_exit(&udp->udp_rwlock); 4246 4247 switch (ipversion) { 4248 case IPV4_VERSION: 4249 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4250 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4251 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4252 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 4253 if ((opt_len > 0 || udp_ip_rcv_options_len > 0) && 4254 udp->udp_family == AF_INET) { 4255 /* 4256 * Record/update udp_ip_rcv_options with the lock 4257 * held. Not needed for AF_INET6 sockets 4258 * since they don't support a getsockopt of IP_OPTIONS. 4259 */ 4260 rw_enter(&udp->udp_rwlock, RW_WRITER); 4261 udp_save_ip_rcv_opt(udp, rptr + IP_SIMPLE_HDR_LENGTH, 4262 opt_len); 4263 rw_exit(&udp->udp_rwlock); 4264 } 4265 /* Handle IPV6_RECVPKTINFO even for IPv4 packet. */ 4266 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4267 udp->udp_ip_recvpktinfo) { 4268 if (pinfo->ip_pkt_flags & IPF_RECVIF) { 4269 ipp.ipp_fields |= IPPF_IFINDEX; 4270 ipp.ipp_ifindex = pinfo->ip_pkt_ifindex; 4271 } 4272 } 4273 break; 4274 case IPV6_VERSION: 4275 /* 4276 * IPv6 packets can only be received by applications 4277 * that are prepared to receive IPv6 addresses. 4278 * The IP fanout must ensure this. 4279 */ 4280 ASSERT(udp->udp_family == AF_INET6); 4281 4282 ip6h = (ip6_t *)rptr; 4283 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4284 4285 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4286 uint8_t nexthdrp; 4287 /* Look for ifindex information */ 4288 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4289 ip6i = (ip6i_t *)ip6h; 4290 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4291 goto tossit; 4292 4293 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4294 ASSERT(ip6i->ip6i_ifindex != 0); 4295 ipp.ipp_fields |= IPPF_IFINDEX; 4296 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4297 } 4298 rptr = (uchar_t *)&ip6i[1]; 4299 mp->b_rptr = rptr; 4300 if (rptr == mp->b_wptr) { 4301 mp1 = mp->b_cont; 4302 freeb(mp); 4303 mp = mp1; 4304 rptr = mp->b_rptr; 4305 } 4306 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4307 goto tossit; 4308 ip6h = (ip6_t *)rptr; 4309 mp_len = msgdsize(mp); 4310 } 4311 /* 4312 * Find any potentially interesting extension headers 4313 * as well as the length of the IPv6 + extension 4314 * headers. 4315 */ 4316 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4317 UDPH_SIZE; 4318 ASSERT(nexthdrp == IPPROTO_UDP); 4319 } else { 4320 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4321 ip6i = NULL; 4322 } 4323 break; 4324 default: 4325 ASSERT(0); 4326 } 4327 4328 /* 4329 * IP inspected the UDP header thus all of it must be in the mblk. 4330 * UDP length check is performed for IPv6 packets and IPv4 packets 4331 * to check if the size of the packet as specified 4332 * by the header is the same as the physical size of the packet. 4333 * FIXME? Didn't IP already check this? 4334 */ 4335 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4336 if ((MBLKL(mp) < hdr_length) || 4337 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4338 goto tossit; 4339 } 4340 4341 4342 /* Walk past the headers unless IP_RECVHDR was set. */ 4343 if (!udp_bits.udpb_rcvhdr) { 4344 mp->b_rptr = rptr + hdr_length; 4345 mp_len -= hdr_length; 4346 } 4347 4348 /* 4349 * This is the inbound data path. 
Packets are passed upstream as 4350 * T_UNITDATA_IND messages with full IP headers still attached. 4351 */ 4352 if (udp->udp_family == AF_INET) { 4353 sin_t *sin; 4354 4355 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4356 4357 /* 4358 * Normally only send up the source address. 4359 * If IP_RECVDSTADDR is set we include the destination IP 4360 * address as an option. With IP_RECVOPTS we include all 4361 * the IP options. 4362 */ 4363 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4364 if (udp_bits.udpb_recvdstaddr) { 4365 udi_size += sizeof (struct T_opthdr) + 4366 sizeof (struct in_addr); 4367 UDP_STAT(us, udp_in_recvdstaddr); 4368 } 4369 4370 if (udp_bits.udpb_ip_recvpktinfo && (pinfo != NULL) && 4371 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4372 udi_size += sizeof (struct T_opthdr) + 4373 sizeof (struct in_pktinfo); 4374 UDP_STAT(us, udp_ip_rcvpktinfo); 4375 } 4376 4377 if ((udp_bits.udpb_recvopts) && opt_len > 0) { 4378 udi_size += sizeof (struct T_opthdr) + opt_len; 4379 UDP_STAT(us, udp_in_recvopts); 4380 } 4381 4382 /* 4383 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4384 * space accordingly 4385 */ 4386 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4387 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 4388 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4389 UDP_STAT(us, udp_in_recvif); 4390 } 4391 4392 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4393 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4394 udi_size += sizeof (struct T_opthdr) + 4395 sizeof (struct sockaddr_dl); 4396 UDP_STAT(us, udp_in_recvslla); 4397 } 4398 4399 if ((udp_bits.udpb_recvucred) && 4400 (cr = DB_CRED(mp)) != NULL) { 4401 udi_size += sizeof (struct T_opthdr) + ucredsize; 4402 cpid = DB_CPID(mp); 4403 UDP_STAT(us, udp_in_recvucred); 4404 } 4405 4406 /* XXX FIXME: apply to AF_INET6 as well */ 4407 /* 4408 * If SO_TIMESTAMP is set allocate the appropriate sized 4409 * buffer. Since gethrestime() expects a pointer aligned 4410 * argument, we allocate space necessary for extra 4411 * alignment (even though it might not be used). 4412 */ 4413 if (udp_bits.udpb_timestamp) { 4414 udi_size += sizeof (struct T_opthdr) + 4415 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4416 UDP_STAT(us, udp_in_timestamp); 4417 } 4418 4419 /* 4420 * If IP_RECVTTL is set allocate the appropriate sized buffer 4421 */ 4422 if (udp_bits.udpb_recvttl) { 4423 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4424 UDP_STAT(us, udp_in_recvttl); 4425 } 4426 4427 /* Allocate a message block for the T_UNITDATA_IND structure. 
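For example, with only IP_RECVDSTADDR enabled the size computed above is sizeof (struct T_unitdata_ind) + sizeof (sin_t) + sizeof (struct T_opthdr) + sizeof (struct in_addr); each additional enabled option adds its own T_opthdr plus payload.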
*/ 4428 mp1 = allocb(udi_size, BPRI_MED); 4429 if (mp1 == NULL) { 4430 freemsg(mp); 4431 if (options_mp != NULL) 4432 freeb(options_mp); 4433 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4434 return; 4435 } 4436 mp1->b_cont = mp; 4437 mp = mp1; 4438 mp->b_datap->db_type = M_PROTO; 4439 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4440 mp->b_wptr = (uchar_t *)tudi + udi_size; 4441 tudi->PRIM_type = T_UNITDATA_IND; 4442 tudi->SRC_length = sizeof (sin_t); 4443 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4444 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4445 sizeof (sin_t); 4446 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4447 tudi->OPT_length = udi_size; 4448 sin = (sin_t *)&tudi[1]; 4449 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4450 sin->sin_port = udpha->uha_src_port; 4451 sin->sin_family = udp->udp_family; 4452 *(uint32_t *)&sin->sin_zero[0] = 0; 4453 *(uint32_t *)&sin->sin_zero[4] = 0; 4454 4455 /* 4456 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4457 * IP_RECVTTL has been set. 4458 */ 4459 if (udi_size != 0) { 4460 /* 4461 * Copy in destination address before options to avoid 4462 * any padding issues. 4463 */ 4464 char *dstopt; 4465 4466 dstopt = (char *)&sin[1]; 4467 if (udp_bits.udpb_recvdstaddr) { 4468 struct T_opthdr *toh; 4469 ipaddr_t *dstptr; 4470 4471 toh = (struct T_opthdr *)dstopt; 4472 toh->level = IPPROTO_IP; 4473 toh->name = IP_RECVDSTADDR; 4474 toh->len = sizeof (struct T_opthdr) + 4475 sizeof (ipaddr_t); 4476 toh->status = 0; 4477 dstopt += sizeof (struct T_opthdr); 4478 dstptr = (ipaddr_t *)dstopt; 4479 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4480 dstopt += sizeof (ipaddr_t); 4481 udi_size -= toh->len; 4482 } 4483 4484 if (udp_bits.udpb_recvopts && opt_len > 0) { 4485 struct T_opthdr *toh; 4486 4487 toh = (struct T_opthdr *)dstopt; 4488 toh->level = IPPROTO_IP; 4489 toh->name = IP_RECVOPTS; 4490 toh->len = sizeof (struct T_opthdr) + opt_len; 4491 toh->status = 0; 4492 dstopt += sizeof (struct T_opthdr); 4493 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, 4494 opt_len); 4495 dstopt += opt_len; 4496 udi_size -= toh->len; 4497 } 4498 4499 if ((udp_bits.udpb_ip_recvpktinfo) && (pinfo != NULL) && 4500 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 4501 struct T_opthdr *toh; 4502 struct in_pktinfo *pktinfop; 4503 4504 toh = (struct T_opthdr *)dstopt; 4505 toh->level = IPPROTO_IP; 4506 toh->name = IP_PKTINFO; 4507 toh->len = sizeof (struct T_opthdr) + 4508 sizeof (*pktinfop); 4509 toh->status = 0; 4510 dstopt += sizeof (struct T_opthdr); 4511 pktinfop = (struct in_pktinfo *)dstopt; 4512 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 4513 pktinfop->ipi_spec_dst = 4514 pinfo->ip_pkt_match_addr; 4515 pktinfop->ipi_addr.s_addr = 4516 ((ipha_t *)rptr)->ipha_dst; 4517 4518 dstopt += sizeof (struct in_pktinfo); 4519 udi_size -= toh->len; 4520 } 4521 4522 if ((udp_bits.udpb_recvslla) && (pinfo != NULL) && 4523 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 4524 4525 struct T_opthdr *toh; 4526 struct sockaddr_dl *dstptr; 4527 4528 toh = (struct T_opthdr *)dstopt; 4529 toh->level = IPPROTO_IP; 4530 toh->name = IP_RECVSLLA; 4531 toh->len = sizeof (struct T_opthdr) + 4532 sizeof (struct sockaddr_dl); 4533 toh->status = 0; 4534 dstopt += sizeof (struct T_opthdr); 4535 dstptr = (struct sockaddr_dl *)dstopt; 4536 bcopy(&pinfo->ip_pkt_slla, dstptr, 4537 sizeof (struct sockaddr_dl)); 4538 dstopt += sizeof (struct sockaddr_dl); 4539 udi_size -= toh->len; 4540 } 4541 4542 if ((udp_bits.udpb_recvif) && (pinfo != NULL) && 4543 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 
4544 4545 struct T_opthdr *toh; 4546 uint_t *dstptr; 4547 4548 toh = (struct T_opthdr *)dstopt; 4549 toh->level = IPPROTO_IP; 4550 toh->name = IP_RECVIF; 4551 toh->len = sizeof (struct T_opthdr) + 4552 sizeof (uint_t); 4553 toh->status = 0; 4554 dstopt += sizeof (struct T_opthdr); 4555 dstptr = (uint_t *)dstopt; 4556 *dstptr = pinfo->ip_pkt_ifindex; 4557 dstopt += sizeof (uint_t); 4558 udi_size -= toh->len; 4559 } 4560 4561 if (cr != NULL) { 4562 struct T_opthdr *toh; 4563 4564 toh = (struct T_opthdr *)dstopt; 4565 toh->level = SOL_SOCKET; 4566 toh->name = SCM_UCRED; 4567 toh->len = sizeof (struct T_opthdr) + ucredsize; 4568 toh->status = 0; 4569 dstopt += sizeof (struct T_opthdr); 4570 (void) cred2ucred(cr, cpid, dstopt, rcr); 4571 dstopt += ucredsize; 4572 udi_size -= toh->len; 4573 } 4574 4575 if (udp_bits.udpb_timestamp) { 4576 struct T_opthdr *toh; 4577 4578 toh = (struct T_opthdr *)dstopt; 4579 toh->level = SOL_SOCKET; 4580 toh->name = SCM_TIMESTAMP; 4581 toh->len = sizeof (struct T_opthdr) + 4582 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4583 toh->status = 0; 4584 dstopt += sizeof (struct T_opthdr); 4585 /* Align for gethrestime() */ 4586 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4587 sizeof (intptr_t)); 4588 gethrestime((timestruc_t *)dstopt); 4589 dstopt = (char *)toh + toh->len; 4590 udi_size -= toh->len; 4591 } 4592 4593 /* 4594 * CAUTION: 4595 * Due to alignment issues, processing 4596 * of the IP_RECVTTL option should 4597 * always be last. Adding any option 4598 * processing after this will cause an 4599 * alignment panic. 4600 */ 4601 if (udp_bits.udpb_recvttl) { 4602 struct T_opthdr *toh; 4603 uint8_t *dstptr; 4604 4605 toh = (struct T_opthdr *)dstopt; 4606 toh->level = IPPROTO_IP; 4607 toh->name = IP_RECVTTL; 4608 toh->len = sizeof (struct T_opthdr) + 4609 sizeof (uint8_t); 4610 toh->status = 0; 4611 dstopt += sizeof (struct T_opthdr); 4612 dstptr = (uint8_t *)dstopt; 4613 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4614 dstopt += sizeof (uint8_t); 4615 udi_size -= toh->len; 4616 } 4617 4618 /* Consumed all of allocated space */ 4619 ASSERT(udi_size == 0); 4620 } 4621 } else { 4622 sin6_t *sin6; 4623 4624 /* 4625 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4626 * 4627 * Normally we only send up the address. If receiving of any 4628 * optional receive side information is enabled, we also send 4629 * that up as options.
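 * For an IPv4 packet received on an AF_INET6 socket the source is reported
 * as an IPv4-mapped IPv6 address, e.g. 10.1.2.3 shows up as ::ffff:10.1.2.3
 * (see the IN6_IPADDR_TO_V4MAPPED conversion below).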
4630 */ 4631 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4632 4633 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4634 IPPF_RTHDR|IPPF_IFINDEX)) { 4635 if ((udp_bits.udpb_ipv6_recvhopopts) && 4636 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4637 size_t hlen; 4638 4639 UDP_STAT(us, udp_in_recvhopopts); 4640 hlen = copy_hop_opts(&ipp, NULL); 4641 if (hlen == 0) 4642 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4643 udi_size += hlen; 4644 } 4645 if (((udp_bits.udpb_ipv6_recvdstopts) || 4646 udp_bits.udpb_old_ipv6_recvdstopts) && 4647 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4648 udi_size += sizeof (struct T_opthdr) + 4649 ipp.ipp_dstoptslen; 4650 UDP_STAT(us, udp_in_recvdstopts); 4651 } 4652 if ((((udp_bits.udpb_ipv6_recvdstopts) && 4653 udp_bits.udpb_ipv6_recvrthdr && 4654 (ipp.ipp_fields & IPPF_RTHDR)) || 4655 (udp_bits.udpb_ipv6_recvrthdrdstopts)) && 4656 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4657 udi_size += sizeof (struct T_opthdr) + 4658 ipp.ipp_rtdstoptslen; 4659 UDP_STAT(us, udp_in_recvrtdstopts); 4660 } 4661 if ((udp_bits.udpb_ipv6_recvrthdr) && 4662 (ipp.ipp_fields & IPPF_RTHDR)) { 4663 udi_size += sizeof (struct T_opthdr) + 4664 ipp.ipp_rthdrlen; 4665 UDP_STAT(us, udp_in_recvrthdr); 4666 } 4667 if ((udp_bits.udpb_ip_recvpktinfo) && 4668 (ipp.ipp_fields & IPPF_IFINDEX)) { 4669 udi_size += sizeof (struct T_opthdr) + 4670 sizeof (struct in6_pktinfo); 4671 UDP_STAT(us, udp_in_recvpktinfo); 4672 } 4673 4674 } 4675 if ((udp_bits.udpb_recvucred) && 4676 (cr = DB_CRED(mp)) != NULL) { 4677 udi_size += sizeof (struct T_opthdr) + ucredsize; 4678 cpid = DB_CPID(mp); 4679 UDP_STAT(us, udp_in_recvucred); 4680 } 4681 4682 /* 4683 * If SO_TIMESTAMP is set allocate the appropriate sized 4684 * buffer. Since gethrestime() expects a pointer aligned 4685 * argument, we allocate space necessary for extra 4686 * alignment (even though it might not be used). 
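 * The copy code below rounds dstopt up to a sizeof (intptr_t) boundary with
 * P2ROUNDUP(), which is what may consume the extra bytes reserved here.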
4687 */ 4688 if (udp_bits.udpb_timestamp) { 4689 udi_size += sizeof (struct T_opthdr) + 4690 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4691 UDP_STAT(us, udp_in_timestamp); 4692 } 4693 4694 if (udp_bits.udpb_ipv6_recvhoplimit) { 4695 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4696 UDP_STAT(us, udp_in_recvhoplimit); 4697 } 4698 4699 if (udp_bits.udpb_ipv6_recvtclass) { 4700 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4701 UDP_STAT(us, udp_in_recvtclass); 4702 } 4703 4704 mp1 = allocb(udi_size, BPRI_MED); 4705 if (mp1 == NULL) { 4706 freemsg(mp); 4707 if (options_mp != NULL) 4708 freeb(options_mp); 4709 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4710 return; 4711 } 4712 mp1->b_cont = mp; 4713 mp = mp1; 4714 mp->b_datap->db_type = M_PROTO; 4715 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4716 mp->b_wptr = (uchar_t *)tudi + udi_size; 4717 tudi->PRIM_type = T_UNITDATA_IND; 4718 tudi->SRC_length = sizeof (sin6_t); 4719 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4720 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4721 sizeof (sin6_t); 4722 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4723 tudi->OPT_length = udi_size; 4724 sin6 = (sin6_t *)&tudi[1]; 4725 if (ipversion == IPV4_VERSION) { 4726 in6_addr_t v6dst; 4727 4728 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4729 &sin6->sin6_addr); 4730 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4731 &v6dst); 4732 sin6->sin6_flowinfo = 0; 4733 sin6->sin6_scope_id = 0; 4734 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4735 connp->conn_zoneid, us->us_netstack); 4736 } else { 4737 sin6->sin6_addr = ip6h->ip6_src; 4738 /* No sin6_flowinfo per API */ 4739 sin6->sin6_flowinfo = 0; 4740 /* For link-scope source pass up scope id */ 4741 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4742 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4743 sin6->sin6_scope_id = ipp.ipp_ifindex; 4744 else 4745 sin6->sin6_scope_id = 0; 4746 sin6->__sin6_src_id = ip_srcid_find_addr( 4747 &ip6h->ip6_dst, connp->conn_zoneid, 4748 us->us_netstack); 4749 } 4750 sin6->sin6_port = udpha->uha_src_port; 4751 sin6->sin6_family = udp->udp_family; 4752 4753 if (udi_size != 0) { 4754 uchar_t *dstopt; 4755 4756 dstopt = (uchar_t *)&sin6[1]; 4757 if ((udp_bits.udpb_ip_recvpktinfo) && 4758 (ipp.ipp_fields & IPPF_IFINDEX)) { 4759 struct T_opthdr *toh; 4760 struct in6_pktinfo *pkti; 4761 4762 toh = (struct T_opthdr *)dstopt; 4763 toh->level = IPPROTO_IPV6; 4764 toh->name = IPV6_PKTINFO; 4765 toh->len = sizeof (struct T_opthdr) + 4766 sizeof (*pkti); 4767 toh->status = 0; 4768 dstopt += sizeof (struct T_opthdr); 4769 pkti = (struct in6_pktinfo *)dstopt; 4770 if (ipversion == IPV6_VERSION) 4771 pkti->ipi6_addr = ip6h->ip6_dst; 4772 else 4773 IN6_IPADDR_TO_V4MAPPED( 4774 ((ipha_t *)rptr)->ipha_dst, 4775 &pkti->ipi6_addr); 4776 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4777 dstopt += sizeof (*pkti); 4778 udi_size -= toh->len; 4779 } 4780 if (udp_bits.udpb_ipv6_recvhoplimit) { 4781 struct T_opthdr *toh; 4782 4783 toh = (struct T_opthdr *)dstopt; 4784 toh->level = IPPROTO_IPV6; 4785 toh->name = IPV6_HOPLIMIT; 4786 toh->len = sizeof (struct T_opthdr) + 4787 sizeof (uint_t); 4788 toh->status = 0; 4789 dstopt += sizeof (struct T_opthdr); 4790 if (ipversion == IPV6_VERSION) 4791 *(uint_t *)dstopt = ip6h->ip6_hops; 4792 else 4793 *(uint_t *)dstopt = 4794 ((ipha_t *)rptr)->ipha_ttl; 4795 dstopt += sizeof (uint_t); 4796 udi_size -= toh->len; 4797 } 4798 if (udp_bits.udpb_ipv6_recvtclass) { 4799 struct T_opthdr *toh; 4800 4801 toh = (struct T_opthdr *)dstopt; 4802 
toh->level = IPPROTO_IPV6; 4803 toh->name = IPV6_TCLASS; 4804 toh->len = sizeof (struct T_opthdr) + 4805 sizeof (uint_t); 4806 toh->status = 0; 4807 dstopt += sizeof (struct T_opthdr); 4808 if (ipversion == IPV6_VERSION) { 4809 *(uint_t *)dstopt = 4810 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4811 } else { 4812 ipha_t *ipha = (ipha_t *)rptr; 4813 *(uint_t *)dstopt = 4814 ipha->ipha_type_of_service; 4815 } 4816 dstopt += sizeof (uint_t); 4817 udi_size -= toh->len; 4818 } 4819 if ((udp_bits.udpb_ipv6_recvhopopts) && 4820 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4821 size_t hlen; 4822 4823 hlen = copy_hop_opts(&ipp, dstopt); 4824 dstopt += hlen; 4825 udi_size -= hlen; 4826 } 4827 if ((udp_bits.udpb_ipv6_recvdstopts) && 4828 (udp_bits.udpb_ipv6_recvrthdr) && 4829 (ipp.ipp_fields & IPPF_RTHDR) && 4830 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4831 struct T_opthdr *toh; 4832 4833 toh = (struct T_opthdr *)dstopt; 4834 toh->level = IPPROTO_IPV6; 4835 toh->name = IPV6_DSTOPTS; 4836 toh->len = sizeof (struct T_opthdr) + 4837 ipp.ipp_rtdstoptslen; 4838 toh->status = 0; 4839 dstopt += sizeof (struct T_opthdr); 4840 bcopy(ipp.ipp_rtdstopts, dstopt, 4841 ipp.ipp_rtdstoptslen); 4842 dstopt += ipp.ipp_rtdstoptslen; 4843 udi_size -= toh->len; 4844 } 4845 if ((udp_bits.udpb_ipv6_recvrthdr) && 4846 (ipp.ipp_fields & IPPF_RTHDR)) { 4847 struct T_opthdr *toh; 4848 4849 toh = (struct T_opthdr *)dstopt; 4850 toh->level = IPPROTO_IPV6; 4851 toh->name = IPV6_RTHDR; 4852 toh->len = sizeof (struct T_opthdr) + 4853 ipp.ipp_rthdrlen; 4854 toh->status = 0; 4855 dstopt += sizeof (struct T_opthdr); 4856 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4857 dstopt += ipp.ipp_rthdrlen; 4858 udi_size -= toh->len; 4859 } 4860 if ((udp_bits.udpb_ipv6_recvdstopts) && 4861 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4862 struct T_opthdr *toh; 4863 4864 toh = (struct T_opthdr *)dstopt; 4865 toh->level = IPPROTO_IPV6; 4866 toh->name = IPV6_DSTOPTS; 4867 toh->len = sizeof (struct T_opthdr) + 4868 ipp.ipp_dstoptslen; 4869 toh->status = 0; 4870 dstopt += sizeof (struct T_opthdr); 4871 bcopy(ipp.ipp_dstopts, dstopt, 4872 ipp.ipp_dstoptslen); 4873 dstopt += ipp.ipp_dstoptslen; 4874 udi_size -= toh->len; 4875 } 4876 4877 if (cr != NULL) { 4878 struct T_opthdr *toh; 4879 4880 toh = (struct T_opthdr *)dstopt; 4881 toh->level = SOL_SOCKET; 4882 toh->name = SCM_UCRED; 4883 toh->len = sizeof (struct T_opthdr) + ucredsize; 4884 toh->status = 0; 4885 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4886 dstopt += toh->len; 4887 udi_size -= toh->len; 4888 } 4889 if (udp_bits.udpb_timestamp) { 4890 struct T_opthdr *toh; 4891 4892 toh = (struct T_opthdr *)dstopt; 4893 toh->level = SOL_SOCKET; 4894 toh->name = SCM_TIMESTAMP; 4895 toh->len = sizeof (struct T_opthdr) + 4896 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4897 toh->status = 0; 4898 dstopt += sizeof (struct T_opthdr); 4899 /* Align for gethrestime() */ 4900 dstopt = (uchar_t *)P2ROUNDUP((intptr_t)dstopt, 4901 sizeof (intptr_t)); 4902 gethrestime((timestruc_t *)dstopt); 4903 dstopt = (uchar_t *)toh + toh->len; 4904 udi_size -= toh->len; 4905 } 4906 4907 /* Consumed all of allocated space */ 4908 ASSERT(udi_size == 0); 4909 } 4910 #undef sin6 4911 /* No IP_RECVDSTADDR for IPv6. */ 4912 } 4913 4914 BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams); 4915 if (options_mp != NULL) 4916 freeb(options_mp); 4917 4918 if (udp_bits.udpb_direct_sockfs) { 4919 /* 4920 * There is nothing above us except for the stream head; 4921 * use the read-side synchronous stream interface in 4922 * order to reduce the time spent in interrupt thread. 
4923 */ 4924 ASSERT(udp->udp_issocket); 4925 udp_rcv_enqueue(connp->conn_rq, udp, mp, mp_len); 4926 } else { 4927 /* 4928 * Use regular STREAMS interface to pass data upstream 4929 * if this is not a socket endpoint, or if we have 4930 * switched over to the slow mode due to sockmod being 4931 * popped or a module being pushed on top of us. 4932 */ 4933 putnext(connp->conn_rq, mp); 4934 } 4935 return; 4936 4937 tossit: 4938 freemsg(mp); 4939 if (options_mp != NULL) 4940 freeb(options_mp); 4941 BUMP_MIB(&us->us_udp_mib, udpInErrors); 4942 } 4943 4944 /* 4945 * Handle the results of a T_BIND_REQ whether deferred by IP or handled 4946 * immediately. 4947 */ 4948 static void 4949 udp_bind_result(conn_t *connp, mblk_t *mp) 4950 { 4951 struct T_error_ack *tea; 4952 4953 switch (mp->b_datap->db_type) { 4954 case M_PROTO: 4955 case M_PCPROTO: 4956 /* M_PROTO messages contain some type of TPI message. */ 4957 ASSERT((uintptr_t)(mp->b_wptr - mp->b_rptr) <= 4958 (uintptr_t)INT_MAX); 4959 if (mp->b_wptr - mp->b_rptr < sizeof (t_scalar_t)) { 4960 freemsg(mp); 4961 return; 4962 } 4963 tea = (struct T_error_ack *)mp->b_rptr; 4964 4965 switch (tea->PRIM_type) { 4966 case T_ERROR_ACK: 4967 switch (tea->ERROR_prim) { 4968 case O_T_BIND_REQ: 4969 case T_BIND_REQ: 4970 udp_bind_error(connp, mp); 4971 return; 4972 default: 4973 break; 4974 } 4975 ASSERT(0); 4976 freemsg(mp); 4977 return; 4978 4979 case T_BIND_ACK: 4980 udp_bind_ack(connp, mp); 4981 return; 4982 4983 default: 4984 break; 4985 } 4986 freemsg(mp); 4987 return; 4988 default: 4989 /* FIXME: other cases? */ 4990 ASSERT(0); 4991 freemsg(mp); 4992 return; 4993 } 4994 } 4995 4996 /* 4997 * Process a T_BIND_ACK 4998 */ 4999 static void 5000 udp_bind_ack(conn_t *connp, mblk_t *mp) 5001 { 5002 udp_t *udp = connp->conn_udp; 5003 mblk_t *mp1; 5004 ire_t *ire; 5005 struct T_bind_ack *tba; 5006 uchar_t *addrp; 5007 ipa_conn_t *ac; 5008 ipa6_conn_t *ac6; 5009 udp_fanout_t *udpf; 5010 udp_stack_t *us = udp->udp_us; 5011 5012 ASSERT(udp->udp_pending_op != -1); 5013 rw_enter(&udp->udp_rwlock, RW_WRITER); 5014 /* 5015 * If a broadcast/multicast address was bound set 5016 * the source address to 0. 5017 * This ensures no datagrams with broadcast address 5018 * as source address are emitted (which would violate 5019 * RFC1122 - Hosts requirements) 5020 * 5021 * Note that when connecting the returned IRE is 5022 * for the destination address and we only perform 5023 * the broadcast check for the source address (it 5024 * is OK to connect to a broadcast/multicast address.) 5025 */ 5026 mp1 = mp->b_cont; 5027 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5028 ire = (ire_t *)mp1->b_rptr; 5029 5030 /* 5031 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5032 * local address. 
5033 */ 5034 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 5035 us->us_bind_fanout_size)]; 5036 if (ire->ire_type == IRE_BROADCAST && 5037 udp->udp_state != TS_DATA_XFER) { 5038 ASSERT(udp->udp_pending_op == T_BIND_REQ || 5039 udp->udp_pending_op == O_T_BIND_REQ); 5040 /* This was just a local bind to a broadcast addr */ 5041 mutex_enter(&udpf->uf_lock); 5042 V6_SET_ZERO(udp->udp_v6src); 5043 mutex_exit(&udpf->uf_lock); 5044 if (udp->udp_family == AF_INET6) 5045 (void) udp_build_hdrs(udp); 5046 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5047 /* 5048 * Local address not yet set - pick it from the 5049 * T_bind_ack 5050 */ 5051 tba = (struct T_bind_ack *)mp->b_rptr; 5052 addrp = &mp->b_rptr[tba->ADDR_offset]; 5053 switch (udp->udp_family) { 5054 case AF_INET: 5055 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5056 ac = (ipa_conn_t *)addrp; 5057 } else { 5058 ASSERT(tba->ADDR_length == 5059 sizeof (ipa_conn_x_t)); 5060 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5061 } 5062 mutex_enter(&udpf->uf_lock); 5063 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5064 &udp->udp_v6src); 5065 mutex_exit(&udpf->uf_lock); 5066 break; 5067 case AF_INET6: 5068 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5069 ac6 = (ipa6_conn_t *)addrp; 5070 } else { 5071 ASSERT(tba->ADDR_length == 5072 sizeof (ipa6_conn_x_t)); 5073 ac6 = &((ipa6_conn_x_t *) 5074 addrp)->ac6x_conn; 5075 } 5076 mutex_enter(&udpf->uf_lock); 5077 udp->udp_v6src = ac6->ac6_laddr; 5078 mutex_exit(&udpf->uf_lock); 5079 (void) udp_build_hdrs(udp); 5080 break; 5081 } 5082 } 5083 mp1 = mp1->b_cont; 5084 } 5085 udp->udp_pending_op = -1; 5086 rw_exit(&udp->udp_rwlock); 5087 /* 5088 * Look for one or more appended ACK message added by 5089 * udp_connect or udp_disconnect. 5090 * If none found just send up the T_BIND_ACK. 5091 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5092 * udp_disconnect has appended a T_OK_ACK. 5093 */ 5094 if (mp1 != NULL) { 5095 if (mp->b_cont == mp1) 5096 mp->b_cont = NULL; 5097 else { 5098 ASSERT(mp->b_cont->b_cont == mp1); 5099 mp->b_cont->b_cont = NULL; 5100 } 5101 freemsg(mp); 5102 mp = mp1; 5103 while (mp != NULL) { 5104 mp1 = mp->b_cont; 5105 mp->b_cont = NULL; 5106 putnext(connp->conn_rq, mp); 5107 mp = mp1; 5108 } 5109 return; 5110 } 5111 freemsg(mp->b_cont); 5112 mp->b_cont = NULL; 5113 putnext(connp->conn_rq, mp); 5114 } 5115 5116 static void 5117 udp_bind_error(conn_t *connp, mblk_t *mp) 5118 { 5119 udp_t *udp = connp->conn_udp; 5120 struct T_error_ack *tea; 5121 udp_fanout_t *udpf; 5122 udp_stack_t *us = udp->udp_us; 5123 5124 tea = (struct T_error_ack *)mp->b_rptr; 5125 5126 /* 5127 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5128 * clear out the associated port and source 5129 * address before passing the message 5130 * upstream. If this was caused by a T_CONN_REQ 5131 * revert back to bound state. 
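 * In other words, a failed T_CONN_REQ leaves the endpoint bound (back in
 * TS_IDLE with its original bound source address), while the T_BIND_REQ,
 * O_T_BIND_REQ and T_DISCON_REQ cases clear the port and source address
 * and leave the endpoint in TS_UNBND (see the switch below).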
5132 */ 5133 5134 rw_enter(&udp->udp_rwlock, RW_WRITER); 5135 ASSERT(udp->udp_pending_op != -1); 5136 tea->ERROR_prim = udp->udp_pending_op; 5137 udp->udp_pending_op = -1; 5138 udpf = &us->us_bind_fanout[ 5139 UDP_BIND_HASH(udp->udp_port, 5140 us->us_bind_fanout_size)]; 5141 mutex_enter(&udpf->uf_lock); 5142 5143 switch (tea->ERROR_prim) { 5144 case T_CONN_REQ: 5145 ASSERT(udp->udp_state == TS_DATA_XFER); 5146 /* Connect failed */ 5147 /* Revert back to the bound source */ 5148 udp->udp_v6src = udp->udp_bound_v6src; 5149 udp->udp_state = TS_IDLE; 5150 mutex_exit(&udpf->uf_lock); 5151 if (udp->udp_family == AF_INET6) 5152 (void) udp_build_hdrs(udp); 5153 rw_exit(&udp->udp_rwlock); 5154 break; 5155 5156 case T_DISCON_REQ: 5157 case T_BIND_REQ: 5158 case O_T_BIND_REQ: 5159 V6_SET_ZERO(udp->udp_v6src); 5160 V6_SET_ZERO(udp->udp_bound_v6src); 5161 udp->udp_state = TS_UNBND; 5162 udp_bind_hash_remove(udp, B_TRUE); 5163 udp->udp_port = 0; 5164 mutex_exit(&udpf->uf_lock); 5165 if (udp->udp_family == AF_INET6) 5166 (void) udp_build_hdrs(udp); 5167 rw_exit(&udp->udp_rwlock); 5168 break; 5169 5170 default: 5171 mutex_exit(&udpf->uf_lock); 5172 rw_exit(&udp->udp_rwlock); 5173 (void) mi_strlog(connp->conn_rq, 1, 5174 SL_ERROR|SL_TRACE, 5175 "udp_input_other: bad ERROR_prim, " 5176 "len %d", tea->ERROR_prim); 5177 } 5178 putnext(connp->conn_rq, mp); 5179 } 5180 5181 /* 5182 * return SNMP stuff in buffer in mpdata. We don't hold any lock and report 5183 * information that can be changing beneath us. 5184 */ 5185 mblk_t * 5186 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5187 { 5188 mblk_t *mpdata; 5189 mblk_t *mp_conn_ctl; 5190 mblk_t *mp_attr_ctl; 5191 mblk_t *mp6_conn_ctl; 5192 mblk_t *mp6_attr_ctl; 5193 mblk_t *mp_conn_tail; 5194 mblk_t *mp_attr_tail; 5195 mblk_t *mp6_conn_tail; 5196 mblk_t *mp6_attr_tail; 5197 struct opthdr *optp; 5198 mib2_udpEntry_t ude; 5199 mib2_udp6Entry_t ude6; 5200 mib2_transportMLPEntry_t mlp; 5201 int state; 5202 zoneid_t zoneid; 5203 int i; 5204 connf_t *connfp; 5205 conn_t *connp = Q_TO_CONN(q); 5206 int v4_conn_idx; 5207 int v6_conn_idx; 5208 boolean_t needattr; 5209 udp_t *udp; 5210 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5211 udp_stack_t *us = connp->conn_netstack->netstack_udp; 5212 mblk_t *mp2ctl; 5213 5214 /* 5215 * make a copy of the original message 5216 */ 5217 mp2ctl = copymsg(mpctl); 5218 5219 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 5220 if (mpctl == NULL || 5221 (mpdata = mpctl->b_cont) == NULL || 5222 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5223 (mp_attr_ctl = copymsg(mpctl)) == NULL || 5224 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 5225 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 5226 freemsg(mp_conn_ctl); 5227 freemsg(mp_attr_ctl); 5228 freemsg(mp6_conn_ctl); 5229 freemsg(mpctl); 5230 freemsg(mp2ctl); 5231 return (0); 5232 } 5233 5234 zoneid = connp->conn_zoneid; 5235 5236 /* fixed length structure for IPv4 and IPv6 counters */ 5237 SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5238 SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5239 /* synchronize 64- and 32-bit counters */ 5240 SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams); 5241 SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams); 5242 5243 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5244 optp->level = MIB2_UDP; 5245 optp->name = 0; 5246 (void) snmp_append_data(mpdata, (char *)&us->us_udp_mib, 5247 sizeof (us->us_udp_mib)); 5248 optp->len = msgdsize(mpdata); 5249 qreply(q, mpctl); 5250 5251 
mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 5252 v4_conn_idx = v6_conn_idx = 0; 5253 5254 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5255 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5256 connp = NULL; 5257 5258 while ((connp = ipcl_get_next_conn(connfp, connp, 5259 IPCL_UDPCONN))) { 5260 udp = connp->conn_udp; 5261 if (zoneid != connp->conn_zoneid) 5262 continue; 5263 5264 /* 5265 * Note that the port numbers are sent in 5266 * host byte order 5267 */ 5268 5269 if (udp->udp_state == TS_UNBND) 5270 state = MIB2_UDP_unbound; 5271 else if (udp->udp_state == TS_IDLE) 5272 state = MIB2_UDP_idle; 5273 else if (udp->udp_state == TS_DATA_XFER) 5274 state = MIB2_UDP_connected; 5275 else 5276 state = MIB2_UDP_unknown; 5277 5278 needattr = B_FALSE; 5279 bzero(&mlp, sizeof (mlp)); 5280 if (connp->conn_mlp_type != mlptSingle) { 5281 if (connp->conn_mlp_type == mlptShared || 5282 connp->conn_mlp_type == mlptBoth) 5283 mlp.tme_flags |= MIB2_TMEF_SHARED; 5284 if (connp->conn_mlp_type == mlptPrivate || 5285 connp->conn_mlp_type == mlptBoth) 5286 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 5287 needattr = B_TRUE; 5288 } 5289 5290 /* 5291 * Create an IPv4 table entry for IPv4 entries and also 5292 * any IPv6 entries which are bound to in6addr_any 5293 * (i.e. anything a IPv4 peer could connect/send to). 5294 */ 5295 if (udp->udp_ipversion == IPV4_VERSION || 5296 (udp->udp_state <= TS_IDLE && 5297 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5298 ude.udpEntryInfo.ue_state = state; 5299 /* 5300 * If in6addr_any this will set it to 5301 * INADDR_ANY 5302 */ 5303 ude.udpLocalAddress = 5304 V4_PART_OF_V6(udp->udp_v6src); 5305 ude.udpLocalPort = ntohs(udp->udp_port); 5306 if (udp->udp_state == TS_DATA_XFER) { 5307 /* 5308 * Can potentially get here for 5309 * v6 socket if another process 5310 * (say, ping) has just done a 5311 * sendto(), changing the state 5312 * from the TS_IDLE above to 5313 * TS_DATA_XFER by the time we hit 5314 * this part of the code. 5315 */ 5316 ude.udpEntryInfo.ue_RemoteAddress = 5317 V4_PART_OF_V6(udp->udp_v6dst); 5318 ude.udpEntryInfo.ue_RemotePort = 5319 ntohs(udp->udp_dstport); 5320 } else { 5321 ude.udpEntryInfo.ue_RemoteAddress = 0; 5322 ude.udpEntryInfo.ue_RemotePort = 0; 5323 } 5324 5325 /* 5326 * We make the assumption that all udp_t 5327 * structs will be created within an address 5328 * region no larger than 32-bits. 5329 */ 5330 ude.udpInstance = (uint32_t)(uintptr_t)udp; 5331 ude.udpCreationProcess = 5332 (udp->udp_open_pid < 0) ? 5333 MIB2_UNKNOWN_PROCESS : 5334 udp->udp_open_pid; 5335 ude.udpCreationTime = udp->udp_open_time; 5336 5337 (void) snmp_append_data2(mp_conn_ctl->b_cont, 5338 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5339 mlp.tme_connidx = v4_conn_idx++; 5340 if (needattr) 5341 (void) snmp_append_data2( 5342 mp_attr_ctl->b_cont, &mp_attr_tail, 5343 (char *)&mlp, sizeof (mlp)); 5344 } 5345 if (udp->udp_ipversion == IPV6_VERSION) { 5346 ude6.udp6EntryInfo.ue_state = state; 5347 ude6.udp6LocalAddress = udp->udp_v6src; 5348 ude6.udp6LocalPort = ntohs(udp->udp_port); 5349 ude6.udp6IfIndex = udp->udp_bound_if; 5350 if (udp->udp_state == TS_DATA_XFER) { 5351 ude6.udp6EntryInfo.ue_RemoteAddress = 5352 udp->udp_v6dst; 5353 ude6.udp6EntryInfo.ue_RemotePort = 5354 ntohs(udp->udp_dstport); 5355 } else { 5356 ude6.udp6EntryInfo.ue_RemoteAddress = 5357 sin6_null.sin6_addr; 5358 ude6.udp6EntryInfo.ue_RemotePort = 0; 5359 } 5360 /* 5361 * We make the assumption that all udp_t 5362 * structs will be created within an address 5363 * region no larger than 32-bits. 
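 * (udp6Instance below is the udp_t pointer truncated to 32 bits; it is
 * presumably meant only as an opaque instance identifier for the MIB, not
 * as a dereferenceable address.)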
5364 */ 5365 ude6.udp6Instance = (uint32_t)(uintptr_t)udp; 5366 ude6.udp6CreationProcess = 5367 (udp->udp_open_pid < 0) ? 5368 MIB2_UNKNOWN_PROCESS : 5369 udp->udp_open_pid; 5370 ude6.udp6CreationTime = udp->udp_open_time; 5371 5372 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 5373 &mp6_conn_tail, (char *)&ude6, 5374 sizeof (ude6)); 5375 mlp.tme_connidx = v6_conn_idx++; 5376 if (needattr) 5377 (void) snmp_append_data2( 5378 mp6_attr_ctl->b_cont, 5379 &mp6_attr_tail, (char *)&mlp, 5380 sizeof (mlp)); 5381 } 5382 } 5383 } 5384 5385 /* IPv4 UDP endpoints */ 5386 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5387 sizeof (struct T_optmgmt_ack)]; 5388 optp->level = MIB2_UDP; 5389 optp->name = MIB2_UDP_ENTRY; 5390 optp->len = msgdsize(mp_conn_ctl->b_cont); 5391 qreply(q, mp_conn_ctl); 5392 5393 /* table of MLP attributes... */ 5394 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 5395 sizeof (struct T_optmgmt_ack)]; 5396 optp->level = MIB2_UDP; 5397 optp->name = EXPER_XPORT_MLP; 5398 optp->len = msgdsize(mp_attr_ctl->b_cont); 5399 if (optp->len == 0) 5400 freemsg(mp_attr_ctl); 5401 else 5402 qreply(q, mp_attr_ctl); 5403 5404 /* IPv6 UDP endpoints */ 5405 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5406 sizeof (struct T_optmgmt_ack)]; 5407 optp->level = MIB2_UDP6; 5408 optp->name = MIB2_UDP6_ENTRY; 5409 optp->len = msgdsize(mp6_conn_ctl->b_cont); 5410 qreply(q, mp6_conn_ctl); 5411 5412 /* table of MLP attributes... */ 5413 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 5414 sizeof (struct T_optmgmt_ack)]; 5415 optp->level = MIB2_UDP6; 5416 optp->name = EXPER_XPORT_MLP; 5417 optp->len = msgdsize(mp6_attr_ctl->b_cont); 5418 if (optp->len == 0) 5419 freemsg(mp6_attr_ctl); 5420 else 5421 qreply(q, mp6_attr_ctl); 5422 5423 return (mp2ctl); 5424 } 5425 5426 /* 5427 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 5428 * NOTE: Per MIB-II, UDP has no writable data. 5429 * TODO: If this ever actually tries to set anything, it needs to be changed 5430 * to do the appropriate locking.
5431 */ 5432 /* ARGSUSED */ 5433 int 5434 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5435 uchar_t *ptr, int len) 5436 { 5437 switch (level) { 5438 case MIB2_UDP: 5439 return (0); 5440 default: 5441 return (1); 5442 } 5443 } 5444 5445 static void 5446 udp_report_item(mblk_t *mp, udp_t *udp) 5447 { 5448 char *state; 5449 char addrbuf1[INET6_ADDRSTRLEN]; 5450 char addrbuf2[INET6_ADDRSTRLEN]; 5451 uint_t print_len, buf_len; 5452 5453 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5454 ASSERT(buf_len >= 0); 5455 if (buf_len == 0) 5456 return; 5457 5458 if (udp->udp_state == TS_UNBND) 5459 state = "UNBOUND"; 5460 else if (udp->udp_state == TS_IDLE) 5461 state = "IDLE"; 5462 else if (udp->udp_state == TS_DATA_XFER) 5463 state = "CONNECTED"; 5464 else 5465 state = "UnkState"; 5466 print_len = snprintf((char *)mp->b_wptr, buf_len, 5467 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5468 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5469 inet_ntop(AF_INET6, &udp->udp_v6src, addrbuf1, sizeof (addrbuf1)), 5470 inet_ntop(AF_INET6, &udp->udp_v6dst, addrbuf2, sizeof (addrbuf2)), 5471 ntohs(udp->udp_dstport), state); 5472 if (print_len < buf_len) { 5473 mp->b_wptr += print_len; 5474 } else { 5475 mp->b_wptr += buf_len; 5476 } 5477 } 5478 5479 /* Report for ndd "udp_status" */ 5480 /* ARGSUSED */ 5481 static int 5482 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5483 { 5484 zoneid_t zoneid; 5485 connf_t *connfp; 5486 conn_t *connp = Q_TO_CONN(q); 5487 udp_t *udp = connp->conn_udp; 5488 int i; 5489 udp_stack_t *us = udp->udp_us; 5490 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5491 5492 /* 5493 * Because of the ndd constraint, at most we can have 64K buffer 5494 * to put in all UDP info. So to be more efficient, just 5495 * allocate a 64K buffer here, assuming we need that large buffer. 5496 * This may be a problem as any user can read udp_status. Therefore 5497 * we limit the rate of doing this using us_ndd_get_info_interval. 5498 * This should be OK as normal users should not do this too often. 5499 */ 5500 if (cr == NULL || secpolicy_ip_config(cr, B_TRUE) != 0) { 5501 if (ddi_get_lbolt() - us->us_last_ndd_get_info_time < 5502 drv_usectohz(us->us_ndd_get_info_interval * 1000)) { 5503 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5504 return (0); 5505 } 5506 } 5507 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5508 /* The following may work even if we cannot get a large buf. */ 5509 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5510 return (0); 5511 } 5512 (void) mi_mpprintf(mp, 5513 "UDP " MI_COL_HDRPAD_STR 5514 /* 12345678[89ABCDEF] */ 5515 " zone lport src addr dest addr port state"); 5516 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5517 5518 zoneid = connp->conn_zoneid; 5519 5520 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5521 connfp = &ipst->ips_ipcl_globalhash_fanout[i]; 5522 connp = NULL; 5523 5524 while ((connp = ipcl_get_next_conn(connfp, connp, 5525 IPCL_UDPCONN))) { 5526 udp = connp->conn_udp; 5527 if (zoneid != GLOBAL_ZONEID && 5528 zoneid != connp->conn_zoneid) 5529 continue; 5530 5531 udp_report_item(mp->b_cont, udp); 5532 } 5533 } 5534 us->us_last_ndd_get_info_time = ddi_get_lbolt(); 5535 return (0); 5536 } 5537 5538 /* 5539 * This routine creates a T_UDERROR_IND message and passes it upstream. 5540 * The address and options are copied from the T_UNITDATA_REQ message 5541 * passed in mp. This message is freed. 
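 * For M_DATA messages the caller must supply destaddr/destlen directly; for
 * T_UNITDATA_REQ messages the address and options are extracted from the
 * request itself, subject to the bounds checks below.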
5542 */ 5543 static void 5544 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5545 t_scalar_t err) 5546 { 5547 struct T_unitdata_req *tudr; 5548 mblk_t *mp1; 5549 uchar_t *optaddr; 5550 t_scalar_t optlen; 5551 5552 if (DB_TYPE(mp) == M_DATA) { 5553 ASSERT(destaddr != NULL && destlen != 0); 5554 optaddr = NULL; 5555 optlen = 0; 5556 } else { 5557 if ((mp->b_wptr < mp->b_rptr) || 5558 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5559 goto done; 5560 } 5561 tudr = (struct T_unitdata_req *)mp->b_rptr; 5562 destaddr = mp->b_rptr + tudr->DEST_offset; 5563 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5564 destaddr + tudr->DEST_length < mp->b_rptr || 5565 destaddr + tudr->DEST_length > mp->b_wptr) { 5566 goto done; 5567 } 5568 optaddr = mp->b_rptr + tudr->OPT_offset; 5569 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 5570 optaddr + tudr->OPT_length < mp->b_rptr || 5571 optaddr + tudr->OPT_length > mp->b_wptr) { 5572 goto done; 5573 } 5574 destlen = tudr->DEST_length; 5575 optlen = tudr->OPT_length; 5576 } 5577 5578 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5579 (char *)optaddr, optlen, err); 5580 if (mp1 != NULL) 5581 qreply(q, mp1); 5582 5583 done: 5584 freemsg(mp); 5585 } 5586 5587 /* 5588 * This routine removes a port number association from a stream. It 5589 * is called by udp_wput to handle T_UNBIND_REQ messages. 5590 */ 5591 static void 5592 udp_unbind(queue_t *q, mblk_t *mp) 5593 { 5594 udp_t *udp = Q_TO_UDP(q); 5595 udp_fanout_t *udpf; 5596 udp_stack_t *us = udp->udp_us; 5597 5598 if (cl_inet_unbind != NULL) { 5599 /* 5600 * Running in cluster mode - register unbind information 5601 */ 5602 if (udp->udp_ipversion == IPV4_VERSION) { 5603 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 5604 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5605 (in_port_t)udp->udp_port); 5606 } else { 5607 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5608 (uint8_t *)&(udp->udp_v6src), 5609 (in_port_t)udp->udp_port); 5610 } 5611 } 5612 5613 rw_enter(&udp->udp_rwlock, RW_WRITER); 5614 if (udp->udp_state == TS_UNBND || udp->udp_pending_op != -1) { 5615 rw_exit(&udp->udp_rwlock); 5616 udp_err_ack(q, mp, TOUTSTATE, 0); 5617 return; 5618 } 5619 udp->udp_pending_op = T_UNBIND_REQ; 5620 rw_exit(&udp->udp_rwlock); 5621 5622 /* 5623 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5624 * and therefore ip_unbind must never return NULL. 5625 */ 5626 mp = ip_unbind(q, mp); 5627 ASSERT(mp != NULL); 5628 ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK); 5629 5630 /* 5631 * Once we're unbound from IP, the pending operation may be cleared 5632 * here. 5633 */ 5634 rw_enter(&udp->udp_rwlock, RW_WRITER); 5635 udpf = &us->us_bind_fanout[UDP_BIND_HASH(udp->udp_port, 5636 us->us_bind_fanout_size)]; 5637 mutex_enter(&udpf->uf_lock); 5638 udp_bind_hash_remove(udp, B_TRUE); 5639 V6_SET_ZERO(udp->udp_v6src); 5640 V6_SET_ZERO(udp->udp_bound_v6src); 5641 udp->udp_port = 0; 5642 mutex_exit(&udpf->uf_lock); 5643 5644 udp->udp_pending_op = -1; 5645 udp->udp_state = TS_UNBND; 5646 if (udp->udp_family == AF_INET6) 5647 (void) udp_build_hdrs(udp); 5648 rw_exit(&udp->udp_rwlock); 5649 5650 qreply(q, mp); 5651 } 5652 5653 /* 5654 * Don't let port fall into the privileged range. 5655 * Since the extra privileged ports can be arbitrary we also 5656 * ensure that we exclude those from consideration. 5657 * us->us_epriv_ports is not sorted thus we loop over it until 5658 * there are no changes. 
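 * For example, if the chosen port matches an entry in us_epriv_ports it is
 * bumped by one and the whole scan is restarted (the retry label below),
 * since any other entry could match the new value.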
5659 */ 5660 static in_port_t 5661 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 5662 { 5663 int i; 5664 in_port_t nextport; 5665 boolean_t restart = B_FALSE; 5666 udp_stack_t *us = udp->udp_us; 5667 5668 if (random && udp_random_anon_port != 0) { 5669 (void) random_get_pseudo_bytes((uint8_t *)&port, 5670 sizeof (in_port_t)); 5671 /* 5672 * Unless changed by a sys admin, the smallest anon port 5673 * is 32768 and the largest anon port is 65535. It is 5674 * very likely (50%) for the random port to be smaller 5675 * than the smallest anon port. When that happens, 5676 * add port % (anon port range) to the smallest anon 5677 * port to get the random port. It should fall into the 5678 * valid anon port range. 5679 */ 5680 if (port < us->us_smallest_anon_port) { 5681 port = us->us_smallest_anon_port + 5682 port % (us->us_largest_anon_port - 5683 us->us_smallest_anon_port); 5684 } 5685 } 5686 5687 retry: 5688 if (port < us->us_smallest_anon_port) 5689 port = us->us_smallest_anon_port; 5690 5691 if (port > us->us_largest_anon_port) { 5692 port = us->us_smallest_anon_port; 5693 if (restart) 5694 return (0); 5695 restart = B_TRUE; 5696 } 5697 5698 if (port < us->us_smallest_nonpriv_port) 5699 port = us->us_smallest_nonpriv_port; 5700 5701 for (i = 0; i < us->us_num_epriv_ports; i++) { 5702 if (port == us->us_epriv_ports[i]) { 5703 port++; 5704 /* 5705 * Make sure that the port is in the 5706 * valid range. 5707 */ 5708 goto retry; 5709 } 5710 } 5711 5712 if (is_system_labeled() && 5713 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 5714 port, IPPROTO_UDP, B_TRUE)) != 0) { 5715 port = nextport; 5716 goto retry; 5717 } 5718 5719 return (port); 5720 } 5721 5722 static int 5723 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 5724 { 5725 int err; 5726 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 5727 udp_t *udp = Q_TO_UDP(wq); 5728 udp_stack_t *us = udp->udp_us; 5729 5730 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 5731 opt_storage, udp->udp_connp->conn_mac_exempt, 5732 us->us_netstack->netstack_ip); 5733 if (err == 0) { 5734 err = tsol_update_options(&udp->udp_ip_snd_options, 5735 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 5736 opt_storage); 5737 } 5738 if (err != 0) { 5739 DTRACE_PROBE4( 5740 tx__ip__log__info__updatelabel__udp, 5741 char *, "queue(1) failed to update options(2) on mp(3)", 5742 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 5743 } else { 5744 IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst); 5745 } 5746 return (err); 5747 } 5748 5749 static mblk_t * 5750 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5751 uint_t srcid, int *error, boolean_t insert_spi) 5752 { 5753 udp_t *udp = connp->conn_udp; 5754 queue_t *q = connp->conn_wq; 5755 mblk_t *mp1 = mp; 5756 mblk_t *mp2; 5757 ipha_t *ipha; 5758 int ip_hdr_length; 5759 uint32_t ip_len; 5760 udpha_t *udpha; 5761 boolean_t lock_held = B_FALSE; 5762 in_port_t uha_src_port; 5763 udpattrs_t attrs; 5764 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 5765 uint32_t ip_snd_opt_len = 0; 5766 ip4_pkt_t pktinfo; 5767 ip4_pkt_t *pktinfop = &pktinfo; 5768 ip_opt_info_t optinfo; 5769 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 5770 udp_stack_t *us = udp->udp_us; 5771 ipsec_stack_t *ipss = ipst->ips_netstack->netstack_ipsec; 5772 5773 5774 *error = 0; 5775 pktinfop->ip4_ill_index = 0; 5776 pktinfop->ip4_addr = INADDR_ANY; 5777 optinfo.ip_opt_flags = 0; 5778 optinfo.ip_opt_ill_index = 0; 5779 5780 if (v4dst == INADDR_ANY) 5781 v4dst = 
htonl(INADDR_LOOPBACK); 5782 5783 /* 5784 * If options passed in, feed it for verification and handling 5785 */ 5786 attrs.udpattr_credset = B_FALSE; 5787 if (DB_TYPE(mp) != M_DATA) { 5788 mp1 = mp->b_cont; 5789 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 5790 attrs.udpattr_ipp4 = pktinfop; 5791 attrs.udpattr_mb = mp; 5792 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 5793 goto done; 5794 /* 5795 * Note: success in processing options. 5796 * mp option buffer represented by 5797 * OPT_length/offset now potentially modified 5798 * and contain option setting results 5799 */ 5800 ASSERT(*error == 0); 5801 } 5802 } 5803 5804 /* mp1 points to the M_DATA mblk carrying the packet */ 5805 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5806 5807 rw_enter(&udp->udp_rwlock, RW_READER); 5808 lock_held = B_TRUE; 5809 /* 5810 * Check if our saved options are valid; update if not. 5811 * TSOL Note: Since we are not in WRITER mode, UDP packets 5812 * to different destination may require different labels, 5813 * or worse, UDP packets to same IP address may require 5814 * different labels due to use of shared all-zones address. 5815 * We use conn_lock to ensure that lastdst, ip_snd_options, 5816 * and ip_snd_options_len are consistent for the current 5817 * destination and are updated atomically. 5818 */ 5819 mutex_enter(&connp->conn_lock); 5820 if (is_system_labeled()) { 5821 /* Using UDP MLP requires SCM_UCRED from user */ 5822 if (connp->conn_mlp_type != mlptSingle && 5823 !attrs.udpattr_credset) { 5824 mutex_exit(&connp->conn_lock); 5825 DTRACE_PROBE4( 5826 tx__ip__log__info__output__udp, 5827 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 5828 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 5829 *error = ECONNREFUSED; 5830 goto done; 5831 } 5832 /* 5833 * update label option for this UDP socket if 5834 * - the destination has changed, or 5835 * - the UDP socket is MLP 5836 */ 5837 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 5838 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst || 5839 connp->conn_mlp_type != mlptSingle) && 5840 (*error = udp_update_label(q, mp, v4dst)) != 0) { 5841 mutex_exit(&connp->conn_lock); 5842 goto done; 5843 } 5844 } 5845 if (udp->udp_ip_snd_options_len > 0) { 5846 ip_snd_opt_len = udp->udp_ip_snd_options_len; 5847 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 5848 } 5849 mutex_exit(&connp->conn_lock); 5850 5851 /* Add an IP header */ 5852 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len + 5853 (insert_spi ? sizeof (uint32_t) : 0); 5854 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5855 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5856 !OK_32PTR(ipha)) { 5857 mp2 = allocb(ip_hdr_length + us->us_wroff_extra, BPRI_LO); 5858 if (mp2 == NULL) { 5859 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5860 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5861 *error = ENOMEM; 5862 goto done; 5863 } 5864 mp2->b_wptr = DB_LIM(mp2); 5865 mp2->b_cont = mp1; 5866 mp1 = mp2; 5867 if (DB_TYPE(mp) != M_DATA) 5868 mp->b_cont = mp1; 5869 else 5870 mp = mp1; 5871 5872 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5873 } 5874 ip_hdr_length -= (UDPH_SIZE + (insert_spi ? 
sizeof (uint32_t) : 0)); 5875 #ifdef _BIG_ENDIAN 5876 /* Set version, header length, and tos */ 5877 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5878 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5879 udp->udp_type_of_service); 5880 /* Set ttl and protocol */ 5881 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5882 #else 5883 /* Set version, header length, and tos */ 5884 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5885 ((udp->udp_type_of_service << 8) | 5886 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5887 /* Set ttl and protocol */ 5888 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5889 #endif 5890 if (pktinfop->ip4_addr != INADDR_ANY) { 5891 ipha->ipha_src = pktinfop->ip4_addr; 5892 optinfo.ip_opt_flags = IP_VERIFY_SRC; 5893 } else { 5894 /* 5895 * Copy our address into the packet. If this is zero, 5896 * first look at __sin6_src_id for a hint. If we leave the 5897 * source as INADDR_ANY then ip will fill in the real source 5898 * address. 5899 */ 5900 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5901 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5902 in6_addr_t v6src; 5903 5904 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid, 5905 us->us_netstack); 5906 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5907 } 5908 } 5909 uha_src_port = udp->udp_port; 5910 if (ip_hdr_length == IP_SIMPLE_HDR_LENGTH) { 5911 rw_exit(&udp->udp_rwlock); 5912 lock_held = B_FALSE; 5913 } 5914 5915 if (pktinfop->ip4_ill_index != 0) { 5916 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 5917 } 5918 5919 ipha->ipha_fragment_offset_and_flags = 0; 5920 ipha->ipha_ident = 0; 5921 5922 mp1->b_rptr = (uchar_t *)ipha; 5923 5924 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5925 (uintptr_t)UINT_MAX); 5926 5927 /* Determine length of packet */ 5928 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5929 if ((mp2 = mp1->b_cont) != NULL) { 5930 do { 5931 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5932 ip_len += (uint32_t)MBLKL(mp2); 5933 } while ((mp2 = mp2->b_cont) != NULL); 5934 } 5935 /* 5936 * If the size of the packet is greater than the maximum allowed by 5937 * ip, return an error. Passing this down could cause panics because 5938 * the size will have wrapped and be inconsistent with the msg size. 5939 */ 5940 if (ip_len > IP_MAXPACKET) { 5941 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5942 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5943 *error = EMSGSIZE; 5944 goto done; 5945 } 5946 ipha->ipha_length = htons((uint16_t)ip_len); 5947 ip_len -= ip_hdr_length; 5948 ip_len = htons((uint16_t)ip_len); 5949 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5950 5951 /* Insert all-0s SPI now. */ 5952 if (insert_spi) 5953 *((uint32_t *)(udpha + 1)) = 0; 5954 5955 /* 5956 * Copy in the destination address 5957 */ 5958 ipha->ipha_dst = v4dst; 5959 5960 /* 5961 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5962 */ 5963 if (CLASSD(v4dst)) 5964 ipha->ipha_ttl = udp->udp_multicast_ttl; 5965 5966 udpha->uha_dst_port = port; 5967 udpha->uha_src_port = uha_src_port; 5968 5969 if (ip_snd_opt_len > 0) { 5970 uint32_t cksum; 5971 5972 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 5973 lock_held = B_FALSE; 5974 rw_exit(&udp->udp_rwlock); 5975 /* 5976 * Massage source route putting first source route in ipha_dst. 5977 * Ignore the destination in T_unitdata_req. 5978 * Create a checksum adjustment for a source route, if any. 
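 * The arithmetic below is ones-complement folding: the 32-bit sum is
 * repeatedly reduced as (cksum & 0xFFFF) + (cksum >> 16), so e.g. a sum of
 * 0x1FFFE folds to 0xFFFF.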
5979 */ 5980 cksum = ip_massage_options(ipha, us->us_netstack); 5981 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5982 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5983 (ipha->ipha_dst & 0xFFFF); 5984 if ((int)cksum < 0) 5985 cksum--; 5986 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5987 /* 5988 * IP does the checksum if uha_checksum is non-zero, 5989 * We make it easy for IP to include our pseudo header 5990 * by putting our length in uha_checksum. 5991 */ 5992 cksum += ip_len; 5993 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5994 /* There might be a carry. */ 5995 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5996 #ifdef _LITTLE_ENDIAN 5997 if (us->us_do_checksum) 5998 ip_len = (cksum << 16) | ip_len; 5999 #else 6000 if (us->us_do_checksum) 6001 ip_len = (ip_len << 16) | cksum; 6002 else 6003 ip_len <<= 16; 6004 #endif 6005 } else { 6006 /* 6007 * IP does the checksum if uha_checksum is non-zero, 6008 * We make it easy for IP to include our pseudo header 6009 * by putting our length in uha_checksum. 6010 */ 6011 if (us->us_do_checksum) 6012 ip_len |= (ip_len << 16); 6013 #ifndef _LITTLE_ENDIAN 6014 else 6015 ip_len <<= 16; 6016 #endif 6017 } 6018 ASSERT(!lock_held); 6019 /* Set UDP length and checksum */ 6020 *((uint32_t *)&udpha->uha_length) = ip_len; 6021 if (DB_CRED(mp) != NULL) 6022 mblk_setcred(mp1, DB_CRED(mp)); 6023 6024 if (DB_TYPE(mp) != M_DATA) { 6025 ASSERT(mp != mp1); 6026 freeb(mp); 6027 } 6028 6029 /* mp has been consumed and we'll return success */ 6030 ASSERT(*error == 0); 6031 mp = NULL; 6032 6033 /* We're done. Pass the packet to ip. */ 6034 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 6035 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6036 "udp_wput_end: q %p (%S)", q, "end"); 6037 6038 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 6039 CONN_OUTBOUND_POLICY_PRESENT(connp, ipss) || 6040 connp->conn_dontroute || 6041 connp->conn_nofailover_ill != NULL || 6042 connp->conn_outgoing_ill != NULL || optinfo.ip_opt_flags != 0 || 6043 optinfo.ip_opt_ill_index != 0 || 6044 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 6045 IPP_ENABLED(IPP_LOCAL_OUT, ipst) || 6046 ipst->ips_ip_g_mrouter != NULL) { 6047 UDP_STAT(us, udp_ip_send); 6048 ip_output_options(connp, mp1, connp->conn_wq, IP_WPUT, 6049 &optinfo); 6050 } else { 6051 udp_send_data(udp, connp->conn_wq, mp1, ipha); 6052 } 6053 6054 done: 6055 if (lock_held) 6056 rw_exit(&udp->udp_rwlock); 6057 if (*error != 0) { 6058 ASSERT(mp != NULL); 6059 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6060 } 6061 return (mp); 6062 } 6063 6064 static void 6065 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 6066 { 6067 conn_t *connp = udp->udp_connp; 6068 ipaddr_t src, dst; 6069 ire_t *ire; 6070 ipif_t *ipif = NULL; 6071 mblk_t *ire_fp_mp; 6072 boolean_t retry_caching; 6073 udp_stack_t *us = udp->udp_us; 6074 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6075 6076 dst = ipha->ipha_dst; 6077 src = ipha->ipha_src; 6078 ASSERT(ipha->ipha_ident == 0); 6079 6080 if (CLASSD(dst)) { 6081 int err; 6082 6083 ipif = conn_get_held_ipif(connp, 6084 &connp->conn_multicast_ipif, &err); 6085 6086 if (ipif == NULL || ipif->ipif_isv6 || 6087 (ipif->ipif_ill->ill_phyint->phyint_flags & 6088 PHYI_LOOPBACK)) { 6089 if (ipif != NULL) 6090 ipif_refrele(ipif); 6091 UDP_STAT(us, udp_ip_send); 6092 ip_output(connp, mp, q, IP_WPUT); 6093 return; 6094 } 6095 } 6096 6097 retry_caching = B_FALSE; 6098 mutex_enter(&connp->conn_lock); 6099 ire = connp->conn_ire_cache; 6100 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 6101 6102 if (ire == NULL 
|| ire->ire_addr != dst || 6103 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 6104 retry_caching = B_TRUE; 6105 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 6106 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6107 6108 ASSERT(ipif != NULL); 6109 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6110 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6111 retry_caching = B_TRUE; 6112 } 6113 6114 if (!retry_caching) { 6115 ASSERT(ire != NULL); 6116 IRE_REFHOLD(ire); 6117 mutex_exit(&connp->conn_lock); 6118 } else { 6119 boolean_t cached = B_FALSE; 6120 6121 connp->conn_ire_cache = NULL; 6122 mutex_exit(&connp->conn_lock); 6123 6124 /* Release the old ire */ 6125 if (ire != NULL) { 6126 IRE_REFRELE_NOTR(ire); 6127 ire = NULL; 6128 } 6129 6130 if (CLASSD(dst)) { 6131 ASSERT(ipif != NULL); 6132 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6133 connp->conn_zoneid, MBLK_GETLABEL(mp), 6134 MATCH_IRE_ILL_GROUP, ipst); 6135 } else { 6136 ASSERT(ipif == NULL); 6137 ire = ire_cache_lookup(dst, connp->conn_zoneid, 6138 MBLK_GETLABEL(mp), ipst); 6139 } 6140 6141 if (ire == NULL) { 6142 if (ipif != NULL) 6143 ipif_refrele(ipif); 6144 UDP_STAT(us, udp_ire_null); 6145 ip_output(connp, mp, q, IP_WPUT); 6146 return; 6147 } 6148 IRE_REFHOLD_NOTR(ire); 6149 6150 mutex_enter(&connp->conn_lock); 6151 if (CONN_CACHE_IRE(connp) && connp->conn_ire_cache == NULL && 6152 !(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6153 irb_t *irb = ire->ire_bucket; 6154 6155 /* 6156 * IRE's created for non-connection oriented transports 6157 * are normally initialized with IRE_MARK_TEMPORARY set 6158 * in the ire_marks. These IRE's are preferentially 6159 * reaped when the hash chain length in the cache 6160 * bucket exceeds the maximum value specified in 6161 * ip[6]_ire_max_bucket_cnt. This can severely affect 6162 * UDP performance if IRE cache entries that we need 6163 * to reuse are continually removed. To remedy this, 6164 * when we cache the IRE in the conn_t, we remove the 6165 * IRE_MARK_TEMPORARY bit from the ire_marks if it was 6166 * set. 6167 */ 6168 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 6169 rw_enter(&irb->irb_lock, RW_WRITER); 6170 if (ire->ire_marks & IRE_MARK_TEMPORARY) { 6171 ire->ire_marks &= ~IRE_MARK_TEMPORARY; 6172 irb->irb_tmp_ire_cnt--; 6173 } 6174 rw_exit(&irb->irb_lock); 6175 } 6176 connp->conn_ire_cache = ire; 6177 cached = B_TRUE; 6178 } 6179 mutex_exit(&connp->conn_lock); 6180 6181 /* 6182 * We can continue to use the ire but since it was not 6183 * cached, we should drop the extra reference. 6184 */ 6185 if (!cached) 6186 IRE_REFRELE_NOTR(ire); 6187 } 6188 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6189 ASSERT(!CLASSD(dst) || ipif != NULL); 6190 6191 /* 6192 * Check if we can take the fast-path. 
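 * The fast path prepends the prebuilt link-layer header cached in
 * nce_fp_mp and hands the packet to the driver from udp_xmit(),
 * bypassing ip_output().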
6193 * Note that "incomplete" ire's (where the link-layer for next hop 6194 * is not resolved, or where the fast-path header in nce_fp_mp is not 6195 * available yet) are sent down the legacy (slow) path 6196 */ 6197 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6198 (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) || 6199 (ire->ire_max_frag < ntohs(ipha->ipha_length)) || 6200 ((ire->ire_nce == NULL) || 6201 ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL)) || 6202 connp->conn_nexthop_set || (MBLKL(ire_fp_mp) > MBLKHEAD(mp))) { 6203 if (ipif != NULL) 6204 ipif_refrele(ipif); 6205 UDP_STAT(us, udp_ip_ire_send); 6206 IRE_REFRELE(ire); 6207 ip_output(connp, mp, q, IP_WPUT); 6208 return; 6209 } 6210 6211 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6212 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6213 ipha->ipha_src = ipif->ipif_src_addr; 6214 else 6215 ipha->ipha_src = ire->ire_src_addr; 6216 } 6217 6218 if (ipif != NULL) 6219 ipif_refrele(ipif); 6220 6221 udp_xmit(connp->conn_wq, mp, ire, connp, connp->conn_zoneid); 6222 } 6223 6224 static void 6225 udp_xmit(queue_t *q, mblk_t *mp, ire_t *ire, conn_t *connp, zoneid_t zoneid) 6226 { 6227 ipaddr_t src, dst; 6228 ill_t *ill; 6229 mblk_t *ire_fp_mp; 6230 uint_t ire_fp_mp_len; 6231 uint16_t *up; 6232 uint32_t cksum, hcksum_txflags; 6233 queue_t *dev_q; 6234 udp_t *udp = connp->conn_udp; 6235 ipha_t *ipha = (ipha_t *)mp->b_rptr; 6236 udp_stack_t *us = udp->udp_us; 6237 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 6238 boolean_t ll_multicast = B_FALSE; 6239 6240 dev_q = ire->ire_stq->q_next; 6241 ASSERT(dev_q != NULL); 6242 6243 ill = ire_to_ill(ire); 6244 ASSERT(ill != NULL); 6245 6246 /* is queue flow controlled? */ 6247 if (q->q_first != NULL || connp->conn_draining || 6248 DEV_Q_FLOW_BLOCKED(dev_q)) { 6249 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsHCOutRequests); 6250 BUMP_MIB(&ipst->ips_ip_mib, ipIfStatsOutDiscards); 6251 if (ipst->ips_ip_output_queue) 6252 (void) putq(connp->conn_wq, mp); 6253 else 6254 freemsg(mp); 6255 ire_refrele(ire); 6256 return; 6257 } 6258 6259 ire_fp_mp = ire->ire_nce->nce_fp_mp; 6260 ire_fp_mp_len = MBLKL(ire_fp_mp); 6261 ASSERT(MBLKHEAD(mp) >= ire_fp_mp_len); 6262 6263 dst = ipha->ipha_dst; 6264 src = ipha->ipha_src; 6265 6266 6267 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests); 6268 6269 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6270 #ifndef _BIG_ENDIAN 6271 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6272 #endif 6273 6274 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6275 ASSERT(ill->ill_hcksum_capab != NULL); 6276 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6277 } else { 6278 hcksum_txflags = 0; 6279 } 6280 6281 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6282 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6283 6284 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6285 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6286 if (*up != 0) { 6287 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6288 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6289 ntohs(ipha->ipha_length), cksum); 6290 6291 /* Software checksum? 
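A zero DB_CKSUMFLAGS here means IP_CKSUM_XMIT_FAST() could not use the
hardware and computed the checksum in software, so account for it in the
statistics.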
*/ 6292 if (DB_CKSUMFLAGS(mp) == 0) { 6293 UDP_STAT(us, udp_out_sw_cksum); 6294 UDP_STAT_UPDATE(us, udp_out_sw_cksum_bytes, 6295 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6296 } 6297 } 6298 6299 if (!CLASSD(dst)) { 6300 ipha->ipha_fragment_offset_and_flags |= 6301 (uint32_t)htons(ire->ire_frag_flag); 6302 } 6303 6304 /* Calculate IP header checksum if hardware isn't capable */ 6305 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6306 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6307 ((uint16_t *)ipha)[4]); 6308 } 6309 6310 if (CLASSD(dst)) { 6311 boolean_t ilm_exists; 6312 6313 ILM_WALKER_HOLD(ill); 6314 ilm_exists = (ilm_lookup_ill(ill, dst, ALL_ZONES) != NULL); 6315 ILM_WALKER_RELE(ill); 6316 if (ilm_exists) { 6317 ip_multicast_loopback(q, ill, mp, 6318 connp->conn_multicast_loop ? 0 : 6319 IP_FF_NO_MCAST_LOOP, zoneid); 6320 } 6321 6322 /* If multicast TTL is 0 then we are done */ 6323 if (ipha->ipha_ttl == 0) { 6324 freemsg(mp); 6325 ire_refrele(ire); 6326 return; 6327 } 6328 ll_multicast = B_TRUE; 6329 } 6330 6331 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6332 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6333 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6334 6335 UPDATE_OB_PKT_COUNT(ire); 6336 ire->ire_last_used_time = lbolt; 6337 6338 BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits); 6339 UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets, 6340 ntohs(ipha->ipha_length)); 6341 6342 DTRACE_PROBE4(ip4__physical__out__start, 6343 ill_t *, NULL, ill_t *, ill, ipha_t *, ipha, mblk_t *, mp); 6344 FW_HOOKS(ipst->ips_ip4_physical_out_event, 6345 ipst->ips_ipv4firewall_physical_out, NULL, ill, ipha, mp, mp, 6346 ll_multicast, ipst); 6347 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 6348 if (ipst->ips_ipobs_enabled && mp != NULL) { 6349 zoneid_t szone; 6350 6351 szone = ip_get_zoneid_v4(ipha->ipha_src, mp, 6352 ipst, ALL_ZONES); 6353 ipobs_hook(mp, IPOBS_HOOK_OUTBOUND, szone, 6354 ALL_ZONES, ill, IPV4_VERSION, ire_fp_mp_len, ipst); 6355 } 6356 6357 if (mp != NULL) { 6358 DTRACE_IP7(send, mblk_t *, mp, conn_t *, NULL, 6359 void_ip_t *, ipha, __dtrace_ipsr_ill_t *, ill, 6360 ipha_t *, ipha, ip6_t *, NULL, int, 0); 6361 6362 if (ILL_DIRECT_CAPABLE(ill)) { 6363 ill_dld_direct_t *idd = &ill->ill_dld_capab->idc_direct; 6364 6365 (void) idd->idd_tx_df(idd->idd_tx_dh, mp, 6366 (uintptr_t)connp, 0); 6367 } else { 6368 putnext(ire->ire_stq, mp); 6369 } 6370 } 6371 IRE_REFRELE(ire); 6372 } 6373 6374 static boolean_t 6375 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 6376 { 6377 udp_t *udp = Q_TO_UDP(wq); 6378 int err; 6379 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 6380 udp_stack_t *us = udp->udp_us; 6381 6382 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 6383 dst, opt_storage, udp->udp_connp->conn_mac_exempt, 6384 us->us_netstack->netstack_ip); 6385 if (err == 0) { 6386 err = tsol_update_sticky(&udp->udp_sticky_ipp, 6387 &udp->udp_label_len_v6, opt_storage); 6388 } 6389 if (err != 0) { 6390 DTRACE_PROBE4( 6391 tx__ip__log__drop__updatelabel__udp6, 6392 char *, "queue(1) failed to update options(2) on mp(3)", 6393 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6394 } else { 6395 udp->udp_v6lastdst = *dst; 6396 } 6397 return (err); 6398 } 6399 6400 void 6401 udp_output_connected(void *arg, mblk_t *mp) 6402 { 6403 conn_t *connp = (conn_t *)arg; 6404 udp_t *udp = connp->conn_udp; 6405 udp_stack_t *us = udp->udp_us; 6406 ipaddr_t v4dst; 6407 in_port_t dstport; 6408 boolean_t mapped_addr; 6409 struct sockaddr_storage ss; 6410 sin_t *sin; 
6411 sin6_t *sin6; 6412 struct sockaddr *addr; 6413 socklen_t addrlen; 6414 int error; 6415 boolean_t insert_spi = udp->udp_nat_t_endpoint; 6416 6417 /* M_DATA for connected socket */ 6418 6419 ASSERT(udp->udp_issocket); 6420 UDP_DBGSTAT(us, udp_data_conn); 6421 6422 mutex_enter(&connp->conn_lock); 6423 if (udp->udp_state != TS_DATA_XFER) { 6424 mutex_exit(&connp->conn_lock); 6425 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6426 UDP_STAT(us, udp_out_err_notconn); 6427 freemsg(mp); 6428 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6429 "udp_wput_end: connp %p (%S)", connp, 6430 "not-connected; address required"); 6431 return; 6432 } 6433 6434 mapped_addr = IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst); 6435 if (mapped_addr) 6436 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6437 6438 /* Initialize addr and addrlen as if they're passed in */ 6439 if (udp->udp_family == AF_INET) { 6440 sin = (sin_t *)&ss; 6441 sin->sin_family = AF_INET; 6442 dstport = sin->sin_port = udp->udp_dstport; 6443 ASSERT(mapped_addr); 6444 sin->sin_addr.s_addr = v4dst; 6445 addr = (struct sockaddr *)sin; 6446 addrlen = sizeof (*sin); 6447 } else { 6448 sin6 = (sin6_t *)&ss; 6449 sin6->sin6_family = AF_INET6; 6450 dstport = sin6->sin6_port = udp->udp_dstport; 6451 sin6->sin6_flowinfo = udp->udp_flowinfo; 6452 sin6->sin6_addr = udp->udp_v6dst; 6453 sin6->sin6_scope_id = 0; 6454 sin6->__sin6_src_id = 0; 6455 addr = (struct sockaddr *)sin6; 6456 addrlen = sizeof (*sin6); 6457 } 6458 mutex_exit(&connp->conn_lock); 6459 6460 if (mapped_addr) { 6461 /* 6462 * Handle both AF_INET and AF_INET6; the latter 6463 * for IPV4 mapped destination addresses. Note 6464 * here that both addr and addrlen point to the 6465 * corresponding struct depending on the address 6466 * family of the socket. 6467 */ 6468 mp = udp_output_v4(connp, mp, v4dst, dstport, 0, &error, 6469 insert_spi); 6470 } else { 6471 mp = udp_output_v6(connp, mp, sin6, &error); 6472 } 6473 if (error == 0) { 6474 ASSERT(mp == NULL); 6475 return; 6476 } 6477 6478 UDP_STAT(us, udp_out_err_output); 6479 ASSERT(mp != NULL); 6480 /* mp is freed by the following routine */ 6481 udp_ud_err(connp->conn_wq, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6482 (t_scalar_t)error); 6483 } 6484 6485 /* 6486 * This routine handles all messages passed downstream. It either 6487 * consumes the message or passes it downstream; it never queues a 6488 * a message. 6489 * 6490 * Also entry point for sockfs when udp is in "direct sockfs" mode. This mode 6491 * is valid when we are directly beneath the stream head, and thus sockfs 6492 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6493 * structure without the cumbersome T_UNITDATA_REQ interface for the case of 6494 * connected endpoints. 6495 */ 6496 void 6497 udp_wput(queue_t *q, mblk_t *mp) 6498 { 6499 sin6_t *sin6; 6500 sin_t *sin; 6501 ipaddr_t v4dst; 6502 uint16_t port; 6503 uint_t srcid; 6504 conn_t *connp = Q_TO_CONN(q); 6505 udp_t *udp = connp->conn_udp; 6506 int error = 0; 6507 struct sockaddr *addr; 6508 socklen_t addrlen; 6509 udp_stack_t *us = udp->udp_us; 6510 boolean_t insert_spi = udp->udp_nat_t_endpoint; 6511 6512 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6513 "udp_wput_start: queue %p mp %p", q, mp); 6514 6515 /* 6516 * We directly handle several cases here: T_UNITDATA_REQ message 6517 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected 6518 * socket. 6519 */ 6520 switch (DB_TYPE(mp)) { 6521 case M_DATA: 6522 /* 6523 * Quick check for error cases. 
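 * Both udp_direct_sockfs and udp_state are read here without
 * holding any lock.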
Checks will be done again 6524 * under the lock later on 6525 */ 6526 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6527 /* Not connected; address is required */ 6528 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 6529 UDP_STAT(us, udp_out_err_notconn); 6530 freemsg(mp); 6531 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6532 "udp_wput_end: connp %p (%S)", connp, 6533 "not-connected; address required"); 6534 return; 6535 } 6536 udp_output_connected(connp, mp); 6537 return; 6538 6539 case M_PROTO: 6540 case M_PCPROTO: { 6541 struct T_unitdata_req *tudr; 6542 6543 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6544 tudr = (struct T_unitdata_req *)mp->b_rptr; 6545 6546 /* Handle valid T_UNITDATA_REQ here */ 6547 if (MBLKL(mp) >= sizeof (*tudr) && 6548 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6549 if (mp->b_cont == NULL) { 6550 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6551 "udp_wput_end: q %p (%S)", q, "badaddr"); 6552 error = EPROTO; 6553 goto ud_error; 6554 } 6555 6556 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6557 tudr->DEST_length)) { 6558 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6559 "udp_wput_end: q %p (%S)", q, "badaddr"); 6560 error = EADDRNOTAVAIL; 6561 goto ud_error; 6562 } 6563 /* 6564 * If a port has not been bound to the stream, fail. 6565 * This is not a problem when sockfs is directly 6566 * above us, because it will ensure that the socket 6567 * is first bound before allowing data to be sent. 6568 */ 6569 if (udp->udp_state == TS_UNBND) { 6570 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6571 "udp_wput_end: q %p (%S)", q, "outstate"); 6572 error = EPROTO; 6573 goto ud_error; 6574 } 6575 addr = (struct sockaddr *) 6576 &mp->b_rptr[tudr->DEST_offset]; 6577 addrlen = tudr->DEST_length; 6578 if (tudr->OPT_length != 0) 6579 UDP_STAT(us, udp_out_opt); 6580 break; 6581 } 6582 /* FALLTHRU */ 6583 } 6584 default: 6585 udp_wput_other(q, mp); 6586 return; 6587 } 6588 ASSERT(addr != NULL); 6589 6590 switch (udp->udp_family) { 6591 case AF_INET6: 6592 sin6 = (sin6_t *)addr; 6593 if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) || 6594 (sin6->sin6_family != AF_INET6)) { 6595 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6596 "udp_wput_end: q %p (%S)", q, "badaddr"); 6597 error = EADDRNOTAVAIL; 6598 goto ud_error; 6599 } 6600 6601 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6602 /* 6603 * Destination is a non-IPv4-compatible IPv6 address. 6604 * Send out an IPv6 format packet. 6605 */ 6606 mp = udp_output_v6(connp, mp, sin6, &error); 6607 if (error != 0) 6608 goto ud_error; 6609 6610 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6611 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6612 return; 6613 } 6614 /* 6615 * If the local address is not zero or a mapped address 6616 * return an error. It would be possible to send an IPv4 6617 * packet but the response would never make it back to the 6618 * application since it is bound to a non-mapped address. 
6619 */ 6620 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6621 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6622 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6623 "udp_wput_end: q %p (%S)", q, "badaddr"); 6624 error = EADDRNOTAVAIL; 6625 goto ud_error; 6626 } 6627 /* Send IPv4 packet without modifying udp_ipversion */ 6628 /* Extract port and ipaddr */ 6629 port = sin6->sin6_port; 6630 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6631 srcid = sin6->__sin6_src_id; 6632 break; 6633 6634 case AF_INET: 6635 sin = (sin_t *)addr; 6636 if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) || 6637 (sin->sin_family != AF_INET)) { 6638 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6639 "udp_wput_end: q %p (%S)", q, "badaddr"); 6640 error = EADDRNOTAVAIL; 6641 goto ud_error; 6642 } 6643 /* Extract port and ipaddr */ 6644 port = sin->sin_port; 6645 v4dst = sin->sin_addr.s_addr; 6646 srcid = 0; 6647 break; 6648 } 6649 6650 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error, insert_spi); 6651 if (error != 0) { 6652 ud_error: 6653 UDP_STAT(us, udp_out_err_output); 6654 ASSERT(mp != NULL); 6655 /* mp is freed by the following routine */ 6656 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6657 (t_scalar_t)error); 6658 } 6659 } 6660 6661 /* 6662 * udp_output_v6(): 6663 * Assumes that udp_wput did some sanity checking on the destination 6664 * address. 6665 */ 6666 static mblk_t * 6667 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) 6668 { 6669 ip6_t *ip6h; 6670 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6671 mblk_t *mp1 = mp; 6672 mblk_t *mp2; 6673 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6674 size_t ip_len; 6675 udpha_t *udph; 6676 udp_t *udp = connp->conn_udp; 6677 queue_t *q = connp->conn_wq; 6678 ip6_pkt_t ipp_s; /* For ancillary data options */ 6679 ip6_pkt_t *ipp = &ipp_s; 6680 ip6_pkt_t *tipp; /* temporary ipp */ 6681 uint32_t csum = 0; 6682 uint_t ignore = 0; 6683 uint_t option_exists = 0, is_sticky = 0; 6684 uint8_t *cp; 6685 uint8_t *nxthdr_ptr; 6686 in6_addr_t ip6_dst; 6687 udpattrs_t attrs; 6688 boolean_t opt_present; 6689 ip6_hbh_t *hopoptsptr = NULL; 6690 uint_t hopoptslen = 0; 6691 boolean_t is_ancillary = B_FALSE; 6692 udp_stack_t *us = udp->udp_us; 6693 size_t sth_wroff = 0; 6694 6695 *error = 0; 6696 6697 /* 6698 * If the local address is a mapped address return 6699 * an error. 6700 * It would be possible to send an IPv6 packet but the 6701 * response would never make it back to the application 6702 * since it is bound to a mapped address. 6703 */ 6704 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6705 *error = EADDRNOTAVAIL; 6706 goto done; 6707 } 6708 6709 ipp->ipp_fields = 0; 6710 ipp->ipp_sticky_ignored = 0; 6711 6712 /* 6713 * If TPI options passed in, feed it for verification and handling 6714 */ 6715 attrs.udpattr_credset = B_FALSE; 6716 opt_present = B_FALSE; 6717 if (DB_TYPE(mp) != M_DATA) { 6718 mp1 = mp->b_cont; 6719 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 6720 attrs.udpattr_ipp6 = ipp; 6721 attrs.udpattr_mb = mp; 6722 if (udp_unitdata_opt_process(q, mp, error, 6723 &attrs) < 0) { 6724 goto done; 6725 } 6726 ASSERT(*error == 0); 6727 opt_present = B_TRUE; 6728 } 6729 } 6730 rw_enter(&udp->udp_rwlock, RW_READER); 6731 ignore = ipp->ipp_sticky_ignored; 6732 6733 /* mp1 points to the M_DATA mblk carrying the packet */ 6734 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6735 6736 if (sin6->sin6_scope_id != 0 && 6737 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6738 /* 6739 * IPPF_SCOPE_ID is special. 
It's neither a sticky 6740 * option nor ancillary data. It needs to be 6741 * explicitly set in options_exists. 6742 */ 6743 option_exists |= IPPF_SCOPE_ID; 6744 } 6745 6746 /* 6747 * Compute the destination address 6748 */ 6749 ip6_dst = sin6->sin6_addr; 6750 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6751 ip6_dst = ipv6_loopback; 6752 6753 /* 6754 * If we're not going to the same destination as last time, then 6755 * recompute the label required. This is done in a separate routine to 6756 * avoid blowing up our stack here. 6757 * 6758 * TSOL Note: Since we are not in WRITER mode, UDP packets 6759 * to different destination may require different labels, 6760 * or worse, UDP packets to same IP address may require 6761 * different labels due to use of shared all-zones address. 6762 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 6763 * and sticky ipp_hopoptslen are consistent for the current 6764 * destination and are updated atomically. 6765 */ 6766 mutex_enter(&connp->conn_lock); 6767 if (is_system_labeled()) { 6768 /* Using UDP MLP requires SCM_UCRED from user */ 6769 if (connp->conn_mlp_type != mlptSingle && 6770 !attrs.udpattr_credset) { 6771 DTRACE_PROBE4( 6772 tx__ip__log__info__output__udp6, 6773 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6774 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6775 *error = ECONNREFUSED; 6776 rw_exit(&udp->udp_rwlock); 6777 mutex_exit(&connp->conn_lock); 6778 goto done; 6779 } 6780 /* 6781 * update label option for this UDP socket if 6782 * - the destination has changed, or 6783 * - the UDP socket is MLP 6784 */ 6785 if ((opt_present || 6786 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst) || 6787 connp->conn_mlp_type != mlptSingle) && 6788 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 6789 rw_exit(&udp->udp_rwlock); 6790 mutex_exit(&connp->conn_lock); 6791 goto done; 6792 } 6793 } 6794 6795 /* 6796 * If there's a security label here, then we ignore any options the 6797 * user may try to set. We keep the peer's label as a hidden sticky 6798 * option. We make a private copy of this label before releasing the 6799 * lock so that label is kept consistent with the destination addr. 6800 */ 6801 if (udp->udp_label_len_v6 > 0) { 6802 ignore &= ~IPPF_HOPOPTS; 6803 ipp->ipp_fields &= ~IPPF_HOPOPTS; 6804 } 6805 6806 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6807 /* No sticky options nor ancillary data. */ 6808 mutex_exit(&connp->conn_lock); 6809 goto no_options; 6810 } 6811 6812 /* 6813 * Go through the options figuring out where each is going to 6814 * come from and build two masks. The first mask indicates if 6815 * the option exists at all. The second mask indicates if the 6816 * option is sticky or ancillary. 
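 * Ancillary data always wins: a sticky value is used only when the
 * corresponding option was not supplied with this datagram, and
 * ANCIL_OR_STICKY_PTR() below consults is_sticky to pick the option
 * data from udp_sticky_ipp rather than from the ancillary ipp.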
6817 */ 6818 if (!(ignore & IPPF_HOPOPTS)) { 6819 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6820 option_exists |= IPPF_HOPOPTS; 6821 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6822 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6823 option_exists |= IPPF_HOPOPTS; 6824 is_sticky |= IPPF_HOPOPTS; 6825 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 6826 hopoptsptr = kmem_alloc( 6827 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 6828 if (hopoptsptr == NULL) { 6829 *error = ENOMEM; 6830 mutex_exit(&connp->conn_lock); 6831 goto done; 6832 } 6833 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 6834 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 6835 hopoptslen); 6836 udp_ip_hdr_len += hopoptslen; 6837 } 6838 } 6839 mutex_exit(&connp->conn_lock); 6840 6841 if (!(ignore & IPPF_RTHDR)) { 6842 if (ipp->ipp_fields & IPPF_RTHDR) { 6843 option_exists |= IPPF_RTHDR; 6844 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6845 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6846 option_exists |= IPPF_RTHDR; 6847 is_sticky |= IPPF_RTHDR; 6848 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6849 } 6850 } 6851 6852 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6853 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6854 option_exists |= IPPF_RTDSTOPTS; 6855 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6856 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6857 option_exists |= IPPF_RTDSTOPTS; 6858 is_sticky |= IPPF_RTDSTOPTS; 6859 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6860 } 6861 } 6862 6863 if (!(ignore & IPPF_DSTOPTS)) { 6864 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6865 option_exists |= IPPF_DSTOPTS; 6866 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6867 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6868 option_exists |= IPPF_DSTOPTS; 6869 is_sticky |= IPPF_DSTOPTS; 6870 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6871 } 6872 } 6873 6874 if (!(ignore & IPPF_IFINDEX)) { 6875 if (ipp->ipp_fields & IPPF_IFINDEX) { 6876 option_exists |= IPPF_IFINDEX; 6877 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6878 option_exists |= IPPF_IFINDEX; 6879 is_sticky |= IPPF_IFINDEX; 6880 } 6881 } 6882 6883 if (!(ignore & IPPF_ADDR)) { 6884 if (ipp->ipp_fields & IPPF_ADDR) { 6885 option_exists |= IPPF_ADDR; 6886 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6887 option_exists |= IPPF_ADDR; 6888 is_sticky |= IPPF_ADDR; 6889 } 6890 } 6891 6892 if (!(ignore & IPPF_DONTFRAG)) { 6893 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6894 option_exists |= IPPF_DONTFRAG; 6895 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6896 option_exists |= IPPF_DONTFRAG; 6897 is_sticky |= IPPF_DONTFRAG; 6898 } 6899 } 6900 6901 if (!(ignore & IPPF_USE_MIN_MTU)) { 6902 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6903 option_exists |= IPPF_USE_MIN_MTU; 6904 } else if (udp->udp_sticky_ipp.ipp_fields & 6905 IPPF_USE_MIN_MTU) { 6906 option_exists |= IPPF_USE_MIN_MTU; 6907 is_sticky |= IPPF_USE_MIN_MTU; 6908 } 6909 } 6910 6911 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6912 option_exists |= IPPF_HOPLIMIT; 6913 /* IPV6_HOPLIMIT can never be sticky */ 6914 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6915 6916 if (!(ignore & IPPF_UNICAST_HOPS) && 6917 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6918 option_exists |= IPPF_UNICAST_HOPS; 6919 is_sticky |= IPPF_UNICAST_HOPS; 6920 } 6921 6922 if (!(ignore & IPPF_MULTICAST_HOPS) && 6923 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6924 option_exists |= 
IPPF_MULTICAST_HOPS; 6925 is_sticky |= IPPF_MULTICAST_HOPS; 6926 } 6927 6928 if (!(ignore & IPPF_TCLASS)) { 6929 if (ipp->ipp_fields & IPPF_TCLASS) { 6930 option_exists |= IPPF_TCLASS; 6931 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6932 option_exists |= IPPF_TCLASS; 6933 is_sticky |= IPPF_TCLASS; 6934 } 6935 } 6936 6937 if (!(ignore & IPPF_NEXTHOP) && 6938 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 6939 option_exists |= IPPF_NEXTHOP; 6940 is_sticky |= IPPF_NEXTHOP; 6941 } 6942 6943 no_options: 6944 6945 /* 6946 * If any options carried in the ip6i_t were specified, we 6947 * need to account for the ip6i_t in the data we'll be sending 6948 * down. 6949 */ 6950 if (option_exists & IPPF_HAS_IP6I) 6951 udp_ip_hdr_len += sizeof (ip6i_t); 6952 6953 /* check/fix buffer config, setup pointers into it */ 6954 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6955 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6956 !OK_32PTR(ip6h)) { 6957 6958 /* Try to get everything in a single mblk next time */ 6959 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6960 udp->udp_max_hdr_len = udp_ip_hdr_len; 6961 sth_wroff = udp->udp_max_hdr_len + us->us_wroff_extra; 6962 } 6963 6964 mp2 = allocb(udp_ip_hdr_len + us->us_wroff_extra, BPRI_LO); 6965 if (mp2 == NULL) { 6966 *error = ENOMEM; 6967 rw_exit(&udp->udp_rwlock); 6968 goto done; 6969 } 6970 mp2->b_wptr = DB_LIM(mp2); 6971 mp2->b_cont = mp1; 6972 mp1 = mp2; 6973 if (DB_TYPE(mp) != M_DATA) 6974 mp->b_cont = mp1; 6975 else 6976 mp = mp1; 6977 6978 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6979 } 6980 mp1->b_rptr = (unsigned char *)ip6h; 6981 ip6i = (ip6i_t *)ip6h; 6982 6983 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6984 if (option_exists & IPPF_HAS_IP6I) { 6985 ip6h = (ip6_t *)&ip6i[1]; 6986 ip6i->ip6i_flags = 0; 6987 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6988 6989 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6990 if (option_exists & IPPF_SCOPE_ID) { 6991 ip6i->ip6i_flags |= IP6I_IFINDEX; 6992 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6993 } else if (option_exists & IPPF_IFINDEX) { 6994 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6995 ASSERT(tipp->ipp_ifindex != 0); 6996 ip6i->ip6i_flags |= IP6I_IFINDEX; 6997 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6998 } 6999 7000 if (option_exists & IPPF_ADDR) { 7001 /* 7002 * Enable per-packet source address verification if 7003 * IPV6_PKTINFO specified the source address. 7004 * ip6_src is set in the transport's _wput function. 7005 */ 7006 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 7007 } 7008 7009 if (option_exists & IPPF_DONTFRAG) { 7010 ip6i->ip6i_flags |= IP6I_DONTFRAG; 7011 } 7012 7013 if (option_exists & IPPF_USE_MIN_MTU) { 7014 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 7015 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 7016 } 7017 7018 if (option_exists & IPPF_NEXTHOP) { 7019 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 7020 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 7021 ip6i->ip6i_flags |= IP6I_NEXTHOP; 7022 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 7023 } 7024 7025 /* 7026 * tell IP this is an ip6i_t private header 7027 */ 7028 ip6i->ip6i_nxt = IPPROTO_RAW; 7029 } 7030 7031 /* Initialize IPv6 header */ 7032 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7033 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7034 7035 /* Set the hoplimit of the outgoing packet. */ 7036 if (option_exists & IPPF_HOPLIMIT) { 7037 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
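Otherwise the hop limit defaults to udp_multicast_ttl for multicast
destinations and to udp_ttl for unicast ones, as chosen below.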
*/ 7038 ip6h->ip6_hops = ipp->ipp_hoplimit; 7039 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7040 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7041 ip6h->ip6_hops = udp->udp_multicast_ttl; 7042 if (option_exists & IPPF_MULTICAST_HOPS) 7043 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7044 } else { 7045 ip6h->ip6_hops = udp->udp_ttl; 7046 if (option_exists & IPPF_UNICAST_HOPS) 7047 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7048 } 7049 7050 if (option_exists & IPPF_ADDR) { 7051 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7052 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7053 ip6h->ip6_src = tipp->ipp_addr; 7054 } else { 7055 /* 7056 * The source address was not set using IPV6_PKTINFO. 7057 * First look at the bound source. 7058 * If unspecified fallback to __sin6_src_id. 7059 */ 7060 ip6h->ip6_src = udp->udp_v6src; 7061 if (sin6->__sin6_src_id != 0 && 7062 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7063 ip_srcid_find_id(sin6->__sin6_src_id, 7064 &ip6h->ip6_src, connp->conn_zoneid, 7065 us->us_netstack); 7066 } 7067 } 7068 7069 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7070 cp = (uint8_t *)&ip6h[1]; 7071 7072 /* 7073 * Here's where we have to start stringing together 7074 * any extension headers in the right order: 7075 * Hop-by-hop, destination, routing, and final destination opts. 7076 */ 7077 if (option_exists & IPPF_HOPOPTS) { 7078 /* Hop-by-hop options */ 7079 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7080 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7081 if (hopoptslen == 0) { 7082 hopoptsptr = tipp->ipp_hopopts; 7083 hopoptslen = tipp->ipp_hopoptslen; 7084 is_ancillary = B_TRUE; 7085 } 7086 7087 *nxthdr_ptr = IPPROTO_HOPOPTS; 7088 nxthdr_ptr = &hbh->ip6h_nxt; 7089 7090 bcopy(hopoptsptr, cp, hopoptslen); 7091 cp += hopoptslen; 7092 7093 if (hopoptsptr != NULL && !is_ancillary) { 7094 kmem_free(hopoptsptr, hopoptslen); 7095 hopoptsptr = NULL; 7096 hopoptslen = 0; 7097 } 7098 } 7099 /* 7100 * En-route destination options 7101 * Only do them if there's a routing header as well 7102 */ 7103 if (option_exists & IPPF_RTDSTOPTS) { 7104 ip6_dest_t *dst = (ip6_dest_t *)cp; 7105 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7106 7107 *nxthdr_ptr = IPPROTO_DSTOPTS; 7108 nxthdr_ptr = &dst->ip6d_nxt; 7109 7110 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7111 cp += tipp->ipp_rtdstoptslen; 7112 } 7113 /* 7114 * Routing header next 7115 */ 7116 if (option_exists & IPPF_RTHDR) { 7117 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7118 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7119 7120 *nxthdr_ptr = IPPROTO_ROUTING; 7121 nxthdr_ptr = &rt->ip6r_nxt; 7122 7123 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7124 cp += tipp->ipp_rthdrlen; 7125 } 7126 /* 7127 * Do ultimate destination options 7128 */ 7129 if (option_exists & IPPF_DSTOPTS) { 7130 ip6_dest_t *dest = (ip6_dest_t *)cp; 7131 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7132 7133 *nxthdr_ptr = IPPROTO_DSTOPTS; 7134 nxthdr_ptr = &dest->ip6d_nxt; 7135 7136 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7137 cp += tipp->ipp_dstoptslen; 7138 } 7139 /* 7140 * Now set the last header pointer to the proto passed in 7141 */ 7142 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7143 *nxthdr_ptr = IPPROTO_UDP; 7144 7145 /* Update UDP header */ 7146 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 7147 udph->uha_dst_port = sin6->sin6_port; 7148 udph->uha_src_port = udp->udp_port; 7149 7150 /* 7151 * Copy in the destination address 7152 */ 7153 ip6h->ip6_dst = ip6_dst; 7154 7155 ip6h->ip6_vcf = 7156 (IPV6_DEFAULT_VERS_AND_FLOW & 
IPV6_VERS_AND_FLOW_MASK) | 7157 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7158 7159 if (option_exists & IPPF_TCLASS) { 7160 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7161 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7162 tipp->ipp_tclass); 7163 } 7164 rw_exit(&udp->udp_rwlock); 7165 7166 if (option_exists & IPPF_RTHDR) { 7167 ip6_rthdr_t *rth; 7168 7169 /* 7170 * Perform any processing needed for source routing. 7171 * We know that all extension headers will be in the same mblk 7172 * as the IPv6 header. 7173 */ 7174 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7175 if (rth != NULL && rth->ip6r_segleft != 0) { 7176 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7177 /* 7178 * Drop packet - only support Type 0 routing. 7179 * Notify the application as well. 7180 */ 7181 *error = EPROTO; 7182 goto done; 7183 } 7184 7185 /* 7186 * rth->ip6r_len is twice the number of 7187 * addresses in the header. Thus it must be even. 7188 */ 7189 if (rth->ip6r_len & 0x1) { 7190 *error = EPROTO; 7191 goto done; 7192 } 7193 /* 7194 * Shuffle the routing header and ip6_dst 7195 * addresses, and get the checksum difference 7196 * between the first hop (in ip6_dst) and 7197 * the destination (in the last routing hdr entry). 7198 */ 7199 csum = ip_massage_options_v6(ip6h, rth, 7200 us->us_netstack); 7201 /* 7202 * Verify that the first hop isn't a mapped address. 7203 * Routers along the path need to do this verification 7204 * for subsequent hops. 7205 */ 7206 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7207 *error = EADDRNOTAVAIL; 7208 goto done; 7209 } 7210 7211 cp += (rth->ip6r_len + 1)*8; 7212 } 7213 } 7214 7215 /* count up length of UDP packet */ 7216 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7217 if ((mp2 = mp1->b_cont) != NULL) { 7218 do { 7219 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7220 ip_len += (uint32_t)MBLKL(mp2); 7221 } while ((mp2 = mp2->b_cont) != NULL); 7222 } 7223 7224 /* 7225 * If the size of the packet is greater than the maximum allowed by 7226 * ip, return an error. Passing this down could cause panics because 7227 * the size will have wrapped and be inconsistent with the msg size. 7228 */ 7229 if (ip_len > IP_MAXPACKET) { 7230 *error = EMSGSIZE; 7231 goto done; 7232 } 7233 7234 /* Store the UDP length. Subtract length of extension hdrs */ 7235 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7236 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7237 7238 /* 7239 * We make it easy for IP to include our pseudo header 7240 * by putting our length in uh_checksum, modified (if 7241 * we have a routing header) by the checksum difference 7242 * between the ultimate destination and first hop addresses. 7243 * Note: UDP over IPv6 must always checksum the packet. 7244 */ 7245 csum += udph->uha_length; 7246 csum = (csum & 0xFFFF) + (csum >> 16); 7247 udph->uha_checksum = (uint16_t)csum; 7248 7249 #ifdef _LITTLE_ENDIAN 7250 ip_len = htons(ip_len); 7251 #endif 7252 ip6h->ip6_plen = ip_len; 7253 if (DB_CRED(mp) != NULL) 7254 mblk_setcred(mp1, DB_CRED(mp)); 7255 7256 if (DB_TYPE(mp) != M_DATA) { 7257 ASSERT(mp != mp1); 7258 freeb(mp); 7259 } 7260 7261 /* mp has been consumed and we'll return success */ 7262 ASSERT(*error == 0); 7263 mp = NULL; 7264 7265 /* We're done. 
Pass the packet to IP */ 7266 BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams); 7267 ip_output_v6(connp, mp1, q, IP_WPUT); 7268 7269 done: 7270 if (sth_wroff != 0) { 7271 (void) mi_set_sth_wroff(RD(q), 7272 udp->udp_max_hdr_len + us->us_wroff_extra); 7273 } 7274 if (hopoptsptr != NULL && !is_ancillary) { 7275 kmem_free(hopoptsptr, hopoptslen); 7276 hopoptsptr = NULL; 7277 } 7278 if (*error != 0) { 7279 ASSERT(mp != NULL); 7280 BUMP_MIB(&us->us_udp_mib, udpOutErrors); 7281 } 7282 return (mp); 7283 } 7284 7285 7286 static int 7287 udp_getpeername(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 7288 { 7289 sin_t *sin = (sin_t *)sa; 7290 sin6_t *sin6 = (sin6_t *)sa; 7291 7292 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 7293 7294 if (udp->udp_state != TS_DATA_XFER) 7295 return (ENOTCONN); 7296 7297 switch (udp->udp_family) { 7298 case AF_INET: 7299 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7300 7301 if (*salenp < sizeof (sin_t)) 7302 return (EINVAL); 7303 7304 *salenp = sizeof (sin_t); 7305 *sin = sin_null; 7306 sin->sin_family = AF_INET; 7307 sin->sin_port = udp->udp_dstport; 7308 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6dst); 7309 break; 7310 7311 case AF_INET6: 7312 if (*salenp < sizeof (sin6_t)) 7313 return (EINVAL); 7314 7315 *salenp = sizeof (sin6_t); 7316 *sin6 = sin6_null; 7317 sin6->sin6_family = AF_INET6; 7318 sin6->sin6_port = udp->udp_dstport; 7319 sin6->sin6_addr = udp->udp_v6dst; 7320 sin6->sin6_flowinfo = udp->udp_flowinfo; 7321 break; 7322 } 7323 7324 return (0); 7325 } 7326 7327 static int 7328 udp_getmyname(udp_t *udp, struct sockaddr *sa, uint_t *salenp) 7329 { 7330 sin_t *sin = (sin_t *)sa; 7331 sin6_t *sin6 = (sin6_t *)sa; 7332 7333 ASSERT(RW_LOCK_HELD(&udp->udp_rwlock)); 7334 7335 switch (udp->udp_family) { 7336 case AF_INET: 7337 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7338 7339 if (*salenp < sizeof (sin_t)) 7340 return (EINVAL); 7341 7342 *salenp = sizeof (sin_t); 7343 *sin = sin_null; 7344 sin->sin_family = AF_INET; 7345 sin->sin_port = udp->udp_port; 7346 7347 /* 7348 * If udp_v6src is unspecified, we might be bound to broadcast 7349 * / multicast. Use udp_bound_v6src as local address instead 7350 * (that could also still be unspecified). 7351 */ 7352 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7353 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7354 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_v6src); 7355 } else { 7356 sin->sin_addr.s_addr = 7357 V4_PART_OF_V6(udp->udp_bound_v6src); 7358 } 7359 break; 7360 7361 case AF_INET6: 7362 if (*salenp < sizeof (sin6_t)) 7363 return (EINVAL); 7364 7365 *salenp = sizeof (sin6_t); 7366 *sin6 = sin6_null; 7367 sin6->sin6_family = AF_INET6; 7368 sin6->sin6_port = udp->udp_port; 7369 sin6->sin6_flowinfo = udp->udp_flowinfo; 7370 7371 /* 7372 * If udp_v6src is unspecified, we might be bound to broadcast 7373 * / multicast. Use udp_bound_v6src as local address instead 7374 * (that could also still be unspecified). 7375 */ 7376 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) 7377 sin6->sin6_addr = udp->udp_v6src; 7378 else 7379 sin6->sin6_addr = udp->udp_bound_v6src; 7380 break; 7381 } 7382 7383 return (0); 7384 } 7385 7386 /* 7387 * Handle special out-of-band ioctl requests (see PSARC/2008/265). 
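 * The cmdblk_t at b_rptr carries the command in cb_cmd and the buffer
 * length in cb_len; the requested address is written into the attached
 * M_DATA block and the result is reported in cb_error via qreply().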
7388 */ 7389 static void 7390 udp_wput_cmdblk(queue_t *q, mblk_t *mp) 7391 { 7392 void *data; 7393 mblk_t *datamp = mp->b_cont; 7394 udp_t *udp = Q_TO_UDP(q); 7395 cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr; 7396 7397 if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) { 7398 cmdp->cb_error = EPROTO; 7399 qreply(q, mp); 7400 return; 7401 } 7402 data = datamp->b_rptr; 7403 7404 rw_enter(&udp->udp_rwlock, RW_READER); 7405 switch (cmdp->cb_cmd) { 7406 case TI_GETPEERNAME: 7407 cmdp->cb_error = udp_getpeername(udp, data, &cmdp->cb_len); 7408 break; 7409 case TI_GETMYNAME: 7410 cmdp->cb_error = udp_getmyname(udp, data, &cmdp->cb_len); 7411 break; 7412 default: 7413 cmdp->cb_error = EINVAL; 7414 break; 7415 } 7416 rw_exit(&udp->udp_rwlock); 7417 7418 qreply(q, mp); 7419 } 7420 7421 static void 7422 udp_wput_other(queue_t *q, mblk_t *mp) 7423 { 7424 uchar_t *rptr = mp->b_rptr; 7425 struct datab *db; 7426 struct iocblk *iocp; 7427 cred_t *cr; 7428 conn_t *connp = Q_TO_CONN(q); 7429 udp_t *udp = connp->conn_udp; 7430 udp_stack_t *us; 7431 7432 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7433 "udp_wput_other_start: q %p", q); 7434 7435 us = udp->udp_us; 7436 db = mp->b_datap; 7437 7438 cr = DB_CREDDEF(mp, connp->conn_cred); 7439 7440 switch (db->db_type) { 7441 case M_CMD: 7442 udp_wput_cmdblk(q, mp); 7443 return; 7444 7445 case M_PROTO: 7446 case M_PCPROTO: 7447 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7448 freemsg(mp); 7449 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7450 "udp_wput_other_end: q %p (%S)", q, "protoshort"); 7451 return; 7452 } 7453 switch (((t_primp_t)rptr)->type) { 7454 case T_ADDR_REQ: 7455 udp_addr_req(q, mp); 7456 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7457 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7458 return; 7459 case O_T_BIND_REQ: 7460 case T_BIND_REQ: 7461 udp_bind(q, mp); 7462 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7463 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7464 return; 7465 case T_CONN_REQ: 7466 udp_connect(q, mp); 7467 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7468 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7469 return; 7470 case T_CAPABILITY_REQ: 7471 udp_capability_req(q, mp); 7472 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7473 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7474 return; 7475 case T_INFO_REQ: 7476 udp_info_req(q, mp); 7477 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7478 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7479 return; 7480 case T_UNITDATA_REQ: 7481 /* 7482 * If a T_UNITDATA_REQ gets here, the address must 7483 * be bad. Valid T_UNITDATA_REQs are handled 7484 * in udp_wput. 
7485 */ 7486 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7487 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7488 "udp_wput_other_end: q %p (%S)", q, "unitdatareq"); 7489 return; 7490 case T_UNBIND_REQ: 7491 udp_unbind(q, mp); 7492 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7493 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7494 return; 7495 case T_SVR4_OPTMGMT_REQ: 7496 if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get, 7497 cr)) { 7498 (void) svr4_optcom_req(q, 7499 mp, cr, &udp_opt_obj, B_TRUE); 7500 } 7501 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7502 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7503 return; 7504 7505 case T_OPTMGMT_REQ: 7506 (void) tpi_optcom_req(q, mp, cr, &udp_opt_obj, B_TRUE); 7507 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7508 "udp_wput_other_end: q %p (%S)", q, "optmgmtreq"); 7509 return; 7510 7511 case T_DISCON_REQ: 7512 udp_disconnect(q, mp); 7513 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7514 "udp_wput_other_end: q %p (%S)", q, "disconreq"); 7515 return; 7516 7517 /* The following TPI message is not supported by udp. */ 7518 case O_T_CONN_RES: 7519 case T_CONN_RES: 7520 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7521 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7522 "udp_wput_other_end: q %p (%S)", q, 7523 "connres/disconreq"); 7524 return; 7525 7526 /* The following 3 TPI messages are illegal for udp. */ 7527 case T_DATA_REQ: 7528 case T_EXDATA_REQ: 7529 case T_ORDREL_REQ: 7530 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7531 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7532 "udp_wput_other_end: q %p (%S)", q, 7533 "data/exdata/ordrel"); 7534 return; 7535 default: 7536 break; 7537 } 7538 break; 7539 case M_FLUSH: 7540 if (*rptr & FLUSHW) 7541 flushq(q, FLUSHDATA); 7542 break; 7543 case M_IOCTL: 7544 iocp = (struct iocblk *)mp->b_rptr; 7545 switch (iocp->ioc_cmd) { 7546 case TI_GETPEERNAME: 7547 if (udp->udp_state != TS_DATA_XFER) { 7548 /* 7549 * If a default destination address has not 7550 * been associated with the stream, then we 7551 * don't know the peer's name. 7552 */ 7553 iocp->ioc_error = ENOTCONN; 7554 iocp->ioc_count = 0; 7555 mp->b_datap->db_type = M_IOCACK; 7556 qreply(q, mp); 7557 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7558 "udp_wput_other_end: q %p (%S)", q, 7559 "getpeername"); 7560 return; 7561 } 7562 /* FALLTHRU */ 7563 case TI_GETMYNAME: { 7564 /* 7565 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7566 * need to copyin the user's strbuf structure. 7567 * Processing will continue in the M_IOCDATA case 7568 * below. 7569 */ 7570 mi_copyin(q, mp, NULL, 7571 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7572 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7573 "udp_wput_other_end: q %p (%S)", q, "getmyname"); 7574 return; 7575 } 7576 case ND_SET: 7577 /* nd_getset performs the necessary checking */ 7578 case ND_GET: 7579 if (nd_getset(q, us->us_nd, mp)) { 7580 qreply(q, mp); 7581 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7582 "udp_wput_other_end: q %p (%S)", q, "get"); 7583 return; 7584 } 7585 break; 7586 case _SIOCSOCKFALLBACK: 7587 /* 7588 * Either sockmod is about to be popped and the 7589 * socket would now be treated as a plain stream, 7590 * or a module is about to be pushed so we could 7591 * no longer use read-side synchronous stream. 7592 * Drain any queued data and disable direct sockfs 7593 * interface from now on. 
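 * Once udp_direct_sockfs is cleared, udp_rrw() fails with EBUSY and
 * sockfs reverts to the normal STREAMS read path.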
7594 */ 7595 if (!udp->udp_issocket) { 7596 DB_TYPE(mp) = M_IOCNAK; 7597 iocp->ioc_error = EINVAL; 7598 } else { 7599 udp->udp_issocket = B_FALSE; 7600 if (udp->udp_direct_sockfs) { 7601 /* 7602 * Disable read-side synchronous 7603 * stream interface and drain any 7604 * queued data. 7605 */ 7606 udp_rcv_drain(RD(q), udp, 7607 B_FALSE); 7608 ASSERT(!udp->udp_direct_sockfs); 7609 UDP_STAT(us, udp_sock_fallback); 7610 } 7611 DB_TYPE(mp) = M_IOCACK; 7612 iocp->ioc_error = 0; 7613 } 7614 iocp->ioc_count = 0; 7615 iocp->ioc_rval = 0; 7616 qreply(q, mp); 7617 return; 7618 default: 7619 break; 7620 } 7621 break; 7622 case M_IOCDATA: 7623 udp_wput_iocdata(q, mp); 7624 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7625 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7626 return; 7627 default: 7628 /* Unrecognized messages are passed through without change. */ 7629 break; 7630 } 7631 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7632 "udp_wput_other_end: q %p (%S)", q, "end"); 7633 ip_output(connp, mp, q, IP_WPUT); 7634 } 7635 7636 /* 7637 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7638 * messages. 7639 */ 7640 static void 7641 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7642 { 7643 mblk_t *mp1; 7644 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 7645 STRUCT_HANDLE(strbuf, sb); 7646 udp_t *udp = Q_TO_UDP(q); 7647 int error; 7648 uint_t addrlen; 7649 7650 /* Make sure it is one of ours. */ 7651 switch (iocp->ioc_cmd) { 7652 case TI_GETMYNAME: 7653 case TI_GETPEERNAME: 7654 break; 7655 default: 7656 ip_output(udp->udp_connp, mp, q, IP_WPUT); 7657 return; 7658 } 7659 7660 switch (mi_copy_state(q, mp, &mp1)) { 7661 case -1: 7662 return; 7663 case MI_COPY_CASE(MI_COPY_IN, 1): 7664 break; 7665 case MI_COPY_CASE(MI_COPY_OUT, 1): 7666 /* 7667 * The address has been copied out, so now 7668 * copyout the strbuf. 7669 */ 7670 mi_copyout(q, mp); 7671 return; 7672 case MI_COPY_CASE(MI_COPY_OUT, 2): 7673 /* 7674 * The address and strbuf have been copied out. 7675 * We're done, so just acknowledge the original 7676 * M_IOCTL. 7677 */ 7678 mi_copy_done(q, mp, 0); 7679 return; 7680 default: 7681 /* 7682 * Something strange has happened, so acknowledge 7683 * the original M_IOCTL with an EPROTO error. 7684 */ 7685 mi_copy_done(q, mp, EPROTO); 7686 return; 7687 } 7688 7689 /* 7690 * Now we have the strbuf structure for TI_GETMYNAME 7691 * and TI_GETPEERNAME. Next we copyout the requested 7692 * address and then we'll copyout the strbuf. 7693 */ 7694 STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr); 7695 addrlen = udp->udp_family == AF_INET ? 
sizeof (sin_t) : sizeof (sin6_t);
7696 if (STRUCT_FGET(sb, maxlen) < addrlen) {
7697 mi_copy_done(q, mp, EINVAL);
7698 return;
7699 }
7700
7701 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
7702 if (mp1 == NULL)
7703 return;
7704
7705 rw_enter(&udp->udp_rwlock, RW_READER);
7706 switch (iocp->ioc_cmd) {
7707 case TI_GETMYNAME:
7708 error = udp_getmyname(udp, (void *)mp1->b_rptr, &addrlen);
7709 break;
7710 case TI_GETPEERNAME:
7711 error = udp_getpeername(udp, (void *)mp1->b_rptr, &addrlen);
7712 break;
7713 }
7714 rw_exit(&udp->udp_rwlock);
7715
7716 if (error != 0) {
7717 mi_copy_done(q, mp, error);
7718 } else {
7719 mp1->b_wptr += addrlen;
7720 STRUCT_FSET(sb, len, addrlen);
7721
7722 /* Copy out the address */
7723 mi_copyout(q, mp);
7724 }
7725 }
7726
7727 static int
7728 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
7729 udpattrs_t *udpattrs)
7730 {
7731 struct T_unitdata_req *udreqp;
7732 int is_absreq_failure;
7733 cred_t *cr;
7734 conn_t *connp = Q_TO_CONN(q);
7735
7736 ASSERT(((t_primp_t)mp->b_rptr)->type);
7737
7738 cr = DB_CREDDEF(mp, connp->conn_cred);
7739
7740 udreqp = (struct T_unitdata_req *)mp->b_rptr;
7741
7742 *errorp = tpi_optcom_buf(q, mp, &udreqp->OPT_length,
7743 udreqp->OPT_offset, cr, &udp_opt_obj,
7744 udpattrs, &is_absreq_failure);
7745
7746 if (*errorp != 0) {
7747 /*
7748 * Note: No special action needed in this
7749 * module for "is_absreq_failure"
7750 */
7751 return (-1); /* failure */
7752 }
7753 ASSERT(is_absreq_failure == 0);
7754 return (0); /* success */
7755 }
7756
7757 void
7758 udp_ddi_init(void)
7759 {
7760 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
7761 udp_opt_obj.odb_opt_arr_cnt);
7762
7763 /*
7764 * We want to be informed each time a stack is created or
7765 * destroyed in the kernel, so we can maintain the
7766 * set of udp_stack_t's.
7767 */
7768 netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
7769 }
7770
7771 void
7772 udp_ddi_destroy(void)
7773 {
7774 netstack_unregister(NS_UDP);
7775 }
7776
7777 /*
7778 * Initialize the UDP stack instance.
7779 */
7780 static void *
7781 udp_stack_init(netstackid_t stackid, netstack_t *ns)
7782 {
7783 udp_stack_t *us;
7784 udpparam_t *pa;
7785 int i;
7786
7787 us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
7788 us->us_netstack = ns;
7789
7790 us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
7791 us->us_epriv_ports[0] = 2049;
7792 us->us_epriv_ports[1] = 4045;
7793
7794 /*
7795 * The smallest anonymous port in the privileged port range in which
7796 * UDP looks for a free port. Used with the UDP_ANONPRIVBIND option.
7797 */
7798 us->us_min_anonpriv_port = 512;
7799
7800 us->us_bind_fanout_size = udp_bind_fanout_size;
7801
7802 /* Round up the value, which might have been modified in /etc/system */
7803 if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
7804 /* Not a power of two.
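(For example, a value of 600 set in /etc/system becomes 1024.)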
Round up to nearest power of two */ 7805 for (i = 0; i < 31; i++) { 7806 if (us->us_bind_fanout_size < (1 << i)) 7807 break; 7808 } 7809 us->us_bind_fanout_size = 1 << i; 7810 } 7811 us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size * 7812 sizeof (udp_fanout_t), KM_SLEEP); 7813 for (i = 0; i < us->us_bind_fanout_size; i++) { 7814 mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7815 NULL); 7816 } 7817 7818 pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP); 7819 7820 us->us_param_arr = pa; 7821 bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr)); 7822 7823 (void) udp_param_register(&us->us_nd, 7824 us->us_param_arr, A_CNT(udp_param_arr)); 7825 7826 us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics); 7827 us->us_mibkp = udp_kstat_init(stackid); 7828 return (us); 7829 } 7830 7831 /* 7832 * Free the UDP stack instance. 7833 */ 7834 static void 7835 udp_stack_fini(netstackid_t stackid, void *arg) 7836 { 7837 udp_stack_t *us = (udp_stack_t *)arg; 7838 int i; 7839 7840 for (i = 0; i < us->us_bind_fanout_size; i++) { 7841 mutex_destroy(&us->us_bind_fanout[i].uf_lock); 7842 } 7843 7844 kmem_free(us->us_bind_fanout, us->us_bind_fanout_size * 7845 sizeof (udp_fanout_t)); 7846 7847 us->us_bind_fanout = NULL; 7848 7849 nd_free(&us->us_nd); 7850 kmem_free(us->us_param_arr, sizeof (udp_param_arr)); 7851 us->us_param_arr = NULL; 7852 7853 udp_kstat_fini(stackid, us->us_mibkp); 7854 us->us_mibkp = NULL; 7855 7856 udp_kstat2_fini(stackid, us->us_kstat); 7857 us->us_kstat = NULL; 7858 bzero(&us->us_statistics, sizeof (us->us_statistics)); 7859 kmem_free(us, sizeof (*us)); 7860 } 7861 7862 static void * 7863 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp) 7864 { 7865 kstat_t *ksp; 7866 7867 udp_stat_t template = { 7868 { "udp_ip_send", KSTAT_DATA_UINT64 }, 7869 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 7870 { "udp_ire_null", KSTAT_DATA_UINT64 }, 7871 { "udp_drain", KSTAT_DATA_UINT64 }, 7872 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 7873 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 7874 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 7875 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 7876 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 7877 { "udp_out_opt", KSTAT_DATA_UINT64 }, 7878 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 7879 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 7880 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 7881 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 7882 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 7883 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 7884 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 7885 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 7886 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 7887 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 7888 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 7889 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 7890 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 7891 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 7892 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 7893 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 7894 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 7895 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 7896 #ifdef DEBUG 7897 { "udp_data_conn", KSTAT_DATA_UINT64 }, 7898 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 7899 #endif 7900 }; 7901 7902 ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net", 7903 KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t), 7904 KSTAT_FLAG_VIRTUAL, stackid); 7905 7906 if (ksp == NULL) 7907 return (NULL); 7908 7909 bcopy(&template, us_statisticsp, sizeof (template)); 7910 
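/*
 * This kstat was created with KSTAT_FLAG_VIRTUAL, so we supply the
 * storage ourselves: ks_data points at the per-stack udp_stat_t and
 * the UDP_STAT() counters are visible without a ks_update routine.
 */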
    ksp->ks_data = (void *)us_statisticsp;
    ksp->ks_private = (void *)(uintptr_t)stackid;

    kstat_install(ksp);
    return (ksp);
}

static void
udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
{
    if (ksp != NULL) {
        ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
        kstat_delete_netstack(ksp, stackid);
    }
}

static void *
udp_kstat_init(netstackid_t stackid)
{
    kstat_t *ksp;

    udp_named_kstat_t template = {
        { "inDatagrams", KSTAT_DATA_UINT64, 0 },
        { "inErrors", KSTAT_DATA_UINT32, 0 },
        { "outDatagrams", KSTAT_DATA_UINT64, 0 },
        { "entrySize", KSTAT_DATA_INT32, 0 },
        { "entry6Size", KSTAT_DATA_INT32, 0 },
        { "outErrors", KSTAT_DATA_UINT32, 0 },
    };

    ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
        KSTAT_TYPE_NAMED,
        NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);

    if (ksp == NULL || ksp->ks_data == NULL)
        return (NULL);

    template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
    template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);

    bcopy(&template, ksp->ks_data, sizeof (template));
    ksp->ks_update = udp_kstat_update;
    ksp->ks_private = (void *)(uintptr_t)stackid;

    kstat_install(ksp);
    return (ksp);
}

static void
udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
{
    if (ksp != NULL) {
        ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
        kstat_delete_netstack(ksp, stackid);
    }
}

static int
udp_kstat_update(kstat_t *kp, int rw)
{
    udp_named_kstat_t *udpkp;
    netstackid_t stackid;
    netstack_t *ns;
    udp_stack_t *us;

    if ((kp == NULL) || (kp->ks_data == NULL))
        return (EIO);

    if (rw == KSTAT_WRITE)
        return (EACCES);

    /* Don't read kp->ks_private until kp is known to be non-NULL */
    stackid = (netstackid_t)(uintptr_t)kp->ks_private;
    ns = netstack_find_by_stackid(stackid);
    if (ns == NULL)
        return (-1);
    us = ns->netstack_udp;
    if (us == NULL) {
        netstack_rele(ns);
        return (-1);
    }
    udpkp = (udp_named_kstat_t *)kp->ks_data;

    udpkp->inDatagrams.value.ui64 = us->us_udp_mib.udpHCInDatagrams;
    udpkp->inErrors.value.ui32 = us->us_udp_mib.udpInErrors;
    udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams;
    udpkp->outErrors.value.ui32 = us->us_udp_mib.udpOutErrors;
    netstack_rele(ns);
    return (0);
}

/*
 * Read-side synchronous stream info entry point, called as a
 * result of handling certain STREAMS ioctl operations.
 */
static int
udp_rinfop(queue_t *q, infod_t *dp)
{
    mblk_t *mp;
    uint_t cmd = dp->d_cmd;
    int res = 0;
    int error = 0;
    udp_t *udp = Q_TO_UDP(q);
    struct stdata *stp = STREAM(q);

    mutex_enter(&udp->udp_drain_lock);
    /* If shutdown on read has happened, return nothing */
    mutex_enter(&stp->sd_lock);
    if (stp->sd_flag & STREOF) {
        mutex_exit(&stp->sd_lock);
        goto done;
    }
    mutex_exit(&stp->sd_lock);

    if ((mp = udp->udp_rcv_list_head) == NULL)
        goto done;

    ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);

    if (cmd & INFOD_COUNT) {
        /*
         * Return the number of messages.
         */
        dp->d_count += udp->udp_rcv_msgcnt;
        res |= INFOD_COUNT;
    }
    if (cmd & INFOD_BYTES) {
        /*
         * Return size of all data messages.
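         * (udp_rcv_cnt is a running byte total maintained as datagrams
         * are enqueued by udp_rcv_enqueue() and dequeued by udp_rrw(),
         * so no walk of the receive list is needed here.)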
         */
        dp->d_bytes += udp->udp_rcv_cnt;
        res |= INFOD_BYTES;
    }
    if (cmd & INFOD_FIRSTBYTES) {
        /*
         * Return size of first data message.
         */
        dp->d_bytes = msgdsize(mp);
        res |= INFOD_FIRSTBYTES;
        dp->d_cmd &= ~INFOD_FIRSTBYTES;
    }
    if (cmd & INFOD_COPYOUT) {
        mblk_t *mp1 = mp->b_cont;
        int n;
        /*
         * Return data contents of first message.
         */
        ASSERT(DB_TYPE(mp1) == M_DATA);
        while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
            n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
            if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
                UIO_READ, dp->d_uiop)) != 0) {
                goto done;
            }
            mp1 = mp1->b_cont;
        }
        res |= INFOD_COPYOUT;
        dp->d_cmd &= ~INFOD_COPYOUT;
    }
done:
    mutex_exit(&udp->udp_drain_lock);

    dp->d_res |= res;

    return (error);
}

/*
 * Read-side synchronous stream entry point.  This is called as a result
 * of a recv/read operation done at sockfs, and is guaranteed to execute
 * outside of the interrupt thread context.  It returns a single datagram
 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
 */
static int
udp_rrw(queue_t *q, struiod_t *dp)
{
    mblk_t *mp;
    udp_t *udp = Q_TO_UDP(q);
    udp_stack_t *us = udp->udp_us;

    /*
     * Dequeue a datagram from the head of the list and return it to
     * the caller; also ensure that the RSLEEP sd_wakeq flag is
     * set/cleared depending on whether or not there's data
     * remaining in the list.
     */
    mutex_enter(&udp->udp_drain_lock);
    if (!udp->udp_direct_sockfs) {
        mutex_exit(&udp->udp_drain_lock);
        UDP_STAT(us, udp_rrw_busy);
        return (EBUSY);
    }
    if ((mp = udp->udp_rcv_list_head) != NULL) {
        uint_t size = msgdsize(mp);

        /* Last datagram in the list? */
        if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
            udp->udp_rcv_list_tail = NULL;
        mp->b_next = NULL;

        udp->udp_rcv_cnt -= size;
        udp->udp_rcv_msgcnt--;
        UDP_STAT(us, udp_rrw_msgcnt);

        /* No longer flow-controlling? */
        if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
            udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
            udp->udp_drain_qfull = B_FALSE;
    }
    if (udp->udp_rcv_list_head == NULL) {
        /*
         * Either we just dequeued the last datagram or
         * we got here from sockfs and have nothing to
         * return; in this case clear RSLEEP.
         */
        ASSERT(udp->udp_rcv_cnt == 0);
        ASSERT(udp->udp_rcv_msgcnt == 0);
        ASSERT(udp->udp_rcv_list_tail == NULL);
        STR_WAKEUP_CLEAR(STREAM(q));
    } else {
        /*
         * More data follows; we need udp_rrw() to be
         * called in the future to pick up the rest.
         */
        STR_WAKEUP_SET(STREAM(q));
    }
    mutex_exit(&udp->udp_drain_lock);
    dp->d_mp = mp;
    return (0);
}

/*
 * Enqueue a completely-built T_UNITDATA_IND message into the receive
 * list; this is typically executed within the interrupt thread context
 * and so we do things as quickly as possible.
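 * The receive list is a simple b_next chain bounded by udp_rcv_hiwat,
 * counted both in bytes (udp_rcv_cnt) and in messages (udp_rcv_msgcnt);
 * once either count reaches that limit udp_drain_qfull is set, and
 * udp_rrw() clears it again when both counts drop back below the
 * high-water mark.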
 */
static void
udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
{
    ASSERT(q == RD(q));
    ASSERT(pkt_len == msgdsize(mp));
    ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
    ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
    ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));

    mutex_enter(&udp->udp_drain_lock);
    /*
     * Wake up and signal the receiving app; it is okay to do this
     * before enqueueing the mp because we are holding the drain lock.
     * One of the advantages of a synchronous stream is the ability for
     * us to find out when the application performs a read on the
     * socket, by way of the udp_rrw() entry point being called.  We need
     * to generate SIGPOLL/SIGIO for each received datagram in the case
     * of an asynchronous socket, just as in the strrput() case.  However,
     * we only wake the application up when necessary, i.e. during the
     * first enqueue.  When udp_rrw() is called, we send a single
     * datagram upstream and call STR_WAKEUP_SET() again if there is
     * still data remaining in our receive queue.
     */
    STR_WAKEUP_SENDSIG(STREAM(q), udp->udp_rcv_list_head);
    if (udp->udp_rcv_list_head == NULL)
        udp->udp_rcv_list_head = mp;
    else
        udp->udp_rcv_list_tail->b_next = mp;
    udp->udp_rcv_list_tail = mp;
    udp->udp_rcv_cnt += pkt_len;
    udp->udp_rcv_msgcnt++;

    /* Need to flow-control? */
    if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
        udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
        udp->udp_drain_qfull = B_TRUE;

    mutex_exit(&udp->udp_drain_lock);
}

/*
 * Drain the contents of the receive list to the module upstream; we do
 * this during close or when we fall back to the slow mode due to
 * sockmod being popped or a module being pushed on top of us.
 */
static void
udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
{
    mblk_t *mp;
    udp_stack_t *us = udp->udp_us;

    ASSERT(q == RD(q));

    mutex_enter(&udp->udp_drain_lock);
    /*
     * There is no race with a concurrent udp_input() sending
     * up packets using putnext() after we have cleared the
     * udp_direct_sockfs flag but before we have completed
     * sending up the packets in udp_rcv_list, since we are
     * either a writer or we have quiesced the conn.
     */
    udp->udp_direct_sockfs = B_FALSE;
    mutex_exit(&udp->udp_drain_lock);

    if (udp->udp_rcv_list_head != NULL)
        UDP_STAT(us, udp_drain);

    /*
     * Send up everything via putnext(); note here that we
     * don't need the udp_drain_lock to protect us since
     * nothing can enter udp_rrw() and we currently
     * have exclusive access to this udp.
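     * When we are called as part of close (closing == B_TRUE) there is
     * no reader left, so the queued messages are simply freed rather
     * than passed upstream.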
     */
    while ((mp = udp->udp_rcv_list_head) != NULL) {
        udp->udp_rcv_list_head = mp->b_next;
        mp->b_next = NULL;
        udp->udp_rcv_cnt -= msgdsize(mp);
        udp->udp_rcv_msgcnt--;
        if (closing) {
            freemsg(mp);
        } else {
            putnext(q, mp);
        }
    }
    ASSERT(udp->udp_rcv_cnt == 0);
    ASSERT(udp->udp_rcv_msgcnt == 0);
    ASSERT(udp->udp_rcv_list_head == NULL);
    udp->udp_rcv_list_tail = NULL;
    udp->udp_drain_qfull = B_FALSE;
}

static size_t
udp_set_rcv_hiwat(udp_t *udp, size_t size)
{
    udp_stack_t *us = udp->udp_us;

    /* We add a bit of extra buffering */
    size += size >> 1;
    if (size > us->us_max_buf)
        size = us->us_max_buf;

    udp->udp_rcv_hiwat = size;
    return (size);
}

/*
 * For the lower queue so that UDP can be a dummy mux.
 * Nobody should be sending packets up this stream.
 */
static void
udp_lrput(queue_t *q, mblk_t *mp)
{
    mblk_t *mp1;

    switch (mp->b_datap->db_type) {
    case M_FLUSH:
        /* Turn around */
        if (*mp->b_rptr & FLUSHW) {
            *mp->b_rptr &= ~FLUSHR;
            qreply(q, mp);
            return;
        }
        break;
    }
    /* Could receive messages that passed through ar_rput */
    for (mp1 = mp; mp1; mp1 = mp1->b_cont)
        mp1->b_prev = mp1->b_next = NULL;
    freemsg(mp);
}

/*
 * For the lower queue so that UDP can be a dummy mux.
 * Nobody should be sending packets down this stream.
 */
/* ARGSUSED */
void
udp_lwput(queue_t *q, mblk_t *mp)
{
    freemsg(mp);
}