1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/sdt.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 67 #include <inet/common.h> 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_if.h> 73 #include <inet/ip_multi.h> 74 #include <inet/ip_ndp.h> 75 #include <inet/mi.h> 76 #include <inet/mib2.h> 77 #include <inet/nd.h> 78 #include <inet/optcom.h> 79 #include <inet/snmpcom.h> 80 #include <inet/kstatcom.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipclassifier.h> 83 #include <inet/ipsec_impl.h> 84 #include <inet/ipp_common.h> 85 86 /* 87 * The ipsec_info.h header file is here since it has the definition for the 88 * M_CTL message types used by IP to convey information to the ULP. The 89 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
90 */ 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 94 #include <sys/tsol/label.h> 95 #include <sys/tsol/tnet.h> 96 #include <rpc/pmap_prot.h> 97 98 /* 99 * Synchronization notes: 100 * 101 * UDP uses a combination of its internal perimeter, a global lock and 102 * a set of bind hash locks to protect its data structures. Please see 103 * the note above udp_mode_assertions for details about the internal 104 * perimeter. 105 * 106 * When a UDP endpoint is bound to a local port, it is inserted into 107 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 108 * The size of the array is controlled by the udp_bind_fanout_size variable. 109 * This variable can be changed in /etc/system if the default value is 110 * not large enough. Each bind hash bucket is protected by a per bucket 111 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 112 * structure. An UDP endpoint is removed from the bind hash list only 113 * when it is being unbound or being closed. The per bucket lock also 114 * protects a UDP endpoint's state changes. 115 * 116 * Plumbing notes: 117 * 118 * Both udp and ip are merged, but the streams plumbing is kept unchanged 119 * in that udp is always pushed atop /dev/ip. This is done to preserve 120 * backwards compatibility for certain applications which rely on such 121 * plumbing geometry to do things such as issuing I_POP on the stream 122 * in order to obtain direct access to /dev/ip, etc. 123 * 124 * All UDP processings happen in the /dev/ip instance; the udp module 125 * instance does not possess any state about the endpoint, and merely 126 * acts as a dummy module whose presence is to keep the streams plumbing 127 * appearance unchanged. At open time /dev/ip allocates a conn_t that 128 * happens to embed a udp_t. This stays dormant until the time udp is 129 * pushed, which indicates to /dev/ip that it must convert itself from 130 * an IP to a UDP endpoint. 
131 * 132 * We only allow for the following plumbing cases: 133 * 134 * Normal: 135 * /dev/ip is first opened and later udp is pushed directly on top. 136 * This is the default action that happens when a udp socket or 137 * /dev/udp is opened. The conn_t created by /dev/ip instance is 138 * now shared and is marked with IPCL_UDP. 139 * 140 * SNMP-only: 141 * udp is pushed on top of a module other than /dev/ip. When this 142 * happens it will support only SNMP semantics. A new conn_t is 143 * allocated and marked with IPCL_UDPMOD. 144 * 145 * The above cases imply that we don't support any intermediate module to 146 * reside in between /dev/ip and udp -- in fact, we never supported such 147 * scenario in the past as the inter-layer communication semantics have 148 * always been private. Also note that the normal case allows for SNMP 149 * requests to be processed in addition to the rest of UDP operations. 150 * 151 * The normal case plumbing is depicted by the following diagram: 152 * 153 * +---------------+---------------+ 154 * | | | udp 155 * | udp_wq | udp_rq | 156 * | | UDP_RD | 157 * | | | 158 * +---------------+---------------+ 159 * | ^ 160 * v | 161 * +---------------+---------------+ 162 * | | | /dev/ip 163 * | ip_wq | ip_rq | conn_t 164 * | UDP_WR | | 165 * | | | 166 * +---------------+---------------+ 167 * 168 * Messages arriving at udp_wq from above will end up in ip_wq before 169 * it gets processed, i.e. udp write entry points will advance udp_wq 170 * and use its q_next value as ip_wq in order to use the conn_t that 171 * is stored in its q_ptr. Likewise, messages generated by ip to the 172 * module above udp will appear as if they are originated from udp_rq, 173 * i.e. putnext() calls to the module above udp is done using the 174 * udp_rq instead of ip_rq in order to avoid udp_rput() which does 175 * nothing more than calling putnext(). 176 * 177 * The above implies the following rule of thumb: 178 * 179 * 1. 
udp_t is obtained from conn_t, which is created by the /dev/ip 180 * instance and is stored in q_ptr of both ip_wq and ip_rq. There 181 * is no direct reference to conn_t from either udp_wq or udp_rq. 182 * 183 * 2. Write-side entry points of udp can obtain the conn_t via the 184 * Q_TO_CONN() macro, using the queue value obtain from UDP_WR(). 185 * 186 * 3. While in /dev/ip context, putnext() to the module above udp can 187 * be done by supplying the queue value obtained from UDP_RD(). 188 * 189 */ 190 191 static queue_t *UDP_WR(queue_t *); 192 static queue_t *UDP_RD(queue_t *); 193 194 udp_stat_t udp_statistics = { 195 { "udp_ip_send", KSTAT_DATA_UINT64 }, 196 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 197 { "udp_ire_null", KSTAT_DATA_UINT64 }, 198 { "udp_drain", KSTAT_DATA_UINT64 }, 199 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 200 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 201 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 202 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 203 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 204 { "udp_out_opt", KSTAT_DATA_UINT64 }, 205 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 206 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 207 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 208 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 209 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 210 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 211 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 212 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 213 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 214 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 215 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 216 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 217 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 218 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 219 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 220 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 221 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 222 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 223 #ifdef DEBUG 224 { "udp_data_conn", KSTAT_DATA_UINT64 }, 225 { 
"udp_data_notconn", KSTAT_DATA_UINT64 }, 226 #endif 227 }; 228 229 static kstat_t *udp_ksp; 230 struct kmem_cache *udp_cache; 231 232 /* 233 * Bind hash list size and hash function. It has to be a power of 2 for 234 * hashing. 235 */ 236 #define UDP_BIND_FANOUT_SIZE 512 237 #define UDP_BIND_HASH(lport) \ 238 ((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1)) 239 240 /* UDP bind fanout hash structure. */ 241 typedef struct udp_fanout_s { 242 udp_t *uf_udp; 243 kmutex_t uf_lock; 244 #if defined(_LP64) || defined(_I32LPx) 245 char uf_pad[48]; 246 #else 247 char uf_pad[56]; 248 #endif 249 } udp_fanout_t; 250 251 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 252 /* udp_fanout_t *udp_bind_fanout. */ 253 static udp_fanout_t *udp_bind_fanout; 254 255 /* 256 * This controls the rate some ndd info report functions can be used 257 * by non-privileged users. It stores the last time such info is 258 * requested. When those report functions are called again, this 259 * is checked with the current time and compare with the ndd param 260 * udp_ndd_get_info_interval. 
261 */ 262 static clock_t udp_last_ndd_get_info_time; 263 #define NDD_TOO_QUICK_MSG \ 264 "ndd get info rate too high for non-privileged users, try again " \ 265 "later.\n" 266 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 267 268 /* Option processing attrs */ 269 typedef struct udpattrs_s { 270 union { 271 ip6_pkt_t *udpattr_ipp6; /* For V6 */ 272 ip4_pkt_t *udpattr_ipp4; /* For V4 */ 273 } udpattr_ippu; 274 #define udpattr_ipp6 udpattr_ippu.udpattr_ipp6 275 #define udpattr_ipp4 udpattr_ippu.udpattr_ipp4 276 mblk_t *udpattr_mb; 277 boolean_t udpattr_credset; 278 } udpattrs_t; 279 280 static void udp_addr_req(queue_t *q, mblk_t *mp); 281 static void udp_bind(queue_t *q, mblk_t *mp); 282 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 283 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 284 static int udp_build_hdrs(queue_t *q, udp_t *udp); 285 static void udp_capability_req(queue_t *q, mblk_t *mp); 286 static int udp_close(queue_t *q); 287 static void udp_connect(queue_t *q, mblk_t *mp); 288 static void udp_disconnect(queue_t *q, mblk_t *mp); 289 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 290 int sys_error); 291 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 292 t_scalar_t tlierr, int unixerr); 293 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 294 cred_t *cr); 295 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 296 char *value, caddr_t cp, cred_t *cr); 297 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 298 char *value, caddr_t cp, cred_t *cr); 299 static void udp_icmp_error(queue_t *q, mblk_t *mp); 300 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 301 static void udp_info_req(queue_t *q, mblk_t *mp); 302 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 303 t_scalar_t addr_length); 304 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 305 cred_t *credp); 306 static int 
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 307 int *errorp, udpattrs_t *udpattrs); 308 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 309 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 310 static boolean_t udp_param_register(udpparam_t *udppa, int cnt); 311 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 312 cred_t *cr); 313 static void udp_report_item(mblk_t *mp, udp_t *udp); 314 static void udp_rput(queue_t *q, mblk_t *mp); 315 static void udp_rput_other(queue_t *, mblk_t *); 316 static int udp_rinfop(queue_t *q, infod_t *dp); 317 static int udp_rrw(queue_t *q, struiod_t *dp); 318 static void udp_rput_bind_ack(queue_t *q, mblk_t *mp); 319 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 320 cred_t *cr); 321 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha); 322 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 323 t_scalar_t destlen, t_scalar_t err); 324 static void udp_unbind(queue_t *q, mblk_t *mp); 325 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port, 326 boolean_t random); 327 static void udp_wput(queue_t *q, mblk_t *mp); 328 static mblk_t *udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst, 329 uint16_t port, uint_t srcid, int *error); 330 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 331 int *error); 332 static void udp_wput_other(queue_t *q, mblk_t *mp); 333 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 334 static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 335 socklen_t addrlen); 336 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 337 338 static void udp_kstat_init(void); 339 static void udp_kstat_fini(void); 340 static int udp_kstat_update(kstat_t *kp, int rw); 341 static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2); 342 static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 343 static void 
udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 344 static void udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2); 345 346 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 347 uint_t pkt_len); 348 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 349 static void udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t); 350 static void udp_exit(conn_t *); 351 static void udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t); 352 #ifdef DEBUG 353 static void udp_mode_assertions(udp_t *, int); 354 #endif /* DEBUG */ 355 356 major_t UDP6_MAJ; 357 #define UDP6 "udp6" 358 359 #define UDP_RECV_HIWATER (56 * 1024) 360 #define UDP_RECV_LOWATER 128 361 #define UDP_XMIT_HIWATER (56 * 1024) 362 #define UDP_XMIT_LOWATER 1024 363 364 static struct module_info udp_info = { 365 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 366 }; 367 368 static struct qinit udp_rinit = { 369 (pfi_t)udp_rput, NULL, udp_open, udp_close, NULL, 370 &udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 371 }; 372 373 static struct qinit udp_winit = { 374 (pfi_t)udp_wput, NULL, NULL, NULL, NULL, 375 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 376 }; 377 378 static struct qinit winit = { 379 (pfi_t)putnext, NULL, NULL, NULL, NULL, 380 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 381 }; 382 383 /* Support for just SNMP if UDP is not pushed directly over device IP */ 384 struct qinit udp_snmp_rinit = { 385 (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL, 386 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 387 }; 388 389 struct qinit udp_snmp_winit = { 390 (pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL, 391 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 392 }; 393 394 struct streamtab udpinfo = { 395 &udp_rinit, &winit 396 }; 397 398 static sin_t sin_null; /* Zero address for quick clears */ 399 static sin6_t sin6_null; /* Zero address for quick clears */ 400 401 /* Hint not protected by any lock */ 402 static in_port_t 
udp_g_next_port_to_try; 403 404 /* 405 * Extra privileged ports. In host byte order. 406 */ 407 #define UDP_NUM_EPRIV_PORTS 64 408 static int udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 409 static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 }; 410 411 /* Only modified during _init and _fini thus no locking is needed. */ 412 static IDP udp_g_nd; /* Points to table of UDP ND variables. */ 413 414 /* MIB-2 stuff for SNMP */ 415 static mib2_udp_t udp_mib; /* SNMP fixed size info */ 416 static kstat_t *udp_mibkp; /* kstat exporting udp_mib data */ 417 418 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 419 420 /* Default structure copied into T_INFO_ACK messages */ 421 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 422 T_INFO_ACK, 423 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 424 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 425 T_INVALID, /* CDATA_size. udp does not support connect data. */ 426 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 427 sizeof (sin_t), /* ADDR_size. */ 428 0, /* OPT_size - not initialized here */ 429 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 430 T_CLTS, /* SERV_type. udp supports connection-less. */ 431 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 432 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 433 }; 434 435 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 436 437 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 438 T_INFO_ACK, 439 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 440 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 441 T_INVALID, /* CDATA_size. udp does not support connect data. */ 442 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 443 sizeof (sin6_t), /* ADDR_size. */ 444 0, /* OPT_size - not initialized here */ 445 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 446 T_CLTS, /* SERV_type. udp supports connection-less. 
*/ 447 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 448 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 449 }; 450 451 /* largest UDP port number */ 452 #define UDP_MAX_PORT 65535 453 454 /* 455 * Table of ND variables supported by udp. These are loaded into udp_g_nd 456 * in udp_open. 457 * All of these are alterable, within the min/max values given, at run time. 458 */ 459 /* BEGIN CSTYLED */ 460 udpparam_t udp_param_arr[] = { 461 /*min max value name */ 462 { 0L, 256, 32, "udp_wroff_extra" }, 463 { 1L, 255, 255, "udp_ipv4_ttl" }, 464 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 465 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 466 { 0, 1, 1, "udp_do_checksum" }, 467 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 468 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 469 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 470 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 471 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 472 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 473 { 100, 60000, 1000, "udp_ndd_get_info_interval"}, 474 }; 475 /* END CSTYLED */ 476 477 /* 478 * The smallest anonymous port in the privileged port range which UDP 479 * looks for free port. Use in the option UDP_ANONPRIVBIND. 480 */ 481 static in_port_t udp_min_anonpriv_port = 512; 482 483 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 484 uint32_t udp_random_anon_port = 1; 485 486 /* 487 * Hook functions to enable cluster networking. 
 * On non-clustered systems these vectors must always be NULL.
 */

/*
 * NOTE(review): cl_inet_bind declares "uchar_t protocol" while
 * cl_inet_unbind declares "uint8_t protocol".  The types are the same
 * width, but the mismatch looks accidental -- confirm against the
 * cluster hook consumers before changing either.
 */
void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Append 'mp' to the endpoint's internal message list (udp_mphead/
 * udp_mptail), stashing the squeue proc in b_prev and the tag in b_queue
 * so udp_switch_to_squeue() can later replay the message into the squeue
 * in arrival order.  Caller must hold conn_lock.
 */
#define	UDP_ENQUEUE_MP(udp, mp, proc, tag) {			\
	ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL);	\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(mp)->b_queue = (queue_t *)((uintptr_t)tag);		\
	(mp)->b_prev = (mblk_t *)proc;				\
	if ((udp)->udp_mphead == NULL)				\
		(udp)->udp_mphead = (mp);			\
	else							\
		(udp)->udp_mptail->b_next = (mp);		\
	(udp)->udp_mptail = (mp);				\
	(udp)->udp_mpcount++;					\
}

/* Count one more MT_HOT reader inside the perimeter; conn_lock held. */
#define	UDP_READERS_INCREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count++;				\
}

/*
 * Drop one MT_HOT reader; wake any thread blocked on conn_cv when the
 * last reader leaves.  conn_lock held.
 */
#define	UDP_READERS_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count--;				\
	if ((udp)->udp_reader_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

/*
 * Drop one squeue-mode reference; wake any thread blocked on conn_cv
 * when the count reaches zero.  conn_lock held.
 */
#define	UDP_SQUEUE_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_squeue_count--;				\
	if ((udp)->udp_squeue_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

/*
 * Notes on UDP endpoint synchronization:
 *
 * UDP needs exclusive operation on a per endpoint basis, when executing
 * functions that modify the endpoint state. udp_rput_other() deals with
 * packets with IP options, and processing these packets end up having
 * to update the endpoint's option related state. udp_wput_other() deals
 * with control operations from the top, e.g. connect() that needs to
 * update the endpoint state. These could be synchronized using locks,
 * but the current version uses squeues for this purpose. squeues may
 * give performance improvement for certain cases such as connected UDP
 * sockets; thus the framework allows for using squeues.
 *
 * The perimeter routines are described as follows:
 *
 * udp_enter():
 *	Enter the UDP endpoint perimeter.
 *
 * udp_become_writer():
 *	Become exclusive on the UDP endpoint. Specifies a function
 *	that will be called exclusively either immediately or later
 *	when the perimeter is available exclusively.
 *
 * udp_exit():
 *	Exit the UDP perimeter.
 *
 * Entering UDP from the top or from the bottom must be done using
 * udp_enter(). No lock must be held while attempting to enter the UDP
 * perimeter. When finished, udp_exit() must be called to get out of
 * the perimeter.
 *
 * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode,
 * multiple threads may enter a UDP endpoint concurrently. This is used
 * for sending and/or receiving normal data. Control operations and other
 * special cases call udp_become_writer() to become exclusive on a per
 * endpoint basis and this results in transitioning to SQUEUE mode. squeue
 * by definition serializes access to the conn_t. When there are no more
 * pending messages on the squeue for the UDP connection, the endpoint
 * reverts to MT_HOT mode. During the interregnum when not all MT threads
 * of an endpoint have finished, messages are queued in the UDP endpoint
 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode.
 *
 * These modes have the following analogs:
 *
 *	UDP_MT_HOT/udp_reader_count==0		none
 *	UDP_MT_HOT/udp_reader_count>0		RW_READ_LOCK
 *	UDP_MT_QUEUED				RW_WRITE_WANTED
 *	UDP_SQUEUE or UDP_QUEUED_SQUEUE		RW_WRITE_LOCKED
 *
 * Stable modes:	UDP_MT_HOT, UDP_SQUEUE
 * Transient modes:	UDP_MT_QUEUED, UDP_QUEUED_SQUEUE
 *
 * While in stable modes, UDP keeps track of the number of threads
 * operating on the endpoint.
The udp_reader_count variable represents
 * the number of threads entering the endpoint as readers while it is
 * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there
 * is only a single reader, i.e. when this counter drops to 1. Likewise,
 * udp_squeue_count represents the number of threads operating on the
 * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition
 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e.
 * when this counter drops to 0.
 *
 * The default mode is set to UDP_MT_HOT and UDP alternates between
 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below.
 *
 * Mode transition:
 * ----------------------------------------------------------------
 * old mode			Event				New mode
 * ----------------------------------------------------------------
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_SQUEUE
 *			and udp_reader_count == 1
 *
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_MT_QUEUED
 *			and udp_reader_count > 1
 *
 * UDP_MT_QUEUED	udp_reader_count drops to zero	UDP_QUEUED_SQUEUE
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_SQUEUE
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count != 0
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_MT_HOT
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count
 *			drops to zero
 *
 * UDP_SQUEUE		udp_squeue_count drops to zero	UDP_MT_HOT
 * ----------------------------------------------------------------
 */

/*
 * Given one of udp's queues, return the write queue of the underlying
 * /dev/ip instance (udp_wq's q_next); see the plumbing notes at the top
 * of this file.  The conn_t lives in that queue's q_ptr.
 */
static queue_t *
UDP_WR(queue_t *q)
{
	ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL);
	ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next)));

	return (_WR(q)->q_next);
}

/*
 * Given one of /dev/ip's queues (q_ptr holds the shared conn_t), return
 * the udp module's read queue above it (ip_rq's q_next), which is the
 * queue to putnext() on so messages appear to originate from udp.
 */
static queue_t *
UDP_RD(queue_t *q)
{
	ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(q)));
	ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL);

	return (_RD(q)->q_next);
}

/* Mode invariant checks are compiled in only for DEBUG kernels. */
#ifdef DEBUG
#define	UDP_MODE_ASSERTIONS(udp, caller)	udp_mode_assertions(udp, caller)
#else
#define	UDP_MODE_ASSERTIONS(udp, caller)
#endif

/* Invariants */
#ifdef DEBUG

/* Per-mode hit counters, indexed by udp_mode; debug statistics only. */
uint32_t udp_count[4];

/* Context of udp_mode_assertions */
#define	UDP_ENTER		1
#define	UDP_BECOME_WRITER	2
#define	UDP_EXIT		3

/*
 * Assert the invariants of the perimeter state machine for the current
 * udp_mode, given the calling context (UDP_ENTER/UDP_BECOME_WRITER/
 * UDP_EXIT).  Caller must hold conn_lock.
 */
static void
udp_mode_assertions(udp_t *udp, int caller)
{
	ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock));

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		/*
		 * Messages have not yet been enqueued on the internal queue,
		 * otherwise we would have switched to UDP_MT_QUEUED. Likewise
		 * by definition, there can't be any messages enqueued on the
		 * squeue. The UDP could be quiescent, so udp_reader_count
		 * could be zero at entry.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 &&
		    udp->udp_squeue_count == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0);
		udp_count[0]++;
		break;

	case UDP_MT_QUEUED:
		/*
		 * The last MT thread to exit the udp perimeter empties the
		 * internal queue and then switches the UDP to
		 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED
		 * mode, it means there must be at least 1 MT thread still in
		 * the perimeter and at least 1 message on the internal queue.
		 */
		ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL &&
		    udp->udp_mpcount != 0 && udp->udp_squeue_count == 0);
		udp_count[1]++;
		break;

	case UDP_QUEUED_SQUEUE:
		/*
		 * The switch has happened from MT to SQUEUE. So there can't
		 * be any MT threads. Messages could still pile up on the
		 * internal queue until the transition is complete and we
		 * move to UDP_SQUEUE mode. We can't assert on nonzero
		 * udp_squeue_count since the squeue could drain any time.
		 */
		ASSERT(udp->udp_reader_count == 0);
		udp_count[2]++;
		break;

	case UDP_SQUEUE:
		/*
		 * The transition is complete. There can't be any messages on
		 * the internal queue. The udp could be quiescent or the squeue
		 * could drain any time, so we can't assert on nonzero
		 * udp_squeue_count during entry. Nor can we assert that
		 * udp_reader_count is zero, since, a reader thread could have
		 * directly become writer in line by calling udp_become_writer
		 * without going through the queued states.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0);
		udp_count[3]++;
		break;
	}
}
#endif

/*
 * Enter the UDP perimeter: dispatch 'mp' to 'proc' according to the
 * current mode.  A closing endpoint frees the message; MT_HOT runs the
 * proc inline as a reader; SQUEUE hands it to the squeue (taking a conn
 * ref); the transient modes append it to the internal queue so ordering
 * is preserved.  conn_lock is never held across the proc/squeue calls.
 */
#define	_UDP_ENTER(connp, mp, proc, tag) {				\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	if ((connp)->conn_state_flags & CONN_CLOSING) {			\
		mutex_exit(&(connp)->conn_lock);			\
		freemsg(mp);						\
	} else {							\
		UDP_MODE_ASSERTIONS(_udp, UDP_ENTER);			\
									\
		switch (_udp->udp_mode) {				\
		case UDP_MT_HOT:					\
			/* We can execute as reader right away. */	\
			UDP_READERS_INCREF(_udp);			\
			mutex_exit(&(connp)->conn_lock);		\
			(*(proc))(connp, mp, (connp)->conn_sqp);	\
			break;						\
									\
		case UDP_SQUEUE:					\
			/*						\
			 * We are in squeue mode, send the		\
			 * packet to the squeue				\
			 */						\
			_udp->udp_squeue_count++;			\
			CONN_INC_REF_LOCKED(connp);			\
			mutex_exit(&(connp)->conn_lock);		\
			squeue_enter((connp)->conn_sqp, mp, proc,	\
			    connp, tag);				\
			break;						\
									\
		case UDP_MT_QUEUED:					\
		case UDP_QUEUED_SQUEUE:					\
			/*						\
			 * Some messages may have been enqueued		\
			 * ahead of us. Enqueue the new message		\
			 * at the tail of the internal queue to		\
			 * preserve message ordering.			\
			 */						\
			UDP_ENQUEUE_MP(_udp, mp, proc, tag);		\
			mutex_exit(&(connp)->conn_lock);		\
			break;						\
		}							\
	}								\
}

/* Function wrapper around _UDP_ENTER; see the macro above. */
static void
udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	_UDP_ENTER(connp, mp, proc, tag);
}

/*
 * Become exclusive (writer) on the endpoint.  If we are the sole reader,
 * switch to UDP_SQUEUE and run 'proc' via the squeue immediately; if
 * other readers remain, move to UDP_MT_QUEUED and queue the message; if
 * already exclusive, just bump udp_squeue_count and call 'proc' inline.
 */
static void
udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	udp_t	*udp;

	udp = connp->conn_udp;

	mutex_enter(&connp->conn_lock);

	UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER);

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		if (udp->udp_reader_count == 1) {
			/*
			 * We are the only MT thread. Switch to squeue mode
			 * immediately.
			 */
			udp->udp_mode = UDP_SQUEUE;
			udp->udp_squeue_count = 1;
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			squeue_enter(connp->conn_sqp, mp, proc, connp, tag);
			return;
		}
		/* FALLTHRU */

	case UDP_MT_QUEUED:
		/* Enqueue the packet internally in UDP */
		udp->udp_mode = UDP_MT_QUEUED;
		UDP_ENQUEUE_MP(udp, mp, proc, tag);
		mutex_exit(&connp->conn_lock);
		return;

	case UDP_SQUEUE:
	case UDP_QUEUED_SQUEUE:
		/*
		 * We are already exclusive. i.e. we are already
		 * writer. Simply call the desired function.
		 */
		udp->udp_squeue_count++;
		mutex_exit(&connp->conn_lock);
		(*proc)(connp, mp, connp->conn_sqp);
		return;
	}
}

/*
 * Transition from MT mode to SQUEUE mode, when the last MT thread
 * is exiting the UDP perimeter. Move all messages from the internal
 * udp queue to the squeue. A better way would be to move all the
 * messages in one shot, this needs more support from the squeue framework
 */
static void
udp_switch_to_squeue(udp_t *udp)
{
	mblk_t *mp;
	mblk_t	*mp_next;
	sqproc_t proc;
	uint8_t	tag;
	conn_t	*connp = udp->udp_connp;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(udp->udp_mode == UDP_MT_QUEUED);
	while (udp->udp_mphead != NULL) {
		/* Detach the whole list, then drain it without the lock. */
		mp = udp->udp_mphead;
		udp->udp_mphead = NULL;
		udp->udp_mptail = NULL;
		udp->udp_mpcount = 0;
		udp->udp_mode = UDP_QUEUED_SQUEUE;
		mutex_exit(&connp->conn_lock);
		/*
		 * It is best not to hold any locks across the calls
		 * to squeue functions. Since we drop the lock we
		 * need to go back and check the udp_mphead once again
		 * after the squeue_fill and hence the while loop at
		 * the top of this function
		 */
		for (; mp != NULL; mp = mp_next) {
			mp_next = mp->b_next;
			/* Recover proc/tag stashed by UDP_ENQUEUE_MP. */
			proc = (sqproc_t)mp->b_prev;
			tag = (uint8_t)((uintptr_t)mp->b_queue);
			mp->b_next = NULL;
			mp->b_prev = NULL;
			mp->b_queue = NULL;
			CONN_INC_REF(connp);
			udp->udp_squeue_count++;
			squeue_fill(connp->conn_sqp, mp, proc, connp,
			    tag);
		}
		mutex_enter(&connp->conn_lock);
	}
	/*
	 * udp_squeue_count of zero implies that the squeue has drained
	 * even before we arrived here (i.e. after the squeue_fill above)
	 */
	udp->udp_mode = (udp->udp_squeue_count != 0) ?
	    UDP_SQUEUE : UDP_MT_HOT;
}

/*
 * Leave the UDP perimeter: drop the reader or squeue reference taken at
 * entry and drive the mode transitions described in the state table
 * above.  The last MT_QUEUED reader performs the switch to squeue mode.
 */
#define	_UDP_EXIT(connp) {						\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	UDP_MODE_ASSERTIONS(_udp, UDP_EXIT);				\
									\
	switch (_udp->udp_mode) {					\
	case UDP_MT_HOT:						\
		UDP_READERS_DECREF(_udp);				\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_SQUEUE:						\
		UDP_SQUEUE_DECREF(_udp);				\
		if (_udp->udp_squeue_count == 0)			\
			_udp->udp_mode = UDP_MT_HOT;			\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_MT_QUEUED:						\
		/*							\
		 * If this is the last MT thread, we need to		\
		 * switch to squeue mode				\
		 */							\
		UDP_READERS_DECREF(_udp);				\
		if (_udp->udp_reader_count == 0)			\
			udp_switch_to_squeue(_udp);			\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_QUEUED_SQUEUE:						\
		UDP_SQUEUE_DECREF(_udp);				\
		/*							\
		 * Even if the udp_squeue_count drops to zero, we	\
		 * don't want to change udp_mode to UDP_MT_HOT here.	\
		 * The thread in udp_switch_to_squeue will take care	\
		 * of the transition to UDP_MT_HOT, after emptying	\
		 * any more new messages that have been enqueued in	\
		 * udp_mphead.						\
		 */							\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
	}								\
}

/* Function wrapper around _UDP_EXIT; see the macro above. */
static void
udp_exit(conn_t *connp)
{
	_UDP_EXIT(connp);
}

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP). MLP has special function
 * on TX systems. Once a port is made MLP, it's not available as
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" to find a suitable anon port.
930 */ 931 static in_port_t 932 udp_get_next_priv_port(udp_t *udp) 933 { 934 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 935 in_port_t nextport; 936 boolean_t restart = B_FALSE; 937 938 retry: 939 if (next_priv_port < udp_min_anonpriv_port || 940 next_priv_port >= IPPORT_RESERVED) { 941 next_priv_port = IPPORT_RESERVED - 1; 942 if (restart) 943 return (0); 944 restart = B_TRUE; 945 } 946 947 if (is_system_labeled() && 948 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 949 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 950 next_priv_port = nextport; 951 goto retry; 952 } 953 954 return (next_priv_port--); 955 } 956 957 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 958 /* ARGSUSED */ 959 static int 960 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 961 { 962 udp_fanout_t *udpf; 963 int i; 964 zoneid_t zoneid; 965 conn_t *connp; 966 udp_t *udp; 967 968 connp = Q_TO_CONN(q); 969 udp = connp->conn_udp; 970 971 /* Refer to comments in udp_status_report(). */ 972 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 973 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 974 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 975 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 976 return (0); 977 } 978 } 979 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 980 /* The following may work even if we cannot get a large buf. */ 981 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 982 return (0); 983 } 984 985 (void) mi_mpprintf(mp, 986 "UDP " MI_COL_HDRPAD_STR 987 /* 12345678[89ABCDEF] */ 988 " zone lport src addr dest addr port state"); 989 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 990 991 zoneid = connp->conn_zoneid; 992 993 for (i = 0; i < udp_bind_fanout_size; i++) { 994 udpf = &udp_bind_fanout[i]; 995 mutex_enter(&udpf->uf_lock); 996 997 /* Print the hash index. 
*/ 998 udp = udpf->uf_udp; 999 if (zoneid != GLOBAL_ZONEID) { 1000 /* skip to first entry in this zone; might be none */ 1001 while (udp != NULL && 1002 udp->udp_connp->conn_zoneid != zoneid) 1003 udp = udp->udp_bind_hash; 1004 } 1005 if (udp != NULL) { 1006 uint_t print_len, buf_len; 1007 1008 buf_len = mp->b_cont->b_datap->db_lim - 1009 mp->b_cont->b_wptr; 1010 print_len = snprintf((char *)mp->b_cont->b_wptr, 1011 buf_len, "%d\n", i); 1012 if (print_len < buf_len) { 1013 mp->b_cont->b_wptr += print_len; 1014 } else { 1015 mp->b_cont->b_wptr += buf_len; 1016 } 1017 for (; udp != NULL; udp = udp->udp_bind_hash) { 1018 if (zoneid == GLOBAL_ZONEID || 1019 zoneid == udp->udp_connp->conn_zoneid) 1020 udp_report_item(mp->b_cont, udp); 1021 } 1022 } 1023 mutex_exit(&udpf->uf_lock); 1024 } 1025 udp_last_ndd_get_info_time = ddi_get_lbolt(); 1026 return (0); 1027 } 1028 1029 /* 1030 * Hash list removal routine for udp_t structures. 1031 */ 1032 static void 1033 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 1034 { 1035 udp_t *udpnext; 1036 kmutex_t *lockp; 1037 1038 if (udp->udp_ptpbhn == NULL) 1039 return; 1040 1041 /* 1042 * Extract the lock pointer in case there are concurrent 1043 * hash_remove's for this instance. 
1044 */ 1045 ASSERT(udp->udp_port != 0); 1046 if (!caller_holds_lock) { 1047 lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock; 1048 ASSERT(lockp != NULL); 1049 mutex_enter(lockp); 1050 } 1051 if (udp->udp_ptpbhn != NULL) { 1052 udpnext = udp->udp_bind_hash; 1053 if (udpnext != NULL) { 1054 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 1055 udp->udp_bind_hash = NULL; 1056 } 1057 *udp->udp_ptpbhn = udpnext; 1058 udp->udp_ptpbhn = NULL; 1059 } 1060 if (!caller_holds_lock) { 1061 mutex_exit(lockp); 1062 } 1063 } 1064 1065 static void 1066 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 1067 { 1068 udp_t **udpp; 1069 udp_t *udpnext; 1070 1071 ASSERT(MUTEX_HELD(&uf->uf_lock)); 1072 if (udp->udp_ptpbhn != NULL) { 1073 udp_bind_hash_remove(udp, B_TRUE); 1074 } 1075 udpp = &uf->uf_udp; 1076 udpnext = udpp[0]; 1077 if (udpnext != NULL) { 1078 /* 1079 * If the new udp bound to the INADDR_ANY address 1080 * and the first one in the list is not bound to 1081 * INADDR_ANY we skip all entries until we find the 1082 * first one bound to INADDR_ANY. 1083 * This makes sure that applications binding to a 1084 * specific address get preference over those binding to 1085 * INADDR_ANY. 1086 */ 1087 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 1088 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 1089 while ((udpnext = udpp[0]) != NULL && 1090 !V6_OR_V4_INADDR_ANY( 1091 udpnext->udp_bound_v6src)) { 1092 udpp = &(udpnext->udp_bind_hash); 1093 } 1094 if (udpnext != NULL) 1095 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1096 } else { 1097 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1098 } 1099 } 1100 udp->udp_bind_hash = udpnext; 1101 udp->udp_ptpbhn = udpp; 1102 udpp[0] = udp; 1103 } 1104 1105 /* 1106 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 1107 * passed to udp_wput. 1108 * It associates a port number and local address with the stream. 
1109 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 1110 * protocol type (IPPROTO_UDP) placed in the message following the address. 1111 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 1112 * (Called as writer.) 1113 * 1114 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 1115 * without setting SO_REUSEADDR. This is needed so that they 1116 * can be viewed as two independent transport protocols. 1117 * However, anonymouns ports are allocated from the same range to avoid 1118 * duplicating the udp_g_next_port_to_try. 1119 */ 1120 static void 1121 udp_bind(queue_t *q, mblk_t *mp) 1122 { 1123 sin_t *sin; 1124 sin6_t *sin6; 1125 mblk_t *mp1; 1126 in_port_t port; /* Host byte order */ 1127 in_port_t requested_port; /* Host byte order */ 1128 struct T_bind_req *tbr; 1129 int count; 1130 in6_addr_t v6src; 1131 boolean_t bind_to_req_port_only; 1132 int loopmax; 1133 udp_fanout_t *udpf; 1134 in_port_t lport; /* Network byte order */ 1135 zoneid_t zoneid; 1136 conn_t *connp; 1137 udp_t *udp; 1138 boolean_t is_inaddr_any; 1139 mlp_type_t addrtype, mlptype; 1140 1141 connp = Q_TO_CONN(q); 1142 udp = connp->conn_udp; 1143 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 1144 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1145 "udp_bind: bad req, len %u", 1146 (uint_t)(mp->b_wptr - mp->b_rptr)); 1147 udp_err_ack(q, mp, TPROTO, 0); 1148 return; 1149 } 1150 1151 if (udp->udp_state != TS_UNBND) { 1152 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1153 "udp_bind: bad state, %u", udp->udp_state); 1154 udp_err_ack(q, mp, TOUTSTATE, 0); 1155 return; 1156 } 1157 /* 1158 * Reallocate the message to make sure we have enough room for an 1159 * address and the protocol type. 
1160 */ 1161 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 1162 if (!mp1) { 1163 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1164 return; 1165 } 1166 1167 mp = mp1; 1168 tbr = (struct T_bind_req *)mp->b_rptr; 1169 switch (tbr->ADDR_length) { 1170 case 0: /* Request for a generic port */ 1171 tbr->ADDR_offset = sizeof (struct T_bind_req); 1172 if (udp->udp_family == AF_INET) { 1173 tbr->ADDR_length = sizeof (sin_t); 1174 sin = (sin_t *)&tbr[1]; 1175 *sin = sin_null; 1176 sin->sin_family = AF_INET; 1177 mp->b_wptr = (uchar_t *)&sin[1]; 1178 } else { 1179 ASSERT(udp->udp_family == AF_INET6); 1180 tbr->ADDR_length = sizeof (sin6_t); 1181 sin6 = (sin6_t *)&tbr[1]; 1182 *sin6 = sin6_null; 1183 sin6->sin6_family = AF_INET6; 1184 mp->b_wptr = (uchar_t *)&sin6[1]; 1185 } 1186 port = 0; 1187 break; 1188 1189 case sizeof (sin_t): /* Complete IPv4 address */ 1190 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 1191 sizeof (sin_t)); 1192 if (sin == NULL || !OK_32PTR((char *)sin)) { 1193 udp_err_ack(q, mp, TSYSERR, EINVAL); 1194 return; 1195 } 1196 if (udp->udp_family != AF_INET || 1197 sin->sin_family != AF_INET) { 1198 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1199 return; 1200 } 1201 port = ntohs(sin->sin_port); 1202 break; 1203 1204 case sizeof (sin6_t): /* complete IPv6 address */ 1205 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 1206 sizeof (sin6_t)); 1207 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1208 udp_err_ack(q, mp, TSYSERR, EINVAL); 1209 return; 1210 } 1211 if (udp->udp_family != AF_INET6 || 1212 sin6->sin6_family != AF_INET6) { 1213 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1214 return; 1215 } 1216 port = ntohs(sin6->sin6_port); 1217 break; 1218 1219 default: /* Invalid request */ 1220 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1221 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 1222 udp_err_ack(q, mp, TBADADDR, 0); 1223 return; 1224 } 1225 1226 requested_port = port; 1227 1228 if (requested_port == 0 || 
tbr->PRIM_type == O_T_BIND_REQ) 1229 bind_to_req_port_only = B_FALSE; 1230 else /* T_BIND_REQ and requested_port != 0 */ 1231 bind_to_req_port_only = B_TRUE; 1232 1233 if (requested_port == 0) { 1234 /* 1235 * If the application passed in zero for the port number, it 1236 * doesn't care which port number we bind to. Get one in the 1237 * valid range. 1238 */ 1239 if (udp->udp_anon_priv_bind) { 1240 port = udp_get_next_priv_port(udp); 1241 } else { 1242 port = udp_update_next_port(udp, 1243 udp_g_next_port_to_try, B_TRUE); 1244 } 1245 } else { 1246 /* 1247 * If the port is in the well-known privileged range, 1248 * make sure the caller was privileged. 1249 */ 1250 int i; 1251 boolean_t priv = B_FALSE; 1252 1253 if (port < udp_smallest_nonpriv_port) { 1254 priv = B_TRUE; 1255 } else { 1256 for (i = 0; i < udp_g_num_epriv_ports; i++) { 1257 if (port == udp_g_epriv_ports[i]) { 1258 priv = B_TRUE; 1259 break; 1260 } 1261 } 1262 } 1263 1264 if (priv) { 1265 cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); 1266 1267 if (secpolicy_net_privaddr(cr, port) != 0) { 1268 udp_err_ack(q, mp, TACCES, 0); 1269 return; 1270 } 1271 } 1272 } 1273 1274 if (port == 0) { 1275 udp_err_ack(q, mp, TNOADDR, 0); 1276 return; 1277 } 1278 1279 /* 1280 * Copy the source address into our udp structure. This address 1281 * may still be zero; if so, IP will fill in the correct address 1282 * each time an outbound packet is passed to it. 
1283 */ 1284 if (udp->udp_family == AF_INET) { 1285 ASSERT(sin != NULL); 1286 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1287 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1288 udp->udp_ip_snd_options_len; 1289 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 1290 } else { 1291 ASSERT(sin6 != NULL); 1292 v6src = sin6->sin6_addr; 1293 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 1294 udp->udp_ipversion = IPV4_VERSION; 1295 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1296 UDPH_SIZE + udp->udp_ip_snd_options_len; 1297 } else { 1298 udp->udp_ipversion = IPV6_VERSION; 1299 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1300 } 1301 } 1302 1303 /* 1304 * If udp_reuseaddr is not set, then we have to make sure that 1305 * the IP address and port number the application requested 1306 * (or we selected for the application) is not being used by 1307 * another stream. If another stream is already using the 1308 * requested IP address and port, the behavior depends on 1309 * "bind_to_req_port_only". If set the bind fails; otherwise we 1310 * search for any an unused port to bind to the the stream. 1311 * 1312 * As per the BSD semantics, as modified by the Deering multicast 1313 * changes, if udp_reuseaddr is set, then we allow multiple binds 1314 * to the same port independent of the local IP address. 1315 * 1316 * This is slightly different than in SunOS 4.X which did not 1317 * support IP multicast. Note that the change implemented by the 1318 * Deering multicast code effects all binds - not only binding 1319 * to IP multicast addresses. 1320 * 1321 * Note that when binding to port zero we ignore SO_REUSEADDR in 1322 * order to guarantee a unique port. 
1323 */ 1324 1325 count = 0; 1326 if (udp->udp_anon_priv_bind) { 1327 /* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */ 1328 loopmax = IPPORT_RESERVED - udp_min_anonpriv_port; 1329 } else { 1330 loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1; 1331 } 1332 1333 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 1334 zoneid = connp->conn_zoneid; 1335 1336 for (;;) { 1337 udp_t *udp1; 1338 boolean_t found_exclbind = B_FALSE; 1339 1340 /* 1341 * Walk through the list of udp streams bound to 1342 * requested port with the same IP address. 1343 */ 1344 lport = htons(port); 1345 udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)]; 1346 mutex_enter(&udpf->uf_lock); 1347 for (udp1 = udpf->uf_udp; udp1 != NULL; 1348 udp1 = udp1->udp_bind_hash) { 1349 if (lport != udp1->udp_port) 1350 continue; 1351 1352 /* 1353 * On a labeled system, we must treat bindings to ports 1354 * on shared IP addresses by sockets with MAC exemption 1355 * privilege as being in all zones, as there's 1356 * otherwise no way to identify the right receiver. 1357 */ 1358 if (zoneid != udp1->udp_connp->conn_zoneid && 1359 !udp->udp_mac_exempt && !udp1->udp_mac_exempt) 1360 continue; 1361 1362 /* 1363 * If UDP_EXCLBIND is set for either the bound or 1364 * binding endpoint, the semantics of bind 1365 * is changed according to the following chart. 1366 * 1367 * spec = specified address (v4 or v6) 1368 * unspec = unspecified address (v4 or v6) 1369 * A = specified addresses are different for endpoints 1370 * 1371 * bound bind to allowed? 1372 * ------------------------------------- 1373 * unspec unspec no 1374 * unspec spec no 1375 * spec unspec no 1376 * spec spec yes if A 1377 * 1378 * For labeled systems, SO_MAC_EXEMPT behaves the same 1379 * as UDP_EXCLBIND, except that zoneid is ignored. 
1380 */ 1381 if (udp1->udp_exclbind || udp->udp_exclbind || 1382 udp1->udp_mac_exempt || udp->udp_mac_exempt) { 1383 if (V6_OR_V4_INADDR_ANY( 1384 udp1->udp_bound_v6src) || 1385 is_inaddr_any || 1386 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1387 &v6src)) { 1388 found_exclbind = B_TRUE; 1389 break; 1390 } 1391 continue; 1392 } 1393 1394 /* 1395 * Check ipversion to allow IPv4 and IPv6 sockets to 1396 * have disjoint port number spaces. 1397 */ 1398 if (udp->udp_ipversion != udp1->udp_ipversion) { 1399 1400 /* 1401 * On the first time through the loop, if the 1402 * the user intentionally specified a 1403 * particular port number, then ignore any 1404 * bindings of the other protocol that may 1405 * conflict. This allows the user to bind IPv6 1406 * alone and get both v4 and v6, or bind both 1407 * both and get each seperately. On subsequent 1408 * times through the loop, we're checking a 1409 * port that we chose (not the user) and thus 1410 * we do not allow casual duplicate bindings. 1411 */ 1412 if (count == 0 && requested_port != 0) 1413 continue; 1414 } 1415 1416 /* 1417 * No difference depending on SO_REUSEADDR. 1418 * 1419 * If existing port is bound to a 1420 * non-wildcard IP address and 1421 * the requesting stream is bound to 1422 * a distinct different IP addresses 1423 * (non-wildcard, also), keep going. 1424 */ 1425 if (!is_inaddr_any && 1426 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 1427 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1428 &v6src)) { 1429 continue; 1430 } 1431 break; 1432 } 1433 1434 if (!found_exclbind && 1435 (udp->udp_reuseaddr && requested_port != 0)) { 1436 break; 1437 } 1438 1439 if (udp1 == NULL) { 1440 /* 1441 * No other stream has this IP address 1442 * and port number. We can use it. 1443 */ 1444 break; 1445 } 1446 mutex_exit(&udpf->uf_lock); 1447 if (bind_to_req_port_only) { 1448 /* 1449 * We get here only when requested port 1450 * is bound (and only first of the for() 1451 * loop iteration). 
1452 * 1453 * The semantics of this bind request 1454 * require it to fail so we return from 1455 * the routine (and exit the loop). 1456 * 1457 */ 1458 udp_err_ack(q, mp, TADDRBUSY, 0); 1459 return; 1460 } 1461 1462 if (udp->udp_anon_priv_bind) { 1463 port = udp_get_next_priv_port(udp); 1464 } else { 1465 if ((count == 0) && (requested_port != 0)) { 1466 /* 1467 * If the application wants us to find 1468 * a port, get one to start with. Set 1469 * requested_port to 0, so that we will 1470 * update udp_g_next_port_to_try below. 1471 */ 1472 port = udp_update_next_port(udp, 1473 udp_g_next_port_to_try, B_TRUE); 1474 requested_port = 0; 1475 } else { 1476 port = udp_update_next_port(udp, port + 1, 1477 B_FALSE); 1478 } 1479 } 1480 1481 if (port == 0 || ++count >= loopmax) { 1482 /* 1483 * We've tried every possible port number and 1484 * there are none available, so send an error 1485 * to the user. 1486 */ 1487 udp_err_ack(q, mp, TNOADDR, 0); 1488 return; 1489 } 1490 } 1491 1492 /* 1493 * Copy the source address into our udp structure. This address 1494 * may still be zero; if so, ip will fill in the correct address 1495 * each time an outbound packet is passed to it. 1496 * If we are binding to a broadcast or multicast address udp_rput 1497 * will clear the source address when it receives the T_BIND_ACK. 1498 */ 1499 udp->udp_v6src = udp->udp_bound_v6src = v6src; 1500 udp->udp_port = lport; 1501 /* 1502 * Now reset the the next anonymous port if the application requested 1503 * an anonymous port, or we handed out the next anonymous port. 1504 */ 1505 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 1506 udp_g_next_port_to_try = port + 1; 1507 } 1508 1509 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. 
*/ 1510 if (udp->udp_family == AF_INET) { 1511 sin->sin_port = udp->udp_port; 1512 } else { 1513 int error; 1514 1515 sin6->sin6_port = udp->udp_port; 1516 /* Rebuild the header template */ 1517 error = udp_build_hdrs(q, udp); 1518 if (error != 0) { 1519 mutex_exit(&udpf->uf_lock); 1520 udp_err_ack(q, mp, TSYSERR, error); 1521 return; 1522 } 1523 } 1524 udp->udp_state = TS_IDLE; 1525 udp_bind_hash_insert(udpf, udp); 1526 mutex_exit(&udpf->uf_lock); 1527 1528 if (cl_inet_bind) { 1529 /* 1530 * Running in cluster mode - register bind information 1531 */ 1532 if (udp->udp_ipversion == IPV4_VERSION) { 1533 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1534 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1535 (in_port_t)udp->udp_port); 1536 } else { 1537 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1538 (uint8_t *)&(udp->udp_v6src), 1539 (in_port_t)udp->udp_port); 1540 } 1541 1542 } 1543 1544 connp->conn_anon_port = (is_system_labeled() && requested_port == 0); 1545 if (is_system_labeled() && (!connp->conn_anon_port || 1546 connp->conn_anon_mlp)) { 1547 uint16_t mlpport; 1548 cred_t *cr = connp->conn_cred; 1549 zone_t *zone; 1550 1551 connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth : 1552 mlptSingle; 1553 addrtype = tsol_mlp_addr_type(zoneid, IPV6_VERSION, &v6src); 1554 if (addrtype == mlptSingle) { 1555 udp_err_ack(q, mp, TNOADDR, 0); 1556 connp->conn_anon_port = B_FALSE; 1557 connp->conn_mlp_type = mlptSingle; 1558 return; 1559 } 1560 mlpport = connp->conn_anon_port ? 
PMAPPORT : port; 1561 zone = crgetzone(cr); 1562 mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport, 1563 addrtype); 1564 if (mlptype != mlptSingle && 1565 (connp->conn_mlp_type == mlptSingle || 1566 secpolicy_net_bindmlp(cr) != 0)) { 1567 if (udp->udp_debug) { 1568 (void) strlog(UDP_MOD_ID, 0, 1, 1569 SL_ERROR|SL_TRACE, 1570 "udp_bind: no priv for multilevel port %d", 1571 mlpport); 1572 } 1573 udp_err_ack(q, mp, TACCES, 0); 1574 connp->conn_anon_port = B_FALSE; 1575 connp->conn_mlp_type = mlptSingle; 1576 return; 1577 } 1578 1579 /* 1580 * If we're specifically binding a shared IP address and the 1581 * port is MLP on shared addresses, then check to see if this 1582 * zone actually owns the MLP. Reject if not. 1583 */ 1584 if (mlptype == mlptShared && addrtype == mlptShared) { 1585 zoneid_t mlpzone; 1586 1587 mlpzone = tsol_mlp_findzone(IPPROTO_UDP, 1588 htons(mlpport)); 1589 if (connp->conn_zoneid != mlpzone) { 1590 if (udp->udp_debug) { 1591 (void) strlog(UDP_MOD_ID, 0, 1, 1592 SL_ERROR|SL_TRACE, 1593 "udp_bind: attempt to bind port " 1594 "%d on shared addr in zone %d " 1595 "(should be %d)", 1596 mlpport, connp->conn_zoneid, 1597 mlpzone); 1598 } 1599 udp_err_ack(q, mp, TACCES, 0); 1600 connp->conn_anon_port = B_FALSE; 1601 connp->conn_mlp_type = mlptSingle; 1602 return; 1603 } 1604 } 1605 if (connp->conn_anon_port) { 1606 int error; 1607 1608 error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp, 1609 port, B_TRUE); 1610 if (error != 0) { 1611 if (udp->udp_debug) { 1612 (void) strlog(UDP_MOD_ID, 0, 1, 1613 SL_ERROR|SL_TRACE, 1614 "udp_bind: cannot establish anon " 1615 "MLP for port %d", port); 1616 } 1617 udp_err_ack(q, mp, TACCES, 0); 1618 connp->conn_anon_port = B_FALSE; 1619 connp->conn_mlp_type = mlptSingle; 1620 return; 1621 } 1622 } 1623 connp->conn_mlp_type = mlptype; 1624 } 1625 1626 /* Pass the protocol number in the message following the address. 
*/ 1627 *mp->b_wptr++ = IPPROTO_UDP; 1628 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1629 /* 1630 * Append a request for an IRE if udp_v6src not 1631 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1632 */ 1633 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1634 if (!mp->b_cont) { 1635 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1636 return; 1637 } 1638 mp->b_cont->b_wptr += sizeof (ire_t); 1639 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1640 } 1641 if (udp->udp_family == AF_INET6) 1642 mp = ip_bind_v6(q, mp, connp, NULL); 1643 else 1644 mp = ip_bind_v4(q, mp, connp); 1645 1646 if (mp != NULL) 1647 udp_rput_other(_RD(q), mp); 1648 else 1649 CONN_INC_REF(connp); 1650 } 1651 1652 1653 void 1654 udp_resume_bind(conn_t *connp, mblk_t *mp) 1655 { 1656 udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY); 1657 } 1658 1659 /* 1660 * This is called from ip_wput_nondata to resume a deferred UDP bind. 1661 */ 1662 /* ARGSUSED */ 1663 static void 1664 udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2) 1665 { 1666 conn_t *connp = arg; 1667 1668 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1669 1670 udp_rput_other(connp->conn_rq, mp); 1671 1672 CONN_OPER_PENDING_DONE(connp); 1673 udp_exit(connp); 1674 } 1675 1676 /* 1677 * This routine handles each T_CONN_REQ message passed to udp. It 1678 * associates a default destination address with the stream. 1679 * 1680 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1681 * T_BIND_REQ - specifying local and remote address/port 1682 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 1683 * T_OK_ACK - for the T_CONN_REQ 1684 * T_CONN_CON - to keep the TPI user happy 1685 * 1686 * The connect completes in udp_rput. 1687 * When a T_BIND_ACK is received information is extracted from the IRE 1688 * and the two appended messages are sent to the TPI user. 1689 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1690 * it to an error ack for the appropriate primitive. 
1691 */ 1692 static void 1693 udp_connect(queue_t *q, mblk_t *mp) 1694 { 1695 sin6_t *sin6; 1696 sin_t *sin; 1697 struct T_conn_req *tcr; 1698 in6_addr_t v6dst; 1699 ipaddr_t v4dst; 1700 uint16_t dstport; 1701 uint32_t flowinfo; 1702 mblk_t *mp1, *mp2; 1703 udp_fanout_t *udpf; 1704 udp_t *udp, *udp1; 1705 1706 udp = Q_TO_UDP(q); 1707 1708 tcr = (struct T_conn_req *)mp->b_rptr; 1709 1710 /* A bit of sanity checking */ 1711 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 1712 udp_err_ack(q, mp, TPROTO, 0); 1713 return; 1714 } 1715 /* 1716 * This UDP must have bound to a port already before doing 1717 * a connect. 1718 */ 1719 if (udp->udp_state == TS_UNBND) { 1720 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1721 "udp_connect: bad state, %u", udp->udp_state); 1722 udp_err_ack(q, mp, TOUTSTATE, 0); 1723 return; 1724 } 1725 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 1726 1727 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1728 1729 if (udp->udp_state == TS_DATA_XFER) { 1730 /* Already connected - clear out state */ 1731 mutex_enter(&udpf->uf_lock); 1732 udp->udp_v6src = udp->udp_bound_v6src; 1733 udp->udp_state = TS_IDLE; 1734 mutex_exit(&udpf->uf_lock); 1735 } 1736 1737 if (tcr->OPT_length != 0) { 1738 udp_err_ack(q, mp, TBADOPT, 0); 1739 return; 1740 } 1741 1742 /* 1743 * Determine packet type based on type of address passed in 1744 * the request should contain an IPv4 or IPv6 address. 
1745 * Make sure that address family matches the type of 1746 * family of the the address passed down 1747 */ 1748 switch (tcr->DEST_length) { 1749 default: 1750 udp_err_ack(q, mp, TBADADDR, 0); 1751 return; 1752 1753 case sizeof (sin_t): 1754 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 1755 sizeof (sin_t)); 1756 if (sin == NULL || !OK_32PTR((char *)sin)) { 1757 udp_err_ack(q, mp, TSYSERR, EINVAL); 1758 return; 1759 } 1760 if (udp->udp_family != AF_INET || 1761 sin->sin_family != AF_INET) { 1762 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1763 return; 1764 } 1765 v4dst = sin->sin_addr.s_addr; 1766 dstport = sin->sin_port; 1767 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1768 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1769 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1770 udp->udp_ip_snd_options_len; 1771 break; 1772 1773 case sizeof (sin6_t): 1774 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 1775 sizeof (sin6_t)); 1776 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1777 udp_err_ack(q, mp, TSYSERR, EINVAL); 1778 return; 1779 } 1780 if (udp->udp_family != AF_INET6 || 1781 sin6->sin6_family != AF_INET6) { 1782 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1783 return; 1784 } 1785 v6dst = sin6->sin6_addr; 1786 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 1787 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 1788 udp->udp_ipversion = IPV4_VERSION; 1789 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1790 UDPH_SIZE + udp->udp_ip_snd_options_len; 1791 flowinfo = 0; 1792 } else { 1793 udp->udp_ipversion = IPV6_VERSION; 1794 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1795 flowinfo = sin6->sin6_flowinfo; 1796 } 1797 dstport = sin6->sin6_port; 1798 break; 1799 } 1800 if (dstport == 0) { 1801 udp_err_ack(q, mp, TBADADDR, 0); 1802 return; 1803 } 1804 1805 /* 1806 * Create a default IP header with no IP options. 1807 */ 1808 udp->udp_dstport = dstport; 1809 if (udp->udp_ipversion == IPV4_VERSION) { 1810 /* 1811 * Interpret a zero destination to mean loopback. 
1812 * Update the T_CONN_REQ (sin/sin6) since it is used to 1813 * generate the T_CONN_CON. 1814 */ 1815 if (v4dst == INADDR_ANY) { 1816 v4dst = htonl(INADDR_LOOPBACK); 1817 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1818 if (udp->udp_family == AF_INET) { 1819 sin->sin_addr.s_addr = v4dst; 1820 } else { 1821 sin6->sin6_addr = v6dst; 1822 } 1823 } 1824 udp->udp_v6dst = v6dst; 1825 udp->udp_flowinfo = 0; 1826 1827 /* 1828 * If the destination address is multicast and 1829 * an outgoing multicast interface has been set, 1830 * use the address of that interface as our 1831 * source address if no source address has been set. 1832 */ 1833 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 1834 CLASSD(v4dst) && 1835 udp->udp_multicast_if_addr != INADDR_ANY) { 1836 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 1837 &udp->udp_v6src); 1838 } 1839 } else { 1840 ASSERT(udp->udp_ipversion == IPV6_VERSION); 1841 /* 1842 * Interpret a zero destination to mean loopback. 1843 * Update the T_CONN_REQ (sin/sin6) since it is used to 1844 * generate the T_CONN_CON. 1845 */ 1846 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 1847 v6dst = ipv6_loopback; 1848 sin6->sin6_addr = v6dst; 1849 } 1850 udp->udp_v6dst = v6dst; 1851 udp->udp_flowinfo = flowinfo; 1852 /* 1853 * If the destination address is multicast and 1854 * an outgoing multicast interface has been set, 1855 * then the ip bind logic will pick the correct source 1856 * address (i.e. matching the outgoing multicast interface). 
1857 */ 1858 } 1859 1860 /* 1861 * Verify that the src/port/dst/port is unique for all 1862 * connections in TS_DATA_XFER 1863 */ 1864 mutex_enter(&udpf->uf_lock); 1865 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 1866 if (udp1->udp_state != TS_DATA_XFER) 1867 continue; 1868 if (udp->udp_port != udp1->udp_port || 1869 udp->udp_ipversion != udp1->udp_ipversion || 1870 dstport != udp1->udp_dstport || 1871 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 1872 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst)) 1873 continue; 1874 mutex_exit(&udpf->uf_lock); 1875 udp_err_ack(q, mp, TBADADDR, 0); 1876 return; 1877 } 1878 udp->udp_state = TS_DATA_XFER; 1879 mutex_exit(&udpf->uf_lock); 1880 1881 /* 1882 * Send down bind to IP to verify that there is a route 1883 * and to determine the source address. 1884 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 1885 */ 1886 if (udp->udp_family == AF_INET) 1887 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t)); 1888 else 1889 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); 1890 if (mp1 == NULL) { 1891 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1892 bind_failed: 1893 mutex_enter(&udpf->uf_lock); 1894 udp->udp_state = TS_IDLE; 1895 mutex_exit(&udpf->uf_lock); 1896 return; 1897 } 1898 1899 /* 1900 * We also have to send a connection confirmation to 1901 * keep TLI happy. Prepare it for udp_rput. 1902 */ 1903 if (udp->udp_family == AF_INET) 1904 mp2 = mi_tpi_conn_con(NULL, (char *)sin, 1905 sizeof (*sin), NULL, 0); 1906 else 1907 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, 1908 sizeof (*sin6), NULL, 0); 1909 if (mp2 == NULL) { 1910 freemsg(mp1); 1911 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1912 goto bind_failed; 1913 } 1914 1915 mp = mi_tpi_ok_ack_alloc(mp); 1916 if (mp == NULL) { 1917 /* Unable to reuse the T_CONN_REQ for the ack. 
*/ 1918 freemsg(mp2); 1919 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 1920 goto bind_failed; 1921 } 1922 1923 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 1924 linkb(mp1, mp); 1925 linkb(mp1, mp2); 1926 1927 mblk_setcred(mp1, udp->udp_connp->conn_cred); 1928 if (udp->udp_family == AF_INET) 1929 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1930 else 1931 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1932 1933 if (mp1 != NULL) 1934 udp_rput_other(_RD(q), mp1); 1935 else 1936 CONN_INC_REF(udp->udp_connp); 1937 } 1938 1939 static int 1940 udp_close(queue_t *q) 1941 { 1942 conn_t *connp = Q_TO_CONN(UDP_WR(q)); 1943 udp_t *udp; 1944 queue_t *ip_rq = RD(UDP_WR(q)); 1945 1946 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1947 udp = connp->conn_udp; 1948 1949 ip_quiesce_conn(connp); 1950 /* 1951 * Disable read-side synchronous stream 1952 * interface and drain any queued data. 1953 */ 1954 udp_rcv_drain(q, udp, B_TRUE); 1955 ASSERT(!udp->udp_direct_sockfs); 1956 1957 qprocsoff(q); 1958 1959 /* restore IP module's high and low water marks to default values */ 1960 ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat; 1961 WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat; 1962 WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat; 1963 1964 ASSERT(udp->udp_rcv_cnt == 0); 1965 ASSERT(udp->udp_rcv_msgcnt == 0); 1966 ASSERT(udp->udp_rcv_list_head == NULL); 1967 ASSERT(udp->udp_rcv_list_tail == NULL); 1968 1969 udp_close_free(connp); 1970 1971 /* 1972 * Restore connp as an IP endpoint. 1973 * Locking required to prevent a race with udp_snmp_get()/ 1974 * ipcl_get_next_conn(), which selects conn_t which are 1975 * IPCL_UDP and not CONN_CONDEMNED. 
1976 */ 1977 mutex_enter(&connp->conn_lock); 1978 connp->conn_flags &= ~IPCL_UDP; 1979 connp->conn_state_flags &= 1980 ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED); 1981 connp->conn_ulp_labeled = B_FALSE; 1982 mutex_exit(&connp->conn_lock); 1983 1984 return (0); 1985 } 1986 1987 /* 1988 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn 1989 */ 1990 void 1991 udp_quiesce_conn(conn_t *connp) 1992 { 1993 udp_t *udp = connp->conn_udp; 1994 1995 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 1996 /* 1997 * Running in cluster mode - register unbind information 1998 */ 1999 if (udp->udp_ipversion == IPV4_VERSION) { 2000 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 2001 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 2002 (in_port_t)udp->udp_port); 2003 } else { 2004 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 2005 (uint8_t *)(&(udp->udp_v6src)), 2006 (in_port_t)udp->udp_port); 2007 } 2008 } 2009 2010 udp_bind_hash_remove(udp, B_FALSE); 2011 2012 mutex_enter(&connp->conn_lock); 2013 while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 || 2014 udp->udp_mode != UDP_MT_HOT) { 2015 cv_wait(&connp->conn_cv, &connp->conn_lock); 2016 } 2017 mutex_exit(&connp->conn_lock); 2018 } 2019 2020 void 2021 udp_close_free(conn_t *connp) 2022 { 2023 udp_t *udp = connp->conn_udp; 2024 2025 /* If there are any options associated with the stream, free them. 
*/ 2026 if (udp->udp_ip_snd_options) { 2027 mi_free((char *)udp->udp_ip_snd_options); 2028 udp->udp_ip_snd_options = NULL; 2029 } 2030 2031 if (udp->udp_ip_rcv_options) { 2032 mi_free((char *)udp->udp_ip_rcv_options); 2033 udp->udp_ip_rcv_options = NULL; 2034 } 2035 2036 /* Free memory associated with sticky options */ 2037 if (udp->udp_sticky_hdrs_len != 0) { 2038 kmem_free(udp->udp_sticky_hdrs, 2039 udp->udp_sticky_hdrs_len); 2040 udp->udp_sticky_hdrs = NULL; 2041 udp->udp_sticky_hdrs_len = 0; 2042 } 2043 2044 ip6_pkt_free(&udp->udp_sticky_ipp); 2045 2046 udp->udp_connp = NULL; 2047 connp->conn_udp = NULL; 2048 kmem_cache_free(udp_cache, udp); 2049 } 2050 2051 /* 2052 * This routine handles each T_DISCON_REQ message passed to udp 2053 * as an indicating that UDP is no longer connected. This results 2054 * in sending a T_BIND_REQ to IP to restore the binding to just 2055 * the local address/port. 2056 * 2057 * This routine sends down a T_BIND_REQ to IP with the following mblks: 2058 * T_BIND_REQ - specifying just the local address/port 2059 * T_OK_ACK - for the T_DISCON_REQ 2060 * 2061 * The disconnect completes in udp_rput. 2062 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 2063 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 2064 * it to an error ack for the appropriate primitive. 
2065 */ 2066 static void 2067 udp_disconnect(queue_t *q, mblk_t *mp) 2068 { 2069 udp_t *udp = Q_TO_UDP(q); 2070 mblk_t *mp1; 2071 udp_fanout_t *udpf; 2072 2073 if (udp->udp_state != TS_DATA_XFER) { 2074 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2075 "udp_disconnect: bad state, %u", udp->udp_state); 2076 udp_err_ack(q, mp, TOUTSTATE, 0); 2077 return; 2078 } 2079 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 2080 mutex_enter(&udpf->uf_lock); 2081 udp->udp_v6src = udp->udp_bound_v6src; 2082 udp->udp_state = TS_IDLE; 2083 mutex_exit(&udpf->uf_lock); 2084 2085 /* 2086 * Send down bind to IP to remove the full binding and revert 2087 * to the local address binding. 2088 */ 2089 if (udp->udp_family == AF_INET) 2090 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 2091 else 2092 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 2093 if (mp1 == NULL) { 2094 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2095 return; 2096 } 2097 mp = mi_tpi_ok_ack_alloc(mp); 2098 if (mp == NULL) { 2099 /* Unable to reuse the T_DISCON_REQ for the ack. */ 2100 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 2101 return; 2102 } 2103 2104 if (udp->udp_family == AF_INET6) { 2105 int error; 2106 2107 /* Rebuild the header template */ 2108 error = udp_build_hdrs(q, udp); 2109 if (error != 0) { 2110 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 2111 freemsg(mp1); 2112 return; 2113 } 2114 } 2115 mutex_enter(&udpf->uf_lock); 2116 udp->udp_discon_pending = 1; 2117 mutex_exit(&udpf->uf_lock); 2118 2119 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 2120 linkb(mp1, mp); 2121 2122 if (udp->udp_family == AF_INET6) 2123 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 2124 else 2125 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 2126 2127 if (mp1 != NULL) 2128 udp_rput_other(_RD(q), mp1); 2129 else 2130 CONN_INC_REF(udp->udp_connp); 2131 } 2132 2133 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 2134 static void 2135 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 2136 { 2137 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 2138 putnext(UDP_RD(q), mp); 2139 } 2140 2141 /* Shorthand to generate and send TPI error acks to our client */ 2142 static void 2143 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2144 int sys_error) 2145 { 2146 struct T_error_ack *teackp; 2147 2148 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2149 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2150 teackp = (struct T_error_ack *)mp->b_rptr; 2151 teackp->ERROR_prim = primitive; 2152 teackp->TLI_error = t_error; 2153 teackp->UNIX_error = sys_error; 2154 putnext(UDP_RD(q), mp); 2155 } 2156 } 2157 2158 /*ARGSUSED*/ 2159 static int 2160 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2161 { 2162 int i; 2163 2164 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2165 if (udp_g_epriv_ports[i] != 0) 2166 (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); 2167 } 2168 return (0); 2169 } 2170 2171 /* ARGSUSED */ 2172 static int 2173 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2174 cred_t *cr) 2175 { 2176 long new_value; 2177 int i; 2178 2179 /* 2180 * Fail the request if the new value does not lie within the 2181 * port number limits. 
2182 */ 2183 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2184 new_value <= 0 || new_value >= 65536) { 2185 return (EINVAL); 2186 } 2187 2188 /* Check if the value is already in the list */ 2189 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2190 if (new_value == udp_g_epriv_ports[i]) { 2191 return (EEXIST); 2192 } 2193 } 2194 /* Find an empty slot */ 2195 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2196 if (udp_g_epriv_ports[i] == 0) 2197 break; 2198 } 2199 if (i == udp_g_num_epriv_ports) { 2200 return (EOVERFLOW); 2201 } 2202 2203 /* Set the new value */ 2204 udp_g_epriv_ports[i] = (in_port_t)new_value; 2205 return (0); 2206 } 2207 2208 /* ARGSUSED */ 2209 static int 2210 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2211 cred_t *cr) 2212 { 2213 long new_value; 2214 int i; 2215 2216 /* 2217 * Fail the request if the new value does not lie within the 2218 * port number limits. 2219 */ 2220 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2221 new_value <= 0 || new_value >= 65536) { 2222 return (EINVAL); 2223 } 2224 2225 /* Check that the value is already in the list */ 2226 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2227 if (udp_g_epriv_ports[i] == new_value) 2228 break; 2229 } 2230 if (i == udp_g_num_epriv_ports) { 2231 return (ESRCH); 2232 } 2233 2234 /* Clear the value */ 2235 udp_g_epriv_ports[i] = 0; 2236 return (0); 2237 } 2238 2239 /* At minimum we need 4 bytes of UDP header */ 2240 #define ICMP_MIN_UDP_HDR 4 2241 2242 /* 2243 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2244 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2245 * Assumes that IP has pulled up everything up to and including the ICMP header. 2246 * An M_CTL could potentially come here from some other module (i.e. if UDP 2247 * is pushed on some module other than IP). 
Thus, if we find that the M_CTL 2248 * does not have enough ICMP information , following STREAMS conventions, 2249 * we send it upstream assuming it is an M_CTL we don't understand. 2250 */ 2251 static void 2252 udp_icmp_error(queue_t *q, mblk_t *mp) 2253 { 2254 icmph_t *icmph; 2255 ipha_t *ipha; 2256 int iph_hdr_length; 2257 udpha_t *udpha; 2258 sin_t sin; 2259 sin6_t sin6; 2260 mblk_t *mp1; 2261 int error = 0; 2262 size_t mp_size = MBLKL(mp); 2263 udp_t *udp = Q_TO_UDP(q); 2264 2265 /* 2266 * Assume IP provides aligned packets - otherwise toss 2267 */ 2268 if (!OK_32PTR(mp->b_rptr)) { 2269 freemsg(mp); 2270 return; 2271 } 2272 2273 /* 2274 * Verify that we have a complete IP header and the application has 2275 * asked for errors. If not, send it upstream. 2276 */ 2277 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2278 noticmpv4: 2279 putnext(UDP_RD(q), mp); 2280 return; 2281 } 2282 2283 ipha = (ipha_t *)mp->b_rptr; 2284 /* 2285 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2286 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6. 2287 */ 2288 switch (IPH_HDR_VERSION(ipha)) { 2289 case IPV6_VERSION: 2290 udp_icmp_error_ipv6(q, mp); 2291 return; 2292 case IPV4_VERSION: 2293 break; 2294 default: 2295 goto noticmpv4; 2296 } 2297 2298 /* Skip past the outer IP and ICMP headers */ 2299 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2300 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2301 /* 2302 * If we don't have the correct outer IP header length or if the ULP 2303 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2304 * send the packet upstream. 
2305 */ 2306 if (iph_hdr_length < sizeof (ipha_t) || 2307 ipha->ipha_protocol != IPPROTO_ICMP || 2308 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2309 goto noticmpv4; 2310 } 2311 ipha = (ipha_t *)&icmph[1]; 2312 2313 /* Skip past the inner IP and find the ULP header */ 2314 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2315 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2316 /* 2317 * If we don't have the correct inner IP header length or if the ULP 2318 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2319 * bytes of UDP header, send it upstream. 2320 */ 2321 if (iph_hdr_length < sizeof (ipha_t) || 2322 ipha->ipha_protocol != IPPROTO_UDP || 2323 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2324 goto noticmpv4; 2325 } 2326 2327 switch (icmph->icmph_type) { 2328 case ICMP_DEST_UNREACHABLE: 2329 switch (icmph->icmph_code) { 2330 case ICMP_FRAGMENTATION_NEEDED: 2331 /* 2332 * IP has already adjusted the path MTU. 2333 * XXX Somehow pass MTU indication to application? 
2334 */ 2335 break; 2336 case ICMP_PORT_UNREACHABLE: 2337 case ICMP_PROTOCOL_UNREACHABLE: 2338 error = ECONNREFUSED; 2339 break; 2340 default: 2341 /* Transient errors */ 2342 break; 2343 } 2344 break; 2345 default: 2346 /* Transient errors */ 2347 break; 2348 } 2349 if (error == 0) { 2350 freemsg(mp); 2351 return; 2352 } 2353 2354 switch (udp->udp_family) { 2355 case AF_INET: 2356 sin = sin_null; 2357 sin.sin_family = AF_INET; 2358 sin.sin_addr.s_addr = ipha->ipha_dst; 2359 sin.sin_port = udpha->uha_dst_port; 2360 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2361 error); 2362 break; 2363 case AF_INET6: 2364 sin6 = sin6_null; 2365 sin6.sin6_family = AF_INET6; 2366 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2367 sin6.sin6_port = udpha->uha_dst_port; 2368 2369 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2370 NULL, 0, error); 2371 break; 2372 } 2373 if (mp1) 2374 putnext(UDP_RD(q), mp1); 2375 freemsg(mp); 2376 } 2377 2378 /* 2379 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2380 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2381 * Assumes that IP has pulled up all the extension headers as well as the 2382 * ICMPv6 header. 2383 * An M_CTL could potentially come here from some other module (i.e. if UDP 2384 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2385 * does not have enough ICMP information , following STREAMS conventions, 2386 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2387 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version 2388 * field (when cast to ipha_t in udp_icmp_error). 
2389 */ 2390 static void 2391 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2392 { 2393 icmp6_t *icmp6; 2394 ip6_t *ip6h, *outer_ip6h; 2395 uint16_t hdr_length; 2396 uint8_t *nexthdrp; 2397 udpha_t *udpha; 2398 sin6_t sin6; 2399 mblk_t *mp1; 2400 int error = 0; 2401 size_t mp_size = MBLKL(mp); 2402 udp_t *udp = Q_TO_UDP(q); 2403 2404 /* 2405 * Verify that we have a complete IP header. If not, send it upstream. 2406 */ 2407 if (mp_size < sizeof (ip6_t)) { 2408 noticmpv6: 2409 putnext(UDP_RD(q), mp); 2410 return; 2411 } 2412 2413 outer_ip6h = (ip6_t *)mp->b_rptr; 2414 /* 2415 * Verify this is an ICMPV6 packet, else send it upstream 2416 */ 2417 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2418 hdr_length = IPV6_HDR_LEN; 2419 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2420 &nexthdrp) || 2421 *nexthdrp != IPPROTO_ICMPV6) { 2422 goto noticmpv6; 2423 } 2424 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2425 ip6h = (ip6_t *)&icmp6[1]; 2426 /* 2427 * Verify we have a complete ICMP and inner IP header. 2428 */ 2429 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2430 goto noticmpv6; 2431 2432 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2433 goto noticmpv6; 2434 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2435 /* 2436 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2437 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2438 * packet upstream. 
2439 */ 2440 if ((*nexthdrp != IPPROTO_UDP) || 2441 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2442 goto noticmpv6; 2443 } 2444 2445 switch (icmp6->icmp6_type) { 2446 case ICMP6_DST_UNREACH: 2447 switch (icmp6->icmp6_code) { 2448 case ICMP6_DST_UNREACH_NOPORT: 2449 error = ECONNREFUSED; 2450 break; 2451 case ICMP6_DST_UNREACH_ADMIN: 2452 case ICMP6_DST_UNREACH_NOROUTE: 2453 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2454 case ICMP6_DST_UNREACH_ADDR: 2455 /* Transient errors */ 2456 break; 2457 default: 2458 break; 2459 } 2460 break; 2461 case ICMP6_PACKET_TOO_BIG: { 2462 struct T_unitdata_ind *tudi; 2463 struct T_opthdr *toh; 2464 size_t udi_size; 2465 mblk_t *newmp; 2466 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2467 sizeof (struct ip6_mtuinfo); 2468 sin6_t *sin6; 2469 struct ip6_mtuinfo *mtuinfo; 2470 2471 /* 2472 * If the application has requested to receive path mtu 2473 * information, send up an empty message containing an 2474 * IPV6_PATHMTU ancillary data item. 2475 */ 2476 if (!udp->udp_ipv6_recvpathmtu) 2477 break; 2478 2479 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2480 opt_length; 2481 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2482 BUMP_MIB(&udp_mib, udpInErrors); 2483 break; 2484 } 2485 2486 /* 2487 * newmp->b_cont is left to NULL on purpose. This is an 2488 * empty message containing only ancillary data. 
2489 */ 2490 newmp->b_datap->db_type = M_PROTO; 2491 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2492 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2493 tudi->PRIM_type = T_UNITDATA_IND; 2494 tudi->SRC_length = sizeof (sin6_t); 2495 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2496 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2497 tudi->OPT_length = opt_length; 2498 2499 sin6 = (sin6_t *)&tudi[1]; 2500 bzero(sin6, sizeof (sin6_t)); 2501 sin6->sin6_family = AF_INET6; 2502 sin6->sin6_addr = udp->udp_v6dst; 2503 2504 toh = (struct T_opthdr *)&sin6[1]; 2505 toh->level = IPPROTO_IPV6; 2506 toh->name = IPV6_PATHMTU; 2507 toh->len = opt_length; 2508 toh->status = 0; 2509 2510 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2511 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2512 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2513 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2514 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2515 /* 2516 * We've consumed everything we need from the original 2517 * message. Free it, then send our empty message. 2518 */ 2519 freemsg(mp); 2520 putnext(UDP_RD(q), newmp); 2521 return; 2522 } 2523 case ICMP6_TIME_EXCEEDED: 2524 /* Transient errors */ 2525 break; 2526 case ICMP6_PARAM_PROB: 2527 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2528 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2529 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2530 (uchar_t *)nexthdrp) { 2531 error = ECONNREFUSED; 2532 break; 2533 } 2534 break; 2535 } 2536 if (error == 0) { 2537 freemsg(mp); 2538 return; 2539 } 2540 2541 sin6 = sin6_null; 2542 sin6.sin6_family = AF_INET6; 2543 sin6.sin6_addr = ip6h->ip6_dst; 2544 sin6.sin6_port = udpha->uha_dst_port; 2545 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2546 2547 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2548 error); 2549 if (mp1) 2550 putnext(UDP_RD(q), mp1); 2551 freemsg(mp); 2552 } 2553 2554 /* 2555 * This routine responds to T_ADDR_REQ messages. 
It is called by udp_wput. 2556 * The local address is filled in if endpoint is bound. The remote address 2557 * is filled in if remote address has been precified ("connected endpoint") 2558 * (The concept of connected CLTS sockets is alien to published TPI 2559 * but we support it anyway). 2560 */ 2561 static void 2562 udp_addr_req(queue_t *q, mblk_t *mp) 2563 { 2564 sin_t *sin; 2565 sin6_t *sin6; 2566 mblk_t *ackmp; 2567 struct T_addr_ack *taa; 2568 udp_t *udp = Q_TO_UDP(q); 2569 2570 /* Make it large enough for worst case */ 2571 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2572 2 * sizeof (sin6_t), 1); 2573 if (ackmp == NULL) { 2574 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2575 return; 2576 } 2577 taa = (struct T_addr_ack *)ackmp->b_rptr; 2578 2579 bzero(taa, sizeof (struct T_addr_ack)); 2580 ackmp->b_wptr = (uchar_t *)&taa[1]; 2581 2582 taa->PRIM_type = T_ADDR_ACK; 2583 ackmp->b_datap->db_type = M_PCPROTO; 2584 /* 2585 * Note: Following code assumes 32 bit alignment of basic 2586 * data structures like sin_t and struct T_addr_ack. 2587 */ 2588 if (udp->udp_state != TS_UNBND) { 2589 /* 2590 * Fill in local address first 2591 */ 2592 taa->LOCADDR_offset = sizeof (*taa); 2593 if (udp->udp_family == AF_INET) { 2594 taa->LOCADDR_length = sizeof (sin_t); 2595 sin = (sin_t *)&taa[1]; 2596 /* Fill zeroes and then initialize non-zero fields */ 2597 *sin = sin_null; 2598 sin->sin_family = AF_INET; 2599 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2600 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2601 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2602 sin->sin_addr.s_addr); 2603 } else { 2604 /* 2605 * INADDR_ANY 2606 * udp_v6src is not set, we might be bound to 2607 * broadcast/multicast. 
Use udp_bound_v6src as 2608 * local address instead (that could 2609 * also still be INADDR_ANY) 2610 */ 2611 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2612 sin->sin_addr.s_addr); 2613 } 2614 sin->sin_port = udp->udp_port; 2615 ackmp->b_wptr = (uchar_t *)&sin[1]; 2616 if (udp->udp_state == TS_DATA_XFER) { 2617 /* 2618 * connected, fill remote address too 2619 */ 2620 taa->REMADDR_length = sizeof (sin_t); 2621 /* assumed 32-bit alignment */ 2622 taa->REMADDR_offset = taa->LOCADDR_offset + 2623 taa->LOCADDR_length; 2624 2625 sin = (sin_t *)(ackmp->b_rptr + 2626 taa->REMADDR_offset); 2627 /* initialize */ 2628 *sin = sin_null; 2629 sin->sin_family = AF_INET; 2630 sin->sin_addr.s_addr = 2631 V4_PART_OF_V6(udp->udp_v6dst); 2632 sin->sin_port = udp->udp_dstport; 2633 ackmp->b_wptr = (uchar_t *)&sin[1]; 2634 } 2635 } else { 2636 taa->LOCADDR_length = sizeof (sin6_t); 2637 sin6 = (sin6_t *)&taa[1]; 2638 /* Fill zeroes and then initialize non-zero fields */ 2639 *sin6 = sin6_null; 2640 sin6->sin6_family = AF_INET6; 2641 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2642 sin6->sin6_addr = udp->udp_v6src; 2643 } else { 2644 /* 2645 * UNSPECIFIED 2646 * udp_v6src is not set, we might be bound to 2647 * broadcast/multicast. 
Use udp_bound_v6src as 2648 * local address instead (that could 2649 * also still be UNSPECIFIED) 2650 */ 2651 sin6->sin6_addr = 2652 udp->udp_bound_v6src; 2653 } 2654 sin6->sin6_port = udp->udp_port; 2655 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2656 if (udp->udp_state == TS_DATA_XFER) { 2657 /* 2658 * connected, fill remote address too 2659 */ 2660 taa->REMADDR_length = sizeof (sin6_t); 2661 /* assumed 32-bit alignment */ 2662 taa->REMADDR_offset = taa->LOCADDR_offset + 2663 taa->LOCADDR_length; 2664 2665 sin6 = (sin6_t *)(ackmp->b_rptr + 2666 taa->REMADDR_offset); 2667 /* initialize */ 2668 *sin6 = sin6_null; 2669 sin6->sin6_family = AF_INET6; 2670 sin6->sin6_addr = udp->udp_v6dst; 2671 sin6->sin6_port = udp->udp_dstport; 2672 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2673 } 2674 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2675 } 2676 } 2677 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2678 putnext(UDP_RD(q), ackmp); 2679 } 2680 2681 static void 2682 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2683 { 2684 if (udp->udp_family == AF_INET) { 2685 *tap = udp_g_t_info_ack_ipv4; 2686 } else { 2687 *tap = udp_g_t_info_ack_ipv6; 2688 } 2689 tap->CURRENT_state = udp->udp_state; 2690 tap->OPT_size = udp_max_optsize; 2691 } 2692 2693 /* 2694 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2695 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2696 * udp_g_t_info_ack. The current state of the stream is copied from 2697 * udp_state. 
2698 */ 2699 static void 2700 udp_capability_req(queue_t *q, mblk_t *mp) 2701 { 2702 t_uscalar_t cap_bits1; 2703 struct T_capability_ack *tcap; 2704 udp_t *udp = Q_TO_UDP(q); 2705 2706 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2707 2708 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2709 mp->b_datap->db_type, T_CAPABILITY_ACK); 2710 if (!mp) 2711 return; 2712 2713 tcap = (struct T_capability_ack *)mp->b_rptr; 2714 tcap->CAP_bits1 = 0; 2715 2716 if (cap_bits1 & TC1_INFO) { 2717 udp_copy_info(&tcap->INFO_ack, udp); 2718 tcap->CAP_bits1 |= TC1_INFO; 2719 } 2720 2721 putnext(UDP_RD(q), mp); 2722 } 2723 2724 /* 2725 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2726 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2727 * The current state of the stream is copied from udp_state. 2728 */ 2729 static void 2730 udp_info_req(queue_t *q, mblk_t *mp) 2731 { 2732 udp_t *udp = Q_TO_UDP(q); 2733 2734 /* Create a T_INFO_ACK message. */ 2735 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2736 T_INFO_ACK); 2737 if (!mp) 2738 return; 2739 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2740 putnext(UDP_RD(q), mp); 2741 } 2742 2743 /* 2744 * IP recognizes seven kinds of bind requests: 2745 * 2746 * - A zero-length address binds only to the protocol number. 2747 * 2748 * - A 4-byte address is treated as a request to 2749 * validate that the address is a valid local IPv4 2750 * address, appropriate for an application to bind to. 2751 * IP does the verification, but does not make any note 2752 * of the address at this time. 2753 * 2754 * - A 16-byte address contains is treated as a request 2755 * to validate a local IPv6 address, as the 4-byte 2756 * address case above. 2757 * 2758 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2759 * use it for the inbound fanout of packets. 
2760 * 2761 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2762 * use it for the inbound fanout of packets. 2763 * 2764 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2765 * information consisting of local and remote addresses 2766 * and ports. In this case, the addresses are both 2767 * validated as appropriate for this operation, and, if 2768 * so, the information is retained for use in the 2769 * inbound fanout. 2770 * 2771 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2772 * fanout information, like the 12-byte case above. 2773 * 2774 * IP will also fill in the IRE request mblk with information 2775 * regarding our peer. In all cases, we notify IP of our protocol 2776 * type by appending a single protocol byte to the bind request. 2777 */ 2778 static mblk_t * 2779 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2780 { 2781 char *cp; 2782 mblk_t *mp; 2783 struct T_bind_req *tbr; 2784 ipa_conn_t *ac; 2785 ipa6_conn_t *ac6; 2786 sin_t *sin; 2787 sin6_t *sin6; 2788 2789 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2790 2791 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2792 if (!mp) 2793 return (mp); 2794 mp->b_datap->db_type = M_PROTO; 2795 tbr = (struct T_bind_req *)mp->b_rptr; 2796 tbr->PRIM_type = bind_prim; 2797 tbr->ADDR_offset = sizeof (*tbr); 2798 tbr->CONIND_number = 0; 2799 tbr->ADDR_length = addr_length; 2800 cp = (char *)&tbr[1]; 2801 switch (addr_length) { 2802 case sizeof (ipa_conn_t): 2803 ASSERT(udp->udp_family == AF_INET); 2804 /* Append a request for an IRE */ 2805 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2806 if (!mp->b_cont) { 2807 freemsg(mp); 2808 return (NULL); 2809 } 2810 mp->b_cont->b_wptr += sizeof (ire_t); 2811 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2812 2813 /* cp known to be 32 bit aligned */ 2814 ac = (ipa_conn_t *)cp; 2815 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2816 ac->ac_faddr = 
V4_PART_OF_V6(udp->udp_v6dst); 2817 ac->ac_fport = udp->udp_dstport; 2818 ac->ac_lport = udp->udp_port; 2819 break; 2820 2821 case sizeof (ipa6_conn_t): 2822 ASSERT(udp->udp_family == AF_INET6); 2823 /* Append a request for an IRE */ 2824 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2825 if (!mp->b_cont) { 2826 freemsg(mp); 2827 return (NULL); 2828 } 2829 mp->b_cont->b_wptr += sizeof (ire_t); 2830 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2831 2832 /* cp known to be 32 bit aligned */ 2833 ac6 = (ipa6_conn_t *)cp; 2834 ac6->ac6_laddr = udp->udp_v6src; 2835 ac6->ac6_faddr = udp->udp_v6dst; 2836 ac6->ac6_fport = udp->udp_dstport; 2837 ac6->ac6_lport = udp->udp_port; 2838 break; 2839 2840 case sizeof (sin_t): 2841 ASSERT(udp->udp_family == AF_INET); 2842 /* Append a request for an IRE */ 2843 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2844 if (!mp->b_cont) { 2845 freemsg(mp); 2846 return (NULL); 2847 } 2848 mp->b_cont->b_wptr += sizeof (ire_t); 2849 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2850 2851 sin = (sin_t *)cp; 2852 *sin = sin_null; 2853 sin->sin_family = AF_INET; 2854 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2855 sin->sin_port = udp->udp_port; 2856 break; 2857 2858 case sizeof (sin6_t): 2859 ASSERT(udp->udp_family == AF_INET6); 2860 /* Append a request for an IRE */ 2861 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2862 if (!mp->b_cont) { 2863 freemsg(mp); 2864 return (NULL); 2865 } 2866 mp->b_cont->b_wptr += sizeof (ire_t); 2867 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2868 2869 sin6 = (sin6_t *)cp; 2870 *sin6 = sin6_null; 2871 sin6->sin6_family = AF_INET6; 2872 sin6->sin6_addr = udp->udp_bound_v6src; 2873 sin6->sin6_port = udp->udp_port; 2874 break; 2875 } 2876 /* Add protocol number to end */ 2877 cp[addr_length] = (char)IPPROTO_UDP; 2878 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2879 return (mp); 2880 } 2881 2882 /* 2883 * This is the open routine for udp. 
It allocates a udp_t structure for 2884 * the stream and, on the first open of the module, creates an ND table. 2885 */ 2886 /* ARGSUSED */ 2887 static int 2888 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2889 { 2890 int err; 2891 udp_t *udp; 2892 conn_t *connp; 2893 zoneid_t zoneid = getzoneid(); 2894 queue_t *ip_wq; 2895 2896 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2897 2898 /* If the stream is already open, return immediately. */ 2899 if (q->q_ptr != NULL) 2900 return (0); 2901 2902 /* If this is not a push of udp as a module, fail. */ 2903 if (sflag != MODOPEN) 2904 return (EINVAL); 2905 2906 q->q_hiwat = udp_recv_hiwat; 2907 WR(q)->q_hiwat = udp_xmit_hiwat; 2908 WR(q)->q_lowat = udp_xmit_lowat; 2909 2910 /* Insert ourselves in the stream since we're about to walk q_next */ 2911 qprocson(q); 2912 2913 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2914 bzero(udp, sizeof (*udp)); 2915 2916 /* 2917 * UDP is supported only as a module and it has to be pushed directly 2918 * above the device instance of IP. If UDP is pushed anywhere else 2919 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2920 * sake of MIB browsers and fail everything else. 2921 */ 2922 ip_wq = WR(q)->q_next; 2923 if (NOT_OVER_IP(ip_wq)) { 2924 /* Support just SNMP for MIB browsers */ 2925 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); 2926 connp->conn_rq = q; 2927 connp->conn_wq = WR(q); 2928 connp->conn_flags |= IPCL_UDPMOD; 2929 connp->conn_cred = credp; 2930 connp->conn_zoneid = zoneid; 2931 connp->conn_udp = udp; 2932 udp->udp_connp = connp; 2933 q->q_ptr = WR(q)->q_ptr = connp; 2934 crhold(credp); 2935 q->q_qinfo = &udp_snmp_rinit; 2936 WR(q)->q_qinfo = &udp_snmp_winit; 2937 return (0); 2938 } 2939 2940 /* 2941 * Initialize the udp_t structure for this stream. 
2942 */ 2943 q = RD(ip_wq); 2944 connp = Q_TO_CONN(q); 2945 mutex_enter(&connp->conn_lock); 2946 connp->conn_proto = IPPROTO_UDP; 2947 connp->conn_flags |= IPCL_UDP; 2948 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2949 connp->conn_udp = udp; 2950 2951 /* Set the initial state of the stream and the privilege status. */ 2952 udp->udp_connp = connp; 2953 udp->udp_state = TS_UNBND; 2954 udp->udp_mode = UDP_MT_HOT; 2955 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2956 udp->udp_family = AF_INET6; 2957 udp->udp_ipversion = IPV6_VERSION; 2958 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2959 udp->udp_ttl = udp_ipv6_hoplimit; 2960 connp->conn_af_isv6 = B_TRUE; 2961 connp->conn_flags |= IPCL_ISV6; 2962 } else { 2963 udp->udp_family = AF_INET; 2964 udp->udp_ipversion = IPV4_VERSION; 2965 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2966 udp->udp_ttl = udp_ipv4_ttl; 2967 connp->conn_af_isv6 = B_FALSE; 2968 connp->conn_flags &= ~IPCL_ISV6; 2969 } 2970 2971 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2972 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2973 connp->conn_zoneid = zoneid; 2974 2975 udp->udp_open_time = lbolt64; 2976 udp->udp_open_pid = curproc->p_pid; 2977 2978 /* 2979 * If the caller has the process-wide flag set, then default to MAC 2980 * exempt mode. This allows read-down to unlabeled hosts. 2981 */ 2982 if (getpflags(NET_MAC_AWARE, credp) != 0) 2983 udp->udp_mac_exempt = B_TRUE; 2984 2985 if (connp->conn_flags & IPCL_SOCKET) { 2986 udp->udp_issocket = B_TRUE; 2987 udp->udp_direct_sockfs = B_TRUE; 2988 } 2989 2990 connp->conn_ulp_labeled = is_system_labeled(); 2991 2992 mutex_exit(&connp->conn_lock); 2993 2994 /* 2995 * The transmit hiwat/lowat is only looked at on IP's queue. 2996 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2997 * getsockopts. 
2998 */ 2999 q->q_hiwat = udp_recv_hiwat; 3000 WR(q)->q_hiwat = udp_xmit_hiwat; 3001 WR(q)->q_lowat = udp_xmit_lowat; 3002 3003 if (udp->udp_family == AF_INET6) { 3004 /* Build initial header template for transmit */ 3005 if ((err = udp_build_hdrs(q, udp)) != 0) { 3006 error: 3007 qprocsoff(UDP_RD(q)); 3008 udp->udp_connp = NULL; 3009 connp->conn_udp = NULL; 3010 kmem_cache_free(udp_cache, udp); 3011 return (err); 3012 } 3013 } 3014 3015 /* Set the Stream head write offset and high watermark. */ 3016 (void) mi_set_sth_wroff(UDP_RD(q), 3017 udp->udp_max_hdr_len + udp_wroff_extra); 3018 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 3019 3020 WR(UDP_RD(q))->q_qinfo = &udp_winit; 3021 3022 return (0); 3023 } 3024 3025 /* 3026 * Which UDP options OK to set through T_UNITDATA_REQ... 3027 */ 3028 /* ARGSUSED */ 3029 static boolean_t 3030 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 3031 { 3032 return (B_TRUE); 3033 } 3034 3035 /* 3036 * This routine gets default values of certain options whose default 3037 * values are maintained by protcol specific code 3038 */ 3039 /* ARGSUSED */ 3040 int 3041 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 3042 { 3043 int *i1 = (int *)ptr; 3044 3045 switch (level) { 3046 case IPPROTO_IP: 3047 switch (name) { 3048 case IP_MULTICAST_TTL: 3049 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 3050 return (sizeof (uchar_t)); 3051 case IP_MULTICAST_LOOP: 3052 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 3053 return (sizeof (uchar_t)); 3054 } 3055 break; 3056 case IPPROTO_IPV6: 3057 switch (name) { 3058 case IPV6_MULTICAST_HOPS: 3059 *i1 = IP_DEFAULT_MULTICAST_TTL; 3060 return (sizeof (int)); 3061 case IPV6_MULTICAST_LOOP: 3062 *i1 = IP_DEFAULT_MULTICAST_LOOP; 3063 return (sizeof (int)); 3064 case IPV6_UNICAST_HOPS: 3065 *i1 = udp_ipv6_hoplimit; 3066 return (sizeof (int)); 3067 } 3068 break; 3069 } 3070 return (-1); 3071 } 3072 3073 /* 3074 * This routine retrieves the current status 
of socket options 3075 * and expects the caller to pass in the queue pointer of the 3076 * upper instance. It returns the size of the option retrieved. 3077 */ 3078 int 3079 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 3080 { 3081 int *i1 = (int *)ptr; 3082 conn_t *connp; 3083 udp_t *udp; 3084 ip6_pkt_t *ipp; 3085 int len; 3086 3087 q = UDP_WR(q); 3088 connp = Q_TO_CONN(q); 3089 udp = connp->conn_udp; 3090 ipp = &udp->udp_sticky_ipp; 3091 3092 switch (level) { 3093 case SOL_SOCKET: 3094 switch (name) { 3095 case SO_DEBUG: 3096 *i1 = udp->udp_debug; 3097 break; /* goto sizeof (int) option return */ 3098 case SO_REUSEADDR: 3099 *i1 = udp->udp_reuseaddr; 3100 break; /* goto sizeof (int) option return */ 3101 case SO_TYPE: 3102 *i1 = SOCK_DGRAM; 3103 break; /* goto sizeof (int) option return */ 3104 3105 /* 3106 * The following three items are available here, 3107 * but are only meaningful to IP. 3108 */ 3109 case SO_DONTROUTE: 3110 *i1 = udp->udp_dontroute; 3111 break; /* goto sizeof (int) option return */ 3112 case SO_USELOOPBACK: 3113 *i1 = udp->udp_useloopback; 3114 break; /* goto sizeof (int) option return */ 3115 case SO_BROADCAST: 3116 *i1 = udp->udp_broadcast; 3117 break; /* goto sizeof (int) option return */ 3118 3119 case SO_SNDBUF: 3120 *i1 = q->q_hiwat; 3121 break; /* goto sizeof (int) option return */ 3122 case SO_RCVBUF: 3123 *i1 = RD(q)->q_hiwat; 3124 break; /* goto sizeof (int) option return */ 3125 case SO_DGRAM_ERRIND: 3126 *i1 = udp->udp_dgram_errind; 3127 break; /* goto sizeof (int) option return */ 3128 case SO_RECVUCRED: 3129 *i1 = udp->udp_recvucred; 3130 break; /* goto sizeof (int) option return */ 3131 case SO_TIMESTAMP: 3132 *i1 = udp->udp_timestamp; 3133 break; /* goto sizeof (int) option return */ 3134 case SO_ANON_MLP: 3135 *i1 = udp->udp_anon_mlp; 3136 break; /* goto sizeof (int) option return */ 3137 case SO_MAC_EXEMPT: 3138 *i1 = udp->udp_mac_exempt; 3139 break; /* goto sizeof (int) option return */ 3140 case 
SO_ALLZONES: 3141 *i1 = connp->conn_allzones; 3142 break; /* goto sizeof (int) option return */ 3143 case SO_EXCLBIND: 3144 *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0; 3145 break; 3146 case SO_PROTOTYPE: 3147 *i1 = IPPROTO_UDP; 3148 break; 3149 case SO_DOMAIN: 3150 *i1 = udp->udp_family; 3151 break; 3152 default: 3153 return (-1); 3154 } 3155 break; 3156 case IPPROTO_IP: 3157 if (udp->udp_family != AF_INET) 3158 return (-1); 3159 switch (name) { 3160 case IP_OPTIONS: 3161 case T_IP_OPTIONS: 3162 len = udp->udp_ip_rcv_options_len - udp->udp_label_len; 3163 if (len > 0) { 3164 bcopy(udp->udp_ip_rcv_options + 3165 udp->udp_label_len, ptr, len); 3166 } 3167 return (len); 3168 case IP_TOS: 3169 case T_IP_TOS: 3170 *i1 = (int)udp->udp_type_of_service; 3171 break; /* goto sizeof (int) option return */ 3172 case IP_TTL: 3173 *i1 = (int)udp->udp_ttl; 3174 break; /* goto sizeof (int) option return */ 3175 case IP_NEXTHOP: 3176 case IP_RECVPKTINFO: 3177 /* 3178 * This also handles IP_PKTINFO. 3179 * IP_PKTINFO and IP_RECVPKTINFO have the same value. 3180 * Differentiation is based on the size of the argument 3181 * passed in. 3182 * This option is handled in IP which will return an 3183 * error for IP_PKTINFO as it's not supported as a 3184 * sticky option. 
3185 */ 3186 return (-EINVAL); 3187 case IP_MULTICAST_IF: 3188 /* 0 address if not set */ 3189 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 3190 return (sizeof (ipaddr_t)); 3191 case IP_MULTICAST_TTL: 3192 *(uchar_t *)ptr = udp->udp_multicast_ttl; 3193 return (sizeof (uchar_t)); 3194 case IP_MULTICAST_LOOP: 3195 *ptr = connp->conn_multicast_loop; 3196 return (sizeof (uint8_t)); 3197 case IP_RECVOPTS: 3198 *i1 = udp->udp_recvopts; 3199 break; /* goto sizeof (int) option return */ 3200 case IP_RECVDSTADDR: 3201 *i1 = udp->udp_recvdstaddr; 3202 break; /* goto sizeof (int) option return */ 3203 case IP_RECVIF: 3204 *i1 = udp->udp_recvif; 3205 break; /* goto sizeof (int) option return */ 3206 case IP_RECVSLLA: 3207 *i1 = udp->udp_recvslla; 3208 break; /* goto sizeof (int) option return */ 3209 case IP_RECVTTL: 3210 *i1 = udp->udp_recvttl; 3211 break; /* goto sizeof (int) option return */ 3212 case IP_ADD_MEMBERSHIP: 3213 case IP_DROP_MEMBERSHIP: 3214 case IP_BLOCK_SOURCE: 3215 case IP_UNBLOCK_SOURCE: 3216 case IP_ADD_SOURCE_MEMBERSHIP: 3217 case IP_DROP_SOURCE_MEMBERSHIP: 3218 case MCAST_JOIN_GROUP: 3219 case MCAST_LEAVE_GROUP: 3220 case MCAST_BLOCK_SOURCE: 3221 case MCAST_UNBLOCK_SOURCE: 3222 case MCAST_JOIN_SOURCE_GROUP: 3223 case MCAST_LEAVE_SOURCE_GROUP: 3224 case IP_DONTFAILOVER_IF: 3225 /* cannot "get" the value for these */ 3226 return (-1); 3227 case IP_BOUND_IF: 3228 /* Zero if not set */ 3229 *i1 = udp->udp_bound_if; 3230 break; /* goto sizeof (int) option return */ 3231 case IP_UNSPEC_SRC: 3232 *i1 = udp->udp_unspec_source; 3233 break; /* goto sizeof (int) option return */ 3234 case IP_XMIT_IF: 3235 *i1 = udp->udp_xmit_if; 3236 break; /* goto sizeof (int) option return */ 3237 default: 3238 return (-1); 3239 } 3240 break; 3241 case IPPROTO_IPV6: 3242 if (udp->udp_family != AF_INET6) 3243 return (-1); 3244 switch (name) { 3245 case IPV6_UNICAST_HOPS: 3246 *i1 = (unsigned int)udp->udp_ttl; 3247 break; /* goto sizeof (int) option return */ 3248 case 
IPV6_MULTICAST_IF: 3249 /* 0 index if not set */ 3250 *i1 = udp->udp_multicast_if_index; 3251 break; /* goto sizeof (int) option return */ 3252 case IPV6_MULTICAST_HOPS: 3253 *i1 = udp->udp_multicast_ttl; 3254 break; /* goto sizeof (int) option return */ 3255 case IPV6_MULTICAST_LOOP: 3256 *i1 = connp->conn_multicast_loop; 3257 break; /* goto sizeof (int) option return */ 3258 case IPV6_JOIN_GROUP: 3259 case IPV6_LEAVE_GROUP: 3260 case MCAST_JOIN_GROUP: 3261 case MCAST_LEAVE_GROUP: 3262 case MCAST_BLOCK_SOURCE: 3263 case MCAST_UNBLOCK_SOURCE: 3264 case MCAST_JOIN_SOURCE_GROUP: 3265 case MCAST_LEAVE_SOURCE_GROUP: 3266 /* cannot "get" the value for these */ 3267 return (-1); 3268 case IPV6_BOUND_IF: 3269 /* Zero if not set */ 3270 *i1 = udp->udp_bound_if; 3271 break; /* goto sizeof (int) option return */ 3272 case IPV6_UNSPEC_SRC: 3273 *i1 = udp->udp_unspec_source; 3274 break; /* goto sizeof (int) option return */ 3275 case IPV6_RECVPKTINFO: 3276 *i1 = udp->udp_ip_recvpktinfo; 3277 break; /* goto sizeof (int) option return */ 3278 case IPV6_RECVTCLASS: 3279 *i1 = udp->udp_ipv6_recvtclass; 3280 break; /* goto sizeof (int) option return */ 3281 case IPV6_RECVPATHMTU: 3282 *i1 = udp->udp_ipv6_recvpathmtu; 3283 break; /* goto sizeof (int) option return */ 3284 case IPV6_RECVHOPLIMIT: 3285 *i1 = udp->udp_ipv6_recvhoplimit; 3286 break; /* goto sizeof (int) option return */ 3287 case IPV6_RECVHOPOPTS: 3288 *i1 = udp->udp_ipv6_recvhopopts; 3289 break; /* goto sizeof (int) option return */ 3290 case IPV6_RECVDSTOPTS: 3291 *i1 = udp->udp_ipv6_recvdstopts; 3292 break; /* goto sizeof (int) option return */ 3293 case _OLD_IPV6_RECVDSTOPTS: 3294 *i1 = udp->udp_old_ipv6_recvdstopts; 3295 break; /* goto sizeof (int) option return */ 3296 case IPV6_RECVRTHDRDSTOPTS: 3297 *i1 = udp->udp_ipv6_recvrthdrdstopts; 3298 break; /* goto sizeof (int) option return */ 3299 case IPV6_RECVRTHDR: 3300 *i1 = udp->udp_ipv6_recvrthdr; 3301 break; /* goto sizeof (int) option return */ 3302 case 
IPV6_PKTINFO: { 3303 /* XXX assumes that caller has room for max size! */ 3304 struct in6_pktinfo *pkti; 3305 3306 pkti = (struct in6_pktinfo *)ptr; 3307 if (ipp->ipp_fields & IPPF_IFINDEX) 3308 pkti->ipi6_ifindex = ipp->ipp_ifindex; 3309 else 3310 pkti->ipi6_ifindex = 0; 3311 if (ipp->ipp_fields & IPPF_ADDR) 3312 pkti->ipi6_addr = ipp->ipp_addr; 3313 else 3314 pkti->ipi6_addr = ipv6_all_zeros; 3315 return (sizeof (struct in6_pktinfo)); 3316 } 3317 case IPV6_TCLASS: 3318 if (ipp->ipp_fields & IPPF_TCLASS) 3319 *i1 = ipp->ipp_tclass; 3320 else 3321 *i1 = IPV6_FLOW_TCLASS( 3322 IPV6_DEFAULT_VERS_AND_FLOW); 3323 break; /* goto sizeof (int) option return */ 3324 case IPV6_NEXTHOP: { 3325 sin6_t *sin6 = (sin6_t *)ptr; 3326 3327 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 3328 return (0); 3329 *sin6 = sin6_null; 3330 sin6->sin6_family = AF_INET6; 3331 sin6->sin6_addr = ipp->ipp_nexthop; 3332 return (sizeof (sin6_t)); 3333 } 3334 case IPV6_HOPOPTS: 3335 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 3336 return (0); 3337 if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6) 3338 return (0); 3339 /* 3340 * The cipso/label option is added by kernel. 3341 * User is not usually aware of this option. 3342 * We copy out the hbh opt after the label option. 
3343 */ 3344 bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6, 3345 ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6); 3346 if (udp->udp_label_len_v6 > 0) { 3347 ptr[0] = ((char *)ipp->ipp_hopopts)[0]; 3348 ptr[1] = (ipp->ipp_hopoptslen - 3349 udp->udp_label_len_v6 + 7) / 8 - 1; 3350 } 3351 return (ipp->ipp_hopoptslen - udp->udp_label_len_v6); 3352 case IPV6_RTHDRDSTOPTS: 3353 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 3354 return (0); 3355 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 3356 return (ipp->ipp_rtdstoptslen); 3357 case IPV6_RTHDR: 3358 if (!(ipp->ipp_fields & IPPF_RTHDR)) 3359 return (0); 3360 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 3361 return (ipp->ipp_rthdrlen); 3362 case IPV6_DSTOPTS: 3363 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 3364 return (0); 3365 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 3366 return (ipp->ipp_dstoptslen); 3367 case IPV6_PATHMTU: 3368 return (ip_fill_mtuinfo(&udp->udp_v6dst, 3369 udp->udp_dstport, (struct ip6_mtuinfo *)ptr)); 3370 default: 3371 return (-1); 3372 } 3373 break; 3374 case IPPROTO_UDP: 3375 switch (name) { 3376 case UDP_ANONPRIVBIND: 3377 *i1 = udp->udp_anon_priv_bind; 3378 break; 3379 case UDP_EXCLBIND: 3380 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 3381 break; 3382 case UDP_RCVHDR: 3383 *i1 = udp->udp_rcvhdr ? 1 : 0; 3384 break; 3385 default: 3386 return (-1); 3387 } 3388 break; 3389 default: 3390 return (-1); 3391 } 3392 return (sizeof (int)); 3393 } 3394 3395 /* 3396 * This routine sets socket options; it expects the caller 3397 * to pass in the queue pointer of the upper instance. 3398 */ 3399 /* ARGSUSED */ 3400 int 3401 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3402 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3403 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3404 { 3405 udpattrs_t *attrs = thisdg_attrs; 3406 int *i1 = (int *)invalp; 3407 boolean_t onoff = (*i1 == 0) ? 
0 : 1; 3408 boolean_t checkonly; 3409 int error; 3410 conn_t *connp; 3411 udp_t *udp; 3412 uint_t newlen; 3413 3414 q = UDP_WR(q); 3415 connp = Q_TO_CONN(q); 3416 udp = connp->conn_udp; 3417 3418 switch (optset_context) { 3419 case SETFN_OPTCOM_CHECKONLY: 3420 checkonly = B_TRUE; 3421 /* 3422 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3423 * inlen != 0 implies value supplied and 3424 * we have to "pretend" to set it. 3425 * inlen == 0 implies that there is no 3426 * value part in T_CHECK request and just validation 3427 * done elsewhere should be enough, we just return here. 3428 */ 3429 if (inlen == 0) { 3430 *outlenp = 0; 3431 return (0); 3432 } 3433 break; 3434 case SETFN_OPTCOM_NEGOTIATE: 3435 checkonly = B_FALSE; 3436 break; 3437 case SETFN_UD_NEGOTIATE: 3438 case SETFN_CONN_NEGOTIATE: 3439 checkonly = B_FALSE; 3440 /* 3441 * Negotiating local and "association-related" options 3442 * through T_UNITDATA_REQ. 3443 * 3444 * Following routine can filter out ones we do not 3445 * want to be "set" this way. 3446 */ 3447 if (!udp_opt_allow_udr_set(level, name)) { 3448 *outlenp = 0; 3449 return (EINVAL); 3450 } 3451 break; 3452 default: 3453 /* 3454 * We should never get here 3455 */ 3456 *outlenp = 0; 3457 return (EINVAL); 3458 } 3459 3460 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3461 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3462 3463 /* 3464 * For fixed length options, no sanity check 3465 * of passed in length is done. It is assumed *_optcom_req() 3466 * routines do the right thing. 3467 */ 3468 3469 switch (level) { 3470 case SOL_SOCKET: 3471 switch (name) { 3472 case SO_REUSEADDR: 3473 if (!checkonly) 3474 udp->udp_reuseaddr = onoff; 3475 break; 3476 case SO_DEBUG: 3477 if (!checkonly) 3478 udp->udp_debug = onoff; 3479 break; 3480 /* 3481 * The following three items are available here, 3482 * but are only meaningful to IP. 
3483 */ 3484 case SO_DONTROUTE: 3485 if (!checkonly) 3486 udp->udp_dontroute = onoff; 3487 break; 3488 case SO_USELOOPBACK: 3489 if (!checkonly) 3490 udp->udp_useloopback = onoff; 3491 break; 3492 case SO_BROADCAST: 3493 if (!checkonly) 3494 udp->udp_broadcast = onoff; 3495 break; 3496 3497 case SO_SNDBUF: 3498 if (*i1 > udp_max_buf) { 3499 *outlenp = 0; 3500 return (ENOBUFS); 3501 } 3502 if (!checkonly) { 3503 q->q_hiwat = *i1; 3504 WR(UDP_RD(q))->q_hiwat = *i1; 3505 } 3506 break; 3507 case SO_RCVBUF: 3508 if (*i1 > udp_max_buf) { 3509 *outlenp = 0; 3510 return (ENOBUFS); 3511 } 3512 if (!checkonly) { 3513 RD(q)->q_hiwat = *i1; 3514 UDP_RD(q)->q_hiwat = *i1; 3515 (void) mi_set_sth_hiwat(UDP_RD(q), 3516 udp_set_rcv_hiwat(udp, *i1)); 3517 } 3518 break; 3519 case SO_DGRAM_ERRIND: 3520 if (!checkonly) 3521 udp->udp_dgram_errind = onoff; 3522 break; 3523 case SO_RECVUCRED: 3524 if (!checkonly) 3525 udp->udp_recvucred = onoff; 3526 break; 3527 case SO_ALLZONES: 3528 /* 3529 * "soft" error (negative) 3530 * option not handled at this level 3531 * Do not modify *outlenp. 3532 */ 3533 return (-EINVAL); 3534 case SO_TIMESTAMP: 3535 if (!checkonly) 3536 udp->udp_timestamp = onoff; 3537 break; 3538 case SO_ANON_MLP: 3539 if (!checkonly) 3540 udp->udp_anon_mlp = onoff; 3541 break; 3542 case SO_MAC_EXEMPT: 3543 if (secpolicy_net_mac_aware(cr) != 0 || 3544 udp->udp_state != TS_UNBND) 3545 return (EACCES); 3546 if (!checkonly) 3547 udp->udp_mac_exempt = onoff; 3548 break; 3549 case SCM_UCRED: { 3550 struct ucred_s *ucr; 3551 cred_t *cr, *newcr; 3552 ts_label_t *tsl; 3553 3554 /* 3555 * Only sockets that have proper privileges and are 3556 * bound to MLPs will have any other value here, so 3557 * this implicitly tests for privilege to set label. 
3558 */ 3559 if (connp->conn_mlp_type == mlptSingle) 3560 break; 3561 ucr = (struct ucred_s *)invalp; 3562 if (inlen != ucredsize || 3563 ucr->uc_labeloff < sizeof (*ucr) || 3564 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 3565 return (EINVAL); 3566 if (!checkonly) { 3567 mblk_t *mb; 3568 3569 if (attrs == NULL || 3570 (mb = attrs->udpattr_mb) == NULL) 3571 return (EINVAL); 3572 if ((cr = DB_CRED(mb)) == NULL) 3573 cr = udp->udp_connp->conn_cred; 3574 ASSERT(cr != NULL); 3575 if ((tsl = crgetlabel(cr)) == NULL) 3576 return (EINVAL); 3577 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 3578 tsl->tsl_doi, KM_NOSLEEP); 3579 if (newcr == NULL) 3580 return (ENOSR); 3581 mblk_setcred(mb, newcr); 3582 attrs->udpattr_credset = B_TRUE; 3583 crfree(newcr); 3584 } 3585 break; 3586 } 3587 case SO_EXCLBIND: 3588 if (!checkonly) 3589 udp->udp_exclbind = onoff; 3590 break; 3591 default: 3592 *outlenp = 0; 3593 return (EINVAL); 3594 } 3595 break; 3596 case IPPROTO_IP: 3597 if (udp->udp_family != AF_INET) { 3598 *outlenp = 0; 3599 return (ENOPROTOOPT); 3600 } 3601 switch (name) { 3602 case IP_OPTIONS: 3603 case T_IP_OPTIONS: 3604 /* Save options for use by IP. 
*/ 3605 newlen = inlen + udp->udp_label_len; 3606 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 3607 *outlenp = 0; 3608 return (EINVAL); 3609 } 3610 if (checkonly) 3611 break; 3612 3613 if (!tsol_option_set(&udp->udp_ip_snd_options, 3614 &udp->udp_ip_snd_options_len, 3615 udp->udp_label_len, invalp, inlen)) { 3616 *outlenp = 0; 3617 return (ENOMEM); 3618 } 3619 3620 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3621 UDPH_SIZE + udp->udp_ip_snd_options_len; 3622 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3623 udp_wroff_extra); 3624 break; 3625 3626 case IP_TTL: 3627 if (!checkonly) { 3628 udp->udp_ttl = (uchar_t)*i1; 3629 } 3630 break; 3631 case IP_TOS: 3632 case T_IP_TOS: 3633 if (!checkonly) { 3634 udp->udp_type_of_service = (uchar_t)*i1; 3635 } 3636 break; 3637 case IP_MULTICAST_IF: { 3638 /* 3639 * TODO should check OPTMGMT reply and undo this if 3640 * there is an error. 3641 */ 3642 struct in_addr *inap = (struct in_addr *)invalp; 3643 if (!checkonly) { 3644 udp->udp_multicast_if_addr = 3645 inap->s_addr; 3646 } 3647 break; 3648 } 3649 case IP_MULTICAST_TTL: 3650 if (!checkonly) 3651 udp->udp_multicast_ttl = *invalp; 3652 break; 3653 case IP_MULTICAST_LOOP: 3654 if (!checkonly) 3655 connp->conn_multicast_loop = *invalp; 3656 break; 3657 case IP_RECVOPTS: 3658 if (!checkonly) 3659 udp->udp_recvopts = onoff; 3660 break; 3661 case IP_RECVDSTADDR: 3662 if (!checkonly) 3663 udp->udp_recvdstaddr = onoff; 3664 break; 3665 case IP_RECVIF: 3666 if (!checkonly) 3667 udp->udp_recvif = onoff; 3668 break; 3669 case IP_RECVSLLA: 3670 if (!checkonly) 3671 udp->udp_recvslla = onoff; 3672 break; 3673 case IP_RECVTTL: 3674 if (!checkonly) 3675 udp->udp_recvttl = onoff; 3676 break; 3677 case IP_PKTINFO: { 3678 /* 3679 * This also handles IP_RECVPKTINFO. 3680 * IP_PKTINFO and IP_RECVPKTINFO have same value. 3681 * Differentiation is based on the size of the 3682 * argument passed in. 
3683 */ 3684 struct in_pktinfo *pktinfop; 3685 ip4_pkt_t *attr_pktinfop; 3686 3687 if (checkonly) 3688 break; 3689 3690 if (inlen == sizeof (int)) { 3691 /* 3692 * This is IP_RECVPKTINFO option. 3693 * Keep a local copy of whether this option is 3694 * set or not and pass it down to IP for 3695 * processing. 3696 */ 3697 3698 udp->udp_ip_recvpktinfo = onoff; 3699 return (-EINVAL); 3700 } 3701 3702 if (attrs == NULL || 3703 (attr_pktinfop = attrs->udpattr_ipp4) == NULL) { 3704 /* 3705 * sticky option or no buffer to return 3706 * the results. 3707 */ 3708 return (EINVAL); 3709 } 3710 3711 if (inlen != sizeof (struct in_pktinfo)) 3712 return (EINVAL); 3713 3714 pktinfop = (struct in_pktinfo *)invalp; 3715 3716 /* 3717 * At least one of the values should be specified 3718 */ 3719 if (pktinfop->ipi_ifindex == 0 && 3720 pktinfop->ipi_spec_dst.s_addr == INADDR_ANY) { 3721 return (EINVAL); 3722 } 3723 3724 attr_pktinfop->ip4_addr = pktinfop->ipi_spec_dst.s_addr; 3725 attr_pktinfop->ip4_ill_index = pktinfop->ipi_ifindex; 3726 3727 break; 3728 } 3729 case IP_ADD_MEMBERSHIP: 3730 case IP_DROP_MEMBERSHIP: 3731 case IP_BLOCK_SOURCE: 3732 case IP_UNBLOCK_SOURCE: 3733 case IP_ADD_SOURCE_MEMBERSHIP: 3734 case IP_DROP_SOURCE_MEMBERSHIP: 3735 case MCAST_JOIN_GROUP: 3736 case MCAST_LEAVE_GROUP: 3737 case MCAST_BLOCK_SOURCE: 3738 case MCAST_UNBLOCK_SOURCE: 3739 case MCAST_JOIN_SOURCE_GROUP: 3740 case MCAST_LEAVE_SOURCE_GROUP: 3741 case IP_SEC_OPT: 3742 case IP_NEXTHOP: 3743 /* 3744 * "soft" error (negative) 3745 * option not handled at this level 3746 * Do not modify *outlenp. 
3747 */ 3748 return (-EINVAL); 3749 case IP_BOUND_IF: 3750 if (!checkonly) 3751 udp->udp_bound_if = *i1; 3752 break; 3753 case IP_UNSPEC_SRC: 3754 if (!checkonly) 3755 udp->udp_unspec_source = onoff; 3756 break; 3757 case IP_XMIT_IF: 3758 if (!checkonly) 3759 udp->udp_xmit_if = *i1; 3760 break; 3761 default: 3762 *outlenp = 0; 3763 return (EINVAL); 3764 } 3765 break; 3766 case IPPROTO_IPV6: { 3767 ip6_pkt_t *ipp; 3768 boolean_t sticky; 3769 3770 if (udp->udp_family != AF_INET6) { 3771 *outlenp = 0; 3772 return (ENOPROTOOPT); 3773 } 3774 /* 3775 * Deal with both sticky options and ancillary data 3776 */ 3777 sticky = B_FALSE; 3778 if (attrs == NULL || (ipp = attrs->udpattr_ipp6) == 3779 NULL) { 3780 /* sticky options, or none */ 3781 ipp = &udp->udp_sticky_ipp; 3782 sticky = B_TRUE; 3783 } 3784 3785 switch (name) { 3786 case IPV6_MULTICAST_IF: 3787 if (!checkonly) 3788 udp->udp_multicast_if_index = *i1; 3789 break; 3790 case IPV6_UNICAST_HOPS: 3791 /* -1 means use default */ 3792 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3793 *outlenp = 0; 3794 return (EINVAL); 3795 } 3796 if (!checkonly) { 3797 if (*i1 == -1) { 3798 udp->udp_ttl = ipp->ipp_unicast_hops = 3799 udp_ipv6_hoplimit; 3800 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3801 /* Pass modified value to IP. */ 3802 *i1 = udp->udp_ttl; 3803 } else { 3804 udp->udp_ttl = ipp->ipp_unicast_hops = 3805 (uint8_t)*i1; 3806 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3807 } 3808 /* Rebuild the header template */ 3809 error = udp_build_hdrs(q, udp); 3810 if (error != 0) { 3811 *outlenp = 0; 3812 return (error); 3813 } 3814 } 3815 break; 3816 case IPV6_MULTICAST_HOPS: 3817 /* -1 means use default */ 3818 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3819 *outlenp = 0; 3820 return (EINVAL); 3821 } 3822 if (!checkonly) { 3823 if (*i1 == -1) { 3824 udp->udp_multicast_ttl = 3825 ipp->ipp_multicast_hops = 3826 IP_DEFAULT_MULTICAST_TTL; 3827 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3828 /* Pass modified value to IP. 
*/ 3829 *i1 = udp->udp_multicast_ttl; 3830 } else { 3831 udp->udp_multicast_ttl = 3832 ipp->ipp_multicast_hops = 3833 (uint8_t)*i1; 3834 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3835 } 3836 } 3837 break; 3838 case IPV6_MULTICAST_LOOP: 3839 if (*i1 != 0 && *i1 != 1) { 3840 *outlenp = 0; 3841 return (EINVAL); 3842 } 3843 if (!checkonly) 3844 connp->conn_multicast_loop = *i1; 3845 break; 3846 case IPV6_JOIN_GROUP: 3847 case IPV6_LEAVE_GROUP: 3848 case MCAST_JOIN_GROUP: 3849 case MCAST_LEAVE_GROUP: 3850 case MCAST_BLOCK_SOURCE: 3851 case MCAST_UNBLOCK_SOURCE: 3852 case MCAST_JOIN_SOURCE_GROUP: 3853 case MCAST_LEAVE_SOURCE_GROUP: 3854 /* 3855 * "soft" error (negative) 3856 * option not handled at this level 3857 * Note: Do not modify *outlenp 3858 */ 3859 return (-EINVAL); 3860 case IPV6_BOUND_IF: 3861 if (!checkonly) 3862 udp->udp_bound_if = *i1; 3863 break; 3864 case IPV6_UNSPEC_SRC: 3865 if (!checkonly) 3866 udp->udp_unspec_source = onoff; 3867 break; 3868 /* 3869 * Set boolean switches for ancillary data delivery 3870 */ 3871 case IPV6_RECVPKTINFO: 3872 if (!checkonly) 3873 udp->udp_ip_recvpktinfo = onoff; 3874 break; 3875 case IPV6_RECVTCLASS: 3876 if (!checkonly) { 3877 udp->udp_ipv6_recvtclass = onoff; 3878 } 3879 break; 3880 case IPV6_RECVPATHMTU: 3881 if (!checkonly) { 3882 udp->udp_ipv6_recvpathmtu = onoff; 3883 } 3884 break; 3885 case IPV6_RECVHOPLIMIT: 3886 if (!checkonly) 3887 udp->udp_ipv6_recvhoplimit = onoff; 3888 break; 3889 case IPV6_RECVHOPOPTS: 3890 if (!checkonly) 3891 udp->udp_ipv6_recvhopopts = onoff; 3892 break; 3893 case IPV6_RECVDSTOPTS: 3894 if (!checkonly) 3895 udp->udp_ipv6_recvdstopts = onoff; 3896 break; 3897 case _OLD_IPV6_RECVDSTOPTS: 3898 if (!checkonly) 3899 udp->udp_old_ipv6_recvdstopts = onoff; 3900 break; 3901 case IPV6_RECVRTHDRDSTOPTS: 3902 if (!checkonly) 3903 udp->udp_ipv6_recvrthdrdstopts = onoff; 3904 break; 3905 case IPV6_RECVRTHDR: 3906 if (!checkonly) 3907 udp->udp_ipv6_recvrthdr = onoff; 3908 break; 3909 /* 3910 * Set 
sticky options or ancillary data. 3911 * If sticky options, (re)build any extension headers 3912 * that might be needed as a result. 3913 */ 3914 case IPV6_PKTINFO: 3915 /* 3916 * The source address and ifindex are verified 3917 * in ip_opt_set(). For ancillary data the 3918 * source address is checked in ip_wput_v6. 3919 */ 3920 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3921 return (EINVAL); 3922 if (checkonly) 3923 break; 3924 3925 if (inlen == 0) { 3926 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3927 ipp->ipp_sticky_ignored |= 3928 (IPPF_IFINDEX|IPPF_ADDR); 3929 } else { 3930 struct in6_pktinfo *pkti; 3931 3932 pkti = (struct in6_pktinfo *)invalp; 3933 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3934 ipp->ipp_addr = pkti->ipi6_addr; 3935 if (ipp->ipp_ifindex != 0) 3936 ipp->ipp_fields |= IPPF_IFINDEX; 3937 else 3938 ipp->ipp_fields &= ~IPPF_IFINDEX; 3939 if (!IN6_IS_ADDR_UNSPECIFIED( 3940 &ipp->ipp_addr)) 3941 ipp->ipp_fields |= IPPF_ADDR; 3942 else 3943 ipp->ipp_fields &= ~IPPF_ADDR; 3944 } 3945 if (sticky) { 3946 error = udp_build_hdrs(q, udp); 3947 if (error != 0) 3948 return (error); 3949 } 3950 break; 3951 case IPV6_HOPLIMIT: 3952 if (sticky) 3953 return (EINVAL); 3954 if (inlen != 0 && inlen != sizeof (int)) 3955 return (EINVAL); 3956 if (checkonly) 3957 break; 3958 3959 if (inlen == 0) { 3960 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3961 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3962 } else { 3963 if (*i1 > 255 || *i1 < -1) 3964 return (EINVAL); 3965 if (*i1 == -1) 3966 ipp->ipp_hoplimit = udp_ipv6_hoplimit; 3967 else 3968 ipp->ipp_hoplimit = *i1; 3969 ipp->ipp_fields |= IPPF_HOPLIMIT; 3970 } 3971 break; 3972 case IPV6_TCLASS: 3973 if (inlen != 0 && inlen != sizeof (int)) 3974 return (EINVAL); 3975 if (checkonly) 3976 break; 3977 3978 if (inlen == 0) { 3979 ipp->ipp_fields &= ~IPPF_TCLASS; 3980 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3981 } else { 3982 if (*i1 > 255 || *i1 < -1) 3983 return (EINVAL); 3984 if (*i1 == -1) 3985 ipp->ipp_tclass = 0; 
3986 else 3987 ipp->ipp_tclass = *i1; 3988 ipp->ipp_fields |= IPPF_TCLASS; 3989 } 3990 if (sticky) { 3991 error = udp_build_hdrs(q, udp); 3992 if (error != 0) 3993 return (error); 3994 } 3995 break; 3996 case IPV6_NEXTHOP: 3997 /* 3998 * IP will verify that the nexthop is reachable 3999 * and fail for sticky options. 4000 */ 4001 if (inlen != 0 && inlen != sizeof (sin6_t)) 4002 return (EINVAL); 4003 if (checkonly) 4004 break; 4005 4006 if (inlen == 0) { 4007 ipp->ipp_fields &= ~IPPF_NEXTHOP; 4008 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 4009 } else { 4010 sin6_t *sin6 = (sin6_t *)invalp; 4011 4012 if (sin6->sin6_family != AF_INET6) 4013 return (EAFNOSUPPORT); 4014 if (IN6_IS_ADDR_V4MAPPED( 4015 &sin6->sin6_addr)) 4016 return (EADDRNOTAVAIL); 4017 ipp->ipp_nexthop = sin6->sin6_addr; 4018 if (!IN6_IS_ADDR_UNSPECIFIED( 4019 &ipp->ipp_nexthop)) 4020 ipp->ipp_fields |= IPPF_NEXTHOP; 4021 else 4022 ipp->ipp_fields &= ~IPPF_NEXTHOP; 4023 } 4024 if (sticky) { 4025 error = udp_build_hdrs(q, udp); 4026 if (error != 0) 4027 return (error); 4028 } 4029 break; 4030 case IPV6_HOPOPTS: { 4031 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 4032 /* 4033 * Sanity checks - minimum size, size a multiple of 4034 * eight bytes, and matching size passed in. 4035 */ 4036 if (inlen != 0 && 4037 inlen != (8 * (hopts->ip6h_len + 1))) 4038 return (EINVAL); 4039 4040 if (checkonly) 4041 break; 4042 4043 error = optcom_pkt_set(invalp, inlen, sticky, 4044 (uchar_t **)&ipp->ipp_hopopts, 4045 &ipp->ipp_hopoptslen, 4046 sticky ? 
udp->udp_label_len_v6 : 0); 4047 if (error != 0) 4048 return (error); 4049 if (ipp->ipp_hopoptslen == 0) { 4050 ipp->ipp_fields &= ~IPPF_HOPOPTS; 4051 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 4052 } else { 4053 ipp->ipp_fields |= IPPF_HOPOPTS; 4054 } 4055 if (sticky) { 4056 error = udp_build_hdrs(q, udp); 4057 if (error != 0) 4058 return (error); 4059 } 4060 break; 4061 } 4062 case IPV6_RTHDRDSTOPTS: { 4063 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 4064 4065 /* 4066 * Sanity checks - minimum size, size a multiple of 4067 * eight bytes, and matching size passed in. 4068 */ 4069 if (inlen != 0 && 4070 inlen != (8 * (dopts->ip6d_len + 1))) 4071 return (EINVAL); 4072 4073 if (checkonly) 4074 break; 4075 4076 if (inlen == 0) { 4077 if (sticky && 4078 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 4079 kmem_free(ipp->ipp_rtdstopts, 4080 ipp->ipp_rtdstoptslen); 4081 ipp->ipp_rtdstopts = NULL; 4082 ipp->ipp_rtdstoptslen = 0; 4083 } 4084 4085 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 4086 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 4087 } else { 4088 error = optcom_pkt_set(invalp, inlen, sticky, 4089 (uchar_t **)&ipp->ipp_rtdstopts, 4090 &ipp->ipp_rtdstoptslen, 0); 4091 if (error != 0) 4092 return (error); 4093 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4094 } 4095 if (sticky) { 4096 error = udp_build_hdrs(q, udp); 4097 if (error != 0) 4098 return (error); 4099 } 4100 break; 4101 } 4102 case IPV6_DSTOPTS: { 4103 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 4104 4105 /* 4106 * Sanity checks - minimum size, size a multiple of 4107 * eight bytes, and matching size passed in. 
4108 */ 4109 if (inlen != 0 && 4110 inlen != (8 * (dopts->ip6d_len + 1))) 4111 return (EINVAL); 4112 4113 if (checkonly) 4114 break; 4115 4116 if (inlen == 0) { 4117 if (sticky && 4118 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 4119 kmem_free(ipp->ipp_dstopts, 4120 ipp->ipp_dstoptslen); 4121 ipp->ipp_dstopts = NULL; 4122 ipp->ipp_dstoptslen = 0; 4123 } 4124 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4125 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 4126 } else { 4127 error = optcom_pkt_set(invalp, inlen, sticky, 4128 (uchar_t **)&ipp->ipp_dstopts, 4129 &ipp->ipp_dstoptslen, 0); 4130 if (error != 0) 4131 return (error); 4132 ipp->ipp_fields |= IPPF_DSTOPTS; 4133 } 4134 if (sticky) { 4135 error = udp_build_hdrs(q, udp); 4136 if (error != 0) 4137 return (error); 4138 } 4139 break; 4140 } 4141 case IPV6_RTHDR: { 4142 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 4143 4144 /* 4145 * Sanity checks - minimum size, size a multiple of 4146 * eight bytes, and matching size passed in. 4147 */ 4148 if (inlen != 0 && 4149 inlen != (8 * (rt->ip6r_len + 1))) 4150 return (EINVAL); 4151 4152 if (checkonly) 4153 break; 4154 4155 if (inlen == 0) { 4156 if (sticky && 4157 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 4158 kmem_free(ipp->ipp_rthdr, 4159 ipp->ipp_rthdrlen); 4160 ipp->ipp_rthdr = NULL; 4161 ipp->ipp_rthdrlen = 0; 4162 } 4163 ipp->ipp_fields &= ~IPPF_RTHDR; 4164 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 4165 } else { 4166 error = optcom_pkt_set(invalp, inlen, sticky, 4167 (uchar_t **)&ipp->ipp_rthdr, 4168 &ipp->ipp_rthdrlen, 0); 4169 if (error != 0) 4170 return (error); 4171 ipp->ipp_fields |= IPPF_RTHDR; 4172 } 4173 if (sticky) { 4174 error = udp_build_hdrs(q, udp); 4175 if (error != 0) 4176 return (error); 4177 } 4178 break; 4179 } 4180 4181 case IPV6_DONTFRAG: 4182 if (checkonly) 4183 break; 4184 4185 if (onoff) { 4186 ipp->ipp_fields |= IPPF_DONTFRAG; 4187 } else { 4188 ipp->ipp_fields &= ~IPPF_DONTFRAG; 4189 } 4190 break; 4191 4192 case IPV6_USE_MIN_MTU: 4193 if (inlen != sizeof (int)) 4194 
return (EINVAL); 4195 4196 if (*i1 < -1 || *i1 > 1) 4197 return (EINVAL); 4198 4199 if (checkonly) 4200 break; 4201 4202 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 4203 ipp->ipp_use_min_mtu = *i1; 4204 break; 4205 4206 case IPV6_BOUND_PIF: 4207 case IPV6_SEC_OPT: 4208 case IPV6_DONTFAILOVER_IF: 4209 case IPV6_SRC_PREFERENCES: 4210 case IPV6_V6ONLY: 4211 /* Handled at the IP level */ 4212 return (-EINVAL); 4213 default: 4214 *outlenp = 0; 4215 return (EINVAL); 4216 } 4217 break; 4218 } /* end IPPROTO_IPV6 */ 4219 case IPPROTO_UDP: 4220 switch (name) { 4221 case UDP_ANONPRIVBIND: 4222 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 4223 *outlenp = 0; 4224 return (error); 4225 } 4226 if (!checkonly) { 4227 udp->udp_anon_priv_bind = onoff; 4228 } 4229 break; 4230 case UDP_EXCLBIND: 4231 if (!checkonly) 4232 udp->udp_exclbind = onoff; 4233 break; 4234 case UDP_RCVHDR: 4235 if (!checkonly) 4236 udp->udp_rcvhdr = onoff; 4237 break; 4238 default: 4239 *outlenp = 0; 4240 return (EINVAL); 4241 } 4242 break; 4243 default: 4244 *outlenp = 0; 4245 return (EINVAL); 4246 } 4247 /* 4248 * Common case of OK return with outval same as inval. 4249 */ 4250 if (invalp != outvalp) { 4251 /* don't trust bcopy for identical src/dst */ 4252 (void) bcopy(invalp, outvalp, inlen); 4253 } 4254 *outlenp = inlen; 4255 return (0); 4256 } 4257 4258 /* 4259 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 4260 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 4261 * headers, and the udp header. 4262 * Returns failure if can't allocate memory. 
4263 */ 4264 static int 4265 udp_build_hdrs(queue_t *q, udp_t *udp) 4266 { 4267 uchar_t *hdrs; 4268 uint_t hdrs_len; 4269 ip6_t *ip6h; 4270 ip6i_t *ip6i; 4271 udpha_t *udpha; 4272 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 4273 4274 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 4275 ASSERT(hdrs_len != 0); 4276 if (hdrs_len != udp->udp_sticky_hdrs_len) { 4277 /* Need to reallocate */ 4278 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 4279 if (hdrs == NULL) 4280 return (ENOMEM); 4281 4282 if (udp->udp_sticky_hdrs_len != 0) { 4283 kmem_free(udp->udp_sticky_hdrs, 4284 udp->udp_sticky_hdrs_len); 4285 } 4286 udp->udp_sticky_hdrs = hdrs; 4287 udp->udp_sticky_hdrs_len = hdrs_len; 4288 } 4289 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 4290 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 4291 4292 /* Set header fields not in ipp */ 4293 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 4294 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 4295 ip6h = (ip6_t *)&ip6i[1]; 4296 } else { 4297 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 4298 } 4299 4300 if (!(ipp->ipp_fields & IPPF_ADDR)) 4301 ip6h->ip6_src = udp->udp_v6src; 4302 4303 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 4304 udpha->uha_src_port = udp->udp_port; 4305 4306 /* Try to get everything in a single mblk */ 4307 if (hdrs_len > udp->udp_max_hdr_len) { 4308 udp->udp_max_hdr_len = hdrs_len; 4309 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 4310 udp_wroff_extra); 4311 } 4312 return (0); 4313 } 4314 4315 /* 4316 * This routine retrieves the value of an ND variable in a udpparam_t 4317 * structure. It is called through nd_getset when a user reads the 4318 * variable. 
 */
/* ARGSUSED */
static int
udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udpparam_t *udppa = (udpparam_t *)cp;

	/* Format the current value into the reply mblk for nd_getset(). */
	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
	return (0);
}

/*
 * Walk through the param array specified registering each element with the
 * named dispatch (ND) handler.  Also registers the fixed-name ND entries
 * (the extra privileged port list and the status/bind-hash reports).  On
 * any nd_load() failure the whole ND table is freed and B_FALSE returned.
 */
static boolean_t
udp_param_register(udpparam_t *udppa, int cnt)
{
	for (; cnt-- > 0; udppa++) {
		/* Entries with a NULL or empty name are placeholders. */
		if (udppa->udp_param_name && udppa->udp_param_name[0]) {
			if (!nd_load(&udp_g_nd, udppa->udp_param_name,
			    udp_param_get, udp_param_set,
			    (caddr_t)udppa)) {
				nd_free(&udp_g_nd);
				return (B_FALSE);
			}
		}
	}
	/* Read-only view of the extra privileged ports. */
	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports",
	    udp_extra_priv_ports_get, NULL, NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	/* Write-only add/delete entry points for the same list. */
	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add",
	    NULL, udp_extra_priv_ports_add, NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del",
	    NULL, udp_extra_priv_ports_del, NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	/* Diagnostic report entries. */
	if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL,
	    NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL,
	    NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	return (B_TRUE);
}

/* This routine sets an ND variable in a udpparam_t structure. */
/* ARGSUSED */
static int
udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
{
	long		new_value;
	udpparam_t	*udppa = (udpparam_t *)cp;

	/*
	 * Fail the request if the new value does not lie within the
	 * required bounds.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value < udppa->udp_param_min ||
	    new_value > udppa->udp_param_max) {
		return (EINVAL);
	}

	/* Set the new value */
	udppa->udp_param_value = new_value;
	return (0);
}

/*
 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with
 * T_opthdr) and return the number of bytes copied.  'dbuf' may be NULL to
 * just count the length needed for allocation.  If 'dbuf' is non-NULL,
 * then it's assumed to be allocated to be large enough.
 *
 * Returns zero if trimming of the security option causes all options to go
 * away.
 */
static size_t
copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
{
	struct T_opthdr *toh;
	size_t hol = ipp->ipp_hopoptslen;
	ip6_hbh_t *dstopt = NULL;
	const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
	size_t tlen, olen, plen;
	boolean_t deleting;
	const struct ip6_opt *sopt, *lastpad;
	struct ip6_opt *dopt;

	/* When a destination buffer is supplied, start with the T_opthdr. */
	if ((toh = (struct T_opthdr *)dbuf) != NULL) {
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_HOPOPTS;
		toh->status = 0;
		dstopt = (ip6_hbh_t *)(toh + 1);
	}

	/*
	 * If labeling is enabled, then skip the label option
	 * but get other options if there are any.
	 */
	if (is_system_labeled()) {
		dopt = NULL;
		if (dstopt != NULL) {
			/* will fill in ip6h_len later */
			dstopt->ip6h_nxt = srcopt->ip6h_nxt;
			dopt = (struct ip6_opt *)(dstopt + 1);
		}
		sopt = (const struct ip6_opt *)(srcopt + 1);
		hol -= sizeof (*srcopt);
		tlen = sizeof (*dstopt);
		lastpad = NULL;
		deleting = B_FALSE;
		/*
		 * This loop finds the first (lastpad pointer) of any number of
		 * pads that preceeds the security option, then treats the
		 * security option as though it were a pad, and then finds the
		 * next non-pad option (or end of list).
		 *
		 * It then treats the entire block as one big pad.  To preserve
		 * alignment of any options that follow, or just the end of the
		 * list, it computes a minimal new padding size that keeps the
		 * same alignment for the next option.
		 *
		 * If it encounters just a sequence of pads with no security
		 * option, those are copied as-is rather than collapsed.
		 *
		 * Note that to handle the end of list case, the code makes one
		 * loop with 'hol' set to zero.
		 */
		for (;;) {
			if (hol > 0) {
				/* Pad1 is a single byte; step over it. */
				if (sopt->ip6o_type == IP6OPT_PAD1) {
					if (lastpad == NULL)
						lastpad = sopt;
					sopt = (const struct ip6_opt *)
					    &sopt->ip6o_len;
					hol--;
					continue;
				}
				olen = sopt->ip6o_len + sizeof (*sopt);
				if (olen > hol)
					olen = hol;
				/* ip6opt_ls is the label option being hidden */
				if (sopt->ip6o_type == IP6OPT_PADN ||
				    sopt->ip6o_type == ip6opt_ls) {
					if (sopt->ip6o_type == ip6opt_ls)
						deleting = B_TRUE;
					if (lastpad == NULL)
						lastpad = sopt;
					sopt = (const struct ip6_opt *)
					    ((const char *)sopt + olen);
					hol -= olen;
					continue;
				}
			} else {
				/* if nothing was copied at all, then delete */
				if (tlen == sizeof (*dstopt))
					return (0);
				/* last pass; pick up any trailing padding */
				olen = 0;
			}
			if (deleting) {
				/*
				 * compute aligning effect of deleted material
				 * to reproduce with pad.
				 */
				plen = ((const char *)sopt -
				    (const char *)lastpad) & 7;
				tlen += plen;
				if (dopt != NULL) {
					if (plen == 1) {
						dopt->ip6o_type = IP6OPT_PAD1;
					} else if (plen > 1) {
						plen -= sizeof (*dopt);
						dopt->ip6o_type = IP6OPT_PADN;
						dopt->ip6o_len = plen;
						if (plen > 0)
							bzero(dopt + 1, plen);
					}
					dopt = (struct ip6_opt *)
					    ((char *)dopt + plen);
				}
				deleting = B_FALSE;
				lastpad = NULL;
			}
			/* if there's uncopied padding, then copy that now */
			if (lastpad != NULL) {
				olen += (const char *)sopt -
				    (const char *)lastpad;
				sopt = lastpad;
				lastpad = NULL;
			}
			if (dopt != NULL && olen > 0) {
				bcopy(sopt, dopt, olen);
				dopt = (struct ip6_opt *)((char *)dopt + olen);
			}
			if (hol == 0)
				break;
			tlen += olen;
			sopt = (const struct ip6_opt *)
			    ((const char *)sopt + olen);
			hol -= olen;
		}
		/* go back and patch up the length value, rounded upward */
		if (dstopt != NULL)
			dstopt->ip6h_len = (tlen - 1) >> 3;
	} else {
		/* Unlabeled system: the options pass through unmodified. */
		tlen = hol;
		if (dstopt != NULL)
			bcopy(srcopt, dstopt, hol);
	}

	tlen += sizeof (*toh);
	if (toh != NULL)
		toh->len = tlen;

	return (tlen);
}

/*
 * Inbound data path: convert an IP-delivered datagram (an M_DATA mblk,
 * possibly preceded by an M_CTL carrying ancillary packet information)
 * into a T_UNITDATA_IND and pass it upstream.  ICMP M_CTLs are diverted
 * to udp_icmp_error(), and IPv4 packets with IP options are handed to
 * the udp_rput_other() slow path.
 */
static void
udp_input(conn_t *connp, mblk_t *mp)
{
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;		/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			udi_size;	/* Size of T_unitdata_ind */
	int			mp_len;
	udp_t			*udp;
	udpha_t			*udpha;
	int			ipversion;
	ip6_pkt_t		ipp;
	ip6_t			*ip6h;
	ip6i_t			*ip6i;
	mblk_t			*mp1;
	mblk_t			*options_mp = NULL;
	ip_pktinfo_t		*pinfo = NULL;
	cred_t			*cr = NULL;
	queue_t			*q = connp->conn_rq;
	pid_t			cpid;
	cred_t			*rcr = connp->conn_cred;

	TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START,
	    "udp_rput_start: q %p mp %p", q, mp);

	udp = connp->conn_udp;
	rptr =
mp->b_rptr;
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
	ASSERT(OK_32PTR(rptr));

	/*
	 * IP should have prepended the options data in an M_CTL
	 * Check M_CTL "type" to make sure are not here bcos of
	 * a valid ICMP message
	 */
	if (DB_TYPE(mp) == M_CTL) {
		if (MBLKL(mp) == sizeof (ip_pktinfo_t) &&
		    ((ip_pktinfo_t *)mp->b_rptr)->ip_pkt_ulp_type ==
		    IN_PKTINFO) {
			/*
			 * IP_RECVIF or IP_RECVSLLA or IPF_RECVADDR information
			 * has been appended to the packet by IP. We need to
			 * extract the mblk and adjust the rptr
			 */
			pinfo = (ip_pktinfo_t *)mp->b_rptr;
			options_mp = mp;
			mp = mp->b_cont;
			rptr = mp->b_rptr;
			UDP_STAT(udp_in_pktinfo);
		} else {
			/*
			 * ICMP messages.
			 */
			udp_icmp_error(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
			    "udp_rput_end: q %p (%S)", q, "m_ctl");
			return;
		}
	}

	mp_len = msgdsize(mp);
	/*
	 * This is the inbound data path.
	 * First, we check to make sure the IP version number is correct,
	 * and then pull the IP and UDP headers into the first mblk.
	 * Assume IP provides aligned packets - otherwise toss.
	 * Also, check if we have a complete IP header.
	 */

	/* Initialize regardless if ipversion is IPv4 or IPv6 */
	ipp.ipp_fields = 0;

	ipversion = IPH_HDR_VERSION(rptr);
	switch (ipversion) {
	case IPV4_VERSION:
		ASSERT(MBLKL(mp) >= sizeof (ipha_t));
		ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
		hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
		if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) ||
		    (udp->udp_ip_rcv_options_len)) {
			/*
			 * Handle IPv4 packets with options outside of the
			 * main data path. Not needed for AF_INET6 sockets
			 * since they don't support a getsockopt of IP_OPTIONS.
			 */
			if (udp->udp_family == AF_INET6)
				break;
			/*
			 * UDP length check performed for IPv4 packets with
			 * options to check whether UDP length specified in
			 * the header is the same as the physical length of
			 * the packet.
			 */
			udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
			if (mp_len != (ntohs(udpha->uha_length) +
			    hdr_length - UDPH_SIZE)) {
				goto tossit;
			}
			/*
			 * Handle the case where the packet has IP options
			 * and the IP_RECVSLLA & IP_RECVIF are set
			 */
			if (pinfo != NULL)
				mp = options_mp;
			udp_become_writer(connp, mp, udp_rput_other_wrapper,
			    SQTAG_UDP_INPUT);
			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
			    "udp_rput_end: q %p (%S)", q, "end");
			return;
		}

		/* Handle IPV6_RECVHOPLIMIT. */
		if ((udp->udp_family == AF_INET6) && (pinfo != NULL) &&
		    udp->udp_ip_recvpktinfo) {
			if (pinfo->ip_pkt_flags & IPF_RECVIF) {
				ipp.ipp_fields |= IPPF_IFINDEX;
				ipp.ipp_ifindex = pinfo->ip_pkt_ifindex;
			}
		}
		break;
	case IPV6_VERSION:
		/*
		 * IPv6 packets can only be received by applications
		 * that are prepared to receive IPv6 addresses.
		 * The IP fanout must ensure this.
		 */
		ASSERT(udp->udp_family == AF_INET6);

		ip6h = (ip6_t *)rptr;
		ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);

		if (ip6h->ip6_nxt != IPPROTO_UDP) {
			uint8_t nexthdrp;
			/* Look for ifindex information */
			if (ip6h->ip6_nxt == IPPROTO_RAW) {
				/* An ip6i_t info header precedes the ip6_t */
				ip6i = (ip6i_t *)ip6h;
				if ((uchar_t *)&ip6i[1] > mp->b_wptr)
					goto tossit;

				if (ip6i->ip6i_flags & IP6I_IFINDEX) {
					ASSERT(ip6i->ip6i_ifindex != 0);
					ipp.ipp_fields |= IPPF_IFINDEX;
					ipp.ipp_ifindex = ip6i->ip6i_ifindex;
				}
				rptr = (uchar_t *)&ip6i[1];
				mp->b_rptr = rptr;
				/* Drop the mblk if the ip6i_t filled it */
				if (rptr == mp->b_wptr) {
					mp1 = mp->b_cont;
					freeb(mp);
					mp = mp1;
					rptr = mp->b_rptr;
				}
				if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE))
					goto tossit;
				ip6h = (ip6_t *)rptr;
				mp_len = msgdsize(mp);
			}
			/*
			 * Find any potentially interesting extension headers
			 * as well as the length of the IPv6 + extension
			 * headers.
			 */
			hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) +
			    UDPH_SIZE;
			ASSERT(nexthdrp == IPPROTO_UDP);
		} else {
			hdr_length = IPV6_HDR_LEN + UDPH_SIZE;
			ip6i = NULL;
		}
		break;
	default:
		ASSERT(0);
	}

	/*
	 * IP inspected the UDP header thus all of it must be in the mblk.
	 * UDP length check is performed for IPv6 packets and IPv4 packets
	 * without options to check if the size of the packet as specified
	 * by the header is the same as the physical size of the packet.
	 */
	udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
	if ((MBLKL(mp) < hdr_length) ||
	    (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) {
		goto tossit;
	}

	/* Walk past the headers. */
	if (!udp->udp_rcvhdr) {
		mp->b_rptr = rptr + hdr_length;
		mp_len -= hdr_length;
	}

	/*
	 * This is the inbound data path.  Packets are passed upstream as
	 * T_UNITDATA_IND messages with full IP headers still attached.
	 */
	if (udp->udp_family == AF_INET) {
		sin_t	*sin;

		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);

		/*
		 * Normally only send up the address.
		 * If IP_RECVDSTADDR is set we include the destination IP
		 * address as an option. With IP_RECVOPTS we include all
		 * the IP options. Only ip_rput_other() handles packets
		 * that contain IP options.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
		if (udp->udp_recvdstaddr) {
			udi_size += sizeof (struct T_opthdr) +
			    sizeof (struct in_addr);
			UDP_STAT(udp_in_recvdstaddr);
		}

		if (udp->udp_ip_recvpktinfo && (pinfo != NULL) &&
		    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
			udi_size += sizeof (struct T_opthdr) +
			    sizeof (struct in_pktinfo);
			UDP_STAT(udp_ip_recvpktinfo);
		}

		/*
		 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate
		 * space accordingly
		 */
		if (udp->udp_recvif && (pinfo != NULL) &&
		    (pinfo->ip_pkt_flags & IPF_RECVIF)) {
			udi_size += sizeof (struct T_opthdr) + sizeof (uint_t);
			UDP_STAT(udp_in_recvif);
		}

		if (udp->udp_recvslla && (pinfo != NULL) &&
		    (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {
			udi_size += sizeof (struct T_opthdr) +
			    sizeof (struct sockaddr_dl);
			UDP_STAT(udp_in_recvslla);
		}

		if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
			udi_size += sizeof (struct T_opthdr) + ucredsize;
			cpid = DB_CPID(mp);
			UDP_STAT(udp_in_recvucred);
		}

		/*
		 * If SO_TIMESTAMP is set allocate the appropriate sized
		 * buffer. Since gethrestime() expects a pointer aligned
		 * argument, we allocate space necessary for extra
		 * alignment (even though it might not be used).
		 */
		if (udp->udp_timestamp) {
			udi_size += sizeof (struct T_opthdr) +
			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
			UDP_STAT(udp_in_timestamp);
		}

		/*
		 * If IP_RECVTTL is set allocate the appropriate sized buffer
		 */
		if (udp->udp_recvttl) {
			udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
			UDP_STAT(udp_in_recvttl);
		}
		ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH);

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			if (options_mp != NULL)
				freeb(options_mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
			    "udp_rput_end: q %p (%S)", q, "allocbfail");
			BUMP_MIB(&udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp = mp1;
		mp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp->b_rptr;
		mp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin_t);
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
		tudi->OPT_length = udi_size;
		sin = (sin_t *)&tudi[1];
		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
		sin->sin_port = udpha->uha_src_port;
		sin->sin_family = udp->udp_family;
		*(uint32_t *)&sin->sin_zero[0] = 0;
		*(uint32_t *)&sin->sin_zero[4] = 0;

		/*
		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
		 * IP_RECVTTL has been set.
		 */
		if (udi_size != 0) {
			/*
			 * Copy in destination address before options to avoid
			 * any padding issues.
			 */
			char *dstopt;

			dstopt = (char *)&sin[1];
			if (udp->udp_recvdstaddr) {
				struct T_opthdr *toh;
				ipaddr_t *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVDSTADDR;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (ipaddr_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (ipaddr_t *)dstopt;
				*dstptr = ((ipha_t *)rptr)->ipha_dst;
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			if (udp->udp_ip_recvpktinfo && (pinfo != NULL) &&
			    (pinfo->ip_pkt_flags & IPF_RECVADDR)) {
				struct T_opthdr *toh;
				struct in_pktinfo *pktinfop;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_PKTINFO;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (*pktinfop);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				pktinfop = (struct in_pktinfo *)dstopt;
				pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex;
				pktinfop->ipi_spec_dst =
				    pinfo->ip_pkt_match_addr;
				pktinfop->ipi_addr.s_addr =
				    ((ipha_t *)rptr)->ipha_dst;

				dstopt += sizeof (struct in_pktinfo);
				udi_size -= toh->len;
			}

			if (udp->udp_recvslla && (pinfo != NULL) &&
			    (pinfo->ip_pkt_flags & IPF_RECVSLLA)) {

				struct T_opthdr *toh;
				struct sockaddr_dl *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVSLLA;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (struct sockaddr_dl);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (struct sockaddr_dl *)dstopt;
				bcopy(&pinfo->ip_pkt_slla, dstptr,
				    sizeof (struct sockaddr_dl));
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			if (udp->udp_recvif && (pinfo != NULL) &&
			    (pinfo->ip_pkt_flags & IPF_RECVIF)) {

				struct T_opthdr *toh;
				uint_t *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVIF;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (uint_t *)dstopt;
				*dstptr = pinfo->ip_pkt_ifindex;
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			if (cr != NULL) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = SOL_SOCKET;
				toh->name = SCM_UCRED;
				toh->len = sizeof (struct T_opthdr) + ucredsize;
				toh->status = 0;
				(void) cred2ucred(cr, cpid, &toh[1], rcr);
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			if (udp->udp_timestamp) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = SOL_SOCKET;
				toh->name = SCM_TIMESTAMP;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				/* Align for gethrestime() */
				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
				    sizeof (intptr_t));
				gethrestime((timestruc_t *)dstopt);
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			/*
			 * CAUTION:
			 * Due to aligment issues
			 * Processing of IP_RECVTTL option
			 * should always be the last. Adding
			 * any option processing after this will
			 * cause alignment panic.
			 */
			if (udp->udp_recvttl) {
				struct T_opthdr *toh;
				uint8_t *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVTTL;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint8_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (uint8_t *)dstopt;
				*dstptr = ((ipha_t *)rptr)->ipha_ttl;
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			/* Consumed all of allocated space */
			ASSERT(udi_size == 0);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 * [ Only udp_rput_other() handles packets that contain IP
		 * options so code to account for does not appear immediately
		 * below but elsewhere ]
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
		    IPPF_RTHDR|IPPF_IFINDEX)) {
			if (udp->udp_ipv6_recvhopopts &&
			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
				size_t hlen;

				UDP_STAT(udp_in_recvhopopts);
				/* First pass counts; copy happens below. */
				hlen = copy_hop_opts(&ipp, NULL);
				if (hlen == 0)
					ipp.ipp_fields &= ~IPPF_HOPOPTS;
				udi_size += hlen;
			}
			if ((udp->udp_ipv6_recvdstopts ||
			    udp->udp_old_ipv6_recvdstopts) &&
			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
				udi_size += sizeof (struct T_opthdr) +
				    ipp.ipp_dstoptslen;
				UDP_STAT(udp_in_recvdstopts);
			}
			if (((udp->udp_ipv6_recvdstopts &&
			    udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR)) ||
			    udp->udp_ipv6_recvrthdrdstopts) &&
			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
				udi_size += sizeof (struct T_opthdr) +
				    ipp.ipp_rtdstoptslen;
				UDP_STAT(udp_in_recvrtdstopts);
			}
			if (udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR)) {
				udi_size += sizeof (struct T_opthdr) +
				    ipp.ipp_rthdrlen;
				UDP_STAT(udp_in_recvrthdr);
			}
			if (udp->udp_ip_recvpktinfo &&
			    (ipp.ipp_fields & IPPF_IFINDEX)) {
				udi_size += sizeof (struct T_opthdr) +
				    sizeof (struct in6_pktinfo);
				UDP_STAT(udp_in_recvpktinfo);
			}

		}
		if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
			udi_size += sizeof (struct T_opthdr) + ucredsize;
			cpid = DB_CPID(mp);
			UDP_STAT(udp_in_recvucred);
		}

		if (udp->udp_ipv6_recvhoplimit) {
			udi_size += sizeof (struct T_opthdr) + sizeof (int);
			UDP_STAT(udp_in_recvhoplimit);
		}

		if (udp->udp_ipv6_recvtclass) {
			udi_size += sizeof (struct T_opthdr) + sizeof (int);
			UDP_STAT(udp_in_recvtclass);
		}

		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			if (options_mp != NULL)
				freeb(options_mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
			    "udp_rput_end: q %p (%S)", q, "allocbfail");
			BUMP_MIB(&udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp = mp1;
		mp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp->b_rptr;
		mp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ipversion == IPV4_VERSION) {
			in6_addr_t v6dst;

			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    connp->conn_zoneid);
		} else {
			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope source pass up scope id */
			if ((ipp.ipp_fields & IPPF_IFINDEX) &&
			    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ipp.ipp_ifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, connp->conn_zoneid);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = udp->udp_family;

		if (udi_size != 0) {
			uchar_t *dstopt;

			dstopt = (uchar_t *)&sin6[1];
			if (udp->udp_ip_recvpktinfo &&
			    (ipp.ipp_fields & IPPF_IFINDEX)) {
				struct T_opthdr *toh;
				struct in6_pktinfo *pkti;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_PKTINFO;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (*pkti);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				pkti = (struct in6_pktinfo *)dstopt;
				if (ipversion == IPV6_VERSION)
					pkti->ipi6_addr = ip6h->ip6_dst;
				else
					IN6_IPADDR_TO_V4MAPPED(
					    ((ipha_t *)rptr)->ipha_dst,
					    &pkti->ipi6_addr);
				pkti->ipi6_ifindex = ipp.ipp_ifindex;
				dstopt += sizeof (*pkti);
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvhoplimit) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_HOPLIMIT;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				if (ipversion == IPV6_VERSION)
					*(uint_t *)dstopt = ip6h->ip6_hops;
				else
					*(uint_t *)dstopt =
					    ((ipha_t *)rptr)->ipha_ttl;
				dstopt += sizeof (uint_t);
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvtclass) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_TCLASS;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				if (ipversion == IPV6_VERSION) {
					*(uint_t *)dstopt =
					    IPV6_FLOW_TCLASS(ip6h->ip6_flow);
				} else {
					ipha_t *ipha = (ipha_t *)rptr;
					*(uint_t *)dstopt =
					    ipha->ipha_type_of_service;
				}
				dstopt += sizeof (uint_t);
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvhopopts &&
			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
				size_t hlen;

				/* Second pass: actually copy the options. */
				hlen = copy_hop_opts(&ipp, dstopt);
				dstopt += hlen;
				udi_size -= hlen;
			}
			if (udp->udp_ipv6_recvdstopts &&
			    udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR) &&
			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_DSTOPTS;
				toh->len = sizeof (struct T_opthdr) +
				    ipp.ipp_rtdstoptslen;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				bcopy(ipp.ipp_rtdstopts, dstopt,
				    ipp.ipp_rtdstoptslen);
				dstopt += ipp.ipp_rtdstoptslen;
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR)) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_RTHDR;
				toh->len = sizeof (struct T_opthdr) +
				    ipp.ipp_rthdrlen;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
				dstopt += ipp.ipp_rthdrlen;
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvdstopts &&
			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_DSTOPTS;
				toh->len = sizeof (struct T_opthdr) +
				    ipp.ipp_dstoptslen;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				bcopy(ipp.ipp_dstopts, dstopt,
				    ipp.ipp_dstoptslen);
				dstopt += ipp.ipp_dstoptslen;
				udi_size -= toh->len;
			}

			if (cr != NULL) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = SOL_SOCKET;
				toh->name = SCM_UCRED;
				toh->len = sizeof (struct T_opthdr) + ucredsize;
				toh->status = 0;
				(void) cred2ucred(cr, cpid, &toh[1], rcr);
				dstopt += toh->len;
				udi_size -= toh->len;
			}
			/* Consumed all of allocated space */
			ASSERT(udi_size == 0);
		}
		/* NOTE(review): no matching #define of sin6 is visible here */
#undef	sin6
		/* No IP_RECVDSTADDR for IPv6. */
	}

	BUMP_MIB(&udp_mib, udpHCInDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
	    "udp_rput_end: q %p (%S)", q, "end");
	if (options_mp != NULL)
		freeb(options_mp);

	if (udp->udp_direct_sockfs) {
		/*
		 * There is nothing above us except for the stream head;
		 * use the read-side synchronous stream interface in
		 * order to reduce the time spent in interrupt thread.
		 */
		ASSERT(udp->udp_issocket);
		udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len);
	} else {
		/*
		 * Use regular STREAMS interface to pass data upstream
		 * if this is not a socket endpoint, or if we have
		 * switched over to the slow mode due to sockmod being
		 * popped or a module being pushed on top of us.
		 */
		putnext(UDP_RD(q), mp);
	}
	return;

tossit:
	/* Malformed or length-mismatched packet: count the error and drop. */
	freemsg(mp);
	if (options_mp != NULL)
		freeb(options_mp);
	BUMP_MIB(&udp_mib, udpInErrors);
}

/* Deliver an inbound message from IP to udp_input() via _UDP_ENTER. */
void
udp_conn_recv(conn_t *connp, mblk_t *mp)
{
	_UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT);
}

/* ARGSUSED */
static void
udp_input_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_input((conn_t *)arg, mp);
	_UDP_EXIT((conn_t *)arg);
}

/*
 * Process non-M_DATA messages as well as M_DATA messages that requires
 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options.
5309 */ 5310 static void 5311 udp_rput_other(queue_t *q, mblk_t *mp) 5312 { 5313 struct T_unitdata_ind *tudi; 5314 mblk_t *mp1; 5315 uchar_t *rptr; 5316 uchar_t *new_rptr; 5317 int hdr_length; 5318 int udi_size; /* Size of T_unitdata_ind */ 5319 int opt_len; /* Length of IP options */ 5320 sin_t *sin; 5321 struct T_error_ack *tea; 5322 mblk_t *options_mp = NULL; 5323 ip_pktinfo_t *pinfo; 5324 boolean_t recv_on = B_FALSE; 5325 cred_t *cr = NULL; 5326 udp_t *udp = Q_TO_UDP(q); 5327 pid_t cpid; 5328 cred_t *rcr = udp->udp_connp->conn_cred; 5329 5330 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 5331 "udp_rput_other: q %p mp %p", q, mp); 5332 5333 ASSERT(OK_32PTR(mp->b_rptr)); 5334 rptr = mp->b_rptr; 5335 5336 switch (mp->b_datap->db_type) { 5337 case M_CTL: 5338 /* 5339 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 5340 */ 5341 recv_on = B_TRUE; 5342 options_mp = mp; 5343 pinfo = (ip_pktinfo_t *)options_mp->b_rptr; 5344 5345 /* 5346 * The actual data is in mp->b_cont 5347 */ 5348 mp = mp->b_cont; 5349 ASSERT(OK_32PTR(mp->b_rptr)); 5350 rptr = mp->b_rptr; 5351 break; 5352 case M_DATA: 5353 /* 5354 * M_DATA messages contain IPv4 datagrams. They are handled 5355 * after this switch. 5356 */ 5357 break; 5358 case M_PROTO: 5359 case M_PCPROTO: 5360 /* M_PROTO messages contain some type of TPI message. */ 5361 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 5362 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5363 freemsg(mp); 5364 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5365 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 5366 return; 5367 } 5368 tea = (struct T_error_ack *)rptr; 5369 5370 switch (tea->PRIM_type) { 5371 case T_ERROR_ACK: 5372 switch (tea->ERROR_prim) { 5373 case O_T_BIND_REQ: 5374 case T_BIND_REQ: { 5375 /* 5376 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5377 * clear out the associated port and source 5378 * address before passing the message 5379 * upstream. If this was caused by a T_CONN_REQ 5380 * revert back to bound state. 
5381 */ 5382 udp_fanout_t *udpf; 5383 5384 udpf = &udp_bind_fanout[ 5385 UDP_BIND_HASH(udp->udp_port)]; 5386 mutex_enter(&udpf->uf_lock); 5387 if (udp->udp_state == TS_DATA_XFER) { 5388 /* Connect failed */ 5389 tea->ERROR_prim = T_CONN_REQ; 5390 /* Revert back to the bound source */ 5391 udp->udp_v6src = udp->udp_bound_v6src; 5392 udp->udp_state = TS_IDLE; 5393 mutex_exit(&udpf->uf_lock); 5394 if (udp->udp_family == AF_INET6) 5395 (void) udp_build_hdrs(q, udp); 5396 break; 5397 } 5398 5399 if (udp->udp_discon_pending) { 5400 tea->ERROR_prim = T_DISCON_REQ; 5401 udp->udp_discon_pending = 0; 5402 } 5403 V6_SET_ZERO(udp->udp_v6src); 5404 V6_SET_ZERO(udp->udp_bound_v6src); 5405 udp->udp_state = TS_UNBND; 5406 udp_bind_hash_remove(udp, B_TRUE); 5407 udp->udp_port = 0; 5408 mutex_exit(&udpf->uf_lock); 5409 if (udp->udp_family == AF_INET6) 5410 (void) udp_build_hdrs(q, udp); 5411 break; 5412 } 5413 default: 5414 break; 5415 } 5416 break; 5417 case T_BIND_ACK: 5418 udp_rput_bind_ack(q, mp); 5419 return; 5420 5421 case T_OPTMGMT_ACK: 5422 case T_OK_ACK: 5423 break; 5424 default: 5425 freemsg(mp); 5426 return; 5427 } 5428 putnext(UDP_RD(q), mp); 5429 return; 5430 } 5431 5432 /* 5433 * This is the inbound data path. 5434 * First, we make sure the data contains both IP and UDP headers. 5435 * 5436 * This handle IPv4 packets for only AF_INET sockets. 5437 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 5438 * is no need saving the options. 5439 */ 5440 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 5441 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 5442 if (mp->b_wptr - rptr < hdr_length) { 5443 if (!pullupmsg(mp, hdr_length)) { 5444 freemsg(mp); 5445 if (options_mp != NULL) 5446 freeb(options_mp); 5447 BUMP_MIB(&udp_mib, udpInErrors); 5448 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5449 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 5450 BUMP_MIB(&udp_mib, udpInErrors); 5451 return; 5452 } 5453 rptr = mp->b_rptr; 5454 } 5455 /* Walk past the headers. 
*/ 5456 new_rptr = rptr + hdr_length; 5457 if (!udp->udp_rcvhdr) 5458 mp->b_rptr = new_rptr; 5459 5460 /* Save the options if any */ 5461 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 5462 if (opt_len > 0) { 5463 if (opt_len > udp->udp_ip_rcv_options_len) { 5464 if (udp->udp_ip_rcv_options_len) 5465 mi_free((char *)udp->udp_ip_rcv_options); 5466 udp->udp_ip_rcv_options_len = 0; 5467 udp->udp_ip_rcv_options = 5468 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 5469 if (udp->udp_ip_rcv_options) 5470 udp->udp_ip_rcv_options_len = opt_len; 5471 } 5472 if (udp->udp_ip_rcv_options_len) { 5473 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 5474 udp->udp_ip_rcv_options, opt_len); 5475 /* Adjust length if we are resusing the space */ 5476 udp->udp_ip_rcv_options_len = opt_len; 5477 } 5478 } else if (udp->udp_ip_rcv_options_len) { 5479 mi_free((char *)udp->udp_ip_rcv_options); 5480 udp->udp_ip_rcv_options = NULL; 5481 udp->udp_ip_rcv_options_len = 0; 5482 } 5483 5484 /* 5485 * Normally only send up the address. 5486 * If IP_RECVDSTADDR is set we include the destination IP 5487 * address as an option. With IP_RECVOPTS we include all 5488 * the IP options. 
5489 */ 5490 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5491 if (udp->udp_recvdstaddr) { 5492 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5493 UDP_STAT(udp_in_recvdstaddr); 5494 } 5495 5496 if (udp->udp_ip_recvpktinfo && recv_on && 5497 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 5498 udi_size += sizeof (struct T_opthdr) + 5499 sizeof (struct in_pktinfo); 5500 UDP_STAT(udp_ip_recvpktinfo); 5501 } 5502 5503 if (udp->udp_recvopts && opt_len > 0) { 5504 udi_size += sizeof (struct T_opthdr) + opt_len; 5505 UDP_STAT(udp_in_recvopts); 5506 } 5507 5508 /* 5509 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5510 * space accordingly 5511 */ 5512 if (udp->udp_recvif && recv_on && 5513 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 5514 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5515 UDP_STAT(udp_in_recvif); 5516 } 5517 5518 if (udp->udp_recvslla && recv_on && 5519 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 5520 udi_size += sizeof (struct T_opthdr) + 5521 sizeof (struct sockaddr_dl); 5522 UDP_STAT(udp_in_recvslla); 5523 } 5524 5525 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5526 udi_size += sizeof (struct T_opthdr) + ucredsize; 5527 cpid = DB_CPID(mp); 5528 UDP_STAT(udp_in_recvucred); 5529 } 5530 /* 5531 * If IP_RECVTTL is set allocate the appropriate sized buffer 5532 */ 5533 if (udp->udp_recvttl) { 5534 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5535 UDP_STAT(udp_in_recvttl); 5536 } 5537 5538 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 5539 mp1 = allocb(udi_size, BPRI_MED); 5540 if (mp1 == NULL) { 5541 freemsg(mp); 5542 if (options_mp != NULL) 5543 freeb(options_mp); 5544 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5545 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5546 BUMP_MIB(&udp_mib, udpInErrors); 5547 return; 5548 } 5549 mp1->b_cont = mp; 5550 mp = mp1; 5551 mp->b_datap->db_type = M_PROTO; 5552 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5553 mp->b_wptr = (uchar_t *)tudi + udi_size; 5554 tudi->PRIM_type = T_UNITDATA_IND; 5555 tudi->SRC_length = sizeof (sin_t); 5556 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5557 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5558 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5559 tudi->OPT_length = udi_size; 5560 5561 sin = (sin_t *)&tudi[1]; 5562 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5563 sin->sin_port = ((in_port_t *) 5564 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5565 sin->sin_family = AF_INET; 5566 *(uint32_t *)&sin->sin_zero[0] = 0; 5567 *(uint32_t *)&sin->sin_zero[4] = 0; 5568 5569 /* 5570 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5571 * IP_RECVTTL has been set. 5572 */ 5573 if (udi_size != 0) { 5574 /* 5575 * Copy in destination address before options to avoid any 5576 * padding issues. 
5577 */ 5578 char *dstopt; 5579 5580 dstopt = (char *)&sin[1]; 5581 if (udp->udp_recvdstaddr) { 5582 struct T_opthdr *toh; 5583 ipaddr_t *dstptr; 5584 5585 toh = (struct T_opthdr *)dstopt; 5586 toh->level = IPPROTO_IP; 5587 toh->name = IP_RECVDSTADDR; 5588 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5589 toh->status = 0; 5590 dstopt += sizeof (struct T_opthdr); 5591 dstptr = (ipaddr_t *)dstopt; 5592 *dstptr = (((ipaddr_t *)rptr)[4]); 5593 dstopt += sizeof (ipaddr_t); 5594 udi_size -= toh->len; 5595 } 5596 if (udp->udp_recvopts && udi_size != 0) { 5597 struct T_opthdr *toh; 5598 5599 toh = (struct T_opthdr *)dstopt; 5600 toh->level = IPPROTO_IP; 5601 toh->name = IP_RECVOPTS; 5602 toh->len = sizeof (struct T_opthdr) + opt_len; 5603 toh->status = 0; 5604 dstopt += sizeof (struct T_opthdr); 5605 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5606 dstopt += opt_len; 5607 udi_size -= toh->len; 5608 } 5609 if (udp->udp_ip_recvpktinfo && recv_on && 5610 (pinfo->ip_pkt_flags & IPF_RECVADDR)) { 5611 5612 struct T_opthdr *toh; 5613 struct in_pktinfo *pktinfop; 5614 5615 toh = (struct T_opthdr *)dstopt; 5616 toh->level = IPPROTO_IP; 5617 toh->name = IP_PKTINFO; 5618 toh->len = sizeof (struct T_opthdr) + 5619 sizeof (*pktinfop); 5620 toh->status = 0; 5621 dstopt += sizeof (struct T_opthdr); 5622 pktinfop = (struct in_pktinfo *)dstopt; 5623 pktinfop->ipi_ifindex = pinfo->ip_pkt_ifindex; 5624 pktinfop->ipi_spec_dst = pinfo->ip_pkt_match_addr; 5625 5626 pktinfop->ipi_addr.s_addr = ((ipha_t *)rptr)->ipha_dst; 5627 5628 dstopt += sizeof (struct in_pktinfo); 5629 udi_size -= toh->len; 5630 } 5631 5632 if (udp->udp_recvslla && recv_on && 5633 (pinfo->ip_pkt_flags & IPF_RECVSLLA)) { 5634 5635 struct T_opthdr *toh; 5636 struct sockaddr_dl *dstptr; 5637 5638 toh = (struct T_opthdr *)dstopt; 5639 toh->level = IPPROTO_IP; 5640 toh->name = IP_RECVSLLA; 5641 toh->len = sizeof (struct T_opthdr) + 5642 sizeof (struct sockaddr_dl); 5643 toh->status = 0; 5644 dstopt += sizeof 
(struct T_opthdr); 5645 dstptr = (struct sockaddr_dl *)dstopt; 5646 bcopy(&pinfo->ip_pkt_slla, dstptr, 5647 sizeof (struct sockaddr_dl)); 5648 dstopt += sizeof (struct sockaddr_dl); 5649 udi_size -= toh->len; 5650 } 5651 5652 if (udp->udp_recvif && recv_on && 5653 (pinfo->ip_pkt_flags & IPF_RECVIF)) { 5654 5655 struct T_opthdr *toh; 5656 uint_t *dstptr; 5657 5658 toh = (struct T_opthdr *)dstopt; 5659 toh->level = IPPROTO_IP; 5660 toh->name = IP_RECVIF; 5661 toh->len = sizeof (struct T_opthdr) + 5662 sizeof (uint_t); 5663 toh->status = 0; 5664 dstopt += sizeof (struct T_opthdr); 5665 dstptr = (uint_t *)dstopt; 5666 *dstptr = pinfo->ip_pkt_ifindex; 5667 dstopt += sizeof (uint_t); 5668 udi_size -= toh->len; 5669 } 5670 5671 if (cr != NULL) { 5672 struct T_opthdr *toh; 5673 5674 toh = (struct T_opthdr *)dstopt; 5675 toh->level = SOL_SOCKET; 5676 toh->name = SCM_UCRED; 5677 toh->len = sizeof (struct T_opthdr) + ucredsize; 5678 toh->status = 0; 5679 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5680 dstopt += toh->len; 5681 udi_size -= toh->len; 5682 } 5683 5684 if (udp->udp_recvttl) { 5685 struct T_opthdr *toh; 5686 uint8_t *dstptr; 5687 5688 toh = (struct T_opthdr *)dstopt; 5689 toh->level = IPPROTO_IP; 5690 toh->name = IP_RECVTTL; 5691 toh->len = sizeof (struct T_opthdr) + 5692 sizeof (uint8_t); 5693 toh->status = 0; 5694 dstopt += sizeof (struct T_opthdr); 5695 dstptr = (uint8_t *)dstopt; 5696 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5697 dstopt += sizeof (uint8_t); 5698 udi_size -= toh->len; 5699 } 5700 5701 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5702 } 5703 BUMP_MIB(&udp_mib, udpHCInDatagrams); 5704 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5705 "udp_rput_other_end: q %p (%S)", q, "end"); 5706 if (options_mp != NULL) 5707 freeb(options_mp); 5708 5709 if (udp->udp_direct_sockfs) { 5710 /* 5711 * There is nothing above us except for the stream head; 5712 * use the read-side synchronous stream interface in 5713 * order to reduce the time spent in 
interrupt thread.
		 */
		ASSERT(udp->udp_issocket);
		udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp));
	} else {
		/*
		 * Use regular STREAMS interface to pass data upstream
		 * if this is not a socket endpoint, or if we have
		 * switched over to the slow mode due to sockmod being
		 * popped or a module being pushed on top of us.
		 */
		putnext(UDP_RD(q), mp);
	}
}

/*
 * Callback wrapper: adapts the three-argument (arg, mp, arg2) callback
 * signature to udp_rput_other() and releases the conn via udp_exit().
 */
/* ARGSUSED */
static void
udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	conn_t	*connp = arg;

	udp_rput_other(connp->conn_rq, mp);
	udp_exit(connp);
}

/*
 * Process a T_BIND_ACK
 */
static void
udp_rput_bind_ack(queue_t *q, mblk_t *mp)
{
	udp_t	*udp = Q_TO_UDP(q);
	mblk_t	*mp1;
	ire_t	*ire;
	struct T_bind_ack *tba;
	uchar_t	*addrp;
	ipa_conn_t	*ac;
	ipa6_conn_t	*ac6;

	if (udp->udp_discon_pending)
		udp->udp_discon_pending = 0;

	/*
	 * If a broadcast/multicast address was bound set
	 * the source address to 0.
	 * This ensures no datagrams with broadcast address
	 * as source address are emitted (which would violate
	 * RFC1122 - Hosts requirements)
	 *
	 * Note that when connecting the returned IRE is
	 * for the destination address and we only perform
	 * the broadcast check for the source address (it
	 * is OK to connect to a broadcast/multicast address.)
	 */
	mp1 = mp->b_cont;
	if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) {
		ire = (ire_t *)mp1->b_rptr;

		/*
		 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast
		 * local address.
		 */
		if (ire->ire_type == IRE_BROADCAST &&
		    udp->udp_state != TS_DATA_XFER) {
			/* This was just a local bind to a broadcast addr */
			V6_SET_ZERO(udp->udp_v6src);
			if (udp->udp_family == AF_INET6)
				(void) udp_build_hdrs(q, udp);
		} else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
			/*
			 * Local address not yet set - pick it from the
			 * T_bind_ack
			 */
			tba = (struct T_bind_ack *)mp->b_rptr;
			addrp = &mp->b_rptr[tba->ADDR_offset];
			switch (udp->udp_family) {
			case AF_INET:
				if (tba->ADDR_length == sizeof (ipa_conn_t)) {
					ac = (ipa_conn_t *)addrp;
				} else {
					ASSERT(tba->ADDR_length ==
					    sizeof (ipa_conn_x_t));
					ac = &((ipa_conn_x_t *)addrp)->acx_conn;
				}
				IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr,
				    &udp->udp_v6src);
				break;
			case AF_INET6:
				if (tba->ADDR_length == sizeof (ipa6_conn_t)) {
					ac6 = (ipa6_conn_t *)addrp;
				} else {
					ASSERT(tba->ADDR_length ==
					    sizeof (ipa6_conn_x_t));
					ac6 = &((ipa6_conn_x_t *)
					    addrp)->ac6x_conn;
				}
				udp->udp_v6src = ac6->ac6_laddr;
				(void) udp_build_hdrs(q, udp);
				break;
			}
		}
		/* Step past the IRE_DB_TYPE mblk to any appended ACKs */
		mp1 = mp1->b_cont;
	}
	/*
	 * Look for one or more appended ACK message added by
	 * udp_connect or udp_disconnect.
	 * If none found just send up the T_BIND_ACK.
	 * udp_connect has appended a T_OK_ACK and a T_CONN_CON.
	 * udp_disconnect has appended a T_OK_ACK.
	 */
	if (mp1 != NULL) {
		/* Detach the appended ACK chain, free the rest of mp */
		if (mp->b_cont == mp1)
			mp->b_cont = NULL;
		else {
			ASSERT(mp->b_cont->b_cont == mp1);
			mp->b_cont->b_cont = NULL;
		}
		freemsg(mp);
		mp = mp1;
		/* Send each appended ACK upstream as a separate message */
		while (mp != NULL) {
			mp1 = mp->b_cont;
			mp->b_cont = NULL;
			putnext(UDP_RD(q), mp);
			mp = mp1;
		}
		return;
	}
	freemsg(mp->b_cont);
	mp->b_cont = NULL;
	putnext(UDP_RD(q), mp);
}

/*
 * return SNMP stuff in buffer in mpdata
 *
 * Replies with the fixed-size UDP counter block first, then the
 * per-endpoint IPv4/IPv6 connection tables and the MLP attribute
 * tables, each via its own qreply().  Returns 0 if the reply control
 * blocks could not be duplicated, 1 otherwise.
 */
int
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t	*mpdata;
	mblk_t	*mp_conn_ctl;
	mblk_t	*mp_attr_ctl;
	mblk_t	*mp6_conn_ctl;
	mblk_t	*mp6_attr_ctl;
	mblk_t	*mp_conn_tail;
	mblk_t	*mp_attr_tail;
	mblk_t	*mp6_conn_tail;
	mblk_t	*mp6_attr_tail;
	struct opthdr	*optp;
	mib2_udpEntry_t	ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int	state;
	zoneid_t	zoneid;
	int	i;
	connf_t	*connfp;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;
	int	v4_conn_idx;
	int	v6_conn_idx;
	boolean_t	needattr;

	/*
	 * NOTE(review): mp6_attr_ctl is deliberately not pre-set to NULL;
	 * it is assigned last in the short-circuit chain below and is not
	 * freed on the error path, so it is never read uninitialized.
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		return (0);
	}

	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	/* synchronize 64- and 32-bit counters */
	SYNC32_MIB(&udp_mib, udpInDatagrams, udpHCInDatagrams);
	SYNC32_MIB(&udp_mib, udpOutDatagrams, udpHCOutDatagrams);

	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDP))) {
			udp = connp->conn_udp;
			/* Only report endpoints in the caller's zone */
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (udp->udp_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress =
				    V4_PART_OF_V6(udp->udp_v6src);
				ude.udpLocalPort = ntohs(udp->udp_port);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    V4_PART_OF_V6(udp->udp_v6dst);
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude.udpCreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (udp->udp_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state = state;
				ude6.udp6LocalAddress = udp->udp_v6src;
				ude6.udp6LocalPort = ntohs(udp->udp_port);
				ude6.udp6IfIndex = udp->udp_bound_if;
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    udp->udp_v6dst;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude6.udp6CreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (1);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to be
 * changed to do the appropriate locking.
6083 */ 6084 /* ARGSUSED */ 6085 int 6086 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 6087 uchar_t *ptr, int len) 6088 { 6089 switch (level) { 6090 case MIB2_UDP: 6091 return (0); 6092 default: 6093 return (1); 6094 } 6095 } 6096 6097 static void 6098 udp_report_item(mblk_t *mp, udp_t *udp) 6099 { 6100 char *state; 6101 char addrbuf1[INET6_ADDRSTRLEN]; 6102 char addrbuf2[INET6_ADDRSTRLEN]; 6103 uint_t print_len, buf_len; 6104 6105 buf_len = mp->b_datap->db_lim - mp->b_wptr; 6106 ASSERT(buf_len >= 0); 6107 if (buf_len == 0) 6108 return; 6109 6110 if (udp->udp_state == TS_UNBND) 6111 state = "UNBOUND"; 6112 else if (udp->udp_state == TS_IDLE) 6113 state = "IDLE"; 6114 else if (udp->udp_state == TS_DATA_XFER) 6115 state = "CONNECTED"; 6116 else 6117 state = "UnkState"; 6118 print_len = snprintf((char *)mp->b_wptr, buf_len, 6119 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 6120 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 6121 inet_ntop(AF_INET6, &udp->udp_v6src, 6122 addrbuf1, sizeof (addrbuf1)), 6123 inet_ntop(AF_INET6, &udp->udp_v6dst, 6124 addrbuf2, sizeof (addrbuf2)), 6125 ntohs(udp->udp_dstport), state); 6126 if (print_len < buf_len) { 6127 mp->b_wptr += print_len; 6128 } else { 6129 mp->b_wptr += buf_len; 6130 } 6131 } 6132 6133 /* Report for ndd "udp_status" */ 6134 /* ARGSUSED */ 6135 static int 6136 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 6137 { 6138 zoneid_t zoneid; 6139 connf_t *connfp; 6140 conn_t *connp = Q_TO_CONN(q); 6141 udp_t *udp = connp->conn_udp; 6142 int i; 6143 6144 /* 6145 * Because of the ndd constraint, at most we can have 64K buffer 6146 * to put in all UDP info. So to be more efficient, just 6147 * allocate a 64K buffer here, assuming we need that large buffer. 6148 * This may be a problem as any user can read udp_status. Therefore 6149 * we limit the rate of doing this using udp_ndd_get_info_interval. 6150 * This should be OK as normal users should not do this too often. 
6151 */ 6152 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 6153 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 6154 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 6155 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 6156 return (0); 6157 } 6158 } 6159 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 6160 /* The following may work even if we cannot get a large buf. */ 6161 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 6162 return (0); 6163 } 6164 (void) mi_mpprintf(mp, 6165 "UDP " MI_COL_HDRPAD_STR 6166 /* 12345678[89ABCDEF] */ 6167 " zone lport src addr dest addr port state"); 6168 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 6169 6170 zoneid = connp->conn_zoneid; 6171 6172 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 6173 connfp = &ipcl_globalhash_fanout[i]; 6174 connp = NULL; 6175 6176 while ((connp = ipcl_get_next_conn(connfp, connp, 6177 IPCL_UDP))) { 6178 udp = connp->conn_udp; 6179 if (zoneid != GLOBAL_ZONEID && 6180 zoneid != connp->conn_zoneid) 6181 continue; 6182 6183 udp_report_item(mp->b_cont, udp); 6184 } 6185 } 6186 udp_last_ndd_get_info_time = ddi_get_lbolt(); 6187 return (0); 6188 } 6189 6190 /* 6191 * This routine creates a T_UDERROR_IND message and passes it upstream. 6192 * The address and options are copied from the T_UNITDATA_REQ message 6193 * passed in mp. This message is freed. 
 */
static void
udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
    t_scalar_t err)
{
	struct T_unitdata_req *tudr;
	mblk_t	*mp1;
	uchar_t	*optaddr;
	t_scalar_t optlen;

	if (DB_TYPE(mp) == M_DATA) {
		/* M_DATA: caller supplied the destination; no options */
		ASSERT(destaddr != NULL && destlen != 0);
		optaddr = NULL;
		optlen = 0;
	} else {
		/* T_UNITDATA_REQ: pull address/options out of the request */
		if ((mp->b_wptr < mp->b_rptr) ||
		    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
			goto done;
		}
		tudr = (struct T_unitdata_req *)mp->b_rptr;
		/*
		 * Sanity-check DEST/OPT offset+length against the mblk
		 * bounds before using them.
		 * NOTE(review): with a hostile DEST_offset/DEST_length the
		 * pointer additions below could in principle wrap; the
		 * "< mp->b_rptr" comparisons are the guard for that case.
		 */
		destaddr = mp->b_rptr + tudr->DEST_offset;
		if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
		    destaddr + tudr->DEST_length < mp->b_rptr ||
		    destaddr + tudr->DEST_length > mp->b_wptr) {
			goto done;
		}
		optaddr = mp->b_rptr + tudr->OPT_offset;
		if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
		    optaddr + tudr->OPT_length < mp->b_rptr ||
		    optaddr + tudr->OPT_length > mp->b_wptr) {
			goto done;
		}
		destlen = tudr->DEST_length;
		optlen = tudr->OPT_length;
	}

	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
	    (char *)optaddr, optlen, err);
	if (mp1 != NULL)
		putnext(UDP_RD(q), mp1);

done:
	freemsg(mp);
}

/*
 * This routine removes a port number association from a stream. It
 * is called by udp_wput to handle T_UNBIND_REQ messages.
 */
static void
udp_unbind(queue_t *q, mblk_t *mp)
{
	udp_t	*udp = Q_TO_UDP(q);

	/* If a bind has not been done, we can't unbind. */
	if (udp->udp_state == TS_UNBND) {
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	if (cl_inet_unbind != NULL) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}
	}

	/* Drop the bind-hash entry and clear the local naming state */
	udp_bind_hash_remove(udp, B_FALSE);
	V6_SET_ZERO(udp->udp_v6src);
	V6_SET_ZERO(udp->udp_bound_v6src);
	udp->udp_port = 0;
	udp->udp_state = TS_UNBND;

	if (udp->udp_family == AF_INET6) {
		int error;

		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	/*
	 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
	 * and therefore ip_unbind must never return NULL.
	 */
	mp = ip_unbind(q, mp);
	ASSERT(mp != NULL);
	putnext(UDP_RD(q), mp);
}

/*
 * Don't let port fall into the privileged range.
 * Since the extra privileged ports can be arbitrary we also
 * ensure that we exclude those from consideration.
 * udp_g_epriv_ports is not sorted thus we loop over it until
 * there are no changes.
 *
 * When 'random' is set (and udp_random_anon_port is enabled) the
 * starting point is drawn from the random pool rather than the
 * caller-supplied 'port'.  Returns 0 if the whole anon range was
 * scanned without finding a usable port.
 */
static in_port_t
udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
{
	int i;
	in_port_t nextport;
	boolean_t restart = B_FALSE;

	if (random && udp_random_anon_port != 0) {
		(void) random_get_pseudo_bytes((uint8_t *)&port,
		    sizeof (in_port_t));
		/*
		 * Unless changed by a sys admin, the smallest anon port
		 * is 32768 and the largest anon port is 65535. It is
		 * very likely (50%) for the random port to be smaller
		 * than the smallest anon port. When that happens,
		 * add port % (anon port range) to the smallest anon
		 * port to get the random port. It should fall into the
		 * valid anon port range.
		 */
		if (port < udp_smallest_anon_port) {
			port = udp_smallest_anon_port +
			    port % (udp_largest_anon_port -
			    udp_smallest_anon_port);
		}
	}

retry:
	/* Clamp into the anonymous port range */
	if (port < udp_smallest_anon_port)
		port = udp_smallest_anon_port;

	if (port > udp_largest_anon_port) {
		/* Wrapped; a second wrap means nothing is available */
		port = udp_smallest_anon_port;
		if (restart)
			return (0);
		restart = B_TRUE;
	}

	if (port < udp_smallest_nonpriv_port)
		port = udp_smallest_nonpriv_port;

	/* Skip any of the (unsorted) extra-privileged ports */
	for (i = 0; i < udp_g_num_epriv_ports; i++) {
		if (port == udp_g_epriv_ports[i]) {
			port++;
			/*
			 * Make sure that the port is in the
			 * valid range.
			 */
			goto retry;
		}
	}

	/* On a labeled system, defer to the zone's port policy */
	if (is_system_labeled() &&
	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
	    port, IPPROTO_UDP, B_TRUE)) != 0) {
		port = nextport;
		goto retry;
	}

	return (port);
}

/*
 * (TSOL) Compute the security label option for packets to dst and install
 * it in udp_ip_snd_options; on success, dst is cached in udp_v6lastdst so
 * the label is only recomputed when the destination changes.
 */
static int
udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst)
{
	int err;
	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
	udp_t *udp = Q_TO_UDP(wq);

	err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst,
	    opt_storage, udp->udp_mac_exempt);
	if (err == 0) {
		err = tsol_update_options(&udp->udp_ip_snd_options,
		    &udp->udp_ip_snd_options_len, &udp->udp_label_len,
		    opt_storage);
	}
	if (err != 0) {
		DTRACE_PROBE4(
		    tx__ip__log__info__updatelabel__udp,
		    char *, "queue(1) failed to update options(2) on mp(3)",
		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
	} else {
		IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst);
	}
	return (err);
}

/*
 * Prepend IPv4 and UDP headers to the payload in mp and hand the datagram
 * to IP.  Returns NULL when mp has been consumed (success); otherwise
 * returns mp with *error set.
 */
static mblk_t *
udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
    uint_t srcid, int
*error) 6389 { 6390 udp_t *udp = connp->conn_udp; 6391 queue_t *q = connp->conn_wq; 6392 mblk_t *mp1 = mp; 6393 mblk_t *mp2; 6394 ipha_t *ipha; 6395 int ip_hdr_length; 6396 uint32_t ip_len; 6397 udpha_t *udpha; 6398 udpattrs_t attrs; 6399 uchar_t ip_snd_opt[IP_MAX_OPT_LENGTH]; 6400 uint32_t ip_snd_opt_len = 0; 6401 ip4_pkt_t pktinfo; 6402 ip4_pkt_t *pktinfop = &pktinfo; 6403 ip_opt_info_t optinfo; 6404 6405 6406 *error = 0; 6407 pktinfop->ip4_ill_index = 0; 6408 pktinfop->ip4_addr = INADDR_ANY; 6409 optinfo.ip_opt_flags = 0; 6410 optinfo.ip_opt_ill_index = 0; 6411 6412 if (v4dst == INADDR_ANY) 6413 v4dst = htonl(INADDR_LOOPBACK); 6414 6415 /* 6416 * If options passed in, feed it for verification and handling 6417 */ 6418 attrs.udpattr_credset = B_FALSE; 6419 if (DB_TYPE(mp) != M_DATA) { 6420 mp1 = mp->b_cont; 6421 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 6422 attrs.udpattr_ipp4 = pktinfop; 6423 attrs.udpattr_mb = mp; 6424 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 6425 goto done; 6426 /* 6427 * Note: success in processing options. 6428 * mp option buffer represented by 6429 * OPT_length/offset now potentially modified 6430 * and contain option setting results 6431 */ 6432 ASSERT(*error == 0); 6433 } 6434 } 6435 6436 /* mp1 points to the M_DATA mblk carrying the packet */ 6437 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6438 6439 /* 6440 * Check if our saved options are valid; update if not 6441 * TSOL Note: Since we are not in WRITER mode, UDP packets 6442 * to different destination may require different labels. 6443 * We use conn_lock to ensure that lastdst, ip_snd_options, 6444 * and ip_snd_options_len are consistent for the current 6445 * destination and are updated atomically. 
6446 */ 6447 mutex_enter(&connp->conn_lock); 6448 if (is_system_labeled()) { 6449 /* Using UDP MLP requires SCM_UCRED from user */ 6450 if (connp->conn_mlp_type != mlptSingle && 6451 !attrs.udpattr_credset) { 6452 mutex_exit(&connp->conn_lock); 6453 DTRACE_PROBE4( 6454 tx__ip__log__info__output__udp, 6455 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6456 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6457 *error = ECONNREFUSED; 6458 goto done; 6459 } 6460 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 6461 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst) && 6462 (*error = udp_update_label(q, mp, v4dst)) != 0) { 6463 mutex_exit(&connp->conn_lock); 6464 goto done; 6465 } 6466 } 6467 if (udp->udp_ip_snd_options_len > 0) { 6468 ip_snd_opt_len = udp->udp_ip_snd_options_len; 6469 bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len); 6470 } 6471 mutex_exit(&connp->conn_lock); 6472 6473 /* Add an IP header */ 6474 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len; 6475 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 6476 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 6477 !OK_32PTR(ipha)) { 6478 mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO); 6479 if (mp2 == NULL) { 6480 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6481 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 6482 *error = ENOMEM; 6483 goto done; 6484 } 6485 mp2->b_wptr = DB_LIM(mp2); 6486 mp2->b_cont = mp1; 6487 mp1 = mp2; 6488 if (DB_TYPE(mp) != M_DATA) 6489 mp->b_cont = mp1; 6490 else 6491 mp = mp1; 6492 6493 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 6494 } 6495 ip_hdr_length -= UDPH_SIZE; 6496 #ifdef _BIG_ENDIAN 6497 /* Set version, header length, and tos */ 6498 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 6499 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 6500 udp->udp_type_of_service); 6501 /* Set ttl and protocol */ 6502 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 6503 #else 6504 /* Set version, header length, and tos */ 6505 
*(uint16_t *)&ipha->ipha_version_and_hdr_length = 6506 ((udp->udp_type_of_service << 8) | 6507 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 6508 /* Set ttl and protocol */ 6509 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 6510 #endif 6511 if (pktinfop->ip4_addr != INADDR_ANY) { 6512 ipha->ipha_src = pktinfop->ip4_addr; 6513 optinfo.ip_opt_flags = IP_VERIFY_SRC; 6514 } else { 6515 /* 6516 * Copy our address into the packet. If this is zero, 6517 * first look at __sin6_src_id for a hint. If we leave the 6518 * source as INADDR_ANY then ip will fill in the real source 6519 * address. 6520 */ 6521 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 6522 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 6523 in6_addr_t v6src; 6524 6525 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid); 6526 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 6527 } 6528 } 6529 6530 if (pktinfop->ip4_ill_index != 0) { 6531 optinfo.ip_opt_ill_index = pktinfop->ip4_ill_index; 6532 } 6533 6534 ipha->ipha_fragment_offset_and_flags = 0; 6535 ipha->ipha_ident = 0; 6536 6537 mp1->b_rptr = (uchar_t *)ipha; 6538 6539 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 6540 (uintptr_t)UINT_MAX); 6541 6542 /* Determine length of packet */ 6543 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 6544 if ((mp2 = mp1->b_cont) != NULL) { 6545 do { 6546 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6547 ip_len += (uint32_t)MBLKL(mp2); 6548 } while ((mp2 = mp2->b_cont) != NULL); 6549 } 6550 /* 6551 * If the size of the packet is greater than the maximum allowed by 6552 * ip, return an error. Passing this down could cause panics because 6553 * the size will have wrapped and be inconsistent with the msg size. 
6554 */ 6555 if (ip_len > IP_MAXPACKET) { 6556 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6557 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 6558 *error = EMSGSIZE; 6559 goto done; 6560 } 6561 ipha->ipha_length = htons((uint16_t)ip_len); 6562 ip_len -= ip_hdr_length; 6563 ip_len = htons((uint16_t)ip_len); 6564 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 6565 6566 /* 6567 * Copy in the destination address 6568 */ 6569 ipha->ipha_dst = v4dst; 6570 6571 /* 6572 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 6573 */ 6574 if (CLASSD(v4dst)) 6575 ipha->ipha_ttl = udp->udp_multicast_ttl; 6576 6577 udpha->uha_dst_port = port; 6578 udpha->uha_src_port = udp->udp_port; 6579 6580 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 6581 uint32_t cksum; 6582 6583 bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len); 6584 /* 6585 * Massage source route putting first source route in ipha_dst. 6586 * Ignore the destination in T_unitdata_req. 6587 * Create a checksum adjustment for a source route, if any. 6588 */ 6589 cksum = ip_massage_options(ipha); 6590 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6591 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 6592 (ipha->ipha_dst & 0xFFFF); 6593 if ((int)cksum < 0) 6594 cksum--; 6595 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6596 /* 6597 * IP does the checksum if uha_checksum is non-zero, 6598 * We make it easy for IP to include our pseudo header 6599 * by putting our length in uha_checksum. 6600 */ 6601 cksum += ip_len; 6602 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6603 /* There might be a carry. */ 6604 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6605 #ifdef _LITTLE_ENDIAN 6606 if (udp_do_checksum) 6607 ip_len = (cksum << 16) | ip_len; 6608 #else 6609 if (udp_do_checksum) 6610 ip_len = (ip_len << 16) | cksum; 6611 else 6612 ip_len <<= 16; 6613 #endif 6614 } else { 6615 /* 6616 * IP does the checksum if uha_checksum is non-zero, 6617 * We make it easy for IP to include our pseudo header 6618 * by putting our length in uha_checksum. 
/*
 * udp_send_data():
 * Fast-path transmit for an IPv4 UDP datagram that already has its IP/UDP
 * headers built (by udp_output_v4).  Tries to use the conn's cached IRE and
 * the link-layer fast-path header; anything unusual (multicast on a loopback
 * ipif, no/stale IRE, broadcast/local/multirt routes, oversized packet,
 * missing fast-path header) is punted to the legacy ip_output() slow path.
 *
 *	udp	- the sending endpoint
 *	q	- write queue used for slow-path sends and flow-control putq
 *	mp	- M_DATA mblk whose b_rptr points at the IPv4 header
 *	ipha	- IPv4 header within mp (ipha_ident must still be 0)
 *
 * Consumes mp on every path (sent, queued, or freed).
 */
static void
udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
{
	conn_t		*connp = udp->udp_connp;
	ipaddr_t	src, dst;
	ill_t		*ill;
	ire_t		*ire;
	ipif_t		*ipif = NULL;
	mblk_t		*ire_fp_mp;
	uint_t		ire_fp_mp_len;
	uint16_t	*up;
	uint32_t	cksum, hcksum_txflags;
	queue_t		*dev_q;
	boolean_t	retry_caching;

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;
	/* Caller must not have assigned an IP ident yet; we do it below. */
	ASSERT(ipha->ipha_ident == 0);

	if (CLASSD(dst)) {
		int err;

		/* Multicast: resolve the outgoing ipif from socket options. */
		ipif = conn_get_held_ipif(connp,
		    &connp->conn_multicast_ipif, &err);

		/* v6 or loopback ipifs can't take this v4 fast path. */
		if (ipif == NULL || ipif->ipif_isv6 ||
		    (ipif->ipif_ill->ill_phyint->phyint_flags &
		    PHYI_LOOPBACK)) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(udp_ip_send);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
	}

	retry_caching = B_FALSE;
	mutex_enter(&connp->conn_lock);
	ire = connp->conn_ire_cache;
	ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));

	/*
	 * Decide whether the cached IRE is still usable: it must exist,
	 * match the destination, and not be marked for deletion.  For
	 * multicast IRE_CACHE entries the send-side ill must also match
	 * (or share an ill group with) the chosen multicast ipif.
	 */
	if (ire == NULL || ire->ire_addr != dst ||
	    (ire->ire_marks & IRE_MARK_CONDEMNED)) {
		retry_caching = B_TRUE;
	} else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) {
		ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr;

		ASSERT(ipif != NULL);
		if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL ||
		    stq_ill->ill_group != ipif->ipif_ill->ill_group))
			retry_caching = B_TRUE;
	}

	if (!retry_caching) {
		ASSERT(ire != NULL);
		IRE_REFHOLD(ire);
		mutex_exit(&connp->conn_lock);
	} else {
		boolean_t cached = B_FALSE;

		connp->conn_ire_cache = NULL;
		mutex_exit(&connp->conn_lock);

		/* Release the old ire */
		if (ire != NULL) {
			IRE_REFRELE_NOTR(ire);
			ire = NULL;
		}

		/* Look up a fresh route; label-aware on labeled systems. */
		if (CLASSD(dst)) {
			ASSERT(ipif != NULL);
			ire = ire_ctable_lookup(dst, 0, 0, ipif,
			    connp->conn_zoneid, MBLK_GETLABEL(mp),
			    MATCH_IRE_ILL_GROUP);
		} else {
			ASSERT(ipif == NULL);
			ire = ire_cache_lookup(dst, connp->conn_zoneid,
			    MBLK_GETLABEL(mp));
		}

		if (ire == NULL) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(udp_ire_null);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
		IRE_REFHOLD_NOTR(ire);

		/*
		 * Re-take conn_lock and install the new IRE in the cache,
		 * but only if the conn isn't closing, nobody raced us into
		 * the cache slot, and the IRE wasn't condemned meanwhile
		 * (checked under the IRE bucket lock).
		 */
		mutex_enter(&connp->conn_lock);
		if (!(connp->conn_state_flags & CONN_CLOSING) &&
		    connp->conn_ire_cache == NULL) {
			rw_enter(&ire->ire_bucket->irb_lock, RW_READER);
			if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) {
				connp->conn_ire_cache = ire;
				cached = B_TRUE;
			}
			rw_exit(&ire->ire_bucket->irb_lock);
		}
		mutex_exit(&connp->conn_lock);

		/*
		 * We can continue to use the ire but since it was not
		 * cached, we should drop the extra reference.
		 */
		if (!cached)
			IRE_REFRELE_NOTR(ire);
	}
	ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION);
	ASSERT(!CLASSD(dst) || ipif != NULL);

	/*
	 * Check if we can take the fast-path.
	 * Note that "incomplete" ire's (where the link-layer for next hop
	 * is not resolved, or where the fast-path header in nce_fp_mp is not
	 * available yet) are sent down the legacy (slow) path
	 */
	if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
	    (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) ||
	    (ire->ire_max_frag < ntohs(ipha->ipha_length)) ||
	    (connp->conn_nexthop_set) ||
	    (ire->ire_nce == NULL) ||
	    ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) ||
	    ((ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp))) {
		if (ipif != NULL)
			ipif_refrele(ipif);
		UDP_STAT(udp_ip_ire_send);
		IRE_REFRELE(ire);
		ip_output(connp, mp, q, IP_WPUT);
		return;
	}

	ill = ire_to_ill(ire);
	ASSERT(ill != NULL);

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);

	dev_q = ire->ire_stq->q_next;
	ASSERT(dev_q != NULL);
	/*
	 * If the service thread is already running, or if the driver
	 * queue is currently flow-controlled, queue this packet.
	 */
	if ((q->q_first != NULL || connp->conn_draining) ||
	    ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) {
		/* ip_output_queue chooses queue-vs-drop policy. */
		if (ip_output_queue) {
			(void) putq(q, mp);
		} else {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			freemsg(mp);
		}
		if (ipif != NULL)
			ipif_refrele(ipif);
		IRE_REFRELE(ire);
		return;
	}

	/* Assign the IP ident; byte-swap into network order on LE hosts. */
	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
#ifndef _BIG_ENDIAN
	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
#endif

	/* Fill in the source address if the sender left it unspecified. */
	if (src == INADDR_ANY && !connp->conn_unspec_src) {
		if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
			src = ipha->ipha_src = ipif->ipif_src_addr;
		else
			src = ipha->ipha_src = ire->ire_src_addr;
	}

	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		ASSERT(ill->ill_hcksum_capab != NULL);
		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
	} else {
		hcksum_txflags = 0;
	}

	/* pseudo-header checksum (do it in parts for IP header checksum) */
	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);

	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
	/* *up == 0 means the sender elected no UDP checksum; leave it. */
	if (*up != 0) {
		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
		    ntohs(ipha->ipha_length), cksum);

		/* Software checksum? */
		if (DB_CKSUMFLAGS(mp) == 0) {
			UDP_STAT(udp_out_sw_cksum);
			UDP_STAT_UPDATE(udp_out_sw_cksum_bytes,
			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
		}
	}

	ipha->ipha_fragment_offset_and_flags |=
	    (uint32_t)htons(ire->ire_frag_flag);

	/* Calculate IP header checksum if hardware isn't capable */
	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
		    ((uint16_t *)ipha)[4]);
	}

	if (CLASSD(dst)) {
		ilm_t *ilm;

		/* Loop the packet back if we're a member of the group. */
		ILM_WALKER_HOLD(ill);
		ilm = ilm_lookup_ill(ill, dst, ALL_ZONES);
		ILM_WALKER_RELE(ill);
		if (ilm != NULL) {
			ip_multicast_loopback(q, ill, mp,
			    connp->conn_multicast_loop ? 0 :
			    IP_FF_NO_MCAST_LOOP, connp->conn_zoneid);
		}

		/* If multicast TTL is 0 then we are done */
		if (ipha->ipha_ttl == 0) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			freemsg(mp);
			IRE_REFRELE(ire);
			return;
		}
	}

	/* Prepend the cached link-layer (fast-path) header. */
	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);

	UPDATE_OB_PKT_COUNT(ire);
	ire->ire_last_used_time = lbolt;

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits);
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
	    ntohs(ipha->ipha_length));

	if (ILL_DLS_CAPABLE(ill)) {
		/*
		 * Send the packet directly to DLD, where it may be queued
		 * depending on the availability of transmit resources at
		 * the media layer.
		 */
		IP_DLS_ILL_TX(ill, ipha, mp);
	} else {
		/* Run firewall hooks; they may consume or replace mp. */
		DTRACE_PROBE4(ip4__physical__out__start,
		    ill_t *, NULL, ill_t *, ill,
		    ipha_t *, ipha, mblk_t *, mp);
		FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out,
		    NULL, ill, ipha, mp, mp);
		DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
		if (mp != NULL)
			putnext(ire->ire_stq, mp);
	}

	if (ipif != NULL)
		ipif_refrele(ipif);
	IRE_REFRELE(ire);
}
6911 */ 6912 IP_DLS_ILL_TX(ill, ipha, mp); 6913 } else { 6914 DTRACE_PROBE4(ip4__physical__out__start, 6915 ill_t *, NULL, ill_t *, ill, 6916 ipha_t *, ipha, mblk_t *, mp); 6917 FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out, 6918 NULL, ill, ipha, mp, mp); 6919 DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp); 6920 if (mp != NULL) 6921 putnext(ire->ire_stq, mp); 6922 } 6923 6924 if (ipif != NULL) 6925 ipif_refrele(ipif); 6926 IRE_REFRELE(ire); 6927 } 6928 6929 static boolean_t 6930 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 6931 { 6932 udp_t *udp = Q_TO_UDP(wq); 6933 int err; 6934 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 6935 6936 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 6937 dst, opt_storage, udp->udp_mac_exempt); 6938 if (err == 0) { 6939 err = tsol_update_sticky(&udp->udp_sticky_ipp, 6940 &udp->udp_label_len_v6, opt_storage); 6941 } 6942 if (err != 0) { 6943 DTRACE_PROBE4( 6944 tx__ip__log__drop__updatelabel__udp6, 6945 char *, "queue(1) failed to update options(2) on mp(3)", 6946 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6947 } else { 6948 udp->udp_v6lastdst = *dst; 6949 } 6950 return (err); 6951 } 6952 6953 /* 6954 * This routine handles all messages passed downstream. It either 6955 * consumes the message or passes it downstream; it never queues a 6956 * a message. 6957 */ 6958 static void 6959 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6960 { 6961 sin6_t *sin6; 6962 sin_t *sin; 6963 ipaddr_t v4dst; 6964 uint16_t port; 6965 uint_t srcid; 6966 queue_t *q = connp->conn_wq; 6967 udp_t *udp = connp->conn_udp; 6968 int error = 0; 6969 struct sockaddr_storage ss; 6970 6971 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6972 "udp_wput_start: connp %p mp %p", connp, mp); 6973 6974 /* 6975 * We directly handle several cases here: T_UNITDATA_REQ message 6976 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 6977 * connected and non-connected socket. 
/*
 * This routine handles all messages passed downstream. It either
 * consumes the message or passes it downstream; it never queues a
 * a message.
 *
 *	connp	- the sending endpoint's conn
 *	mp	- M_DATA payload, or M_PROTO/M_PCPROTO T_UNITDATA_REQ
 *	addr	- destination sockaddr supplied by direct sockfs for
 *		  unconnected M_DATA sends; NULL otherwise
 *	addrlen	- length of addr (0 when addr is NULL)
 */
static void
udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
{
	sin6_t		*sin6;
	sin_t		*sin;
	ipaddr_t	v4dst;
	uint16_t	port;
	uint_t		srcid;
	queue_t		*q = connp->conn_wq;
	udp_t		*udp = connp->conn_udp;
	int		error = 0;
	struct sockaddr_storage ss;

	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
	    "udp_wput_start: connp %p mp %p", connp, mp);

	/*
	 * We directly handle several cases here: T_UNITDATA_REQ message
	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both
	 * connected and non-connected socket. The latter carries the
	 * address structure along when this routine gets called.
	 */
	switch (DB_TYPE(mp)) {
	case M_DATA:
		if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) {
			if (!udp->udp_direct_sockfs ||
			    addr == NULL || addrlen == 0) {
				/* Not connected; address is required */
				BUMP_MIB(&udp_mib, udpOutErrors);
				UDP_STAT(udp_out_err_notconn);
				freemsg(mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: connp %p (%S)", connp,
				    "not-connected; address required");
				return;
			}
			ASSERT(udp->udp_issocket);
			UDP_DBGSTAT(udp_data_notconn);
			/* Not connected; do some more checks below */
			break;
		}
		/* M_DATA for connected socket */
		UDP_DBGSTAT(udp_data_conn);
		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst);

		/* Initialize addr and addrlen as if they're passed in */
		if (udp->udp_family == AF_INET) {
			sin = (sin_t *)&ss;
			sin->sin_family = AF_INET;
			sin->sin_port = udp->udp_dstport;
			sin->sin_addr.s_addr = v4dst;
			addr = (struct sockaddr *)sin;
			addrlen = sizeof (*sin);
		} else {
			sin6 = (sin6_t *)&ss;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_port = udp->udp_dstport;
			sin6->sin6_flowinfo = udp->udp_flowinfo;
			sin6->sin6_addr = udp->udp_v6dst;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = 0;
			addr = (struct sockaddr *)sin6;
			addrlen = sizeof (*sin6);
		}

		if (udp->udp_family == AF_INET ||
		    IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) {
			/*
			 * Handle both AF_INET and AF_INET6; the latter
			 * for IPV4 mapped destination addresses. Note
			 * here that both addr and addrlen point to the
			 * corresponding struct depending on the address
			 * family of the socket.
			 */
			mp = udp_output_v4(connp, mp, v4dst,
			    udp->udp_dstport, 0, &error);
		} else {
			mp = udp_output_v6(connp, mp, sin6, &error);
		}
		if (error != 0) {
			ASSERT(addr != NULL && addrlen != 0);
			goto ud_error;
		}
		return;
	case M_PROTO:
	case M_PCPROTO: {
		struct T_unitdata_req *tudr;

		ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX);
		tudr = (struct T_unitdata_req *)mp->b_rptr;

		/* Handle valid T_UNITDATA_REQ here */
		if (MBLKL(mp) >= sizeof (*tudr) &&
		    ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) {
			/* A T_UNITDATA_REQ must carry a data mblk. */
			if (mp->b_cont == NULL) {
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: q %p (%S)", q, "badaddr");
				error = EPROTO;
				goto ud_error;
			}

			/* Destination address must lie within the mblk. */
			if (!MBLKIN(mp, 0, tudr->DEST_offset +
			    tudr->DEST_length)) {
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: q %p (%S)", q, "badaddr");
				error = EADDRNOTAVAIL;
				goto ud_error;
			}
			/*
			 * If a port has not been bound to the stream, fail.
			 * This is not a problem when sockfs is directly
			 * above us, because it will ensure that the socket
			 * is first bound before allowing data to be sent.
			 */
			if (udp->udp_state == TS_UNBND) {
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: q %p (%S)", q, "outstate");
				error = EPROTO;
				goto ud_error;
			}
			addr = (struct sockaddr *)
			    &mp->b_rptr[tudr->DEST_offset];
			addrlen = tudr->DEST_length;
			if (tudr->OPT_length != 0)
				UDP_STAT(udp_out_opt);
			break;
		}
		/* FALLTHRU */
	}
	default:
		/* Anything else needs exclusive (writer) handling. */
		udp_become_writer(connp, mp, udp_wput_other_wrapper,
		    SQTAG_UDP_OUTPUT);
		return;
	}
	ASSERT(addr != NULL);

	/* Validate the destination sockaddr and extract port/address. */
	switch (udp->udp_family) {
	case AF_INET6:
		sin6 = (sin6_t *)addr;
		if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) ||
		    sin6->sin6_family != AF_INET6) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "badaddr");
			error = EADDRNOTAVAIL;
			goto ud_error;
		}

		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			/*
			 * Destination is a non-IPv4-compatible IPv6 address.
			 * Send out an IPv6 format packet.
			 */
			mp = udp_output_v6(connp, mp, sin6, &error);
			if (error != 0)
				goto ud_error;

			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "udp_output_v6");
			return;
		}
		/*
		 * If the local address is not zero or a mapped address
		 * return an error. It would be possible to send an IPv4
		 * packet but the response would never make it back to the
		 * application since it is bound to a non-mapped address.
		 */
		if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "badaddr");
			error = EADDRNOTAVAIL;
			goto ud_error;
		}
		/* Send IPv4 packet without modifying udp_ipversion */
		/* Extract port and ipaddr */
		port = sin6->sin6_port;
		IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
		srcid = sin6->__sin6_src_id;
		break;

	case AF_INET:
		sin = (sin_t *)addr;
		if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) ||
		    sin->sin_family != AF_INET) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "badaddr");
			error = EADDRNOTAVAIL;
			goto ud_error;
		}
		/* Extract port and ipaddr */
		port = sin->sin_port;
		v4dst = sin->sin_addr.s_addr;
		srcid = 0;
		break;
	}

	mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error);
	if (error != 0) {
ud_error:
		UDP_STAT(udp_out_err_output);
		ASSERT(mp != NULL);
		/* mp is freed by the following routine */
		udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen,
		    (t_scalar_t)error);
	}
}

/*
 * Squeue callback: run udp_output() with no explicit address (the
 * message itself carries whatever addressing is needed), then exit
 * the per-conn synchronization.
 */
/* ARGSUSED */
static void
udp_output_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_output((conn_t *)arg, mp, NULL, 0);
	_UDP_EXIT((conn_t *)arg);
}

/*
 * STREAMS write-side put procedure: enter the conn's serialization
 * and dispatch to udp_output via the wrapper above.
 */
static void
udp_wput(queue_t *q, mblk_t *mp)
{
	_UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper,
	    SQTAG_UDP_WPUT);
}
7182 */ 7183 static mblk_t * 7184 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 7185 { 7186 struct T_unitdata_req *tudr; 7187 mblk_t *mp; 7188 7189 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 7190 if (mp != NULL) { 7191 mp->b_wptr += sizeof (*tudr) + addrlen; 7192 DB_TYPE(mp) = M_PROTO; 7193 7194 tudr = (struct T_unitdata_req *)mp->b_rptr; 7195 tudr->PRIM_type = T_UNITDATA_REQ; 7196 tudr->DEST_length = addrlen; 7197 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 7198 tudr->OPT_length = 0; 7199 tudr->OPT_offset = 0; 7200 bcopy(addr, tudr+1, addrlen); 7201 } 7202 return (mp); 7203 } 7204 7205 /* 7206 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 7207 * is valid when we are directly beneath the stream head, and thus sockfs 7208 * is able to bypass STREAMS and directly call us, passing along the sockaddr 7209 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 7210 * this is done for both connected and non-connected endpoint. 7211 */ 7212 void 7213 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 7214 { 7215 conn_t *connp; 7216 udp_t *udp; 7217 7218 q = UDP_WR(q); 7219 connp = Q_TO_CONN(q); 7220 udp = connp->conn_udp; 7221 7222 /* udpsockfs should only send down M_DATA for this entry point */ 7223 ASSERT(DB_TYPE(mp) == M_DATA); 7224 7225 mutex_enter(&connp->conn_lock); 7226 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 7227 7228 if (udp->udp_mode != UDP_MT_HOT) { 7229 /* 7230 * We can't enter this conn right away because another 7231 * thread is currently executing as writer; therefore we 7232 * need to deposit the message into the squeue to be 7233 * drained later. If a socket address is present, we 7234 * need to create a T_UNITDATA_REQ message as placeholder. 
/*
 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode
 * is valid when we are directly beneath the stream head, and thus sockfs
 * is able to bypass STREAMS and directly call us, passing along the sockaddr
 * structure without the cumbersome T_UNITDATA_REQ interface. Note that
 * this is done for both connected and non-connected endpoint.
 *
 *	q	- read-side queue handed in by sockfs (converted to the
 *		  write side below)
 *	mp	- M_DATA payload
 *	addr	- destination sockaddr, or NULL for connected sends
 *	addrlen	- length of addr
 */
void
udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
{
	conn_t	*connp;
	udp_t	*udp;

	q = UDP_WR(q);
	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;

	/* udpsockfs should only send down M_DATA for this entry point */
	ASSERT(DB_TYPE(mp) == M_DATA);

	mutex_enter(&connp->conn_lock);
	UDP_MODE_ASSERTIONS(udp, UDP_ENTER);

	if (udp->udp_mode != UDP_MT_HOT) {
		/*
		 * We can't enter this conn right away because another
		 * thread is currently executing as writer; therefore we
		 * need to deposit the message into the squeue to be
		 * drained later. If a socket address is present, we
		 * need to create a T_UNITDATA_REQ message as placeholder.
		 */
		if (addr != NULL && addrlen != 0) {
			mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen);

			if (tudr_mp == NULL) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&udp_mib, udpOutErrors);
				UDP_STAT(udp_out_err_tudr);
				freemsg(mp);
				return;
			}
			/* Tag the packet with T_UNITDATA_REQ */
			tudr_mp->b_cont = mp;
			mp = tudr_mp;
		}
		mutex_exit(&connp->conn_lock);
		udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT);
		return;
	}

	/* We can execute as reader right away. */
	UDP_READERS_INCREF(udp);
	mutex_exit(&connp->conn_lock);

	udp_output(connp, mp, addr, addrlen);

	/* Drops the reader reference taken above. */
	udp_exit(connp);
}
7304 */ 7305 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 7306 *error = EADDRNOTAVAIL; 7307 goto done; 7308 } 7309 7310 ipp->ipp_fields = 0; 7311 ipp->ipp_sticky_ignored = 0; 7312 7313 /* 7314 * If TPI options passed in, feed it for verification and handling 7315 */ 7316 attrs.udpattr_credset = B_FALSE; 7317 opt_present = B_FALSE; 7318 if (DB_TYPE(mp) != M_DATA) { 7319 mp1 = mp->b_cont; 7320 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 7321 attrs.udpattr_ipp6 = ipp; 7322 attrs.udpattr_mb = mp; 7323 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 7324 goto done; 7325 ASSERT(*error == 0); 7326 opt_present = B_TRUE; 7327 } 7328 } 7329 ignore = ipp->ipp_sticky_ignored; 7330 7331 /* mp1 points to the M_DATA mblk carrying the packet */ 7332 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 7333 7334 if (sin6->sin6_scope_id != 0 && 7335 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 7336 /* 7337 * IPPF_SCOPE_ID is special. It's neither a sticky 7338 * option nor ancillary data. It needs to be 7339 * explicitly set in options_exists. 7340 */ 7341 option_exists |= IPPF_SCOPE_ID; 7342 } 7343 7344 /* 7345 * Compute the destination address 7346 */ 7347 ip6_dst = sin6->sin6_addr; 7348 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 7349 ip6_dst = ipv6_loopback; 7350 7351 /* 7352 * If we're not going to the same destination as last time, then 7353 * recompute the label required. This is done in a separate routine to 7354 * avoid blowing up our stack here. 7355 * 7356 * TSOL Note: Since we are not in WRITER mode, UDP packets 7357 * to different destination may require different labels. 7358 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 7359 * and sticky ipp_hopoptslen are consistent for the current 7360 * destination and are updated atomically. 
7361 */ 7362 mutex_enter(&connp->conn_lock); 7363 if (is_system_labeled()) { 7364 /* Using UDP MLP requires SCM_UCRED from user */ 7365 if (connp->conn_mlp_type != mlptSingle && 7366 !attrs.udpattr_credset) { 7367 DTRACE_PROBE4( 7368 tx__ip__log__info__output__udp6, 7369 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 7370 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 7371 *error = ECONNREFUSED; 7372 mutex_exit(&connp->conn_lock); 7373 goto done; 7374 } 7375 if ((opt_present || 7376 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst)) && 7377 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 7378 mutex_exit(&connp->conn_lock); 7379 goto done; 7380 } 7381 } 7382 7383 /* 7384 * If there's a security label here, then we ignore any options the 7385 * user may try to set. We keep the peer's label as a hidden sticky 7386 * option. We make a private copy of this label before releasing the 7387 * lock so that label is kept consistent with the destination addr. 7388 */ 7389 if (udp->udp_label_len_v6 > 0) { 7390 ignore &= ~IPPF_HOPOPTS; 7391 ipp->ipp_fields &= ~IPPF_HOPOPTS; 7392 } 7393 7394 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 7395 /* No sticky options nor ancillary data. */ 7396 mutex_exit(&connp->conn_lock); 7397 goto no_options; 7398 } 7399 7400 /* 7401 * Go through the options figuring out where each is going to 7402 * come from and build two masks. The first mask indicates if 7403 * the option exists at all. The second mask indicates if the 7404 * option is sticky or ancillary. 
7405 */ 7406 if (!(ignore & IPPF_HOPOPTS)) { 7407 if (ipp->ipp_fields & IPPF_HOPOPTS) { 7408 option_exists |= IPPF_HOPOPTS; 7409 udp_ip_hdr_len += ipp->ipp_hopoptslen; 7410 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 7411 option_exists |= IPPF_HOPOPTS; 7412 is_sticky |= IPPF_HOPOPTS; 7413 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 7414 hopoptsptr = kmem_alloc( 7415 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 7416 if (hopoptsptr == NULL) { 7417 *error = ENOMEM; 7418 mutex_exit(&connp->conn_lock); 7419 goto done; 7420 } 7421 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 7422 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 7423 hopoptslen); 7424 udp_ip_hdr_len += hopoptslen; 7425 } 7426 } 7427 mutex_exit(&connp->conn_lock); 7428 7429 if (!(ignore & IPPF_RTHDR)) { 7430 if (ipp->ipp_fields & IPPF_RTHDR) { 7431 option_exists |= IPPF_RTHDR; 7432 udp_ip_hdr_len += ipp->ipp_rthdrlen; 7433 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 7434 option_exists |= IPPF_RTHDR; 7435 is_sticky |= IPPF_RTHDR; 7436 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 7437 } 7438 } 7439 7440 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 7441 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 7442 option_exists |= IPPF_RTDSTOPTS; 7443 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 7444 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 7445 option_exists |= IPPF_RTDSTOPTS; 7446 is_sticky |= IPPF_RTDSTOPTS; 7447 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 7448 } 7449 } 7450 7451 if (!(ignore & IPPF_DSTOPTS)) { 7452 if (ipp->ipp_fields & IPPF_DSTOPTS) { 7453 option_exists |= IPPF_DSTOPTS; 7454 udp_ip_hdr_len += ipp->ipp_dstoptslen; 7455 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 7456 option_exists |= IPPF_DSTOPTS; 7457 is_sticky |= IPPF_DSTOPTS; 7458 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 7459 } 7460 } 7461 7462 if (!(ignore & IPPF_IFINDEX)) { 7463 if (ipp->ipp_fields & IPPF_IFINDEX) { 7464 
option_exists |= IPPF_IFINDEX; 7465 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 7466 option_exists |= IPPF_IFINDEX; 7467 is_sticky |= IPPF_IFINDEX; 7468 } 7469 } 7470 7471 if (!(ignore & IPPF_ADDR)) { 7472 if (ipp->ipp_fields & IPPF_ADDR) { 7473 option_exists |= IPPF_ADDR; 7474 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 7475 option_exists |= IPPF_ADDR; 7476 is_sticky |= IPPF_ADDR; 7477 } 7478 } 7479 7480 if (!(ignore & IPPF_DONTFRAG)) { 7481 if (ipp->ipp_fields & IPPF_DONTFRAG) { 7482 option_exists |= IPPF_DONTFRAG; 7483 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 7484 option_exists |= IPPF_DONTFRAG; 7485 is_sticky |= IPPF_DONTFRAG; 7486 } 7487 } 7488 7489 if (!(ignore & IPPF_USE_MIN_MTU)) { 7490 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 7491 option_exists |= IPPF_USE_MIN_MTU; 7492 } else if (udp->udp_sticky_ipp.ipp_fields & 7493 IPPF_USE_MIN_MTU) { 7494 option_exists |= IPPF_USE_MIN_MTU; 7495 is_sticky |= IPPF_USE_MIN_MTU; 7496 } 7497 } 7498 7499 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 7500 option_exists |= IPPF_HOPLIMIT; 7501 /* IPV6_HOPLIMIT can never be sticky */ 7502 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 7503 7504 if (!(ignore & IPPF_UNICAST_HOPS) && 7505 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 7506 option_exists |= IPPF_UNICAST_HOPS; 7507 is_sticky |= IPPF_UNICAST_HOPS; 7508 } 7509 7510 if (!(ignore & IPPF_MULTICAST_HOPS) && 7511 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 7512 option_exists |= IPPF_MULTICAST_HOPS; 7513 is_sticky |= IPPF_MULTICAST_HOPS; 7514 } 7515 7516 if (!(ignore & IPPF_TCLASS)) { 7517 if (ipp->ipp_fields & IPPF_TCLASS) { 7518 option_exists |= IPPF_TCLASS; 7519 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 7520 option_exists |= IPPF_TCLASS; 7521 is_sticky |= IPPF_TCLASS; 7522 } 7523 } 7524 7525 if (!(ignore & IPPF_NEXTHOP) && 7526 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 7527 option_exists |= 
IPPF_NEXTHOP; 7528 is_sticky |= IPPF_NEXTHOP; 7529 } 7530 7531 no_options: 7532 7533 /* 7534 * If any options carried in the ip6i_t were specified, we 7535 * need to account for the ip6i_t in the data we'll be sending 7536 * down. 7537 */ 7538 if (option_exists & IPPF_HAS_IP6I) 7539 udp_ip_hdr_len += sizeof (ip6i_t); 7540 7541 /* check/fix buffer config, setup pointers into it */ 7542 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 7543 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 7544 !OK_32PTR(ip6h)) { 7545 /* Try to get everything in a single mblk next time */ 7546 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 7547 udp->udp_max_hdr_len = udp_ip_hdr_len; 7548 (void) mi_set_sth_wroff(UDP_RD(q), 7549 udp->udp_max_hdr_len + udp_wroff_extra); 7550 } 7551 mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); 7552 if (mp2 == NULL) { 7553 *error = ENOMEM; 7554 goto done; 7555 } 7556 mp2->b_wptr = DB_LIM(mp2); 7557 mp2->b_cont = mp1; 7558 mp1 = mp2; 7559 if (DB_TYPE(mp) != M_DATA) 7560 mp->b_cont = mp1; 7561 else 7562 mp = mp1; 7563 7564 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 7565 } 7566 mp1->b_rptr = (unsigned char *)ip6h; 7567 ip6i = (ip6i_t *)ip6h; 7568 7569 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 7570 if (option_exists & IPPF_HAS_IP6I) { 7571 ip6h = (ip6_t *)&ip6i[1]; 7572 ip6i->ip6i_flags = 0; 7573 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7574 7575 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 7576 if (option_exists & IPPF_SCOPE_ID) { 7577 ip6i->ip6i_flags |= IP6I_IFINDEX; 7578 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 7579 } else if (option_exists & IPPF_IFINDEX) { 7580 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 7581 ASSERT(tipp->ipp_ifindex != 0); 7582 ip6i->ip6i_flags |= IP6I_IFINDEX; 7583 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 7584 } 7585 7586 if (option_exists & IPPF_ADDR) { 7587 /* 7588 * Enable per-packet source address verification if 7589 * IPV6_PKTINFO specified the source address. 7590 * ip6_src is set in the transport's _wput function. 7591 */ 7592 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 7593 } 7594 7595 if (option_exists & IPPF_DONTFRAG) { 7596 ip6i->ip6i_flags |= IP6I_DONTFRAG; 7597 } 7598 7599 if (option_exists & IPPF_USE_MIN_MTU) { 7600 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 7601 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 7602 } 7603 7604 if (option_exists & IPPF_NEXTHOP) { 7605 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 7606 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 7607 ip6i->ip6i_flags |= IP6I_NEXTHOP; 7608 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 7609 } 7610 7611 /* 7612 * tell IP this is an ip6i_t private header 7613 */ 7614 ip6i->ip6i_nxt = IPPROTO_RAW; 7615 } 7616 7617 /* Initialize IPv6 header */ 7618 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7619 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7620 7621 /* Set the hoplimit of the outgoing packet. */ 7622 if (option_exists & IPPF_HOPLIMIT) { 7623 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 7624 ip6h->ip6_hops = ipp->ipp_hoplimit; 7625 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7626 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7627 ip6h->ip6_hops = udp->udp_multicast_ttl; 7628 if (option_exists & IPPF_MULTICAST_HOPS) 7629 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7630 } else { 7631 ip6h->ip6_hops = udp->udp_ttl; 7632 if (option_exists & IPPF_UNICAST_HOPS) 7633 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7634 } 7635 7636 if (option_exists & IPPF_ADDR) { 7637 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7638 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7639 ip6h->ip6_src = tipp->ipp_addr; 7640 } else { 7641 /* 7642 * The source address was not set using IPV6_PKTINFO. 7643 * First look at the bound source. 7644 * If unspecified fallback to __sin6_src_id. 7645 */ 7646 ip6h->ip6_src = udp->udp_v6src; 7647 if (sin6->__sin6_src_id != 0 && 7648 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7649 ip_srcid_find_id(sin6->__sin6_src_id, 7650 &ip6h->ip6_src, connp->conn_zoneid); 7651 } 7652 } 7653 7654 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7655 cp = (uint8_t *)&ip6h[1]; 7656 7657 /* 7658 * Here's where we have to start stringing together 7659 * any extension headers in the right order: 7660 * Hop-by-hop, destination, routing, and final destination opts. 
7661 */ 7662 if (option_exists & IPPF_HOPOPTS) { 7663 /* Hop-by-hop options */ 7664 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7665 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7666 if (hopoptslen == 0) { 7667 hopoptsptr = tipp->ipp_hopopts; 7668 hopoptslen = tipp->ipp_hopoptslen; 7669 is_ancillary = B_TRUE; 7670 } 7671 7672 *nxthdr_ptr = IPPROTO_HOPOPTS; 7673 nxthdr_ptr = &hbh->ip6h_nxt; 7674 7675 bcopy(hopoptsptr, cp, hopoptslen); 7676 cp += hopoptslen; 7677 7678 if (hopoptsptr != NULL && !is_ancillary) { 7679 kmem_free(hopoptsptr, hopoptslen); 7680 hopoptsptr = NULL; 7681 hopoptslen = 0; 7682 } 7683 } 7684 /* 7685 * En-route destination options 7686 * Only do them if there's a routing header as well 7687 */ 7688 if (option_exists & IPPF_RTDSTOPTS) { 7689 ip6_dest_t *dst = (ip6_dest_t *)cp; 7690 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7691 7692 *nxthdr_ptr = IPPROTO_DSTOPTS; 7693 nxthdr_ptr = &dst->ip6d_nxt; 7694 7695 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7696 cp += tipp->ipp_rtdstoptslen; 7697 } 7698 /* 7699 * Routing header next 7700 */ 7701 if (option_exists & IPPF_RTHDR) { 7702 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7703 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7704 7705 *nxthdr_ptr = IPPROTO_ROUTING; 7706 nxthdr_ptr = &rt->ip6r_nxt; 7707 7708 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7709 cp += tipp->ipp_rthdrlen; 7710 } 7711 /* 7712 * Do ultimate destination options 7713 */ 7714 if (option_exists & IPPF_DSTOPTS) { 7715 ip6_dest_t *dest = (ip6_dest_t *)cp; 7716 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7717 7718 *nxthdr_ptr = IPPROTO_DSTOPTS; 7719 nxthdr_ptr = &dest->ip6d_nxt; 7720 7721 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7722 cp += tipp->ipp_dstoptslen; 7723 } 7724 /* 7725 * Now set the last header pointer to the proto passed in 7726 */ 7727 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7728 *nxthdr_ptr = IPPROTO_UDP; 7729 7730 /* Update UDP header */ 7731 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 7732 udph->uha_dst_port = sin6->sin6_port; 7733 udph->uha_src_port = udp->udp_port; 7734 7735 /* 7736 * Copy in the destination address 7737 */ 7738 ip6h->ip6_dst = ip6_dst; 7739 7740 ip6h->ip6_vcf = 7741 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 7742 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7743 7744 if (option_exists & IPPF_TCLASS) { 7745 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7746 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7747 tipp->ipp_tclass); 7748 } 7749 7750 if (option_exists & IPPF_RTHDR) { 7751 ip6_rthdr_t *rth; 7752 7753 /* 7754 * Perform any processing needed for source routing. 7755 * We know that all extension headers will be in the same mblk 7756 * as the IPv6 header. 7757 */ 7758 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7759 if (rth != NULL && rth->ip6r_segleft != 0) { 7760 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7761 /* 7762 * Drop packet - only support Type 0 routing. 7763 * Notify the application as well. 7764 */ 7765 *error = EPROTO; 7766 goto done; 7767 } 7768 7769 /* 7770 * rth->ip6r_len is twice the number of 7771 * addresses in the header. Thus it must be even. 7772 */ 7773 if (rth->ip6r_len & 0x1) { 7774 *error = EPROTO; 7775 goto done; 7776 } 7777 /* 7778 * Shuffle the routing header and ip6_dst 7779 * addresses, and get the checksum difference 7780 * between the first hop (in ip6_dst) and 7781 * the destination (in the last routing hdr entry). 7782 */ 7783 csum = ip_massage_options_v6(ip6h, rth); 7784 /* 7785 * Verify that the first hop isn't a mapped address. 7786 * Routers along the path need to do this verification 7787 * for subsequent hops. 
7788 */ 7789 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7790 *error = EADDRNOTAVAIL; 7791 goto done; 7792 } 7793 7794 cp += (rth->ip6r_len + 1)*8; 7795 } 7796 } 7797 7798 /* count up length of UDP packet */ 7799 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7800 if ((mp2 = mp1->b_cont) != NULL) { 7801 do { 7802 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7803 ip_len += (uint32_t)MBLKL(mp2); 7804 } while ((mp2 = mp2->b_cont) != NULL); 7805 } 7806 7807 /* 7808 * If the size of the packet is greater than the maximum allowed by 7809 * ip, return an error. Passing this down could cause panics because 7810 * the size will have wrapped and be inconsistent with the msg size. 7811 */ 7812 if (ip_len > IP_MAXPACKET) { 7813 *error = EMSGSIZE; 7814 goto done; 7815 } 7816 7817 /* Store the UDP length. Subtract length of extension hdrs */ 7818 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7819 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7820 7821 /* 7822 * We make it easy for IP to include our pseudo header 7823 * by putting our length in uh_checksum, modified (if 7824 * we have a routing header) by the checksum difference 7825 * between the ultimate destination and first hop addresses. 7826 * Note: UDP over IPv6 must always checksum the packet. 7827 */ 7828 csum += udph->uha_length; 7829 csum = (csum & 0xFFFF) + (csum >> 16); 7830 udph->uha_checksum = (uint16_t)csum; 7831 7832 #ifdef _LITTLE_ENDIAN 7833 ip_len = htons(ip_len); 7834 #endif 7835 ip6h->ip6_plen = ip_len; 7836 if (DB_CRED(mp) != NULL) 7837 mblk_setcred(mp1, DB_CRED(mp)); 7838 7839 if (DB_TYPE(mp) != M_DATA) { 7840 ASSERT(mp != mp1); 7841 freeb(mp); 7842 } 7843 7844 /* mp has been consumed and we'll return success */ 7845 ASSERT(*error == 0); 7846 mp = NULL; 7847 7848 /* We're done. 
 Pass the packet to IP */
	BUMP_MIB(&udp_mib, udpHCOutDatagrams);
	ip_output_v6(connp, mp1, q, IP_WPUT);

done:
	/* Common exit path: free any hop-by-hop options we still own. */
	if (hopoptsptr != NULL && !is_ancillary) {
		kmem_free(hopoptsptr, hopoptslen);
		hopoptsptr = NULL;
	}
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&udp_mib, udpOutErrors);
	}
	/* NULL when mp was consumed (success); non-NULL when *error is set. */
	return (mp);
}

/*
 * Handle all non-M_DATA messages arriving on the UDP write side:
 * TPI primitives (bind, connect, unbind, option management, ...),
 * M_IOCTL/M_IOCDATA processing, and M_FLUSH.  Messages this module
 * does not recognize are passed down to IP unchanged.
 */
static void
udp_wput_other(queue_t *q, mblk_t *mp)
{
	uchar_t		*rptr = mp->b_rptr;
	struct datab	*db;
	struct iocblk	*iocp;
	cred_t		*cr;
	conn_t		*connp = Q_TO_CONN(q);
	udp_t		*udp = connp->conn_udp;

	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
	    "udp_wput_other_start: q %p", q);

	db = mp->b_datap;

	/* Prefer the message's attached credentials, else the conn's. */
	cr = DB_CREDDEF(mp, connp->conn_cred);

	switch (db->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		/* Must hold at least the TPI primitive type field. */
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			freemsg(mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "protoshort");
			return;
		}
		switch (((t_primp_t)rptr)->type) {
		case T_ADDR_REQ:
			udp_addr_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "addrreq");
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			udp_bind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "bindreq");
			return;
		case T_CONN_REQ:
			udp_connect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "connreq");
			return;
		case T_CAPABILITY_REQ:
			udp_capability_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "capabreq");
			return;
		case T_INFO_REQ:
			udp_info_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "unitdatareq");
			return;
		case T_UNBIND_REQ:
			udp_unbind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
			return;
		case T_SVR4_OPTMGMT_REQ:
			if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr))
				/*
				 * Use upper queue for option processing in
				 * case the request is not handled at this
				 * level and needs to be passed down to IP.
				 */
				(void) svr4_optcom_req(_WR(UDP_RD(q)),
				    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "optmgmtreq");
			return;

		case T_OPTMGMT_REQ:
			/*
			 * Use upper queue for option processing in
			 * case the request is not handled at this
			 * level and needs to be passed down to IP.
			 */
			(void) tpi_optcom_req(_WR(UDP_RD(q)),
			    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "optmgmtreq");
			return;

		case T_DISCON_REQ:
			udp_disconnect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "disconreq");
			return;

		/* The following TPI message is not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "connres/disconreq");
			return;

		/* The following 3 TPI messages are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "data/exdata/ordrel");
			return;
		default:
			break;
		}
		break;
	case M_FLUSH:
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)",
				    q, "getpeername");
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME: {
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "getmyname");
			return;
		}
		case ND_SET:
			/* nd_getset performs the necessary checking */
		case ND_GET:
			if (nd_getset(q, udp_g_nd, mp)) {
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)",
				    q, "get");
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we could
			 * no longer use read-side synchronous stream.
			 * Drain any queued data and disable direct sockfs
			 * interface from now on.
			 */
			if (!udp->udp_issocket) {
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp->udp_issocket = B_FALSE;
				if (udp->udp_direct_sockfs) {
					/*
					 * Disable read-side synchronous
					 * stream interface and drain any
					 * queued data.
					 */
					udp_rcv_drain(UDP_RD(q), udp,
					    B_FALSE);
					ASSERT(!udp->udp_direct_sockfs);
					UDP_STAT(udp_sock_fallback);
				}
				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			putnext(UDP_RD(q), mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
	    "udp_wput_other_end: q %p (%S)", q, "end");
	ip_output(connp, mp, q, IP_WPUT);
}

/*
 * Callback wrapper: run udp_wput_other() on the conn's write queue
 * and then release the conn via udp_exit().
 */
/* ARGSUSED */
static void
udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_wput_other(((conn_t *)arg)->conn_wq, mp);
	udp_exit((conn_t *)arg);
}

/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages.
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t		*mp1;
	STRUCT_HANDLE(strbuf, sb);
	uint16_t	port;
	in6_addr_t	v6addr;
	ipaddr_t	v4addr;
	uint32_t	flowinfo = 0;
	int		addrlen;
	udp_t		*udp = Q_TO_UDP(q);

	/* Make sure it is one of ours.
 */
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		/* Not one of ours; pass it down to IP untouched. */
		ip_output(udp->udp_connp, mp, q, IP_WPUT);
		return;
	}

	/* Continue the mi_copy* machinery on the upper-level write queue. */
	q = WR(UDP_RD(q));
	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME. Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
	    (void *)mp1->b_rptr);
	if (udp->udp_family == AF_INET)
		addrlen = sizeof (sin_t);
	else
		addrlen = sizeof (sin6_t);

	/* User buffer must be able to hold the full sockaddr. */
	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v4addr = V4_PART_OF_V6(udp->udp_v6src);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				v4addr = V4_PART_OF_V6(udp->udp_bound_v6src);
			}
		} else {
			/* udp->udp_family == AF_INET6 */
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v6addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				v6addr = udp->udp_bound_v6src;
			}
		}
		port = udp->udp_port;
		break;
	case TI_GETPEERNAME:
		/* No peer unless a default destination is in effect. */
		if (udp->udp_state != TS_DATA_XFER) {
			mi_copy_done(q, mp, ENOTCONN);
			return;
		}
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			v4addr = V4_PART_OF_V6(udp->udp_v6dst);
		} else {
			/* udp->udp_family == AF_INET6) */
			v6addr = udp->udp_v6dst;
			flowinfo = udp->udp_flowinfo;
		}
		port = udp->udp_dstport;
		break;
	default:
		mi_copy_done(q, mp, EPROTO);
		return;
	}
	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
	if (!mp1)
		return;

	/* Build the sockaddr in the copyout buffer. */
	if (udp->udp_family == AF_INET) {
		sin_t *sin;

		STRUCT_FSET(sb, len, (int)sizeof (sin_t));
		sin = (sin_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin[1];
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = v4addr;
		sin->sin_port = port;
	} else {
		/* udp->udp_family == AF_INET6 */
		sin6_t *sin6;

		STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
		sin6 = (sin6_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin6[1];
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_flowinfo = flowinfo;
		sin6->sin6_addr = v6addr;
		sin6->sin6_port = port;
	}
	/* Copy out the address */
	mi_copyout(q, mp);
}


static int
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
    udpattrs_t *udpattrs)
{
	struct T_unitdata_req *udreqp;
	int is_absreq_failure;
	cred_t *cr;
	conn_t *connp = Q_TO_CONN(q);

	ASSERT(((t_primp_t)mp->b_rptr)->type);

	cr = DB_CREDDEF(mp, connp->conn_cred);

	udreqp = (struct T_unitdata_req *)mp->b_rptr;

	/*
	 * Use upper queue for option processing since the callback
	 * routines expect to be called in UDP instance instead of IP.
	 */
	*errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length,
	    udreqp->OPT_offset, cr, &udp_opt_obj,
	    udpattrs, &is_absreq_failure);

	if (*errorp != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		return (-1);		/* failure */
	}
	ASSERT(is_absreq_failure == 0);
	return (0);	/* success */
}

/*
 * Module load-time initialization: size and allocate the bind-hash
 * fanout table, register tunables and kstats, and create the udp_t
 * kmem cache.
 */
void
udp_ddi_init(void)
{
	int i;

	UDP6_MAJ = ddi_name_to_major(UDP6);

	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) {
		/* Not a power of two. Round up to nearest power of two */
		for (i = 0; i < 31; i++) {
			if (udp_bind_fanout_size < (1 << i))
				break;
		}
		udp_bind_fanout_size = 1 << i;
	}
	udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size *
	    sizeof (udp_fanout_t), KM_SLEEP);
	for (i = 0; i < udp_bind_fanout_size; i++) {
		mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
		    NULL);
	}
	(void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr));

	udp_kstat_init();

	udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t),
	    CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0);
}

/*
 * Module unload-time teardown; undoes udp_ddi_init() in reverse order.
 */
void
udp_ddi_destroy(void)
{
	int i;

	nd_free(&udp_g_nd);

	for (i = 0; i < udp_bind_fanout_size; i++) {
		mutex_destroy(&udp_bind_fanout[i].uf_lock);
	}

	kmem_free(udp_bind_fanout, udp_bind_fanout_size *
	    sizeof (udp_fanout_t));

	udp_kstat_fini();

	kmem_cache_destroy(udp_cache);
}

/*
 * Create and install the two UDP kstats: the named MIB-II kstat
 * (refreshed on demand through udp_kstat_update()) and the virtual
 * "udpstat" kstat backed directly by udp_statistics.
 */
static void
udp_kstat_init(void)
{
	udp_named_kstat_t template = {
		{ "inDatagrams",	KSTAT_DATA_UINT64, 0 },
		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
		{ "outDatagrams",	KSTAT_DATA_UINT64, 0 },
		{ "entrySize",		KSTAT_DATA_INT32, 0 },
		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
	};

	udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME,
	    "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0);

	if (udp_mibkp == NULL)
		return;

	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);

	bcopy(&template, udp_mibkp->ks_data, sizeof (template));

	udp_mibkp->ks_update = udp_kstat_update;

	kstat_install(udp_mibkp);

	if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat",
	    "net", KSTAT_TYPE_NAMED,
	    sizeof (udp_statistics) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL)) != NULL) {
		udp_ksp->ks_data = &udp_statistics;
		kstat_install(udp_ksp);
	}
}

/* Delete the kstats created by udp_kstat_init(); safe if creation failed. */
static void
udp_kstat_fini(void)
{
	if (udp_ksp != NULL) {
		kstat_delete(udp_ksp);
		udp_ksp = NULL;
	}
	if (udp_mibkp != NULL) {
		kstat_delete(udp_mibkp);
		udp_mibkp = NULL;
	}
}

/*
 * ks_update callback for the MIB-II kstat: snapshot the counters from
 * udp_mib into the named-kstat buffer.  The kstat is read-only.
 */
static int
udp_kstat_update(kstat_t *kp, int rw)
{
	udp_named_kstat_t *udpkp;

	if ((kp == NULL) || (kp->ks_data == NULL))
		return (EIO);

	if (rw == KSTAT_WRITE)
		return (EACCES);

	udpkp = (udp_named_kstat_t *)kp->ks_data;

	udpkp->inDatagrams.value.ui64 = udp_mib.udpHCInDatagrams;
	udpkp->inErrors.value.ui32 = udp_mib.udpInErrors;
	udpkp->outDatagrams.value.ui64 = udp_mib.udpHCOutDatagrams;
	udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors;

	return (0);
}

/* ARGSUSED */
static void
udp_rput(queue_t *q, mblk_t *mp)
{
	/*
	 * We get here whenever we do qreply() from IP,
	 * i.e. as part of handling ioctls, etc.
	 */
	putnext(q, mp);
}

/*
 * Read-side synchronous stream info entry point, called as a
 * result of handling certain STREAMS ioctl operations.
 */
static int
udp_rinfop(queue_t *q, infod_t *dp)
{
	mblk_t		*mp;
	uint_t		cmd = dp->d_cmd;
	int		res = 0;
	int		error = 0;
	udp_t		*udp = Q_TO_UDP(RD(UDP_WR(q)));
	struct stdata	*stp = STREAM(q);

	/* udp_drain_lock protects the receive list; sd_lock nests inside. */
	mutex_enter(&udp->udp_drain_lock);
	/* If shutdown on read has happened, return nothing */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_flag & STREOF) {
		mutex_exit(&stp->sd_lock);
		goto done;
	}
	mutex_exit(&stp->sd_lock);

	if ((mp = udp->udp_rcv_list_head) == NULL)
		goto done;

	ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);

	if (cmd & INFOD_COUNT) {
		/*
		 * Return the number of messages.
		 */
		dp->d_count += udp->udp_rcv_msgcnt;
		res |= INFOD_COUNT;
	}
	if (cmd & INFOD_BYTES) {
		/*
		 * Return size of all data messages.
		 */
		dp->d_bytes += udp->udp_rcv_cnt;
		res |= INFOD_BYTES;
	}
	if (cmd & INFOD_FIRSTBYTES) {
		/*
		 * Return size of first data message.
		 */
		dp->d_bytes = msgdsize(mp);
		res |= INFOD_FIRSTBYTES;
		dp->d_cmd &= ~INFOD_FIRSTBYTES;
	}
	if (cmd & INFOD_COPYOUT) {
		mblk_t *mp1 = mp->b_cont;
		int n;
		/*
		 * Return data contents of first message.
		 */
		ASSERT(DB_TYPE(mp1) == M_DATA);
		while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
			n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
			if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
			    UIO_READ, dp->d_uiop)) != 0) {
				goto done;
			}
			mp1 = mp1->b_cont;
		}
		res |= INFOD_COPYOUT;
		dp->d_cmd &= ~INFOD_COPYOUT;
	}
done:
	mutex_exit(&udp->udp_drain_lock);

	dp->d_res |= res;

	return (error);
}

/*
 * Read-side synchronous stream entry point. This is called as a result
 * of recv/read operation done at sockfs, and is guaranteed to execute
 * outside of the interrupt thread context. It returns a single datagram
 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
 */
static int
udp_rrw(queue_t *q, struiod_t *dp)
{
	mblk_t	*mp;
	udp_t	*udp = Q_TO_UDP(_RD(UDP_WR(q)));

	/* We should never get here when we're in SNMP mode */
	ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD));

	/*
	 * Dequeue datagram from the head of the list and return
	 * it to caller; also ensure that RSLEEP sd_wakeq flag is
	 * set/cleared depending on whether or not there's data
	 * remaining in the list.
	 */
	mutex_enter(&udp->udp_drain_lock);
	if (!udp->udp_direct_sockfs) {
		/* Direct mode has been disabled (e.g. fallback in progress) */
		mutex_exit(&udp->udp_drain_lock);
		UDP_STAT(udp_rrw_busy);
		return (EBUSY);
	}
	if ((mp = udp->udp_rcv_list_head) != NULL) {
		uint_t size = msgdsize(mp);

		/* Last datagram in the list? */
		if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
			udp->udp_rcv_list_tail = NULL;
		mp->b_next = NULL;

		udp->udp_rcv_cnt -= size;
		udp->udp_rcv_msgcnt--;
		UDP_STAT(udp_rrw_msgcnt);

		/* No longer flow-controlling? */
		if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
		    udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
			udp->udp_drain_qfull = B_FALSE;
	}
	if (udp->udp_rcv_list_head == NULL) {
		/*
		 * Either we just dequeued the last datagram or
		 * we get here from sockfs and have nothing to
		 * return; in this case clear RSLEEP.
		 */
		ASSERT(udp->udp_rcv_cnt == 0);
		ASSERT(udp->udp_rcv_msgcnt == 0);
		ASSERT(udp->udp_rcv_list_tail == NULL);
		STR_WAKEUP_CLEAR(STREAM(q));
	} else {
		/*
		 * More data follows; we need udp_rrw() to be
		 * called in future to pick up the rest.
		 */
		STR_WAKEUP_SET(STREAM(q));
	}
	mutex_exit(&udp->udp_drain_lock);
	dp->d_mp = mp;
	return (0);
}

/*
 * Enqueue a completely-built T_UNITDATA_IND message into the receive
 * list; this is typically executed within the interrupt thread context
 * and so we do things as quickly as possible.
 */
static void
udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
{
	/* Caller passes the read queue and the precomputed message size. */
	ASSERT(q == RD(q));
	ASSERT(pkt_len == msgdsize(mp));
	ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
	ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
	ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));

	mutex_enter(&udp->udp_drain_lock);
	/*
	 * Wake up and signal the receiving app; it is okay to do this
	 * before enqueueing the mp because we are holding the drain lock.
	 * One of the advantages of synchronous stream is the ability for
	 * us to find out when the application performs a read on the
	 * socket by way of udp_rrw() entry point being called. We need
	 * to generate SIGPOLL/SIGIO for each received data in the case
	 * of asynchronous socket just as in the strrput() case. However,
	 * we only wake the application up when necessary, i.e. during the
	 * first enqueue. When udp_rrw() is called, we send up a single
	 * datagram upstream and call STR_WAKEUP_SET() again when there
	 * are still data remaining in our receive queue.
	 */
	if (udp->udp_rcv_list_head == NULL) {
		STR_WAKEUP_SET(STREAM(q));
		udp->udp_rcv_list_head = mp;
	} else {
		udp->udp_rcv_list_tail->b_next = mp;
	}
	udp->udp_rcv_list_tail = mp;
	udp->udp_rcv_cnt += pkt_len;
	udp->udp_rcv_msgcnt++;

	/* Need to flow-control? */
	if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
	    udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
		udp->udp_drain_qfull = B_TRUE;

	/* Update poll events and send SIGPOLL/SIGIO if necessary */
	STR_SENDSIG(STREAM(q));
	mutex_exit(&udp->udp_drain_lock);
}

/*
 * Drain the contents of receive list to the module upstream; we do
 * this during close or when we fallback to the slow mode due to
 * sockmod being popped or a module being pushed on top of us.
 */
static void
udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
{
	mblk_t *mp;

	/* Caller passes the read queue; closing == B_TRUE frees the data. */
	ASSERT(q == RD(q));

	mutex_enter(&udp->udp_drain_lock);
	/*
	 * There is no race with a concurrent udp_input() sending
	 * up packets using putnext() after we have cleared the
	 * udp_direct_sockfs flag but before we have completed
	 * sending up the packets in udp_rcv_list, since we are
	 * either a writer or we have quiesced the conn.
	 */
	udp->udp_direct_sockfs = B_FALSE;
	mutex_exit(&udp->udp_drain_lock);

	if (udp->udp_rcv_list_head != NULL)
		UDP_STAT(udp_drain);

	/*
	 * Send up everything via putnext(); note here that we
	 * don't need the udp_drain_lock to protect us since
	 * nothing can enter udp_rrw() and that we currently
	 * have exclusive access to this udp.
	 */
	while ((mp = udp->udp_rcv_list_head) != NULL) {
		udp->udp_rcv_list_head = mp->b_next;
		mp->b_next = NULL;
		udp->udp_rcv_cnt -= msgdsize(mp);
		udp->udp_rcv_msgcnt--;
		if (closing) {
			freemsg(mp);
		} else {
			putnext(q, mp);
		}
	}
	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	udp->udp_rcv_list_tail = NULL;
	udp->udp_drain_qfull = B_FALSE;
}

/*
 * Set the receive high-water mark, padded for extra buffering and
 * capped at udp_max_buf; returns the value actually in effect.
 */
static size_t
udp_set_rcv_hiwat(udp_t *udp, size_t size)
{
	/* We add a bit of extra buffering */
	size += size >> 1;
	if (size > udp_max_buf)
		size = udp_max_buf;

	udp->udp_rcv_hiwat = size;
	return (size);
}

/*
 * Little helper for IPsec's NAT-T processing.
 */
boolean_t
udp_compute_checksum(void)
{
	return (udp_do_checksum);
}