1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/debug.h> 57 #include <sys/isa_defs.h> 58 #include <sys/random.h> 59 #include <netinet/in.h> 60 #include <netinet/ip6.h> 61 #include <netinet/icmp6.h> 62 #include <netinet/udp.h> 63 #include <net/if.h> 64 #include <net/route.h> 65 66 #include <inet/common.h> 67 #include <inet/ip.h> 68 #include <inet/ip_impl.h> 69 #include <inet/ip6.h> 70 #include <inet/ip_ire.h> 71 #include <inet/ip_if.h> 72 #include <inet/ip_multi.h> 73 #include <inet/mi.h> 74 #include <inet/mib2.h> 75 #include <inet/nd.h> 76 #include <inet/optcom.h> 77 #include <inet/snmpcom.h> 78 #include <inet/kstatcom.h> 79 #include <inet/udp_impl.h> 80 #include <inet/ipclassifier.h> 81 #include <inet/ipsec_impl.h> 82 #include <inet/ipp_common.h> 83 84 /* 85 * The ipsec_info.h header file is here since it has the definition for the 86 * M_CTL message types used by IP to convey information to the ULP. The 87 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
88 */ 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 92 #include <sys/tsol/label.h> 93 #include <sys/tsol/tnet.h> 94 #include <rpc/pmap_prot.h> 95 96 /* 97 * Synchronization notes: 98 * 99 * UDP uses a combination of its internal perimeter, a global lock and 100 * a set of bind hash locks to protect its data structures. Please see 101 * the note above udp_mode_assertions for details about the internal 102 * perimeter. 103 * 104 * When a UDP endpoint is bound to a local port, it is inserted into 105 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 106 * The size of the array is controlled by the udp_bind_fanout_size variable. 107 * This variable can be changed in /etc/system if the default value is 108 * not large enough. Each bind hash bucket is protected by a per bucket 109 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 110 * structure. An UDP endpoint is removed from the bind hash list only 111 * when it is being unbound or being closed. The per bucket lock also 112 * protects a UDP endpoint's state changes. 113 * 114 * Plumbing notes: 115 * 116 * Both udp and ip are merged, but the streams plumbing is kept unchanged 117 * in that udp is always pushed atop /dev/ip. This is done to preserve 118 * backwards compatibility for certain applications which rely on such 119 * plumbing geometry to do things such as issuing I_POP on the stream 120 * in order to obtain direct access to /dev/ip, etc. 121 * 122 * All UDP processings happen in the /dev/ip instance; the udp module 123 * instance does not possess any state about the endpoint, and merely 124 * acts as a dummy module whose presence is to keep the streams plumbing 125 * appearance unchanged. At open time /dev/ip allocates a conn_t that 126 * happens to embed a udp_t. This stays dormant until the time udp is 127 * pushed, which indicates to /dev/ip that it must convert itself from 128 * an IP to a UDP endpoint. 
129 * 130 * We only allow for the following plumbing cases: 131 * 132 * Normal: 133 * /dev/ip is first opened and later udp is pushed directly on top. 134 * This is the default action that happens when a udp socket or 135 * /dev/udp is opened. The conn_t created by /dev/ip instance is 136 * now shared and is marked with IPCL_UDP. 137 * 138 * SNMP-only: 139 * udp is pushed on top of a module other than /dev/ip. When this 140 * happens it will support only SNMP semantics. A new conn_t is 141 * allocated and marked with IPCL_UDPMOD. 142 * 143 * The above cases imply that we don't support any intermediate module to 144 * reside in between /dev/ip and udp -- in fact, we never supported such 145 * scenario in the past as the inter-layer communication semantics have 146 * always been private. Also note that the normal case allows for SNMP 147 * requests to be processed in addition to the rest of UDP operations. 148 * 149 * The normal case plumbing is depicted by the following diagram: 150 * 151 * +---------------+---------------+ 152 * | | | udp 153 * | udp_wq | udp_rq | 154 * | | UDP_RD | 155 * | | | 156 * +---------------+---------------+ 157 * | ^ 158 * v | 159 * +---------------+---------------+ 160 * | | | /dev/ip 161 * | ip_wq | ip_rq | conn_t 162 * | UDP_WR | | 163 * | | | 164 * +---------------+---------------+ 165 * 166 * Messages arriving at udp_wq from above will end up in ip_wq before 167 * it gets processed, i.e. udp write entry points will advance udp_wq 168 * and use its q_next value as ip_wq in order to use the conn_t that 169 * is stored in its q_ptr. Likewise, messages generated by ip to the 170 * module above udp will appear as if they are originated from udp_rq, 171 * i.e. putnext() calls to the module above udp is done using the 172 * udp_rq instead of ip_rq in order to avoid udp_rput() which does 173 * nothing more than calling putnext(). 174 * 175 * The above implies the following rule of thumb: 176 * 177 * 1. 
 *    udp_t is obtained from conn_t, which is created by the /dev/ip
 *    instance and is stored in q_ptr of both ip_wq and ip_rq. There
 *    is no direct reference to conn_t from either udp_wq or udp_rq.
 *
 * 2. Write-side entry points of udp can obtain the conn_t via the
 *    Q_TO_CONN() macro, using the queue value obtain from UDP_WR().
 *
 * 3. While in /dev/ip context, putnext() to the module above udp can
 *    be done by supplying the queue value obtained from UDP_RD().
 *
 */

/*
 * Queue translation helpers between the udp module instance and the
 * underlying /dev/ip instance; see the plumbing notes above.
 */
static queue_t *UDP_WR(queue_t *);
static queue_t *UDP_RD(queue_t *);

/* Named counters exported via kstat; see udp_kstat_init(). */
udp_stat_t udp_statistics = {
	{ "udp_ip_send",		KSTAT_DATA_UINT64 },
	{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
	{ "udp_ire_null",		KSTAT_DATA_UINT64 },
	{ "udp_drain",			KSTAT_DATA_UINT64 },
	{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_busy",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_msgcnt",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum_bytes",	KSTAT_DATA_UINT64 },
	{ "udp_out_opt",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
	{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
	{ "udp_in_pktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstaddr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvif",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvslla",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvucred",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvttl",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhopopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhoplimit",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvrtdstopts",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvrthdr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvpktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvtclass",		KSTAT_DATA_UINT64 },
	{ "udp_in_timestamp",		KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "udp_data_conn",		KSTAT_DATA_UINT64 },
	{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
#endif
};

static kstat_t *udp_ksp;	/* kstat exporting udp_statistics above */
struct kmem_cache *udp_cache;

/*
 * Bind hash list size and hash function. It has to be a power of 2 for
 * hashing.
 */
#define	UDP_BIND_FANOUT_SIZE	512
#define	UDP_BIND_HASH(lport) \
	((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1))

/* UDP bind fanout hash structure. */
typedef struct udp_fanout_s {
	udp_t *uf_udp;		/* head of the bind hash chain */
	kmutex_t uf_lock;	/* protects this bucket's chain */
#if defined(_LP64) || defined(_I32LPx)
	char	uf_pad[48];	/* pad bucket; presumably to avoid false */
#else				/* sharing between adjacent buckets */
	char	uf_pad[56];
#endif
} udp_fanout_t;

uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
/* udp_fanout_t *udp_bind_fanout. */
static udp_fanout_t *udp_bind_fanout;

/*
 * This controls the rate some ndd info report functions can be used
 * by non-privileged users. It stores the last time such info is
 * requested. When those report functions are called again, this
 * is checked with the current time and compare with the ndd param
 * udp_ndd_get_info_interval.
 */
static clock_t udp_last_ndd_get_info_time;
#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"

/* Option processing attrs */
typedef struct udpattrs_s {
	ip6_pkt_t	*udpattr_ipp;	/* collected IPv6 packet options */
	mblk_t		*udpattr_mb;	/* message being processed */
	boolean_t	udpattr_credset;	/* credential option seen */
} udpattrs_t;

/* Forward declarations for the routines defined later in this file. */
static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static int	udp_build_hdrs(queue_t *q, udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_close(queue_t *q);
static void	udp_connect(queue_t *q, mblk_t *mp);
static void	udp_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(queue_t *q, mblk_t *mp);
static void	udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
		    t_scalar_t addr_length);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(udpparam_t *udppa, int cnt);
static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
		    cred_t *cr);
static void	udp_report_item(mblk_t *mp, udp_t *udp);
static void	udp_rput(queue_t *q, mblk_t *mp);
static void	udp_rput_other(queue_t *, mblk_t *);
static int	udp_rinfop(queue_t *q, infod_t *dp);
static int	udp_rrw(queue_t *q, struiod_t *dp);
static void	udp_rput_bind_ack(queue_t *q, mblk_t *mp);
static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha);
static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
		    t_scalar_t destlen, t_scalar_t err);
static void	udp_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
    boolean_t random);
static void	udp_wput(queue_t *q, mblk_t *mp);
static mblk_t	*udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst,
		    uint16_t port, uint_t srcid, int *error);
static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
		    int *error);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
		    socklen_t addrlen);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

static void	udp_kstat_init(void);
static void	udp_kstat_fini(void);
static int	udp_kstat_update(kstat_t *kp, int rw);
static void	udp_input_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2);

static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
		    uint_t pkt_len);
static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void	udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t);
static void	udp_exit(conn_t *);
static void	udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t);
#ifdef DEBUG
static void	udp_mode_assertions(udp_t *, int);
#endif /* DEBUG */

major_t UDP6_MAJ;
#define	UDP6	"udp6"

/* Default STREAMS flow-control water marks for udp queues. */
#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024

static struct module_info udp_info = {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/* Read-side qinit used for the normal (merged udp/ip) plumbing case. */
static struct qinit udp_rinit = {
	(pfi_t)udp_rput, NULL, udp_open, udp_close, NULL,
	&udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_winit = {
	(pfi_t)udp_wput, NULL, NULL, NULL, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Write side that simply forwards downstream via putnext(). */
static struct qinit winit = {
	(pfi_t)putnext, NULL, NULL, NULL, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Support for just SNMP if UDP is not pushed directly over device IP */
struct qinit udp_snmp_rinit = {
	(pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

struct qinit udp_snmp_winit = {
	(pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

struct streamtab udpinfo = {
	&udp_rinit, &winit
};

static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

/* Hint not protected by any lock */
static	in_port_t	udp_g_next_port_to_try;

/*
 * Extra privileged ports. In host byte order.
 */
#define	UDP_NUM_EPRIV_PORTS	64
static int	udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 };

/* Only modified during _init and _fini thus no locking is needed. */
static IDP udp_g_nd;	/* Points to table of UDP ND variables. */

/* MIB-2 stuff for SNMP */
static mib2_udp_t	udp_mib;	/* SNMP fixed size info */
static kstat_t		*udp_mibkp;	/* kstat exporting udp_mib data */

#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size. udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type. udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state. This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size. udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type. udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state. This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp. These are loaded into udp_g_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /* min		max		value		name */
 { 0L,		256,		32,		"udp_wroff_extra" },
 { 1L,		255,		255,		"udp_ipv4_ttl" },
 { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
 { 0,		1,		1,		"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30),	UDP_XMIT_HIWATER, "udp_xmit_hiwat"},
 { 0,		(1<<30),	UDP_XMIT_LOWATER, "udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30),	UDP_RECV_HIWATER, "udp_recv_hiwat"},
 { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
 { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/*
 * The smallest anonymous port in the privileged port range which UDP
 * looks for free port. Use in the option UDP_ANONPRIVBIND.
 */
static in_port_t udp_min_anonpriv_port = 512;

/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL
 */

void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Append mp to the tail of the endpoint's internal message queue,
 * stashing the squeue proc in b_prev and the tag in b_queue so the
 * message can be replayed later by udp_switch_to_squeue().
 * Caller must hold conn_lock.
 */
#define	UDP_ENQUEUE_MP(udp, mp, proc, tag) {			\
	ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL);	\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(mp)->b_queue = (queue_t *)((uintptr_t)tag);		\
	(mp)->b_prev = (mblk_t *)proc;				\
	if ((udp)->udp_mphead == NULL)				\
		(udp)->udp_mphead = (mp);			\
	else							\
		(udp)->udp_mptail->b_next = (mp);		\
	(udp)->udp_mptail = (mp);				\
	(udp)->udp_mpcount++;					\
}

/*
 * Perimeter reference counting; conn_lock must be held for all three.
 * The DECREF forms wake up waiters on conn_cv when the count hits zero.
 */
#define	UDP_READERS_INCREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count++;				\
}

#define	UDP_READERS_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count--;				\
	if ((udp)->udp_reader_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

#define	UDP_SQUEUE_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_squeue_count--;				\
	if ((udp)->udp_squeue_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

/*
 * Notes on UDP endpoint synchronization:
 *
 * UDP needs exclusive operation on a per endpoint basis, when executing
 * functions that modify the endpoint state. udp_rput_other() deals with
 * packets with IP options, and processing these packets end up having
 * to update the endpoint's option related state. udp_wput_other() deals
 * with control operations from the top, e.g. connect() that needs to
 * update the endpoint state. These could be synchronized using locks,
 * but the current version uses squeues for this purpose. squeues may
 * give performance improvement for certain cases such as connected UDP
 * sockets; thus the framework allows for using squeues.
 *
 * The perimeter routines are described as follows:
 *
 * udp_enter():
 *	Enter the UDP endpoint perimeter.
 *
 * udp_become_writer():
 *	Become exclusive on the UDP endpoint. Specifies a function
 *	that will be called exclusively either immediately or later
 *	when the perimeter is available exclusively.
 *
 * udp_exit():
 *	Exit the UDP perimeter.
 *
 * Entering UDP from the top or from the bottom must be done using
 * udp_enter(). No lock must be held while attempting to enter the UDP
 * perimeter. When finished, udp_exit() must be called to get out of
 * the perimeter.
 *
 * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode,
 * multiple threads may enter a UDP endpoint concurrently. This is used
 * for sending and/or receiving normal data. Control operations and other
 * special cases call udp_become_writer() to become exclusive on a per
 * endpoint basis and this results in transitioning to SQUEUE mode. squeue
 * by definition serializes access to the conn_t. When there are no more
 * pending messages on the squeue for the UDP connection, the endpoint
 * reverts to MT_HOT mode. During the interregnum when not all MT threads
 * of an endpoint have finished, messages are queued in the UDP endpoint
 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode.
 *
 * These modes have the following analogs:
 *
 *	UDP_MT_HOT/udp_reader_count==0		none
 *	UDP_MT_HOT/udp_reader_count>0		RW_READ_LOCK
 *	UDP_MT_QUEUED				RW_WRITE_WANTED
 *	UDP_SQUEUE or UDP_QUEUED_SQUEUE		RW_WRITE_LOCKED
 *
 * Stable modes:	UDP_MT_HOT, UDP_SQUEUE
 * Transient modes:	UDP_MT_QUEUED, UDP_QUEUED_SQUEUE
 *
 * While in stable modes, UDP keeps track of the number of threads
 * operating on the endpoint.
 * The udp_reader_count variable represents
 * the number of threads entering the endpoint as readers while it is
 * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there
 * is only a single reader, i.e. when this counter drops to 1. Likewise,
 * udp_squeue_count represents the number of threads operating on the
 * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition
 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e.
 * when this counter drops to 0.
 *
 * The default mode is set to UDP_MT_HOT and UDP alternates between
 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below.
 *
 * Mode transition:
 * ----------------------------------------------------------------
 * old mode		Event				New mode
 * ----------------------------------------------------------------
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_SQUEUE
 *			and udp_reader_count == 1
 *
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_MT_QUEUED
 *			and udp_reader_count > 1
 *
 * UDP_MT_QUEUED	udp_reader_count drops to zero	UDP_QUEUED_SQUEUE
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_SQUEUE
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count != 0
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_MT_HOT
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count
 *			drops to zero
 *
 * UDP_SQUEUE		udp_squeue_count drops to zero	UDP_MT_HOT
 * ----------------------------------------------------------------
 */

/*
 * Given a queue of the udp module instance, return the write queue of
 * the underlying /dev/ip instance (whose q_ptr carries the shared
 * conn_t); see the plumbing notes at the top of this file.
 */
static queue_t *
UDP_WR(queue_t *q)
{
	ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL);
	ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next)));

	return (_WR(q)->q_next);
}

/*
 * Given a queue of the /dev/ip instance, return the read queue of the
 * udp module instance above it, suitable for putnext() to the module
 * above udp.
 */
static queue_t *
UDP_RD(queue_t *q)
{
	ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(q)));
	ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL);

	return (_RD(q)->q_next);
}

#ifdef DEBUG
#define	UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller)
#else
#define	UDP_MODE_ASSERTIONS(udp, caller)
#endif

/* Invariants */
#ifdef DEBUG

/* Per-mode hit counters, indexed by udp_mode; debug observability only. */
uint32_t udp_count[4];

/* Context of udp_mode_assertions */
#define	UDP_ENTER		1
#define	UDP_BECOME_WRITER	2
#define	UDP_EXIT		3

/*
 * Verify the perimeter-mode invariants described in the synchronization
 * notes above. Caller identifies itself via UDP_ENTER/UDP_BECOME_WRITER/
 * UDP_EXIT and must hold conn_lock.
 */
static void
udp_mode_assertions(udp_t *udp, int caller)
{
	ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock));

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		/*
		 * Messages have not yet been enqueued on the internal queue,
		 * otherwise we would have switched to UDP_MT_QUEUED. Likewise
		 * by definition, there can't be any messages enqueued on the
		 * squeue. The UDP could be quiescent, so udp_reader_count
		 * could be zero at entry.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 &&
		    udp->udp_squeue_count == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0);
		udp_count[0]++;
		break;

	case UDP_MT_QUEUED:
		/*
		 * The last MT thread to exit the udp perimeter empties the
		 * internal queue and then switches the UDP to
		 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED
		 * mode, it means there must be at least 1 MT thread still in
		 * the perimeter and at least 1 message on the internal queue.
		 */
		ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL &&
		    udp->udp_mpcount != 0 && udp->udp_squeue_count == 0);
		udp_count[1]++;
		break;

	case UDP_QUEUED_SQUEUE:
		/*
		 * The switch has happened from MT to SQUEUE. So there can't
		 * any MT threads. Messages could still pile up on the internal
		 * queue until the transition is complete and we move to
		 * UDP_SQUEUE mode. We can't assert on nonzero udp_squeue_count
		 * since the squeue could drain any time.
		 */
		ASSERT(udp->udp_reader_count == 0);
		udp_count[2]++;
		break;

	case UDP_SQUEUE:
		/*
		 * The transition is complete. Thre can't be any messages on
		 * the internal queue. The udp could be quiescent or the squeue
		 * could drain any time, so we can't assert on nonzero
		 * udp_squeue_count during entry. Nor can we assert that
		 * udp_reader_count is zero, since, a reader thread could have
		 * directly become writer in line by calling udp_become_writer
		 * without going through the queued states.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0);
		udp_count[3]++;
		break;
	}
}
#endif

/*
 * Body of udp_enter(): dispatch (mp, proc) according to the endpoint's
 * current perimeter mode, or free the message if the conn is closing.
 */
#define	_UDP_ENTER(connp, mp, proc, tag) {				\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	if ((connp)->conn_state_flags & CONN_CLOSING) {			\
		mutex_exit(&(connp)->conn_lock);			\
		freemsg(mp);						\
	} else {							\
		UDP_MODE_ASSERTIONS(_udp, UDP_ENTER);			\
									\
		switch (_udp->udp_mode) {				\
		case UDP_MT_HOT:					\
			/* We can execute as reader right away. */	\
			UDP_READERS_INCREF(_udp);			\
			mutex_exit(&(connp)->conn_lock);		\
			(*(proc))(connp, mp, (connp)->conn_sqp);	\
			break;						\
									\
		case UDP_SQUEUE:					\
			/*						\
			 * We are in squeue mode, send the		\
			 * packet to the squeue				\
			 */						\
			_udp->udp_squeue_count++;			\
			CONN_INC_REF_LOCKED(connp);			\
			mutex_exit(&(connp)->conn_lock);		\
			squeue_enter((connp)->conn_sqp, mp, proc,	\
			    connp, tag);				\
			break;						\
									\
		case UDP_MT_QUEUED:					\
		case UDP_QUEUED_SQUEUE:					\
			/*						\
			 * Some messages may have been enqueued		\
			 * ahead of us. Enqueue the new message		\
			 * at the tail of the internal queue to		\
			 * preserve message ordering.			\
			 */						\
			UDP_ENQUEUE_MP(_udp, mp, proc, tag);		\
			mutex_exit(&(connp)->conn_lock);		\
			break;						\
		}							\
	}								\
}

/*
 * Enter the UDP endpoint perimeter as a reader; proc may run
 * immediately, on the squeue, or be deferred on the internal queue.
 */
static void
udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	_UDP_ENTER(connp, mp, proc, tag);
}

/*
 * Become exclusive (writer) on the endpoint. proc runs right away if we
 * are the only reader or already exclusive; otherwise the message is
 * queued until the remaining readers drain (see udp_exit()).
 */
static void
udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	udp_t	*udp;

	udp = connp->conn_udp;

	mutex_enter(&connp->conn_lock);

	UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER);

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		if (udp->udp_reader_count == 1) {
			/*
			 * We are the only MT thread. Switch to squeue mode
			 * immediately.
			 */
			udp->udp_mode = UDP_SQUEUE;
			udp->udp_squeue_count = 1;
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			squeue_enter(connp->conn_sqp, mp, proc, connp, tag);
			return;
		}
		/* FALLTHRU */

	case UDP_MT_QUEUED:
		/* Enqueue the packet internally in UDP */
		udp->udp_mode = UDP_MT_QUEUED;
		UDP_ENQUEUE_MP(udp, mp, proc, tag);
		mutex_exit(&connp->conn_lock);
		return;

	case UDP_SQUEUE:
	case UDP_QUEUED_SQUEUE:
		/*
		 * We are already exclusive. i.e. we are already
		 * writer. Simply call the desired function.
		 */
		udp->udp_squeue_count++;
		mutex_exit(&connp->conn_lock);
		(*proc)(connp, mp, connp->conn_sqp);
		return;
	}
}

/*
 * Transition from MT mode to SQUEUE mode, when the last MT thread
 * is exiting the UDP perimeter. Move all messages from the internal
 * udp queue to the squeue.
 * A better way would be to move all the
 * messages in one shot, this needs more support from the squeue framework
 */
static void
udp_switch_to_squeue(udp_t *udp)
{
	mblk_t *mp;
	mblk_t	*mp_next;
	sqproc_t proc;
	uint8_t tag;
	conn_t *connp = udp->udp_connp;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(udp->udp_mode == UDP_MT_QUEUED);
	while (udp->udp_mphead != NULL) {
		/* Detach the whole chain, then replay it onto the squeue. */
		mp = udp->udp_mphead;
		udp->udp_mphead = NULL;
		udp->udp_mptail = NULL;
		udp->udp_mpcount = 0;
		udp->udp_mode = UDP_QUEUED_SQUEUE;
		mutex_exit(&connp->conn_lock);
		/*
		 * It is best not to hold any locks across the calls
		 * to squeue functions. Since we drop the lock we
		 * need to go back and check the udp_mphead once again
		 * after the squeue_fill and hence the while loop at
		 * the top of this function
		 */
		for (; mp != NULL; mp = mp_next) {
			mp_next = mp->b_next;
			/* proc and tag were stashed by UDP_ENQUEUE_MP */
			proc = (sqproc_t)mp->b_prev;
			tag = (uint8_t)((uintptr_t)mp->b_queue);
			mp->b_next = NULL;
			mp->b_prev = NULL;
			mp->b_queue = NULL;
			CONN_INC_REF(connp);
			udp->udp_squeue_count++;
			squeue_fill(connp->conn_sqp, mp, proc, connp,
			    tag);
		}
		mutex_enter(&connp->conn_lock);
	}
	/*
	 * udp_squeue_count of zero implies that the squeue has drained
	 * even before we arrived here (i.e. after the squeue_fill above)
	 */
	udp->udp_mode = (udp->udp_squeue_count != 0) ?
	    UDP_SQUEUE : UDP_MT_HOT;
}

/*
 * Body of udp_exit(): drop this thread's reference on the perimeter and
 * carry out any resulting mode transition.
 */
#define	_UDP_EXIT(connp) {						\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	UDP_MODE_ASSERTIONS(_udp, UDP_EXIT);				\
									\
	switch (_udp->udp_mode) {					\
	case UDP_MT_HOT:						\
		UDP_READERS_DECREF(_udp);				\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_SQUEUE:						\
		UDP_SQUEUE_DECREF(_udp);				\
		if (_udp->udp_squeue_count == 0)			\
			_udp->udp_mode = UDP_MT_HOT;			\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_MT_QUEUED:						\
		/*							\
		 * If this is the last MT thread, we need to		\
		 * switch to squeue mode				\
		 */							\
		UDP_READERS_DECREF(_udp);				\
		if (_udp->udp_reader_count == 0)			\
			udp_switch_to_squeue(_udp);			\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_QUEUED_SQUEUE:						\
		UDP_SQUEUE_DECREF(_udp);				\
		/*							\
		 * Even if the udp_squeue_count drops to zero, we	\
		 * don't want to change udp_mode to UDP_MT_HOT here.	\
		 * The thread in udp_switch_to_squeue will take care	\
		 * of the transition to UDP_MT_HOT, after emptying	\
		 * any more new messages that have been enqueued in	\
		 * udp_mphead.						\
		 */							\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
	}								\
}

/*
 * Exit the UDP endpoint perimeter entered via udp_enter().
 */
static void
udp_exit(conn_t *connp)
{
	_UDP_EXIT(connp);
}

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP). MLP has special function
 * on TX systems. Once a port is made MLP, it's not available as
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" find a suitable anon port.
 */
static in_port_t
udp_get_next_priv_port(udp_t *udp)
{
	/* Shared across all UDP instances; walks down from the top. */
	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
	in_port_t nextport;
	boolean_t restart = B_FALSE;

retry:
	if (next_priv_port < udp_min_anonpriv_port ||
	    next_priv_port >= IPPORT_RESERVED) {
		next_priv_port = IPPORT_RESERVED - 1;
		/* Second wrap-around: whole range exhausted, give up. */
		if (restart)
			return (0);
		restart = B_TRUE;
	}

	/* On labeled systems skip ports reserved as MLP "holes". */
	if (is_system_labeled() &&
	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
		next_priv_port = nextport;
		goto retry;
	}

	return (next_priv_port--);
}

/* UDP bind hash report triggered via the Named Dispatch mechanism. */
/* ARGSUSED */
static int
udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udp_fanout_t	*udpf;
	int		i;
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;

	/* Refer to comments in udp_status_report(). */
	if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) {
		if (ddi_get_lbolt() - udp_last_ndd_get_info_time <
		    drv_usectohz(udp_ndd_get_info_interval * 1000)) {
			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
			return (0);
		}
	}
	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
		/* The following may work even if we cannot get a large buf. */
		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
		return (0);
	}

	(void) mi_mpprintf(mp,
	    "UDP " MI_COL_HDRPAD_STR
	/* 12345678[89ABCDEF] */
	    " zone lport src addr dest addr port state");
	/* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */

	zoneid = connp->conn_zoneid;

	for (i = 0; i < udp_bind_fanout_size; i++) {
		udpf = &udp_bind_fanout[i];
		mutex_enter(&udpf->uf_lock);

		/* Print the hash index.
		 */
		udp = udpf->uf_udp;
		if (zoneid != GLOBAL_ZONEID) {
			/* skip to first entry in this zone; might be none */
			while (udp != NULL &&
			    udp->udp_connp->conn_zoneid != zoneid)
				udp = udp->udp_bind_hash;
		}
		if (udp != NULL) {
			uint_t print_len, buf_len;

			buf_len = mp->b_cont->b_datap->db_lim -
			    mp->b_cont->b_wptr;
			print_len = snprintf((char *)mp->b_cont->b_wptr,
			    buf_len, "%d\n", i);
			if (print_len < buf_len) {
				mp->b_cont->b_wptr += print_len;
			} else {
				/* Truncated: consume whatever space is left */
				mp->b_cont->b_wptr += buf_len;
			}
			for (; udp != NULL; udp = udp->udp_bind_hash) {
				if (zoneid == GLOBAL_ZONEID ||
				    zoneid == udp->udp_connp->conn_zoneid)
					udp_report_item(mp->b_cont, udp);
			}
		}
		mutex_exit(&udpf->uf_lock);
	}
	udp_last_ndd_get_info_time = ddi_get_lbolt();
	return (0);
}

/*
 * Hash list removal routine for udp_t structures.
 */
static void
udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
{
	udp_t	*udpnext;
	kmutex_t *lockp;

	/* Not on any bind hash list; nothing to do. */
	if (udp->udp_ptpbhn == NULL)
		return;

	/*
	 * Extract the lock pointer in case there are concurrent
	 * hash_remove's for this instance.
	 */
	ASSERT(udp->udp_port != 0);
	if (!caller_holds_lock) {
		lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock;
		ASSERT(lockp != NULL);
		mutex_enter(lockp);
	}
	/* Re-check under the lock; a concurrent remove may have run. */
	if (udp->udp_ptpbhn != NULL) {
		udpnext = udp->udp_bind_hash;
		if (udpnext != NULL) {
			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
			udp->udp_bind_hash = NULL;
		}
		*udp->udp_ptpbhn = udpnext;
		udp->udp_ptpbhn = NULL;
	}
	if (!caller_holds_lock) {
		mutex_exit(lockp);
	}
}

/*
 * Insert udp into the bind fanout bucket uf.  Entries bound to a
 * specific address are kept ahead of INADDR_ANY bindings so that
 * specific bindings get lookup preference (see comment below).
 */
static void
udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
{
	udp_t	**udpp;
	udp_t	*udpnext;

	ASSERT(MUTEX_HELD(&uf->uf_lock));
	if (udp->udp_ptpbhn != NULL) {
		udp_bind_hash_remove(udp, B_TRUE);
	}
	udpp = &uf->uf_udp;
	udpnext = udpp[0];
	if (udpnext != NULL) {
		/*
		 * If the new udp bound to the INADDR_ANY address
		 * and the first one in the list is not bound to
		 * INADDR_ANY we skip all entries until we find the
		 * first one bound to INADDR_ANY.
		 * This makes sure that applications binding to a
		 * specific address get preference over those binding to
		 * INADDR_ANY.
		 */
		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
			while ((udpnext = udpp[0]) != NULL &&
			    !V6_OR_V4_INADDR_ANY(
			    udpnext->udp_bound_v6src)) {
				udpp = &(udpnext->udp_bind_hash);
			}
			if (udpnext != NULL)
				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		} else {
			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		}
	}
	udp->udp_bind_hash = udpnext;
	udp->udp_ptpbhn = udpp;
	udpp[0] = udp;
}

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the udp_g_next_port_to_try.
 */
static void
udp_bind(queue_t *q, mblk_t *mp)
{
	sin_t		*sin;
	sin6_t		*sin6;
	mblk_t		*mp1;
	in_port_t	port;		/* Host byte order */
	in_port_t	requested_port;	/* Host byte order */
	struct T_bind_req *tbr;
	int		count;
	in6_addr_t	v6src;
	boolean_t	bind_to_req_port_only;
	int		loopmax;
	udp_fanout_t	*udpf;
	in_port_t	lport;		/* Network byte order */
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;
	boolean_t	is_inaddr_any;
	mlp_type_t	addrtype, mlptype;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	if (udp->udp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Reallocate the message to make sure we have enough room for an
	 * address and the protocol type.
	 */
	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
	if (!mp1) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	mp = mp1;
	tbr = (struct T_bind_req *)mp->b_rptr;
	switch (tbr->ADDR_length) {
	case 0:			/* Request for a generic port */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (udp->udp_family == AF_INET) {
			tbr->ADDR_length = sizeof (sin_t);
			sin = (sin_t *)&tbr[1];
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
		} else {
			ASSERT(udp->udp_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&tbr[1];
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
		}
		port = 0;
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin->sin_port);
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin6->sin6_port);
		break;

	default:		/* Invalid request */
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	requested_port = port;

	if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ)
		bind_to_req_port_only = B_FALSE;
	else			/* T_BIND_REQ and requested_port != 0 */
		bind_to_req_port_only = B_TRUE;

	if (requested_port == 0) {
		/*
		 * If the application passed in zero for the port number, it
		 * doesn't care which port number we bind to. Get one in the
		 * valid range.
		 */
		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			port = udp_update_next_port(udp,
			    udp_g_next_port_to_try, B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < udp_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			for (i = 0; i < udp_g_num_epriv_ports; i++) {
				if (port == udp_g_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);

			if (secpolicy_net_privaddr(cr, port) != 0) {
				udp_err_ack(q, mp, TACCES, 0);
				return;
			}
		}
	}

	if (port == 0) {
		udp_err_ack(q, mp, TNOADDR, 0);
		return;
	}

	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it.
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream. If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set the bind fails; otherwise we
	 * search for an unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different than in SunOS 4.X which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */
		loopmax = IPPORT_RESERVED - udp_min_anonpriv_port;
	} else {
		loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
	zoneid = connp->conn_zoneid;

	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;

		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (zoneid != udp1->udp_connp->conn_zoneid &&
			    !udp->udp_mac_exempt && !udp1->udp_mac_exempt)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * is changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind ||
			    udp1->udp_mac_exempt || udp->udp_mac_exempt) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion) {

				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a
				 * particular port number, then ignore any
				 * bindings of the other protocol that may
				 * conflict. This allows the user to bind IPv6
				 * alone and get both v4 and v6, or bind both
				 * and get each separately. On subsequent
				 * times through the loop, we're checking a
				 * port that we chose (not the user) and thus
				 * we do not allow casual duplicate bindings.
				 */
				if (count == 0 && requested_port != 0)
					continue;
			}

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct different IP addresses
			 * (non-wildcard, also), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			break;
		}

		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when requested port
			 * is bound (and only first of the for()
			 * loop iteration).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 *
			 */
			udp_err_ack(q, mp, TADDRBUSY, 0);
			return;
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update udp_g_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    udp_g_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp_err_ack(q, mp, TNOADDR, 0);
			return;
		}
	}

	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address udp_rput
	 * will clear the source address when it receives the T_BIND_ACK.
	 */
	udp->udp_v6src = udp->udp_bound_v6src = v6src;
	udp->udp_port = lport;
	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
		udp_g_next_port_to_try = port + 1;
	}

	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
	if (udp->udp_family == AF_INET) {
		sin->sin_port = udp->udp_port;
	} else {
		int error;

		sin6->sin6_port = udp->udp_port;
		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			mutex_exit(&udpf->uf_lock);
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	udp->udp_state = TS_IDLE;
	udp_bind_hash_insert(udpf, udp);
	mutex_exit(&udpf->uf_lock);

	if (cl_inet_bind) {
		/*
		 * Running in cluster mode - register bind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}

	}

	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
	if (is_system_labeled() && (!connp->conn_anon_port ||
	    connp->conn_anon_mlp)) {
		uint16_t mlpport;
		cred_t *cr = connp->conn_cred;
		zone_t *zone;

		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
		    mlptSingle;
		addrtype = tsol_mlp_addr_type(zoneid, IPV6_VERSION, &v6src);
		if (addrtype == mlptSingle) {
			udp_err_ack(q, mp, TNOADDR, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}
		mlpport = connp->conn_anon_port ? PMAPPORT : port;
		zone = crgetzone(cr);
		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
		    addrtype);
		if (mlptype != mlptSingle &&
		    (connp->conn_mlp_type == mlptSingle ||
		    secpolicy_net_bindmlp(cr) != 0)) {
			if (udp->udp_debug) {
				(void) strlog(UDP_MOD_ID, 0, 1,
				    SL_ERROR|SL_TRACE,
				    "udp_bind: no priv for multilevel port %d",
				    mlpport);
			}
			udp_err_ack(q, mp, TACCES, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}

		/*
		 * If we're specifically binding a shared IP address and the
		 * port is MLP on shared addresses, then check to see if this
		 * zone actually owns the MLP.  Reject if not.
		 */
		if (mlptype == mlptShared && addrtype == mlptShared) {
			zoneid_t mlpzone;

			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
			    htons(mlpport));
			if (connp->conn_zoneid != mlpzone) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: attempt to bind port "
					    "%d on shared addr in zone %d "
					    "(should be %d)",
					    mlpport, connp->conn_zoneid,
					    mlpzone);
				}
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		if (connp->conn_anon_port) {
			int error;

			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
			    port, B_TRUE);
			if (error != 0) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: cannot establish anon "
					    "MLP for port %d", port);
				}
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		connp->conn_mlp_type = mlptype;
	}

	/* Pass the protocol number in the message following the address. */
	*mp->b_wptr++ = IPPROTO_UDP;
	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
		/*
		 * Append a request for an IRE if udp_v6src not
		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
		 */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			udp_err_ack(q, mp, TSYSERR, ENOMEM);
			return;
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
	}
	if (udp->udp_family == AF_INET6)
		mp = ip_bind_v6(q, mp, connp, NULL);
	else
		mp = ip_bind_v4(q, mp, connp);

	if (mp != NULL)
		udp_rput_other(_RD(q), mp);
	else
		CONN_INC_REF(connp);
}


/* Re-enter the perimeter to finish a bind that IP deferred. */
void
udp_resume_bind(conn_t *connp, mblk_t *mp)
{
	udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY);
}

/*
 * This is called from ip_wput_nondata to resume a deferred UDP bind.
 */
/* ARGSUSED */
static void
udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2)
{
	conn_t *connp = arg;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));

	udp_rput_other(connp->conn_rq, mp);

	CONN_OPER_PENDING_DONE(connp);
	udp_exit(connp);
}

/*
 * This routine handles each T_CONN_REQ message passed to udp.  It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_rput.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
 */
static void
udp_connect(queue_t *q, mblk_t *mp)
{
	sin6_t	*sin6;
	sin_t	*sin;
	struct T_conn_req	*tcr;
	in6_addr_t v6dst;
	ipaddr_t v4dst;
	uint16_t dstport;
	uint32_t flowinfo;
	mblk_t	*mp1, *mp2;
	udp_fanout_t	*udpf;
	udp_t	*udp, *udp1;

	udp = Q_TO_UDP(q);

	tcr = (struct T_conn_req *)mp->b_rptr;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	/*
	 * This UDP must have bound to a port already before doing
	 * a connect.
	 */
	if (udp->udp_state == TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);

	udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)];

	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		mutex_enter(&udpf->uf_lock);
		udp->udp_v6src = udp->udp_bound_v6src;
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
	}

	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine packet type based on type of address passed in
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that address family matches the type of
	 * family of the address passed down
	 */
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v6dst = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			flowinfo = 0;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
			flowinfo = sin6->sin6_flowinfo;
		}
		dstport = sin6->sin6_port;
		break;
	}
	if (dstport == 0) {
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	/*
	 * Create a default IP header with no IP options.
	 */
	udp->udp_dstport = dstport;
	if (udp->udp_ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (udp->udp_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = 0;

		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * use the address of that interface as our
		 * source address if no source address has been set.
		 */
		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
		    CLASSD(v4dst) &&
		    udp->udp_multicast_if_addr != INADDR_ANY) {
			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
			    &udp->udp_v6src);
		}
	} else {
		ASSERT(udp->udp_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = flowinfo;
		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * then the ip bind logic will pick the correct source
		 * address (i.e. matching the outgoing multicast interface).
		 */
	}

	/*
	 * Verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER
	 */
	mutex_enter(&udpf->uf_lock);
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;
		if (udp->udp_port != udp1->udp_port ||
		    udp->udp_ipversion != udp1->udp_ipversion ||
		    dstport != udp1->udp_dstport ||
		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst))
			continue;
		mutex_exit(&udpf->uf_lock);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}
	udp->udp_state = TS_DATA_XFER;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to verify that there is a route
	 * and to determine the source address.
	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
bind_failed:
		/* Undo the TS_DATA_XFER transition made above. */
		mutex_enter(&udpf->uf_lock);
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
		return;
	}

	/*
	 * We also have to send a connection confirmation to
	 * keep TLI happy. Prepare it for udp_rput.
	 */
	if (udp->udp_family == AF_INET)
		mp2 = mi_tpi_conn_con(NULL, (char *)sin,
		    sizeof (*sin), NULL, 0);
	else
		mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
		    sizeof (*sin6), NULL, 0);
	if (mp2 == NULL) {
		freemsg(mp1);
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack. */
		freemsg(mp2);
		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
	linkb(mp1, mp);
	linkb(mp1, mp2);

	mblk_setcred(mp1, udp->udp_connp->conn_cred);
	if (udp->udp_family == AF_INET)
		mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
	else
		mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);

	if (mp1 != NULL)
		udp_rput_other(_RD(q), mp1);
	else
		CONN_INC_REF(udp->udp_connp);
}

static int
udp_close(queue_t *q)
{
	conn_t	*connp = Q_TO_CONN(UDP_WR(q));
	udp_t	*udp;
	queue_t	*ip_rq = RD(UDP_WR(q));

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	ip_quiesce_conn(connp);
	/*
	 * Disable read-side synchronous stream
	 * interface and drain any queued data.
	 */
	udp_rcv_drain(q, udp, B_TRUE);
	ASSERT(!udp->udp_direct_sockfs);

	qprocsoff(q);

	/* restore IP module's high and low water marks to default values */
	ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat;

	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	ASSERT(udp->udp_rcv_list_tail == NULL);

	/* connp is now single threaded. */
	udp_close_free(connp);
	/*
	 * Restore connp as an IP endpoint.
We don't need
	 * any locks since we are now single threaded
	 */
	connp->conn_flags &= ~IPCL_UDP;
	connp->conn_state_flags &=
	    ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED);
	connp->conn_ulp_labeled = B_FALSE;
	return (0);
}

/*
 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn
 */
void
udp_quiesce_conn(conn_t *connp)
{
	udp_t	*udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)(&(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		}
	}

	udp_bind_hash_remove(udp, B_FALSE);

	/*
	 * Wait until all threads have left the perimeter and all queued
	 * work has drained: the conn is quiesced once udp_mode settles
	 * back to UDP_MT_HOT with no readers or squeue users left.
	 */
	mutex_enter(&connp->conn_lock);
	while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 ||
	    udp->udp_mode != UDP_MT_HOT) {
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	}
	mutex_exit(&connp->conn_lock);
}

/* Free all state hanging off the udp_t and detach it from its conn. */
void
udp_close_free(conn_t *connp)
{
	udp_t *udp = connp->conn_udp;

	/* If there are any options associated with the stream, free them.
*/ 2014 if (udp->udp_ip_snd_options) { 2015 mi_free((char *)udp->udp_ip_snd_options); 2016 udp->udp_ip_snd_options = NULL; 2017 } 2018 2019 if (udp->udp_ip_rcv_options) { 2020 mi_free((char *)udp->udp_ip_rcv_options); 2021 udp->udp_ip_rcv_options = NULL; 2022 } 2023 2024 /* Free memory associated with sticky options */ 2025 if (udp->udp_sticky_hdrs_len != 0) { 2026 kmem_free(udp->udp_sticky_hdrs, 2027 udp->udp_sticky_hdrs_len); 2028 udp->udp_sticky_hdrs = NULL; 2029 udp->udp_sticky_hdrs_len = 0; 2030 } 2031 2032 ip6_pkt_free(&udp->udp_sticky_ipp); 2033 2034 udp->udp_connp = NULL; 2035 connp->conn_udp = NULL; 2036 kmem_cache_free(udp_cache, udp); 2037 } 2038 2039 /* 2040 * This routine handles each T_DISCON_REQ message passed to udp 2041 * as an indicating that UDP is no longer connected. This results 2042 * in sending a T_BIND_REQ to IP to restore the binding to just 2043 * the local address/port. 2044 * 2045 * This routine sends down a T_BIND_REQ to IP with the following mblks: 2046 * T_BIND_REQ - specifying just the local address/port 2047 * T_OK_ACK - for the T_DISCON_REQ 2048 * 2049 * The disconnect completes in udp_rput. 2050 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 2051 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 2052 * it to an error ack for the appropriate primitive. 
2053 */ 2054 static void 2055 udp_disconnect(queue_t *q, mblk_t *mp) 2056 { 2057 udp_t *udp = Q_TO_UDP(q); 2058 mblk_t *mp1; 2059 udp_fanout_t *udpf; 2060 2061 if (udp->udp_state != TS_DATA_XFER) { 2062 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2063 "udp_disconnect: bad state, %u", udp->udp_state); 2064 udp_err_ack(q, mp, TOUTSTATE, 0); 2065 return; 2066 } 2067 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 2068 mutex_enter(&udpf->uf_lock); 2069 udp->udp_v6src = udp->udp_bound_v6src; 2070 udp->udp_state = TS_IDLE; 2071 mutex_exit(&udpf->uf_lock); 2072 2073 /* 2074 * Send down bind to IP to remove the full binding and revert 2075 * to the local address binding. 2076 */ 2077 if (udp->udp_family == AF_INET) 2078 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 2079 else 2080 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 2081 if (mp1 == NULL) { 2082 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2083 return; 2084 } 2085 mp = mi_tpi_ok_ack_alloc(mp); 2086 if (mp == NULL) { 2087 /* Unable to reuse the T_DISCON_REQ for the ack. */ 2088 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 2089 return; 2090 } 2091 2092 if (udp->udp_family == AF_INET6) { 2093 int error; 2094 2095 /* Rebuild the header template */ 2096 error = udp_build_hdrs(q, udp); 2097 if (error != 0) { 2098 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 2099 freemsg(mp1); 2100 return; 2101 } 2102 } 2103 mutex_enter(&udpf->uf_lock); 2104 udp->udp_discon_pending = 1; 2105 mutex_exit(&udpf->uf_lock); 2106 2107 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 2108 linkb(mp1, mp); 2109 2110 if (udp->udp_family == AF_INET6) 2111 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 2112 else 2113 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 2114 2115 if (mp1 != NULL) 2116 udp_rput_other(_RD(q), mp1); 2117 else 2118 CONN_INC_REF(udp->udp_connp); 2119 } 2120 2121 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 2122 static void 2123 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 2124 { 2125 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 2126 putnext(UDP_RD(q), mp); 2127 } 2128 2129 /* Shorthand to generate and send TPI error acks to our client */ 2130 static void 2131 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2132 int sys_error) 2133 { 2134 struct T_error_ack *teackp; 2135 2136 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2137 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2138 teackp = (struct T_error_ack *)mp->b_rptr; 2139 teackp->ERROR_prim = primitive; 2140 teackp->TLI_error = t_error; 2141 teackp->UNIX_error = sys_error; 2142 putnext(UDP_RD(q), mp); 2143 } 2144 } 2145 2146 /*ARGSUSED*/ 2147 static int 2148 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2149 { 2150 int i; 2151 2152 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2153 if (udp_g_epriv_ports[i] != 0) 2154 (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); 2155 } 2156 return (0); 2157 } 2158 2159 /* ARGSUSED */ 2160 static int 2161 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2162 cred_t *cr) 2163 { 2164 long new_value; 2165 int i; 2166 2167 /* 2168 * Fail the request if the new value does not lie within the 2169 * port number limits. 
2170 */ 2171 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2172 new_value <= 0 || new_value >= 65536) { 2173 return (EINVAL); 2174 } 2175 2176 /* Check if the value is already in the list */ 2177 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2178 if (new_value == udp_g_epriv_ports[i]) { 2179 return (EEXIST); 2180 } 2181 } 2182 /* Find an empty slot */ 2183 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2184 if (udp_g_epriv_ports[i] == 0) 2185 break; 2186 } 2187 if (i == udp_g_num_epriv_ports) { 2188 return (EOVERFLOW); 2189 } 2190 2191 /* Set the new value */ 2192 udp_g_epriv_ports[i] = (in_port_t)new_value; 2193 return (0); 2194 } 2195 2196 /* ARGSUSED */ 2197 static int 2198 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2199 cred_t *cr) 2200 { 2201 long new_value; 2202 int i; 2203 2204 /* 2205 * Fail the request if the new value does not lie within the 2206 * port number limits. 2207 */ 2208 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2209 new_value <= 0 || new_value >= 65536) { 2210 return (EINVAL); 2211 } 2212 2213 /* Check that the value is already in the list */ 2214 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2215 if (udp_g_epriv_ports[i] == new_value) 2216 break; 2217 } 2218 if (i == udp_g_num_epriv_ports) { 2219 return (ESRCH); 2220 } 2221 2222 /* Clear the value */ 2223 udp_g_epriv_ports[i] = 0; 2224 return (0); 2225 } 2226 2227 /* At minimum we need 4 bytes of UDP header */ 2228 #define ICMP_MIN_UDP_HDR 4 2229 2230 /* 2231 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2232 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2233 * Assumes that IP has pulled up everything up to and including the ICMP header. 2234 * An M_CTL could potentially come here from some other module (i.e. if UDP 2235 * is pushed on some module other than IP). 
Thus, if we find that the M_CTL 2236 * does not have enough ICMP information , following STREAMS conventions, 2237 * we send it upstream assuming it is an M_CTL we don't understand. 2238 */ 2239 static void 2240 udp_icmp_error(queue_t *q, mblk_t *mp) 2241 { 2242 icmph_t *icmph; 2243 ipha_t *ipha; 2244 int iph_hdr_length; 2245 udpha_t *udpha; 2246 sin_t sin; 2247 sin6_t sin6; 2248 mblk_t *mp1; 2249 int error = 0; 2250 size_t mp_size = MBLKL(mp); 2251 udp_t *udp = Q_TO_UDP(q); 2252 2253 /* 2254 * Assume IP provides aligned packets - otherwise toss 2255 */ 2256 if (!OK_32PTR(mp->b_rptr)) { 2257 freemsg(mp); 2258 return; 2259 } 2260 2261 /* 2262 * Verify that we have a complete IP header and the application has 2263 * asked for errors. If not, send it upstream. 2264 */ 2265 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2266 noticmpv4: 2267 putnext(UDP_RD(q), mp); 2268 return; 2269 } 2270 2271 ipha = (ipha_t *)mp->b_rptr; 2272 /* 2273 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2274 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6. 2275 */ 2276 switch (IPH_HDR_VERSION(ipha)) { 2277 case IPV6_VERSION: 2278 udp_icmp_error_ipv6(q, mp); 2279 return; 2280 case IPV4_VERSION: 2281 break; 2282 default: 2283 goto noticmpv4; 2284 } 2285 2286 /* Skip past the outer IP and ICMP headers */ 2287 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2288 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2289 /* 2290 * If we don't have the correct outer IP header length or if the ULP 2291 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2292 * send the packet upstream. 
2293 */ 2294 if (iph_hdr_length < sizeof (ipha_t) || 2295 ipha->ipha_protocol != IPPROTO_ICMP || 2296 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2297 goto noticmpv4; 2298 } 2299 ipha = (ipha_t *)&icmph[1]; 2300 2301 /* Skip past the inner IP and find the ULP header */ 2302 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2303 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2304 /* 2305 * If we don't have the correct inner IP header length or if the ULP 2306 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2307 * bytes of UDP header, send it upstream. 2308 */ 2309 if (iph_hdr_length < sizeof (ipha_t) || 2310 ipha->ipha_protocol != IPPROTO_UDP || 2311 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2312 goto noticmpv4; 2313 } 2314 2315 switch (icmph->icmph_type) { 2316 case ICMP_DEST_UNREACHABLE: 2317 switch (icmph->icmph_code) { 2318 case ICMP_FRAGMENTATION_NEEDED: 2319 /* 2320 * IP has already adjusted the path MTU. 2321 * XXX Somehow pass MTU indication to application? 
2322 */ 2323 break; 2324 case ICMP_PORT_UNREACHABLE: 2325 case ICMP_PROTOCOL_UNREACHABLE: 2326 error = ECONNREFUSED; 2327 break; 2328 default: 2329 /* Transient errors */ 2330 break; 2331 } 2332 break; 2333 default: 2334 /* Transient errors */ 2335 break; 2336 } 2337 if (error == 0) { 2338 freemsg(mp); 2339 return; 2340 } 2341 2342 switch (udp->udp_family) { 2343 case AF_INET: 2344 sin = sin_null; 2345 sin.sin_family = AF_INET; 2346 sin.sin_addr.s_addr = ipha->ipha_dst; 2347 sin.sin_port = udpha->uha_dst_port; 2348 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2349 error); 2350 break; 2351 case AF_INET6: 2352 sin6 = sin6_null; 2353 sin6.sin6_family = AF_INET6; 2354 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2355 sin6.sin6_port = udpha->uha_dst_port; 2356 2357 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2358 NULL, 0, error); 2359 break; 2360 } 2361 if (mp1) 2362 putnext(UDP_RD(q), mp1); 2363 freemsg(mp); 2364 } 2365 2366 /* 2367 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2368 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2369 * Assumes that IP has pulled up all the extension headers as well as the 2370 * ICMPv6 header. 2371 * An M_CTL could potentially come here from some other module (i.e. if UDP 2372 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2373 * does not have enough ICMP information , following STREAMS conventions, 2374 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2375 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version 2376 * field (when cast to ipha_t in udp_icmp_error). 
2377 */ 2378 static void 2379 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2380 { 2381 icmp6_t *icmp6; 2382 ip6_t *ip6h, *outer_ip6h; 2383 uint16_t hdr_length; 2384 uint8_t *nexthdrp; 2385 udpha_t *udpha; 2386 sin6_t sin6; 2387 mblk_t *mp1; 2388 int error = 0; 2389 size_t mp_size = MBLKL(mp); 2390 udp_t *udp = Q_TO_UDP(q); 2391 2392 /* 2393 * Verify that we have a complete IP header. If not, send it upstream. 2394 */ 2395 if (mp_size < sizeof (ip6_t)) { 2396 noticmpv6: 2397 putnext(UDP_RD(q), mp); 2398 return; 2399 } 2400 2401 outer_ip6h = (ip6_t *)mp->b_rptr; 2402 /* 2403 * Verify this is an ICMPV6 packet, else send it upstream 2404 */ 2405 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2406 hdr_length = IPV6_HDR_LEN; 2407 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2408 &nexthdrp) || 2409 *nexthdrp != IPPROTO_ICMPV6) { 2410 goto noticmpv6; 2411 } 2412 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2413 ip6h = (ip6_t *)&icmp6[1]; 2414 /* 2415 * Verify we have a complete ICMP and inner IP header. 2416 */ 2417 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2418 goto noticmpv6; 2419 2420 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2421 goto noticmpv6; 2422 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2423 /* 2424 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2425 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2426 * packet upstream. 
2427 */ 2428 if ((*nexthdrp != IPPROTO_UDP) || 2429 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2430 goto noticmpv6; 2431 } 2432 2433 switch (icmp6->icmp6_type) { 2434 case ICMP6_DST_UNREACH: 2435 switch (icmp6->icmp6_code) { 2436 case ICMP6_DST_UNREACH_NOPORT: 2437 error = ECONNREFUSED; 2438 break; 2439 case ICMP6_DST_UNREACH_ADMIN: 2440 case ICMP6_DST_UNREACH_NOROUTE: 2441 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2442 case ICMP6_DST_UNREACH_ADDR: 2443 /* Transient errors */ 2444 break; 2445 default: 2446 break; 2447 } 2448 break; 2449 case ICMP6_PACKET_TOO_BIG: { 2450 struct T_unitdata_ind *tudi; 2451 struct T_opthdr *toh; 2452 size_t udi_size; 2453 mblk_t *newmp; 2454 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2455 sizeof (struct ip6_mtuinfo); 2456 sin6_t *sin6; 2457 struct ip6_mtuinfo *mtuinfo; 2458 2459 /* 2460 * If the application has requested to receive path mtu 2461 * information, send up an empty message containing an 2462 * IPV6_PATHMTU ancillary data item. 2463 */ 2464 if (!udp->udp_ipv6_recvpathmtu) 2465 break; 2466 2467 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2468 opt_length; 2469 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2470 BUMP_MIB(&udp_mib, udpInErrors); 2471 break; 2472 } 2473 2474 /* 2475 * newmp->b_cont is left to NULL on purpose. This is an 2476 * empty message containing only ancillary data. 
2477 */ 2478 newmp->b_datap->db_type = M_PROTO; 2479 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2480 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2481 tudi->PRIM_type = T_UNITDATA_IND; 2482 tudi->SRC_length = sizeof (sin6_t); 2483 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2484 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2485 tudi->OPT_length = opt_length; 2486 2487 sin6 = (sin6_t *)&tudi[1]; 2488 bzero(sin6, sizeof (sin6_t)); 2489 sin6->sin6_family = AF_INET6; 2490 sin6->sin6_addr = udp->udp_v6dst; 2491 2492 toh = (struct T_opthdr *)&sin6[1]; 2493 toh->level = IPPROTO_IPV6; 2494 toh->name = IPV6_PATHMTU; 2495 toh->len = opt_length; 2496 toh->status = 0; 2497 2498 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2499 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2500 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2501 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2502 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2503 /* 2504 * We've consumed everything we need from the original 2505 * message. Free it, then send our empty message. 2506 */ 2507 freemsg(mp); 2508 putnext(UDP_RD(q), newmp); 2509 return; 2510 } 2511 case ICMP6_TIME_EXCEEDED: 2512 /* Transient errors */ 2513 break; 2514 case ICMP6_PARAM_PROB: 2515 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2516 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2517 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2518 (uchar_t *)nexthdrp) { 2519 error = ECONNREFUSED; 2520 break; 2521 } 2522 break; 2523 } 2524 if (error == 0) { 2525 freemsg(mp); 2526 return; 2527 } 2528 2529 sin6 = sin6_null; 2530 sin6.sin6_family = AF_INET6; 2531 sin6.sin6_addr = ip6h->ip6_dst; 2532 sin6.sin6_port = udpha->uha_dst_port; 2533 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2534 2535 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2536 error); 2537 if (mp1) 2538 putnext(UDP_RD(q), mp1); 2539 freemsg(mp); 2540 } 2541 2542 /* 2543 * This routine responds to T_ADDR_REQ messages. 
It is called by udp_wput. 2544 * The local address is filled in if endpoint is bound. The remote address 2545 * is filled in if remote address has been precified ("connected endpoint") 2546 * (The concept of connected CLTS sockets is alien to published TPI 2547 * but we support it anyway). 2548 */ 2549 static void 2550 udp_addr_req(queue_t *q, mblk_t *mp) 2551 { 2552 sin_t *sin; 2553 sin6_t *sin6; 2554 mblk_t *ackmp; 2555 struct T_addr_ack *taa; 2556 udp_t *udp = Q_TO_UDP(q); 2557 2558 /* Make it large enough for worst case */ 2559 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2560 2 * sizeof (sin6_t), 1); 2561 if (ackmp == NULL) { 2562 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2563 return; 2564 } 2565 taa = (struct T_addr_ack *)ackmp->b_rptr; 2566 2567 bzero(taa, sizeof (struct T_addr_ack)); 2568 ackmp->b_wptr = (uchar_t *)&taa[1]; 2569 2570 taa->PRIM_type = T_ADDR_ACK; 2571 ackmp->b_datap->db_type = M_PCPROTO; 2572 /* 2573 * Note: Following code assumes 32 bit alignment of basic 2574 * data structures like sin_t and struct T_addr_ack. 2575 */ 2576 if (udp->udp_state != TS_UNBND) { 2577 /* 2578 * Fill in local address first 2579 */ 2580 taa->LOCADDR_offset = sizeof (*taa); 2581 if (udp->udp_family == AF_INET) { 2582 taa->LOCADDR_length = sizeof (sin_t); 2583 sin = (sin_t *)&taa[1]; 2584 /* Fill zeroes and then initialize non-zero fields */ 2585 *sin = sin_null; 2586 sin->sin_family = AF_INET; 2587 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2588 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2589 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2590 sin->sin_addr.s_addr); 2591 } else { 2592 /* 2593 * INADDR_ANY 2594 * udp_v6src is not set, we might be bound to 2595 * broadcast/multicast. 
Use udp_bound_v6src as 2596 * local address instead (that could 2597 * also still be INADDR_ANY) 2598 */ 2599 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2600 sin->sin_addr.s_addr); 2601 } 2602 sin->sin_port = udp->udp_port; 2603 ackmp->b_wptr = (uchar_t *)&sin[1]; 2604 if (udp->udp_state == TS_DATA_XFER) { 2605 /* 2606 * connected, fill remote address too 2607 */ 2608 taa->REMADDR_length = sizeof (sin_t); 2609 /* assumed 32-bit alignment */ 2610 taa->REMADDR_offset = taa->LOCADDR_offset + 2611 taa->LOCADDR_length; 2612 2613 sin = (sin_t *)(ackmp->b_rptr + 2614 taa->REMADDR_offset); 2615 /* initialize */ 2616 *sin = sin_null; 2617 sin->sin_family = AF_INET; 2618 sin->sin_addr.s_addr = 2619 V4_PART_OF_V6(udp->udp_v6dst); 2620 sin->sin_port = udp->udp_dstport; 2621 ackmp->b_wptr = (uchar_t *)&sin[1]; 2622 } 2623 } else { 2624 taa->LOCADDR_length = sizeof (sin6_t); 2625 sin6 = (sin6_t *)&taa[1]; 2626 /* Fill zeroes and then initialize non-zero fields */ 2627 *sin6 = sin6_null; 2628 sin6->sin6_family = AF_INET6; 2629 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2630 sin6->sin6_addr = udp->udp_v6src; 2631 } else { 2632 /* 2633 * UNSPECIFIED 2634 * udp_v6src is not set, we might be bound to 2635 * broadcast/multicast. 
Use udp_bound_v6src as 2636 * local address instead (that could 2637 * also still be UNSPECIFIED) 2638 */ 2639 sin6->sin6_addr = 2640 udp->udp_bound_v6src; 2641 } 2642 sin6->sin6_port = udp->udp_port; 2643 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2644 if (udp->udp_state == TS_DATA_XFER) { 2645 /* 2646 * connected, fill remote address too 2647 */ 2648 taa->REMADDR_length = sizeof (sin6_t); 2649 /* assumed 32-bit alignment */ 2650 taa->REMADDR_offset = taa->LOCADDR_offset + 2651 taa->LOCADDR_length; 2652 2653 sin6 = (sin6_t *)(ackmp->b_rptr + 2654 taa->REMADDR_offset); 2655 /* initialize */ 2656 *sin6 = sin6_null; 2657 sin6->sin6_family = AF_INET6; 2658 sin6->sin6_addr = udp->udp_v6dst; 2659 sin6->sin6_port = udp->udp_dstport; 2660 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2661 } 2662 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2663 } 2664 } 2665 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2666 putnext(UDP_RD(q), ackmp); 2667 } 2668 2669 static void 2670 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2671 { 2672 if (udp->udp_family == AF_INET) { 2673 *tap = udp_g_t_info_ack_ipv4; 2674 } else { 2675 *tap = udp_g_t_info_ack_ipv6; 2676 } 2677 tap->CURRENT_state = udp->udp_state; 2678 tap->OPT_size = udp_max_optsize; 2679 } 2680 2681 /* 2682 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2683 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2684 * udp_g_t_info_ack. The current state of the stream is copied from 2685 * udp_state. 
2686 */ 2687 static void 2688 udp_capability_req(queue_t *q, mblk_t *mp) 2689 { 2690 t_uscalar_t cap_bits1; 2691 struct T_capability_ack *tcap; 2692 udp_t *udp = Q_TO_UDP(q); 2693 2694 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2695 2696 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2697 mp->b_datap->db_type, T_CAPABILITY_ACK); 2698 if (!mp) 2699 return; 2700 2701 tcap = (struct T_capability_ack *)mp->b_rptr; 2702 tcap->CAP_bits1 = 0; 2703 2704 if (cap_bits1 & TC1_INFO) { 2705 udp_copy_info(&tcap->INFO_ack, udp); 2706 tcap->CAP_bits1 |= TC1_INFO; 2707 } 2708 2709 putnext(UDP_RD(q), mp); 2710 } 2711 2712 /* 2713 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2714 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2715 * The current state of the stream is copied from udp_state. 2716 */ 2717 static void 2718 udp_info_req(queue_t *q, mblk_t *mp) 2719 { 2720 udp_t *udp = Q_TO_UDP(q); 2721 2722 /* Create a T_INFO_ACK message. */ 2723 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2724 T_INFO_ACK); 2725 if (!mp) 2726 return; 2727 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2728 putnext(UDP_RD(q), mp); 2729 } 2730 2731 /* 2732 * IP recognizes seven kinds of bind requests: 2733 * 2734 * - A zero-length address binds only to the protocol number. 2735 * 2736 * - A 4-byte address is treated as a request to 2737 * validate that the address is a valid local IPv4 2738 * address, appropriate for an application to bind to. 2739 * IP does the verification, but does not make any note 2740 * of the address at this time. 2741 * 2742 * - A 16-byte address contains is treated as a request 2743 * to validate a local IPv6 address, as the 4-byte 2744 * address case above. 2745 * 2746 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2747 * use it for the inbound fanout of packets. 
2748 * 2749 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2750 * use it for the inbound fanout of packets. 2751 * 2752 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2753 * information consisting of local and remote addresses 2754 * and ports. In this case, the addresses are both 2755 * validated as appropriate for this operation, and, if 2756 * so, the information is retained for use in the 2757 * inbound fanout. 2758 * 2759 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2760 * fanout information, like the 12-byte case above. 2761 * 2762 * IP will also fill in the IRE request mblk with information 2763 * regarding our peer. In all cases, we notify IP of our protocol 2764 * type by appending a single protocol byte to the bind request. 2765 */ 2766 static mblk_t * 2767 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2768 { 2769 char *cp; 2770 mblk_t *mp; 2771 struct T_bind_req *tbr; 2772 ipa_conn_t *ac; 2773 ipa6_conn_t *ac6; 2774 sin_t *sin; 2775 sin6_t *sin6; 2776 2777 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2778 2779 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2780 if (!mp) 2781 return (mp); 2782 mp->b_datap->db_type = M_PROTO; 2783 tbr = (struct T_bind_req *)mp->b_rptr; 2784 tbr->PRIM_type = bind_prim; 2785 tbr->ADDR_offset = sizeof (*tbr); 2786 tbr->CONIND_number = 0; 2787 tbr->ADDR_length = addr_length; 2788 cp = (char *)&tbr[1]; 2789 switch (addr_length) { 2790 case sizeof (ipa_conn_t): 2791 ASSERT(udp->udp_family == AF_INET); 2792 /* Append a request for an IRE */ 2793 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2794 if (!mp->b_cont) { 2795 freemsg(mp); 2796 return (NULL); 2797 } 2798 mp->b_cont->b_wptr += sizeof (ire_t); 2799 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2800 2801 /* cp known to be 32 bit aligned */ 2802 ac = (ipa_conn_t *)cp; 2803 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2804 ac->ac_faddr = 
V4_PART_OF_V6(udp->udp_v6dst); 2805 ac->ac_fport = udp->udp_dstport; 2806 ac->ac_lport = udp->udp_port; 2807 break; 2808 2809 case sizeof (ipa6_conn_t): 2810 ASSERT(udp->udp_family == AF_INET6); 2811 /* Append a request for an IRE */ 2812 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2813 if (!mp->b_cont) { 2814 freemsg(mp); 2815 return (NULL); 2816 } 2817 mp->b_cont->b_wptr += sizeof (ire_t); 2818 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2819 2820 /* cp known to be 32 bit aligned */ 2821 ac6 = (ipa6_conn_t *)cp; 2822 ac6->ac6_laddr = udp->udp_v6src; 2823 ac6->ac6_faddr = udp->udp_v6dst; 2824 ac6->ac6_fport = udp->udp_dstport; 2825 ac6->ac6_lport = udp->udp_port; 2826 break; 2827 2828 case sizeof (sin_t): 2829 ASSERT(udp->udp_family == AF_INET); 2830 /* Append a request for an IRE */ 2831 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2832 if (!mp->b_cont) { 2833 freemsg(mp); 2834 return (NULL); 2835 } 2836 mp->b_cont->b_wptr += sizeof (ire_t); 2837 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2838 2839 sin = (sin_t *)cp; 2840 *sin = sin_null; 2841 sin->sin_family = AF_INET; 2842 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2843 sin->sin_port = udp->udp_port; 2844 break; 2845 2846 case sizeof (sin6_t): 2847 ASSERT(udp->udp_family == AF_INET6); 2848 /* Append a request for an IRE */ 2849 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2850 if (!mp->b_cont) { 2851 freemsg(mp); 2852 return (NULL); 2853 } 2854 mp->b_cont->b_wptr += sizeof (ire_t); 2855 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2856 2857 sin6 = (sin6_t *)cp; 2858 *sin6 = sin6_null; 2859 sin6->sin6_family = AF_INET6; 2860 sin6->sin6_addr = udp->udp_bound_v6src; 2861 sin6->sin6_port = udp->udp_port; 2862 break; 2863 } 2864 /* Add protocol number to end */ 2865 cp[addr_length] = (char)IPPROTO_UDP; 2866 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2867 return (mp); 2868 } 2869 2870 /* 2871 * This is the open routine for udp. 
It allocates a udp_t structure for 2872 * the stream and, on the first open of the module, creates an ND table. 2873 */ 2874 /* ARGSUSED */ 2875 static int 2876 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2877 { 2878 int err; 2879 udp_t *udp; 2880 conn_t *connp; 2881 zoneid_t zoneid = getzoneid(); 2882 queue_t *ip_wq; 2883 char *name; 2884 2885 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2886 2887 /* If the stream is already open, return immediately. */ 2888 if (q->q_ptr != NULL) 2889 return (0); 2890 2891 /* If this is not a push of udp as a module, fail. */ 2892 if (sflag != MODOPEN) 2893 return (EINVAL); 2894 2895 q->q_hiwat = udp_recv_hiwat; 2896 WR(q)->q_hiwat = udp_xmit_hiwat; 2897 WR(q)->q_lowat = udp_xmit_lowat; 2898 2899 /* Insert ourselves in the stream since we're about to walk q_next */ 2900 qprocson(q); 2901 2902 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2903 bzero(udp, sizeof (*udp)); 2904 2905 /* 2906 * UDP is supported only as a module and it has to be pushed directly 2907 * above the device instance of IP. If UDP is pushed anywhere else 2908 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2909 * sake of MIB browsers and fail everything else. 2910 */ 2911 ip_wq = WR(q)->q_next; 2912 if (ip_wq->q_next != NULL || 2913 (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL || 2914 strcmp(name, IP_MOD_NAME) != 0 || 2915 ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) { 2916 /* Support just SNMP for MIB browsers */ 2917 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); 2918 connp->conn_rq = q; 2919 connp->conn_wq = WR(q); 2920 connp->conn_flags |= IPCL_UDPMOD; 2921 connp->conn_cred = credp; 2922 connp->conn_zoneid = zoneid; 2923 connp->conn_udp = udp; 2924 udp->udp_connp = connp; 2925 q->q_ptr = WR(q)->q_ptr = connp; 2926 crhold(credp); 2927 q->q_qinfo = &udp_snmp_rinit; 2928 WR(q)->q_qinfo = &udp_snmp_winit; 2929 return (0); 2930 } 2931 2932 /* 2933 * Initialize the udp_t structure for this stream. 
2934 */ 2935 q = RD(ip_wq); 2936 connp = Q_TO_CONN(q); 2937 mutex_enter(&connp->conn_lock); 2938 connp->conn_proto = IPPROTO_UDP; 2939 connp->conn_flags |= IPCL_UDP; 2940 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2941 connp->conn_udp = udp; 2942 2943 /* Set the initial state of the stream and the privilege status. */ 2944 udp->udp_connp = connp; 2945 udp->udp_state = TS_UNBND; 2946 udp->udp_mode = UDP_MT_HOT; 2947 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2948 udp->udp_family = AF_INET6; 2949 udp->udp_ipversion = IPV6_VERSION; 2950 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2951 udp->udp_ttl = udp_ipv6_hoplimit; 2952 connp->conn_af_isv6 = B_TRUE; 2953 connp->conn_flags |= IPCL_ISV6; 2954 } else { 2955 udp->udp_family = AF_INET; 2956 udp->udp_ipversion = IPV4_VERSION; 2957 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2958 udp->udp_ttl = udp_ipv4_ttl; 2959 connp->conn_af_isv6 = B_FALSE; 2960 connp->conn_flags &= ~IPCL_ISV6; 2961 } 2962 2963 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2964 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2965 connp->conn_zoneid = zoneid; 2966 2967 /* 2968 * If the caller has the process-wide flag set, then default to MAC 2969 * exempt mode. This allows read-down to unlabeled hosts. 2970 */ 2971 if (getpflags(NET_MAC_AWARE, credp) != 0) 2972 udp->udp_mac_exempt = B_TRUE; 2973 2974 if (connp->conn_flags & IPCL_SOCKET) { 2975 udp->udp_issocket = B_TRUE; 2976 udp->udp_direct_sockfs = B_TRUE; 2977 } 2978 2979 connp->conn_ulp_labeled = is_system_labeled(); 2980 2981 mutex_exit(&connp->conn_lock); 2982 2983 /* 2984 * The transmit hiwat/lowat is only looked at on IP's queue. 2985 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2986 * getsockopts. 
2987 */ 2988 q->q_hiwat = udp_recv_hiwat; 2989 WR(q)->q_hiwat = udp_xmit_hiwat; 2990 WR(q)->q_lowat = udp_xmit_lowat; 2991 2992 if (udp->udp_family == AF_INET6) { 2993 /* Build initial header template for transmit */ 2994 if ((err = udp_build_hdrs(q, udp)) != 0) { 2995 error: 2996 qprocsoff(UDP_RD(q)); 2997 udp->udp_connp = NULL; 2998 connp->conn_udp = NULL; 2999 kmem_cache_free(udp_cache, udp); 3000 return (err); 3001 } 3002 } 3003 3004 /* Set the Stream head write offset and high watermark. */ 3005 (void) mi_set_sth_wroff(UDP_RD(q), 3006 udp->udp_max_hdr_len + udp_wroff_extra); 3007 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 3008 3009 WR(UDP_RD(q))->q_qinfo = &udp_winit; 3010 3011 return (0); 3012 } 3013 3014 /* 3015 * Which UDP options OK to set through T_UNITDATA_REQ... 3016 */ 3017 /* ARGSUSED */ 3018 static boolean_t 3019 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 3020 { 3021 return (B_TRUE); 3022 } 3023 3024 /* 3025 * This routine gets default values of certain options whose default 3026 * values are maintained by protcol specific code 3027 */ 3028 /* ARGSUSED */ 3029 int 3030 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 3031 { 3032 int *i1 = (int *)ptr; 3033 3034 switch (level) { 3035 case IPPROTO_IP: 3036 switch (name) { 3037 case IP_MULTICAST_TTL: 3038 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 3039 return (sizeof (uchar_t)); 3040 case IP_MULTICAST_LOOP: 3041 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 3042 return (sizeof (uchar_t)); 3043 } 3044 break; 3045 case IPPROTO_IPV6: 3046 switch (name) { 3047 case IPV6_MULTICAST_HOPS: 3048 *i1 = IP_DEFAULT_MULTICAST_TTL; 3049 return (sizeof (int)); 3050 case IPV6_MULTICAST_LOOP: 3051 *i1 = IP_DEFAULT_MULTICAST_LOOP; 3052 return (sizeof (int)); 3053 case IPV6_UNICAST_HOPS: 3054 *i1 = udp_ipv6_hoplimit; 3055 return (sizeof (int)); 3056 } 3057 break; 3058 } 3059 return (-1); 3060 } 3061 3062 /* 3063 * This routine retrieves the current status 
 of socket options
 * and expects the caller to pass in the queue pointer of the
 * upper instance. It returns the size of the option retrieved.
 */
int
udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
	int	*i1 = (int *)ptr;	/* most options are int-sized */
	conn_t	*connp;
	udp_t	*udp;
	ip6_pkt_t	*ipp;
	int	len;

	/*
	 * Return conventions used below:
	 *   -1		option not supported at this level/name
	 *   -EINVAL	"soft" error: pass the request down to IP
	 *   >= 0	number of bytes stored at 'ptr'
	 */
	q = UDP_WR(q);			/* operate on the write-side queue */
	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	ipp = &udp->udp_sticky_ipp;	/* sticky (per-socket) IPv6 options */

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_DEBUG:
			*i1 = udp->udp_debug;
			break;	/* goto sizeof (int) option return */
		case SO_REUSEADDR:
			*i1 = udp->udp_reuseaddr;
			break;	/* goto sizeof (int) option return */
		case SO_TYPE:
			*i1 = SOCK_DGRAM;
			break;	/* goto sizeof (int) option return */

		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			*i1 = udp->udp_dontroute;
			break;	/* goto sizeof (int) option return */
		case SO_USELOOPBACK:
			*i1 = udp->udp_useloopback;
			break;	/* goto sizeof (int) option return */
		case SO_BROADCAST:
			*i1 = udp->udp_broadcast;
			break;	/* goto sizeof (int) option return */

		case SO_SNDBUF:
			/* Send buffer is the write queue's high-water mark */
			*i1 = q->q_hiwat;
			break;	/* goto sizeof (int) option return */
		case SO_RCVBUF:
			/* Receive buffer is the read queue's high-water mark */
			*i1 = RD(q)->q_hiwat;
			break;	/* goto sizeof (int) option return */
		case SO_DGRAM_ERRIND:
			*i1 = udp->udp_dgram_errind;
			break;	/* goto sizeof (int) option return */
		case SO_RECVUCRED:
			*i1 = udp->udp_recvucred;
			break;	/* goto sizeof (int) option return */
		case SO_TIMESTAMP:
			*i1 = udp->udp_timestamp;
			break;	/* goto sizeof (int) option return */
		case SO_ANON_MLP:
			*i1 = udp->udp_anon_mlp;
			break;	/* goto sizeof (int) option return */
		case SO_MAC_EXEMPT:
			*i1 = udp->udp_mac_exempt;
			break;	/* goto sizeof (int) option return */
		case SO_ALLZONES:
			*i1 = connp->conn_allzones;
			break;	/* goto sizeof (int) option return */
		case SO_EXCLBIND:
			*i1 = udp->udp_exclbind ? SO_EXCLBIND : 0;
			break;
		default:
			return (-1);
		}
		break;
	case IPPROTO_IP:
		/* IPv4-level options only make sense on an AF_INET socket */
		if (udp->udp_family != AF_INET)
			return (-1);
		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/*
			 * Copy out the received IP options, skipping the
			 * kernel-inserted label prefix (udp_label_len bytes).
			 */
			len = udp->udp_ip_rcv_options_len - udp->udp_label_len;
			if (len > 0) {
				bcopy(udp->udp_ip_rcv_options +
				    udp->udp_label_len, ptr, len);
			}
			return (len);
		case IP_TOS:
		case T_IP_TOS:
			*i1 = (int)udp->udp_type_of_service;
			break;	/* goto sizeof (int) option return */
		case IP_TTL:
			*i1 = (int)udp->udp_ttl;
			break;	/* goto sizeof (int) option return */
		case IP_NEXTHOP:
			/* Handled at IP level */
			return (-EINVAL);
		case IP_MULTICAST_IF:
			/* 0 address if not set */
			*(ipaddr_t *)ptr = udp->udp_multicast_if_addr;
			return (sizeof (ipaddr_t));
		case IP_MULTICAST_TTL:
			*(uchar_t *)ptr = udp->udp_multicast_ttl;
			return (sizeof (uchar_t));
		case IP_MULTICAST_LOOP:
			*ptr = connp->conn_multicast_loop;
			return (sizeof (uint8_t));
		case IP_RECVOPTS:
			*i1 = udp->udp_recvopts;
			break;	/* goto sizeof (int) option return */
		case IP_RECVDSTADDR:
			*i1 = udp->udp_recvdstaddr;
			break;	/* goto sizeof (int) option return */
		case IP_RECVIF:
			*i1 = udp->udp_recvif;
			break;	/* goto sizeof (int) option return */
		case IP_RECVSLLA:
			*i1 = udp->udp_recvslla;
			break;	/* goto sizeof (int) option return */
		case IP_RECVTTL:
			*i1 = udp->udp_recvttl;
			break;	/* goto sizeof (int) option return */
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case IP_DONTFAILOVER_IF:
			/* cannot "get" the value for these */
			return (-1);
		case IP_BOUND_IF:
			/* Zero if not set */
			*i1 = udp->udp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IP_UNSPEC_SRC:
			*i1 = udp->udp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IP_XMIT_IF:
			*i1 = udp->udp_xmit_if;
			break;	/* goto sizeof (int) option return */
		default:
			return (-1);
		}
		break;
	case IPPROTO_IPV6:
		/* IPv6-level options only make sense on an AF_INET6 socket */
		if (udp->udp_family != AF_INET6)
			return (-1);
		switch (name) {
		case IPV6_UNICAST_HOPS:
			*i1 = (unsigned int)udp->udp_ttl;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_IF:
			/* 0 index if not set */
			*i1 = udp->udp_multicast_if_index;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_HOPS:
			*i1 = udp->udp_multicast_ttl;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_LOOP:
			*i1 = connp->conn_multicast_loop;
			break;	/* goto sizeof (int) option return */
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/* cannot "get" the value for these */
			return (-1);
		case IPV6_BOUND_IF:
			/* Zero if not set */
			*i1 = udp->udp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IPV6_UNSPEC_SRC:
			*i1 = udp->udp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVPKTINFO:
			*i1 = udp->udp_ipv6_recvpktinfo;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVTCLASS:
			*i1 = udp->udp_ipv6_recvtclass;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVPATHMTU:
			*i1 = udp->udp_ipv6_recvpathmtu;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVHOPLIMIT:
			*i1 = udp->udp_ipv6_recvhoplimit;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVHOPOPTS:
			*i1 = udp->udp_ipv6_recvhopopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVDSTOPTS:
			*i1 = udp->udp_ipv6_recvdstopts;
			break;	/* goto sizeof (int) option return */
		case _OLD_IPV6_RECVDSTOPTS:
			*i1 = udp->udp_old_ipv6_recvdstopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVRTHDRDSTOPTS:
			*i1 = udp->udp_ipv6_recvrthdrdstopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVRTHDR:
			*i1 = udp->udp_ipv6_recvrthdr;
			break;	/* goto sizeof (int) option return */
		case IPV6_PKTINFO: {
			/* XXX assumes that caller has room for max size! */
			struct in6_pktinfo *pkti;

			pkti = (struct in6_pktinfo *)ptr;
			if (ipp->ipp_fields & IPPF_IFINDEX)
				pkti->ipi6_ifindex = ipp->ipp_ifindex;
			else
				pkti->ipi6_ifindex = 0;
			if (ipp->ipp_fields & IPPF_ADDR)
				pkti->ipi6_addr = ipp->ipp_addr;
			else
				pkti->ipi6_addr = ipv6_all_zeros;
			return (sizeof (struct in6_pktinfo));
		}
		case IPV6_TCLASS:
			if (ipp->ipp_fields & IPPF_TCLASS)
				*i1 = ipp->ipp_tclass;
			else
				*i1 = IPV6_FLOW_TCLASS(
				    IPV6_DEFAULT_VERS_AND_FLOW);
			break;	/* goto sizeof (int) option return */
		case IPV6_NEXTHOP: {
			sin6_t *sin6 = (sin6_t *)ptr;

			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
				return (0);
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = ipp->ipp_nexthop;
			return (sizeof (sin6_t));
		}
		case IPV6_HOPOPTS:
			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
				return (0);
			if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6)
				return (0);
			/*
			 * The cipso/label option is added by kernel.
			 * User is not usually aware of this option.
			 * We copy out the hbh opt after the label option.
			 */
			bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
			    ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
			if (udp->udp_label_len_v6 > 0) {
				/*
				 * Patch the copied-out header: keep the
				 * original next-header byte and recompute the
				 * hbh length (in 8-byte units, minus one) for
				 * the shortened option block.
				 */
				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
				ptr[1] = (ipp->ipp_hopoptslen -
				    udp->udp_label_len_v6 + 7) / 8 - 1;
			}
			return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
		case IPV6_RTHDRDSTOPTS:
			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
				return (0);
			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
			return (ipp->ipp_rtdstoptslen);
		case IPV6_RTHDR:
			if (!(ipp->ipp_fields & IPPF_RTHDR))
				return (0);
			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
			return (ipp->ipp_rthdrlen);
		case IPV6_DSTOPTS:
			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
				return (0);
			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
			return (ipp->ipp_dstoptslen);
		case IPV6_PATHMTU:
			return (ip_fill_mtuinfo(&udp->udp_v6dst,
			    udp->udp_dstport, (struct ip6_mtuinfo *)ptr));
		default:
			return (-1);
		}
		break;
	case IPPROTO_UDP:
		switch (name) {
		case UDP_ANONPRIVBIND:
			*i1 = udp->udp_anon_priv_bind;
			break;
		case UDP_EXCLBIND:
			*i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
			break;
		case UDP_RCVHDR:
			*i1 = udp->udp_rcvhdr ? 1 : 0;
			break;
		default:
			return (-1);
		}
		break;
	default:
		return (-1);
	}
	/* Common exit for all fixed-size (int) options */
	return (sizeof (int));
}

/*
 * This routine sets socket options; it expects the caller
 * to pass in the queue pointer of the upper instance.
 */
/* ARGSUSED */
int
udp_opt_set(queue_t *q, uint_t optset_context, int level,
    int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
    uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
{
	udpattrs_t	*attrs = thisdg_attrs;
	int	*i1 = (int *)invalp;
	boolean_t onoff = (*i1 == 0) ? 0 : 1;	/* normalized on/off value */
	boolean_t checkonly;
	int	error;
	conn_t	*connp;
	udp_t	*udp;
	uint_t	newlen;

	q = UDP_WR(q);		/* operate on the write-side queue */
	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;

	/*
	 * Determine whether this is a real "set" or only a T_CHECK-style
	 * validation pass (checkonly), and filter out options that may not
	 * be negotiated via T_UNITDATA_REQ.
	 */
	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		checkonly = B_TRUE;
		/*
		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
		 * inlen != 0 implies value supplied and
		 * we have to "pretend" to set it.
		 * inlen == 0 implies that there is no
		 * value part in T_CHECK request and just validation
		 * done elsewhere should be enough, we just return here.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			return (0);
		}
		break;
	case SETFN_OPTCOM_NEGOTIATE:
		checkonly = B_FALSE;
		break;
	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		checkonly = B_FALSE;
		/*
		 * Negotiating local and "association-related" options
		 * through T_UNITDATA_REQ.
		 *
		 * Following routine can filter out ones we do not
		 * want to be "set" this way.
		 */
		if (!udp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		/*
		 * We should never get here
		 */
		*outlenp = 0;
		return (EINVAL);
	}

	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	/*
	 * For fixed length options, no sanity check
	 * of passed in length is done. It is assumed *_optcom_req()
	 * routines do the right thing.
	 */

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_REUSEADDR:
			if (!checkonly)
				udp->udp_reuseaddr = onoff;
			break;
		case SO_DEBUG:
			if (!checkonly)
				udp->udp_debug = onoff;
			break;
		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			if (!checkonly)
				udp->udp_dontroute = onoff;
			break;
		case SO_USELOOPBACK:
			if (!checkonly)
				udp->udp_useloopback = onoff;
			break;
		case SO_BROADCAST:
			if (!checkonly)
				udp->udp_broadcast = onoff;
			break;

		case SO_SNDBUF:
			/* Clamp to the udp_max_buf tunable */
			if (*i1 > udp_max_buf) {
				*outlenp = 0;
				return (ENOBUFS);
			}
			if (!checkonly) {
				q->q_hiwat = *i1;
				WR(UDP_RD(q))->q_hiwat = *i1;
			}
			break;
		case SO_RCVBUF:
			/* Clamp to the udp_max_buf tunable */
			if (*i1 > udp_max_buf) {
				*outlenp = 0;
				return (ENOBUFS);
			}
			if (!checkonly) {
				RD(q)->q_hiwat = *i1;
				UDP_RD(q)->q_hiwat = *i1;
				(void) mi_set_sth_hiwat(UDP_RD(q),
				    udp_set_rcv_hiwat(udp, *i1));
			}
			break;
		case SO_DGRAM_ERRIND:
			if (!checkonly)
				udp->udp_dgram_errind = onoff;
			break;
		case SO_RECVUCRED:
			if (!checkonly)
				udp->udp_recvucred = onoff;
			break;
		case SO_ALLZONES:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Do not modify *outlenp.
			 */
			return (-EINVAL);
		case SO_TIMESTAMP:
			if (!checkonly)
				udp->udp_timestamp = onoff;
			break;
		case SO_ANON_MLP:
			if (!checkonly)
				udp->udp_anon_mlp = onoff;
			break;
		case SO_MAC_EXEMPT:
			/* Requires privilege, and only before bind */
			if (secpolicy_net_mac_aware(cr) != 0 ||
			    udp->udp_state != TS_UNBND)
				return (EACCES);
			if (!checkonly)
				udp->udp_mac_exempt = onoff;
			break;
		case SCM_UCRED: {
			struct ucred_s *ucr;
			cred_t *cr, *newcr;	/* note: shadows 'cr' param */
			ts_label_t *tsl;

			/*
			 * Only sockets that have proper privileges and are
			 * bound to MLPs will have any other value here, so
			 * this implicitly tests for privilege to set label.
			 */
			if (connp->conn_mlp_type == mlptSingle)
				break;
			ucr = (struct ucred_s *)invalp;
			if (inlen != ucredsize ||
			    ucr->uc_labeloff < sizeof (*ucr) ||
			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
				return (EINVAL);
			if (!checkonly) {
				mblk_t *mb;

				if (attrs == NULL ||
				    (mb = attrs->udpattr_mb) == NULL)
					return (EINVAL);
				if ((cr = DB_CRED(mb)) == NULL)
					cr = udp->udp_connp->conn_cred;
				ASSERT(cr != NULL);
				if ((tsl = crgetlabel(cr)) == NULL)
					return (EINVAL);
				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
				    tsl->tsl_doi, KM_NOSLEEP);
				if (newcr == NULL)
					return (ENOSR);
				/*
				 * mblk_setcred() takes its own hold on newcr,
				 * so release ours after attaching it.
				 */
				mblk_setcred(mb, newcr);
				attrs->udpattr_credset = B_TRUE;
				crfree(newcr);
			}
			break;
		}
		case SO_EXCLBIND:
			if (!checkonly)
				udp->udp_exclbind = onoff;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	case IPPROTO_IP:
		if (udp->udp_family != AF_INET) {
			*outlenp = 0;
			return (ENOPROTOOPT);
		}
		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/* Save options for use by IP. */
			newlen = inlen + udp->udp_label_len;
			if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (checkonly)
				break;

			if (!tsol_option_set(&udp->udp_ip_snd_options,
			    &udp->udp_ip_snd_options_len,
			    udp->udp_label_len, invalp, inlen)) {
				*outlenp = 0;
				return (ENOMEM);
			}

			/* Options grow the header; bump write offset */
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			(void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len +
			    udp_wroff_extra);
			break;

		case IP_TTL:
			if (!checkonly) {
				udp->udp_ttl = (uchar_t)*i1;
			}
			break;
		case IP_TOS:
		case T_IP_TOS:
			if (!checkonly) {
				udp->udp_type_of_service = (uchar_t)*i1;
			}
			break;
		case IP_MULTICAST_IF: {
			/*
			 * TODO should check OPTMGMT reply and undo this if
			 * there is an error.
			 */
			struct in_addr *inap = (struct in_addr *)invalp;
			if (!checkonly) {
				udp->udp_multicast_if_addr =
				    inap->s_addr;
			}
			break;
		}
		case IP_MULTICAST_TTL:
			if (!checkonly)
				udp->udp_multicast_ttl = *invalp;
			break;
		case IP_MULTICAST_LOOP:
			if (!checkonly)
				connp->conn_multicast_loop = *invalp;
			break;
		case IP_RECVOPTS:
			if (!checkonly)
				udp->udp_recvopts = onoff;
			break;
		case IP_RECVDSTADDR:
			if (!checkonly)
				udp->udp_recvdstaddr = onoff;
			break;
		case IP_RECVIF:
			if (!checkonly)
				udp->udp_recvif = onoff;
			break;
		case IP_RECVSLLA:
			if (!checkonly)
				udp->udp_recvslla = onoff;
			break;
		case IP_RECVTTL:
			if (!checkonly)
				udp->udp_recvttl = onoff;
			break;
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case IP_SEC_OPT:
		case IP_NEXTHOP:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Do not modify *outlenp.
			 */
			return (-EINVAL);
		case IP_BOUND_IF:
			if (!checkonly)
				udp->udp_bound_if = *i1;
			break;
		case IP_UNSPEC_SRC:
			if (!checkonly)
				udp->udp_unspec_source = onoff;
			break;
		case IP_XMIT_IF:
			if (!checkonly)
				udp->udp_xmit_if = *i1;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	case IPPROTO_IPV6: {
		ip6_pkt_t	*ipp;
		boolean_t	sticky;

		if (udp->udp_family != AF_INET6) {
			*outlenp = 0;
			return (ENOPROTOOPT);
		}
		/*
		 * Deal with both sticky options and ancillary data
		 */
		sticky = B_FALSE;
		if (attrs == NULL || (ipp = attrs->udpattr_ipp) == NULL) {
			/* sticky options, or none */
			ipp = &udp->udp_sticky_ipp;
			sticky = B_TRUE;
		}

		switch (name) {
		case IPV6_MULTICAST_IF:
			if (!checkonly)
				udp->udp_multicast_if_index = *i1;
			break;
		case IPV6_UNICAST_HOPS:
			/* -1 means use default */
			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				if (*i1 == -1) {
					udp->udp_ttl = ipp->ipp_unicast_hops =
					    udp_ipv6_hoplimit;
					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
					/* Pass modified value to IP. */
					*i1 = udp->udp_ttl;
				} else {
					udp->udp_ttl = ipp->ipp_unicast_hops =
					    (uint8_t)*i1;
					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
				}
				/* Rebuild the header template */
				error = udp_build_hdrs(q, udp);
				if (error != 0) {
					*outlenp = 0;
					return (error);
				}
			}
			break;
		case IPV6_MULTICAST_HOPS:
			/* -1 means use default */
			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				if (*i1 == -1) {
					udp->udp_multicast_ttl =
					    ipp->ipp_multicast_hops =
					    IP_DEFAULT_MULTICAST_TTL;
					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
					/* Pass modified value to IP. */
					*i1 = udp->udp_multicast_ttl;
				} else {
					udp->udp_multicast_ttl =
					    ipp->ipp_multicast_hops =
					    (uint8_t)*i1;
					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
				}
			}
			break;
		case IPV6_MULTICAST_LOOP:
			if (*i1 != 0 && *i1 != 1) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly)
				connp->conn_multicast_loop = *i1;
			break;
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Note: Do not modify *outlenp
			 */
			return (-EINVAL);
		case IPV6_BOUND_IF:
			if (!checkonly)
				udp->udp_bound_if = *i1;
			break;
		case IPV6_UNSPEC_SRC:
			if (!checkonly)
				udp->udp_unspec_source = onoff;
			break;
		/*
		 * Set boolean switches for ancillary data delivery
		 */
		case IPV6_RECVPKTINFO:
			if (!checkonly)
				udp->udp_ipv6_recvpktinfo = onoff;
			break;
		case IPV6_RECVTCLASS:
			if (!checkonly) {
				udp->udp_ipv6_recvtclass = onoff;
			}
			break;
		case IPV6_RECVPATHMTU:
			if (!checkonly) {
				udp->udp_ipv6_recvpathmtu = onoff;
			}
			break;
		case IPV6_RECVHOPLIMIT:
			if (!checkonly)
				udp->udp_ipv6_recvhoplimit = onoff;
			break;
		case IPV6_RECVHOPOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvhopopts = onoff;
			break;
		case IPV6_RECVDSTOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvdstopts = onoff;
			break;
		case _OLD_IPV6_RECVDSTOPTS:
			if (!checkonly)
				udp->udp_old_ipv6_recvdstopts = onoff;
			break;
		case IPV6_RECVRTHDRDSTOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvrthdrdstopts = onoff;
			break;
		case IPV6_RECVRTHDR:
			if (!checkonly)
				udp->udp_ipv6_recvrthdr = onoff;
			break;
		/*
		 * Set sticky options or ancillary data.
		 * If sticky options, (re)build any extension headers
		 * that might be needed as a result.
		 */
		case IPV6_PKTINFO:
			/*
			 * The source address and ifindex are verified
			 * in ip_opt_set(). For ancillary data the
			 * source address is checked in ip_wput_v6.
			 */
			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				/* Zero length input clears the option */
				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
				ipp->ipp_sticky_ignored |=
				    (IPPF_IFINDEX|IPPF_ADDR);
			} else {
				struct in6_pktinfo *pkti;

				pkti = (struct in6_pktinfo *)invalp;
				ipp->ipp_ifindex = pkti->ipi6_ifindex;
				ipp->ipp_addr = pkti->ipi6_addr;
				if (ipp->ipp_ifindex != 0)
					ipp->ipp_fields |= IPPF_IFINDEX;
				else
					ipp->ipp_fields &= ~IPPF_IFINDEX;
				if (!IN6_IS_ADDR_UNSPECIFIED(
				    &ipp->ipp_addr))
					ipp->ipp_fields |= IPPF_ADDR;
				else
					ipp->ipp_fields &= ~IPPF_ADDR;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_HOPLIMIT:
			/* Ancillary-data only; not valid as a sticky option */
			if (sticky)
				return (EINVAL);
			if (inlen != 0 && inlen != sizeof (int))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
			} else {
				if (*i1 > 255 || *i1 < -1)
					return (EINVAL);
				if (*i1 == -1)
					ipp->ipp_hoplimit = udp_ipv6_hoplimit;
				else
					ipp->ipp_hoplimit = *i1;
				ipp->ipp_fields |= IPPF_HOPLIMIT;
			}
			break;
		case IPV6_TCLASS:
			if (inlen != 0 && inlen != sizeof (int))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_TCLASS;
				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
			} else {
				if (*i1 > 255 || *i1 < -1)
					return (EINVAL);
				if (*i1 == -1)
					ipp->ipp_tclass = 0;
				else
					ipp->ipp_tclass = *i1;
				ipp->ipp_fields |= IPPF_TCLASS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_NEXTHOP:
			/*
			 * IP will verify that the nexthop is reachable
			 * and fail for sticky options.
			 */
			if (inlen != 0 && inlen != sizeof (sin6_t))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_NEXTHOP;
				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
			} else {
				sin6_t *sin6 = (sin6_t *)invalp;

				if (sin6->sin6_family != AF_INET6)
					return (EAFNOSUPPORT);
				if (IN6_IS_ADDR_V4MAPPED(
				    &sin6->sin6_addr))
					return (EADDRNOTAVAIL);
				ipp->ipp_nexthop = sin6->sin6_addr;
				if (!IN6_IS_ADDR_UNSPECIFIED(
				    &ipp->ipp_nexthop))
					ipp->ipp_fields |= IPPF_NEXTHOP;
				else
					ipp->ipp_fields &= ~IPPF_NEXTHOP;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_HOPOPTS: {
			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (hopts->ip6h_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			error = optcom_pkt_set(invalp, inlen, sticky,
			    (uchar_t **)&ipp->ipp_hopopts,
			    &ipp->ipp_hopoptslen,
			    sticky ? udp->udp_label_len_v6 : 0);
			if (error != 0)
				return (error);
			if (ipp->ipp_hopoptslen == 0) {
				ipp->ipp_fields &= ~IPPF_HOPOPTS;
				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
			} else {
				ipp->ipp_fields |= IPPF_HOPOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_RTHDRDSTOPTS: {
			ip6_dest_t *dopts = (ip6_dest_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (dopts->ip6d_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Clearing a sticky option frees its buffer */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
					kmem_free(ipp->ipp_rtdstopts,
					    ipp->ipp_rtdstoptslen);
					ipp->ipp_rtdstopts = NULL;
					ipp->ipp_rtdstoptslen = 0;
				}

				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_rtdstopts,
				    &ipp->ipp_rtdstoptslen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_DSTOPTS: {
			ip6_dest_t *dopts = (ip6_dest_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (dopts->ip6d_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Clearing a sticky option frees its buffer */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
					kmem_free(ipp->ipp_dstopts,
					    ipp->ipp_dstoptslen);
					ipp->ipp_dstopts = NULL;
					ipp->ipp_dstoptslen = 0;
				}
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_dstopts,
				    &ipp->ipp_dstoptslen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_DSTOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_RTHDR: {
			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (rt->ip6r_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Clearing a sticky option frees its buffer */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
					kmem_free(ipp->ipp_rthdr,
					    ipp->ipp_rthdrlen);
					ipp->ipp_rthdr = NULL;
					ipp->ipp_rthdrlen = 0;
				}
				ipp->ipp_fields &= ~IPPF_RTHDR;
				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_rthdr,
				    &ipp->ipp_rthdrlen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_RTHDR;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}

		case IPV6_DONTFRAG:
			if (checkonly)
				break;

			if (onoff) {
				ipp->ipp_fields |= IPPF_DONTFRAG;
			} else {
				ipp->ipp_fields &= ~IPPF_DONTFRAG;
			}
			break;

		case IPV6_USE_MIN_MTU:
			if (inlen != sizeof (int))
				return (EINVAL);

			if (*i1 < -1 || *i1 > 1)
				return (EINVAL);

			if (checkonly)
				break;

			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
			ipp->ipp_use_min_mtu = *i1;
			break;

		case IPV6_BOUND_PIF:
		case IPV6_SEC_OPT:
		case IPV6_DONTFAILOVER_IF:
		case IPV6_SRC_PREFERENCES:
		case IPV6_V6ONLY:
			/* Handled at the IP level */
			return (-EINVAL);
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	}	/* end IPPROTO_IPV6 */
	case IPPROTO_UDP:
		switch (name) {
		case UDP_ANONPRIVBIND:
			/* Binding to privileged anon ports needs privilege */
			if ((error = secpolicy_net_privaddr(cr, 0)) != 0) {
				*outlenp = 0;
				return (error);
			}
			if (!checkonly) {
				udp->udp_anon_priv_bind = onoff;
			}
			break;
		case UDP_EXCLBIND:
			if (!checkonly)
				udp->udp_exclbind = onoff;
			break;
		case UDP_RCVHDR:
			if (!checkonly)
				udp->udp_rcvhdr = onoff;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		*outlenp = 0;
		return (EINVAL);
	}
	/*
	 * Common case of OK return with outval same as inval.
	 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;
	return (0);
}

/*
 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
 * The headers include ip6i_t (if needed), ip6_t, any sticky extension
 * headers, and the udp header.
 * Returns failure if can't allocate memory.
 */
static int
udp_build_hdrs(queue_t *q, udp_t *udp)
{
	uchar_t	*hdrs;
	uint_t	hdrs_len;
	ip6_t	*ip6h;
	ip6i_t	*ip6i;
	udpha_t	*udpha;
	ip6_pkt_t *ipp = &udp->udp_sticky_ipp;

	hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
	ASSERT(hdrs_len != 0);
	if (hdrs_len != udp->udp_sticky_hdrs_len) {
		/* Need to reallocate */
		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
		if (hdrs == NULL)
			return (ENOMEM);

		if (udp->udp_sticky_hdrs_len != 0) {
			kmem_free(udp->udp_sticky_hdrs,
			    udp->udp_sticky_hdrs_len);
		}
		udp->udp_sticky_hdrs = hdrs;
		udp->udp_sticky_hdrs_len = hdrs_len;
	}
	/* Fill in the IPv6 header plus extension headers from ipp */
	ip_build_hdrs_v6(udp->udp_sticky_hdrs,
	    udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP);

	/* Set header fields not in ipp */
	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
		/* A leading ip6i_t precedes the real IPv6 header */
		ip6i = (ip6i_t *)udp->udp_sticky_hdrs;
		ip6h = (ip6_t *)&ip6i[1];
	} else {
		ip6h = (ip6_t *)udp->udp_sticky_hdrs;
	}

	if (!(ipp->ipp_fields & IPPF_ADDR))
		ip6h->ip6_src = udp->udp_v6src;

	/* The UDP header sits at the tail of the template */
	udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE);
	udpha->uha_src_port = udp->udp_port;

	/* Try to get everything in a single mblk */
	if (hdrs_len > udp->udp_max_hdr_len) {
		udp->udp_max_hdr_len = hdrs_len;
		(void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len +
		    udp_wroff_extra);
	}
	return (0);
}

/*
 * This routine retrieves the value of an ND variable in a udpparam_t
 * structure. It is called through nd_getset when a user reads the
 * variable.
 */
/* ARGSUSED */
static int
udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udpparam_t	*udppa = (udpparam_t *)cp;

	/* Print the current value into the reply mblk */
	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
	return (0);
}

/*
 * Walk through the param array specified registering each element with the
 * named dispatch (ND) handler.
 */
static boolean_t
udp_param_register(udpparam_t *udppa, int cnt)
{
	for (; cnt-- > 0; udppa++) {
		/* Skip entries with no name (placeholders) */
		if (udppa->udp_param_name && udppa->udp_param_name[0]) {
			if (!nd_load(&udp_g_nd, udppa->udp_param_name,
			    udp_param_get, udp_param_set,
			    (caddr_t)udppa)) {
				/* On any failure, tear down the whole table */
				nd_free(&udp_g_nd);
				return (B_FALSE);
			}
		}
	}
	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports",
	    udp_extra_priv_ports_get, NULL, NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add",
	    NULL, udp_extra_priv_ports_add, NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del",
	    NULL, udp_extra_priv_ports_del, NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL,
	    NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL,
	    NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	return (B_TRUE);
}

/*
 * This routine sets an ND variable in a udpparam_t structure.
 */
/* ARGSUSED */
static int
udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
{
	long		new_value;
	udpparam_t	*udppa = (udpparam_t *)cp;

	/*
	 * Fail the request if the new value does not lie within the
	 * required bounds.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value < udppa->udp_param_min ||
	    new_value > udppa->udp_param_max) {
		return (EINVAL);
	}

	/* Set the new value */
	udppa->udp_param_value = new_value;
	return (0);
}

/*
 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with
 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to
 * just count the length needed for allocation. If 'dbuf' is non-NULL,
 * then it's assumed to be allocated to be large enough.
 *
 * Returns zero if trimming of the security option causes all options to go
 * away.
 */
static size_t
copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
{
	struct T_opthdr *toh;
	size_t hol = ipp->ipp_hopoptslen;	/* remaining source bytes */
	ip6_hbh_t *dstopt = NULL;
	const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
	size_t tlen, olen, plen;
	boolean_t deleting;
	const struct ip6_opt *sopt, *lastpad;
	struct ip6_opt *dopt;

	/* Counting-only mode when dbuf is NULL; otherwise emit T_opthdr */
	if ((toh = (struct T_opthdr *)dbuf) != NULL) {
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_HOPOPTS;
		toh->status = 0;
		dstopt = (ip6_hbh_t *)(toh + 1);
	}

	/*
	 * If labeling is enabled, then skip the label option
	 * but get other options if there are any.
4350 */ 4351 if (is_system_labeled()) { 4352 dopt = NULL; 4353 if (dstopt != NULL) { 4354 /* will fill in ip6h_len later */ 4355 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 4356 dopt = (struct ip6_opt *)(dstopt + 1); 4357 } 4358 sopt = (const struct ip6_opt *)(srcopt + 1); 4359 hol -= sizeof (*srcopt); 4360 tlen = sizeof (*dstopt); 4361 lastpad = NULL; 4362 deleting = B_FALSE; 4363 /* 4364 * This loop finds the first (lastpad pointer) of any number of 4365 * pads that preceeds the security option, then treats the 4366 * security option as though it were a pad, and then finds the 4367 * next non-pad option (or end of list). 4368 * 4369 * It then treats the entire block as one big pad. To preserve 4370 * alignment of any options that follow, or just the end of the 4371 * list, it computes a minimal new padding size that keeps the 4372 * same alignment for the next option. 4373 * 4374 * If it encounters just a sequence of pads with no security 4375 * option, those are copied as-is rather than collapsed. 4376 * 4377 * Note that to handle the end of list case, the code makes one 4378 * loop with 'hol' set to zero. 
4379 */ 4380 for (;;) { 4381 if (hol > 0) { 4382 if (sopt->ip6o_type == IP6OPT_PAD1) { 4383 if (lastpad == NULL) 4384 lastpad = sopt; 4385 sopt = (const struct ip6_opt *) 4386 &sopt->ip6o_len; 4387 hol--; 4388 continue; 4389 } 4390 olen = sopt->ip6o_len + sizeof (*sopt); 4391 if (olen > hol) 4392 olen = hol; 4393 if (sopt->ip6o_type == IP6OPT_PADN || 4394 sopt->ip6o_type == ip6opt_ls) { 4395 if (sopt->ip6o_type == ip6opt_ls) 4396 deleting = B_TRUE; 4397 if (lastpad == NULL) 4398 lastpad = sopt; 4399 sopt = (const struct ip6_opt *) 4400 ((const char *)sopt + olen); 4401 hol -= olen; 4402 continue; 4403 } 4404 } else { 4405 /* if nothing was copied at all, then delete */ 4406 if (tlen == sizeof (*dstopt)) 4407 return (0); 4408 /* last pass; pick up any trailing padding */ 4409 olen = 0; 4410 } 4411 if (deleting) { 4412 /* 4413 * compute aligning effect of deleted material 4414 * to reproduce with pad. 4415 */ 4416 plen = ((const char *)sopt - 4417 (const char *)lastpad) & 7; 4418 tlen += plen; 4419 if (dopt != NULL) { 4420 if (plen == 1) { 4421 dopt->ip6o_type = IP6OPT_PAD1; 4422 } else if (plen > 1) { 4423 plen -= sizeof (*dopt); 4424 dopt->ip6o_type = IP6OPT_PADN; 4425 dopt->ip6o_len = plen; 4426 if (plen > 0) 4427 bzero(dopt + 1, plen); 4428 } 4429 dopt = (struct ip6_opt *) 4430 ((char *)dopt + plen); 4431 } 4432 deleting = B_FALSE; 4433 lastpad = NULL; 4434 } 4435 /* if there's uncopied padding, then copy that now */ 4436 if (lastpad != NULL) { 4437 olen += (const char *)sopt - 4438 (const char *)lastpad; 4439 sopt = lastpad; 4440 lastpad = NULL; 4441 } 4442 if (dopt != NULL && olen > 0) { 4443 bcopy(sopt, dopt, olen); 4444 dopt = (struct ip6_opt *)((char *)dopt + olen); 4445 } 4446 if (hol == 0) 4447 break; 4448 tlen += olen; 4449 sopt = (const struct ip6_opt *) 4450 ((const char *)sopt + olen); 4451 hol -= olen; 4452 } 4453 /* go back and patch up the length value, rounded upward */ 4454 if (dstopt != NULL) 4455 dstopt->ip6h_len = (tlen - 1) >> 3; 4456 } else { 
4457 tlen = hol; 4458 if (dstopt != NULL) 4459 bcopy(srcopt, dstopt, hol); 4460 } 4461 4462 tlen += sizeof (*toh); 4463 if (toh != NULL) 4464 toh->len = tlen; 4465 4466 return (tlen); 4467 } 4468 4469 static void 4470 udp_input(conn_t *connp, mblk_t *mp) 4471 { 4472 struct T_unitdata_ind *tudi; 4473 uchar_t *rptr; /* Pointer to IP header */ 4474 int hdr_length; /* Length of IP+UDP headers */ 4475 int udi_size; /* Size of T_unitdata_ind */ 4476 int mp_len; 4477 udp_t *udp; 4478 udpha_t *udpha; 4479 int ipversion; 4480 ip6_pkt_t ipp; 4481 ip6_t *ip6h; 4482 ip6i_t *ip6i; 4483 mblk_t *mp1; 4484 mblk_t *options_mp = NULL; 4485 in_pktinfo_t *pinfo = NULL; 4486 cred_t *cr = NULL; 4487 queue_t *q = connp->conn_rq; 4488 pid_t cpid; 4489 cred_t *rcr = connp->conn_cred; 4490 4491 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4492 "udp_rput_start: q %p mp %p", q, mp); 4493 4494 udp = connp->conn_udp; 4495 rptr = mp->b_rptr; 4496 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4497 ASSERT(OK_32PTR(rptr)); 4498 4499 /* 4500 * IP should have prepended the options data in an M_CTL 4501 * Check M_CTL "type" to make sure are not here bcos of 4502 * a valid ICMP message 4503 */ 4504 if (DB_TYPE(mp) == M_CTL) { 4505 if (MBLKL(mp) == sizeof (in_pktinfo_t) && 4506 ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type == 4507 IN_PKTINFO) { 4508 /* 4509 * IP_RECVIF or IP_RECVSLLA information has been 4510 * appended to the packet by IP. We need to 4511 * extract the mblk and adjust the rptr 4512 */ 4513 pinfo = (in_pktinfo_t *)mp->b_rptr; 4514 options_mp = mp; 4515 mp = mp->b_cont; 4516 rptr = mp->b_rptr; 4517 UDP_STAT(udp_in_pktinfo); 4518 } else { 4519 /* 4520 * ICMP messages. 4521 */ 4522 udp_icmp_error(q, mp); 4523 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4524 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4525 return; 4526 } 4527 } 4528 4529 mp_len = msgdsize(mp); 4530 /* 4531 * This is the inbound data path. 
4532 * First, we check to make sure the IP version number is correct, 4533 * and then pull the IP and UDP headers into the first mblk. 4534 * Assume IP provides aligned packets - otherwise toss. 4535 * Also, check if we have a complete IP header. 4536 */ 4537 4538 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4539 ipp.ipp_fields = 0; 4540 4541 ipversion = IPH_HDR_VERSION(rptr); 4542 switch (ipversion) { 4543 case IPV4_VERSION: 4544 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4545 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4546 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4547 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4548 (udp->udp_ip_rcv_options_len)) { 4549 /* 4550 * Handle IPv4 packets with options outside of the 4551 * main data path. Not needed for AF_INET6 sockets 4552 * since they don't support a getsockopt of IP_OPTIONS. 4553 */ 4554 if (udp->udp_family == AF_INET6) 4555 break; 4556 /* 4557 * UDP length check performed for IPv4 packets with 4558 * options to check whether UDP length specified in 4559 * the header is the same as the physical length of 4560 * the packet. 4561 */ 4562 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4563 if (mp_len != (ntohs(udpha->uha_length) + 4564 hdr_length - UDPH_SIZE)) { 4565 goto tossit; 4566 } 4567 /* 4568 * Handle the case where the packet has IP options 4569 * and the IP_RECVSLLA & IP_RECVIF are set 4570 */ 4571 if (pinfo != NULL) 4572 mp = options_mp; 4573 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4574 SQTAG_UDP_INPUT); 4575 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4576 "udp_rput_end: q %p (%S)", q, "end"); 4577 return; 4578 } 4579 4580 /* Handle IPV6_RECVHOPLIMIT. 
*/ 4581 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4582 udp->udp_ipv6_recvpktinfo) { 4583 if (pinfo->in_pkt_flags & IPF_RECVIF) { 4584 ipp.ipp_fields |= IPPF_IFINDEX; 4585 ipp.ipp_ifindex = pinfo->in_pkt_ifindex; 4586 } 4587 } 4588 break; 4589 case IPV6_VERSION: 4590 /* 4591 * IPv6 packets can only be received by applications 4592 * that are prepared to receive IPv6 addresses. 4593 * The IP fanout must ensure this. 4594 */ 4595 ASSERT(udp->udp_family == AF_INET6); 4596 4597 ip6h = (ip6_t *)rptr; 4598 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4599 4600 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4601 uint8_t nexthdrp; 4602 /* Look for ifindex information */ 4603 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4604 ip6i = (ip6i_t *)ip6h; 4605 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4606 goto tossit; 4607 4608 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4609 ASSERT(ip6i->ip6i_ifindex != 0); 4610 ipp.ipp_fields |= IPPF_IFINDEX; 4611 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4612 } 4613 rptr = (uchar_t *)&ip6i[1]; 4614 mp->b_rptr = rptr; 4615 if (rptr == mp->b_wptr) { 4616 mp1 = mp->b_cont; 4617 freeb(mp); 4618 mp = mp1; 4619 rptr = mp->b_rptr; 4620 } 4621 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4622 goto tossit; 4623 ip6h = (ip6_t *)rptr; 4624 mp_len = msgdsize(mp); 4625 } 4626 /* 4627 * Find any potentially interesting extension headers 4628 * as well as the length of the IPv6 + extension 4629 * headers. 4630 */ 4631 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4632 UDPH_SIZE; 4633 ASSERT(nexthdrp == IPPROTO_UDP); 4634 } else { 4635 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4636 ip6i = NULL; 4637 } 4638 break; 4639 default: 4640 ASSERT(0); 4641 } 4642 4643 /* 4644 * IP inspected the UDP header thus all of it must be in the mblk. 4645 * UDP length check is performed for IPv6 packets and IPv4 packets 4646 * without options to check if the size of the packet as specified 4647 * by the header is the same as the physical size of the packet. 
4648 */ 4649 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4650 if ((MBLKL(mp) < hdr_length) || 4651 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4652 goto tossit; 4653 } 4654 4655 /* Walk past the headers. */ 4656 if (!udp->udp_rcvhdr) { 4657 mp->b_rptr = rptr + hdr_length; 4658 mp_len -= hdr_length; 4659 } 4660 4661 /* 4662 * This is the inbound data path. Packets are passed upstream as 4663 * T_UNITDATA_IND messages with full IP headers still attached. 4664 */ 4665 if (udp->udp_family == AF_INET) { 4666 sin_t *sin; 4667 4668 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4669 4670 /* 4671 * Normally only send up the address. 4672 * If IP_RECVDSTADDR is set we include the destination IP 4673 * address as an option. With IP_RECVOPTS we include all 4674 * the IP options. Only ip_rput_other() handles packets 4675 * that contain IP options. 4676 */ 4677 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4678 if (udp->udp_recvdstaddr) { 4679 udi_size += sizeof (struct T_opthdr) + 4680 sizeof (struct in_addr); 4681 UDP_STAT(udp_in_recvdstaddr); 4682 } 4683 4684 /* 4685 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4686 * space accordingly 4687 */ 4688 if (udp->udp_recvif && (pinfo != NULL) && 4689 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4690 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4691 UDP_STAT(udp_in_recvif); 4692 } 4693 4694 if (udp->udp_recvslla && (pinfo != NULL) && 4695 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4696 udi_size += sizeof (struct T_opthdr) + 4697 sizeof (struct sockaddr_dl); 4698 UDP_STAT(udp_in_recvslla); 4699 } 4700 4701 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4702 udi_size += sizeof (struct T_opthdr) + ucredsize; 4703 cpid = DB_CPID(mp); 4704 UDP_STAT(udp_in_recvucred); 4705 } 4706 4707 /* 4708 * If SO_TIMESTAMP is set allocate the appropriate sized 4709 * buffer. 
Since gethrestime() expects a pointer aligned 4710 * argument, we allocate space necessary for extra 4711 * alignment (even though it might not be used). 4712 */ 4713 if (udp->udp_timestamp) { 4714 udi_size += sizeof (struct T_opthdr) + 4715 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4716 UDP_STAT(udp_in_timestamp); 4717 } 4718 4719 /* 4720 * If IP_RECVTTL is set allocate the appropriate sized buffer 4721 */ 4722 if (udp->udp_recvttl) { 4723 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4724 UDP_STAT(udp_in_recvttl); 4725 } 4726 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4727 4728 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4729 mp1 = allocb(udi_size, BPRI_MED); 4730 if (mp1 == NULL) { 4731 freemsg(mp); 4732 if (options_mp != NULL) 4733 freeb(options_mp); 4734 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4735 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4736 BUMP_MIB(&udp_mib, udpInErrors); 4737 return; 4738 } 4739 mp1->b_cont = mp; 4740 mp = mp1; 4741 mp->b_datap->db_type = M_PROTO; 4742 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4743 mp->b_wptr = (uchar_t *)tudi + udi_size; 4744 tudi->PRIM_type = T_UNITDATA_IND; 4745 tudi->SRC_length = sizeof (sin_t); 4746 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4747 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4748 sizeof (sin_t); 4749 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4750 tudi->OPT_length = udi_size; 4751 sin = (sin_t *)&tudi[1]; 4752 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4753 sin->sin_port = udpha->uha_src_port; 4754 sin->sin_family = udp->udp_family; 4755 *(uint32_t *)&sin->sin_zero[0] = 0; 4756 *(uint32_t *)&sin->sin_zero[4] = 0; 4757 4758 /* 4759 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4760 * IP_RECVTTL has been set. 4761 */ 4762 if (udi_size != 0) { 4763 /* 4764 * Copy in destination address before options to avoid 4765 * any padding issues. 
4766 */ 4767 char *dstopt; 4768 4769 dstopt = (char *)&sin[1]; 4770 if (udp->udp_recvdstaddr) { 4771 struct T_opthdr *toh; 4772 ipaddr_t *dstptr; 4773 4774 toh = (struct T_opthdr *)dstopt; 4775 toh->level = IPPROTO_IP; 4776 toh->name = IP_RECVDSTADDR; 4777 toh->len = sizeof (struct T_opthdr) + 4778 sizeof (ipaddr_t); 4779 toh->status = 0; 4780 dstopt += sizeof (struct T_opthdr); 4781 dstptr = (ipaddr_t *)dstopt; 4782 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4783 dstopt = (char *)toh + toh->len; 4784 udi_size -= toh->len; 4785 } 4786 4787 if (udp->udp_recvslla && (pinfo != NULL) && 4788 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4789 4790 struct T_opthdr *toh; 4791 struct sockaddr_dl *dstptr; 4792 4793 toh = (struct T_opthdr *)dstopt; 4794 toh->level = IPPROTO_IP; 4795 toh->name = IP_RECVSLLA; 4796 toh->len = sizeof (struct T_opthdr) + 4797 sizeof (struct sockaddr_dl); 4798 toh->status = 0; 4799 dstopt += sizeof (struct T_opthdr); 4800 dstptr = (struct sockaddr_dl *)dstopt; 4801 bcopy(&pinfo->in_pkt_slla, dstptr, 4802 sizeof (struct sockaddr_dl)); 4803 dstopt = (char *)toh + toh->len; 4804 udi_size -= toh->len; 4805 } 4806 4807 if (udp->udp_recvif && (pinfo != NULL) && 4808 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4809 4810 struct T_opthdr *toh; 4811 uint_t *dstptr; 4812 4813 toh = (struct T_opthdr *)dstopt; 4814 toh->level = IPPROTO_IP; 4815 toh->name = IP_RECVIF; 4816 toh->len = sizeof (struct T_opthdr) + 4817 sizeof (uint_t); 4818 toh->status = 0; 4819 dstopt += sizeof (struct T_opthdr); 4820 dstptr = (uint_t *)dstopt; 4821 *dstptr = pinfo->in_pkt_ifindex; 4822 dstopt = (char *)toh + toh->len; 4823 udi_size -= toh->len; 4824 } 4825 4826 if (cr != NULL) { 4827 struct T_opthdr *toh; 4828 4829 toh = (struct T_opthdr *)dstopt; 4830 toh->level = SOL_SOCKET; 4831 toh->name = SCM_UCRED; 4832 toh->len = sizeof (struct T_opthdr) + ucredsize; 4833 toh->status = 0; 4834 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4835 dstopt = (char *)toh + toh->len; 4836 udi_size -= toh->len; 4837 } 
4838 4839 if (udp->udp_timestamp) { 4840 struct T_opthdr *toh; 4841 4842 toh = (struct T_opthdr *)dstopt; 4843 toh->level = SOL_SOCKET; 4844 toh->name = SCM_TIMESTAMP; 4845 toh->len = sizeof (struct T_opthdr) + 4846 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4847 toh->status = 0; 4848 dstopt += sizeof (struct T_opthdr); 4849 /* Align for gethrestime() */ 4850 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4851 sizeof (intptr_t)); 4852 gethrestime((timestruc_t *)dstopt); 4853 dstopt = (char *)toh + toh->len; 4854 udi_size -= toh->len; 4855 } 4856 4857 /* 4858 * CAUTION: 4859 * Due to aligment issues 4860 * Processing of IP_RECVTTL option 4861 * should always be the last. Adding 4862 * any option processing after this will 4863 * cause alignment panic. 4864 */ 4865 if (udp->udp_recvttl) { 4866 struct T_opthdr *toh; 4867 uint8_t *dstptr; 4868 4869 toh = (struct T_opthdr *)dstopt; 4870 toh->level = IPPROTO_IP; 4871 toh->name = IP_RECVTTL; 4872 toh->len = sizeof (struct T_opthdr) + 4873 sizeof (uint8_t); 4874 toh->status = 0; 4875 dstopt += sizeof (struct T_opthdr); 4876 dstptr = (uint8_t *)dstopt; 4877 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4878 dstopt = (char *)toh + toh->len; 4879 udi_size -= toh->len; 4880 } 4881 4882 /* Consumed all of allocated space */ 4883 ASSERT(udi_size == 0); 4884 } 4885 } else { 4886 sin6_t *sin6; 4887 4888 /* 4889 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4890 * 4891 * Normally we only send up the address. If receiving of any 4892 * optional receive side information is enabled, we also send 4893 * that up as options. 
4894 * [ Only udp_rput_other() handles packets that contain IP 4895 * options so code to account for does not appear immediately 4896 * below but elsewhere ] 4897 */ 4898 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4899 4900 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4901 IPPF_RTHDR|IPPF_IFINDEX)) { 4902 if (udp->udp_ipv6_recvhopopts && 4903 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4904 size_t hlen; 4905 4906 UDP_STAT(udp_in_recvhopopts); 4907 hlen = copy_hop_opts(&ipp, NULL); 4908 if (hlen == 0) 4909 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4910 udi_size += hlen; 4911 } 4912 if ((udp->udp_ipv6_recvdstopts || 4913 udp->udp_old_ipv6_recvdstopts) && 4914 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4915 udi_size += sizeof (struct T_opthdr) + 4916 ipp.ipp_dstoptslen; 4917 UDP_STAT(udp_in_recvdstopts); 4918 } 4919 if (((udp->udp_ipv6_recvdstopts && 4920 udp->udp_ipv6_recvrthdr && 4921 (ipp.ipp_fields & IPPF_RTHDR)) || 4922 udp->udp_ipv6_recvrthdrdstopts) && 4923 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4924 udi_size += sizeof (struct T_opthdr) + 4925 ipp.ipp_rtdstoptslen; 4926 UDP_STAT(udp_in_recvrtdstopts); 4927 } 4928 if (udp->udp_ipv6_recvrthdr && 4929 (ipp.ipp_fields & IPPF_RTHDR)) { 4930 udi_size += sizeof (struct T_opthdr) + 4931 ipp.ipp_rthdrlen; 4932 UDP_STAT(udp_in_recvrthdr); 4933 } 4934 if (udp->udp_ipv6_recvpktinfo && 4935 (ipp.ipp_fields & IPPF_IFINDEX)) { 4936 udi_size += sizeof (struct T_opthdr) + 4937 sizeof (struct in6_pktinfo); 4938 UDP_STAT(udp_in_recvpktinfo); 4939 } 4940 4941 } 4942 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4943 udi_size += sizeof (struct T_opthdr) + ucredsize; 4944 cpid = DB_CPID(mp); 4945 UDP_STAT(udp_in_recvucred); 4946 } 4947 4948 if (udp->udp_ipv6_recvhoplimit) { 4949 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4950 UDP_STAT(udp_in_recvhoplimit); 4951 } 4952 4953 if (udp->udp_ipv6_recvtclass) { 4954 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4955 UDP_STAT(udp_in_recvtclass); 4956 } 
4957 4958 mp1 = allocb(udi_size, BPRI_MED); 4959 if (mp1 == NULL) { 4960 freemsg(mp); 4961 if (options_mp != NULL) 4962 freeb(options_mp); 4963 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4964 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4965 BUMP_MIB(&udp_mib, udpInErrors); 4966 return; 4967 } 4968 mp1->b_cont = mp; 4969 mp = mp1; 4970 mp->b_datap->db_type = M_PROTO; 4971 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4972 mp->b_wptr = (uchar_t *)tudi + udi_size; 4973 tudi->PRIM_type = T_UNITDATA_IND; 4974 tudi->SRC_length = sizeof (sin6_t); 4975 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4976 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4977 sizeof (sin6_t); 4978 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4979 tudi->OPT_length = udi_size; 4980 sin6 = (sin6_t *)&tudi[1]; 4981 if (ipversion == IPV4_VERSION) { 4982 in6_addr_t v6dst; 4983 4984 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4985 &sin6->sin6_addr); 4986 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4987 &v6dst); 4988 sin6->sin6_flowinfo = 0; 4989 sin6->sin6_scope_id = 0; 4990 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4991 connp->conn_zoneid); 4992 } else { 4993 sin6->sin6_addr = ip6h->ip6_src; 4994 /* No sin6_flowinfo per API */ 4995 sin6->sin6_flowinfo = 0; 4996 /* For link-scope source pass up scope id */ 4997 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4998 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4999 sin6->sin6_scope_id = ipp.ipp_ifindex; 5000 else 5001 sin6->sin6_scope_id = 0; 5002 sin6->__sin6_src_id = ip_srcid_find_addr( 5003 &ip6h->ip6_dst, connp->conn_zoneid); 5004 } 5005 sin6->sin6_port = udpha->uha_src_port; 5006 sin6->sin6_family = udp->udp_family; 5007 5008 if (udi_size != 0) { 5009 uchar_t *dstopt; 5010 5011 dstopt = (uchar_t *)&sin6[1]; 5012 if (udp->udp_ipv6_recvpktinfo && 5013 (ipp.ipp_fields & IPPF_IFINDEX)) { 5014 struct T_opthdr *toh; 5015 struct in6_pktinfo *pkti; 5016 5017 toh = (struct T_opthdr *)dstopt; 5018 toh->level = IPPROTO_IPV6; 5019 
toh->name = IPV6_PKTINFO; 5020 toh->len = sizeof (struct T_opthdr) + 5021 sizeof (*pkti); 5022 toh->status = 0; 5023 dstopt += sizeof (struct T_opthdr); 5024 pkti = (struct in6_pktinfo *)dstopt; 5025 if (ipversion == IPV6_VERSION) 5026 pkti->ipi6_addr = ip6h->ip6_dst; 5027 else 5028 IN6_IPADDR_TO_V4MAPPED( 5029 ((ipha_t *)rptr)->ipha_dst, 5030 &pkti->ipi6_addr); 5031 pkti->ipi6_ifindex = ipp.ipp_ifindex; 5032 dstopt += sizeof (*pkti); 5033 udi_size -= toh->len; 5034 } 5035 if (udp->udp_ipv6_recvhoplimit) { 5036 struct T_opthdr *toh; 5037 5038 toh = (struct T_opthdr *)dstopt; 5039 toh->level = IPPROTO_IPV6; 5040 toh->name = IPV6_HOPLIMIT; 5041 toh->len = sizeof (struct T_opthdr) + 5042 sizeof (uint_t); 5043 toh->status = 0; 5044 dstopt += sizeof (struct T_opthdr); 5045 if (ipversion == IPV6_VERSION) 5046 *(uint_t *)dstopt = ip6h->ip6_hops; 5047 else 5048 *(uint_t *)dstopt = 5049 ((ipha_t *)rptr)->ipha_ttl; 5050 dstopt += sizeof (uint_t); 5051 udi_size -= toh->len; 5052 } 5053 if (udp->udp_ipv6_recvtclass) { 5054 struct T_opthdr *toh; 5055 5056 toh = (struct T_opthdr *)dstopt; 5057 toh->level = IPPROTO_IPV6; 5058 toh->name = IPV6_TCLASS; 5059 toh->len = sizeof (struct T_opthdr) + 5060 sizeof (uint_t); 5061 toh->status = 0; 5062 dstopt += sizeof (struct T_opthdr); 5063 if (ipversion == IPV6_VERSION) { 5064 *(uint_t *)dstopt = 5065 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 5066 } else { 5067 ipha_t *ipha = (ipha_t *)rptr; 5068 *(uint_t *)dstopt = 5069 ipha->ipha_type_of_service; 5070 } 5071 dstopt += sizeof (uint_t); 5072 udi_size -= toh->len; 5073 } 5074 if (udp->udp_ipv6_recvhopopts && 5075 (ipp.ipp_fields & IPPF_HOPOPTS)) { 5076 size_t hlen; 5077 5078 hlen = copy_hop_opts(&ipp, dstopt); 5079 dstopt += hlen; 5080 udi_size -= hlen; 5081 } 5082 if (udp->udp_ipv6_recvdstopts && 5083 udp->udp_ipv6_recvrthdr && 5084 (ipp.ipp_fields & IPPF_RTHDR) && 5085 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 5086 struct T_opthdr *toh; 5087 5088 toh = (struct T_opthdr *)dstopt; 5089 toh->level = 
IPPROTO_IPV6; 5090 toh->name = IPV6_DSTOPTS; 5091 toh->len = sizeof (struct T_opthdr) + 5092 ipp.ipp_rtdstoptslen; 5093 toh->status = 0; 5094 dstopt += sizeof (struct T_opthdr); 5095 bcopy(ipp.ipp_rtdstopts, dstopt, 5096 ipp.ipp_rtdstoptslen); 5097 dstopt += ipp.ipp_rtdstoptslen; 5098 udi_size -= toh->len; 5099 } 5100 if (udp->udp_ipv6_recvrthdr && 5101 (ipp.ipp_fields & IPPF_RTHDR)) { 5102 struct T_opthdr *toh; 5103 5104 toh = (struct T_opthdr *)dstopt; 5105 toh->level = IPPROTO_IPV6; 5106 toh->name = IPV6_RTHDR; 5107 toh->len = sizeof (struct T_opthdr) + 5108 ipp.ipp_rthdrlen; 5109 toh->status = 0; 5110 dstopt += sizeof (struct T_opthdr); 5111 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 5112 dstopt += ipp.ipp_rthdrlen; 5113 udi_size -= toh->len; 5114 } 5115 if (udp->udp_ipv6_recvdstopts && 5116 (ipp.ipp_fields & IPPF_DSTOPTS)) { 5117 struct T_opthdr *toh; 5118 5119 toh = (struct T_opthdr *)dstopt; 5120 toh->level = IPPROTO_IPV6; 5121 toh->name = IPV6_DSTOPTS; 5122 toh->len = sizeof (struct T_opthdr) + 5123 ipp.ipp_dstoptslen; 5124 toh->status = 0; 5125 dstopt += sizeof (struct T_opthdr); 5126 bcopy(ipp.ipp_dstopts, dstopt, 5127 ipp.ipp_dstoptslen); 5128 dstopt += ipp.ipp_dstoptslen; 5129 udi_size -= toh->len; 5130 } 5131 5132 if (cr != NULL) { 5133 struct T_opthdr *toh; 5134 5135 toh = (struct T_opthdr *)dstopt; 5136 toh->level = SOL_SOCKET; 5137 toh->name = SCM_UCRED; 5138 toh->len = sizeof (struct T_opthdr) + ucredsize; 5139 toh->status = 0; 5140 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5141 dstopt += toh->len; 5142 udi_size -= toh->len; 5143 } 5144 /* Consumed all of allocated space */ 5145 ASSERT(udi_size == 0); 5146 } 5147 #undef sin6 5148 /* No IP_RECVDSTADDR for IPv6. 
*/ 5149 } 5150 5151 BUMP_MIB(&udp_mib, udpInDatagrams); 5152 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5153 "udp_rput_end: q %p (%S)", q, "end"); 5154 if (options_mp != NULL) 5155 freeb(options_mp); 5156 5157 if (udp->udp_direct_sockfs) { 5158 /* 5159 * There is nothing above us except for the stream head; 5160 * use the read-side synchronous stream interface in 5161 * order to reduce the time spent in interrupt thread. 5162 */ 5163 ASSERT(udp->udp_issocket); 5164 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 5165 } else { 5166 /* 5167 * Use regular STREAMS interface to pass data upstream 5168 * if this is not a socket endpoint, or if we have 5169 * switched over to the slow mode due to sockmod being 5170 * popped or a module being pushed on top of us. 5171 */ 5172 putnext(UDP_RD(q), mp); 5173 } 5174 return; 5175 5176 tossit: 5177 freemsg(mp); 5178 if (options_mp != NULL) 5179 freeb(options_mp); 5180 BUMP_MIB(&udp_mib, udpInErrors); 5181 } 5182 5183 void 5184 udp_conn_recv(conn_t *connp, mblk_t *mp) 5185 { 5186 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 5187 } 5188 5189 /* ARGSUSED */ 5190 static void 5191 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 5192 { 5193 udp_input((conn_t *)arg, mp); 5194 _UDP_EXIT((conn_t *)arg); 5195 } 5196 5197 /* 5198 * Process non-M_DATA messages as well as M_DATA messages that requires 5199 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 
5200 */ 5201 static void 5202 udp_rput_other(queue_t *q, mblk_t *mp) 5203 { 5204 struct T_unitdata_ind *tudi; 5205 mblk_t *mp1; 5206 uchar_t *rptr; 5207 uchar_t *new_rptr; 5208 int hdr_length; 5209 int udi_size; /* Size of T_unitdata_ind */ 5210 int opt_len; /* Length of IP options */ 5211 sin_t *sin; 5212 struct T_error_ack *tea; 5213 mblk_t *options_mp = NULL; 5214 in_pktinfo_t *pinfo; 5215 boolean_t recv_on = B_FALSE; 5216 cred_t *cr = NULL; 5217 udp_t *udp = Q_TO_UDP(q); 5218 pid_t cpid; 5219 cred_t *rcr = udp->udp_connp->conn_cred; 5220 5221 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 5222 "udp_rput_other: q %p mp %p", q, mp); 5223 5224 ASSERT(OK_32PTR(mp->b_rptr)); 5225 rptr = mp->b_rptr; 5226 5227 switch (mp->b_datap->db_type) { 5228 case M_CTL: 5229 /* 5230 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 5231 */ 5232 recv_on = B_TRUE; 5233 options_mp = mp; 5234 pinfo = (in_pktinfo_t *)options_mp->b_rptr; 5235 5236 /* 5237 * The actual data is in mp->b_cont 5238 */ 5239 mp = mp->b_cont; 5240 ASSERT(OK_32PTR(mp->b_rptr)); 5241 rptr = mp->b_rptr; 5242 break; 5243 case M_DATA: 5244 /* 5245 * M_DATA messages contain IPv4 datagrams. They are handled 5246 * after this switch. 5247 */ 5248 break; 5249 case M_PROTO: 5250 case M_PCPROTO: 5251 /* M_PROTO messages contain some type of TPI message. */ 5252 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 5253 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5254 freemsg(mp); 5255 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5256 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 5257 return; 5258 } 5259 tea = (struct T_error_ack *)rptr; 5260 5261 switch (tea->PRIM_type) { 5262 case T_ERROR_ACK: 5263 switch (tea->ERROR_prim) { 5264 case O_T_BIND_REQ: 5265 case T_BIND_REQ: { 5266 /* 5267 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5268 * clear out the associated port and source 5269 * address before passing the message 5270 * upstream. If this was caused by a T_CONN_REQ 5271 * revert back to bound state. 
5272 */ 5273 udp_fanout_t *udpf; 5274 5275 udpf = &udp_bind_fanout[ 5276 UDP_BIND_HASH(udp->udp_port)]; 5277 mutex_enter(&udpf->uf_lock); 5278 if (udp->udp_state == TS_DATA_XFER) { 5279 /* Connect failed */ 5280 tea->ERROR_prim = T_CONN_REQ; 5281 /* Revert back to the bound source */ 5282 udp->udp_v6src = udp->udp_bound_v6src; 5283 udp->udp_state = TS_IDLE; 5284 mutex_exit(&udpf->uf_lock); 5285 if (udp->udp_family == AF_INET6) 5286 (void) udp_build_hdrs(q, udp); 5287 break; 5288 } 5289 5290 if (udp->udp_discon_pending) { 5291 tea->ERROR_prim = T_DISCON_REQ; 5292 udp->udp_discon_pending = 0; 5293 } 5294 V6_SET_ZERO(udp->udp_v6src); 5295 V6_SET_ZERO(udp->udp_bound_v6src); 5296 udp->udp_state = TS_UNBND; 5297 udp_bind_hash_remove(udp, B_TRUE); 5298 udp->udp_port = 0; 5299 mutex_exit(&udpf->uf_lock); 5300 if (udp->udp_family == AF_INET6) 5301 (void) udp_build_hdrs(q, udp); 5302 break; 5303 } 5304 default: 5305 break; 5306 } 5307 break; 5308 case T_BIND_ACK: 5309 udp_rput_bind_ack(q, mp); 5310 return; 5311 5312 case T_OPTMGMT_ACK: 5313 case T_OK_ACK: 5314 break; 5315 default: 5316 freemsg(mp); 5317 return; 5318 } 5319 putnext(UDP_RD(q), mp); 5320 return; 5321 } 5322 5323 /* 5324 * This is the inbound data path. 5325 * First, we make sure the data contains both IP and UDP headers. 5326 * 5327 * This handle IPv4 packets for only AF_INET sockets. 5328 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 5329 * is no need saving the options. 5330 */ 5331 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 5332 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 5333 if (mp->b_wptr - rptr < hdr_length) { 5334 if (!pullupmsg(mp, hdr_length)) { 5335 freemsg(mp); 5336 if (options_mp != NULL) 5337 freeb(options_mp); 5338 BUMP_MIB(&udp_mib, udpInErrors); 5339 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5340 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 5341 BUMP_MIB(&udp_mib, udpInErrors); 5342 return; 5343 } 5344 rptr = mp->b_rptr; 5345 } 5346 /* Walk past the headers. 
*/ 5347 new_rptr = rptr + hdr_length; 5348 if (!udp->udp_rcvhdr) 5349 mp->b_rptr = new_rptr; 5350 5351 /* Save the options if any */ 5352 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 5353 if (opt_len > 0) { 5354 if (opt_len > udp->udp_ip_rcv_options_len) { 5355 if (udp->udp_ip_rcv_options_len) 5356 mi_free((char *)udp->udp_ip_rcv_options); 5357 udp->udp_ip_rcv_options_len = 0; 5358 udp->udp_ip_rcv_options = 5359 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 5360 if (udp->udp_ip_rcv_options) 5361 udp->udp_ip_rcv_options_len = opt_len; 5362 } 5363 if (udp->udp_ip_rcv_options_len) { 5364 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 5365 udp->udp_ip_rcv_options, opt_len); 5366 /* Adjust length if we are resusing the space */ 5367 udp->udp_ip_rcv_options_len = opt_len; 5368 } 5369 } else if (udp->udp_ip_rcv_options_len) { 5370 mi_free((char *)udp->udp_ip_rcv_options); 5371 udp->udp_ip_rcv_options = NULL; 5372 udp->udp_ip_rcv_options_len = 0; 5373 } 5374 5375 /* 5376 * Normally only send up the address. 5377 * If IP_RECVDSTADDR is set we include the destination IP 5378 * address as an option. With IP_RECVOPTS we include all 5379 * the IP options. 
5380 */ 5381 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5382 if (udp->udp_recvdstaddr) { 5383 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5384 UDP_STAT(udp_in_recvdstaddr); 5385 } 5386 if (udp->udp_recvopts && opt_len > 0) { 5387 udi_size += sizeof (struct T_opthdr) + opt_len; 5388 UDP_STAT(udp_in_recvopts); 5389 } 5390 5391 /* 5392 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5393 * space accordingly 5394 */ 5395 if (udp->udp_recvif && recv_on && 5396 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5397 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5398 UDP_STAT(udp_in_recvif); 5399 } 5400 5401 if (udp->udp_recvslla && recv_on && 5402 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5403 udi_size += sizeof (struct T_opthdr) + 5404 sizeof (struct sockaddr_dl); 5405 UDP_STAT(udp_in_recvslla); 5406 } 5407 5408 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5409 udi_size += sizeof (struct T_opthdr) + ucredsize; 5410 cpid = DB_CPID(mp); 5411 UDP_STAT(udp_in_recvucred); 5412 } 5413 /* 5414 * If IP_RECVTTL is set allocate the appropriate sized buffer 5415 */ 5416 if (udp->udp_recvttl) { 5417 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5418 UDP_STAT(udp_in_recvttl); 5419 } 5420 5421 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 5422 mp1 = allocb(udi_size, BPRI_MED); 5423 if (mp1 == NULL) { 5424 freemsg(mp); 5425 if (options_mp != NULL) 5426 freeb(options_mp); 5427 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5428 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5429 BUMP_MIB(&udp_mib, udpInErrors); 5430 return; 5431 } 5432 mp1->b_cont = mp; 5433 mp = mp1; 5434 mp->b_datap->db_type = M_PROTO; 5435 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5436 mp->b_wptr = (uchar_t *)tudi + udi_size; 5437 tudi->PRIM_type = T_UNITDATA_IND; 5438 tudi->SRC_length = sizeof (sin_t); 5439 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5440 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5441 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5442 tudi->OPT_length = udi_size; 5443 5444 sin = (sin_t *)&tudi[1]; 5445 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5446 sin->sin_port = ((in_port_t *) 5447 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5448 sin->sin_family = AF_INET; 5449 *(uint32_t *)&sin->sin_zero[0] = 0; 5450 *(uint32_t *)&sin->sin_zero[4] = 0; 5451 5452 /* 5453 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5454 * IP_RECVTTL has been set. 5455 */ 5456 if (udi_size != 0) { 5457 /* 5458 * Copy in destination address before options to avoid any 5459 * padding issues. 
5460 */ 5461 char *dstopt; 5462 5463 dstopt = (char *)&sin[1]; 5464 if (udp->udp_recvdstaddr) { 5465 struct T_opthdr *toh; 5466 ipaddr_t *dstptr; 5467 5468 toh = (struct T_opthdr *)dstopt; 5469 toh->level = IPPROTO_IP; 5470 toh->name = IP_RECVDSTADDR; 5471 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5472 toh->status = 0; 5473 dstopt += sizeof (struct T_opthdr); 5474 dstptr = (ipaddr_t *)dstopt; 5475 *dstptr = (((ipaddr_t *)rptr)[4]); 5476 dstopt += sizeof (ipaddr_t); 5477 udi_size -= toh->len; 5478 } 5479 if (udp->udp_recvopts && udi_size != 0) { 5480 struct T_opthdr *toh; 5481 5482 toh = (struct T_opthdr *)dstopt; 5483 toh->level = IPPROTO_IP; 5484 toh->name = IP_RECVOPTS; 5485 toh->len = sizeof (struct T_opthdr) + opt_len; 5486 toh->status = 0; 5487 dstopt += sizeof (struct T_opthdr); 5488 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5489 dstopt += opt_len; 5490 udi_size -= toh->len; 5491 } 5492 5493 if (udp->udp_recvslla && recv_on && 5494 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5495 5496 struct T_opthdr *toh; 5497 struct sockaddr_dl *dstptr; 5498 5499 toh = (struct T_opthdr *)dstopt; 5500 toh->level = IPPROTO_IP; 5501 toh->name = IP_RECVSLLA; 5502 toh->len = sizeof (struct T_opthdr) + 5503 sizeof (struct sockaddr_dl); 5504 toh->status = 0; 5505 dstopt += sizeof (struct T_opthdr); 5506 dstptr = (struct sockaddr_dl *)dstopt; 5507 bcopy(&pinfo->in_pkt_slla, dstptr, 5508 sizeof (struct sockaddr_dl)); 5509 dstopt += sizeof (struct sockaddr_dl); 5510 udi_size -= toh->len; 5511 } 5512 5513 if (udp->udp_recvif && recv_on && 5514 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5515 5516 struct T_opthdr *toh; 5517 uint_t *dstptr; 5518 5519 toh = (struct T_opthdr *)dstopt; 5520 toh->level = IPPROTO_IP; 5521 toh->name = IP_RECVIF; 5522 toh->len = sizeof (struct T_opthdr) + 5523 sizeof (uint_t); 5524 toh->status = 0; 5525 dstopt += sizeof (struct T_opthdr); 5526 dstptr = (uint_t *)dstopt; 5527 *dstptr = pinfo->in_pkt_ifindex; 5528 dstopt += sizeof (uint_t); 5529 
udi_size -= toh->len; 5530 } 5531 5532 if (cr != NULL) { 5533 struct T_opthdr *toh; 5534 5535 toh = (struct T_opthdr *)dstopt; 5536 toh->level = SOL_SOCKET; 5537 toh->name = SCM_UCRED; 5538 toh->len = sizeof (struct T_opthdr) + ucredsize; 5539 toh->status = 0; 5540 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5541 dstopt += toh->len; 5542 udi_size -= toh->len; 5543 } 5544 5545 if (udp->udp_recvttl) { 5546 struct T_opthdr *toh; 5547 uint8_t *dstptr; 5548 5549 toh = (struct T_opthdr *)dstopt; 5550 toh->level = IPPROTO_IP; 5551 toh->name = IP_RECVTTL; 5552 toh->len = sizeof (struct T_opthdr) + 5553 sizeof (uint8_t); 5554 toh->status = 0; 5555 dstopt += sizeof (struct T_opthdr); 5556 dstptr = (uint8_t *)dstopt; 5557 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5558 dstopt += sizeof (uint8_t); 5559 udi_size -= toh->len; 5560 } 5561 5562 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5563 } 5564 BUMP_MIB(&udp_mib, udpInDatagrams); 5565 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5566 "udp_rput_other_end: q %p (%S)", q, "end"); 5567 if (options_mp != NULL) 5568 freeb(options_mp); 5569 5570 if (udp->udp_direct_sockfs) { 5571 /* 5572 * There is nothing above us except for the stream head; 5573 * use the read-side synchronous stream interface in 5574 * order to reduce the time spent in interrupt thread. 5575 */ 5576 ASSERT(udp->udp_issocket); 5577 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5578 } else { 5579 /* 5580 * Use regular STREAMS interface to pass data upstream 5581 * if this is not a socket endpoint, or if we have 5582 * switched over to the slow mode due to sockmod being 5583 * popped or a module being pushed on top of us. 
		 */
		putnext(UDP_RD(q), mp);
	}
}

/*
 * Trampoline: recover the conn_t from "arg", process the deferred
 * message via udp_rput_other() on the conn's read queue, then release
 * the conn with udp_exit().  "arg2" is unused.
 */
/* ARGSUSED */
static void
udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	conn_t	*connp = arg;

	udp_rput_other(connp->conn_rq, mp);
	udp_exit(connp);
}

/*
 * Process a T_BIND_ACK coming up from IP: record the local address IP
 * chose (if the endpoint had none yet), strip the appended IRE block,
 * and forward the T_BIND_ACK plus any ACK messages appended by
 * udp_connect()/udp_disconnect() upstream.
 */
static void
udp_rput_bind_ack(queue_t *q, mblk_t *mp)
{
	udp_t	*udp = Q_TO_UDP(q);
	mblk_t	*mp1;
	ire_t	*ire;
	struct T_bind_ack *tba;
	uchar_t	*addrp;
	ipa_conn_t	*ac;
	ipa6_conn_t	*ac6;

	if (udp->udp_discon_pending)
		udp->udp_discon_pending = 0;

	/*
	 * If a broadcast/multicast address was bound set
	 * the source address to 0.
	 * This ensures no datagrams with broadcast address
	 * as source address are emitted (which would violate
	 * RFC1122 - Hosts requirements)
	 *
	 * Note that when connecting the returned IRE is
	 * for the destination address and we only perform
	 * the broadcast check for the source address (it
	 * is OK to connect to a broadcast/multicast address.)
	 */
	mp1 = mp->b_cont;
	if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) {
		ire = (ire_t *)mp1->b_rptr;

		/*
		 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast
		 * local address.
		 */
		if (ire->ire_type == IRE_BROADCAST &&
		    udp->udp_state != TS_DATA_XFER) {
			/* This was just a local bind to a broadcast addr */
			V6_SET_ZERO(udp->udp_v6src);
			if (udp->udp_family == AF_INET6)
				(void) udp_build_hdrs(q, udp);
		} else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
			/*
			 * Local address not yet set - pick it from the
			 * T_bind_ack.  The address block is either the plain
			 * ipa_conn_t/ipa6_conn_t or the extended "_x"
			 * variant; both embed the same conn structure.
			 */
			tba = (struct T_bind_ack *)mp->b_rptr;
			addrp = &mp->b_rptr[tba->ADDR_offset];
			switch (udp->udp_family) {
			case AF_INET:
				if (tba->ADDR_length == sizeof (ipa_conn_t)) {
					ac = (ipa_conn_t *)addrp;
				} else {
					ASSERT(tba->ADDR_length ==
					    sizeof (ipa_conn_x_t));
					ac = &((ipa_conn_x_t *)addrp)->acx_conn;
				}
				IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr,
				    &udp->udp_v6src);
				break;
			case AF_INET6:
				if (tba->ADDR_length == sizeof (ipa6_conn_t)) {
					ac6 = (ipa6_conn_t *)addrp;
				} else {
					ASSERT(tba->ADDR_length ==
					    sizeof (ipa6_conn_x_t));
					ac6 = &((ipa6_conn_x_t *)
					    addrp)->ac6x_conn;
				}
				udp->udp_v6src = ac6->ac6_laddr;
				(void) udp_build_hdrs(q, udp);
				break;
			}
		}
		mp1 = mp1->b_cont;
	}
	/*
	 * Look for one or more appended ACK message added by
	 * udp_connect or udp_disconnect.
	 * If none found just send up the T_BIND_ACK.
	 * udp_connect has appended a T_OK_ACK and a T_CONN_CON.
	 * udp_disconnect has appended a T_OK_ACK.
	 */
	if (mp1 != NULL) {
		/* Detach the appended chain (it may follow the IRE block). */
		if (mp->b_cont == mp1)
			mp->b_cont = NULL;
		else {
			ASSERT(mp->b_cont->b_cont == mp1);
			mp->b_cont->b_cont = NULL;
		}
		freemsg(mp);
		mp = mp1;
		/* Send each appended message upstream as its own message. */
		while (mp != NULL) {
			mp1 = mp->b_cont;
			mp->b_cont = NULL;
			putnext(UDP_RD(q), mp);
			mp = mp1;
		}
		return;
	}
	/* No appended ACKs: drop any IRE block and pass up the T_BIND_ACK. */
	freemsg(mp->b_cont);
	mp->b_cont = NULL;
	putnext(UDP_RD(q), mp);
}

/*
 * return SNMP stuff in buffer in mpdata
 */
int
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t	*mpdata;
	mblk_t	*mp_conn_ctl;
	mblk_t	*mp_attr_ctl;
	mblk_t	*mp6_conn_ctl;
	mblk_t	*mp6_attr_ctl;
	mblk_t	*mp_conn_tail;
	mblk_t	*mp_attr_tail;
	mblk_t	*mp6_conn_tail;
	mblk_t	*mp6_attr_tail;
	struct opthdr	*optp;
	mib2_udpEntry_t	ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int	state;
	zoneid_t	zoneid;
	int	i;
	connf_t	*connfp;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;
	int	v4_conn_idx;
	int	v6_conn_idx;
	boolean_t needattr;

	/*
	 * Make copies of the request for the four per-connection reply
	 * tables.  On failure, only the copies that can have been made are
	 * freed; mp6_attr_ctl is either NULL or never assigned on every
	 * failure path here, so it needs no freemsg().
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		return (0);
	}

	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib));
	optp->len =
	    msgdsize(mpdata);
	/* Send the fixed-length counter block first. */
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/*
	 * Walk every UDP conn in the global hash; only endpoints in the
	 * caller's zone are reported.
	 */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDP))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/*
			 * An MLP attribute entry is emitted only for
			 * endpoints with a non-trivial multilevel-port type.
			 */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (udp->udp_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress =
				    V4_PART_OF_V6(udp->udp_v6src);
				ude.udpLocalPort = ntohs(udp->udp_port);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    V4_PART_OF_V6(udp->udp_v6dst);
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}
				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (udp->udp_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state = state;
				ude6.udp6LocalAddress = udp->udp_v6src;
				ude6.udp6LocalPort = ntohs(udp->udp_port);
				ude6.udp6IfIndex = udp->udp_bound_if;
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    udp->udp_v6dst;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... (sent only if any entry was appended) */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... (sent only if any entry was appended) */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (1);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to be
 * changed to do the appropriate locking.
5915 */ 5916 /* ARGSUSED */ 5917 int 5918 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5919 uchar_t *ptr, int len) 5920 { 5921 switch (level) { 5922 case MIB2_UDP: 5923 return (0); 5924 default: 5925 return (1); 5926 } 5927 } 5928 5929 static void 5930 udp_report_item(mblk_t *mp, udp_t *udp) 5931 { 5932 char *state; 5933 char addrbuf1[INET6_ADDRSTRLEN]; 5934 char addrbuf2[INET6_ADDRSTRLEN]; 5935 uint_t print_len, buf_len; 5936 5937 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5938 ASSERT(buf_len >= 0); 5939 if (buf_len == 0) 5940 return; 5941 5942 if (udp->udp_state == TS_UNBND) 5943 state = "UNBOUND"; 5944 else if (udp->udp_state == TS_IDLE) 5945 state = "IDLE"; 5946 else if (udp->udp_state == TS_DATA_XFER) 5947 state = "CONNECTED"; 5948 else 5949 state = "UnkState"; 5950 print_len = snprintf((char *)mp->b_wptr, buf_len, 5951 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5952 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5953 inet_ntop(AF_INET6, &udp->udp_v6src, 5954 addrbuf1, sizeof (addrbuf1)), 5955 inet_ntop(AF_INET6, &udp->udp_v6dst, 5956 addrbuf2, sizeof (addrbuf2)), 5957 ntohs(udp->udp_dstport), state); 5958 if (print_len < buf_len) { 5959 mp->b_wptr += print_len; 5960 } else { 5961 mp->b_wptr += buf_len; 5962 } 5963 } 5964 5965 /* Report for ndd "udp_status" */ 5966 /* ARGSUSED */ 5967 static int 5968 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5969 { 5970 zoneid_t zoneid; 5971 connf_t *connfp; 5972 conn_t *connp = Q_TO_CONN(q); 5973 udp_t *udp = connp->conn_udp; 5974 int i; 5975 5976 /* 5977 * Because of the ndd constraint, at most we can have 64K buffer 5978 * to put in all UDP info. So to be more efficient, just 5979 * allocate a 64K buffer here, assuming we need that large buffer. 5980 * This may be a problem as any user can read udp_status. Therefore 5981 * we limit the rate of doing this using udp_ndd_get_info_interval. 5982 * This should be OK as normal users should not do this too often. 
5983 */ 5984 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 5985 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 5986 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 5987 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5988 return (0); 5989 } 5990 } 5991 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5992 /* The following may work even if we cannot get a large buf. */ 5993 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5994 return (0); 5995 } 5996 (void) mi_mpprintf(mp, 5997 "UDP " MI_COL_HDRPAD_STR 5998 /* 12345678[89ABCDEF] */ 5999 " zone lport src addr dest addr port state"); 6000 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 6001 6002 zoneid = connp->conn_zoneid; 6003 6004 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 6005 connfp = &ipcl_globalhash_fanout[i]; 6006 connp = NULL; 6007 6008 while ((connp = ipcl_get_next_conn(connfp, connp, 6009 IPCL_UDP))) { 6010 udp = connp->conn_udp; 6011 if (zoneid != GLOBAL_ZONEID && 6012 zoneid != connp->conn_zoneid) 6013 continue; 6014 6015 udp_report_item(mp->b_cont, udp); 6016 } 6017 } 6018 udp_last_ndd_get_info_time = ddi_get_lbolt(); 6019 return (0); 6020 } 6021 6022 /* 6023 * This routine creates a T_UDERROR_IND message and passes it upstream. 6024 * The address and options are copied from the T_UNITDATA_REQ message 6025 * passed in mp. This message is freed. 
 */
static void
udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
    t_scalar_t err)
{
	struct T_unitdata_req *tudr;
	mblk_t	*mp1;
	uchar_t	*optaddr;
	t_scalar_t optlen;

	if (DB_TYPE(mp) == M_DATA) {
		/* Raw data: caller must have supplied the destination. */
		ASSERT(destaddr != NULL && destlen != 0);
		optaddr = NULL;
		optlen = 0;
	} else {
		/*
		 * T_UNITDATA_REQ: pull address/options out of the message,
		 * validating every offset/length against the mblk bounds
		 * before use (the request came from the user).
		 */
		if ((mp->b_wptr < mp->b_rptr) ||
		    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
			goto done;
		}
		tudr = (struct T_unitdata_req *)mp->b_rptr;
		destaddr = mp->b_rptr + tudr->DEST_offset;
		if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
		    destaddr + tudr->DEST_length < mp->b_rptr ||
		    destaddr + tudr->DEST_length > mp->b_wptr) {
			goto done;
		}
		optaddr = mp->b_rptr + tudr->OPT_offset;
		if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
		    optaddr + tudr->OPT_length < mp->b_rptr ||
		    optaddr + tudr->OPT_length > mp->b_wptr) {
			goto done;
		}
		destlen = tudr->DEST_length;
		optlen = tudr->OPT_length;
	}

	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
	    (char *)optaddr, optlen, err);
	if (mp1 != NULL)
		putnext(UDP_RD(q), mp1);

done:
	/* The original request is always consumed. */
	freemsg(mp);
}

/*
 * This routine removes a port number association from a stream. It
 * is called by udp_wput to handle T_UNBIND_REQ messages.
 */
static void
udp_unbind(queue_t *q, mblk_t *mp)
{
	udp_t	*udp = Q_TO_UDP(q);

	/* If a bind has not been done, we can't unbind. */
	if (udp->udp_state == TS_UNBND) {
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	if (cl_inet_unbind != NULL) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}
	}

	/* Drop the binding: hash entry, addresses, port and TPI state. */
	udp_bind_hash_remove(udp, B_FALSE);
	V6_SET_ZERO(udp->udp_v6src);
	V6_SET_ZERO(udp->udp_bound_v6src);
	udp->udp_port = 0;
	udp->udp_state = TS_UNBND;

	if (udp->udp_family == AF_INET6) {
		int error;

		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	/*
	 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
	 * and therefore ip_unbind must never return NULL.
	 */
	mp = ip_unbind(q, mp);
	ASSERT(mp != NULL);
	putnext(UDP_RD(q), mp);
}

/*
 * Don't let port fall into the privileged range.
 * Since the extra privileged ports can be arbitrary we also
 * ensure that we exclude those from consideration.
 * udp_g_epriv_ports is not sorted thus we loop over it until
 * there are no changes.
 */
static in_port_t
udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
{
	int i;
	in_port_t nextport;
	boolean_t restart = B_FALSE;

	if (random && udp_random_anon_port != 0) {
		(void) random_get_pseudo_bytes((uint8_t *)&port,
		    sizeof (in_port_t));
		/*
		 * Unless changed by a sys admin, the smallest anon port
		 * is 32768 and the largest anon port is 65535.  It is
		 * very likely (50%) for the random port to be smaller
		 * than the smallest anon port.  When that happens,
		 * add port % (anon port range) to the smallest anon
		 * port to get the random port.  It should fall into the
		 * valid anon port range.
		 */
		if (port < udp_smallest_anon_port) {
			port = udp_smallest_anon_port +
			    port % (udp_largest_anon_port -
			    udp_smallest_anon_port);
		}
	}

retry:
	/* Clamp into the anon range; wrap (at most once) past the top. */
	if (port < udp_smallest_anon_port)
		port = udp_smallest_anon_port;

	if (port > udp_largest_anon_port) {
		port = udp_smallest_anon_port;
		if (restart)
			return (0);
		restart = B_TRUE;
	}

	if (port < udp_smallest_nonpriv_port)
		port = udp_smallest_nonpriv_port;

	for (i = 0; i < udp_g_num_epriv_ports; i++) {
		if (port == udp_g_epriv_ports[i]) {
			port++;
			/*
			 * Make sure that the port is in the
			 * valid range.
			 */
			goto retry;
		}
	}

	/* Trusted Extensions may push us to the next usable port. */
	if (is_system_labeled() &&
	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
	    port, IPPROTO_UDP, B_TRUE)) != 0) {
		port = nextport;
		goto retry;
	}

	return (port);
}

/*
 * Recompute the Trusted Extensions label option for destination "dst"
 * and install it in the endpoint's IP send options.  On success the
 * destination is remembered in udp_v6lastdst so the work is skipped for
 * repeated sends to the same place.  Returns 0 or an errno.
 */
static int
udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst)
{
	int err;
	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
	udp_t *udp = Q_TO_UDP(wq);

	err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst,
	    opt_storage, udp->udp_mac_exempt);
	if (err == 0) {
		err = tsol_update_options(&udp->udp_ip_snd_options,
		    &udp->udp_ip_snd_options_len, &udp->udp_label_len,
		    opt_storage);
	}
	if (err != 0) {
		DTRACE_PROBE4(
		    tx__ip__log__info__updatelabel__udp,
		    char *, "queue(1) failed to update options(2) on mp(3)",
		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
	} else {
		IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst);
	}
	return (err);
}

/*
 * Build and transmit an IPv4 UDP datagram.  "mp" is either an M_DATA
 * packet or a T_UNITDATA_REQ with the data chained behind it.  On
 * success the message is consumed and NULL is returned; on failure
 * *error is set and the unconsumed message is returned to the caller.
 */
static mblk_t *
udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
    uint_t srcid, int
    *error)
{
	udp_t	*udp = connp->conn_udp;
	queue_t	*q = connp->conn_wq;
	mblk_t	*mp1 = mp;
	mblk_t	*mp2;
	ipha_t	*ipha;
	int	ip_hdr_length;
	uint32_t ip_len;
	udpha_t	*udpha;
	udpattrs_t	attrs;
	uchar_t	ip_snd_opt[IP_MAX_OPT_LENGTH];
	uint32_t	ip_snd_opt_len = 0;

	*error = 0;

	/* Sending to address zero means "send to myself". */
	if (v4dst == INADDR_ANY)
		v4dst = htonl(INADDR_LOOPBACK);

	/*
	 * If options passed in, feed it for verification and handling
	 */
	attrs.udpattr_credset = B_FALSE;
	if (DB_TYPE(mp) != M_DATA) {
		mp1 = mp->b_cont;
		if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) {
			attrs.udpattr_ipp = NULL;
			attrs.udpattr_mb = mp;
			if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0)
				goto done;
			/*
			 * Note: success in processing options.
			 * mp option buffer represented by
			 * OPT_length/offset now potentially modified
			 * and contain option setting results
			 */
			ASSERT(*error == 0);
		}
	}

	/* mp1 points to the M_DATA mblk carrying the packet */
	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);

	/*
	 * Check if our saved options are valid; update if not
	 * TSOL Note: Since we are not in WRITER mode, UDP packets
	 * to different destination may require different labels.
	 * We use conn_lock to ensure that lastdst, ip_snd_options,
	 * and ip_snd_options_len are consistent for the current
	 * destination and are updated atomically.
	 */
	mutex_enter(&connp->conn_lock);
	if (is_system_labeled()) {
		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !attrs.udpattr_credset) {
			mutex_exit(&connp->conn_lock);
			DTRACE_PROBE4(
			    tx__ip__log__info__output__udp,
			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
			*error = ECONNREFUSED;
			goto done;
		}
		/* Relabel only when the destination changed. */
		if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
		    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst) &&
		    (*error = udp_update_label(q, mp, v4dst)) != 0) {
			mutex_exit(&connp->conn_lock);
			goto done;
		}
	}
	/* Snapshot the send options under the lock (see TSOL note above). */
	if (udp->udp_ip_snd_options_len > 0) {
		ip_snd_opt_len = udp->udp_ip_snd_options_len;
		bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
	}
	mutex_exit(&connp->conn_lock);

	/* Add an IP header */
	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len;
	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
	/*
	 * Prepend the header in place when the first mblk is exclusively
	 * owned, has the headroom, and is 32-bit aligned; otherwise
	 * allocate a fresh header mblk and chain the data behind it.
	 */
	if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
	    !OK_32PTR(ipha)) {
		mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO);
		if (mp2 == NULL) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "allocbfail2");
			*error = ENOMEM;
			goto done;
		}
		mp2->b_wptr = DB_LIM(mp2);
		mp2->b_cont = mp1;
		mp1 = mp2;
		if (DB_TYPE(mp) != M_DATA)
			mp->b_cont = mp1;
		else
			mp = mp1;

		ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
	}
	ip_hdr_length -= UDPH_SIZE;
#ifdef	_BIG_ENDIAN
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
	    udp->udp_type_of_service);
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
#else
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((udp->udp_type_of_service << 8) |
	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
#endif
	/*
	 * Copy our address into the packet.  If this is zero,
	 * first look at __sin6_src_id for a hint. If we leave the source
	 * as INADDR_ANY then ip will fill in the real source address.
	 */
	IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
	if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
		in6_addr_t v6src;

		ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid);
		IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
	}

	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ident = 0;

	mp1->b_rptr = (uchar_t *)ipha;

	ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
	    (uintptr_t)UINT_MAX);

	/* Determine length of packet */
	ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
	if ((mp2 = mp1->b_cont) != NULL) {
		do {
			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
			ip_len += (uint32_t)MBLKL(mp2);
		} while ((mp2 = mp2->b_cont) != NULL);
	}
	/*
	 * If the size of the packet is greater than the maximum allowed by
	 * ip, return an error. Passing this down could cause panics because
	 * the size will have wrapped and be inconsistent with the msg size.
	 */
	if (ip_len > IP_MAXPACKET) {
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
		    "udp_wput_end: q %p (%S)", q, "IP length exceeded");
		*error = EMSGSIZE;
		goto done;
	}
	ipha->ipha_length = htons((uint16_t)ip_len);
	/* From here on ip_len is the UDP length (header + data). */
	ip_len -= ip_hdr_length;
	ip_len = htons((uint16_t)ip_len);
	udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);

	/*
	 * Copy in the destination address
	 */
	ipha->ipha_dst = v4dst;

	/*
	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
	 */
	if (CLASSD(v4dst))
		ipha->ipha_ttl = udp->udp_multicast_ttl;

	udpha->uha_dst_port = port;
	udpha->uha_src_port = udp->udp_port;

	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
		uint32_t	cksum;

		bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
		/*
		 * Massage source route putting first source route in ipha_dst.
		 * Ignore the destination in T_unitdata_req.
		 * Create a checksum adjustment for a source route, if any.
		 */
		cksum = ip_massage_options(ipha);
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
		    (ipha->ipha_dst & 0xFFFF);
		if ((int)cksum < 0)
			cksum--;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		cksum += ip_len;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/* There might be a carry. */
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
#ifdef _LITTLE_ENDIAN
		if (udp_do_checksum)
			ip_len = (cksum << 16) | ip_len;
#else
		if (udp_do_checksum)
			ip_len = (ip_len << 16) | cksum;
		else
			ip_len <<= 16;
#endif
	} else {
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		if (udp_do_checksum)
			ip_len |= (ip_len << 16);
#ifndef _LITTLE_ENDIAN
		else
			ip_len <<= 16;
#endif
	}
	/* Set UDP length and checksum (stored as one 32-bit word). */
	*((uint32_t *)&udpha->uha_length) = ip_len;
	if (DB_CRED(mp) != NULL)
		mblk_setcred(mp1, DB_CRED(mp));

	if (DB_TYPE(mp) != M_DATA) {
		/* The T_UNITDATA_REQ header block is no longer needed. */
		ASSERT(mp != mp1);
		freeb(mp);
	}

	/* mp has been consumed and we'll return success */
	ASSERT(*error == 0);
	mp = NULL;

	/* We're done. Pass the packet to ip. */
	BUMP_MIB(&udp_mib, udpOutDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
	    "udp_wput_end: q %p (%S)", q, "end");

	/*
	 * Use the full ip_output() path whenever policy, special routing,
	 * IP options, IPQoS or multicast routing apply; otherwise take
	 * the direct udp_send_data() shortcut.
	 */
	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
	    CONN_OUTBOUND_POLICY_PRESENT(connp) ||
	    connp->conn_dontroute || connp->conn_xmit_if_ill != NULL ||
	    connp->conn_nofailover_ill != NULL ||
	    connp->conn_outgoing_ill != NULL ||
	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
	    IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) {
		UDP_STAT(udp_ip_send);
		ip_output(connp, mp1, connp->conn_wq, IP_WPUT);
	} else {
		udp_send_data(udp, connp->conn_wq, mp1, ipha);
	}

done:
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&udp_mib, udpOutErrors);
	}
	return (mp);
}

static void
udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
{
	conn_t	*connp = udp->udp_connp;
	ipaddr_t	src, dst;
	ill_t	*ill;
	ire_t	*ire;
	ipif_t	*ipif = NULL;
	mblk_t	*ire_fp_mp;
	uint_t	ire_fp_mp_len;
	uint16_t	*up;
	uint32_t	cksum, hcksum_txflags;
	queue_t	*dev_q;
	boolean_t	retry_caching;

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;
	ASSERT(ipha->ipha_ident == 0);

	if (CLASSD(dst)) {
		int err;

		/* Multicast: resolve the outgoing interface for this conn. */
		ipif = conn_get_held_ipif(connp,
		    &connp->conn_multicast_ipif, &err);

		if (ipif == NULL || ipif->ipif_isv6 ||
		    (ipif->ipif_ill->ill_phyint->phyint_flags &
		    PHYI_LOOPBACK)) {
			/* Fall back to the full IP output path. */
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(udp_ip_send);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
	}

	retry_caching = B_FALSE;
	mutex_enter(&connp->conn_lock);
	ire = connp->conn_ire_cache;
	ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));

	if (ire == NULL || ire->ire_addr != dst ||
	    (ire->ire_marks & IRE_MARK_CONDEMNED)) {
		retry_caching = B_TRUE;
	} else if (CLASSD(dst) &&
(ire->ire_type & IRE_CACHE)) { 6526 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6527 6528 ASSERT(ipif != NULL); 6529 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6530 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6531 retry_caching = B_TRUE; 6532 } 6533 6534 if (!retry_caching) { 6535 ASSERT(ire != NULL); 6536 IRE_REFHOLD(ire); 6537 mutex_exit(&connp->conn_lock); 6538 } else { 6539 boolean_t cached = B_FALSE; 6540 6541 connp->conn_ire_cache = NULL; 6542 mutex_exit(&connp->conn_lock); 6543 6544 /* Release the old ire */ 6545 if (ire != NULL) { 6546 IRE_REFRELE_NOTR(ire); 6547 ire = NULL; 6548 } 6549 6550 if (CLASSD(dst)) { 6551 ASSERT(ipif != NULL); 6552 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6553 connp->conn_zoneid, MBLK_GETLABEL(mp), 6554 MATCH_IRE_ILL_GROUP); 6555 } else { 6556 ASSERT(ipif == NULL); 6557 ire = ire_cache_lookup(dst, connp->conn_zoneid, 6558 MBLK_GETLABEL(mp)); 6559 } 6560 6561 if (ire == NULL) { 6562 if (ipif != NULL) 6563 ipif_refrele(ipif); 6564 UDP_STAT(udp_ire_null); 6565 ip_output(connp, mp, q, IP_WPUT); 6566 return; 6567 } 6568 IRE_REFHOLD_NOTR(ire); 6569 6570 mutex_enter(&connp->conn_lock); 6571 if (!(connp->conn_state_flags & CONN_CLOSING) && 6572 connp->conn_ire_cache == NULL) { 6573 rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6574 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6575 connp->conn_ire_cache = ire; 6576 cached = B_TRUE; 6577 } 6578 rw_exit(&ire->ire_bucket->irb_lock); 6579 } 6580 mutex_exit(&connp->conn_lock); 6581 6582 /* 6583 * We can continue to use the ire but since it was not 6584 * cached, we should drop the extra reference. 
6585 */ 6586 if (!cached) 6587 IRE_REFRELE_NOTR(ire); 6588 } 6589 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6590 ASSERT(!CLASSD(dst) || ipif != NULL); 6591 6592 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6593 (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL || 6594 ire->ire_max_frag < ntohs(ipha->ipha_length) || 6595 (ire_fp_mp = ire->ire_fp_mp) == NULL || 6596 (connp->conn_nexthop_set) || 6597 (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) { 6598 if (ipif != NULL) 6599 ipif_refrele(ipif); 6600 UDP_STAT(udp_ip_ire_send); 6601 IRE_REFRELE(ire); 6602 ip_output(connp, mp, q, IP_WPUT); 6603 return; 6604 } 6605 6606 BUMP_MIB(&ip_mib, ipOutRequests); 6607 6608 ill = ire_to_ill(ire); 6609 ASSERT(ill != NULL); 6610 6611 dev_q = ire->ire_stq->q_next; 6612 ASSERT(dev_q != NULL); 6613 /* 6614 * If the service thread is already running, or if the driver 6615 * queue is currently flow-controlled, queue this packet. 6616 */ 6617 if ((q->q_first != NULL || connp->conn_draining) || 6618 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 6619 if (ip_output_queue) { 6620 (void) putq(q, mp); 6621 } else { 6622 BUMP_MIB(&ip_mib, ipOutDiscards); 6623 freemsg(mp); 6624 } 6625 if (ipif != NULL) 6626 ipif_refrele(ipif); 6627 IRE_REFRELE(ire); 6628 return; 6629 } 6630 6631 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6632 #ifndef _BIG_ENDIAN 6633 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6634 #endif 6635 6636 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6637 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6638 src = ipha->ipha_src = ipif->ipif_src_addr; 6639 else 6640 src = ipha->ipha_src = ire->ire_src_addr; 6641 } 6642 6643 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6644 ASSERT(ill->ill_hcksum_capab != NULL); 6645 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6646 } else { 6647 hcksum_txflags = 0; 6648 } 6649 6650 /* pseudo-header checksum (do it in parts for 
IP header checksum) */ 6651 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6652 6653 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6654 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6655 if (*up != 0) { 6656 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6657 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6658 ntohs(ipha->ipha_length), cksum); 6659 6660 /* Software checksum? */ 6661 if (DB_CKSUMFLAGS(mp) == 0) { 6662 UDP_STAT(udp_out_sw_cksum); 6663 UDP_STAT_UPDATE(udp_out_sw_cksum_bytes, 6664 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6665 } 6666 } 6667 6668 ipha->ipha_fragment_offset_and_flags |= 6669 (uint32_t)htons(ire->ire_frag_flag); 6670 6671 /* Calculate IP header checksum if hardware isn't capable */ 6672 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6673 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6674 ((uint16_t *)ipha)[4]); 6675 } 6676 6677 if (CLASSD(dst)) { 6678 ilm_t *ilm; 6679 6680 ILM_WALKER_HOLD(ill); 6681 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6682 ILM_WALKER_RELE(ill); 6683 if (ilm != NULL) { 6684 ip_multicast_loopback(q, ill, mp, 6685 connp->conn_multicast_loop ? 0 : 6686 IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); 6687 } 6688 6689 /* If multicast TTL is 0 then we are done */ 6690 if (ipha->ipha_ttl == 0) { 6691 if (ipif != NULL) 6692 ipif_refrele(ipif); 6693 freemsg(mp); 6694 IRE_REFRELE(ire); 6695 return; 6696 } 6697 } 6698 6699 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6700 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6701 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6702 6703 UPDATE_OB_PKT_COUNT(ire); 6704 ire->ire_last_used_time = lbolt; 6705 6706 if (ILL_DLS_CAPABLE(ill)) { 6707 /* 6708 * Send the packet directly to DLD, where it may be queued 6709 * depending on the availability of transmit resources at 6710 * the media layer. 
6711 */ 6712 IP_DLS_ILL_TX(ill, mp); 6713 } else { 6714 putnext(ire->ire_stq, mp); 6715 } 6716 6717 if (ipif != NULL) 6718 ipif_refrele(ipif); 6719 IRE_REFRELE(ire); 6720 } 6721 6722 static boolean_t 6723 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 6724 { 6725 udp_t *udp = Q_TO_UDP(wq); 6726 int err; 6727 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 6728 6729 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 6730 dst, opt_storage, udp->udp_mac_exempt); 6731 if (err == 0) { 6732 err = tsol_update_sticky(&udp->udp_sticky_ipp, 6733 &udp->udp_label_len_v6, opt_storage); 6734 } 6735 if (err != 0) { 6736 DTRACE_PROBE4( 6737 tx__ip__log__drop__updatelabel__udp6, 6738 char *, "queue(1) failed to update options(2) on mp(3)", 6739 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6740 } else { 6741 udp->udp_v6lastdst = *dst; 6742 } 6743 return (err); 6744 } 6745 6746 /* 6747 * This routine handles all messages passed downstream. It either 6748 * consumes the message or passes it downstream; it never queues a 6749 * a message. 6750 */ 6751 static void 6752 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6753 { 6754 sin6_t *sin6; 6755 sin_t *sin; 6756 ipaddr_t v4dst; 6757 uint16_t port; 6758 uint_t srcid; 6759 queue_t *q = connp->conn_wq; 6760 udp_t *udp = connp->conn_udp; 6761 int error = 0; 6762 struct sockaddr_storage ss; 6763 6764 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6765 "udp_wput_start: connp %p mp %p", connp, mp); 6766 6767 /* 6768 * We directly handle several cases here: T_UNITDATA_REQ message 6769 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 6770 * connected and non-connected socket. The latter carries the 6771 * address structure along when this routine gets called. 
	 */
	switch (DB_TYPE(mp)) {
	case M_DATA:
		if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) {
			if (!udp->udp_direct_sockfs ||
			    addr == NULL || addrlen == 0) {
				/* Not connected; address is required */
				BUMP_MIB(&udp_mib, udpOutErrors);
				UDP_STAT(udp_out_err_notconn);
				freemsg(mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: connp %p (%S)", connp,
				    "not-connected; address required");
				return;
			}
			ASSERT(udp->udp_issocket);
			UDP_DBGSTAT(udp_data_notconn);
			/* Not connected; do some more checks below */
			break;
		}
		/* M_DATA for connected socket */
		UDP_DBGSTAT(udp_data_conn);
		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst);

		/* Initialize addr and addrlen as if they're passed in */
		if (udp->udp_family == AF_INET) {
			sin = (sin_t *)&ss;
			sin->sin_family = AF_INET;
			sin->sin_port = udp->udp_dstport;
			sin->sin_addr.s_addr = v4dst;
			addr = (struct sockaddr *)sin;
			addrlen = sizeof (*sin);
		} else {
			sin6 = (sin6_t *)&ss;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_port = udp->udp_dstport;
			sin6->sin6_flowinfo = udp->udp_flowinfo;
			sin6->sin6_addr = udp->udp_v6dst;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = 0;
			addr = (struct sockaddr *)sin6;
			addrlen = sizeof (*sin6);
		}

		if (udp->udp_family == AF_INET ||
		    IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) {
			/*
			 * Handle both AF_INET and AF_INET6; the latter
			 * for IPV4 mapped destination addresses.  Note
			 * here that both addr and addrlen point to the
			 * corresponding struct depending on the address
			 * family of the socket.
			 */
			mp = udp_output_v4(connp, mp, v4dst,
			    udp->udp_dstport, 0, &error);
		} else {
			mp = udp_output_v6(connp, mp, sin6, &error);
		}
		if (error != 0) {
			ASSERT(addr != NULL && addrlen != 0);
			goto ud_error;
		}
		return;
	case M_PROTO:
	case M_PCPROTO: {
		struct T_unitdata_req *tudr;

		ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX);
		tudr = (struct T_unitdata_req *)mp->b_rptr;

		/* Handle valid T_UNITDATA_REQ here */
		if (MBLKL(mp) >= sizeof (*tudr) &&
		    ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) {
			/* The datagram payload must follow in b_cont */
			if (mp->b_cont == NULL) {
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: q %p (%S)", q, "badaddr");
				error = EPROTO;
				goto ud_error;
			}

			/* The destination address must lie within the mblk */
			if (!MBLKIN(mp, 0, tudr->DEST_offset +
			    tudr->DEST_length)) {
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: q %p (%S)", q, "badaddr");
				error = EADDRNOTAVAIL;
				goto ud_error;
			}
			/*
			 * If a port has not been bound to the stream, fail.
			 * This is not a problem when sockfs is directly
			 * above us, because it will ensure that the socket
			 * is first bound before allowing data to be sent.
			 */
			if (udp->udp_state == TS_UNBND) {
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: q %p (%S)", q, "outstate");
				error = EPROTO;
				goto ud_error;
			}
			addr = (struct sockaddr *)
			    &mp->b_rptr[tudr->DEST_offset];
			addrlen = tudr->DEST_length;
			if (tudr->OPT_length != 0)
				UDP_STAT(udp_out_opt);
			break;
		}
		/* FALLTHRU */
	}
	default:
		/* Everything else is handled in writer mode */
		udp_become_writer(connp, mp, udp_wput_other_wrapper,
		    SQTAG_UDP_OUTPUT);
		return;
	}
	ASSERT(addr != NULL);

	switch (udp->udp_family) {
	case AF_INET6:
		sin6 = (sin6_t *)addr;
		if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) ||
		    sin6->sin6_family != AF_INET6) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "badaddr");
			error = EADDRNOTAVAIL;
			goto ud_error;
		}

		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			/*
			 * Destination is a non-IPv4-compatible IPv6 address.
			 * Send out an IPv6 format packet.
			 */
			mp = udp_output_v6(connp, mp, sin6, &error);
			if (error != 0)
				goto ud_error;

			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "udp_output_v6");
			return;
		}
		/*
		 * If the local address is not zero or a mapped address
		 * return an error. It would be possible to send an IPv4
		 * packet but the response would never make it back to the
		 * application since it is bound to a non-mapped address.
		 */
		if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "badaddr");
			error = EADDRNOTAVAIL;
			goto ud_error;
		}
		/* Send IPv4 packet without modifying udp_ipversion */
		/* Extract port and ipaddr */
		port = sin6->sin6_port;
		IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
		srcid = sin6->__sin6_src_id;
		break;

	case AF_INET:
		sin = (sin_t *)addr;
		if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) ||
		    sin->sin_family != AF_INET) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "badaddr");
			error = EADDRNOTAVAIL;
			goto ud_error;
		}
		/* Extract port and ipaddr */
		port = sin->sin_port;
		v4dst = sin->sin_addr.s_addr;
		srcid = 0;
		break;
	}

	mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error);
	if (error != 0) {
ud_error:
		UDP_STAT(udp_out_err_output);
		ASSERT(mp != NULL);
		/* mp is freed by the following routine */
		udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen,
		    (t_scalar_t)error);
	}
}

/*
 * Squeue wrapper: run udp_output() for a message that was entered via
 * _UDP_ENTER/udp_enter, then exit the conn with _UDP_EXIT.
 */
/* ARGSUSED */
static void
udp_output_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_output((conn_t *)arg, mp, NULL, 0);
	_UDP_EXIT((conn_t *)arg);
}

/*
 * STREAMS write-side put procedure: enter the conn and dispatch the
 * message to udp_output() via udp_output_wrapper().
 */
static void
udp_wput(queue_t *q, mblk_t *mp)
{
	_UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper,
	    SQTAG_UDP_WPUT);
}

/*
 * Allocate and prepare a T_UNITDATA_REQ message.
 */
static mblk_t *
udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen)
{
	struct T_unitdata_req *tudr;
	mblk_t *mp;

	/* One mblk holds the T_unitdata_req header plus the address */
	mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED);
	if (mp != NULL) {
		mp->b_wptr += sizeof (*tudr) + addrlen;
		DB_TYPE(mp) = M_PROTO;

		tudr = (struct T_unitdata_req *)mp->b_rptr;
		tudr->PRIM_type = T_UNITDATA_REQ;
		tudr->DEST_length = addrlen;
		tudr->DEST_offset = (t_scalar_t)sizeof (*tudr);
		tudr->OPT_length = 0;
		tudr->OPT_offset = 0;
		bcopy(addr, tudr+1, addrlen);	/* address follows the header */
	}
	return (mp);	/* NULL on allocation failure */
}

/*
 * Entry point for sockfs when udp is in "direct sockfs" mode.  This mode
 * is valid when we are directly beneath the stream head, and thus sockfs
 * is able to bypass STREAMS and directly call us, passing along the sockaddr
 * structure without the cumbersome T_UNITDATA_REQ interface.  Note that
 * this is done for both connected and non-connected endpoint.
 */
void
udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
{
	conn_t	*connp;
	udp_t	*udp;

	q = UDP_WR(q);
	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;

	/* udpsockfs should only send down M_DATA for this entry point */
	ASSERT(DB_TYPE(mp) == M_DATA);

	mutex_enter(&connp->conn_lock);
	UDP_MODE_ASSERTIONS(udp, UDP_ENTER);

	if (udp->udp_mode != UDP_MT_HOT) {
		/*
		 * We can't enter this conn right away because another
		 * thread is currently executing as writer; therefore we
		 * need to deposit the message into the squeue to be
		 * drained later.  If a socket address is present, we
		 * need to create a T_UNITDATA_REQ message as placeholder.
		 */
		if (addr != NULL && addrlen != 0) {
			mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen);

			if (tudr_mp == NULL) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&udp_mib, udpOutErrors);
				UDP_STAT(udp_out_err_tudr);
				freemsg(mp);
				return;
			}
			/* Tag the packet with T_UNITDATA_REQ */
			tudr_mp->b_cont = mp;
			mp = tudr_mp;
		}
		mutex_exit(&connp->conn_lock);
		udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT);
		return;
	}

	/* We can execute as reader right away. */
	UDP_READERS_INCREF(udp);
	mutex_exit(&connp->conn_lock);

	udp_output(connp, mp, addr, addrlen);

	udp_exit(connp);
}

/*
 * udp_output_v6():
 *	Build the IPv6 (plus optional ip6i_t and extension) headers for an
 *	outbound datagram and pass it to ip_output_v6().  Assumes that
 *	udp_wput did some sanity checking on the destination address.
 *	Returns NULL when the message was consumed; otherwise returns mp
 *	with *error set.
 */
static mblk_t *
udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
{
	ip6_t		*ip6h;
	ip6i_t		*ip6i;	/* mp1->b_rptr even if no ip6i_t */
	mblk_t		*mp1 = mp;
	mblk_t		*mp2;
	int		udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
	size_t		ip_len;
	udpha_t		*udph;
	udp_t		*udp = connp->conn_udp;
	queue_t		*q = connp->conn_wq;
	ip6_pkt_t	ipp_s;	/* For ancillary data options */
	ip6_pkt_t	*ipp = &ipp_s;
	ip6_pkt_t	*tipp;	/* temporary ipp */
	uint32_t	csum = 0;
	uint_t		ignore = 0;
	uint_t		option_exists = 0, is_sticky = 0;
	uint8_t		*cp;
	uint8_t		*nxthdr_ptr;
	in6_addr_t	ip6_dst;
	udpattrs_t	attrs;
	boolean_t	opt_present;
	ip6_hbh_t	*hopoptsptr = NULL;
	uint_t		hopoptslen = 0;
	boolean_t	is_ancillary = B_FALSE;

	*error = 0;

	/*
	 * If the local address is a mapped address return
	 * an error.
	 * It would be possible to send an IPv6 packet but the
	 * response would never make it back to the application
	 * since it is bound to a mapped address.
7097 */ 7098 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 7099 *error = EADDRNOTAVAIL; 7100 goto done; 7101 } 7102 7103 ipp->ipp_fields = 0; 7104 ipp->ipp_sticky_ignored = 0; 7105 7106 /* 7107 * If TPI options passed in, feed it for verification and handling 7108 */ 7109 attrs.udpattr_credset = B_FALSE; 7110 opt_present = B_FALSE; 7111 if (DB_TYPE(mp) != M_DATA) { 7112 mp1 = mp->b_cont; 7113 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 7114 attrs.udpattr_ipp = ipp; 7115 attrs.udpattr_mb = mp; 7116 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 7117 goto done; 7118 ASSERT(*error == 0); 7119 opt_present = B_TRUE; 7120 } 7121 } 7122 ignore = ipp->ipp_sticky_ignored; 7123 7124 /* mp1 points to the M_DATA mblk carrying the packet */ 7125 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 7126 7127 if (sin6->sin6_scope_id != 0 && 7128 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 7129 /* 7130 * IPPF_SCOPE_ID is special. It's neither a sticky 7131 * option nor ancillary data. It needs to be 7132 * explicitly set in options_exists. 7133 */ 7134 option_exists |= IPPF_SCOPE_ID; 7135 } 7136 7137 /* 7138 * Compute the destination address 7139 */ 7140 ip6_dst = sin6->sin6_addr; 7141 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 7142 ip6_dst = ipv6_loopback; 7143 7144 /* 7145 * If we're not going to the same destination as last time, then 7146 * recompute the label required. This is done in a separate routine to 7147 * avoid blowing up our stack here. 7148 * 7149 * TSOL Note: Since we are not in WRITER mode, UDP packets 7150 * to different destination may require different labels. 7151 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 7152 * and sticky ipp_hopoptslen are consistent for the current 7153 * destination and are updated atomically. 
7154 */ 7155 mutex_enter(&connp->conn_lock); 7156 if (is_system_labeled()) { 7157 /* Using UDP MLP requires SCM_UCRED from user */ 7158 if (connp->conn_mlp_type != mlptSingle && 7159 !attrs.udpattr_credset) { 7160 DTRACE_PROBE4( 7161 tx__ip__log__info__output__udp6, 7162 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 7163 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 7164 *error = ECONNREFUSED; 7165 mutex_exit(&connp->conn_lock); 7166 goto done; 7167 } 7168 if ((opt_present || 7169 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst)) && 7170 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 7171 mutex_exit(&connp->conn_lock); 7172 goto done; 7173 } 7174 } 7175 7176 /* 7177 * If there's a security label here, then we ignore any options the 7178 * user may try to set. We keep the peer's label as a hidden sticky 7179 * option. We make a private copy of this label before releasing the 7180 * lock so that label is kept consistent with the destination addr. 7181 */ 7182 if (udp->udp_label_len_v6 > 0) { 7183 ignore &= ~IPPF_HOPOPTS; 7184 ipp->ipp_fields &= ~IPPF_HOPOPTS; 7185 } 7186 7187 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 7188 /* No sticky options nor ancillary data. */ 7189 mutex_exit(&connp->conn_lock); 7190 goto no_options; 7191 } 7192 7193 /* 7194 * Go through the options figuring out where each is going to 7195 * come from and build two masks. The first mask indicates if 7196 * the option exists at all. The second mask indicates if the 7197 * option is sticky or ancillary. 
7198 */ 7199 if (!(ignore & IPPF_HOPOPTS)) { 7200 if (ipp->ipp_fields & IPPF_HOPOPTS) { 7201 option_exists |= IPPF_HOPOPTS; 7202 udp_ip_hdr_len += ipp->ipp_hopoptslen; 7203 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 7204 option_exists |= IPPF_HOPOPTS; 7205 is_sticky |= IPPF_HOPOPTS; 7206 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 7207 hopoptsptr = kmem_alloc( 7208 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 7209 if (hopoptsptr == NULL) { 7210 *error = ENOMEM; 7211 mutex_exit(&connp->conn_lock); 7212 goto done; 7213 } 7214 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 7215 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 7216 hopoptslen); 7217 udp_ip_hdr_len += hopoptslen; 7218 } 7219 } 7220 mutex_exit(&connp->conn_lock); 7221 7222 if (!(ignore & IPPF_RTHDR)) { 7223 if (ipp->ipp_fields & IPPF_RTHDR) { 7224 option_exists |= IPPF_RTHDR; 7225 udp_ip_hdr_len += ipp->ipp_rthdrlen; 7226 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 7227 option_exists |= IPPF_RTHDR; 7228 is_sticky |= IPPF_RTHDR; 7229 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 7230 } 7231 } 7232 7233 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 7234 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 7235 option_exists |= IPPF_RTDSTOPTS; 7236 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 7237 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 7238 option_exists |= IPPF_RTDSTOPTS; 7239 is_sticky |= IPPF_RTDSTOPTS; 7240 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 7241 } 7242 } 7243 7244 if (!(ignore & IPPF_DSTOPTS)) { 7245 if (ipp->ipp_fields & IPPF_DSTOPTS) { 7246 option_exists |= IPPF_DSTOPTS; 7247 udp_ip_hdr_len += ipp->ipp_dstoptslen; 7248 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 7249 option_exists |= IPPF_DSTOPTS; 7250 is_sticky |= IPPF_DSTOPTS; 7251 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 7252 } 7253 } 7254 7255 if (!(ignore & IPPF_IFINDEX)) { 7256 if (ipp->ipp_fields & IPPF_IFINDEX) { 7257 
option_exists |= IPPF_IFINDEX; 7258 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 7259 option_exists |= IPPF_IFINDEX; 7260 is_sticky |= IPPF_IFINDEX; 7261 } 7262 } 7263 7264 if (!(ignore & IPPF_ADDR)) { 7265 if (ipp->ipp_fields & IPPF_ADDR) { 7266 option_exists |= IPPF_ADDR; 7267 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 7268 option_exists |= IPPF_ADDR; 7269 is_sticky |= IPPF_ADDR; 7270 } 7271 } 7272 7273 if (!(ignore & IPPF_DONTFRAG)) { 7274 if (ipp->ipp_fields & IPPF_DONTFRAG) { 7275 option_exists |= IPPF_DONTFRAG; 7276 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 7277 option_exists |= IPPF_DONTFRAG; 7278 is_sticky |= IPPF_DONTFRAG; 7279 } 7280 } 7281 7282 if (!(ignore & IPPF_USE_MIN_MTU)) { 7283 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 7284 option_exists |= IPPF_USE_MIN_MTU; 7285 } else if (udp->udp_sticky_ipp.ipp_fields & 7286 IPPF_USE_MIN_MTU) { 7287 option_exists |= IPPF_USE_MIN_MTU; 7288 is_sticky |= IPPF_USE_MIN_MTU; 7289 } 7290 } 7291 7292 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 7293 option_exists |= IPPF_HOPLIMIT; 7294 /* IPV6_HOPLIMIT can never be sticky */ 7295 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 7296 7297 if (!(ignore & IPPF_UNICAST_HOPS) && 7298 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 7299 option_exists |= IPPF_UNICAST_HOPS; 7300 is_sticky |= IPPF_UNICAST_HOPS; 7301 } 7302 7303 if (!(ignore & IPPF_MULTICAST_HOPS) && 7304 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 7305 option_exists |= IPPF_MULTICAST_HOPS; 7306 is_sticky |= IPPF_MULTICAST_HOPS; 7307 } 7308 7309 if (!(ignore & IPPF_TCLASS)) { 7310 if (ipp->ipp_fields & IPPF_TCLASS) { 7311 option_exists |= IPPF_TCLASS; 7312 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 7313 option_exists |= IPPF_TCLASS; 7314 is_sticky |= IPPF_TCLASS; 7315 } 7316 } 7317 7318 no_options: 7319 7320 /* 7321 * If any options carried in the ip6i_t were specified, we 7322 * need to account 
for the ip6i_t in the data we'll be sending 7323 * down. 7324 */ 7325 if (option_exists & IPPF_HAS_IP6I) 7326 udp_ip_hdr_len += sizeof (ip6i_t); 7327 7328 /* check/fix buffer config, setup pointers into it */ 7329 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 7330 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 7331 !OK_32PTR(ip6h)) { 7332 /* Try to get everything in a single mblk next time */ 7333 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 7334 udp->udp_max_hdr_len = udp_ip_hdr_len; 7335 (void) mi_set_sth_wroff(UDP_RD(q), 7336 udp->udp_max_hdr_len + udp_wroff_extra); 7337 } 7338 mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); 7339 if (mp2 == NULL) { 7340 *error = ENOMEM; 7341 goto done; 7342 } 7343 mp2->b_wptr = DB_LIM(mp2); 7344 mp2->b_cont = mp1; 7345 mp1 = mp2; 7346 if (DB_TYPE(mp) != M_DATA) 7347 mp->b_cont = mp1; 7348 else 7349 mp = mp1; 7350 7351 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 7352 } 7353 mp1->b_rptr = (unsigned char *)ip6h; 7354 ip6i = (ip6i_t *)ip6h; 7355 7356 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 7357 if (option_exists & IPPF_HAS_IP6I) { 7358 ip6h = (ip6_t *)&ip6i[1]; 7359 ip6i->ip6i_flags = 0; 7360 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7361 7362 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 7363 if (option_exists & IPPF_SCOPE_ID) { 7364 ip6i->ip6i_flags |= IP6I_IFINDEX; 7365 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 7366 } else if (option_exists & IPPF_IFINDEX) { 7367 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 7368 ASSERT(tipp->ipp_ifindex != 0); 7369 ip6i->ip6i_flags |= IP6I_IFINDEX; 7370 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 7371 } 7372 7373 if (option_exists & IPPF_ADDR) { 7374 /* 7375 * Enable per-packet source address verification if 7376 * IPV6_PKTINFO specified the source address. 7377 * ip6_src is set in the transport's _wput function. 
7378 */ 7379 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 7380 } 7381 7382 if (option_exists & IPPF_DONTFRAG) { 7383 ip6i->ip6i_flags |= IP6I_DONTFRAG; 7384 } 7385 7386 if (option_exists & IPPF_USE_MIN_MTU) { 7387 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 7388 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 7389 } 7390 7391 if (option_exists & IPPF_NEXTHOP) { 7392 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 7393 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 7394 ip6i->ip6i_flags |= IP6I_NEXTHOP; 7395 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 7396 } 7397 7398 /* 7399 * tell IP this is an ip6i_t private header 7400 */ 7401 ip6i->ip6i_nxt = IPPROTO_RAW; 7402 } 7403 7404 /* Initialize IPv6 header */ 7405 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7406 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7407 7408 /* Set the hoplimit of the outgoing packet. */ 7409 if (option_exists & IPPF_HOPLIMIT) { 7410 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 7411 ip6h->ip6_hops = ipp->ipp_hoplimit; 7412 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7413 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7414 ip6h->ip6_hops = udp->udp_multicast_ttl; 7415 if (option_exists & IPPF_MULTICAST_HOPS) 7416 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7417 } else { 7418 ip6h->ip6_hops = udp->udp_ttl; 7419 if (option_exists & IPPF_UNICAST_HOPS) 7420 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7421 } 7422 7423 if (option_exists & IPPF_ADDR) { 7424 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7425 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7426 ip6h->ip6_src = tipp->ipp_addr; 7427 } else { 7428 /* 7429 * The source address was not set using IPV6_PKTINFO. 7430 * First look at the bound source. 7431 * If unspecified fallback to __sin6_src_id. 
7432 */ 7433 ip6h->ip6_src = udp->udp_v6src; 7434 if (sin6->__sin6_src_id != 0 && 7435 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7436 ip_srcid_find_id(sin6->__sin6_src_id, 7437 &ip6h->ip6_src, connp->conn_zoneid); 7438 } 7439 } 7440 7441 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7442 cp = (uint8_t *)&ip6h[1]; 7443 7444 /* 7445 * Here's where we have to start stringing together 7446 * any extension headers in the right order: 7447 * Hop-by-hop, destination, routing, and final destination opts. 7448 */ 7449 if (option_exists & IPPF_HOPOPTS) { 7450 /* Hop-by-hop options */ 7451 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7452 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7453 if (hopoptslen == 0) { 7454 hopoptsptr = tipp->ipp_hopopts; 7455 hopoptslen = tipp->ipp_hopoptslen; 7456 is_ancillary = B_TRUE; 7457 } 7458 7459 *nxthdr_ptr = IPPROTO_HOPOPTS; 7460 nxthdr_ptr = &hbh->ip6h_nxt; 7461 7462 bcopy(hopoptsptr, cp, hopoptslen); 7463 cp += hopoptslen; 7464 7465 if (hopoptsptr != NULL && !is_ancillary) { 7466 kmem_free(hopoptsptr, hopoptslen); 7467 hopoptsptr = NULL; 7468 hopoptslen = 0; 7469 } 7470 } 7471 /* 7472 * En-route destination options 7473 * Only do them if there's a routing header as well 7474 */ 7475 if (option_exists & IPPF_RTDSTOPTS) { 7476 ip6_dest_t *dst = (ip6_dest_t *)cp; 7477 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7478 7479 *nxthdr_ptr = IPPROTO_DSTOPTS; 7480 nxthdr_ptr = &dst->ip6d_nxt; 7481 7482 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7483 cp += tipp->ipp_rtdstoptslen; 7484 } 7485 /* 7486 * Routing header next 7487 */ 7488 if (option_exists & IPPF_RTHDR) { 7489 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7490 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7491 7492 *nxthdr_ptr = IPPROTO_ROUTING; 7493 nxthdr_ptr = &rt->ip6r_nxt; 7494 7495 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7496 cp += tipp->ipp_rthdrlen; 7497 } 7498 /* 7499 * Do ultimate destination options 7500 */ 7501 if (option_exists & IPPF_DSTOPTS) { 7502 ip6_dest_t *dest = (ip6_dest_t *)cp; 
7503 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7504 7505 *nxthdr_ptr = IPPROTO_DSTOPTS; 7506 nxthdr_ptr = &dest->ip6d_nxt; 7507 7508 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7509 cp += tipp->ipp_dstoptslen; 7510 } 7511 /* 7512 * Now set the last header pointer to the proto passed in 7513 */ 7514 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7515 *nxthdr_ptr = IPPROTO_UDP; 7516 7517 /* Update UDP header */ 7518 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 7519 udph->uha_dst_port = sin6->sin6_port; 7520 udph->uha_src_port = udp->udp_port; 7521 7522 /* 7523 * Copy in the destination address 7524 */ 7525 ip6h->ip6_dst = ip6_dst; 7526 7527 ip6h->ip6_vcf = 7528 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 7529 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7530 7531 if (option_exists & IPPF_TCLASS) { 7532 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7533 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7534 tipp->ipp_tclass); 7535 } 7536 7537 if (option_exists & IPPF_RTHDR) { 7538 ip6_rthdr_t *rth; 7539 7540 /* 7541 * Perform any processing needed for source routing. 7542 * We know that all extension headers will be in the same mblk 7543 * as the IPv6 header. 7544 */ 7545 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7546 if (rth != NULL && rth->ip6r_segleft != 0) { 7547 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7548 /* 7549 * Drop packet - only support Type 0 routing. 7550 * Notify the application as well. 7551 */ 7552 *error = EPROTO; 7553 goto done; 7554 } 7555 7556 /* 7557 * rth->ip6r_len is twice the number of 7558 * addresses in the header. Thus it must be even. 7559 */ 7560 if (rth->ip6r_len & 0x1) { 7561 *error = EPROTO; 7562 goto done; 7563 } 7564 /* 7565 * Shuffle the routing header and ip6_dst 7566 * addresses, and get the checksum difference 7567 * between the first hop (in ip6_dst) and 7568 * the destination (in the last routing hdr entry). 
7569 */ 7570 csum = ip_massage_options_v6(ip6h, rth); 7571 /* 7572 * Verify that the first hop isn't a mapped address. 7573 * Routers along the path need to do this verification 7574 * for subsequent hops. 7575 */ 7576 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7577 *error = EADDRNOTAVAIL; 7578 goto done; 7579 } 7580 7581 cp += (rth->ip6r_len + 1)*8; 7582 } 7583 } 7584 7585 /* count up length of UDP packet */ 7586 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7587 if ((mp2 = mp1->b_cont) != NULL) { 7588 do { 7589 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7590 ip_len += (uint32_t)MBLKL(mp2); 7591 } while ((mp2 = mp2->b_cont) != NULL); 7592 } 7593 7594 /* 7595 * If the size of the packet is greater than the maximum allowed by 7596 * ip, return an error. Passing this down could cause panics because 7597 * the size will have wrapped and be inconsistent with the msg size. 7598 */ 7599 if (ip_len > IP_MAXPACKET) { 7600 *error = EMSGSIZE; 7601 goto done; 7602 } 7603 7604 /* Store the UDP length. Subtract length of extension hdrs */ 7605 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7606 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7607 7608 /* 7609 * We make it easy for IP to include our pseudo header 7610 * by putting our length in uh_checksum, modified (if 7611 * we have a routing header) by the checksum difference 7612 * between the ultimate destination and first hop addresses. 7613 * Note: UDP over IPv6 must always checksum the packet. 7614 */ 7615 csum += udph->uha_length; 7616 csum = (csum & 0xFFFF) + (csum >> 16); 7617 udph->uha_checksum = (uint16_t)csum; 7618 7619 #ifdef _LITTLE_ENDIAN 7620 ip_len = htons(ip_len); 7621 #endif 7622 ip6h->ip6_plen = ip_len; 7623 if (DB_CRED(mp) != NULL) 7624 mblk_setcred(mp1, DB_CRED(mp)); 7625 7626 if (DB_TYPE(mp) != M_DATA) { 7627 ASSERT(mp != mp1); 7628 freeb(mp); 7629 } 7630 7631 /* mp has been consumed and we'll return success */ 7632 ASSERT(*error == 0); 7633 mp = NULL; 7634 7635 /* We're done. 
Pass the packet to IP */
	BUMP_MIB(&udp_mib, udpOutDatagrams);
	ip_output_v6(connp, mp1, q, IP_WPUT);

done:
	/*
	 * Free the hop-by-hop options buffer unless it was supplied as
	 * ancillary data, in which case it is not ours to free.
	 */
	if (hopoptsptr != NULL && !is_ancillary) {
		kmem_free(hopoptsptr, hopoptslen);
		hopoptsptr = NULL;
	}
	/* On failure mp is returned to the caller for error reporting. */
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&udp_mib, udpOutErrors);
	}
	return (mp);
}

/*
 * Write-side handler for everything other than plain data: TPI control
 * primitives (M_PROTO/M_PCPROTO), M_FLUSH, M_IOCTL and M_IOCDATA.
 * Messages not recognized here are passed through unchanged to IP.
 */
static void
udp_wput_other(queue_t *q, mblk_t *mp)
{
	uchar_t		*rptr = mp->b_rptr;
	struct datab	*db;
	struct iocblk	*iocp;
	cred_t		*cr;
	conn_t		*connp = Q_TO_CONN(q);
	udp_t		*udp = connp->conn_udp;

	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
	    "udp_wput_other_start: q %p", q);

	db = mp->b_datap;

	/* Use the message's credential if present, else the conn's. */
	cr = DB_CREDDEF(mp, connp->conn_cred);

	switch (db->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		/* Must at least hold the TPI primitive type. */
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			freemsg(mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "protoshort");
			return;
		}
		switch (((t_primp_t)rptr)->type) {
		case T_ADDR_REQ:
			udp_addr_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "addrreq");
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			udp_bind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "bindreq");
			return;
		case T_CONN_REQ:
			udp_connect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "connreq");
			return;
		case T_CAPABILITY_REQ:
			udp_capability_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "capabreq");
			return;
		case T_INFO_REQ:
			udp_info_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "unitdatareq");
			return;
		case T_UNBIND_REQ:
			udp_unbind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
			return;
		case T_SVR4_OPTMGMT_REQ:
			/* Try SNMP first; fall back to generic handling. */
			if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr))
				/*
				 * Use upper queue for option processing in
				 * case the request is not handled at this
				 * level and needs to be passed down to IP.
				 */
				(void) svr4_optcom_req(_WR(UDP_RD(q)),
				    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "optmgmtreq");
			return;

		case T_OPTMGMT_REQ:
			/*
			 * Use upper queue for option processing in
			 * case the request is not handled at this
			 * level and needs to be passed down to IP.
			 */
			(void) tpi_optcom_req(_WR(UDP_RD(q)),
			    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "optmgmtreq");
			return;

		case T_DISCON_REQ:
			udp_disconnect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "disconreq");
			return;

		/* The following TPI message is not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "connres/disconreq");
			return;

		/* The following 3 TPI messages are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "data/exdata/ordrel");
			return;
		default:
			break;
		}
		break;
	case M_FLUSH:
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)",
				    q, "getpeername");
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME: {
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "getmyname");
			return;
		}
		case ND_SET:
			/* nd_getset performs the necessary checking */
		case ND_GET:
			if (nd_getset(q, udp_g_nd, mp)) {
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)",
				    q, "get");
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we could
			 * no longer use read-side synchronous stream.
			 * Drain any queued data and disable direct sockfs
			 * interface from now on.
			 */
			if (!udp->udp_issocket) {
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp->udp_issocket = B_FALSE;
				if (udp->udp_direct_sockfs) {
					/*
					 * Disable read-side synchronous
					 * stream interface and drain any
					 * queued data.
					 */
					udp_rcv_drain(UDP_RD(q), udp,
					    B_FALSE);
					ASSERT(!udp->udp_direct_sockfs);
					UDP_STAT(udp_sock_fallback);
				}
				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			putnext(UDP_RD(q), mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
	    "udp_wput_other_end: q %p (%S)", q, "end");
	ip_output(connp, mp, q, IP_WPUT);
}

/*
 * Wrapper around udp_wput_other(): runs it against the conn's write
 * queue and then calls udp_exit() on the conn.  NOTE(review): arg is
 * presumably a conn_t passed by a deferred/callback dispatch mechanism;
 * confirm against callers (not visible in this chunk).
 */
/* ARGSUSED */
static void
udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_wput_other(((conn_t *)arg)->conn_wq, mp);
	udp_exit((conn_t *)arg);
}

/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages (the copyin/copyout continuations for TI_GETMYNAME and
 * TI_GETPEERNAME).
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t	*mp1;
	STRUCT_HANDLE(strbuf, sb);
	uint16_t port;
	in6_addr_t v6addr;
	ipaddr_t v4addr;
	uint32_t flowinfo = 0;
	int	addrlen;
	udp_t	*udp = Q_TO_UDP(q);

	/* Make sure it is one of ours.
 */
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		/* Not one of our ioctls; hand it to IP unchanged. */
		ip_output(Q_TO_CONN(q), mp, q, IP_WPUT);
		return;
	}

	q = WR(UDP_RD(q));
	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		/* The strbuf has arrived; fall through to process it. */
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME.  Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
	    (void *)mp1->b_rptr);
	if (udp->udp_family == AF_INET)
		addrlen = sizeof (sin_t);
	else
		addrlen = sizeof (sin6_t);

	/* The user's buffer must be able to hold the full sockaddr. */
	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v4addr = V4_PART_OF_V6(udp->udp_v6src);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast.  Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				v4addr = V4_PART_OF_V6(udp->udp_bound_v6src);
			}
		} else {
			/* udp->udp_family == AF_INET6 */
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v6addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				v6addr = udp->udp_bound_v6src;
			}
		}
		port = udp->udp_port;
		break;
	case TI_GETPEERNAME:
		/* No peer unless a default destination is set. */
		if (udp->udp_state != TS_DATA_XFER) {
			mi_copy_done(q, mp, ENOTCONN);
			return;
		}
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			v4addr = V4_PART_OF_V6(udp->udp_v6dst);
		} else {
			/* udp->udp_family == AF_INET6) */
			v6addr = udp->udp_v6dst;
			flowinfo = udp->udp_flowinfo;
		}
		port = udp->udp_dstport;
		break;
	default:
		mi_copy_done(q, mp, EPROTO);
		return;
	}
	/* Allocate the mblk that will carry the address back up. */
	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
	if (!mp1)
		return;

	if (udp->udp_family == AF_INET) {
		sin_t *sin;

		STRUCT_FSET(sb, len, (int)sizeof (sin_t));
		sin = (sin_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin[1];
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = v4addr;
		sin->sin_port = port;
	} else {
		/* udp->udp_family == AF_INET6 */
		sin6_t *sin6;

		STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
		sin6 = (sin6_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin6[1];
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_flowinfo = flowinfo;
		sin6->sin6_addr = v6addr;
		sin6->sin6_port = port;
	}
	/* Copy out the address */
	mi_copyout(q, mp);
}


static int
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
    udpattrs_t *udpattrs)
{
	struct T_unitdata_req *udreqp;
	int is_absreq_failure;
	cred_t *cr;
	conn_t	*connp = Q_TO_CONN(q);

	ASSERT(((t_primp_t)mp->b_rptr)->type);

	cr = DB_CREDDEF(mp, connp->conn_cred);

	udreqp = (struct T_unitdata_req *)mp->b_rptr;

	/*
	 * Use upper queue for option processing since the callback
	 * routines expect to be called in UDP instance instead of IP.
	 */
	*errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length,
	    udreqp->OPT_offset, cr, &udp_opt_obj,
	    udpattrs, &is_absreq_failure);

	if (*errorp != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		return (-1);		/* failure */
	}
	ASSERT(is_absreq_failure == 0);
	return (0);	/* success */
}

/*
 * Module initialization: size and allocate the bind-hash fanout table,
 * register the tunable parameters and kstats, and create the udp_t
 * kmem cache.
 */
void
udp_ddi_init(void)
{
	int i;

	UDP6_MAJ = ddi_name_to_major(UDP6);

	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) {
		/* Not a power of two.  Round up to nearest power of two */
		for (i = 0; i < 31; i++) {
			if (udp_bind_fanout_size < (1 << i))
				break;
		}
		udp_bind_fanout_size = 1 << i;
	}
	udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size *
	    sizeof (udp_fanout_t), KM_SLEEP);
	for (i = 0; i < udp_bind_fanout_size; i++) {
		mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
		    NULL);
	}
	(void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr));

	udp_kstat_init();

	udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t),
	    CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0);
}

/*
 * Module teardown: release everything udp_ddi_init() set up, in
 * roughly reverse order.
 */
void
udp_ddi_destroy(void)
{
	int i;

	nd_free(&udp_g_nd);

	for (i = 0; i < udp_bind_fanout_size; i++) {
		mutex_destroy(&udp_bind_fanout[i].uf_lock);
	}

	kmem_free(udp_bind_fanout, udp_bind_fanout_size *
	    sizeof (udp_fanout_t));

	udp_kstat_fini();

	kmem_cache_destroy(udp_cache);
}

/*
 * Create and install the MIB-II named kstat (udp_mibkp) and the raw
 * udp statistics kstat (udp_ksp).  Returns silently if the MIB kstat
 * cannot be created.
 */
static void
udp_kstat_init(void)
{
	udp_named_kstat_t template = {
		{ "inDatagrams",	KSTAT_DATA_UINT32, 0 },
		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
		{ "outDatagrams",	KSTAT_DATA_UINT32, 0 },
		{ "entrySize",		KSTAT_DATA_INT32, 0 },
		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
	};

	udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME,
	    "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0);

	if (udp_mibkp == NULL)
		return;

	/* The entry sizes never change; fill them in once here. */
	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);

	bcopy(&template, udp_mibkp->ks_data, sizeof (template));

	udp_mibkp->ks_update = udp_kstat_update;

	kstat_install(udp_mibkp);

	if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat",
	    "net", KSTAT_TYPE_NAMED,
	    sizeof (udp_statistics) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL)) != NULL) {
		/* Virtual kstat: point ks_data at our existing counters. */
		udp_ksp->ks_data = &udp_statistics;
		kstat_install(udp_ksp);
	}
}

/*
 * Delete both kstats installed by udp_kstat_init().
 */
static void
udp_kstat_fini(void)
{
	if (udp_ksp != NULL) {
		kstat_delete(udp_ksp);
		udp_ksp = NULL;
	}
	if (udp_mibkp != NULL) {
		kstat_delete(udp_mibkp);
		udp_mibkp = NULL;
	}
}

/*
 * ks_update callback for the MIB-II kstat: refresh the named counters
 * from the global udp_mib.  The kstat is read-only (EACCES on write).
 */
static int
udp_kstat_update(kstat_t *kp, int rw)
{
	udp_named_kstat_t *udpkp;

	if ((kp == NULL) || (kp->ks_data == NULL))
		return (EIO);

	if (rw == KSTAT_WRITE)
		return (EACCES);

	udpkp = (udp_named_kstat_t *)kp->ks_data;

	udpkp->inDatagrams.value.ui32 = udp_mib.udpInDatagrams;
	udpkp->inErrors.value.ui32 = udp_mib.udpInErrors;
	udpkp->outDatagrams.value.ui32 = udp_mib.udpOutDatagrams;
	udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors;

	return (0);
}

/* ARGSUSED */
static void
udp_rput(queue_t *q, mblk_t *mp)
{
	/*
	 * We get here whenever we do qreply() from IP,
	 * i.e. as part of handling ioctls, etc.  Just pass
	 * the message on upstream.
	 */
	putnext(q, mp);
}

/*
 * Read-side synchronous stream info entry point, called as a
 * result of handling certain STREAMS ioctl operations.
 * Reports queued-message counts/sizes and can copy out the first
 * datagram's data, according to the INFOD_* bits in dp->d_cmd.
 */
static int
udp_rinfop(queue_t *q, infod_t *dp)
{
	mblk_t	*mp;
	uint_t	cmd = dp->d_cmd;
	int	res = 0;
	int	error = 0;
	udp_t	*udp = Q_TO_UDP(RD(UDP_WR(q)));
	struct stdata *stp = STREAM(q);

	mutex_enter(&udp->udp_drain_lock);
	/* If shutdown on read has happened, return nothing */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_flag & STREOF) {
		mutex_exit(&stp->sd_lock);
		goto done;
	}
	mutex_exit(&stp->sd_lock);

	if ((mp = udp->udp_rcv_list_head) == NULL)
		goto done;

	ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);

	if (cmd & INFOD_COUNT) {
		/*
		 * Return the number of messages.
		 */
		dp->d_count += udp->udp_rcv_msgcnt;
		res |= INFOD_COUNT;
	}
	if (cmd & INFOD_BYTES) {
		/*
		 * Return size of all data messages.
		 */
		dp->d_bytes += udp->udp_rcv_cnt;
		res |= INFOD_BYTES;
	}
	if (cmd & INFOD_FIRSTBYTES) {
		/*
		 * Return size of first data message (note: assignment,
		 * not accumulation, unlike INFOD_BYTES above).
		 */
		dp->d_bytes = msgdsize(mp);
		res |= INFOD_FIRSTBYTES;
		dp->d_cmd &= ~INFOD_FIRSTBYTES;
	}
	if (cmd & INFOD_COPYOUT) {
		mblk_t *mp1 = mp->b_cont;
		int n;
		/*
		 * Return data contents of first message.
		 */
		ASSERT(DB_TYPE(mp1) == M_DATA);
		while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
			n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
			if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
			    UIO_READ, dp->d_uiop)) != 0) {
				goto done;
			}
			mp1 = mp1->b_cont;
		}
		res |= INFOD_COPYOUT;
		dp->d_cmd &= ~INFOD_COPYOUT;
	}
done:
	mutex_exit(&udp->udp_drain_lock);

	dp->d_res |= res;

	return (error);
}

/*
 * Read-side synchronous stream entry point. This is called as a result
 * of recv/read operation done at sockfs, and is guaranteed to execute
 * outside of the interrupt thread context. It returns a single datagram
 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
 */
static int
udp_rrw(queue_t *q, struiod_t *dp)
{
	mblk_t	*mp;
	udp_t	*udp = Q_TO_UDP(_RD(UDP_WR(q)));

	/* We should never get here when we're in SNMP mode */
	ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD));

	/*
	 * Dequeue datagram from the head of the list and return
	 * it to caller; also ensure that RSLEEP sd_wakeq flag is
	 * set/cleared depending on whether or not there's data
	 * remaining in the list.
8303 */ 8304 mutex_enter(&udp->udp_drain_lock); 8305 if (!udp->udp_direct_sockfs) { 8306 mutex_exit(&udp->udp_drain_lock); 8307 UDP_STAT(udp_rrw_busy); 8308 return (EBUSY); 8309 } 8310 if ((mp = udp->udp_rcv_list_head) != NULL) { 8311 uint_t size = msgdsize(mp); 8312 8313 /* Last datagram in the list? */ 8314 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 8315 udp->udp_rcv_list_tail = NULL; 8316 mp->b_next = NULL; 8317 8318 udp->udp_rcv_cnt -= size; 8319 udp->udp_rcv_msgcnt--; 8320 UDP_STAT(udp_rrw_msgcnt); 8321 8322 /* No longer flow-controlling? */ 8323 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 8324 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 8325 udp->udp_drain_qfull = B_FALSE; 8326 } 8327 if (udp->udp_rcv_list_head == NULL) { 8328 /* 8329 * Either we just dequeued the last datagram or 8330 * we get here from sockfs and have nothing to 8331 * return; in this case clear RSLEEP. 8332 */ 8333 ASSERT(udp->udp_rcv_cnt == 0); 8334 ASSERT(udp->udp_rcv_msgcnt == 0); 8335 ASSERT(udp->udp_rcv_list_tail == NULL); 8336 STR_WAKEUP_CLEAR(STREAM(q)); 8337 } else { 8338 /* 8339 * More data follows; we need udp_rrw() to be 8340 * called in future to pick up the rest. 8341 */ 8342 STR_WAKEUP_SET(STREAM(q)); 8343 } 8344 mutex_exit(&udp->udp_drain_lock); 8345 dp->d_mp = mp; 8346 return (0); 8347 } 8348 8349 /* 8350 * Enqueue a completely-built T_UNITDATA_IND message into the receive 8351 * list; this is typically executed within the interrupt thread context 8352 * and so we do things as quickly as possible. 
8353 */ 8354 static void 8355 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 8356 { 8357 ASSERT(q == RD(q)); 8358 ASSERT(pkt_len == msgdsize(mp)); 8359 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 8360 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 8361 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 8362 8363 mutex_enter(&udp->udp_drain_lock); 8364 /* 8365 * Wake up and signal the receiving app; it is okay to do this 8366 * before enqueueing the mp because we are holding the drain lock. 8367 * One of the advantages of synchronous stream is the ability for 8368 * us to find out when the application performs a read on the 8369 * socket by way of udp_rrw() entry point being called. We need 8370 * to generate SIGPOLL/SIGIO for each received data in the case 8371 * of asynchronous socket just as in the strrput() case. However, 8372 * we only wake the application up when necessary, i.e. during the 8373 * first enqueue. When udp_rrw() is called, we send up a single 8374 * datagram upstream and call STR_WAKEUP_SET() again when there 8375 * are still data remaining in our receive queue. 8376 */ 8377 if (udp->udp_rcv_list_head == NULL) { 8378 STR_WAKEUP_SET(STREAM(q)); 8379 udp->udp_rcv_list_head = mp; 8380 } else { 8381 udp->udp_rcv_list_tail->b_next = mp; 8382 } 8383 udp->udp_rcv_list_tail = mp; 8384 udp->udp_rcv_cnt += pkt_len; 8385 udp->udp_rcv_msgcnt++; 8386 8387 /* Need to flow-control? */ 8388 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 8389 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 8390 udp->udp_drain_qfull = B_TRUE; 8391 8392 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 8393 STR_SENDSIG(STREAM(q)); 8394 mutex_exit(&udp->udp_drain_lock); 8395 } 8396 8397 /* 8398 * Drain the contents of receive list to the module upstream; we do 8399 * this during close or when we fallback to the slow mode due to 8400 * sockmod being popped or a module being pushed on top of us. 
8401 */ 8402 static void 8403 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 8404 { 8405 mblk_t *mp; 8406 8407 ASSERT(q == RD(q)); 8408 8409 mutex_enter(&udp->udp_drain_lock); 8410 /* 8411 * There is no race with a concurrent udp_input() sending 8412 * up packets using putnext() after we have cleared the 8413 * udp_direct_sockfs flag but before we have completed 8414 * sending up the packets in udp_rcv_list, since we are 8415 * either a writer or we have quiesced the conn. 8416 */ 8417 udp->udp_direct_sockfs = B_FALSE; 8418 mutex_exit(&udp->udp_drain_lock); 8419 8420 if (udp->udp_rcv_list_head != NULL) 8421 UDP_STAT(udp_drain); 8422 8423 /* 8424 * Send up everything via putnext(); note here that we 8425 * don't need the udp_drain_lock to protect us since 8426 * nothing can enter udp_rrw() and that we currently 8427 * have exclusive access to this udp. 8428 */ 8429 while ((mp = udp->udp_rcv_list_head) != NULL) { 8430 udp->udp_rcv_list_head = mp->b_next; 8431 mp->b_next = NULL; 8432 udp->udp_rcv_cnt -= msgdsize(mp); 8433 udp->udp_rcv_msgcnt--; 8434 if (closing) { 8435 freemsg(mp); 8436 } else { 8437 putnext(q, mp); 8438 } 8439 } 8440 ASSERT(udp->udp_rcv_cnt == 0); 8441 ASSERT(udp->udp_rcv_msgcnt == 0); 8442 ASSERT(udp->udp_rcv_list_head == NULL); 8443 udp->udp_rcv_list_tail = NULL; 8444 udp->udp_drain_qfull = B_FALSE; 8445 } 8446 8447 static size_t 8448 udp_set_rcv_hiwat(udp_t *udp, size_t size) 8449 { 8450 /* We add a bit of extra buffering */ 8451 size += size >> 1; 8452 if (size > udp_max_buf) 8453 size = udp_max_buf; 8454 8455 udp->udp_rcv_hiwat = size; 8456 return (size); 8457 } 8458 8459 /* 8460 * Little helper for IPsec's NAT-T processing. 8461 */ 8462 boolean_t 8463 udp_compute_checksum(void) 8464 { 8465 return (udp_do_checksum); 8466 } 8467