1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/debug.h> 57 #include <sys/isa_defs.h> 58 #include <sys/random.h> 59 #include <netinet/in.h> 60 #include <netinet/ip6.h> 61 #include <netinet/icmp6.h> 62 #include <netinet/udp.h> 63 #include <net/if.h> 64 #include <net/route.h> 65 66 #include <inet/common.h> 67 #include <inet/ip.h> 68 #include <inet/ip_impl.h> 69 #include <inet/ip6.h> 70 #include <inet/ip_ire.h> 71 #include <inet/ip_if.h> 72 #include <inet/ip_multi.h> 73 #include <inet/mi.h> 74 #include <inet/mib2.h> 75 #include <inet/nd.h> 76 #include <inet/optcom.h> 77 #include <inet/snmpcom.h> 78 #include <inet/kstatcom.h> 79 #include <inet/udp_impl.h> 80 #include <inet/ipclassifier.h> 81 #include <inet/ipsec_impl.h> 82 #include <inet/ipp_common.h> 83 84 /* 85 * The ipsec_info.h header file is here since it has the definition for the 86 * M_CTL message types used by IP to convey information to the ULP. The 87 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
88 */ 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 92 #include <sys/tsol/label.h> 93 #include <sys/tsol/tnet.h> 94 #include <rpc/pmap_prot.h> 95 96 /* 97 * Synchronization notes: 98 * 99 * UDP uses a combination of its internal perimeter, a global lock and 100 * a set of bind hash locks to protect its data structures. Please see 101 * the note above udp_mode_assertions for details about the internal 102 * perimeter. 103 * 104 * When a UDP endpoint is bound to a local port, it is inserted into 105 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 106 * The size of the array is controlled by the udp_bind_fanout_size variable. 107 * This variable can be changed in /etc/system if the default value is 108 * not large enough. Each bind hash bucket is protected by a per bucket 109 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 110 * structure. An UDP endpoint is removed from the bind hash list only 111 * when it is being unbound or being closed. The per bucket lock also 112 * protects a UDP endpoint's state changes. 113 * 114 * Plumbing notes: 115 * 116 * Both udp and ip are merged, but the streams plumbing is kept unchanged 117 * in that udp is always pushed atop /dev/ip. This is done to preserve 118 * backwards compatibility for certain applications which rely on such 119 * plumbing geometry to do things such as issuing I_POP on the stream 120 * in order to obtain direct access to /dev/ip, etc. 121 * 122 * All UDP processings happen in the /dev/ip instance; the udp module 123 * instance does not possess any state about the endpoint, and merely 124 * acts as a dummy module whose presence is to keep the streams plumbing 125 * appearance unchanged. At open time /dev/ip allocates a conn_t that 126 * happens to embed a udp_t. This stays dormant until the time udp is 127 * pushed, which indicates to /dev/ip that it must convert itself from 128 * an IP to a UDP endpoint. 
129 * 130 * We only allow for the following plumbing cases: 131 * 132 * Normal: 133 * /dev/ip is first opened and later udp is pushed directly on top. 134 * This is the default action that happens when a udp socket or 135 * /dev/udp is opened. The conn_t created by /dev/ip instance is 136 * now shared and is marked with IPCL_UDP. 137 * 138 * SNMP-only: 139 * udp is pushed on top of a module other than /dev/ip. When this 140 * happens it will support only SNMP semantics. A new conn_t is 141 * allocated and marked with IPCL_UDPMOD. 142 * 143 * The above cases imply that we don't support any intermediate module to 144 * reside in between /dev/ip and udp -- in fact, we never supported such 145 * scenario in the past as the inter-layer communication semantics have 146 * always been private. Also note that the normal case allows for SNMP 147 * requests to be processed in addition to the rest of UDP operations. 148 * 149 * The normal case plumbing is depicted by the following diagram: 150 * 151 * +---------------+---------------+ 152 * | | | udp 153 * | udp_wq | udp_rq | 154 * | | UDP_RD | 155 * | | | 156 * +---------------+---------------+ 157 * | ^ 158 * v | 159 * +---------------+---------------+ 160 * | | | /dev/ip 161 * | ip_wq | ip_rq | conn_t 162 * | UDP_WR | | 163 * | | | 164 * +---------------+---------------+ 165 * 166 * Messages arriving at udp_wq from above will end up in ip_wq before 167 * it gets processed, i.e. udp write entry points will advance udp_wq 168 * and use its q_next value as ip_wq in order to use the conn_t that 169 * is stored in its q_ptr. Likewise, messages generated by ip to the 170 * module above udp will appear as if they are originated from udp_rq, 171 * i.e. putnext() calls to the module above udp is done using the 172 * udp_rq instead of ip_rq in order to avoid udp_rput() which does 173 * nothing more than calling putnext(). 174 * 175 * The above implies the following rule of thumb: 176 * 177 * 1. 
udp_t is obtained from conn_t, which is created by the /dev/ip 178 * instance and is stored in q_ptr of both ip_wq and ip_rq. There 179 * is no direct reference to conn_t from either udp_wq or udp_rq. 180 * 181 * 2. Write-side entry points of udp can obtain the conn_t via the 182 * Q_TO_CONN() macro, using the queue value obtain from UDP_WR(). 183 * 184 * 3. While in /dev/ip context, putnext() to the module above udp can 185 * be done by supplying the queue value obtained from UDP_RD(). 186 * 187 */ 188 189 static queue_t *UDP_WR(queue_t *); 190 static queue_t *UDP_RD(queue_t *); 191 192 udp_stat_t udp_statistics = { 193 { "udp_ip_send", KSTAT_DATA_UINT64 }, 194 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 195 { "udp_ire_null", KSTAT_DATA_UINT64 }, 196 { "udp_drain", KSTAT_DATA_UINT64 }, 197 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 198 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 199 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 200 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 201 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 202 { "udp_out_opt", KSTAT_DATA_UINT64 }, 203 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 204 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 205 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 206 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 207 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 208 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 209 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 210 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 211 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 212 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 213 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 214 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 215 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 216 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 217 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 218 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 219 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 220 { "udp_in_timestamp", KSTAT_DATA_UINT64 }, 221 #ifdef DEBUG 222 { "udp_data_conn", KSTAT_DATA_UINT64 }, 223 { 
"udp_data_notconn", KSTAT_DATA_UINT64 }, 224 #endif 225 }; 226 227 static kstat_t *udp_ksp; 228 struct kmem_cache *udp_cache; 229 230 /* 231 * Bind hash list size and hash function. It has to be a power of 2 for 232 * hashing. 233 */ 234 #define UDP_BIND_FANOUT_SIZE 512 235 #define UDP_BIND_HASH(lport) \ 236 ((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1)) 237 238 /* UDP bind fanout hash structure. */ 239 typedef struct udp_fanout_s { 240 udp_t *uf_udp; 241 kmutex_t uf_lock; 242 #if defined(_LP64) || defined(_I32LPx) 243 char uf_pad[48]; 244 #else 245 char uf_pad[56]; 246 #endif 247 } udp_fanout_t; 248 249 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 250 /* udp_fanout_t *udp_bind_fanout. */ 251 static udp_fanout_t *udp_bind_fanout; 252 253 /* 254 * This controls the rate some ndd info report functions can be used 255 * by non-privileged users. It stores the last time such info is 256 * requested. When those report functions are called again, this 257 * is checked with the current time and compare with the ndd param 258 * udp_ndd_get_info_interval. 
*/
static clock_t udp_last_ndd_get_info_time;
/* Messages placed in the ndd reply when a report cannot be produced. */
#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"

/* Option processing attrs */
typedef struct udpattrs_s {
	ip6_pkt_t	*udpattr_ipp;
	mblk_t		*udpattr_mb;
	boolean_t	udpattr_credset;
} udpattrs_t;

/* Forward declarations for the file-local UDP routines. */
static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static int	udp_build_hdrs(queue_t *q, udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_close(queue_t *q);
static void	udp_connect(queue_t *q, mblk_t *mp);
static void	udp_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(queue_t *q, mblk_t *mp);
static void	udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
		    t_scalar_t addr_length);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(udpparam_t *udppa, int cnt);
static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
		    cred_t *cr);
static void	udp_report_item(mblk_t *mp, udp_t *udp);
static void	udp_rput(queue_t *q, mblk_t *mp);
static void	udp_rput_other(queue_t *, mblk_t *);
static int	udp_rinfop(queue_t *q, infod_t *dp);
static int	udp_rrw(queue_t *q, struiod_t *dp);
static void	udp_rput_bind_ack(queue_t *q, mblk_t *mp);
static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha);
static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
		    t_scalar_t destlen, t_scalar_t err);
static void	udp_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
		    boolean_t random);
static void	udp_wput(queue_t *q, mblk_t *mp);
static mblk_t	*udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst,
		    uint16_t port, uint_t srcid, int *error);
static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
		    int *error);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
		    socklen_t addrlen);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

static void	udp_kstat_init(void);
static void	udp_kstat_fini(void);
static int	udp_kstat_update(kstat_t *kp, int rw);
static void	udp_input_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2);

static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
		    uint_t pkt_len);
static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void	udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t);
static void	udp_exit(conn_t *);
static void	udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t);
#ifdef DEBUG
static void	udp_mode_assertions(udp_t *, int);
#endif /* DEBUG */

major_t UDP6_MAJ;
#define	UDP6 "udp6"

/* Default receive/transmit high and low water marks, in bytes. */
#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024

/* Module information shared by every qinit table below. */
static struct module_info udp_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/* Read side of the udp module: udp_rput plus the open/close entry points. */
static struct qinit udp_rinit = {
	(pfi_t)udp_rput, NULL, udp_open, udp_close, NULL,
	&udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

/*
 * Write-side table whose put routine is udp_wput.  It is not the one
 * referenced by the udpinfo streamtab below, which uses winit.
 */
static struct qinit udp_winit = {
	(pfi_t)udp_wput, NULL, NULL, NULL, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Pass-through write side: the put routine is simply putnext. */
static struct qinit winit = {
	(pfi_t)putnext, NULL, NULL, NULL, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Support for just SNMP if UDP is not pushed directly over device IP */
struct qinit udp_snmp_rinit = {
	(pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

struct qinit udp_snmp_winit = {
	(pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Stream table for the module: read side udp_rinit, write side winit. */
struct streamtab udpinfo = {
	&udp_rinit, &winit
};

static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

/* Hint not protected by any lock */
static in_port_t	udp_g_next_port_to_try;

/*
 * Extra privileged ports. In host byte order.
*/
#define	UDP_NUM_EPRIV_PORTS	64
static int	udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
/* Only the first two slots are initialized; the rest start out as zero. */
static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 };

/* Only modified during _init and _fini thus no locking is needed. */
static IDP	udp_g_nd;	/* Points to table of UDP ND variables. */

/* MIB-2 stuff for SNMP */
static mib2_udp_t	udp_mib;	/* SNMP fixed size info */
static kstat_t		*udp_mibkp;	/* kstat exporting udp_mib data */

/* Largest UDP payload over IPv4: maximum packet less UDP and IP headers. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest UDP payload over IPv6: maximum packet less UDP and IPv6 headers. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of the default T_INFO_ACK template above. */
static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into udp_g_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * Each row is { minimum, maximum, default value, ndd name }.
 * udp_ndd_get_info_interval is multiplied by 1000 before being handed to
 * drv_usectohz() by the report routines, i.e. it is in milliseconds.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /*min		max		value		name */
 { 0L,		256,		32,		"udp_wroff_extra" },
 { 1L,		255,		255,		"udp_ipv4_ttl" },
 { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
 { 0,		1,		1,		"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
 { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
 { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
 { 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/*
 * The smallest anonymous port in the privileged port range which UDP
 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
 */
static in_port_t udp_min_anonpriv_port = 512;

/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
481 * On non-clustered systems these vectors must always be NULL 482 */ 483 484 void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family, 485 uint8_t *laddrp, in_port_t lport) = NULL; 486 void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, 487 uint8_t *laddrp, in_port_t lport) = NULL; 488 489 typedef union T_primitives *t_primp_t; 490 491 #define UDP_ENQUEUE_MP(udp, mp, proc, tag) { \ 492 ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL); \ 493 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 494 (mp)->b_queue = (queue_t *)((uintptr_t)tag); \ 495 (mp)->b_prev = (mblk_t *)proc; \ 496 if ((udp)->udp_mphead == NULL) \ 497 (udp)->udp_mphead = (mp); \ 498 else \ 499 (udp)->udp_mptail->b_next = (mp); \ 500 (udp)->udp_mptail = (mp); \ 501 (udp)->udp_mpcount++; \ 502 } 503 504 #define UDP_READERS_INCREF(udp) { \ 505 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 506 (udp)->udp_reader_count++; \ 507 } 508 509 #define UDP_READERS_DECREF(udp) { \ 510 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 511 (udp)->udp_reader_count--; \ 512 if ((udp)->udp_reader_count == 0) \ 513 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 514 } 515 516 #define UDP_SQUEUE_DECREF(udp) { \ 517 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 518 (udp)->udp_squeue_count--; \ 519 if ((udp)->udp_squeue_count == 0) \ 520 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 521 } 522 523 /* 524 * Notes on UDP endpoint synchronization: 525 * 526 * UDP needs exclusive operation on a per endpoint basis, when executing 527 * functions that modify the endpoint state. udp_rput_other() deals with 528 * packets with IP options, and processing these packets end up having 529 * to update the endpoint's option related state. udp_wput_other() deals 530 * with control operations from the top, e.g. connect() that needs to 531 * update the endpoint state. These could be synchronized using locks, 532 * but the current version uses squeues for this purpose. 
squeues may 533 * give performance improvement for certain cases such as connected UDP 534 * sockets; thus the framework allows for using squeues. 535 * 536 * The perimeter routines are described as follows: 537 * 538 * udp_enter(): 539 * Enter the UDP endpoint perimeter. 540 * 541 * udp_become_writer(): 542 * Become exclusive on the UDP endpoint. Specifies a function 543 * that will be called exclusively either immediately or later 544 * when the perimeter is available exclusively. 545 * 546 * udp_exit(): 547 * Exit the UDP perimeter. 548 * 549 * Entering UDP from the top or from the bottom must be done using 550 * udp_enter(). No lock must be held while attempting to enter the UDP 551 * perimeter. When finished, udp_exit() must be called to get out of 552 * the perimeter. 553 * 554 * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode, 555 * multiple threads may enter a UDP endpoint concurrently. This is used 556 * for sending and/or receiving normal data. Control operations and other 557 * special cases call udp_become_writer() to become exclusive on a per 558 * endpoint basis and this results in transitioning to SQUEUE mode. squeue 559 * by definition serializes access to the conn_t. When there are no more 560 * pending messages on the squeue for the UDP connection, the endpoint 561 * reverts to MT_HOT mode. During the interregnum when not all MT threads 562 * of an endpoint have finished, messages are queued in the UDP endpoint 563 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode. 564 * 565 * These modes have the following analogs: 566 * 567 * UDP_MT_HOT/udp_reader_count==0 none 568 * UDP_MT_HOT/udp_reader_count>0 RW_READ_LOCK 569 * UDP_MT_QUEUED RW_WRITE_WANTED 570 * UDP_SQUEUE or UDP_QUEUED_SQUEUE RW_WRITE_LOCKED 571 * 572 * Stable modes: UDP_MT_HOT, UDP_SQUEUE 573 * Transient modes: UDP_MT_QUEUED, UDP_QUEUED_SQUEUE 574 * 575 * While in stable modes, UDP keeps track of the number of threads 576 * operating on the endpoint. 
* The udp_reader_count variable represents
 * the number of threads entering the endpoint as readers while it is
 * in UDP_MT_HOT mode.  Transitioning to UDP_SQUEUE happens when there
 * is only a single reader, i.e. when this counter drops to 1.  Likewise,
 * udp_squeue_count represents the number of threads operating on the
 * endpoint's squeue while it is in UDP_SQUEUE mode.  The mode transition
 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e.
 * when this counter drops to 0.
 *
 * The default mode is set to UDP_MT_HOT and UDP alternates between
 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below.
 *
 * Mode transition:
 * ----------------------------------------------------------------
 * old mode		Event				New mode
 * ----------------------------------------------------------------
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_SQUEUE
 *			and udp_reader_count == 1
 *
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_MT_QUEUED
 *			and udp_reader_count > 1
 *
 * UDP_MT_QUEUED	udp_reader_count drops to zero	UDP_QUEUED_SQUEUE
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_SQUEUE
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count != 0
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_MT_HOT
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count
 *			drops to zero
 *
 * UDP_SQUEUE		udp_squeue_count drops to zero	UDP_MT_HOT
 * ----------------------------------------------------------------
 */

/*
 * Given a queue of the udp module instance, return the queue that follows
 * its write side.  The assertions require that neither side of q carries a
 * q_ptr, and that the returned queue has a q_ptr identifying a UDP conn_t.
 */
static queue_t *
UDP_WR(queue_t *q)
{
	ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL);
	ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next)));

	return (_WR(q)->q_next);
}

/*
 * Given a queue whose q_ptr identifies a UDP conn_t, return the queue that
 * follows its read side.  The assertions require that both sides of q
 * carry a q_ptr and that the returned queue does not.
 */
static queue_t *
UDP_RD(queue_t *q)
{
	ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(q)));
	ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL);

	return (_RD(q)->q_next);
}

/* Mode invariants are checked in DEBUG builds only; otherwise a no-op. */
#ifdef DEBUG
#define	UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller)
#else
#define	UDP_MODE_ASSERTIONS(udp, caller)
#endif

/* Invariants */
#ifdef DEBUG

/* Number of times each mode was observed, indexed in the switch order below */
uint32_t udp_count[4];

/* Context of udp_mode_assertions */
#define	UDP_ENTER		1
#define	UDP_BECOME_WRITER	2
#define	UDP_EXIT		3

/*
 * Assert the invariants that must hold for the endpoint's current
 * udp_mode.  `caller' is one of UDP_ENTER, UDP_BECOME_WRITER or UDP_EXIT;
 * the nonzero-count checks are skipped for UDP_ENTER.  conn_lock must be
 * held by the caller.
 */
static void
udp_mode_assertions(udp_t *udp, int caller)
{
	ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock));

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		/*
		 * Messages have not yet been enqueued on the internal queue,
		 * otherwise we would have switched to UDP_MT_QUEUED. Likewise
		 * by definition, there can't be any messages enqueued on the
		 * squeue. The UDP could be quiescent, so udp_reader_count
		 * could be zero at entry.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 &&
		    udp->udp_squeue_count == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0);
		udp_count[0]++;
		break;

	case UDP_MT_QUEUED:
		/*
		 * The last MT thread to exit the udp perimeter empties the
		 * internal queue and then switches the UDP to
		 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED
		 * mode, it means there must be at least 1 MT thread still in
		 * the perimeter and at least 1 message on the internal queue.
		 */
		ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL &&
		    udp->udp_mpcount != 0 && udp->udp_squeue_count == 0);
		udp_count[1]++;
		break;

	case UDP_QUEUED_SQUEUE:
		/*
		 * The switch has happened from MT to SQUEUE. So there can't
		 * be any MT threads. Messages could still pile up on the
		 * internal queue until the transition is complete and we
		 * move to UDP_SQUEUE mode. We can't assert on nonzero
		 * udp_squeue_count since the squeue could drain any time.
		 */
		ASSERT(udp->udp_reader_count == 0);
		udp_count[2]++;
		break;

	case UDP_SQUEUE:
		/*
		 * The transition is complete. There can't be any messages on
		 * the internal queue. The udp could be quiescent or the squeue
		 * could drain any time, so we can't assert on nonzero
		 * udp_squeue_count during entry. Nor can we assert that
		 * udp_reader_count is zero, since, a reader thread could have
		 * directly become writer in line by calling udp_become_writer
		 * without going through the queued states.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0);
		udp_count[3]++;
		break;
	}
}
#endif

/*
 * Enter the UDP perimeter with message mp and run proc on it.
 *
 * If the conn is closing, the message is freed and nothing runs.
 * Otherwise the action depends on the current mode:
 *   UDP_MT_HOT		count ourselves as a reader, drop conn_lock and call
 *			proc directly.
 *   UDP_SQUEUE		bump udp_squeue_count, take a conn reference and
 *			hand the message to the squeue.
 *   UDP_MT_QUEUED,
 *   UDP_QUEUED_SQUEUE	append the message to the internal queue.
 * conn_lock is acquired and released inside the macro.
 */
#define	_UDP_ENTER(connp, mp, proc, tag) {				\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	if ((connp)->conn_state_flags & CONN_CLOSING) {			\
		mutex_exit(&(connp)->conn_lock);			\
		freemsg(mp);						\
	} else {							\
		UDP_MODE_ASSERTIONS(_udp, UDP_ENTER);			\
									\
		switch (_udp->udp_mode) {				\
		case UDP_MT_HOT:					\
			/* We can execute as reader right away. */	\
			UDP_READERS_INCREF(_udp);			\
			mutex_exit(&(connp)->conn_lock);		\
			(*(proc))(connp, mp, (connp)->conn_sqp);	\
			break;						\
									\
		case UDP_SQUEUE:					\
			/*						\
			 * We are in squeue mode, send the		\
			 * packet to the squeue				\
			 */						\
			_udp->udp_squeue_count++;			\
			CONN_INC_REF_LOCKED(connp);			\
			mutex_exit(&(connp)->conn_lock);		\
			squeue_enter((connp)->conn_sqp, mp, proc,	\
			    connp, tag);				\
			break;						\
									\
		case UDP_MT_QUEUED:					\
		case UDP_QUEUED_SQUEUE:					\
			/*						\
			 * Some messages may have been enqueued		\
			 * ahead of us.  Enqueue the new message	\
			 * at the tail of the internal queue to		\
			 * preserve message ordering.			\
			 */						\
			UDP_ENQUEUE_MP(_udp, mp, proc, tag);		\
			mutex_exit(&(connp)->conn_lock);		\
			break;						\
		}							\
	}								\
}

/* Function form of _UDP_ENTER(); see the macro for the per-mode behavior. */
static void
udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	_UDP_ENTER(connp, mp, proc, tag);
}

/*
 * Arrange for proc to run on mp with exclusive access to the endpoint.
 *
 * UDP_MT_HOT with a single reader: switch straight to UDP_SQUEUE, set
 * udp_squeue_count to 1, take a conn reference and enter the squeue.
 * udp_reader_count is left as is on this path.
 * UDP_MT_HOT with more readers, or UDP_MT_QUEUED: mark the endpoint
 * UDP_MT_QUEUED and park the message on the internal queue.
 * UDP_SQUEUE or UDP_QUEUED_SQUEUE: we already are the writer, so bump
 * udp_squeue_count and call proc directly.
 * conn_lock is acquired here and dropped before proc or the squeue is
 * invoked.
 */
static void
udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	udp_t	*udp;

	udp = connp->conn_udp;

	mutex_enter(&connp->conn_lock);

	UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER);

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		if (udp->udp_reader_count == 1) {
			/*
			 * We are the only MT thread. Switch to squeue mode
			 * immediately.
			 */
			udp->udp_mode = UDP_SQUEUE;
			udp->udp_squeue_count = 1;
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			squeue_enter(connp->conn_sqp, mp, proc, connp, tag);
			return;
		}
		/* FALLTHRU */

	case UDP_MT_QUEUED:
		/* Enqueue the packet internally in UDP */
		udp->udp_mode = UDP_MT_QUEUED;
		UDP_ENQUEUE_MP(udp, mp, proc, tag);
		mutex_exit(&connp->conn_lock);
		return;

	case UDP_SQUEUE:
	case UDP_QUEUED_SQUEUE:
		/*
		 * We are already exclusive. i.e. we are already
		 * writer. Simply call the desired function.
		 */
		udp->udp_squeue_count++;
		mutex_exit(&connp->conn_lock);
		(*proc)(connp, mp, connp->conn_sqp);
		return;
	}
}

/*
 * Transition from MT mode to SQUEUE mode, when the last MT thread
 * is exiting the UDP perimeter. Move all messages from the internal
 * udp queue to the squeue.
* A better way would be to move all the
 * messages in one shot, this needs more support from the squeue framework
 *
 * Called with conn_lock held and the endpoint in UDP_MT_QUEUED mode; the
 * lock is dropped while messages are handed to the squeue and is held
 * again on return.  On return the mode is UDP_SQUEUE if udp_squeue_count
 * is nonzero, otherwise UDP_MT_HOT.
 */
static void
udp_switch_to_squeue(udp_t *udp)
{
	mblk_t *mp;
	mblk_t	*mp_next;
	sqproc_t proc;
	uint8_t	tag;
	conn_t	*connp = udp->udp_connp;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(udp->udp_mode == UDP_MT_QUEUED);
	while (udp->udp_mphead != NULL) {
		/* Detach the whole internal queue while the lock is held. */
		mp = udp->udp_mphead;
		udp->udp_mphead = NULL;
		udp->udp_mptail = NULL;
		udp->udp_mpcount = 0;
		udp->udp_mode = UDP_QUEUED_SQUEUE;
		mutex_exit(&connp->conn_lock);
		/*
		 * It is best not to hold any locks across the calls
		 * to squeue functions. Since we drop the lock we
		 * need to go back and check the udp_mphead once again
		 * after the squeue_fill and hence the while loop at
		 * the top of this function
		 */
		for (; mp != NULL; mp = mp_next) {
			mp_next = mp->b_next;
			/* Recover what UDP_ENQUEUE_MP stashed in the mblk. */
			proc = (sqproc_t)mp->b_prev;
			tag = (uint8_t)((uintptr_t)mp->b_queue);
			mp->b_next = NULL;
			mp->b_prev = NULL;
			mp->b_queue = NULL;
			CONN_INC_REF(connp);
			/*
			 * NOTE(review): this increment runs with conn_lock
			 * dropped, while UDP_SQUEUE_DECREF asserts the lock
			 * is held; confirm that this cannot race.
			 */
			udp->udp_squeue_count++;
			squeue_fill(connp->conn_sqp, mp, proc, connp,
			    tag);
		}
		mutex_enter(&connp->conn_lock);
	}
	/*
	 * udp_squeue_count of zero implies that the squeue has drained
	 * even before we arrived here (i.e. after the squeue_fill above)
	 */
	udp->udp_mode = (udp->udp_squeue_count != 0) ?
	    UDP_SQUEUE : UDP_MT_HOT;
}

/*
 * Leave the UDP perimeter.  The action depends on the current mode:
 *   UDP_MT_HOT		drop our reader count.
 *   UDP_SQUEUE		drop our squeue count; if it reaches zero the
 *			endpoint reverts to UDP_MT_HOT.
 *   UDP_MT_QUEUED	drop our reader count; the last reader out moves
 *			the queued messages over via udp_switch_to_squeue().
 *   UDP_QUEUED_SQUEUE	drop our squeue count only; the mode is left alone.
 * conn_lock is acquired and released inside the macro.
 */
#define	_UDP_EXIT(connp) {						\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	UDP_MODE_ASSERTIONS(_udp, UDP_EXIT);				\
									\
	switch (_udp->udp_mode) {					\
	case UDP_MT_HOT:						\
		UDP_READERS_DECREF(_udp);				\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_SQUEUE:						\
		UDP_SQUEUE_DECREF(_udp);				\
		if (_udp->udp_squeue_count == 0)			\
			_udp->udp_mode = UDP_MT_HOT;			\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_MT_QUEUED:						\
		/*							\
		 * If this is the last MT thread, we need to		\
		 * switch to squeue mode				\
		 */							\
		UDP_READERS_DECREF(_udp);				\
		if (_udp->udp_reader_count == 0)			\
			udp_switch_to_squeue(_udp);			\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_QUEUED_SQUEUE:						\
		UDP_SQUEUE_DECREF(_udp);				\
		/*							\
		 * Even if the udp_squeue_count drops to zero, we	\
		 * don't want to change udp_mode to UDP_MT_HOT here.	\
		 * The thread in udp_switch_to_squeue will take care	\
		 * of the transition to UDP_MT_HOT, after emptying	\
		 * any more new messages that have been enqueued in	\
		 * udp_mphead.						\
		 */							\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
	}								\
}

/* Function form of _UDP_EXIT(); see the macro for the per-mode behavior. */
static void
udp_exit(conn_t *connp)
{
	_UDP_EXIT(connp);
}

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP). MLP has special function
 * on TX systems. Once a port is made MLP, it's not available as
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" find a suitable anon port.
923 */ 924 static in_port_t 925 udp_get_next_priv_port(udp_t *udp) 926 { 927 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 928 in_port_t nextport; 929 boolean_t restart = B_FALSE; 930 931 retry: 932 if (next_priv_port < udp_min_anonpriv_port || 933 next_priv_port >= IPPORT_RESERVED) { 934 next_priv_port = IPPORT_RESERVED - 1; 935 if (restart) 936 return (0); 937 restart = B_TRUE; 938 } 939 940 if (is_system_labeled() && 941 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 942 next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) { 943 next_priv_port = nextport; 944 goto retry; 945 } 946 947 return (next_priv_port--); 948 } 949 950 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 951 /* ARGSUSED */ 952 static int 953 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 954 { 955 udp_fanout_t *udpf; 956 int i; 957 zoneid_t zoneid; 958 conn_t *connp; 959 udp_t *udp; 960 961 connp = Q_TO_CONN(q); 962 udp = connp->conn_udp; 963 964 /* Refer to comments in udp_status_report(). */ 965 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 966 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 967 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 968 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 969 return (0); 970 } 971 } 972 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 973 /* The following may work even if we cannot get a large buf. */ 974 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 975 return (0); 976 } 977 978 (void) mi_mpprintf(mp, 979 "UDP " MI_COL_HDRPAD_STR 980 /* 12345678[89ABCDEF] */ 981 " zone lport src addr dest addr port state"); 982 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 983 984 zoneid = connp->conn_zoneid; 985 986 for (i = 0; i < udp_bind_fanout_size; i++) { 987 udpf = &udp_bind_fanout[i]; 988 mutex_enter(&udpf->uf_lock); 989 990 /* Print the hash index. 
*/ 991 udp = udpf->uf_udp; 992 if (zoneid != GLOBAL_ZONEID) { 993 /* skip to first entry in this zone; might be none */ 994 while (udp != NULL && 995 udp->udp_connp->conn_zoneid != zoneid) 996 udp = udp->udp_bind_hash; 997 } 998 if (udp != NULL) { 999 uint_t print_len, buf_len; 1000 1001 buf_len = mp->b_cont->b_datap->db_lim - 1002 mp->b_cont->b_wptr; 1003 print_len = snprintf((char *)mp->b_cont->b_wptr, 1004 buf_len, "%d\n", i); 1005 if (print_len < buf_len) { 1006 mp->b_cont->b_wptr += print_len; 1007 } else { 1008 mp->b_cont->b_wptr += buf_len; 1009 } 1010 for (; udp != NULL; udp = udp->udp_bind_hash) { 1011 if (zoneid == GLOBAL_ZONEID || 1012 zoneid == udp->udp_connp->conn_zoneid) 1013 udp_report_item(mp->b_cont, udp); 1014 } 1015 } 1016 mutex_exit(&udpf->uf_lock); 1017 } 1018 udp_last_ndd_get_info_time = ddi_get_lbolt(); 1019 return (0); 1020 } 1021 1022 /* 1023 * Hash list removal routine for udp_t structures. 1024 */ 1025 static void 1026 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 1027 { 1028 udp_t *udpnext; 1029 kmutex_t *lockp; 1030 1031 if (udp->udp_ptpbhn == NULL) 1032 return; 1033 1034 /* 1035 * Extract the lock pointer in case there are concurrent 1036 * hash_remove's for this instance. 
1037 */ 1038 ASSERT(udp->udp_port != 0); 1039 if (!caller_holds_lock) { 1040 lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock; 1041 ASSERT(lockp != NULL); 1042 mutex_enter(lockp); 1043 } 1044 if (udp->udp_ptpbhn != NULL) { 1045 udpnext = udp->udp_bind_hash; 1046 if (udpnext != NULL) { 1047 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 1048 udp->udp_bind_hash = NULL; 1049 } 1050 *udp->udp_ptpbhn = udpnext; 1051 udp->udp_ptpbhn = NULL; 1052 } 1053 if (!caller_holds_lock) { 1054 mutex_exit(lockp); 1055 } 1056 } 1057 1058 static void 1059 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 1060 { 1061 udp_t **udpp; 1062 udp_t *udpnext; 1063 1064 ASSERT(MUTEX_HELD(&uf->uf_lock)); 1065 if (udp->udp_ptpbhn != NULL) { 1066 udp_bind_hash_remove(udp, B_TRUE); 1067 } 1068 udpp = &uf->uf_udp; 1069 udpnext = udpp[0]; 1070 if (udpnext != NULL) { 1071 /* 1072 * If the new udp bound to the INADDR_ANY address 1073 * and the first one in the list is not bound to 1074 * INADDR_ANY we skip all entries until we find the 1075 * first one bound to INADDR_ANY. 1076 * This makes sure that applications binding to a 1077 * specific address get preference over those binding to 1078 * INADDR_ANY. 1079 */ 1080 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 1081 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 1082 while ((udpnext = udpp[0]) != NULL && 1083 !V6_OR_V4_INADDR_ANY( 1084 udpnext->udp_bound_v6src)) { 1085 udpp = &(udpnext->udp_bind_hash); 1086 } 1087 if (udpnext != NULL) 1088 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1089 } else { 1090 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1091 } 1092 } 1093 udp->udp_bind_hash = udpnext; 1094 udp->udp_ptpbhn = udpp; 1095 udpp[0] = udp; 1096 } 1097 1098 /* 1099 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 1100 * passed to udp_wput. 1101 * It associates a port number and local address with the stream. 
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the udp_g_next_port_to_try.
 */
static void
udp_bind(queue_t *q, mblk_t *mp)
{
	sin_t		*sin;
	sin6_t		*sin6;
	mblk_t		*mp1;
	in_port_t	port;		/* Host byte order */
	in_port_t	requested_port;	/* Host byte order */
	struct T_bind_req *tbr;
	int		count;
	in6_addr_t	v6src;
	boolean_t	bind_to_req_port_only;
	int		loopmax;
	udp_fanout_t	*udpf;
	in_port_t	lport;		/* Network byte order */
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;
	boolean_t	is_inaddr_any;
	mlp_type_t	addrtype, mlptype;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	/* The message must at least hold a complete T_bind_req header. */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	/* Binding is only accepted on an endpoint that is still unbound. */
	if (udp->udp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Reallocate the message to make sure we have enough room for an
	 * address and the protocol type.
	 */
	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
	if (!mp1) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	mp = mp1;
	tbr = (struct T_bind_req *)mp->b_rptr;
	/* The address length alone selects how the request is interpreted. */
	switch (tbr->ADDR_length) {
	case 0:			/* Request for a generic port */
		/* Synthesize an all-zero address of the endpoint's family. */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (udp->udp_family == AF_INET) {
			tbr->ADDR_length = sizeof (sin_t);
			sin = (sin_t *)&tbr[1];
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
		} else {
			ASSERT(udp->udp_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&tbr[1];
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
		}
		port = 0;
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin->sin_port);
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin6->sin6_port);
		break;

	default:		/* Invalid request */
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	requested_port = port;

	/*
	 * Only a T_BIND_REQ that names a port insists on exactly that port;
	 * an O_T_BIND_REQ, or a request for port 0, may end up on another.
	 */
	if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ)
		bind_to_req_port_only = B_FALSE;
	else			/* T_BIND_REQ and requested_port != 0 */
		bind_to_req_port_only = B_TRUE;

	if (requested_port == 0) {
		/*
		 * If the application passed in zero for the port number, it
		 * doesn't care which port number we bind to. Get one in the
		 * valid range.
		 */
		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			port = udp_update_next_port(udp,
			    udp_g_next_port_to_try, B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < udp_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			/* Ports listed in udp_g_epriv_ports count too. */
			for (i = 0; i < udp_g_num_epriv_ports; i++) {
				if (port == udp_g_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);

			if (secpolicy_net_privaddr(cr, port) != 0) {
				udp_err_ack(q, mp, TACCES, 0);
				return;
			}
		}
	}

	/* Port 0 at this point means no anonymous port could be chosen. */
	if (port == 0) {
		udp_err_ack(q, mp, TNOADDR, 0);
		return;
	}

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it.
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		/* An AF_INET6 endpoint bound to a v4-mapped address is IPv4. */
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream.  If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set the bind fails; otherwise we
	 * search for an unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different than in SunOS 4.X which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code effects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */
		loopmax = IPPORT_RESERVED - udp_min_anonpriv_port;
	} else {
		loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
	zoneid = connp->conn_zoneid;

	/*
	 * Port search loop.  Both "break" statements below leave the loop
	 * with udpf->uf_lock still held; the lock is dropped only after the
	 * endpoint has been inserted into the bind hash further down.
	 */
	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;

		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (zoneid != udp1->udp_connp->conn_zoneid &&
			    !udp->udp_mac_exempt && !udp1->udp_mac_exempt)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * is changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind ||
			    udp1->udp_mac_exempt || udp->udp_mac_exempt) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion)
				continue;

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct different IP addresses
			 * (non-wildcard, also), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			break;
		}

		/*
		 * SO_REUSEADDR with an explicitly requested port accepts the
		 * port even if it is in use, unless an exclusive-bind
		 * conflict was found above.
		 */
		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when requested port
			 * is bound (and only first of the for()
			 * loop iteration).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 *
			 */
			udp_err_ack(q, mp, TADDRBUSY, 0);
			return;
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update udp_g_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    udp_g_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp_err_ack(q, mp, TNOADDR, 0);
			return;
		}
	}

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address udp_rput
	 * will clear the source address when it receives the T_BIND_ACK.
	 */
	udp->udp_v6src = udp->udp_bound_v6src = v6src;
	udp->udp_port = lport;
	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
		udp_g_next_port_to_try = port + 1;
	}

	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
	if (udp->udp_family == AF_INET) {
		sin->sin_port = udp->udp_port;
	} else {
		int error;

		sin6->sin6_port = udp->udp_port;
		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			/*
			 * NOTE(review): udp_v6src, udp_bound_v6src and
			 * udp_port were already overwritten above and are
			 * not restored on this path; udp_state is still
			 * TS_UNBND here.  Confirm this is harmless.
			 */
			mutex_exit(&udpf->uf_lock);
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	/* Publish the binding while the bucket lock is still held. */
	udp->udp_state = TS_IDLE;
	udp_bind_hash_insert(udpf, udp);
	mutex_exit(&udpf->uf_lock);

	if (cl_inet_bind) {
		/*
		 * Running in cluster mode - register bind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}

	}

	/*
	 * Labeled-system (MLP) checks.
	 *
	 * NOTE(review): every error return from here to the end of the
	 * function happens after the endpoint was marked TS_IDLE and
	 * inserted into the bind hash above, and no rollback of that state
	 * is visible in this function.  Confirm whether the caller or the
	 * error-ack path undoes the binding.
	 */
	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
	if (is_system_labeled() && (!connp->conn_anon_port ||
	    connp->conn_anon_mlp)) {
		uint16_t mlpport;
		cred_t *cr = connp->conn_cred;
		zone_t *zone;

		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
		    mlptSingle;
		addrtype = tsol_mlp_addr_type(zoneid, IPV6_VERSION, &v6src);
		if (addrtype == mlptSingle) {
			udp_err_ack(q, mp, TNOADDR, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}
		/* Anonymous binds are checked against PMAPPORT instead. */
		mlpport = connp->conn_anon_port ? PMAPPORT : port;
		zone = crgetzone(cr);
		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
		    addrtype);
		if (mlptype != mlptSingle &&
		    (connp->conn_mlp_type == mlptSingle ||
		    secpolicy_net_bindmlp(cr) != 0)) {
			if (udp->udp_debug) {
				(void) strlog(UDP_MOD_ID, 0, 1,
				    SL_ERROR|SL_TRACE,
				    "udp_bind: no priv for multilevel port %d",
				    mlpport);
			}
			udp_err_ack(q, mp, TACCES, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}

		/*
		 * If we're specifically binding a shared IP address and the
		 * port is MLP on shared addresses, then check to see if this
		 * zone actually owns the MLP.  Reject if not.
		 */
		if (mlptype == mlptShared && addrtype == mlptShared) {
			zoneid_t mlpzone;

			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
			    htons(mlpport));
			if (connp->conn_zoneid != mlpzone) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: attempt to bind port "
					    "%d on shared addr in zone %d "
					    "(should be %d)",
					    mlpport, connp->conn_zoneid,
					    mlpzone);
				}
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		if (connp->conn_anon_port) {
			int error;

			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
			    port, B_TRUE);
			if (error != 0) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: cannot establish anon "
					    "MLP for port %d", port);
				}
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		connp->conn_mlp_type = mlptype;
	}

	/* Pass the protocol number in the message following the address. */
	*mp->b_wptr++ = IPPROTO_UDP;
	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
		/*
		 * Append a request for an IRE if udp_v6src not
		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
		 */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			udp_err_ack(q, mp, TSYSERR, ENOMEM);
			return;
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
	}
	if (udp->udp_family == AF_INET6)
		mp = ip_bind_v6(q, mp, connp, NULL);
	else
		mp = ip_bind_v4(q, mp, connp);

	/*
	 * A non-NULL reply is processed right away.  On a NULL return an
	 * extra conn reference is taken; presumably the bind was deferred
	 * and completes via udp_resume_bind() - TODO confirm.
	 */
	if (mp != NULL)
		udp_rput_other(_RD(q), mp);
	else
		CONN_INC_REF(connp);
}


/*
 * Re-enter the UDP endpoint via udp_enter() so that udp_resume_bind_cb()
 * processes mp, tagged SQTAG_BIND_RETRY.
 */
void
udp_resume_bind(conn_t *connp, mblk_t *mp)
{
	udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY);
}

/*
 * This is called from ip_wput_nondata to resume a deferred UDP bind.
 * arg is the conn_t; mp is handed to udp_rput_other() on the conn's read
 * queue, after which the pending operation is marked done and the
 * endpoint is exited.
 */
/* ARGSUSED */
static void
udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2)
{
	conn_t *connp = arg;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));

	udp_rput_other(connp->conn_rq, mp);

	CONN_OPER_PENDING_DONE(connp);
	udp_exit(connp);
}

/*
 * This routine handles each T_CONN_REQ message passed to udp.  It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_rput.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
 */
static void
udp_connect(queue_t *q, mblk_t *mp)
{
	sin6_t	*sin6;
	sin_t	*sin;
	struct T_conn_req	*tcr;
	in6_addr_t v6dst;
	ipaddr_t v4dst;
	uint16_t dstport;
	uint32_t flowinfo;
	mblk_t	*mp1, *mp2;
	udp_fanout_t	*udpf;
	udp_t	*udp, *udp1;

	udp = Q_TO_UDP(q);

	/* Only the pointer is formed here; it is not read until validated. */
	tcr = (struct T_conn_req *)mp->b_rptr;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	/*
	 * This UDP must have bound to a port already before doing
	 * a connect.
	 */
	if (udp->udp_state == TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);

	udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)];

	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		mutex_enter(&udpf->uf_lock);
		udp->udp_v6src = udp->udp_bound_v6src;
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
	}

	/* Options on a connect request are not accepted. */
	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine packet type based on type of address passed in
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that address family matches the type of
	 * family of the address passed down
	 */
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v6dst = sin6->sin6_addr;
		/* A v4-mapped destination switches the endpoint to IPv4. */
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			flowinfo = 0;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
			flowinfo = sin6->sin6_flowinfo;
		}
		dstport = sin6->sin6_port;
		break;
	}
	if (dstport == 0) {
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	/*
	 * Create a default IP header with no IP options.
	 */
	udp->udp_dstport = dstport;
	if (udp->udp_ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (udp->udp_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = 0;

		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * use the address of that interface as our
		 * source address if no source address has been set.
		 */
		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
		    CLASSD(v4dst) &&
		    udp->udp_multicast_if_addr != INADDR_ANY) {
			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
			    &udp->udp_v6src);
		}
	} else {
		ASSERT(udp->udp_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = flowinfo;
		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * then the ip bind logic will pick the correct source
		 * address (i.e. matching the outgoing multicast interface).
		 */
	}

	/*
	 * Verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER
	 */
	mutex_enter(&udpf->uf_lock);
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;
		if (udp->udp_port != udp1->udp_port ||
		    udp->udp_ipversion != udp1->udp_ipversion ||
		    dstport != udp1->udp_dstport ||
		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst))
			continue;
		/* An identical connected 4-tuple already exists. */
		mutex_exit(&udpf->uf_lock);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}
	udp->udp_state = TS_DATA_XFER;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to verify that there is a route
	 * and to determine the source address.
	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		/*
		 * Shared failure exit, also reached by goto from the
		 * allocation failures below: revert to TS_IDLE.
		 */
bind_failed:
		mutex_enter(&udpf->uf_lock);
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
		return;
	}

	/*
	 * We also have to send a connection confirmation to
	 * keep TLI happy. Prepare it for udp_rput.
	 */
	if (udp->udp_family == AF_INET)
		mp2 = mi_tpi_conn_con(NULL, (char *)sin,
		    sizeof (*sin), NULL, 0);
	else
		mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
		    sizeof (*sin6), NULL, 0);
	if (mp2 == NULL) {
		freemsg(mp1);
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack. */
		freemsg(mp2);
		/* The bind request mblk is reused for the error ack. */
		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
	linkb(mp1, mp);
	linkb(mp1, mp2);

	mblk_setcred(mp1, udp->udp_connp->conn_cred);
	if (udp->udp_family == AF_INET)
		mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
	else
		mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);

	/*
	 * A non-NULL reply is processed right away; on a NULL return an
	 * extra conn reference is taken (presumably for a deferred bind
	 * that completes later - TODO confirm).
	 */
	if (mp1 != NULL)
		udp_rput_other(_RD(q), mp1);
	else
		CONN_INC_REF(udp->udp_connp);
}

/*
 * Close routine for the UDP stream.  Quiesces the conn, drains the
 * receive list, turns off queue processing, restores the IP queue
 * water marks, frees the udp_t and resets the conn flags so the conn
 * is no longer marked as UDP.  Always returns 0.
 */
static int
udp_close(queue_t *q)
{
	conn_t	*connp = Q_TO_CONN(UDP_WR(q));
	udp_t	*udp;
	queue_t	*ip_rq = RD(UDP_WR(q));

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	ip_quiesce_conn(connp);
	/*
	 * Disable read-side synchronous stream
	 * interface and drain any queued data.
	 */
	udp_rcv_drain(q, udp, B_TRUE);
	ASSERT(!udp->udp_direct_sockfs);

	qprocsoff(q);

	/* restore IP module's high and low water marks to default values */
	ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat;

	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	ASSERT(udp->udp_rcv_list_tail == NULL);

	/* connp is now single threaded. */
	udp_close_free(connp);
	/*
	 * Restore connp as an IP endpoint.  We don't need
	 * any locks since we are now single threaded
	 */
	connp->conn_flags &= ~IPCL_UDP;
	connp->conn_state_flags &=
	    ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED);
	connp->conn_ulp_labeled = B_FALSE;
	return (0);
}

/*
 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn
 *
 * Unregisters the binding with the cluster hook (only when the hook is
 * set and the endpoint is in TS_IDLE), removes the endpoint from the bind
 * hash, and then blocks on conn_cv until there are no readers, no squeue
 * references and the mode is back to UDP_MT_HOT.
 */
void
udp_quiesce_conn(conn_t *connp)
{
	udp_t	*udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)(&(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		}
	}

	udp_bind_hash_remove(udp, B_FALSE);

	mutex_enter(&connp->conn_lock);
	while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 ||
	    udp->udp_mode != UDP_MT_HOT) {
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	}
	mutex_exit(&connp->conn_lock);
}

/*
 * Release everything hanging off the udp_t (send/receive IP options,
 * sticky headers, sticky ip6 packet state), sever the conn <-> udp
 * links and return the udp_t to udp_cache.
 */
void
udp_close_free(conn_t *connp)
{
	udp_t *udp = connp->conn_udp;

	/* If there are any options associated with the stream, free them. */
	if (udp->udp_ip_snd_options) {
		mi_free((char *)udp->udp_ip_snd_options);
		udp->udp_ip_snd_options = NULL;
	}

	if (udp->udp_ip_rcv_options) {
		mi_free((char *)udp->udp_ip_rcv_options);
		udp->udp_ip_rcv_options = NULL;
	}

	/* Free memory associated with sticky options */
	if (udp->udp_sticky_hdrs_len != 0) {
		kmem_free(udp->udp_sticky_hdrs,
		    udp->udp_sticky_hdrs_len);
		udp->udp_sticky_hdrs = NULL;
		udp->udp_sticky_hdrs_len = 0;
	}

	ip6_pkt_free(&udp->udp_sticky_ipp);

	udp->udp_connp = NULL;
	connp->conn_udp = NULL;
	kmem_cache_free(udp_cache, udp);
}

/*
 * This routine handles each T_DISCON_REQ message passed to udp
 * as an indicating that UDP is no longer connected. This results
 * in sending a T_BIND_REQ to IP to restore the binding to just
 * the local address/port.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying just the local address/port
 *	T_OK_ACK	- for the T_DISCON_REQ
 *
 * The disconnect completes in udp_rput.
 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
2038 */ 2039 static void 2040 udp_disconnect(queue_t *q, mblk_t *mp) 2041 { 2042 udp_t *udp = Q_TO_UDP(q); 2043 mblk_t *mp1; 2044 udp_fanout_t *udpf; 2045 2046 if (udp->udp_state != TS_DATA_XFER) { 2047 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 2048 "udp_disconnect: bad state, %u", udp->udp_state); 2049 udp_err_ack(q, mp, TOUTSTATE, 0); 2050 return; 2051 } 2052 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 2053 mutex_enter(&udpf->uf_lock); 2054 udp->udp_v6src = udp->udp_bound_v6src; 2055 udp->udp_state = TS_IDLE; 2056 mutex_exit(&udpf->uf_lock); 2057 2058 /* 2059 * Send down bind to IP to remove the full binding and revert 2060 * to the local address binding. 2061 */ 2062 if (udp->udp_family == AF_INET) 2063 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 2064 else 2065 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 2066 if (mp1 == NULL) { 2067 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2068 return; 2069 } 2070 mp = mi_tpi_ok_ack_alloc(mp); 2071 if (mp == NULL) { 2072 /* Unable to reuse the T_DISCON_REQ for the ack. */ 2073 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 2074 return; 2075 } 2076 2077 if (udp->udp_family == AF_INET6) { 2078 int error; 2079 2080 /* Rebuild the header template */ 2081 error = udp_build_hdrs(q, udp); 2082 if (error != 0) { 2083 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 2084 freemsg(mp1); 2085 return; 2086 } 2087 } 2088 mutex_enter(&udpf->uf_lock); 2089 udp->udp_discon_pending = 1; 2090 mutex_exit(&udpf->uf_lock); 2091 2092 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 2093 linkb(mp1, mp); 2094 2095 if (udp->udp_family == AF_INET6) 2096 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 2097 else 2098 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 2099 2100 if (mp1 != NULL) 2101 udp_rput_other(_RD(q), mp1); 2102 else 2103 CONN_INC_REF(udp->udp_connp); 2104 } 2105 2106 /* This routine creates a T_ERROR_ACK message and passes it upstream. 
*/ 2107 static void 2108 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 2109 { 2110 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 2111 putnext(UDP_RD(q), mp); 2112 } 2113 2114 /* Shorthand to generate and send TPI error acks to our client */ 2115 static void 2116 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2117 int sys_error) 2118 { 2119 struct T_error_ack *teackp; 2120 2121 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2122 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2123 teackp = (struct T_error_ack *)mp->b_rptr; 2124 teackp->ERROR_prim = primitive; 2125 teackp->TLI_error = t_error; 2126 teackp->UNIX_error = sys_error; 2127 putnext(UDP_RD(q), mp); 2128 } 2129 } 2130 2131 /*ARGSUSED*/ 2132 static int 2133 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2134 { 2135 int i; 2136 2137 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2138 if (udp_g_epriv_ports[i] != 0) 2139 (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); 2140 } 2141 return (0); 2142 } 2143 2144 /* ARGSUSED */ 2145 static int 2146 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2147 cred_t *cr) 2148 { 2149 long new_value; 2150 int i; 2151 2152 /* 2153 * Fail the request if the new value does not lie within the 2154 * port number limits. 
2155 */ 2156 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2157 new_value <= 0 || new_value >= 65536) { 2158 return (EINVAL); 2159 } 2160 2161 /* Check if the value is already in the list */ 2162 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2163 if (new_value == udp_g_epriv_ports[i]) { 2164 return (EEXIST); 2165 } 2166 } 2167 /* Find an empty slot */ 2168 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2169 if (udp_g_epriv_ports[i] == 0) 2170 break; 2171 } 2172 if (i == udp_g_num_epriv_ports) { 2173 return (EOVERFLOW); 2174 } 2175 2176 /* Set the new value */ 2177 udp_g_epriv_ports[i] = (in_port_t)new_value; 2178 return (0); 2179 } 2180 2181 /* ARGSUSED */ 2182 static int 2183 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2184 cred_t *cr) 2185 { 2186 long new_value; 2187 int i; 2188 2189 /* 2190 * Fail the request if the new value does not lie within the 2191 * port number limits. 2192 */ 2193 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2194 new_value <= 0 || new_value >= 65536) { 2195 return (EINVAL); 2196 } 2197 2198 /* Check that the value is already in the list */ 2199 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2200 if (udp_g_epriv_ports[i] == new_value) 2201 break; 2202 } 2203 if (i == udp_g_num_epriv_ports) { 2204 return (ESRCH); 2205 } 2206 2207 /* Clear the value */ 2208 udp_g_epriv_ports[i] = 0; 2209 return (0); 2210 } 2211 2212 /* At minimum we need 4 bytes of UDP header */ 2213 #define ICMP_MIN_UDP_HDR 4 2214 2215 /* 2216 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2217 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2218 * Assumes that IP has pulled up everything up to and including the ICMP header. 2219 * An M_CTL could potentially come here from some other module (i.e. if UDP 2220 * is pushed on some module other than IP). 
Thus, if we find that the M_CTL 2221 * does not have enough ICMP information , following STREAMS conventions, 2222 * we send it upstream assuming it is an M_CTL we don't understand. 2223 */ 2224 static void 2225 udp_icmp_error(queue_t *q, mblk_t *mp) 2226 { 2227 icmph_t *icmph; 2228 ipha_t *ipha; 2229 int iph_hdr_length; 2230 udpha_t *udpha; 2231 sin_t sin; 2232 sin6_t sin6; 2233 mblk_t *mp1; 2234 int error = 0; 2235 size_t mp_size = MBLKL(mp); 2236 udp_t *udp = Q_TO_UDP(q); 2237 2238 /* 2239 * Assume IP provides aligned packets - otherwise toss 2240 */ 2241 if (!OK_32PTR(mp->b_rptr)) { 2242 freemsg(mp); 2243 return; 2244 } 2245 2246 /* 2247 * Verify that we have a complete IP header and the application has 2248 * asked for errors. If not, send it upstream. 2249 */ 2250 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2251 noticmpv4: 2252 putnext(UDP_RD(q), mp); 2253 return; 2254 } 2255 2256 ipha = (ipha_t *)mp->b_rptr; 2257 /* 2258 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2259 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6. 2260 */ 2261 switch (IPH_HDR_VERSION(ipha)) { 2262 case IPV6_VERSION: 2263 udp_icmp_error_ipv6(q, mp); 2264 return; 2265 case IPV4_VERSION: 2266 break; 2267 default: 2268 goto noticmpv4; 2269 } 2270 2271 /* Skip past the outer IP and ICMP headers */ 2272 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2273 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2274 /* 2275 * If we don't have the correct outer IP header length or if the ULP 2276 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2277 * send the packet upstream. 
2278 */ 2279 if (iph_hdr_length < sizeof (ipha_t) || 2280 ipha->ipha_protocol != IPPROTO_ICMP || 2281 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2282 goto noticmpv4; 2283 } 2284 ipha = (ipha_t *)&icmph[1]; 2285 2286 /* Skip past the inner IP and find the ULP header */ 2287 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2288 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2289 /* 2290 * If we don't have the correct inner IP header length or if the ULP 2291 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2292 * bytes of UDP header, send it upstream. 2293 */ 2294 if (iph_hdr_length < sizeof (ipha_t) || 2295 ipha->ipha_protocol != IPPROTO_UDP || 2296 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2297 goto noticmpv4; 2298 } 2299 2300 switch (icmph->icmph_type) { 2301 case ICMP_DEST_UNREACHABLE: 2302 switch (icmph->icmph_code) { 2303 case ICMP_FRAGMENTATION_NEEDED: 2304 /* 2305 * IP has already adjusted the path MTU. 2306 * XXX Somehow pass MTU indication to application? 
2307 */ 2308 break; 2309 case ICMP_PORT_UNREACHABLE: 2310 case ICMP_PROTOCOL_UNREACHABLE: 2311 error = ECONNREFUSED; 2312 break; 2313 default: 2314 /* Transient errors */ 2315 break; 2316 } 2317 break; 2318 default: 2319 /* Transient errors */ 2320 break; 2321 } 2322 if (error == 0) { 2323 freemsg(mp); 2324 return; 2325 } 2326 2327 switch (udp->udp_family) { 2328 case AF_INET: 2329 sin = sin_null; 2330 sin.sin_family = AF_INET; 2331 sin.sin_addr.s_addr = ipha->ipha_dst; 2332 sin.sin_port = udpha->uha_dst_port; 2333 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2334 error); 2335 break; 2336 case AF_INET6: 2337 sin6 = sin6_null; 2338 sin6.sin6_family = AF_INET6; 2339 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2340 sin6.sin6_port = udpha->uha_dst_port; 2341 2342 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2343 NULL, 0, error); 2344 break; 2345 } 2346 if (mp1) 2347 putnext(UDP_RD(q), mp1); 2348 freemsg(mp); 2349 } 2350 2351 /* 2352 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2353 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2354 * Assumes that IP has pulled up all the extension headers as well as the 2355 * ICMPv6 header. 2356 * An M_CTL could potentially come here from some other module (i.e. if UDP 2357 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2358 * does not have enough ICMP information , following STREAMS conventions, 2359 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2360 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version 2361 * field (when cast to ipha_t in udp_icmp_error). 
2362 */ 2363 static void 2364 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2365 { 2366 icmp6_t *icmp6; 2367 ip6_t *ip6h, *outer_ip6h; 2368 uint16_t hdr_length; 2369 uint8_t *nexthdrp; 2370 udpha_t *udpha; 2371 sin6_t sin6; 2372 mblk_t *mp1; 2373 int error = 0; 2374 size_t mp_size = MBLKL(mp); 2375 udp_t *udp = Q_TO_UDP(q); 2376 2377 /* 2378 * Verify that we have a complete IP header. If not, send it upstream. 2379 */ 2380 if (mp_size < sizeof (ip6_t)) { 2381 noticmpv6: 2382 putnext(UDP_RD(q), mp); 2383 return; 2384 } 2385 2386 outer_ip6h = (ip6_t *)mp->b_rptr; 2387 /* 2388 * Verify this is an ICMPV6 packet, else send it upstream 2389 */ 2390 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2391 hdr_length = IPV6_HDR_LEN; 2392 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2393 &nexthdrp) || 2394 *nexthdrp != IPPROTO_ICMPV6) { 2395 goto noticmpv6; 2396 } 2397 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2398 ip6h = (ip6_t *)&icmp6[1]; 2399 /* 2400 * Verify we have a complete ICMP and inner IP header. 2401 */ 2402 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2403 goto noticmpv6; 2404 2405 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2406 goto noticmpv6; 2407 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2408 /* 2409 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2410 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2411 * packet upstream. 
2412 */ 2413 if ((*nexthdrp != IPPROTO_UDP) || 2414 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2415 goto noticmpv6; 2416 } 2417 2418 switch (icmp6->icmp6_type) { 2419 case ICMP6_DST_UNREACH: 2420 switch (icmp6->icmp6_code) { 2421 case ICMP6_DST_UNREACH_NOPORT: 2422 error = ECONNREFUSED; 2423 break; 2424 case ICMP6_DST_UNREACH_ADMIN: 2425 case ICMP6_DST_UNREACH_NOROUTE: 2426 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2427 case ICMP6_DST_UNREACH_ADDR: 2428 /* Transient errors */ 2429 break; 2430 default: 2431 break; 2432 } 2433 break; 2434 case ICMP6_PACKET_TOO_BIG: { 2435 struct T_unitdata_ind *tudi; 2436 struct T_opthdr *toh; 2437 size_t udi_size; 2438 mblk_t *newmp; 2439 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2440 sizeof (struct ip6_mtuinfo); 2441 sin6_t *sin6; 2442 struct ip6_mtuinfo *mtuinfo; 2443 2444 /* 2445 * If the application has requested to receive path mtu 2446 * information, send up an empty message containing an 2447 * IPV6_PATHMTU ancillary data item. 2448 */ 2449 if (!udp->udp_ipv6_recvpathmtu) 2450 break; 2451 2452 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2453 opt_length; 2454 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2455 BUMP_MIB(&udp_mib, udpInErrors); 2456 break; 2457 } 2458 2459 /* 2460 * newmp->b_cont is left to NULL on purpose. This is an 2461 * empty message containing only ancillary data. 
2462 */ 2463 newmp->b_datap->db_type = M_PROTO; 2464 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2465 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2466 tudi->PRIM_type = T_UNITDATA_IND; 2467 tudi->SRC_length = sizeof (sin6_t); 2468 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2469 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2470 tudi->OPT_length = opt_length; 2471 2472 sin6 = (sin6_t *)&tudi[1]; 2473 bzero(sin6, sizeof (sin6_t)); 2474 sin6->sin6_family = AF_INET6; 2475 sin6->sin6_addr = udp->udp_v6dst; 2476 2477 toh = (struct T_opthdr *)&sin6[1]; 2478 toh->level = IPPROTO_IPV6; 2479 toh->name = IPV6_PATHMTU; 2480 toh->len = opt_length; 2481 toh->status = 0; 2482 2483 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2484 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2485 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2486 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2487 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2488 /* 2489 * We've consumed everything we need from the original 2490 * message. Free it, then send our empty message. 2491 */ 2492 freemsg(mp); 2493 putnext(UDP_RD(q), newmp); 2494 return; 2495 } 2496 case ICMP6_TIME_EXCEEDED: 2497 /* Transient errors */ 2498 break; 2499 case ICMP6_PARAM_PROB: 2500 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2501 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2502 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2503 (uchar_t *)nexthdrp) { 2504 error = ECONNREFUSED; 2505 break; 2506 } 2507 break; 2508 } 2509 if (error == 0) { 2510 freemsg(mp); 2511 return; 2512 } 2513 2514 sin6 = sin6_null; 2515 sin6.sin6_family = AF_INET6; 2516 sin6.sin6_addr = ip6h->ip6_dst; 2517 sin6.sin6_port = udpha->uha_dst_port; 2518 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2519 2520 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2521 error); 2522 if (mp1) 2523 putnext(UDP_RD(q), mp1); 2524 freemsg(mp); 2525 } 2526 2527 /* 2528 * This routine responds to T_ADDR_REQ messages. 
It is called by udp_wput. 2529 * The local address is filled in if endpoint is bound. The remote address 2530 * is filled in if remote address has been precified ("connected endpoint") 2531 * (The concept of connected CLTS sockets is alien to published TPI 2532 * but we support it anyway). 2533 */ 2534 static void 2535 udp_addr_req(queue_t *q, mblk_t *mp) 2536 { 2537 sin_t *sin; 2538 sin6_t *sin6; 2539 mblk_t *ackmp; 2540 struct T_addr_ack *taa; 2541 udp_t *udp = Q_TO_UDP(q); 2542 2543 /* Make it large enough for worst case */ 2544 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2545 2 * sizeof (sin6_t), 1); 2546 if (ackmp == NULL) { 2547 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2548 return; 2549 } 2550 taa = (struct T_addr_ack *)ackmp->b_rptr; 2551 2552 bzero(taa, sizeof (struct T_addr_ack)); 2553 ackmp->b_wptr = (uchar_t *)&taa[1]; 2554 2555 taa->PRIM_type = T_ADDR_ACK; 2556 ackmp->b_datap->db_type = M_PCPROTO; 2557 /* 2558 * Note: Following code assumes 32 bit alignment of basic 2559 * data structures like sin_t and struct T_addr_ack. 2560 */ 2561 if (udp->udp_state != TS_UNBND) { 2562 /* 2563 * Fill in local address first 2564 */ 2565 taa->LOCADDR_offset = sizeof (*taa); 2566 if (udp->udp_family == AF_INET) { 2567 taa->LOCADDR_length = sizeof (sin_t); 2568 sin = (sin_t *)&taa[1]; 2569 /* Fill zeroes and then initialize non-zero fields */ 2570 *sin = sin_null; 2571 sin->sin_family = AF_INET; 2572 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2573 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2574 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2575 sin->sin_addr.s_addr); 2576 } else { 2577 /* 2578 * INADDR_ANY 2579 * udp_v6src is not set, we might be bound to 2580 * broadcast/multicast. 
Use udp_bound_v6src as 2581 * local address instead (that could 2582 * also still be INADDR_ANY) 2583 */ 2584 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2585 sin->sin_addr.s_addr); 2586 } 2587 sin->sin_port = udp->udp_port; 2588 ackmp->b_wptr = (uchar_t *)&sin[1]; 2589 if (udp->udp_state == TS_DATA_XFER) { 2590 /* 2591 * connected, fill remote address too 2592 */ 2593 taa->REMADDR_length = sizeof (sin_t); 2594 /* assumed 32-bit alignment */ 2595 taa->REMADDR_offset = taa->LOCADDR_offset + 2596 taa->LOCADDR_length; 2597 2598 sin = (sin_t *)(ackmp->b_rptr + 2599 taa->REMADDR_offset); 2600 /* initialize */ 2601 *sin = sin_null; 2602 sin->sin_family = AF_INET; 2603 sin->sin_addr.s_addr = 2604 V4_PART_OF_V6(udp->udp_v6dst); 2605 sin->sin_port = udp->udp_dstport; 2606 ackmp->b_wptr = (uchar_t *)&sin[1]; 2607 } 2608 } else { 2609 taa->LOCADDR_length = sizeof (sin6_t); 2610 sin6 = (sin6_t *)&taa[1]; 2611 /* Fill zeroes and then initialize non-zero fields */ 2612 *sin6 = sin6_null; 2613 sin6->sin6_family = AF_INET6; 2614 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2615 sin6->sin6_addr = udp->udp_v6src; 2616 } else { 2617 /* 2618 * UNSPECIFIED 2619 * udp_v6src is not set, we might be bound to 2620 * broadcast/multicast. 
Use udp_bound_v6src as 2621 * local address instead (that could 2622 * also still be UNSPECIFIED) 2623 */ 2624 sin6->sin6_addr = 2625 udp->udp_bound_v6src; 2626 } 2627 sin6->sin6_port = udp->udp_port; 2628 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2629 if (udp->udp_state == TS_DATA_XFER) { 2630 /* 2631 * connected, fill remote address too 2632 */ 2633 taa->REMADDR_length = sizeof (sin6_t); 2634 /* assumed 32-bit alignment */ 2635 taa->REMADDR_offset = taa->LOCADDR_offset + 2636 taa->LOCADDR_length; 2637 2638 sin6 = (sin6_t *)(ackmp->b_rptr + 2639 taa->REMADDR_offset); 2640 /* initialize */ 2641 *sin6 = sin6_null; 2642 sin6->sin6_family = AF_INET6; 2643 sin6->sin6_addr = udp->udp_v6dst; 2644 sin6->sin6_port = udp->udp_dstport; 2645 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2646 } 2647 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2648 } 2649 } 2650 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2651 putnext(UDP_RD(q), ackmp); 2652 } 2653 2654 static void 2655 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2656 { 2657 if (udp->udp_family == AF_INET) { 2658 *tap = udp_g_t_info_ack_ipv4; 2659 } else { 2660 *tap = udp_g_t_info_ack_ipv6; 2661 } 2662 tap->CURRENT_state = udp->udp_state; 2663 tap->OPT_size = udp_max_optsize; 2664 } 2665 2666 /* 2667 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2668 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2669 * udp_g_t_info_ack. The current state of the stream is copied from 2670 * udp_state. 
2671 */ 2672 static void 2673 udp_capability_req(queue_t *q, mblk_t *mp) 2674 { 2675 t_uscalar_t cap_bits1; 2676 struct T_capability_ack *tcap; 2677 udp_t *udp = Q_TO_UDP(q); 2678 2679 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2680 2681 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2682 mp->b_datap->db_type, T_CAPABILITY_ACK); 2683 if (!mp) 2684 return; 2685 2686 tcap = (struct T_capability_ack *)mp->b_rptr; 2687 tcap->CAP_bits1 = 0; 2688 2689 if (cap_bits1 & TC1_INFO) { 2690 udp_copy_info(&tcap->INFO_ack, udp); 2691 tcap->CAP_bits1 |= TC1_INFO; 2692 } 2693 2694 putnext(UDP_RD(q), mp); 2695 } 2696 2697 /* 2698 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2699 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2700 * The current state of the stream is copied from udp_state. 2701 */ 2702 static void 2703 udp_info_req(queue_t *q, mblk_t *mp) 2704 { 2705 udp_t *udp = Q_TO_UDP(q); 2706 2707 /* Create a T_INFO_ACK message. */ 2708 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2709 T_INFO_ACK); 2710 if (!mp) 2711 return; 2712 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2713 putnext(UDP_RD(q), mp); 2714 } 2715 2716 /* 2717 * IP recognizes seven kinds of bind requests: 2718 * 2719 * - A zero-length address binds only to the protocol number. 2720 * 2721 * - A 4-byte address is treated as a request to 2722 * validate that the address is a valid local IPv4 2723 * address, appropriate for an application to bind to. 2724 * IP does the verification, but does not make any note 2725 * of the address at this time. 2726 * 2727 * - A 16-byte address contains is treated as a request 2728 * to validate a local IPv6 address, as the 4-byte 2729 * address case above. 2730 * 2731 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2732 * use it for the inbound fanout of packets. 
2733 * 2734 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2735 * use it for the inbound fanout of packets. 2736 * 2737 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2738 * information consisting of local and remote addresses 2739 * and ports. In this case, the addresses are both 2740 * validated as appropriate for this operation, and, if 2741 * so, the information is retained for use in the 2742 * inbound fanout. 2743 * 2744 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2745 * fanout information, like the 12-byte case above. 2746 * 2747 * IP will also fill in the IRE request mblk with information 2748 * regarding our peer. In all cases, we notify IP of our protocol 2749 * type by appending a single protocol byte to the bind request. 2750 */ 2751 static mblk_t * 2752 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2753 { 2754 char *cp; 2755 mblk_t *mp; 2756 struct T_bind_req *tbr; 2757 ipa_conn_t *ac; 2758 ipa6_conn_t *ac6; 2759 sin_t *sin; 2760 sin6_t *sin6; 2761 2762 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2763 2764 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2765 if (!mp) 2766 return (mp); 2767 mp->b_datap->db_type = M_PROTO; 2768 tbr = (struct T_bind_req *)mp->b_rptr; 2769 tbr->PRIM_type = bind_prim; 2770 tbr->ADDR_offset = sizeof (*tbr); 2771 tbr->CONIND_number = 0; 2772 tbr->ADDR_length = addr_length; 2773 cp = (char *)&tbr[1]; 2774 switch (addr_length) { 2775 case sizeof (ipa_conn_t): 2776 ASSERT(udp->udp_family == AF_INET); 2777 /* Append a request for an IRE */ 2778 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2779 if (!mp->b_cont) { 2780 freemsg(mp); 2781 return (NULL); 2782 } 2783 mp->b_cont->b_wptr += sizeof (ire_t); 2784 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2785 2786 /* cp known to be 32 bit aligned */ 2787 ac = (ipa_conn_t *)cp; 2788 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2789 ac->ac_faddr = 
V4_PART_OF_V6(udp->udp_v6dst); 2790 ac->ac_fport = udp->udp_dstport; 2791 ac->ac_lport = udp->udp_port; 2792 break; 2793 2794 case sizeof (ipa6_conn_t): 2795 ASSERT(udp->udp_family == AF_INET6); 2796 /* Append a request for an IRE */ 2797 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2798 if (!mp->b_cont) { 2799 freemsg(mp); 2800 return (NULL); 2801 } 2802 mp->b_cont->b_wptr += sizeof (ire_t); 2803 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2804 2805 /* cp known to be 32 bit aligned */ 2806 ac6 = (ipa6_conn_t *)cp; 2807 ac6->ac6_laddr = udp->udp_v6src; 2808 ac6->ac6_faddr = udp->udp_v6dst; 2809 ac6->ac6_fport = udp->udp_dstport; 2810 ac6->ac6_lport = udp->udp_port; 2811 break; 2812 2813 case sizeof (sin_t): 2814 ASSERT(udp->udp_family == AF_INET); 2815 /* Append a request for an IRE */ 2816 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2817 if (!mp->b_cont) { 2818 freemsg(mp); 2819 return (NULL); 2820 } 2821 mp->b_cont->b_wptr += sizeof (ire_t); 2822 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2823 2824 sin = (sin_t *)cp; 2825 *sin = sin_null; 2826 sin->sin_family = AF_INET; 2827 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2828 sin->sin_port = udp->udp_port; 2829 break; 2830 2831 case sizeof (sin6_t): 2832 ASSERT(udp->udp_family == AF_INET6); 2833 /* Append a request for an IRE */ 2834 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2835 if (!mp->b_cont) { 2836 freemsg(mp); 2837 return (NULL); 2838 } 2839 mp->b_cont->b_wptr += sizeof (ire_t); 2840 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2841 2842 sin6 = (sin6_t *)cp; 2843 *sin6 = sin6_null; 2844 sin6->sin6_family = AF_INET6; 2845 sin6->sin6_addr = udp->udp_bound_v6src; 2846 sin6->sin6_port = udp->udp_port; 2847 break; 2848 } 2849 /* Add protocol number to end */ 2850 cp[addr_length] = (char)IPPROTO_UDP; 2851 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2852 return (mp); 2853 } 2854 2855 /* 2856 * This is the open routine for udp. 
It allocates a udp_t structure for 2857 * the stream and, on the first open of the module, creates an ND table. 2858 */ 2859 /* ARGSUSED */ 2860 static int 2861 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2862 { 2863 int err; 2864 udp_t *udp; 2865 conn_t *connp; 2866 zoneid_t zoneid = getzoneid(); 2867 queue_t *ip_wq; 2868 char *name; 2869 2870 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2871 2872 /* If the stream is already open, return immediately. */ 2873 if (q->q_ptr != NULL) 2874 return (0); 2875 2876 /* If this is not a push of udp as a module, fail. */ 2877 if (sflag != MODOPEN) 2878 return (EINVAL); 2879 2880 q->q_hiwat = udp_recv_hiwat; 2881 WR(q)->q_hiwat = udp_xmit_hiwat; 2882 WR(q)->q_lowat = udp_xmit_lowat; 2883 2884 /* Insert ourselves in the stream since we're about to walk q_next */ 2885 qprocson(q); 2886 2887 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2888 bzero(udp, sizeof (*udp)); 2889 2890 /* 2891 * UDP is supported only as a module and it has to be pushed directly 2892 * above the device instance of IP. If UDP is pushed anywhere else 2893 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2894 * sake of MIB browsers and fail everything else. 2895 */ 2896 ip_wq = WR(q)->q_next; 2897 if (ip_wq->q_next != NULL || 2898 (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL || 2899 strcmp(name, IP_MOD_NAME) != 0 || 2900 ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) { 2901 /* Support just SNMP for MIB browsers */ 2902 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); 2903 connp->conn_rq = q; 2904 connp->conn_wq = WR(q); 2905 connp->conn_flags |= IPCL_UDPMOD; 2906 connp->conn_cred = credp; 2907 connp->conn_zoneid = zoneid; 2908 connp->conn_udp = udp; 2909 udp->udp_connp = connp; 2910 q->q_ptr = WR(q)->q_ptr = connp; 2911 crhold(credp); 2912 q->q_qinfo = &udp_snmp_rinit; 2913 WR(q)->q_qinfo = &udp_snmp_winit; 2914 return (0); 2915 } 2916 2917 /* 2918 * Initialize the udp_t structure for this stream. 
2919 */ 2920 q = RD(ip_wq); 2921 connp = Q_TO_CONN(q); 2922 mutex_enter(&connp->conn_lock); 2923 connp->conn_proto = IPPROTO_UDP; 2924 connp->conn_flags |= IPCL_UDP; 2925 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2926 connp->conn_udp = udp; 2927 2928 /* Set the initial state of the stream and the privilege status. */ 2929 udp->udp_connp = connp; 2930 udp->udp_state = TS_UNBND; 2931 udp->udp_mode = UDP_MT_HOT; 2932 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2933 udp->udp_family = AF_INET6; 2934 udp->udp_ipversion = IPV6_VERSION; 2935 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2936 udp->udp_ttl = udp_ipv6_hoplimit; 2937 connp->conn_af_isv6 = B_TRUE; 2938 connp->conn_flags |= IPCL_ISV6; 2939 } else { 2940 udp->udp_family = AF_INET; 2941 udp->udp_ipversion = IPV4_VERSION; 2942 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2943 udp->udp_ttl = udp_ipv4_ttl; 2944 connp->conn_af_isv6 = B_FALSE; 2945 connp->conn_flags &= ~IPCL_ISV6; 2946 } 2947 2948 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2949 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2950 connp->conn_zoneid = zoneid; 2951 2952 /* 2953 * If the caller has the process-wide flag set, then default to MAC 2954 * exempt mode. This allows read-down to unlabeled hosts. 2955 */ 2956 if (getpflags(NET_MAC_AWARE, credp) != 0) 2957 udp->udp_mac_exempt = B_TRUE; 2958 2959 if (connp->conn_flags & IPCL_SOCKET) { 2960 udp->udp_issocket = B_TRUE; 2961 udp->udp_direct_sockfs = B_TRUE; 2962 } 2963 2964 connp->conn_ulp_labeled = is_system_labeled(); 2965 2966 mutex_exit(&connp->conn_lock); 2967 2968 /* 2969 * The transmit hiwat/lowat is only looked at on IP's queue. 2970 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2971 * getsockopts. 
2972 */ 2973 q->q_hiwat = udp_recv_hiwat; 2974 WR(q)->q_hiwat = udp_xmit_hiwat; 2975 WR(q)->q_lowat = udp_xmit_lowat; 2976 2977 if (udp->udp_family == AF_INET6) { 2978 /* Build initial header template for transmit */ 2979 if ((err = udp_build_hdrs(q, udp)) != 0) { 2980 error: 2981 qprocsoff(UDP_RD(q)); 2982 udp->udp_connp = NULL; 2983 connp->conn_udp = NULL; 2984 kmem_cache_free(udp_cache, udp); 2985 return (err); 2986 } 2987 } 2988 2989 /* Set the Stream head write offset and high watermark. */ 2990 (void) mi_set_sth_wroff(UDP_RD(q), 2991 udp->udp_max_hdr_len + udp_wroff_extra); 2992 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 2993 2994 WR(UDP_RD(q))->q_qinfo = &udp_winit; 2995 2996 return (0); 2997 } 2998 2999 /* 3000 * Which UDP options OK to set through T_UNITDATA_REQ... 3001 */ 3002 /* ARGSUSED */ 3003 static boolean_t 3004 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 3005 { 3006 return (B_TRUE); 3007 } 3008 3009 /* 3010 * This routine gets default values of certain options whose default 3011 * values are maintained by protcol specific code 3012 */ 3013 /* ARGSUSED */ 3014 int 3015 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 3016 { 3017 int *i1 = (int *)ptr; 3018 3019 switch (level) { 3020 case IPPROTO_IP: 3021 switch (name) { 3022 case IP_MULTICAST_TTL: 3023 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 3024 return (sizeof (uchar_t)); 3025 case IP_MULTICAST_LOOP: 3026 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 3027 return (sizeof (uchar_t)); 3028 } 3029 break; 3030 case IPPROTO_IPV6: 3031 switch (name) { 3032 case IPV6_MULTICAST_HOPS: 3033 *i1 = IP_DEFAULT_MULTICAST_TTL; 3034 return (sizeof (int)); 3035 case IPV6_MULTICAST_LOOP: 3036 *i1 = IP_DEFAULT_MULTICAST_LOOP; 3037 return (sizeof (int)); 3038 case IPV6_UNICAST_HOPS: 3039 *i1 = udp_ipv6_hoplimit; 3040 return (sizeof (int)); 3041 } 3042 break; 3043 } 3044 return (-1); 3045 } 3046 3047 /* 3048 * This routine retrieves the current status 
of socket options
 * and expects the caller to pass in the queue pointer of the
 * upper instance.  It returns the size of the option retrieved.
 *
 * Return value conventions visible in this routine:
 *   - sizeof (int) for the common case where the value is stored via *i1;
 *   - an explicit byte count for options that are not int-sized;
 *   - 0 for IPv6 sticky options that are currently not set;
 *   - -1 when the level/option is unknown here or cannot be read;
 *   - -EINVAL for IP_NEXTHOP, which is handled at the IP level.
 */
int
udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
	int	*i1 = (int *)ptr;
	conn_t	*connp;
	udp_t	*udp;
	ip6_pkt_t *ipp;
	int	len;

	q = UDP_WR(q);
	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	/* IPv6 sticky option values are read from the per-endpoint state */
	ipp = &udp->udp_sticky_ipp;

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_DEBUG:
			*i1 = udp->udp_debug;
			break;	/* goto sizeof (int) option return */
		case SO_REUSEADDR:
			*i1 = udp->udp_reuseaddr;
			break;	/* goto sizeof (int) option return */
		case SO_TYPE:
			*i1 = SOCK_DGRAM;
			break;	/* goto sizeof (int) option return */

		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			*i1 = udp->udp_dontroute;
			break;	/* goto sizeof (int) option return */
		case SO_USELOOPBACK:
			*i1 = udp->udp_useloopback;
			break;	/* goto sizeof (int) option return */
		case SO_BROADCAST:
			*i1 = udp->udp_broadcast;
			break;	/* goto sizeof (int) option return */

		case SO_SNDBUF:
			/* q is the write queue after UDP_WR() above */
			*i1 = q->q_hiwat;
			break;	/* goto sizeof (int) option return */
		case SO_RCVBUF:
			*i1 = RD(q)->q_hiwat;
			break;	/* goto sizeof (int) option return */
		case SO_DGRAM_ERRIND:
			*i1 = udp->udp_dgram_errind;
			break;	/* goto sizeof (int) option return */
		case SO_RECVUCRED:
			*i1 = udp->udp_recvucred;
			break;	/* goto sizeof (int) option return */
		case SO_TIMESTAMP:
			*i1 = udp->udp_timestamp;
			break;	/* goto sizeof (int) option return */
		case SO_ANON_MLP:
			*i1 = udp->udp_anon_mlp;
			break;	/* goto sizeof (int) option return */
		case SO_MAC_EXEMPT:
			*i1 = udp->udp_mac_exempt;
			break;	/* goto sizeof (int) option return */
		case SO_ALLZONES:
			*i1 = connp->conn_allzones;
			break;	/* goto sizeof (int) option return */
		default:
			return (-1);
		}
		break;
	case IPPROTO_IP:
		/* IPv4-level options are only valid on AF_INET endpoints */
		if (udp->udp_family != AF_INET)
			return (-1);
		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/*
			 * Return the received options that follow the first
			 * udp_label_len bytes; those leading bytes are not
			 * copied out.
			 */
			len = udp->udp_ip_rcv_options_len - udp->udp_label_len;
			if (len > 0) {
				bcopy(udp->udp_ip_rcv_options +
				    udp->udp_label_len, ptr, len);
			}
			return (len);
		case IP_TOS:
		case T_IP_TOS:
			*i1 = (int)udp->udp_type_of_service;
			break;	/* goto sizeof (int) option return */
		case IP_TTL:
			*i1 = (int)udp->udp_ttl;
			break;	/* goto sizeof (int) option return */
		case IP_NEXTHOP:
			/* Handled at IP level */
			return (-EINVAL);
		case IP_MULTICAST_IF:
			/* 0 address if not set */
			*(ipaddr_t *)ptr = udp->udp_multicast_if_addr;
			return (sizeof (ipaddr_t));
		case IP_MULTICAST_TTL:
			*(uchar_t *)ptr = udp->udp_multicast_ttl;
			return (sizeof (uchar_t));
		case IP_MULTICAST_LOOP:
			*ptr = connp->conn_multicast_loop;
			return (sizeof (uint8_t));
		case IP_RECVOPTS:
			*i1 = udp->udp_recvopts;
			break;	/* goto sizeof (int) option return */
		case IP_RECVDSTADDR:
			*i1 = udp->udp_recvdstaddr;
			break;	/* goto sizeof (int) option return */
		case IP_RECVIF:
			*i1 = udp->udp_recvif;
			break;	/* goto sizeof (int) option return */
		case IP_RECVSLLA:
			*i1 = udp->udp_recvslla;
			break;	/* goto sizeof (int) option return */
		case IP_RECVTTL:
			*i1 = udp->udp_recvttl;
			break;	/* goto sizeof (int) option return */
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case IP_DONTFAILOVER_IF:
			/* cannot "get" the value for these */
			return (-1);
		case IP_BOUND_IF:
			/* Zero if not set */
			*i1 = udp->udp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IP_UNSPEC_SRC:
			*i1 = udp->udp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IP_XMIT_IF:
			*i1 = udp->udp_xmit_if;
			break;	/* goto sizeof (int) option return */
		default:
			return (-1);
		}
		break;
	case IPPROTO_IPV6:
		/* IPv6-level options are only valid on AF_INET6 endpoints */
		if (udp->udp_family != AF_INET6)
			return (-1);
		switch (name) {
		case IPV6_UNICAST_HOPS:
			*i1 = (unsigned int)udp->udp_ttl;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_IF:
			/* 0 index if not set */
			*i1 = udp->udp_multicast_if_index;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_HOPS:
			*i1 = udp->udp_multicast_ttl;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_LOOP:
			*i1 = connp->conn_multicast_loop;
			break;	/* goto sizeof (int) option return */
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/* cannot "get" the value for these */
			return (-1);
		case IPV6_BOUND_IF:
			/* Zero if not set */
			*i1 = udp->udp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IPV6_UNSPEC_SRC:
			*i1 = udp->udp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVPKTINFO:
			*i1 = udp->udp_ipv6_recvpktinfo;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVTCLASS:
			*i1 = udp->udp_ipv6_recvtclass;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVPATHMTU:
			*i1 = udp->udp_ipv6_recvpathmtu;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVHOPLIMIT:
			*i1 = udp->udp_ipv6_recvhoplimit;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVHOPOPTS:
			*i1 = udp->udp_ipv6_recvhopopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVDSTOPTS:
			*i1 = udp->udp_ipv6_recvdstopts;
			break;	/* goto sizeof (int) option return */
		case _OLD_IPV6_RECVDSTOPTS:
			*i1 = udp->udp_old_ipv6_recvdstopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVRTHDRDSTOPTS:
			*i1 = udp->udp_ipv6_recvrthdrdstopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVRTHDR:
			*i1 = udp->udp_ipv6_recvrthdr;
			break;	/* goto sizeof (int) option return */
		case IPV6_PKTINFO: {
			/* XXX assumes that caller has room for max size! */
			struct in6_pktinfo *pkti;

			pkti = (struct in6_pktinfo *)ptr;
			/* Unset fields are reported as 0 / all-zeros */
			if (ipp->ipp_fields & IPPF_IFINDEX)
				pkti->ipi6_ifindex = ipp->ipp_ifindex;
			else
				pkti->ipi6_ifindex = 0;
			if (ipp->ipp_fields & IPPF_ADDR)
				pkti->ipi6_addr = ipp->ipp_addr;
			else
				pkti->ipi6_addr = ipv6_all_zeros;
			return (sizeof (struct in6_pktinfo));
		}
		case IPV6_TCLASS:
			if (ipp->ipp_fields & IPPF_TCLASS)
				*i1 = ipp->ipp_tclass;
			else
				*i1 = IPV6_FLOW_TCLASS(
				    IPV6_DEFAULT_VERS_AND_FLOW);
			break;	/* goto sizeof (int) option return */
		case IPV6_NEXTHOP: {
			sin6_t *sin6 = (sin6_t *)ptr;

			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
				return (0);
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = ipp->ipp_nexthop;
			return (sizeof (sin6_t));
		}
		case IPV6_HOPOPTS:
			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
				return (0);
			if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6)
				return (0);
			/*
			 * The cipso/label option is added by kernel.
			 * User is not usually aware of this option.
			 * We copy out the hbh opt after the label option.
			 */
			bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
			    ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
			/*
			 * When a label was skipped, rewrite the first two
			 * bytes of the copy: byte 0 is taken from the start
			 * of the stored header and byte 1 is the remaining
			 * length expressed in 8-byte units minus one.
			 */
			if (udp->udp_label_len_v6 > 0) {
				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
				ptr[1] = (ipp->ipp_hopoptslen -
				    udp->udp_label_len_v6 + 7) / 8 - 1;
			}
			return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
		case IPV6_RTHDRDSTOPTS:
			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
				return (0);
			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
			return (ipp->ipp_rtdstoptslen);
		case IPV6_RTHDR:
			if (!(ipp->ipp_fields & IPPF_RTHDR))
				return (0);
			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
			return (ipp->ipp_rthdrlen);
		case IPV6_DSTOPTS:
			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
				return (0);
			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
			return (ipp->ipp_dstoptslen);
		case IPV6_PATHMTU:
			return (ip_fill_mtuinfo(&udp->udp_v6dst,
			    udp->udp_dstport, (struct ip6_mtuinfo *)ptr));
		default:
			return (-1);
		}
		break;
	case IPPROTO_UDP:
		switch (name) {
		case UDP_ANONPRIVBIND:
			*i1 = udp->udp_anon_priv_bind;
			break;
		case UDP_EXCLBIND:
			*i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
			break;
		case UDP_RCVHDR:
			*i1 = udp->udp_rcvhdr ? 1 : 0;
			break;
		default:
			return (-1);
		}
		break;
	default:
		return (-1);
	}
	/* Common exit for every case that stored an int through i1 */
	return (sizeof (int));
}

/*
 * This routine sets socket options; it expects the caller
 * to pass in the queue pointer of the upper instance.
 *
 * optset_context selects the mode: SETFN_OPTCOM_CHECKONLY validates the
 * request without changing state, the *_NEGOTIATE contexts apply it.
 * thisdg_attrs, when non-NULL, is a udpattrs_t carrying per-datagram
 * (ancillary) state; otherwise IPv6 options are applied as sticky options.
 *
 * Return value conventions visible in this routine:
 *   - 0 on success, with the input value copied to outvalp (when the
 *     buffers differ) and *outlenp set to inlen;
 *   - a positive errno for a hard failure (most, but not all, such paths
 *     also set *outlenp to 0);
 *   - -EINVAL as a "soft" error for options not handled at this level;
 *     *outlenp is left untouched in that case.
 */
/* ARGSUSED */
int
udp_opt_set(queue_t *q, uint_t optset_context, int level,
    int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
    uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
{
	udpattrs_t *attrs = thisdg_attrs;
	int	*i1 = (int *)invalp;
	/* NOTE(review): reads *invalp before inlen is examined */
	boolean_t onoff = (*i1 == 0) ? 0 : 1;
	boolean_t checkonly;
	int	error;
	conn_t	*connp;
	udp_t	*udp;
	uint_t	newlen;

	q = UDP_WR(q);
	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;

	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		checkonly = B_TRUE;
		/*
		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
		 * inlen != 0 implies value supplied and
		 * 	we have to "pretend" to set it.
		 * inlen == 0 implies that there is no
		 * 	value part in T_CHECK request and just validation
		 * done elsewhere should be enough, we just return here.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			return (0);
		}
		break;
	case SETFN_OPTCOM_NEGOTIATE:
		checkonly = B_FALSE;
		break;
	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		checkonly = B_FALSE;
		/*
		 * Negotiating local and "association-related" options
		 * through T_UNITDATA_REQ.
		 *
		 * Following routine can filter out ones we do not
		 * want to be "set" this way.
		 */
		if (!udp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		/*
		 * We should never get here
		 */
		*outlenp = 0;
		return (EINVAL);
	}

	/* A check-only request that reaches this point carries a value */
	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	/*
	 * For fixed length options, no sanity check
	 * of passed in length is done. It is assumed *_optcom_req()
	 * routines do the right thing.
	 */

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_REUSEADDR:
			if (!checkonly)
				udp->udp_reuseaddr = onoff;
			break;
		case SO_DEBUG:
			if (!checkonly)
				udp->udp_debug = onoff;
			break;
		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			if (!checkonly)
				udp->udp_dontroute = onoff;
			break;
		case SO_USELOOPBACK:
			if (!checkonly)
				udp->udp_useloopback = onoff;
			break;
		case SO_BROADCAST:
			if (!checkonly)
				udp->udp_broadcast = onoff;
			break;

		case SO_SNDBUF:
			/* Only an upper bound (udp_max_buf) is enforced */
			if (*i1 > udp_max_buf) {
				*outlenp = 0;
				return (ENOBUFS);
			}
			if (!checkonly) {
				q->q_hiwat = *i1;
				WR(UDP_RD(q))->q_hiwat = *i1;
			}
			break;
		case SO_RCVBUF:
			/* Only an upper bound (udp_max_buf) is enforced */
			if (*i1 > udp_max_buf) {
				*outlenp = 0;
				return (ENOBUFS);
			}
			if (!checkonly) {
				RD(q)->q_hiwat = *i1;
				UDP_RD(q)->q_hiwat = *i1;
				(void) mi_set_sth_hiwat(UDP_RD(q),
				    udp_set_rcv_hiwat(udp, *i1));
			}
			break;
		case SO_DGRAM_ERRIND:
			if (!checkonly)
				udp->udp_dgram_errind = onoff;
			break;
		case SO_RECVUCRED:
			if (!checkonly)
				udp->udp_recvucred = onoff;
			break;
		case SO_ALLZONES:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Do not modify *outlenp.
			 */
			return (-EINVAL);
		case SO_TIMESTAMP:
			if (!checkonly)
				udp->udp_timestamp = onoff;
			break;
		case SO_ANON_MLP:
			if (!checkonly)
				udp->udp_anon_mlp = onoff;
			break;
		case SO_MAC_EXEMPT:
			/* Needs privilege and an endpoint in TS_UNBND state */
			if (secpolicy_net_mac_aware(cr) != 0 ||
			    udp->udp_state != TS_UNBND)
				return (EACCES);
			if (!checkonly)
				udp->udp_mac_exempt = onoff;
			break;
		case SCM_UCRED: {
			struct ucred_s *ucr;
			/* NOTE(review): this 'cr' shadows the parameter 'cr' */
			cred_t *cr, *newcr;
			ts_label_t *tsl;

			/*
			 * Only sockets that have proper privileges and are
			 * bound to MLPs will have any other value here, so
			 * this implicitly tests for privilege to set label.
			 */
			if (connp->conn_mlp_type == mlptSingle)
				break;
			ucr = (struct ucred_s *)invalp;
			/* The label must lie entirely within the ucred */
			if (inlen != ucredsize ||
			    ucr->uc_labeloff < sizeof (*ucr) ||
			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
				return (EINVAL);
			if (!checkonly) {
				mblk_t *mb;

				if (attrs == NULL ||
				    (mb = attrs->udpattr_mb) == NULL)
					return (EINVAL);
				/* Fall back to the endpoint's credential */
				if ((cr = DB_CRED(mb)) == NULL)
					cr = udp->udp_connp->conn_cred;
				ASSERT(cr != NULL);
				if ((tsl = crgetlabel(cr)) == NULL)
					return (EINVAL);
				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
				    tsl->tsl_doi, KM_NOSLEEP);
				if (newcr == NULL)
					return (ENOSR);
				mblk_setcred(mb, newcr);
				attrs->udpattr_credset = B_TRUE;
				crfree(newcr);
			}
			break;
		}
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	case IPPROTO_IP:
		/* IPv4-level options are only valid on AF_INET endpoints */
		if (udp->udp_family != AF_INET) {
			*outlenp = 0;
			return (ENOPROTOOPT);
		}
		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/* Save options for use by IP. */
			newlen = inlen + udp->udp_label_len;
			/* Length must be a multiple of 4 and fit with label */
			if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (checkonly)
				break;

			if (!tsol_option_set(&udp->udp_ip_snd_options,
			    &udp->udp_ip_snd_options_len,
			    udp->udp_label_len, invalp, inlen)) {
				*outlenp = 0;
				return (ENOMEM);
			}

			/* Header size changed; update the stream write offset */
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			(void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len +
			    udp_wroff_extra);
			break;

		case IP_TTL:
			if (!checkonly) {
				udp->udp_ttl = (uchar_t)*i1;
			}
			break;
		case IP_TOS:
		case T_IP_TOS:
			if (!checkonly) {
				udp->udp_type_of_service = (uchar_t)*i1;
			}
			break;
		case IP_MULTICAST_IF: {
			/*
			 * TODO should check OPTMGMT reply and undo this if
			 * there is an error.
			 */
			struct in_addr *inap = (struct in_addr *)invalp;
			if (!checkonly) {
				udp->udp_multicast_if_addr =
				    inap->s_addr;
			}
			break;
		}
		case IP_MULTICAST_TTL:
			/* Value is read as a single byte */
			if (!checkonly)
				udp->udp_multicast_ttl = *invalp;
			break;
		case IP_MULTICAST_LOOP:
			/* Value is read as a single byte */
			if (!checkonly)
				connp->conn_multicast_loop = *invalp;
			break;
		case IP_RECVOPTS:
			if (!checkonly)
				udp->udp_recvopts = onoff;
			break;
		case IP_RECVDSTADDR:
			if (!checkonly)
				udp->udp_recvdstaddr = onoff;
			break;
		case IP_RECVIF:
			if (!checkonly)
				udp->udp_recvif = onoff;
			break;
		case IP_RECVSLLA:
			if (!checkonly)
				udp->udp_recvslla = onoff;
			break;
		case IP_RECVTTL:
			if (!checkonly)
				udp->udp_recvttl = onoff;
			break;
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case IP_SEC_OPT:
		case IP_NEXTHOP:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Do not modify *outlenp.
			 */
			return (-EINVAL);
		case IP_BOUND_IF:
			if (!checkonly)
				udp->udp_bound_if = *i1;
			break;
		case IP_UNSPEC_SRC:
			if (!checkonly)
				udp->udp_unspec_source = onoff;
			break;
		case IP_XMIT_IF:
			if (!checkonly)
				udp->udp_xmit_if = *i1;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	case IPPROTO_IPV6: {
		ip6_pkt_t		*ipp;
		boolean_t		sticky;

		/* IPv6-level options are only valid on AF_INET6 endpoints */
		if (udp->udp_family != AF_INET6) {
			*outlenp = 0;
			return (ENOPROTOOPT);
		}
		/*
		 * Deal with both sticky options and ancillary data
		 */
		sticky = B_FALSE;
		if (attrs == NULL || (ipp = attrs->udpattr_ipp) == NULL) {
			/* sticky options, or none */
			ipp = &udp->udp_sticky_ipp;
			sticky = B_TRUE;
		}

		switch (name) {
		case IPV6_MULTICAST_IF:
			if (!checkonly)
				udp->udp_multicast_if_index = *i1;
			break;
		case IPV6_UNICAST_HOPS:
			/* -1 means use default */
			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				if (*i1 == -1) {
					udp->udp_ttl = ipp->ipp_unicast_hops =
					    udp_ipv6_hoplimit;
					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
					/* Pass modified value to IP. */
					*i1 = udp->udp_ttl;
				} else {
					udp->udp_ttl = ipp->ipp_unicast_hops =
					    (uint8_t)*i1;
					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
				}
				/* Rebuild the header template */
				error = udp_build_hdrs(q, udp);
				if (error != 0) {
					*outlenp = 0;
					return (error);
				}
			}
			break;
		case IPV6_MULTICAST_HOPS:
			/* -1 means use default */
			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				if (*i1 == -1) {
					udp->udp_multicast_ttl =
					    ipp->ipp_multicast_hops =
					    IP_DEFAULT_MULTICAST_TTL;
					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
					/* Pass modified value to IP. */
					*i1 = udp->udp_multicast_ttl;
				} else {
					udp->udp_multicast_ttl =
					    ipp->ipp_multicast_hops =
					    (uint8_t)*i1;
					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
				}
			}
			break;
		case IPV6_MULTICAST_LOOP:
			/* Only 0 and 1 are accepted */
			if (*i1 != 0 && *i1 != 1) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly)
				connp->conn_multicast_loop = *i1;
			break;
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Note: Do not modify *outlenp
			 */
			return (-EINVAL);
		case IPV6_BOUND_IF:
			if (!checkonly)
				udp->udp_bound_if = *i1;
			break;
		case IPV6_UNSPEC_SRC:
			if (!checkonly)
				udp->udp_unspec_source = onoff;
			break;
		/*
		 * Set boolean switches for ancillary data delivery
		 */
		case IPV6_RECVPKTINFO:
			if (!checkonly)
				udp->udp_ipv6_recvpktinfo = onoff;
			break;
		case IPV6_RECVTCLASS:
			if (!checkonly) {
				udp->udp_ipv6_recvtclass = onoff;
			}
			break;
		case IPV6_RECVPATHMTU:
			if (!checkonly) {
				udp->udp_ipv6_recvpathmtu = onoff;
			}
			break;
		case IPV6_RECVHOPLIMIT:
			if (!checkonly)
				udp->udp_ipv6_recvhoplimit = onoff;
			break;
		case IPV6_RECVHOPOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvhopopts = onoff;
			break;
		case IPV6_RECVDSTOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvdstopts = onoff;
			break;
		case _OLD_IPV6_RECVDSTOPTS:
			if (!checkonly)
				udp->udp_old_ipv6_recvdstopts = onoff;
			break;
		case IPV6_RECVRTHDRDSTOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvrthdrdstopts = onoff;
			break;
		case IPV6_RECVRTHDR:
			if (!checkonly)
				udp->udp_ipv6_recvrthdr = onoff;
			break;
		/*
		 * Set sticky options or ancillary data.
		 * If sticky options, (re)build any extension headers
		 * that might be needed as a result.
		 */
		case IPV6_PKTINFO:
			/*
			 * The source address and ifindex are verified
			 * in ip_opt_set(). For ancillary data the
			 * source address is checked in ip_wput_v6.
			 */
			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
				return (EINVAL);
			if (checkonly)
				break;

			/* inlen == 0 clears the option */
			if (inlen == 0) {
				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
				ipp->ipp_sticky_ignored |=
				    (IPPF_IFINDEX|IPPF_ADDR);
			} else {
				struct in6_pktinfo *pkti;

				pkti = (struct in6_pktinfo *)invalp;
				ipp->ipp_ifindex = pkti->ipi6_ifindex;
				ipp->ipp_addr = pkti->ipi6_addr;
				if (ipp->ipp_ifindex != 0)
					ipp->ipp_fields |= IPPF_IFINDEX;
				else
					ipp->ipp_fields &= ~IPPF_IFINDEX;
				if (!IN6_IS_ADDR_UNSPECIFIED(
				    &ipp->ipp_addr))
					ipp->ipp_fields |= IPPF_ADDR;
				else
					ipp->ipp_fields &= ~IPPF_ADDR;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_HOPLIMIT:
			/* Rejected as a sticky option; ancillary data only */
			if (sticky)
				return (EINVAL);
			if (inlen != 0 && inlen != sizeof (int))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
			} else {
				if (*i1 > 255 || *i1 < -1)
					return (EINVAL);
				/* -1 selects udp_ipv6_hoplimit */
				if (*i1 == -1)
					ipp->ipp_hoplimit = udp_ipv6_hoplimit;
				else
					ipp->ipp_hoplimit = *i1;
				ipp->ipp_fields |= IPPF_HOPLIMIT;
			}
			break;
		case IPV6_TCLASS:
			if (inlen != 0 && inlen != sizeof (int))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_TCLASS;
				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
			} else {
				if (*i1 > 255 || *i1 < -1)
					return (EINVAL);
				/* -1 selects a traffic class of 0 */
				if (*i1 == -1)
					ipp->ipp_tclass = 0;
				else
					ipp->ipp_tclass = *i1;
				ipp->ipp_fields |= IPPF_TCLASS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_NEXTHOP:
			/*
			 * IP will verify that the nexthop is reachable
			 * and fail for sticky options.
			 */
			if (inlen != 0 && inlen != sizeof (sin6_t))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_NEXTHOP;
				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
			} else {
				sin6_t *sin6 = (sin6_t *)invalp;

				if (sin6->sin6_family != AF_INET6)
					return (EAFNOSUPPORT);
				if (IN6_IS_ADDR_V4MAPPED(
				    &sin6->sin6_addr))
					return (EADDRNOTAVAIL);
				ipp->ipp_nexthop = sin6->sin6_addr;
				/* An unspecified address clears the option */
				if (!IN6_IS_ADDR_UNSPECIFIED(
				    &ipp->ipp_nexthop))
					ipp->ipp_fields |= IPPF_NEXTHOP;
				else
					ipp->ipp_fields &= ~IPPF_NEXTHOP;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_HOPOPTS: {
			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (hopts->ip6h_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			/*
			 * udp_label_len_v6 is passed as the final argument
			 * only for the sticky case.
			 */
			error = optcom_pkt_set(invalp, inlen, sticky,
			    (uchar_t **)&ipp->ipp_hopopts,
			    &ipp->ipp_hopoptslen,
			    sticky ? udp->udp_label_len_v6 : 0);
			if (error != 0)
				return (error);
			if (ipp->ipp_hopoptslen == 0) {
				ipp->ipp_fields &= ~IPPF_HOPOPTS;
				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
			} else {
				ipp->ipp_fields |= IPPF_HOPOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_RTHDRDSTOPTS: {
			ip6_dest_t *dopts = (ip6_dest_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (dopts->ip6d_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Free the stored copy of a sticky option */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
					kmem_free(ipp->ipp_rtdstopts,
					    ipp->ipp_rtdstoptslen);
					ipp->ipp_rtdstopts = NULL;
					ipp->ipp_rtdstoptslen = 0;
				}

				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_rtdstopts,
				    &ipp->ipp_rtdstoptslen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_DSTOPTS: {
			ip6_dest_t *dopts = (ip6_dest_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (dopts->ip6d_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Free the stored copy of a sticky option */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
					kmem_free(ipp->ipp_dstopts,
					    ipp->ipp_dstoptslen);
					ipp->ipp_dstopts = NULL;
					ipp->ipp_dstoptslen = 0;
				}
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_dstopts,
				    &ipp->ipp_dstoptslen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_DSTOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_RTHDR: {
			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (rt->ip6r_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Free the stored copy of a sticky option */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
					kmem_free(ipp->ipp_rthdr,
					    ipp->ipp_rthdrlen);
					ipp->ipp_rthdr = NULL;
					ipp->ipp_rthdrlen = 0;
				}
				ipp->ipp_fields &= ~IPPF_RTHDR;
				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_rthdr,
				    &ipp->ipp_rthdrlen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_RTHDR;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}

		case IPV6_DONTFRAG:
			if (checkonly)
				break;

			if (onoff) {
				ipp->ipp_fields |= IPPF_DONTFRAG;
			} else {
				ipp->ipp_fields &= ~IPPF_DONTFRAG;
			}
			break;

		case IPV6_USE_MIN_MTU:
			if (inlen != sizeof (int))
				return (EINVAL);

			/* Accepted values are -1, 0 and 1 */
			if (*i1 < -1 || *i1 > 1)
				return (EINVAL);

			if (checkonly)
				break;

			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
			ipp->ipp_use_min_mtu = *i1;
			break;

		case IPV6_BOUND_PIF:
		case IPV6_SEC_OPT:
		case IPV6_DONTFAILOVER_IF:
		case IPV6_SRC_PREFERENCES:
		case IPV6_V6ONLY:
			/* Handled at the IP level */
			return (-EINVAL);
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
		}		/* end IPPROTO_IPV6 */
	case IPPROTO_UDP:
		switch (name) {
		case UDP_ANONPRIVBIND:
			/* Privilege is checked even for check-only requests */
			if ((error = secpolicy_net_privaddr(cr, 0)) != 0) {
				*outlenp = 0;
				return (error);
			}
			if (!checkonly) {
				udp->udp_anon_priv_bind = onoff;
			}
			break;
		case UDP_EXCLBIND:
			if (!checkonly)
				udp->udp_exclbind = onoff;
			break;
		case UDP_RCVHDR:
			if (!checkonly)
				udp->udp_rcvhdr = onoff;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		*outlenp = 0;
		return (EINVAL);
	}
	/*
	 * Common case of OK return with outval same as inval.
	 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;
	return (0);
}

/*
 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
 * The headers include ip6i_t (if needed), ip6_t, any sticky extension
 * headers, and the udp header.
 * Returns failure if can't allocate memory.
4162 */ 4163 static int 4164 udp_build_hdrs(queue_t *q, udp_t *udp) 4165 { 4166 uchar_t *hdrs; 4167 uint_t hdrs_len; 4168 ip6_t *ip6h; 4169 ip6i_t *ip6i; 4170 udpha_t *udpha; 4171 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 4172 4173 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 4174 ASSERT(hdrs_len != 0); 4175 if (hdrs_len != udp->udp_sticky_hdrs_len) { 4176 /* Need to reallocate */ 4177 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 4178 if (hdrs == NULL) 4179 return (ENOMEM); 4180 4181 if (udp->udp_sticky_hdrs_len != 0) { 4182 kmem_free(udp->udp_sticky_hdrs, 4183 udp->udp_sticky_hdrs_len); 4184 } 4185 udp->udp_sticky_hdrs = hdrs; 4186 udp->udp_sticky_hdrs_len = hdrs_len; 4187 } 4188 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 4189 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 4190 4191 /* Set header fields not in ipp */ 4192 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 4193 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 4194 ip6h = (ip6_t *)&ip6i[1]; 4195 } else { 4196 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 4197 } 4198 4199 if (!(ipp->ipp_fields & IPPF_ADDR)) 4200 ip6h->ip6_src = udp->udp_v6src; 4201 4202 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 4203 udpha->uha_src_port = udp->udp_port; 4204 4205 /* Try to get everything in a single mblk */ 4206 if (hdrs_len > udp->udp_max_hdr_len) { 4207 udp->udp_max_hdr_len = hdrs_len; 4208 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 4209 udp_wroff_extra); 4210 } 4211 return (0); 4212 } 4213 4214 /* 4215 * This routine retrieves the value of an ND variable in a udpparam_t 4216 * structure. It is called through nd_getset when a user reads the 4217 * variable. 
4218 */ 4219 /* ARGSUSED */ 4220 static int 4221 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4222 { 4223 udpparam_t *udppa = (udpparam_t *)cp; 4224 4225 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4226 return (0); 4227 } 4228 4229 /* 4230 * Walk through the param array specified registering each element with the 4231 * named dispatch (ND) handler. 4232 */ 4233 static boolean_t 4234 udp_param_register(udpparam_t *udppa, int cnt) 4235 { 4236 for (; cnt-- > 0; udppa++) { 4237 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4238 if (!nd_load(&udp_g_nd, udppa->udp_param_name, 4239 udp_param_get, udp_param_set, 4240 (caddr_t)udppa)) { 4241 nd_free(&udp_g_nd); 4242 return (B_FALSE); 4243 } 4244 } 4245 } 4246 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports", 4247 udp_extra_priv_ports_get, NULL, NULL)) { 4248 nd_free(&udp_g_nd); 4249 return (B_FALSE); 4250 } 4251 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add", 4252 NULL, udp_extra_priv_ports_add, NULL)) { 4253 nd_free(&udp_g_nd); 4254 return (B_FALSE); 4255 } 4256 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del", 4257 NULL, udp_extra_priv_ports_del, NULL)) { 4258 nd_free(&udp_g_nd); 4259 return (B_FALSE); 4260 } 4261 if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL, 4262 NULL)) { 4263 nd_free(&udp_g_nd); 4264 return (B_FALSE); 4265 } 4266 if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL, 4267 NULL)) { 4268 nd_free(&udp_g_nd); 4269 return (B_FALSE); 4270 } 4271 return (B_TRUE); 4272 } 4273 4274 /* This routine sets an ND variable in a udpparam_t structure. */ 4275 /* ARGSUSED */ 4276 static int 4277 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 4278 { 4279 long new_value; 4280 udpparam_t *udppa = (udpparam_t *)cp; 4281 4282 /* 4283 * Fail the request if the new value does not lie within the 4284 * required bounds. 
4285 */ 4286 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 4287 new_value < udppa->udp_param_min || 4288 new_value > udppa->udp_param_max) { 4289 return (EINVAL); 4290 } 4291 4292 /* Set the new value */ 4293 udppa->udp_param_value = new_value; 4294 return (0); 4295 } 4296 4297 /* 4298 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with 4299 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to 4300 * just count the length needed for allocation. If 'dbuf' is non-NULL, 4301 * then it's assumed to be allocated to be large enough. 4302 * 4303 * Returns zero if trimming of the security option causes all options to go 4304 * away. 4305 */ 4306 static size_t 4307 copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf) 4308 { 4309 struct T_opthdr *toh; 4310 size_t hol = ipp->ipp_hopoptslen; 4311 ip6_hbh_t *dstopt = NULL; 4312 const ip6_hbh_t *srcopt = ipp->ipp_hopopts; 4313 size_t tlen, olen, plen; 4314 boolean_t deleting; 4315 const struct ip6_opt *sopt, *lastpad; 4316 struct ip6_opt *dopt; 4317 4318 if ((toh = (struct T_opthdr *)dbuf) != NULL) { 4319 toh->level = IPPROTO_IPV6; 4320 toh->name = IPV6_HOPOPTS; 4321 toh->status = 0; 4322 dstopt = (ip6_hbh_t *)(toh + 1); 4323 } 4324 4325 /* 4326 * If labeling is enabled, then skip the label option 4327 * but get other options if there are any. 4328 */ 4329 if (is_system_labeled()) { 4330 dopt = NULL; 4331 if (dstopt != NULL) { 4332 /* will fill in ip6h_len later */ 4333 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 4334 dopt = (struct ip6_opt *)(dstopt + 1); 4335 } 4336 sopt = (const struct ip6_opt *)(srcopt + 1); 4337 hol -= sizeof (*srcopt); 4338 tlen = sizeof (*dstopt); 4339 lastpad = NULL; 4340 deleting = B_FALSE; 4341 /* 4342 * This loop finds the first (lastpad pointer) of any number of 4343 * pads that preceeds the security option, then treats the 4344 * security option as though it were a pad, and then finds the 4345 * next non-pad option (or end of list). 
4346 * 4347 * It then treats the entire block as one big pad. To preserve 4348 * alignment of any options that follow, or just the end of the 4349 * list, it computes a minimal new padding size that keeps the 4350 * same alignment for the next option. 4351 * 4352 * If it encounters just a sequence of pads with no security 4353 * option, those are copied as-is rather than collapsed. 4354 * 4355 * Note that to handle the end of list case, the code makes one 4356 * loop with 'hol' set to zero. 4357 */ 4358 for (;;) { 4359 if (hol > 0) { 4360 if (sopt->ip6o_type == IP6OPT_PAD1) { 4361 if (lastpad == NULL) 4362 lastpad = sopt; 4363 sopt = (const struct ip6_opt *) 4364 &sopt->ip6o_len; 4365 hol--; 4366 continue; 4367 } 4368 olen = sopt->ip6o_len + sizeof (*sopt); 4369 if (olen > hol) 4370 olen = hol; 4371 if (sopt->ip6o_type == IP6OPT_PADN || 4372 sopt->ip6o_type == ip6opt_ls) { 4373 if (sopt->ip6o_type == ip6opt_ls) 4374 deleting = B_TRUE; 4375 if (lastpad == NULL) 4376 lastpad = sopt; 4377 sopt = (const struct ip6_opt *) 4378 ((const char *)sopt + olen); 4379 hol -= olen; 4380 continue; 4381 } 4382 } else { 4383 /* if nothing was copied at all, then delete */ 4384 if (tlen == sizeof (*dstopt)) 4385 return (0); 4386 /* last pass; pick up any trailing padding */ 4387 olen = 0; 4388 } 4389 if (deleting) { 4390 /* 4391 * compute aligning effect of deleted material 4392 * to reproduce with pad. 
4393 */ 4394 plen = ((const char *)sopt - 4395 (const char *)lastpad) & 7; 4396 tlen += plen; 4397 if (dopt != NULL) { 4398 if (plen == 1) { 4399 dopt->ip6o_type = IP6OPT_PAD1; 4400 } else if (plen > 1) { 4401 plen -= sizeof (*dopt); 4402 dopt->ip6o_type = IP6OPT_PADN; 4403 dopt->ip6o_len = plen; 4404 if (plen > 0) 4405 bzero(dopt + 1, plen); 4406 } 4407 dopt = (struct ip6_opt *) 4408 ((char *)dopt + plen); 4409 } 4410 deleting = B_FALSE; 4411 lastpad = NULL; 4412 } 4413 /* if there's uncopied padding, then copy that now */ 4414 if (lastpad != NULL) { 4415 olen += (const char *)sopt - 4416 (const char *)lastpad; 4417 sopt = lastpad; 4418 lastpad = NULL; 4419 } 4420 if (dopt != NULL && olen > 0) { 4421 bcopy(sopt, dopt, olen); 4422 dopt = (struct ip6_opt *)((char *)dopt + olen); 4423 } 4424 if (hol == 0) 4425 break; 4426 tlen += olen; 4427 sopt = (const struct ip6_opt *) 4428 ((const char *)sopt + olen); 4429 hol -= olen; 4430 } 4431 /* go back and patch up the length value, rounded upward */ 4432 if (dstopt != NULL) 4433 dstopt->ip6h_len = (tlen - 1) >> 3; 4434 } else { 4435 tlen = hol; 4436 if (dstopt != NULL) 4437 bcopy(srcopt, dstopt, hol); 4438 } 4439 4440 tlen += sizeof (*toh); 4441 if (toh != NULL) 4442 toh->len = tlen; 4443 4444 return (tlen); 4445 } 4446 4447 static void 4448 udp_input(conn_t *connp, mblk_t *mp) 4449 { 4450 struct T_unitdata_ind *tudi; 4451 uchar_t *rptr; /* Pointer to IP header */ 4452 int hdr_length; /* Length of IP+UDP headers */ 4453 int udi_size; /* Size of T_unitdata_ind */ 4454 int mp_len; 4455 udp_t *udp; 4456 udpha_t *udpha; 4457 int ipversion; 4458 ip6_pkt_t ipp; 4459 ip6_t *ip6h; 4460 ip6i_t *ip6i; 4461 mblk_t *mp1; 4462 mblk_t *options_mp = NULL; 4463 in_pktinfo_t *pinfo = NULL; 4464 cred_t *cr = NULL; 4465 queue_t *q = connp->conn_rq; 4466 pid_t cpid; 4467 cred_t *rcr = connp->conn_cred; 4468 4469 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4470 "udp_rput_start: q %p mp %p", q, mp); 4471 4472 udp = connp->conn_udp; 4473 rptr = 
mp->b_rptr; 4474 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4475 ASSERT(OK_32PTR(rptr)); 4476 4477 /* 4478 * IP should have prepended the options data in an M_CTL 4479 * Check M_CTL "type" to make sure are not here bcos of 4480 * a valid ICMP message 4481 */ 4482 if (DB_TYPE(mp) == M_CTL) { 4483 if (MBLKL(mp) == sizeof (in_pktinfo_t) && 4484 ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type == 4485 IN_PKTINFO) { 4486 /* 4487 * IP_RECVIF or IP_RECVSLLA information has been 4488 * appended to the packet by IP. We need to 4489 * extract the mblk and adjust the rptr 4490 */ 4491 pinfo = (in_pktinfo_t *)mp->b_rptr; 4492 options_mp = mp; 4493 mp = mp->b_cont; 4494 rptr = mp->b_rptr; 4495 UDP_STAT(udp_in_pktinfo); 4496 } else { 4497 /* 4498 * ICMP messages. 4499 */ 4500 udp_icmp_error(q, mp); 4501 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4502 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4503 return; 4504 } 4505 } 4506 4507 mp_len = msgdsize(mp); 4508 /* 4509 * This is the inbound data path. 4510 * First, we check to make sure the IP version number is correct, 4511 * and then pull the IP and UDP headers into the first mblk. 4512 * Assume IP provides aligned packets - otherwise toss. 4513 * Also, check if we have a complete IP header. 4514 */ 4515 4516 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4517 ipp.ipp_fields = 0; 4518 4519 ipversion = IPH_HDR_VERSION(rptr); 4520 switch (ipversion) { 4521 case IPV4_VERSION: 4522 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4523 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4524 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4525 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4526 (udp->udp_ip_rcv_options_len)) { 4527 /* 4528 * Handle IPv4 packets with options outside of the 4529 * main data path. Not needed for AF_INET6 sockets 4530 * since they don't support a getsockopt of IP_OPTIONS. 
4531 */ 4532 if (udp->udp_family == AF_INET6) 4533 break; 4534 /* 4535 * UDP length check performed for IPv4 packets with 4536 * options to check whether UDP length specified in 4537 * the header is the same as the physical length of 4538 * the packet. 4539 */ 4540 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4541 if (mp_len != (ntohs(udpha->uha_length) + 4542 hdr_length - UDPH_SIZE)) { 4543 goto tossit; 4544 } 4545 /* 4546 * Handle the case where the packet has IP options 4547 * and the IP_RECVSLLA & IP_RECVIF are set 4548 */ 4549 if (pinfo != NULL) 4550 mp = options_mp; 4551 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4552 SQTAG_UDP_INPUT); 4553 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4554 "udp_rput_end: q %p (%S)", q, "end"); 4555 return; 4556 } 4557 4558 /* Handle IPV6_RECVHOPLIMIT. */ 4559 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4560 udp->udp_ipv6_recvpktinfo) { 4561 if (pinfo->in_pkt_flags & IPF_RECVIF) { 4562 ipp.ipp_fields |= IPPF_IFINDEX; 4563 ipp.ipp_ifindex = pinfo->in_pkt_ifindex; 4564 } 4565 } 4566 break; 4567 case IPV6_VERSION: 4568 /* 4569 * IPv6 packets can only be received by applications 4570 * that are prepared to receive IPv6 addresses. 4571 * The IP fanout must ensure this. 
4572 */ 4573 ASSERT(udp->udp_family == AF_INET6); 4574 4575 ip6h = (ip6_t *)rptr; 4576 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4577 4578 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4579 uint8_t nexthdrp; 4580 /* Look for ifindex information */ 4581 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4582 ip6i = (ip6i_t *)ip6h; 4583 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4584 goto tossit; 4585 4586 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4587 ASSERT(ip6i->ip6i_ifindex != 0); 4588 ipp.ipp_fields |= IPPF_IFINDEX; 4589 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4590 } 4591 rptr = (uchar_t *)&ip6i[1]; 4592 mp->b_rptr = rptr; 4593 if (rptr == mp->b_wptr) { 4594 mp1 = mp->b_cont; 4595 freeb(mp); 4596 mp = mp1; 4597 rptr = mp->b_rptr; 4598 } 4599 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4600 goto tossit; 4601 ip6h = (ip6_t *)rptr; 4602 mp_len = msgdsize(mp); 4603 } 4604 /* 4605 * Find any potentially interesting extension headers 4606 * as well as the length of the IPv6 + extension 4607 * headers. 4608 */ 4609 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4610 UDPH_SIZE; 4611 ASSERT(nexthdrp == IPPROTO_UDP); 4612 } else { 4613 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4614 ip6i = NULL; 4615 } 4616 break; 4617 default: 4618 ASSERT(0); 4619 } 4620 4621 /* 4622 * IP inspected the UDP header thus all of it must be in the mblk. 4623 * UDP length check is performed for IPv6 packets and IPv4 packets 4624 * without options to check if the size of the packet as specified 4625 * by the header is the same as the physical size of the packet. 4626 */ 4627 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4628 if ((MBLKL(mp) < hdr_length) || 4629 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4630 goto tossit; 4631 } 4632 4633 /* Walk past the headers. */ 4634 if (!udp->udp_rcvhdr) { 4635 mp->b_rptr = rptr + hdr_length; 4636 mp_len -= hdr_length; 4637 } 4638 4639 /* 4640 * This is the inbound data path. 
Packets are passed upstream as 4641 * T_UNITDATA_IND messages with full IP headers still attached. 4642 */ 4643 if (udp->udp_family == AF_INET) { 4644 sin_t *sin; 4645 4646 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4647 4648 /* 4649 * Normally only send up the address. 4650 * If IP_RECVDSTADDR is set we include the destination IP 4651 * address as an option. With IP_RECVOPTS we include all 4652 * the IP options. Only ip_rput_other() handles packets 4653 * that contain IP options. 4654 */ 4655 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4656 if (udp->udp_recvdstaddr) { 4657 udi_size += sizeof (struct T_opthdr) + 4658 sizeof (struct in_addr); 4659 UDP_STAT(udp_in_recvdstaddr); 4660 } 4661 4662 /* 4663 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4664 * space accordingly 4665 */ 4666 if (udp->udp_recvif && (pinfo != NULL) && 4667 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4668 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4669 UDP_STAT(udp_in_recvif); 4670 } 4671 4672 if (udp->udp_recvslla && (pinfo != NULL) && 4673 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4674 udi_size += sizeof (struct T_opthdr) + 4675 sizeof (struct sockaddr_dl); 4676 UDP_STAT(udp_in_recvslla); 4677 } 4678 4679 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4680 udi_size += sizeof (struct T_opthdr) + ucredsize; 4681 cpid = DB_CPID(mp); 4682 UDP_STAT(udp_in_recvucred); 4683 } 4684 /* 4685 * If IP_RECVTTL is set allocate the appropriate sized buffer 4686 */ 4687 if (udp->udp_recvttl) { 4688 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4689 UDP_STAT(udp_in_recvttl); 4690 } 4691 /* 4692 * If SO_TIMESTAMP is set allocate the appropriate sized 4693 * buffer. Since gethrestime() expects a pointer aligned 4694 * argument, we allocate space necessary for extra 4695 * alignment (even though it might not be used). 
4696 */ 4697 if (udp->udp_timestamp) { 4698 udi_size += sizeof (struct T_opthdr) + 4699 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4700 UDP_STAT(udp_in_timestamp); 4701 } 4702 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4703 4704 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4705 mp1 = allocb(udi_size, BPRI_MED); 4706 if (mp1 == NULL) { 4707 freemsg(mp); 4708 if (options_mp != NULL) 4709 freeb(options_mp); 4710 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4711 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4712 BUMP_MIB(&udp_mib, udpInErrors); 4713 return; 4714 } 4715 mp1->b_cont = mp; 4716 mp = mp1; 4717 mp->b_datap->db_type = M_PROTO; 4718 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4719 mp->b_wptr = (uchar_t *)tudi + udi_size; 4720 tudi->PRIM_type = T_UNITDATA_IND; 4721 tudi->SRC_length = sizeof (sin_t); 4722 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4723 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4724 sizeof (sin_t); 4725 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4726 tudi->OPT_length = udi_size; 4727 sin = (sin_t *)&tudi[1]; 4728 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4729 sin->sin_port = udpha->uha_src_port; 4730 sin->sin_family = udp->udp_family; 4731 *(uint32_t *)&sin->sin_zero[0] = 0; 4732 *(uint32_t *)&sin->sin_zero[4] = 0; 4733 4734 /* 4735 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4736 * IP_RECVTTL has been set. 4737 */ 4738 if (udi_size != 0) { 4739 /* 4740 * Copy in destination address before options to avoid 4741 * any padding issues. 
4742 */ 4743 char *dstopt; 4744 4745 dstopt = (char *)&sin[1]; 4746 if (udp->udp_recvdstaddr) { 4747 struct T_opthdr *toh; 4748 ipaddr_t *dstptr; 4749 4750 toh = (struct T_opthdr *)dstopt; 4751 toh->level = IPPROTO_IP; 4752 toh->name = IP_RECVDSTADDR; 4753 toh->len = sizeof (struct T_opthdr) + 4754 sizeof (ipaddr_t); 4755 toh->status = 0; 4756 dstopt += sizeof (struct T_opthdr); 4757 dstptr = (ipaddr_t *)dstopt; 4758 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4759 dstopt += sizeof (ipaddr_t); 4760 udi_size -= toh->len; 4761 } 4762 4763 if (udp->udp_recvslla && (pinfo != NULL) && 4764 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4765 4766 struct T_opthdr *toh; 4767 struct sockaddr_dl *dstptr; 4768 4769 toh = (struct T_opthdr *)dstopt; 4770 toh->level = IPPROTO_IP; 4771 toh->name = IP_RECVSLLA; 4772 toh->len = sizeof (struct T_opthdr) + 4773 sizeof (struct sockaddr_dl); 4774 toh->status = 0; 4775 dstopt += sizeof (struct T_opthdr); 4776 dstptr = (struct sockaddr_dl *)dstopt; 4777 bcopy(&pinfo->in_pkt_slla, dstptr, 4778 sizeof (struct sockaddr_dl)); 4779 dstopt += sizeof (struct sockaddr_dl); 4780 udi_size -= toh->len; 4781 } 4782 4783 if (udp->udp_recvif && (pinfo != NULL) && 4784 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4785 4786 struct T_opthdr *toh; 4787 uint_t *dstptr; 4788 4789 toh = (struct T_opthdr *)dstopt; 4790 toh->level = IPPROTO_IP; 4791 toh->name = IP_RECVIF; 4792 toh->len = sizeof (struct T_opthdr) + 4793 sizeof (uint_t); 4794 toh->status = 0; 4795 dstopt += sizeof (struct T_opthdr); 4796 dstptr = (uint_t *)dstopt; 4797 *dstptr = pinfo->in_pkt_ifindex; 4798 dstopt += sizeof (uint_t); 4799 udi_size -= toh->len; 4800 } 4801 4802 if (cr != NULL) { 4803 struct T_opthdr *toh; 4804 4805 toh = (struct T_opthdr *)dstopt; 4806 toh->level = SOL_SOCKET; 4807 toh->name = SCM_UCRED; 4808 toh->len = sizeof (struct T_opthdr) + ucredsize; 4809 toh->status = 0; 4810 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4811 dstopt += toh->len; 4812 udi_size -= toh->len; 4813 } 4814 4815 if 
(udp->udp_recvttl) { 4816 struct T_opthdr *toh; 4817 uint8_t *dstptr; 4818 4819 toh = (struct T_opthdr *)dstopt; 4820 toh->level = IPPROTO_IP; 4821 toh->name = IP_RECVTTL; 4822 toh->len = sizeof (struct T_opthdr) + 4823 sizeof (uint8_t); 4824 toh->status = 0; 4825 dstopt += sizeof (struct T_opthdr); 4826 dstptr = (uint8_t *)dstopt; 4827 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4828 dstopt += sizeof (uint8_t); 4829 udi_size -= toh->len; 4830 } 4831 if (udp->udp_timestamp) { 4832 struct T_opthdr *toh; 4833 4834 toh = (struct T_opthdr *)dstopt; 4835 toh->level = SOL_SOCKET; 4836 toh->name = SCM_TIMESTAMP; 4837 toh->len = sizeof (struct T_opthdr) + 4838 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4839 toh->status = 0; 4840 dstopt += sizeof (struct T_opthdr); 4841 /* Align for gethrestime() */ 4842 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4843 sizeof (intptr_t)); 4844 gethrestime((timestruc_t *)dstopt); 4845 dstopt += sizeof (timestruc_t); 4846 udi_size -= toh->len; 4847 } 4848 4849 /* Consumed all of allocated space */ 4850 ASSERT(udi_size == 0); 4851 } 4852 } else { 4853 sin6_t *sin6; 4854 4855 /* 4856 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4857 * 4858 * Normally we only send up the address. If receiving of any 4859 * optional receive side information is enabled, we also send 4860 * that up as options. 
4861 * [ Only udp_rput_other() handles packets that contain IP 4862 * options so code to account for does not appear immediately 4863 * below but elsewhere ] 4864 */ 4865 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4866 4867 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4868 IPPF_RTHDR|IPPF_IFINDEX)) { 4869 if (udp->udp_ipv6_recvhopopts && 4870 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4871 size_t hlen; 4872 4873 UDP_STAT(udp_in_recvhopopts); 4874 hlen = copy_hop_opts(&ipp, NULL); 4875 if (hlen == 0) 4876 ipp.ipp_fields &= ~IPPF_HOPOPTS; 4877 udi_size += hlen; 4878 } 4879 if ((udp->udp_ipv6_recvdstopts || 4880 udp->udp_old_ipv6_recvdstopts) && 4881 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4882 udi_size += sizeof (struct T_opthdr) + 4883 ipp.ipp_dstoptslen; 4884 UDP_STAT(udp_in_recvdstopts); 4885 } 4886 if (((udp->udp_ipv6_recvdstopts && 4887 udp->udp_ipv6_recvrthdr && 4888 (ipp.ipp_fields & IPPF_RTHDR)) || 4889 udp->udp_ipv6_recvrthdrdstopts) && 4890 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4891 udi_size += sizeof (struct T_opthdr) + 4892 ipp.ipp_rtdstoptslen; 4893 UDP_STAT(udp_in_recvrtdstopts); 4894 } 4895 if (udp->udp_ipv6_recvrthdr && 4896 (ipp.ipp_fields & IPPF_RTHDR)) { 4897 udi_size += sizeof (struct T_opthdr) + 4898 ipp.ipp_rthdrlen; 4899 UDP_STAT(udp_in_recvrthdr); 4900 } 4901 if (udp->udp_ipv6_recvpktinfo && 4902 (ipp.ipp_fields & IPPF_IFINDEX)) { 4903 udi_size += sizeof (struct T_opthdr) + 4904 sizeof (struct in6_pktinfo); 4905 UDP_STAT(udp_in_recvpktinfo); 4906 } 4907 4908 } 4909 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4910 udi_size += sizeof (struct T_opthdr) + ucredsize; 4911 cpid = DB_CPID(mp); 4912 UDP_STAT(udp_in_recvucred); 4913 } 4914 4915 if (udp->udp_ipv6_recvhoplimit) { 4916 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4917 UDP_STAT(udp_in_recvhoplimit); 4918 } 4919 4920 if (udp->udp_ipv6_recvtclass) { 4921 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4922 UDP_STAT(udp_in_recvtclass); 4923 } 
4924 4925 mp1 = allocb(udi_size, BPRI_MED); 4926 if (mp1 == NULL) { 4927 freemsg(mp); 4928 if (options_mp != NULL) 4929 freeb(options_mp); 4930 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4931 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4932 BUMP_MIB(&udp_mib, udpInErrors); 4933 return; 4934 } 4935 mp1->b_cont = mp; 4936 mp = mp1; 4937 mp->b_datap->db_type = M_PROTO; 4938 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4939 mp->b_wptr = (uchar_t *)tudi + udi_size; 4940 tudi->PRIM_type = T_UNITDATA_IND; 4941 tudi->SRC_length = sizeof (sin6_t); 4942 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4943 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4944 sizeof (sin6_t); 4945 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4946 tudi->OPT_length = udi_size; 4947 sin6 = (sin6_t *)&tudi[1]; 4948 if (ipversion == IPV4_VERSION) { 4949 in6_addr_t v6dst; 4950 4951 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4952 &sin6->sin6_addr); 4953 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4954 &v6dst); 4955 sin6->sin6_flowinfo = 0; 4956 sin6->sin6_scope_id = 0; 4957 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4958 connp->conn_zoneid); 4959 } else { 4960 sin6->sin6_addr = ip6h->ip6_src; 4961 /* No sin6_flowinfo per API */ 4962 sin6->sin6_flowinfo = 0; 4963 /* For link-scope source pass up scope id */ 4964 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4965 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4966 sin6->sin6_scope_id = ipp.ipp_ifindex; 4967 else 4968 sin6->sin6_scope_id = 0; 4969 sin6->__sin6_src_id = ip_srcid_find_addr( 4970 &ip6h->ip6_dst, connp->conn_zoneid); 4971 } 4972 sin6->sin6_port = udpha->uha_src_port; 4973 sin6->sin6_family = udp->udp_family; 4974 4975 if (udi_size != 0) { 4976 uchar_t *dstopt; 4977 4978 dstopt = (uchar_t *)&sin6[1]; 4979 if (udp->udp_ipv6_recvpktinfo && 4980 (ipp.ipp_fields & IPPF_IFINDEX)) { 4981 struct T_opthdr *toh; 4982 struct in6_pktinfo *pkti; 4983 4984 toh = (struct T_opthdr *)dstopt; 4985 toh->level = IPPROTO_IPV6; 4986 
toh->name = IPV6_PKTINFO; 4987 toh->len = sizeof (struct T_opthdr) + 4988 sizeof (*pkti); 4989 toh->status = 0; 4990 dstopt += sizeof (struct T_opthdr); 4991 pkti = (struct in6_pktinfo *)dstopt; 4992 if (ipversion == IPV6_VERSION) 4993 pkti->ipi6_addr = ip6h->ip6_dst; 4994 else 4995 IN6_IPADDR_TO_V4MAPPED( 4996 ((ipha_t *)rptr)->ipha_dst, 4997 &pkti->ipi6_addr); 4998 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4999 dstopt += sizeof (*pkti); 5000 udi_size -= toh->len; 5001 } 5002 if (udp->udp_ipv6_recvhoplimit) { 5003 struct T_opthdr *toh; 5004 5005 toh = (struct T_opthdr *)dstopt; 5006 toh->level = IPPROTO_IPV6; 5007 toh->name = IPV6_HOPLIMIT; 5008 toh->len = sizeof (struct T_opthdr) + 5009 sizeof (uint_t); 5010 toh->status = 0; 5011 dstopt += sizeof (struct T_opthdr); 5012 if (ipversion == IPV6_VERSION) 5013 *(uint_t *)dstopt = ip6h->ip6_hops; 5014 else 5015 *(uint_t *)dstopt = 5016 ((ipha_t *)rptr)->ipha_ttl; 5017 dstopt += sizeof (uint_t); 5018 udi_size -= toh->len; 5019 } 5020 if (udp->udp_ipv6_recvtclass) { 5021 struct T_opthdr *toh; 5022 5023 toh = (struct T_opthdr *)dstopt; 5024 toh->level = IPPROTO_IPV6; 5025 toh->name = IPV6_TCLASS; 5026 toh->len = sizeof (struct T_opthdr) + 5027 sizeof (uint_t); 5028 toh->status = 0; 5029 dstopt += sizeof (struct T_opthdr); 5030 if (ipversion == IPV6_VERSION) { 5031 *(uint_t *)dstopt = 5032 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 5033 } else { 5034 ipha_t *ipha = (ipha_t *)rptr; 5035 *(uint_t *)dstopt = 5036 ipha->ipha_type_of_service; 5037 } 5038 dstopt += sizeof (uint_t); 5039 udi_size -= toh->len; 5040 } 5041 if (udp->udp_ipv6_recvhopopts && 5042 (ipp.ipp_fields & IPPF_HOPOPTS)) { 5043 size_t hlen; 5044 5045 hlen = copy_hop_opts(&ipp, dstopt); 5046 dstopt += hlen; 5047 udi_size -= hlen; 5048 } 5049 if (udp->udp_ipv6_recvdstopts && 5050 udp->udp_ipv6_recvrthdr && 5051 (ipp.ipp_fields & IPPF_RTHDR) && 5052 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 5053 struct T_opthdr *toh; 5054 5055 toh = (struct T_opthdr *)dstopt; 5056 toh->level = 
IPPROTO_IPV6; 5057 toh->name = IPV6_DSTOPTS; 5058 toh->len = sizeof (struct T_opthdr) + 5059 ipp.ipp_rtdstoptslen; 5060 toh->status = 0; 5061 dstopt += sizeof (struct T_opthdr); 5062 bcopy(ipp.ipp_rtdstopts, dstopt, 5063 ipp.ipp_rtdstoptslen); 5064 dstopt += ipp.ipp_rtdstoptslen; 5065 udi_size -= toh->len; 5066 } 5067 if (udp->udp_ipv6_recvrthdr && 5068 (ipp.ipp_fields & IPPF_RTHDR)) { 5069 struct T_opthdr *toh; 5070 5071 toh = (struct T_opthdr *)dstopt; 5072 toh->level = IPPROTO_IPV6; 5073 toh->name = IPV6_RTHDR; 5074 toh->len = sizeof (struct T_opthdr) + 5075 ipp.ipp_rthdrlen; 5076 toh->status = 0; 5077 dstopt += sizeof (struct T_opthdr); 5078 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 5079 dstopt += ipp.ipp_rthdrlen; 5080 udi_size -= toh->len; 5081 } 5082 if (udp->udp_ipv6_recvdstopts && 5083 (ipp.ipp_fields & IPPF_DSTOPTS)) { 5084 struct T_opthdr *toh; 5085 5086 toh = (struct T_opthdr *)dstopt; 5087 toh->level = IPPROTO_IPV6; 5088 toh->name = IPV6_DSTOPTS; 5089 toh->len = sizeof (struct T_opthdr) + 5090 ipp.ipp_dstoptslen; 5091 toh->status = 0; 5092 dstopt += sizeof (struct T_opthdr); 5093 bcopy(ipp.ipp_dstopts, dstopt, 5094 ipp.ipp_dstoptslen); 5095 dstopt += ipp.ipp_dstoptslen; 5096 udi_size -= toh->len; 5097 } 5098 5099 if (cr != NULL) { 5100 struct T_opthdr *toh; 5101 5102 toh = (struct T_opthdr *)dstopt; 5103 toh->level = SOL_SOCKET; 5104 toh->name = SCM_UCRED; 5105 toh->len = sizeof (struct T_opthdr) + ucredsize; 5106 toh->status = 0; 5107 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5108 dstopt += toh->len; 5109 udi_size -= toh->len; 5110 } 5111 /* Consumed all of allocated space */ 5112 ASSERT(udi_size == 0); 5113 } 5114 #undef sin6 5115 /* No IP_RECVDSTADDR for IPv6. 
*/ 5116 } 5117 5118 BUMP_MIB(&udp_mib, udpInDatagrams); 5119 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5120 "udp_rput_end: q %p (%S)", q, "end"); 5121 if (options_mp != NULL) 5122 freeb(options_mp); 5123 5124 if (udp->udp_direct_sockfs) { 5125 /* 5126 * There is nothing above us except for the stream head; 5127 * use the read-side synchronous stream interface in 5128 * order to reduce the time spent in interrupt thread. 5129 */ 5130 ASSERT(udp->udp_issocket); 5131 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 5132 } else { 5133 /* 5134 * Use regular STREAMS interface to pass data upstream 5135 * if this is not a socket endpoint, or if we have 5136 * switched over to the slow mode due to sockmod being 5137 * popped or a module being pushed on top of us. 5138 */ 5139 putnext(UDP_RD(q), mp); 5140 } 5141 return; 5142 5143 tossit: 5144 freemsg(mp); 5145 if (options_mp != NULL) 5146 freeb(options_mp); 5147 BUMP_MIB(&udp_mib, udpInErrors); 5148 } 5149 5150 void 5151 udp_conn_recv(conn_t *connp, mblk_t *mp) 5152 { 5153 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 5154 } 5155 5156 /* ARGSUSED */ 5157 static void 5158 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 5159 { 5160 udp_input((conn_t *)arg, mp); 5161 _UDP_EXIT((conn_t *)arg); 5162 } 5163 5164 /* 5165 * Process non-M_DATA messages as well as M_DATA messages that requires 5166 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 
5167 */ 5168 static void 5169 udp_rput_other(queue_t *q, mblk_t *mp) 5170 { 5171 struct T_unitdata_ind *tudi; 5172 mblk_t *mp1; 5173 uchar_t *rptr; 5174 uchar_t *new_rptr; 5175 int hdr_length; 5176 int udi_size; /* Size of T_unitdata_ind */ 5177 int opt_len; /* Length of IP options */ 5178 sin_t *sin; 5179 struct T_error_ack *tea; 5180 mblk_t *options_mp = NULL; 5181 in_pktinfo_t *pinfo; 5182 boolean_t recv_on = B_FALSE; 5183 cred_t *cr = NULL; 5184 udp_t *udp = Q_TO_UDP(q); 5185 pid_t cpid; 5186 cred_t *rcr = udp->udp_connp->conn_cred; 5187 5188 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 5189 "udp_rput_other: q %p mp %p", q, mp); 5190 5191 ASSERT(OK_32PTR(mp->b_rptr)); 5192 rptr = mp->b_rptr; 5193 5194 switch (mp->b_datap->db_type) { 5195 case M_CTL: 5196 /* 5197 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 5198 */ 5199 recv_on = B_TRUE; 5200 options_mp = mp; 5201 pinfo = (in_pktinfo_t *)options_mp->b_rptr; 5202 5203 /* 5204 * The actual data is in mp->b_cont 5205 */ 5206 mp = mp->b_cont; 5207 ASSERT(OK_32PTR(mp->b_rptr)); 5208 rptr = mp->b_rptr; 5209 break; 5210 case M_DATA: 5211 /* 5212 * M_DATA messages contain IPv4 datagrams. They are handled 5213 * after this switch. 5214 */ 5215 break; 5216 case M_PROTO: 5217 case M_PCPROTO: 5218 /* M_PROTO messages contain some type of TPI message. */ 5219 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 5220 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5221 freemsg(mp); 5222 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5223 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 5224 return; 5225 } 5226 tea = (struct T_error_ack *)rptr; 5227 5228 switch (tea->PRIM_type) { 5229 case T_ERROR_ACK: 5230 switch (tea->ERROR_prim) { 5231 case O_T_BIND_REQ: 5232 case T_BIND_REQ: { 5233 /* 5234 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5235 * clear out the associated port and source 5236 * address before passing the message 5237 * upstream. If this was caused by a T_CONN_REQ 5238 * revert back to bound state. 
5239 */ 5240 udp_fanout_t *udpf; 5241 5242 udpf = &udp_bind_fanout[ 5243 UDP_BIND_HASH(udp->udp_port)]; 5244 mutex_enter(&udpf->uf_lock); 5245 if (udp->udp_state == TS_DATA_XFER) { 5246 /* Connect failed */ 5247 tea->ERROR_prim = T_CONN_REQ; 5248 /* Revert back to the bound source */ 5249 udp->udp_v6src = udp->udp_bound_v6src; 5250 udp->udp_state = TS_IDLE; 5251 mutex_exit(&udpf->uf_lock); 5252 if (udp->udp_family == AF_INET6) 5253 (void) udp_build_hdrs(q, udp); 5254 break; 5255 } 5256 5257 if (udp->udp_discon_pending) { 5258 tea->ERROR_prim = T_DISCON_REQ; 5259 udp->udp_discon_pending = 0; 5260 } 5261 V6_SET_ZERO(udp->udp_v6src); 5262 V6_SET_ZERO(udp->udp_bound_v6src); 5263 udp->udp_state = TS_UNBND; 5264 udp_bind_hash_remove(udp, B_TRUE); 5265 udp->udp_port = 0; 5266 mutex_exit(&udpf->uf_lock); 5267 if (udp->udp_family == AF_INET6) 5268 (void) udp_build_hdrs(q, udp); 5269 break; 5270 } 5271 default: 5272 break; 5273 } 5274 break; 5275 case T_BIND_ACK: 5276 udp_rput_bind_ack(q, mp); 5277 return; 5278 5279 case T_OPTMGMT_ACK: 5280 case T_OK_ACK: 5281 break; 5282 default: 5283 freemsg(mp); 5284 return; 5285 } 5286 putnext(UDP_RD(q), mp); 5287 return; 5288 } 5289 5290 /* 5291 * This is the inbound data path. 5292 * First, we make sure the data contains both IP and UDP headers. 5293 * 5294 * This handle IPv4 packets for only AF_INET sockets. 5295 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 5296 * is no need saving the options. 5297 */ 5298 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 5299 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 5300 if (mp->b_wptr - rptr < hdr_length) { 5301 if (!pullupmsg(mp, hdr_length)) { 5302 freemsg(mp); 5303 if (options_mp != NULL) 5304 freeb(options_mp); 5305 BUMP_MIB(&udp_mib, udpInErrors); 5306 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5307 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 5308 BUMP_MIB(&udp_mib, udpInErrors); 5309 return; 5310 } 5311 rptr = mp->b_rptr; 5312 } 5313 /* Walk past the headers. 
*/ 5314 new_rptr = rptr + hdr_length; 5315 if (!udp->udp_rcvhdr) 5316 mp->b_rptr = new_rptr; 5317 5318 /* Save the options if any */ 5319 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 5320 if (opt_len > 0) { 5321 if (opt_len > udp->udp_ip_rcv_options_len) { 5322 if (udp->udp_ip_rcv_options_len) 5323 mi_free((char *)udp->udp_ip_rcv_options); 5324 udp->udp_ip_rcv_options_len = 0; 5325 udp->udp_ip_rcv_options = 5326 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 5327 if (udp->udp_ip_rcv_options) 5328 udp->udp_ip_rcv_options_len = opt_len; 5329 } 5330 if (udp->udp_ip_rcv_options_len) { 5331 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 5332 udp->udp_ip_rcv_options, opt_len); 5333 /* Adjust length if we are resusing the space */ 5334 udp->udp_ip_rcv_options_len = opt_len; 5335 } 5336 } else if (udp->udp_ip_rcv_options_len) { 5337 mi_free((char *)udp->udp_ip_rcv_options); 5338 udp->udp_ip_rcv_options = NULL; 5339 udp->udp_ip_rcv_options_len = 0; 5340 } 5341 5342 /* 5343 * Normally only send up the address. 5344 * If IP_RECVDSTADDR is set we include the destination IP 5345 * address as an option. With IP_RECVOPTS we include all 5346 * the IP options. 
5347 */ 5348 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5349 if (udp->udp_recvdstaddr) { 5350 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5351 UDP_STAT(udp_in_recvdstaddr); 5352 } 5353 if (udp->udp_recvopts && opt_len > 0) { 5354 udi_size += sizeof (struct T_opthdr) + opt_len; 5355 UDP_STAT(udp_in_recvopts); 5356 } 5357 5358 /* 5359 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5360 * space accordingly 5361 */ 5362 if (udp->udp_recvif && recv_on && 5363 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5364 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5365 UDP_STAT(udp_in_recvif); 5366 } 5367 5368 if (udp->udp_recvslla && recv_on && 5369 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5370 udi_size += sizeof (struct T_opthdr) + 5371 sizeof (struct sockaddr_dl); 5372 UDP_STAT(udp_in_recvslla); 5373 } 5374 5375 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5376 udi_size += sizeof (struct T_opthdr) + ucredsize; 5377 cpid = DB_CPID(mp); 5378 UDP_STAT(udp_in_recvucred); 5379 } 5380 /* 5381 * If IP_RECVTTL is set allocate the appropriate sized buffer 5382 */ 5383 if (udp->udp_recvttl) { 5384 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5385 UDP_STAT(udp_in_recvttl); 5386 } 5387 5388 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 5389 mp1 = allocb(udi_size, BPRI_MED); 5390 if (mp1 == NULL) { 5391 freemsg(mp); 5392 if (options_mp != NULL) 5393 freeb(options_mp); 5394 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5395 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5396 BUMP_MIB(&udp_mib, udpInErrors); 5397 return; 5398 } 5399 mp1->b_cont = mp; 5400 mp = mp1; 5401 mp->b_datap->db_type = M_PROTO; 5402 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5403 mp->b_wptr = (uchar_t *)tudi + udi_size; 5404 tudi->PRIM_type = T_UNITDATA_IND; 5405 tudi->SRC_length = sizeof (sin_t); 5406 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5407 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5408 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5409 tudi->OPT_length = udi_size; 5410 5411 sin = (sin_t *)&tudi[1]; 5412 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5413 sin->sin_port = ((in_port_t *) 5414 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5415 sin->sin_family = AF_INET; 5416 *(uint32_t *)&sin->sin_zero[0] = 0; 5417 *(uint32_t *)&sin->sin_zero[4] = 0; 5418 5419 /* 5420 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5421 * IP_RECVTTL has been set. 5422 */ 5423 if (udi_size != 0) { 5424 /* 5425 * Copy in destination address before options to avoid any 5426 * padding issues. 
5427 */ 5428 char *dstopt; 5429 5430 dstopt = (char *)&sin[1]; 5431 if (udp->udp_recvdstaddr) { 5432 struct T_opthdr *toh; 5433 ipaddr_t *dstptr; 5434 5435 toh = (struct T_opthdr *)dstopt; 5436 toh->level = IPPROTO_IP; 5437 toh->name = IP_RECVDSTADDR; 5438 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5439 toh->status = 0; 5440 dstopt += sizeof (struct T_opthdr); 5441 dstptr = (ipaddr_t *)dstopt; 5442 *dstptr = (((ipaddr_t *)rptr)[4]); 5443 dstopt += sizeof (ipaddr_t); 5444 udi_size -= toh->len; 5445 } 5446 if (udp->udp_recvopts && udi_size != 0) { 5447 struct T_opthdr *toh; 5448 5449 toh = (struct T_opthdr *)dstopt; 5450 toh->level = IPPROTO_IP; 5451 toh->name = IP_RECVOPTS; 5452 toh->len = sizeof (struct T_opthdr) + opt_len; 5453 toh->status = 0; 5454 dstopt += sizeof (struct T_opthdr); 5455 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5456 dstopt += opt_len; 5457 udi_size -= toh->len; 5458 } 5459 5460 if (udp->udp_recvslla && recv_on && 5461 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5462 5463 struct T_opthdr *toh; 5464 struct sockaddr_dl *dstptr; 5465 5466 toh = (struct T_opthdr *)dstopt; 5467 toh->level = IPPROTO_IP; 5468 toh->name = IP_RECVSLLA; 5469 toh->len = sizeof (struct T_opthdr) + 5470 sizeof (struct sockaddr_dl); 5471 toh->status = 0; 5472 dstopt += sizeof (struct T_opthdr); 5473 dstptr = (struct sockaddr_dl *)dstopt; 5474 bcopy(&pinfo->in_pkt_slla, dstptr, 5475 sizeof (struct sockaddr_dl)); 5476 dstopt += sizeof (struct sockaddr_dl); 5477 udi_size -= toh->len; 5478 } 5479 5480 if (udp->udp_recvif && recv_on && 5481 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5482 5483 struct T_opthdr *toh; 5484 uint_t *dstptr; 5485 5486 toh = (struct T_opthdr *)dstopt; 5487 toh->level = IPPROTO_IP; 5488 toh->name = IP_RECVIF; 5489 toh->len = sizeof (struct T_opthdr) + 5490 sizeof (uint_t); 5491 toh->status = 0; 5492 dstopt += sizeof (struct T_opthdr); 5493 dstptr = (uint_t *)dstopt; 5494 *dstptr = pinfo->in_pkt_ifindex; 5495 dstopt += sizeof (uint_t); 5496 
udi_size -= toh->len; 5497 } 5498 5499 if (cr != NULL) { 5500 struct T_opthdr *toh; 5501 5502 toh = (struct T_opthdr *)dstopt; 5503 toh->level = SOL_SOCKET; 5504 toh->name = SCM_UCRED; 5505 toh->len = sizeof (struct T_opthdr) + ucredsize; 5506 toh->status = 0; 5507 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5508 dstopt += toh->len; 5509 udi_size -= toh->len; 5510 } 5511 5512 if (udp->udp_recvttl) { 5513 struct T_opthdr *toh; 5514 uint8_t *dstptr; 5515 5516 toh = (struct T_opthdr *)dstopt; 5517 toh->level = IPPROTO_IP; 5518 toh->name = IP_RECVTTL; 5519 toh->len = sizeof (struct T_opthdr) + 5520 sizeof (uint8_t); 5521 toh->status = 0; 5522 dstopt += sizeof (struct T_opthdr); 5523 dstptr = (uint8_t *)dstopt; 5524 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5525 dstopt += sizeof (uint8_t); 5526 udi_size -= toh->len; 5527 } 5528 5529 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5530 } 5531 BUMP_MIB(&udp_mib, udpInDatagrams); 5532 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5533 "udp_rput_other_end: q %p (%S)", q, "end"); 5534 if (options_mp != NULL) 5535 freeb(options_mp); 5536 5537 if (udp->udp_direct_sockfs) { 5538 /* 5539 * There is nothing above us except for the stream head; 5540 * use the read-side synchronous stream interface in 5541 * order to reduce the time spent in interrupt thread. 5542 */ 5543 ASSERT(udp->udp_issocket); 5544 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5545 } else { 5546 /* 5547 * Use regular STREAMS interface to pass data upstream 5548 * if this is not a socket endpoint, or if we have 5549 * switched over to the slow mode due to sockmod being 5550 * popped or a module being pushed on top of us. 
5551 */ 5552 putnext(UDP_RD(q), mp); 5553 } 5554 } 5555 5556 /* ARGSUSED */ 5557 static void 5558 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5559 { 5560 conn_t *connp = arg; 5561 5562 udp_rput_other(connp->conn_rq, mp); 5563 udp_exit(connp); 5564 } 5565 5566 /* 5567 * Process a T_BIND_ACK 5568 */ 5569 static void 5570 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5571 { 5572 udp_t *udp = Q_TO_UDP(q); 5573 mblk_t *mp1; 5574 ire_t *ire; 5575 struct T_bind_ack *tba; 5576 uchar_t *addrp; 5577 ipa_conn_t *ac; 5578 ipa6_conn_t *ac6; 5579 5580 if (udp->udp_discon_pending) 5581 udp->udp_discon_pending = 0; 5582 5583 /* 5584 * If a broadcast/multicast address was bound set 5585 * the source address to 0. 5586 * This ensures no datagrams with broadcast address 5587 * as source address are emitted (which would violate 5588 * RFC1122 - Hosts requirements) 5589 * 5590 * Note that when connecting the returned IRE is 5591 * for the destination address and we only perform 5592 * the broadcast check for the source address (it 5593 * is OK to connect to a broadcast/multicast address.) 5594 */ 5595 mp1 = mp->b_cont; 5596 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5597 ire = (ire_t *)mp1->b_rptr; 5598 5599 /* 5600 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5601 * local address. 
5602 */ 5603 if (ire->ire_type == IRE_BROADCAST && 5604 udp->udp_state != TS_DATA_XFER) { 5605 /* This was just a local bind to a broadcast addr */ 5606 V6_SET_ZERO(udp->udp_v6src); 5607 if (udp->udp_family == AF_INET6) 5608 (void) udp_build_hdrs(q, udp); 5609 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5610 /* 5611 * Local address not yet set - pick it from the 5612 * T_bind_ack 5613 */ 5614 tba = (struct T_bind_ack *)mp->b_rptr; 5615 addrp = &mp->b_rptr[tba->ADDR_offset]; 5616 switch (udp->udp_family) { 5617 case AF_INET: 5618 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5619 ac = (ipa_conn_t *)addrp; 5620 } else { 5621 ASSERT(tba->ADDR_length == 5622 sizeof (ipa_conn_x_t)); 5623 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5624 } 5625 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5626 &udp->udp_v6src); 5627 break; 5628 case AF_INET6: 5629 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5630 ac6 = (ipa6_conn_t *)addrp; 5631 } else { 5632 ASSERT(tba->ADDR_length == 5633 sizeof (ipa6_conn_x_t)); 5634 ac6 = &((ipa6_conn_x_t *) 5635 addrp)->ac6x_conn; 5636 } 5637 udp->udp_v6src = ac6->ac6_laddr; 5638 (void) udp_build_hdrs(q, udp); 5639 break; 5640 } 5641 } 5642 mp1 = mp1->b_cont; 5643 } 5644 /* 5645 * Look for one or more appended ACK message added by 5646 * udp_connect or udp_disconnect. 5647 * If none found just send up the T_BIND_ACK. 5648 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5649 * udp_disconnect has appended a T_OK_ACK. 
5650 */ 5651 if (mp1 != NULL) { 5652 if (mp->b_cont == mp1) 5653 mp->b_cont = NULL; 5654 else { 5655 ASSERT(mp->b_cont->b_cont == mp1); 5656 mp->b_cont->b_cont = NULL; 5657 } 5658 freemsg(mp); 5659 mp = mp1; 5660 while (mp != NULL) { 5661 mp1 = mp->b_cont; 5662 mp->b_cont = NULL; 5663 putnext(UDP_RD(q), mp); 5664 mp = mp1; 5665 } 5666 return; 5667 } 5668 freemsg(mp->b_cont); 5669 mp->b_cont = NULL; 5670 putnext(UDP_RD(q), mp); 5671 } 5672 5673 /* 5674 * return SNMP stuff in buffer in mpdata 5675 */ 5676 int 5677 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5678 { 5679 mblk_t *mpdata; 5680 mblk_t *mp_conn_ctl; 5681 mblk_t *mp_attr_ctl; 5682 mblk_t *mp6_conn_ctl; 5683 mblk_t *mp6_attr_ctl; 5684 mblk_t *mp_conn_tail; 5685 mblk_t *mp_attr_tail; 5686 mblk_t *mp6_conn_tail; 5687 mblk_t *mp6_attr_tail; 5688 struct opthdr *optp; 5689 mib2_udpEntry_t ude; 5690 mib2_udp6Entry_t ude6; 5691 mib2_transportMLPEntry_t mlp; 5692 int state; 5693 zoneid_t zoneid; 5694 int i; 5695 connf_t *connfp; 5696 conn_t *connp = Q_TO_CONN(q); 5697 udp_t *udp = connp->conn_udp; 5698 int v4_conn_idx; 5699 int v6_conn_idx; 5700 boolean_t needattr; 5701 5702 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 5703 if (mpctl == NULL || 5704 (mpdata = mpctl->b_cont) == NULL || 5705 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5706 (mp_attr_ctl = copymsg(mpctl)) == NULL || 5707 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 5708 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 5709 freemsg(mp_conn_ctl); 5710 freemsg(mp_attr_ctl); 5711 freemsg(mp6_conn_ctl); 5712 return (0); 5713 } 5714 5715 zoneid = connp->conn_zoneid; 5716 5717 /* fixed length structure for IPv4 and IPv6 counters */ 5718 SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5719 SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5720 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5721 optp->level = MIB2_UDP; 5722 optp->name = 0; 5723 (void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib)); 5724 optp->len = 
msgdsize(mpdata); 5725 qreply(q, mpctl); 5726 5727 mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL; 5728 v4_conn_idx = v6_conn_idx = 0; 5729 5730 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5731 connfp = &ipcl_globalhash_fanout[i]; 5732 connp = NULL; 5733 5734 while ((connp = ipcl_get_next_conn(connfp, connp, 5735 IPCL_UDP))) { 5736 udp = connp->conn_udp; 5737 if (zoneid != connp->conn_zoneid) 5738 continue; 5739 5740 /* 5741 * Note that the port numbers are sent in 5742 * host byte order 5743 */ 5744 5745 if (udp->udp_state == TS_UNBND) 5746 state = MIB2_UDP_unbound; 5747 else if (udp->udp_state == TS_IDLE) 5748 state = MIB2_UDP_idle; 5749 else if (udp->udp_state == TS_DATA_XFER) 5750 state = MIB2_UDP_connected; 5751 else 5752 state = MIB2_UDP_unknown; 5753 5754 needattr = B_FALSE; 5755 bzero(&mlp, sizeof (mlp)); 5756 if (connp->conn_mlp_type != mlptSingle) { 5757 if (connp->conn_mlp_type == mlptShared || 5758 connp->conn_mlp_type == mlptBoth) 5759 mlp.tme_flags |= MIB2_TMEF_SHARED; 5760 if (connp->conn_mlp_type == mlptPrivate || 5761 connp->conn_mlp_type == mlptBoth) 5762 mlp.tme_flags |= MIB2_TMEF_PRIVATE; 5763 needattr = B_TRUE; 5764 } 5765 5766 /* 5767 * Create an IPv4 table entry for IPv4 entries and also 5768 * any IPv6 entries which are bound to in6addr_any 5769 * (i.e. anything a IPv4 peer could connect/send to). 
5770 */ 5771 if (udp->udp_ipversion == IPV4_VERSION || 5772 (udp->udp_state <= TS_IDLE && 5773 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5774 ude.udpEntryInfo.ue_state = state; 5775 /* 5776 * If in6addr_any this will set it to 5777 * INADDR_ANY 5778 */ 5779 ude.udpLocalAddress = 5780 V4_PART_OF_V6(udp->udp_v6src); 5781 ude.udpLocalPort = ntohs(udp->udp_port); 5782 if (udp->udp_state == TS_DATA_XFER) { 5783 /* 5784 * Can potentially get here for 5785 * v6 socket if another process 5786 * (say, ping) has just done a 5787 * sendto(), changing the state 5788 * from the TS_IDLE above to 5789 * TS_DATA_XFER by the time we hit 5790 * this part of the code. 5791 */ 5792 ude.udpEntryInfo.ue_RemoteAddress = 5793 V4_PART_OF_V6(udp->udp_v6dst); 5794 ude.udpEntryInfo.ue_RemotePort = 5795 ntohs(udp->udp_dstport); 5796 } else { 5797 ude.udpEntryInfo.ue_RemoteAddress = 0; 5798 ude.udpEntryInfo.ue_RemotePort = 0; 5799 } 5800 (void) snmp_append_data2(mp_conn_ctl->b_cont, 5801 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5802 mlp.tme_connidx = v4_conn_idx++; 5803 if (needattr) 5804 (void) snmp_append_data2( 5805 mp_attr_ctl->b_cont, &mp_attr_tail, 5806 (char *)&mlp, sizeof (mlp)); 5807 } 5808 if (udp->udp_ipversion == IPV6_VERSION) { 5809 ude6.udp6EntryInfo.ue_state = state; 5810 ude6.udp6LocalAddress = udp->udp_v6src; 5811 ude6.udp6LocalPort = ntohs(udp->udp_port); 5812 ude6.udp6IfIndex = udp->udp_bound_if; 5813 if (udp->udp_state == TS_DATA_XFER) { 5814 ude6.udp6EntryInfo.ue_RemoteAddress = 5815 udp->udp_v6dst; 5816 ude6.udp6EntryInfo.ue_RemotePort = 5817 ntohs(udp->udp_dstport); 5818 } else { 5819 ude6.udp6EntryInfo.ue_RemoteAddress = 5820 sin6_null.sin6_addr; 5821 ude6.udp6EntryInfo.ue_RemotePort = 0; 5822 } 5823 (void) snmp_append_data2(mp6_conn_ctl->b_cont, 5824 &mp6_conn_tail, (char *)&ude6, 5825 sizeof (ude6)); 5826 mlp.tme_connidx = v6_conn_idx++; 5827 if (needattr) 5828 (void) snmp_append_data2( 5829 mp6_attr_ctl->b_cont, 5830 &mp6_attr_tail, (char *)&mlp, 5831 sizeof 
(mlp)); 5832 } 5833 } 5834 } 5835 5836 /* IPv4 UDP endpoints */ 5837 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5838 sizeof (struct T_optmgmt_ack)]; 5839 optp->level = MIB2_UDP; 5840 optp->name = MIB2_UDP_ENTRY; 5841 optp->len = msgdsize(mp_conn_ctl->b_cont); 5842 qreply(q, mp_conn_ctl); 5843 5844 /* table of MLP attributes... */ 5845 optp = (struct opthdr *)&mp_attr_ctl->b_rptr[ 5846 sizeof (struct T_optmgmt_ack)]; 5847 optp->level = MIB2_UDP; 5848 optp->name = EXPER_XPORT_MLP; 5849 optp->len = msgdsize(mp_attr_ctl->b_cont); 5850 if (optp->len == 0) 5851 freemsg(mp_attr_ctl); 5852 else 5853 qreply(q, mp_attr_ctl); 5854 5855 /* IPv6 UDP endpoints */ 5856 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5857 sizeof (struct T_optmgmt_ack)]; 5858 optp->level = MIB2_UDP6; 5859 optp->name = MIB2_UDP6_ENTRY; 5860 optp->len = msgdsize(mp6_conn_ctl->b_cont); 5861 qreply(q, mp6_conn_ctl); 5862 5863 /* table of MLP attributes... */ 5864 optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[ 5865 sizeof (struct T_optmgmt_ack)]; 5866 optp->level = MIB2_UDP6; 5867 optp->name = EXPER_XPORT_MLP; 5868 optp->len = msgdsize(mp6_attr_ctl->b_cont); 5869 if (optp->len == 0) 5870 freemsg(mp6_attr_ctl); 5871 else 5872 qreply(q, mp6_attr_ctl); 5873 5874 return (1); 5875 } 5876 5877 /* 5878 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 5879 * NOTE: Per MIB-II, UDP has no writable data. 5880 * TODO: If this ever actually tries to set anything, it needs to be 5881 * to do the appropriate locking. 
5882 */ 5883 /* ARGSUSED */ 5884 int 5885 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5886 uchar_t *ptr, int len) 5887 { 5888 switch (level) { 5889 case MIB2_UDP: 5890 return (0); 5891 default: 5892 return (1); 5893 } 5894 } 5895 5896 static void 5897 udp_report_item(mblk_t *mp, udp_t *udp) 5898 { 5899 char *state; 5900 char addrbuf1[INET6_ADDRSTRLEN]; 5901 char addrbuf2[INET6_ADDRSTRLEN]; 5902 uint_t print_len, buf_len; 5903 5904 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5905 ASSERT(buf_len >= 0); 5906 if (buf_len == 0) 5907 return; 5908 5909 if (udp->udp_state == TS_UNBND) 5910 state = "UNBOUND"; 5911 else if (udp->udp_state == TS_IDLE) 5912 state = "IDLE"; 5913 else if (udp->udp_state == TS_DATA_XFER) 5914 state = "CONNECTED"; 5915 else 5916 state = "UnkState"; 5917 print_len = snprintf((char *)mp->b_wptr, buf_len, 5918 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5919 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5920 inet_ntop(AF_INET6, &udp->udp_v6src, 5921 addrbuf1, sizeof (addrbuf1)), 5922 inet_ntop(AF_INET6, &udp->udp_v6dst, 5923 addrbuf2, sizeof (addrbuf2)), 5924 ntohs(udp->udp_dstport), state); 5925 if (print_len < buf_len) { 5926 mp->b_wptr += print_len; 5927 } else { 5928 mp->b_wptr += buf_len; 5929 } 5930 } 5931 5932 /* Report for ndd "udp_status" */ 5933 /* ARGSUSED */ 5934 static int 5935 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5936 { 5937 zoneid_t zoneid; 5938 connf_t *connfp; 5939 conn_t *connp = Q_TO_CONN(q); 5940 udp_t *udp = connp->conn_udp; 5941 int i; 5942 5943 /* 5944 * Because of the ndd constraint, at most we can have 64K buffer 5945 * to put in all UDP info. So to be more efficient, just 5946 * allocate a 64K buffer here, assuming we need that large buffer. 5947 * This may be a problem as any user can read udp_status. Therefore 5948 * we limit the rate of doing this using udp_ndd_get_info_interval. 5949 * This should be OK as normal users should not do this too often. 
5950 */ 5951 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 5952 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 5953 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 5954 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5955 return (0); 5956 } 5957 } 5958 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5959 /* The following may work even if we cannot get a large buf. */ 5960 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5961 return (0); 5962 } 5963 (void) mi_mpprintf(mp, 5964 "UDP " MI_COL_HDRPAD_STR 5965 /* 12345678[89ABCDEF] */ 5966 " zone lport src addr dest addr port state"); 5967 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5968 5969 zoneid = connp->conn_zoneid; 5970 5971 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5972 connfp = &ipcl_globalhash_fanout[i]; 5973 connp = NULL; 5974 5975 while ((connp = ipcl_get_next_conn(connfp, connp, 5976 IPCL_UDP))) { 5977 udp = connp->conn_udp; 5978 if (zoneid != GLOBAL_ZONEID && 5979 zoneid != connp->conn_zoneid) 5980 continue; 5981 5982 udp_report_item(mp->b_cont, udp); 5983 } 5984 } 5985 udp_last_ndd_get_info_time = ddi_get_lbolt(); 5986 return (0); 5987 } 5988 5989 /* 5990 * This routine creates a T_UDERROR_IND message and passes it upstream. 5991 * The address and options are copied from the T_UNITDATA_REQ message 5992 * passed in mp. This message is freed. 
5993 */ 5994 static void 5995 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5996 t_scalar_t err) 5997 { 5998 struct T_unitdata_req *tudr; 5999 mblk_t *mp1; 6000 uchar_t *optaddr; 6001 t_scalar_t optlen; 6002 6003 if (DB_TYPE(mp) == M_DATA) { 6004 ASSERT(destaddr != NULL && destlen != 0); 6005 optaddr = NULL; 6006 optlen = 0; 6007 } else { 6008 if ((mp->b_wptr < mp->b_rptr) || 6009 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 6010 goto done; 6011 } 6012 tudr = (struct T_unitdata_req *)mp->b_rptr; 6013 destaddr = mp->b_rptr + tudr->DEST_offset; 6014 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 6015 destaddr + tudr->DEST_length < mp->b_rptr || 6016 destaddr + tudr->DEST_length > mp->b_wptr) { 6017 goto done; 6018 } 6019 optaddr = mp->b_rptr + tudr->OPT_offset; 6020 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 6021 optaddr + tudr->OPT_length < mp->b_rptr || 6022 optaddr + tudr->OPT_length > mp->b_wptr) { 6023 goto done; 6024 } 6025 destlen = tudr->DEST_length; 6026 optlen = tudr->OPT_length; 6027 } 6028 6029 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 6030 (char *)optaddr, optlen, err); 6031 if (mp1 != NULL) 6032 putnext(UDP_RD(q), mp1); 6033 6034 done: 6035 freemsg(mp); 6036 } 6037 6038 /* 6039 * This routine removes a port number association from a stream. It 6040 * is called by udp_wput to handle T_UNBIND_REQ messages. 6041 */ 6042 static void 6043 udp_unbind(queue_t *q, mblk_t *mp) 6044 { 6045 udp_t *udp = Q_TO_UDP(q); 6046 6047 /* If a bind has not been done, we can't unbind. 
*/ 6048 if (udp->udp_state == TS_UNBND) { 6049 udp_err_ack(q, mp, TOUTSTATE, 0); 6050 return; 6051 } 6052 if (cl_inet_unbind != NULL) { 6053 /* 6054 * Running in cluster mode - register unbind information 6055 */ 6056 if (udp->udp_ipversion == IPV4_VERSION) { 6057 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 6058 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 6059 (in_port_t)udp->udp_port); 6060 } else { 6061 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 6062 (uint8_t *)&(udp->udp_v6src), 6063 (in_port_t)udp->udp_port); 6064 } 6065 } 6066 6067 udp_bind_hash_remove(udp, B_FALSE); 6068 V6_SET_ZERO(udp->udp_v6src); 6069 V6_SET_ZERO(udp->udp_bound_v6src); 6070 udp->udp_port = 0; 6071 udp->udp_state = TS_UNBND; 6072 6073 if (udp->udp_family == AF_INET6) { 6074 int error; 6075 6076 /* Rebuild the header template */ 6077 error = udp_build_hdrs(q, udp); 6078 if (error != 0) { 6079 udp_err_ack(q, mp, TSYSERR, error); 6080 return; 6081 } 6082 } 6083 /* 6084 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 6085 * and therefore ip_unbind must never return NULL. 6086 */ 6087 mp = ip_unbind(q, mp); 6088 ASSERT(mp != NULL); 6089 putnext(UDP_RD(q), mp); 6090 } 6091 6092 /* 6093 * Don't let port fall into the privileged range. 6094 * Since the extra privileged ports can be arbitrary we also 6095 * ensure that we exclude those from consideration. 6096 * udp_g_epriv_ports is not sorted thus we loop over it until 6097 * there are no changes. 6098 */ 6099 static in_port_t 6100 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random) 6101 { 6102 int i; 6103 in_port_t nextport; 6104 boolean_t restart = B_FALSE; 6105 6106 if (random && udp_random_anon_port != 0) { 6107 (void) random_get_pseudo_bytes((uint8_t *)&port, 6108 sizeof (in_port_t)); 6109 /* 6110 * Unless changed by a sys admin, the smallest anon port 6111 * is 32768 and the largest anon port is 65535. It is 6112 * very likely (50%) for the random port to be smaller 6113 * than the smallest anon port. 
When that happens, 6114 * add port % (anon port range) to the smallest anon 6115 * port to get the random port. It should fall into the 6116 * valid anon port range. 6117 */ 6118 if (port < udp_smallest_anon_port) { 6119 port = udp_smallest_anon_port + 6120 port % (udp_largest_anon_port - 6121 udp_smallest_anon_port); 6122 } 6123 } 6124 6125 retry: 6126 if (port < udp_smallest_anon_port) 6127 port = udp_smallest_anon_port; 6128 6129 if (port > udp_largest_anon_port) { 6130 port = udp_smallest_anon_port; 6131 if (restart) 6132 return (0); 6133 restart = B_TRUE; 6134 } 6135 6136 if (port < udp_smallest_nonpriv_port) 6137 port = udp_smallest_nonpriv_port; 6138 6139 for (i = 0; i < udp_g_num_epriv_ports; i++) { 6140 if (port == udp_g_epriv_ports[i]) { 6141 port++; 6142 /* 6143 * Make sure that the port is in the 6144 * valid range. 6145 */ 6146 goto retry; 6147 } 6148 } 6149 6150 if (is_system_labeled() && 6151 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 6152 port, IPPROTO_UDP, B_TRUE)) != 0) { 6153 port = nextport; 6154 goto retry; 6155 } 6156 6157 return (port); 6158 } 6159 6160 static int 6161 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 6162 { 6163 int err; 6164 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 6165 udp_t *udp = Q_TO_UDP(wq); 6166 6167 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 6168 opt_storage, udp->udp_mac_exempt); 6169 if (err == 0) { 6170 err = tsol_update_options(&udp->udp_ip_snd_options, 6171 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 6172 opt_storage); 6173 } 6174 if (err != 0) { 6175 DTRACE_PROBE4( 6176 tx__ip__log__info__updatelabel__udp, 6177 char *, "queue(1) failed to update options(2) on mp(3)", 6178 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6179 } else { 6180 IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst); 6181 } 6182 return (err); 6183 } 6184 6185 static mblk_t * 6186 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 6187 uint_t srcid, int 
*error) 6188 { 6189 udp_t *udp = connp->conn_udp; 6190 queue_t *q = connp->conn_wq; 6191 mblk_t *mp1 = mp; 6192 mblk_t *mp2; 6193 ipha_t *ipha; 6194 int ip_hdr_length; 6195 uint32_t ip_len; 6196 udpha_t *udpha; 6197 udpattrs_t attrs; 6198 6199 *error = 0; 6200 6201 if (v4dst == INADDR_ANY) 6202 v4dst = htonl(INADDR_LOOPBACK); 6203 6204 /* 6205 * If options passed in, feed it for verification and handling 6206 */ 6207 attrs.udpattr_credset = B_FALSE; 6208 if (DB_TYPE(mp) != M_DATA) { 6209 mp1 = mp->b_cont; 6210 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 6211 attrs.udpattr_ipp = NULL; 6212 attrs.udpattr_mb = mp; 6213 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 6214 goto done; 6215 /* 6216 * Note: success in processing options. 6217 * mp option buffer represented by 6218 * OPT_length/offset now potentially modified 6219 * and contain option setting results 6220 */ 6221 ASSERT(*error == 0); 6222 } 6223 } 6224 6225 /* mp1 points to the M_DATA mblk carrying the packet */ 6226 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6227 6228 /* Check if our saved options are valid; update if not */ 6229 if (is_system_labeled()) { 6230 /* Using UDP MLP requires SCM_UCRED from user */ 6231 if (connp->conn_mlp_type != mlptSingle && 6232 !attrs.udpattr_credset) { 6233 DTRACE_PROBE4( 6234 tx__ip__log__info__output__udp, 6235 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 6236 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 6237 *error = ECONNREFUSED; 6238 goto done; 6239 } 6240 if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) || 6241 V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst) && 6242 (*error = udp_update_label(q, mp, v4dst)) != 0) 6243 goto done; 6244 } 6245 6246 /* Add an IP header */ 6247 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 6248 udp->udp_ip_snd_options_len; 6249 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 6250 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 6251 !OK_32PTR(ipha)) { 6252 mp2 = allocb(ip_hdr_length + 
udp_wroff_extra, BPRI_LO); 6253 if (mp2 == NULL) { 6254 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6255 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 6256 *error = ENOMEM; 6257 goto done; 6258 } 6259 mp2->b_wptr = DB_LIM(mp2); 6260 mp2->b_cont = mp1; 6261 mp1 = mp2; 6262 if (DB_TYPE(mp) != M_DATA) 6263 mp->b_cont = mp1; 6264 else 6265 mp = mp1; 6266 6267 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 6268 } 6269 ip_hdr_length -= UDPH_SIZE; 6270 #ifdef _BIG_ENDIAN 6271 /* Set version, header length, and tos */ 6272 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 6273 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 6274 udp->udp_type_of_service); 6275 /* Set ttl and protocol */ 6276 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 6277 #else 6278 /* Set version, header length, and tos */ 6279 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 6280 ((udp->udp_type_of_service << 8) | 6281 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 6282 /* Set ttl and protocol */ 6283 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 6284 #endif 6285 /* 6286 * Copy our address into the packet. If this is zero, 6287 * first look at __sin6_src_id for a hint. If we leave the source 6288 * as INADDR_ANY then ip will fill in the real source address. 
6289 */ 6290 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 6291 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 6292 in6_addr_t v6src; 6293 6294 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid); 6295 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 6296 } 6297 6298 ipha->ipha_fragment_offset_and_flags = 0; 6299 ipha->ipha_ident = 0; 6300 6301 mp1->b_rptr = (uchar_t *)ipha; 6302 6303 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 6304 (uintptr_t)UINT_MAX); 6305 6306 /* Determine length of packet */ 6307 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 6308 if ((mp2 = mp1->b_cont) != NULL) { 6309 do { 6310 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6311 ip_len += (uint32_t)MBLKL(mp2); 6312 } while ((mp2 = mp2->b_cont) != NULL); 6313 } 6314 /* 6315 * If the size of the packet is greater than the maximum allowed by 6316 * ip, return an error. Passing this down could cause panics because 6317 * the size will have wrapped and be inconsistent with the msg size. 6318 */ 6319 if (ip_len > IP_MAXPACKET) { 6320 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6321 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 6322 *error = EMSGSIZE; 6323 goto done; 6324 } 6325 ipha->ipha_length = htons((uint16_t)ip_len); 6326 ip_len -= ip_hdr_length; 6327 ip_len = htons((uint16_t)ip_len); 6328 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 6329 6330 /* 6331 * Copy in the destination address 6332 */ 6333 ipha->ipha_dst = v4dst; 6334 6335 /* 6336 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 6337 */ 6338 if (CLASSD(v4dst)) 6339 ipha->ipha_ttl = udp->udp_multicast_ttl; 6340 6341 udpha->uha_dst_port = port; 6342 udpha->uha_src_port = udp->udp_port; 6343 6344 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 6345 uint32_t cksum; 6346 6347 bcopy(udp->udp_ip_snd_options, &ipha[1], 6348 udp->udp_ip_snd_options_len); 6349 /* 6350 * Massage source route putting first source route in ipha_dst. 6351 * Ignore the destination in T_unitdata_req. 
6352 * Create a checksum adjustment for a source route, if any. 6353 */ 6354 cksum = ip_massage_options(ipha); 6355 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6356 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 6357 (ipha->ipha_dst & 0xFFFF); 6358 if ((int)cksum < 0) 6359 cksum--; 6360 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6361 /* 6362 * IP does the checksum if uha_checksum is non-zero, 6363 * We make it easy for IP to include our pseudo header 6364 * by putting our length in uha_checksum. 6365 */ 6366 cksum += ip_len; 6367 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6368 /* There might be a carry. */ 6369 cksum = (cksum & 0xFFFF) + (cksum >> 16); 6370 #ifdef _LITTLE_ENDIAN 6371 if (udp_do_checksum) 6372 ip_len = (cksum << 16) | ip_len; 6373 #else 6374 if (udp_do_checksum) 6375 ip_len = (ip_len << 16) | cksum; 6376 else 6377 ip_len <<= 16; 6378 #endif 6379 } else { 6380 /* 6381 * IP does the checksum if uha_checksum is non-zero, 6382 * We make it easy for IP to include our pseudo header 6383 * by putting our length in uha_checksum. 6384 */ 6385 if (udp_do_checksum) 6386 ip_len |= (ip_len << 16); 6387 #ifndef _LITTLE_ENDIAN 6388 else 6389 ip_len <<= 16; 6390 #endif 6391 } 6392 /* Set UDP length and checksum */ 6393 *((uint32_t *)&udpha->uha_length) = ip_len; 6394 if (DB_CRED(mp) != NULL) 6395 mblk_setcred(mp1, DB_CRED(mp)); 6396 6397 if (DB_TYPE(mp) != M_DATA) { 6398 ASSERT(mp != mp1); 6399 freeb(mp); 6400 } 6401 6402 /* mp has been consumed and we'll return success */ 6403 ASSERT(*error == 0); 6404 mp = NULL; 6405 6406 /* We're done. Pass the packet to ip. 
*/ 6407 BUMP_MIB(&udp_mib, udpOutDatagrams); 6408 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6409 "udp_wput_end: q %p (%S)", q, "end"); 6410 6411 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 6412 CONN_OUTBOUND_POLICY_PRESENT(connp) || 6413 connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || 6414 connp->conn_nofailover_ill != NULL || 6415 connp->conn_outgoing_ill != NULL || 6416 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 6417 IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) { 6418 UDP_STAT(udp_ip_send); 6419 ip_output(connp, mp1, connp->conn_wq, IP_WPUT); 6420 } else { 6421 udp_send_data(udp, connp->conn_wq, mp1, ipha); 6422 } 6423 6424 done: 6425 if (*error != 0) { 6426 ASSERT(mp != NULL); 6427 BUMP_MIB(&udp_mib, udpOutErrors); 6428 } 6429 return (mp); 6430 } 6431 6432 static void 6433 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 6434 { 6435 conn_t *connp = udp->udp_connp; 6436 ipaddr_t src, dst; 6437 ill_t *ill; 6438 ire_t *ire; 6439 ipif_t *ipif = NULL; 6440 mblk_t *ire_fp_mp; 6441 uint_t ire_fp_mp_len; 6442 uint16_t *up; 6443 uint32_t cksum, hcksum_txflags; 6444 queue_t *dev_q; 6445 boolean_t retry_caching; 6446 6447 dst = ipha->ipha_dst; 6448 src = ipha->ipha_src; 6449 ASSERT(ipha->ipha_ident == 0); 6450 6451 if (CLASSD(dst)) { 6452 int err; 6453 6454 ipif = conn_get_held_ipif(connp, 6455 &connp->conn_multicast_ipif, &err); 6456 6457 if (ipif == NULL || ipif->ipif_isv6 || 6458 (ipif->ipif_ill->ill_phyint->phyint_flags & 6459 PHYI_LOOPBACK)) { 6460 if (ipif != NULL) 6461 ipif_refrele(ipif); 6462 UDP_STAT(udp_ip_send); 6463 ip_output(connp, mp, q, IP_WPUT); 6464 return; 6465 } 6466 } 6467 6468 retry_caching = B_FALSE; 6469 mutex_enter(&connp->conn_lock); 6470 ire = connp->conn_ire_cache; 6471 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 6472 6473 if (ire == NULL || ire->ire_addr != dst || 6474 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 6475 retry_caching = B_TRUE; 6476 } else if (CLASSD(dst) && 
(ire->ire_type & IRE_CACHE)) { 6477 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6478 6479 ASSERT(ipif != NULL); 6480 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6481 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6482 retry_caching = B_TRUE; 6483 } 6484 6485 if (!retry_caching) { 6486 ASSERT(ire != NULL); 6487 IRE_REFHOLD(ire); 6488 mutex_exit(&connp->conn_lock); 6489 } else { 6490 boolean_t cached = B_FALSE; 6491 6492 connp->conn_ire_cache = NULL; 6493 mutex_exit(&connp->conn_lock); 6494 6495 /* Release the old ire */ 6496 if (ire != NULL) { 6497 IRE_REFRELE_NOTR(ire); 6498 ire = NULL; 6499 } 6500 6501 if (CLASSD(dst)) { 6502 ASSERT(ipif != NULL); 6503 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6504 connp->conn_zoneid, MBLK_GETLABEL(mp), 6505 MATCH_IRE_ILL_GROUP); 6506 } else { 6507 ASSERT(ipif == NULL); 6508 ire = ire_cache_lookup(dst, connp->conn_zoneid, 6509 MBLK_GETLABEL(mp)); 6510 } 6511 6512 if (ire == NULL) { 6513 if (ipif != NULL) 6514 ipif_refrele(ipif); 6515 UDP_STAT(udp_ire_null); 6516 ip_output(connp, mp, q, IP_WPUT); 6517 return; 6518 } 6519 IRE_REFHOLD_NOTR(ire); 6520 6521 mutex_enter(&connp->conn_lock); 6522 if (!(connp->conn_state_flags & CONN_CLOSING) && 6523 connp->conn_ire_cache == NULL) { 6524 rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6525 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6526 connp->conn_ire_cache = ire; 6527 cached = B_TRUE; 6528 } 6529 rw_exit(&ire->ire_bucket->irb_lock); 6530 } 6531 mutex_exit(&connp->conn_lock); 6532 6533 /* 6534 * We can continue to use the ire but since it was not 6535 * cached, we should drop the extra reference. 
6536 */ 6537 if (!cached) 6538 IRE_REFRELE_NOTR(ire); 6539 } 6540 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6541 ASSERT(!CLASSD(dst) || ipif != NULL); 6542 6543 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6544 (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL || 6545 ire->ire_max_frag < ntohs(ipha->ipha_length) || 6546 (ire_fp_mp = ire->ire_fp_mp) == NULL || 6547 (connp->conn_nexthop_set) || 6548 (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) { 6549 if (ipif != NULL) 6550 ipif_refrele(ipif); 6551 UDP_STAT(udp_ip_ire_send); 6552 IRE_REFRELE(ire); 6553 ip_output(connp, mp, q, IP_WPUT); 6554 return; 6555 } 6556 6557 BUMP_MIB(&ip_mib, ipOutRequests); 6558 6559 ill = ire_to_ill(ire); 6560 ASSERT(ill != NULL); 6561 6562 dev_q = ire->ire_stq->q_next; 6563 ASSERT(dev_q != NULL); 6564 /* 6565 * If the service thread is already running, or if the driver 6566 * queue is currently flow-controlled, queue this packet. 6567 */ 6568 if ((q->q_first != NULL || connp->conn_draining) || 6569 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 6570 if (ip_output_queue) { 6571 (void) putq(q, mp); 6572 } else { 6573 BUMP_MIB(&ip_mib, ipOutDiscards); 6574 freemsg(mp); 6575 } 6576 if (ipif != NULL) 6577 ipif_refrele(ipif); 6578 IRE_REFRELE(ire); 6579 return; 6580 } 6581 6582 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6583 #ifndef _BIG_ENDIAN 6584 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6585 #endif 6586 6587 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6588 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6589 src = ipha->ipha_src = ipif->ipif_src_addr; 6590 else 6591 src = ipha->ipha_src = ire->ire_src_addr; 6592 } 6593 6594 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6595 ASSERT(ill->ill_hcksum_capab != NULL); 6596 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6597 } else { 6598 hcksum_txflags = 0; 6599 } 6600 6601 /* pseudo-header checksum (do it in parts for 
IP header checksum) */ 6602 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6603 6604 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6605 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6606 if (*up != 0) { 6607 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6608 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6609 ntohs(ipha->ipha_length), cksum); 6610 6611 /* Software checksum? */ 6612 if (DB_CKSUMFLAGS(mp) == 0) { 6613 UDP_STAT(udp_out_sw_cksum); 6614 UDP_STAT_UPDATE(udp_out_sw_cksum_bytes, 6615 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6616 } 6617 } 6618 6619 ipha->ipha_fragment_offset_and_flags |= 6620 (uint32_t)htons(ire->ire_frag_flag); 6621 6622 /* Calculate IP header checksum if hardware isn't capable */ 6623 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6624 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6625 ((uint16_t *)ipha)[4]); 6626 } 6627 6628 if (CLASSD(dst)) { 6629 ilm_t *ilm; 6630 6631 ILM_WALKER_HOLD(ill); 6632 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6633 ILM_WALKER_RELE(ill); 6634 if (ilm != NULL) { 6635 ip_multicast_loopback(q, ill, mp, 6636 connp->conn_multicast_loop ? 0 : 6637 IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); 6638 } 6639 6640 /* If multicast TTL is 0 then we are done */ 6641 if (ipha->ipha_ttl == 0) { 6642 if (ipif != NULL) 6643 ipif_refrele(ipif); 6644 freemsg(mp); 6645 IRE_REFRELE(ire); 6646 return; 6647 } 6648 } 6649 6650 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6651 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6652 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6653 6654 UPDATE_OB_PKT_COUNT(ire); 6655 ire->ire_last_used_time = lbolt; 6656 6657 if (ILL_DLS_CAPABLE(ill)) { 6658 /* 6659 * Send the packet directly to DLD, where it may be queued 6660 * depending on the availability of transmit resources at 6661 * the media layer. 
6662 */ 6663 IP_DLS_ILL_TX(ill, mp); 6664 } else { 6665 putnext(ire->ire_stq, mp); 6666 } 6667 6668 if (ipif != NULL) 6669 ipif_refrele(ipif); 6670 IRE_REFRELE(ire); 6671 } 6672 6673 static boolean_t 6674 udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst) 6675 { 6676 udp_t *udp = Q_TO_UDP(wq); 6677 int err; 6678 uchar_t opt_storage[TSOL_MAX_IPV6_OPTION]; 6679 6680 err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred), 6681 dst, opt_storage, udp->udp_mac_exempt); 6682 if (err == 0) { 6683 err = tsol_update_sticky(&udp->udp_sticky_ipp, 6684 &udp->udp_label_len_v6, opt_storage); 6685 } 6686 if (err != 0) { 6687 DTRACE_PROBE4( 6688 tx__ip__log__drop__updatelabel__udp6, 6689 char *, "queue(1) failed to update options(2) on mp(3)", 6690 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6691 } else { 6692 udp->udp_v6lastdst = *dst; 6693 } 6694 return (err); 6695 } 6696 6697 /* 6698 * This routine handles all messages passed downstream. It either 6699 * consumes the message or passes it downstream; it never queues a 6700 * a message. 6701 */ 6702 static void 6703 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6704 { 6705 sin6_t *sin6; 6706 sin_t *sin; 6707 ipaddr_t v4dst; 6708 uint16_t port; 6709 uint_t srcid; 6710 queue_t *q = connp->conn_wq; 6711 udp_t *udp = connp->conn_udp; 6712 int error = 0; 6713 struct sockaddr_storage ss; 6714 6715 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6716 "udp_wput_start: connp %p mp %p", connp, mp); 6717 6718 /* 6719 * We directly handle several cases here: T_UNITDATA_REQ message 6720 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 6721 * connected and non-connected socket. The latter carries the 6722 * address structure along when this routine gets called. 
6723 */ 6724 switch (DB_TYPE(mp)) { 6725 case M_DATA: 6726 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6727 if (!udp->udp_direct_sockfs || 6728 addr == NULL || addrlen == 0) { 6729 /* Not connected; address is required */ 6730 BUMP_MIB(&udp_mib, udpOutErrors); 6731 UDP_STAT(udp_out_err_notconn); 6732 freemsg(mp); 6733 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6734 "udp_wput_end: connp %p (%S)", connp, 6735 "not-connected; address required"); 6736 return; 6737 } 6738 ASSERT(udp->udp_issocket); 6739 UDP_DBGSTAT(udp_data_notconn); 6740 /* Not connected; do some more checks below */ 6741 break; 6742 } 6743 /* M_DATA for connected socket */ 6744 UDP_DBGSTAT(udp_data_conn); 6745 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6746 6747 /* Initialize addr and addrlen as if they're passed in */ 6748 if (udp->udp_family == AF_INET) { 6749 sin = (sin_t *)&ss; 6750 sin->sin_family = AF_INET; 6751 sin->sin_port = udp->udp_dstport; 6752 sin->sin_addr.s_addr = v4dst; 6753 addr = (struct sockaddr *)sin; 6754 addrlen = sizeof (*sin); 6755 } else { 6756 sin6 = (sin6_t *)&ss; 6757 sin6->sin6_family = AF_INET6; 6758 sin6->sin6_port = udp->udp_dstport; 6759 sin6->sin6_flowinfo = udp->udp_flowinfo; 6760 sin6->sin6_addr = udp->udp_v6dst; 6761 sin6->sin6_scope_id = 0; 6762 sin6->__sin6_src_id = 0; 6763 addr = (struct sockaddr *)sin6; 6764 addrlen = sizeof (*sin6); 6765 } 6766 6767 if (udp->udp_family == AF_INET || 6768 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 6769 /* 6770 * Handle both AF_INET and AF_INET6; the latter 6771 * for IPV4 mapped destination addresses. Note 6772 * here that both addr and addrlen point to the 6773 * corresponding struct depending on the address 6774 * family of the socket. 
6775 */ 6776 mp = udp_output_v4(connp, mp, v4dst, 6777 udp->udp_dstport, 0, &error); 6778 } else { 6779 mp = udp_output_v6(connp, mp, sin6, &error); 6780 } 6781 if (error != 0) { 6782 ASSERT(addr != NULL && addrlen != 0); 6783 goto ud_error; 6784 } 6785 return; 6786 case M_PROTO: 6787 case M_PCPROTO: { 6788 struct T_unitdata_req *tudr; 6789 6790 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6791 tudr = (struct T_unitdata_req *)mp->b_rptr; 6792 6793 /* Handle valid T_UNITDATA_REQ here */ 6794 if (MBLKL(mp) >= sizeof (*tudr) && 6795 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6796 if (mp->b_cont == NULL) { 6797 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6798 "udp_wput_end: q %p (%S)", q, "badaddr"); 6799 error = EPROTO; 6800 goto ud_error; 6801 } 6802 6803 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6804 tudr->DEST_length)) { 6805 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6806 "udp_wput_end: q %p (%S)", q, "badaddr"); 6807 error = EADDRNOTAVAIL; 6808 goto ud_error; 6809 } 6810 /* 6811 * If a port has not been bound to the stream, fail. 6812 * This is not a problem when sockfs is directly 6813 * above us, because it will ensure that the socket 6814 * is first bound before allowing data to be sent. 
6815 */ 6816 if (udp->udp_state == TS_UNBND) { 6817 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6818 "udp_wput_end: q %p (%S)", q, "outstate"); 6819 error = EPROTO; 6820 goto ud_error; 6821 } 6822 addr = (struct sockaddr *) 6823 &mp->b_rptr[tudr->DEST_offset]; 6824 addrlen = tudr->DEST_length; 6825 if (tudr->OPT_length != 0) 6826 UDP_STAT(udp_out_opt); 6827 break; 6828 } 6829 /* FALLTHRU */ 6830 } 6831 default: 6832 udp_become_writer(connp, mp, udp_wput_other_wrapper, 6833 SQTAG_UDP_OUTPUT); 6834 return; 6835 } 6836 ASSERT(addr != NULL); 6837 6838 switch (udp->udp_family) { 6839 case AF_INET6: 6840 sin6 = (sin6_t *)addr; 6841 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 6842 sin6->sin6_family != AF_INET6) { 6843 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6844 "udp_wput_end: q %p (%S)", q, "badaddr"); 6845 error = EADDRNOTAVAIL; 6846 goto ud_error; 6847 } 6848 6849 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6850 /* 6851 * Destination is a non-IPv4-compatible IPv6 address. 6852 * Send out an IPv6 format packet. 6853 */ 6854 mp = udp_output_v6(connp, mp, sin6, &error); 6855 if (error != 0) 6856 goto ud_error; 6857 6858 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6859 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6860 return; 6861 } 6862 /* 6863 * If the local address is not zero or a mapped address 6864 * return an error. It would be possible to send an IPv4 6865 * packet but the response would never make it back to the 6866 * application since it is bound to a non-mapped address. 
6867 */ 6868 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6869 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6870 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6871 "udp_wput_end: q %p (%S)", q, "badaddr"); 6872 error = EADDRNOTAVAIL; 6873 goto ud_error; 6874 } 6875 /* Send IPv4 packet without modifying udp_ipversion */ 6876 /* Extract port and ipaddr */ 6877 port = sin6->sin6_port; 6878 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6879 srcid = sin6->__sin6_src_id; 6880 break; 6881 6882 case AF_INET: 6883 sin = (sin_t *)addr; 6884 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 6885 sin->sin_family != AF_INET) { 6886 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6887 "udp_wput_end: q %p (%S)", q, "badaddr"); 6888 error = EADDRNOTAVAIL; 6889 goto ud_error; 6890 } 6891 /* Extract port and ipaddr */ 6892 port = sin->sin_port; 6893 v4dst = sin->sin_addr.s_addr; 6894 srcid = 0; 6895 break; 6896 } 6897 6898 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); 6899 if (error != 0) { 6900 ud_error: 6901 UDP_STAT(udp_out_err_output); 6902 ASSERT(mp != NULL); 6903 /* mp is freed by the following routine */ 6904 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6905 (t_scalar_t)error); 6906 } 6907 } 6908 6909 /* ARGSUSED */ 6910 static void 6911 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 6912 { 6913 udp_output((conn_t *)arg, mp, NULL, 0); 6914 _UDP_EXIT((conn_t *)arg); 6915 } 6916 6917 static void 6918 udp_wput(queue_t *q, mblk_t *mp) 6919 { 6920 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 6921 SQTAG_UDP_WPUT); 6922 } 6923 6924 /* 6925 * Allocate and prepare a T_UNITDATA_REQ message. 
6926 */ 6927 static mblk_t * 6928 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 6929 { 6930 struct T_unitdata_req *tudr; 6931 mblk_t *mp; 6932 6933 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 6934 if (mp != NULL) { 6935 mp->b_wptr += sizeof (*tudr) + addrlen; 6936 DB_TYPE(mp) = M_PROTO; 6937 6938 tudr = (struct T_unitdata_req *)mp->b_rptr; 6939 tudr->PRIM_type = T_UNITDATA_REQ; 6940 tudr->DEST_length = addrlen; 6941 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 6942 tudr->OPT_length = 0; 6943 tudr->OPT_offset = 0; 6944 bcopy(addr, tudr+1, addrlen); 6945 } 6946 return (mp); 6947 } 6948 6949 /* 6950 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 6951 * is valid when we are directly beneath the stream head, and thus sockfs 6952 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6953 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 6954 * this is done for both connected and non-connected endpoint. 6955 */ 6956 void 6957 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6958 { 6959 conn_t *connp; 6960 udp_t *udp; 6961 6962 q = UDP_WR(q); 6963 connp = Q_TO_CONN(q); 6964 udp = connp->conn_udp; 6965 6966 /* udpsockfs should only send down M_DATA for this entry point */ 6967 ASSERT(DB_TYPE(mp) == M_DATA); 6968 6969 mutex_enter(&connp->conn_lock); 6970 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 6971 6972 if (udp->udp_mode != UDP_MT_HOT) { 6973 /* 6974 * We can't enter this conn right away because another 6975 * thread is currently executing as writer; therefore we 6976 * need to deposit the message into the squeue to be 6977 * drained later. If a socket address is present, we 6978 * need to create a T_UNITDATA_REQ message as placeholder. 
6979 */ 6980 if (addr != NULL && addrlen != 0) { 6981 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 6982 6983 if (tudr_mp == NULL) { 6984 mutex_exit(&connp->conn_lock); 6985 BUMP_MIB(&udp_mib, udpOutErrors); 6986 UDP_STAT(udp_out_err_tudr); 6987 freemsg(mp); 6988 return; 6989 } 6990 /* Tag the packet with T_UNITDATA_REQ */ 6991 tudr_mp->b_cont = mp; 6992 mp = tudr_mp; 6993 } 6994 mutex_exit(&connp->conn_lock); 6995 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 6996 return; 6997 } 6998 6999 /* We can execute as reader right away. */ 7000 UDP_READERS_INCREF(udp); 7001 mutex_exit(&connp->conn_lock); 7002 7003 udp_output(connp, mp, addr, addrlen); 7004 7005 udp_exit(connp); 7006 } 7007 7008 /* 7009 * udp_output_v6(): 7010 * Assumes that udp_wput did some sanity checking on the destination 7011 * address. 7012 */ 7013 static mblk_t * 7014 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) 7015 { 7016 ip6_t *ip6h; 7017 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 7018 mblk_t *mp1 = mp; 7019 mblk_t *mp2; 7020 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7021 size_t ip_len; 7022 udpha_t *udph; 7023 udp_t *udp = connp->conn_udp; 7024 queue_t *q = connp->conn_wq; 7025 ip6_pkt_t ipp_s; /* For ancillary data options */ 7026 ip6_pkt_t *ipp = &ipp_s; 7027 ip6_pkt_t *tipp; /* temporary ipp */ 7028 uint32_t csum = 0; 7029 uint_t ignore = 0; 7030 uint_t option_exists = 0, is_sticky = 0; 7031 uint8_t *cp; 7032 uint8_t *nxthdr_ptr; 7033 in6_addr_t ip6_dst; 7034 udpattrs_t attrs; 7035 boolean_t opt_present; 7036 7037 *error = 0; 7038 7039 /* 7040 * If the local address is a mapped address return 7041 * an error. 7042 * It would be possible to send an IPv6 packet but the 7043 * response would never make it back to the application 7044 * since it is bound to a mapped address. 
7045 */ 7046 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 7047 *error = EADDRNOTAVAIL; 7048 goto done; 7049 } 7050 7051 ipp->ipp_fields = 0; 7052 ipp->ipp_sticky_ignored = 0; 7053 7054 /* 7055 * If TPI options passed in, feed it for verification and handling 7056 */ 7057 attrs.udpattr_credset = B_FALSE; 7058 opt_present = B_FALSE; 7059 if (DB_TYPE(mp) != M_DATA) { 7060 mp1 = mp->b_cont; 7061 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 7062 attrs.udpattr_ipp = ipp; 7063 attrs.udpattr_mb = mp; 7064 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 7065 goto done; 7066 ASSERT(*error == 0); 7067 opt_present = B_TRUE; 7068 } 7069 } 7070 ignore = ipp->ipp_sticky_ignored; 7071 7072 /* mp1 points to the M_DATA mblk carrying the packet */ 7073 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 7074 7075 if (sin6->sin6_scope_id != 0 && 7076 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 7077 /* 7078 * IPPF_SCOPE_ID is special. It's neither a sticky 7079 * option nor ancillary data. It needs to be 7080 * explicitly set in options_exists. 7081 */ 7082 option_exists |= IPPF_SCOPE_ID; 7083 } 7084 7085 /* 7086 * Compute the destination address 7087 */ 7088 ip6_dst = sin6->sin6_addr; 7089 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 7090 ip6_dst = ipv6_loopback; 7091 7092 /* 7093 * If we're not going to the same destination as last time, then 7094 * recompute the label required. This is done in a separate routine to 7095 * avoid blowing up our stack here. 
7096 */ 7097 if (is_system_labeled()) { 7098 /* Using UDP MLP requires SCM_UCRED from user */ 7099 if (connp->conn_mlp_type != mlptSingle && 7100 !attrs.udpattr_credset) { 7101 DTRACE_PROBE4( 7102 tx__ip__log__info__output__udp6, 7103 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 7104 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 7105 *error = ECONNREFUSED; 7106 goto done; 7107 } 7108 if ((opt_present || 7109 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst)) && 7110 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) 7111 goto done; 7112 } 7113 7114 /* 7115 * If there's a security label here, then we ignore any options the 7116 * user may try to set. We keep the peer's label as a hidden sticky 7117 * option. 7118 */ 7119 if (udp->udp_label_len_v6 > 0) { 7120 ignore &= ~IPPF_HOPOPTS; 7121 ipp->ipp_fields &= ~IPPF_HOPOPTS; 7122 } 7123 7124 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 7125 /* No sticky options nor ancillary data. */ 7126 goto no_options; 7127 } 7128 7129 /* 7130 * Go through the options figuring out where each is going to 7131 * come from and build two masks. The first mask indicates if 7132 * the option exists at all. The second mask indicates if the 7133 * option is sticky or ancillary. 
7134 */ 7135 if (!(ignore & IPPF_HOPOPTS)) { 7136 if (ipp->ipp_fields & IPPF_HOPOPTS) { 7137 option_exists |= IPPF_HOPOPTS; 7138 udp_ip_hdr_len += ipp->ipp_hopoptslen; 7139 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 7140 option_exists |= IPPF_HOPOPTS; 7141 is_sticky |= IPPF_HOPOPTS; 7142 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_hopoptslen; 7143 } 7144 } 7145 7146 if (!(ignore & IPPF_RTHDR)) { 7147 if (ipp->ipp_fields & IPPF_RTHDR) { 7148 option_exists |= IPPF_RTHDR; 7149 udp_ip_hdr_len += ipp->ipp_rthdrlen; 7150 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 7151 option_exists |= IPPF_RTHDR; 7152 is_sticky |= IPPF_RTHDR; 7153 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 7154 } 7155 } 7156 7157 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 7158 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 7159 option_exists |= IPPF_RTDSTOPTS; 7160 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 7161 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 7162 option_exists |= IPPF_RTDSTOPTS; 7163 is_sticky |= IPPF_RTDSTOPTS; 7164 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 7165 } 7166 } 7167 7168 if (!(ignore & IPPF_DSTOPTS)) { 7169 if (ipp->ipp_fields & IPPF_DSTOPTS) { 7170 option_exists |= IPPF_DSTOPTS; 7171 udp_ip_hdr_len += ipp->ipp_dstoptslen; 7172 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 7173 option_exists |= IPPF_DSTOPTS; 7174 is_sticky |= IPPF_DSTOPTS; 7175 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 7176 } 7177 } 7178 7179 if (!(ignore & IPPF_IFINDEX)) { 7180 if (ipp->ipp_fields & IPPF_IFINDEX) { 7181 option_exists |= IPPF_IFINDEX; 7182 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 7183 option_exists |= IPPF_IFINDEX; 7184 is_sticky |= IPPF_IFINDEX; 7185 } 7186 } 7187 7188 if (!(ignore & IPPF_ADDR)) { 7189 if (ipp->ipp_fields & IPPF_ADDR) { 7190 option_exists |= IPPF_ADDR; 7191 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 7192 option_exists |= IPPF_ADDR; 7193 
is_sticky |= IPPF_ADDR; 7194 } 7195 } 7196 7197 if (!(ignore & IPPF_DONTFRAG)) { 7198 if (ipp->ipp_fields & IPPF_DONTFRAG) { 7199 option_exists |= IPPF_DONTFRAG; 7200 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 7201 option_exists |= IPPF_DONTFRAG; 7202 is_sticky |= IPPF_DONTFRAG; 7203 } 7204 } 7205 7206 if (!(ignore & IPPF_USE_MIN_MTU)) { 7207 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 7208 option_exists |= IPPF_USE_MIN_MTU; 7209 } else if (udp->udp_sticky_ipp.ipp_fields & 7210 IPPF_USE_MIN_MTU) { 7211 option_exists |= IPPF_USE_MIN_MTU; 7212 is_sticky |= IPPF_USE_MIN_MTU; 7213 } 7214 } 7215 7216 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 7217 option_exists |= IPPF_HOPLIMIT; 7218 /* IPV6_HOPLIMIT can never be sticky */ 7219 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 7220 7221 if (!(ignore & IPPF_UNICAST_HOPS) && 7222 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 7223 option_exists |= IPPF_UNICAST_HOPS; 7224 is_sticky |= IPPF_UNICAST_HOPS; 7225 } 7226 7227 if (!(ignore & IPPF_MULTICAST_HOPS) && 7228 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 7229 option_exists |= IPPF_MULTICAST_HOPS; 7230 is_sticky |= IPPF_MULTICAST_HOPS; 7231 } 7232 7233 if (!(ignore & IPPF_TCLASS)) { 7234 if (ipp->ipp_fields & IPPF_TCLASS) { 7235 option_exists |= IPPF_TCLASS; 7236 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 7237 option_exists |= IPPF_TCLASS; 7238 is_sticky |= IPPF_TCLASS; 7239 } 7240 } 7241 7242 no_options: 7243 7244 /* 7245 * If any options carried in the ip6i_t were specified, we 7246 * need to account for the ip6i_t in the data we'll be sending 7247 * down. 
7248 */ 7249 if (option_exists & IPPF_HAS_IP6I) 7250 udp_ip_hdr_len += sizeof (ip6i_t); 7251 7252 /* check/fix buffer config, setup pointers into it */ 7253 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 7254 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 7255 !OK_32PTR(ip6h)) { 7256 /* Try to get everything in a single mblk next time */ 7257 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 7258 udp->udp_max_hdr_len = udp_ip_hdr_len; 7259 (void) mi_set_sth_wroff(UDP_RD(q), 7260 udp->udp_max_hdr_len + udp_wroff_extra); 7261 } 7262 mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); 7263 if (mp2 == NULL) { 7264 *error = ENOMEM; 7265 goto done; 7266 } 7267 mp2->b_wptr = DB_LIM(mp2); 7268 mp2->b_cont = mp1; 7269 mp1 = mp2; 7270 if (DB_TYPE(mp) != M_DATA) 7271 mp->b_cont = mp1; 7272 else 7273 mp = mp1; 7274 7275 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 7276 } 7277 mp1->b_rptr = (unsigned char *)ip6h; 7278 ip6i = (ip6i_t *)ip6h; 7279 7280 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 7281 if (option_exists & IPPF_HAS_IP6I) { 7282 ip6h = (ip6_t *)&ip6i[1]; 7283 ip6i->ip6i_flags = 0; 7284 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7285 7286 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 7287 if (option_exists & IPPF_SCOPE_ID) { 7288 ip6i->ip6i_flags |= IP6I_IFINDEX; 7289 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 7290 } else if (option_exists & IPPF_IFINDEX) { 7291 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 7292 ASSERT(tipp->ipp_ifindex != 0); 7293 ip6i->ip6i_flags |= IP6I_IFINDEX; 7294 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 7295 } 7296 7297 if (option_exists & IPPF_ADDR) { 7298 /* 7299 * Enable per-packet source address verification if 7300 * IPV6_PKTINFO specified the source address. 7301 * ip6_src is set in the transport's _wput function. 
7302 */ 7303 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 7304 } 7305 7306 if (option_exists & IPPF_DONTFRAG) { 7307 ip6i->ip6i_flags |= IP6I_DONTFRAG; 7308 } 7309 7310 if (option_exists & IPPF_USE_MIN_MTU) { 7311 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 7312 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 7313 } 7314 7315 if (option_exists & IPPF_NEXTHOP) { 7316 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 7317 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 7318 ip6i->ip6i_flags |= IP6I_NEXTHOP; 7319 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 7320 } 7321 7322 /* 7323 * tell IP this is an ip6i_t private header 7324 */ 7325 ip6i->ip6i_nxt = IPPROTO_RAW; 7326 } 7327 7328 /* Initialize IPv6 header */ 7329 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7330 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7331 7332 /* Set the hoplimit of the outgoing packet. */ 7333 if (option_exists & IPPF_HOPLIMIT) { 7334 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 7335 ip6h->ip6_hops = ipp->ipp_hoplimit; 7336 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7337 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7338 ip6h->ip6_hops = udp->udp_multicast_ttl; 7339 if (option_exists & IPPF_MULTICAST_HOPS) 7340 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7341 } else { 7342 ip6h->ip6_hops = udp->udp_ttl; 7343 if (option_exists & IPPF_UNICAST_HOPS) 7344 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7345 } 7346 7347 if (option_exists & IPPF_ADDR) { 7348 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7349 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7350 ip6h->ip6_src = tipp->ipp_addr; 7351 } else { 7352 /* 7353 * The source address was not set using IPV6_PKTINFO. 7354 * First look at the bound source. 7355 * If unspecified fallback to __sin6_src_id. 
7356 */ 7357 ip6h->ip6_src = udp->udp_v6src; 7358 if (sin6->__sin6_src_id != 0 && 7359 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7360 ip_srcid_find_id(sin6->__sin6_src_id, 7361 &ip6h->ip6_src, connp->conn_zoneid); 7362 } 7363 } 7364 7365 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7366 cp = (uint8_t *)&ip6h[1]; 7367 7368 /* 7369 * Here's where we have to start stringing together 7370 * any extension headers in the right order: 7371 * Hop-by-hop, destination, routing, and final destination opts. 7372 */ 7373 if (option_exists & IPPF_HOPOPTS) { 7374 /* Hop-by-hop options */ 7375 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7376 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7377 7378 *nxthdr_ptr = IPPROTO_HOPOPTS; 7379 nxthdr_ptr = &hbh->ip6h_nxt; 7380 7381 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 7382 cp += tipp->ipp_hopoptslen; 7383 } 7384 /* 7385 * En-route destination options 7386 * Only do them if there's a routing header as well 7387 */ 7388 if (option_exists & IPPF_RTDSTOPTS) { 7389 ip6_dest_t *dst = (ip6_dest_t *)cp; 7390 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7391 7392 *nxthdr_ptr = IPPROTO_DSTOPTS; 7393 nxthdr_ptr = &dst->ip6d_nxt; 7394 7395 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7396 cp += tipp->ipp_rtdstoptslen; 7397 } 7398 /* 7399 * Routing header next 7400 */ 7401 if (option_exists & IPPF_RTHDR) { 7402 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7403 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7404 7405 *nxthdr_ptr = IPPROTO_ROUTING; 7406 nxthdr_ptr = &rt->ip6r_nxt; 7407 7408 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7409 cp += tipp->ipp_rthdrlen; 7410 } 7411 /* 7412 * Do ultimate destination options 7413 */ 7414 if (option_exists & IPPF_DSTOPTS) { 7415 ip6_dest_t *dest = (ip6_dest_t *)cp; 7416 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7417 7418 *nxthdr_ptr = IPPROTO_DSTOPTS; 7419 nxthdr_ptr = &dest->ip6d_nxt; 7420 7421 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7422 cp += tipp->ipp_dstoptslen; 7423 } 7424 /* 7425 * Now set the last 
header pointer to the proto passed in 7426 */ 7427 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7428 *nxthdr_ptr = IPPROTO_UDP; 7429 7430 /* Update UDP header */ 7431 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 7432 udph->uha_dst_port = sin6->sin6_port; 7433 udph->uha_src_port = udp->udp_port; 7434 7435 /* 7436 * Copy in the destination address 7437 */ 7438 ip6h->ip6_dst = ip6_dst; 7439 7440 ip6h->ip6_vcf = 7441 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 7442 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7443 7444 if (option_exists & IPPF_TCLASS) { 7445 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7446 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7447 tipp->ipp_tclass); 7448 } 7449 7450 if (option_exists & IPPF_RTHDR) { 7451 ip6_rthdr_t *rth; 7452 7453 /* 7454 * Perform any processing needed for source routing. 7455 * We know that all extension headers will be in the same mblk 7456 * as the IPv6 header. 7457 */ 7458 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7459 if (rth != NULL && rth->ip6r_segleft != 0) { 7460 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7461 /* 7462 * Drop packet - only support Type 0 routing. 7463 * Notify the application as well. 7464 */ 7465 *error = EPROTO; 7466 goto done; 7467 } 7468 7469 /* 7470 * rth->ip6r_len is twice the number of 7471 * addresses in the header. Thus it must be even. 7472 */ 7473 if (rth->ip6r_len & 0x1) { 7474 *error = EPROTO; 7475 goto done; 7476 } 7477 /* 7478 * Shuffle the routing header and ip6_dst 7479 * addresses, and get the checksum difference 7480 * between the first hop (in ip6_dst) and 7481 * the destination (in the last routing hdr entry). 7482 */ 7483 csum = ip_massage_options_v6(ip6h, rth); 7484 /* 7485 * Verify that the first hop isn't a mapped address. 7486 * Routers along the path need to do this verification 7487 * for subsequent hops. 
7488 */ 7489 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7490 *error = EADDRNOTAVAIL; 7491 goto done; 7492 } 7493 7494 cp += (rth->ip6r_len + 1)*8; 7495 } 7496 } 7497 7498 /* count up length of UDP packet */ 7499 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7500 if ((mp2 = mp1->b_cont) != NULL) { 7501 do { 7502 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7503 ip_len += (uint32_t)MBLKL(mp2); 7504 } while ((mp2 = mp2->b_cont) != NULL); 7505 } 7506 7507 /* 7508 * If the size of the packet is greater than the maximum allowed by 7509 * ip, return an error. Passing this down could cause panics because 7510 * the size will have wrapped and be inconsistent with the msg size. 7511 */ 7512 if (ip_len > IP_MAXPACKET) { 7513 *error = EMSGSIZE; 7514 goto done; 7515 } 7516 7517 /* Store the UDP length. Subtract length of extension hdrs */ 7518 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7519 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7520 7521 /* 7522 * We make it easy for IP to include our pseudo header 7523 * by putting our length in uh_checksum, modified (if 7524 * we have a routing header) by the checksum difference 7525 * between the ultimate destination and first hop addresses. 7526 * Note: UDP over IPv6 must always checksum the packet. 7527 */ 7528 csum += udph->uha_length; 7529 csum = (csum & 0xFFFF) + (csum >> 16); 7530 udph->uha_checksum = (uint16_t)csum; 7531 7532 #ifdef _LITTLE_ENDIAN 7533 ip_len = htons(ip_len); 7534 #endif 7535 ip6h->ip6_plen = ip_len; 7536 if (DB_CRED(mp) != NULL) 7537 mblk_setcred(mp1, DB_CRED(mp)); 7538 7539 if (DB_TYPE(mp) != M_DATA) { 7540 ASSERT(mp != mp1); 7541 freeb(mp); 7542 } 7543 7544 /* mp has been consumed and we'll return success */ 7545 ASSERT(*error == 0); 7546 mp = NULL; 7547 7548 /* We're done. 
Pass the packet to IP */ 7549 BUMP_MIB(&udp_mib, udpOutDatagrams); 7550 ip_output_v6(connp, mp1, q, IP_WPUT); 7551 7552 done: 7553 if (*error != 0) { 7554 ASSERT(mp != NULL); 7555 BUMP_MIB(&udp_mib, udpOutErrors); 7556 } 7557 return (mp); 7558 } 7559 7560 static void 7561 udp_wput_other(queue_t *q, mblk_t *mp) 7562 { 7563 uchar_t *rptr = mp->b_rptr; 7564 struct datab *db; 7565 struct iocblk *iocp; 7566 cred_t *cr; 7567 conn_t *connp = Q_TO_CONN(q); 7568 udp_t *udp = connp->conn_udp; 7569 7570 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7571 "udp_wput_other_start: q %p", q); 7572 7573 db = mp->b_datap; 7574 7575 cr = DB_CREDDEF(mp, connp->conn_cred); 7576 7577 switch (db->db_type) { 7578 case M_PROTO: 7579 case M_PCPROTO: 7580 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7581 freemsg(mp); 7582 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7583 "udp_wput_other_end: q %p (%S)", 7584 q, "protoshort"); 7585 return; 7586 } 7587 switch (((t_primp_t)rptr)->type) { 7588 case T_ADDR_REQ: 7589 udp_addr_req(q, mp); 7590 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7591 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7592 return; 7593 case O_T_BIND_REQ: 7594 case T_BIND_REQ: 7595 udp_bind(q, mp); 7596 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7597 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7598 return; 7599 case T_CONN_REQ: 7600 udp_connect(q, mp); 7601 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7602 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7603 return; 7604 case T_CAPABILITY_REQ: 7605 udp_capability_req(q, mp); 7606 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7607 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7608 return; 7609 case T_INFO_REQ: 7610 udp_info_req(q, mp); 7611 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7612 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7613 return; 7614 case T_UNITDATA_REQ: 7615 /* 7616 * If a T_UNITDATA_REQ gets here, the address must 7617 * be bad. Valid T_UNITDATA_REQs are handled 7618 * in udp_wput. 
7619 */ 7620 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7621 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7622 "udp_wput_other_end: q %p (%S)", 7623 q, "unitdatareq"); 7624 return; 7625 case T_UNBIND_REQ: 7626 udp_unbind(q, mp); 7627 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7628 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7629 return; 7630 case T_SVR4_OPTMGMT_REQ: 7631 if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr)) 7632 /* 7633 * Use upper queue for option processing in 7634 * case the request is not handled at this 7635 * level and needs to be passed down to IP. 7636 */ 7637 (void) svr4_optcom_req(_WR(UDP_RD(q)), 7638 mp, cr, &udp_opt_obj); 7639 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7640 "udp_wput_other_end: q %p (%S)", 7641 q, "optmgmtreq"); 7642 return; 7643 7644 case T_OPTMGMT_REQ: 7645 /* 7646 * Use upper queue for option processing in 7647 * case the request is not handled at this 7648 * level and needs to be passed down to IP. 7649 */ 7650 (void) tpi_optcom_req(_WR(UDP_RD(q)), 7651 mp, cr, &udp_opt_obj); 7652 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7653 "udp_wput_other_end: q %p (%S)", 7654 q, "optmgmtreq"); 7655 return; 7656 7657 case T_DISCON_REQ: 7658 udp_disconnect(q, mp); 7659 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7660 "udp_wput_other_end: q %p (%S)", 7661 q, "disconreq"); 7662 return; 7663 7664 /* The following TPI message is not supported by udp. */ 7665 case O_T_CONN_RES: 7666 case T_CONN_RES: 7667 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7668 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7669 "udp_wput_other_end: q %p (%S)", 7670 q, "connres/disconreq"); 7671 return; 7672 7673 /* The following 3 TPI messages are illegal for udp. 
*/ 7674 case T_DATA_REQ: 7675 case T_EXDATA_REQ: 7676 case T_ORDREL_REQ: 7677 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7678 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7679 "udp_wput_other_end: q %p (%S)", 7680 q, "data/exdata/ordrel"); 7681 return; 7682 default: 7683 break; 7684 } 7685 break; 7686 case M_FLUSH: 7687 if (*rptr & FLUSHW) 7688 flushq(q, FLUSHDATA); 7689 break; 7690 case M_IOCTL: 7691 iocp = (struct iocblk *)mp->b_rptr; 7692 switch (iocp->ioc_cmd) { 7693 case TI_GETPEERNAME: 7694 if (udp->udp_state != TS_DATA_XFER) { 7695 /* 7696 * If a default destination address has not 7697 * been associated with the stream, then we 7698 * don't know the peer's name. 7699 */ 7700 iocp->ioc_error = ENOTCONN; 7701 iocp->ioc_count = 0; 7702 mp->b_datap->db_type = M_IOCACK; 7703 putnext(UDP_RD(q), mp); 7704 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7705 "udp_wput_other_end: q %p (%S)", 7706 q, "getpeername"); 7707 return; 7708 } 7709 /* FALLTHRU */ 7710 case TI_GETMYNAME: { 7711 /* 7712 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7713 * need to copyin the user's strbuf structure. 7714 * Processing will continue in the M_IOCDATA case 7715 * below. 7716 */ 7717 mi_copyin(q, mp, NULL, 7718 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7719 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7720 "udp_wput_other_end: q %p (%S)", 7721 q, "getmyname"); 7722 return; 7723 } 7724 case ND_SET: 7725 /* nd_getset performs the necessary checking */ 7726 case ND_GET: 7727 if (nd_getset(q, udp_g_nd, mp)) { 7728 putnext(UDP_RD(q), mp); 7729 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7730 "udp_wput_other_end: q %p (%S)", 7731 q, "get"); 7732 return; 7733 } 7734 break; 7735 case _SIOCSOCKFALLBACK: 7736 /* 7737 * Either sockmod is about to be popped and the 7738 * socket would now be treated as a plain stream, 7739 * or a module is about to be pushed so we could 7740 * no longer use read-side synchronous stream. 7741 * Drain any queued data and disable direct sockfs 7742 * interface from now on. 
7743 */ 7744 if (!udp->udp_issocket) { 7745 DB_TYPE(mp) = M_IOCNAK; 7746 iocp->ioc_error = EINVAL; 7747 } else { 7748 udp->udp_issocket = B_FALSE; 7749 if (udp->udp_direct_sockfs) { 7750 /* 7751 * Disable read-side synchronous 7752 * stream interface and drain any 7753 * queued data. 7754 */ 7755 udp_rcv_drain(UDP_RD(q), udp, 7756 B_FALSE); 7757 ASSERT(!udp->udp_direct_sockfs); 7758 UDP_STAT(udp_sock_fallback); 7759 } 7760 DB_TYPE(mp) = M_IOCACK; 7761 iocp->ioc_error = 0; 7762 } 7763 iocp->ioc_count = 0; 7764 iocp->ioc_rval = 0; 7765 putnext(UDP_RD(q), mp); 7766 return; 7767 default: 7768 break; 7769 } 7770 break; 7771 case M_IOCDATA: 7772 udp_wput_iocdata(q, mp); 7773 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7774 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7775 return; 7776 default: 7777 /* Unrecognized messages are passed through without change. */ 7778 break; 7779 } 7780 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7781 "udp_wput_other_end: q %p (%S)", q, "end"); 7782 ip_output(connp, mp, q, IP_WPUT); 7783 } 7784 7785 /* ARGSUSED */ 7786 static void 7787 udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 7788 { 7789 udp_wput_other(((conn_t *)arg)->conn_wq, mp); 7790 udp_exit((conn_t *)arg); 7791 } 7792 7793 /* 7794 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7795 * messages. 7796 */ 7797 static void 7798 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7799 { 7800 mblk_t *mp1; 7801 STRUCT_HANDLE(strbuf, sb); 7802 uint16_t port; 7803 in6_addr_t v6addr; 7804 ipaddr_t v4addr; 7805 uint32_t flowinfo = 0; 7806 int addrlen; 7807 udp_t *udp = Q_TO_UDP(q); 7808 7809 /* Make sure it is one of ours. 
*/ 7810 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7811 case TI_GETMYNAME: 7812 case TI_GETPEERNAME: 7813 break; 7814 default: 7815 ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); 7816 return; 7817 } 7818 7819 q = WR(UDP_RD(q)); 7820 switch (mi_copy_state(q, mp, &mp1)) { 7821 case -1: 7822 return; 7823 case MI_COPY_CASE(MI_COPY_IN, 1): 7824 break; 7825 case MI_COPY_CASE(MI_COPY_OUT, 1): 7826 /* 7827 * The address has been copied out, so now 7828 * copyout the strbuf. 7829 */ 7830 mi_copyout(q, mp); 7831 return; 7832 case MI_COPY_CASE(MI_COPY_OUT, 2): 7833 /* 7834 * The address and strbuf have been copied out. 7835 * We're done, so just acknowledge the original 7836 * M_IOCTL. 7837 */ 7838 mi_copy_done(q, mp, 0); 7839 return; 7840 default: 7841 /* 7842 * Something strange has happened, so acknowledge 7843 * the original M_IOCTL with an EPROTO error. 7844 */ 7845 mi_copy_done(q, mp, EPROTO); 7846 return; 7847 } 7848 7849 /* 7850 * Now we have the strbuf structure for TI_GETMYNAME 7851 * and TI_GETPEERNAME. Next we copyout the requested 7852 * address and then we'll copyout the strbuf. 7853 */ 7854 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 7855 (void *)mp1->b_rptr); 7856 if (udp->udp_family == AF_INET) 7857 addrlen = sizeof (sin_t); 7858 else 7859 addrlen = sizeof (sin6_t); 7860 7861 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7862 mi_copy_done(q, mp, EINVAL); 7863 return; 7864 } 7865 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7866 case TI_GETMYNAME: 7867 if (udp->udp_family == AF_INET) { 7868 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7869 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7870 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7871 v4addr = V4_PART_OF_V6(udp->udp_v6src); 7872 } else { 7873 /* 7874 * INADDR_ANY 7875 * udp_v6src is not set, we might be bound to 7876 * broadcast/multicast. 
Use udp_bound_v6src as 7877 * local address instead (that could 7878 * also still be INADDR_ANY) 7879 */ 7880 v4addr = V4_PART_OF_V6(udp->udp_bound_v6src); 7881 } 7882 } else { 7883 /* udp->udp_family == AF_INET6 */ 7884 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7885 v6addr = udp->udp_v6src; 7886 } else { 7887 /* 7888 * UNSPECIFIED 7889 * udp_v6src is not set, we might be bound to 7890 * broadcast/multicast. Use udp_bound_v6src as 7891 * local address instead (that could 7892 * also still be UNSPECIFIED) 7893 */ 7894 v6addr = udp->udp_bound_v6src; 7895 } 7896 } 7897 port = udp->udp_port; 7898 break; 7899 case TI_GETPEERNAME: 7900 if (udp->udp_state != TS_DATA_XFER) { 7901 mi_copy_done(q, mp, ENOTCONN); 7902 return; 7903 } 7904 if (udp->udp_family == AF_INET) { 7905 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7906 v4addr = V4_PART_OF_V6(udp->udp_v6dst); 7907 } else { 7908 /* udp->udp_family == AF_INET6) */ 7909 v6addr = udp->udp_v6dst; 7910 flowinfo = udp->udp_flowinfo; 7911 } 7912 port = udp->udp_dstport; 7913 break; 7914 default: 7915 mi_copy_done(q, mp, EPROTO); 7916 return; 7917 } 7918 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7919 if (!mp1) 7920 return; 7921 7922 if (udp->udp_family == AF_INET) { 7923 sin_t *sin; 7924 7925 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 7926 sin = (sin_t *)mp1->b_rptr; 7927 mp1->b_wptr = (uchar_t *)&sin[1]; 7928 *sin = sin_null; 7929 sin->sin_family = AF_INET; 7930 sin->sin_addr.s_addr = v4addr; 7931 sin->sin_port = port; 7932 } else { 7933 /* udp->udp_family == AF_INET6 */ 7934 sin6_t *sin6; 7935 7936 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 7937 sin6 = (sin6_t *)mp1->b_rptr; 7938 mp1->b_wptr = (uchar_t *)&sin6[1]; 7939 *sin6 = sin6_null; 7940 sin6->sin6_family = AF_INET6; 7941 sin6->sin6_flowinfo = flowinfo; 7942 sin6->sin6_addr = v6addr; 7943 sin6->sin6_port = port; 7944 } 7945 /* Copy out the address */ 7946 mi_copyout(q, mp); 7947 } 7948 7949 7950 static int 7951 
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7952 udpattrs_t *udpattrs) 7953 { 7954 struct T_unitdata_req *udreqp; 7955 int is_absreq_failure; 7956 cred_t *cr; 7957 conn_t *connp = Q_TO_CONN(q); 7958 7959 ASSERT(((t_primp_t)mp->b_rptr)->type); 7960 7961 cr = DB_CREDDEF(mp, connp->conn_cred); 7962 7963 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7964 7965 /* 7966 * Use upper queue for option processing since the callback 7967 * routines expect to be called in UDP instance instead of IP. 7968 */ 7969 *errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length, 7970 udreqp->OPT_offset, cr, &udp_opt_obj, 7971 udpattrs, &is_absreq_failure); 7972 7973 if (*errorp != 0) { 7974 /* 7975 * Note: No special action needed in this 7976 * module for "is_absreq_failure" 7977 */ 7978 return (-1); /* failure */ 7979 } 7980 ASSERT(is_absreq_failure == 0); 7981 return (0); /* success */ 7982 } 7983 7984 void 7985 udp_ddi_init(void) 7986 { 7987 int i; 7988 7989 UDP6_MAJ = ddi_name_to_major(UDP6); 7990 7991 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7992 udp_opt_obj.odb_opt_arr_cnt); 7993 7994 if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) { 7995 /* Not a power of two. 
Round up to nearest power of two */ 7996 for (i = 0; i < 31; i++) { 7997 if (udp_bind_fanout_size < (1 << i)) 7998 break; 7999 } 8000 udp_bind_fanout_size = 1 << i; 8001 } 8002 udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size * 8003 sizeof (udp_fanout_t), KM_SLEEP); 8004 for (i = 0; i < udp_bind_fanout_size; i++) { 8005 mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 8006 NULL); 8007 } 8008 (void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr)); 8009 8010 udp_kstat_init(); 8011 8012 udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), 8013 CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); 8014 } 8015 8016 void 8017 udp_ddi_destroy(void) 8018 { 8019 int i; 8020 8021 nd_free(&udp_g_nd); 8022 8023 for (i = 0; i < udp_bind_fanout_size; i++) { 8024 mutex_destroy(&udp_bind_fanout[i].uf_lock); 8025 } 8026 8027 kmem_free(udp_bind_fanout, udp_bind_fanout_size * 8028 sizeof (udp_fanout_t)); 8029 8030 udp_kstat_fini(); 8031 8032 kmem_cache_destroy(udp_cache); 8033 } 8034 8035 static void 8036 udp_kstat_init(void) 8037 { 8038 udp_named_kstat_t template = { 8039 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 8040 { "inErrors", KSTAT_DATA_UINT32, 0 }, 8041 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 8042 { "entrySize", KSTAT_DATA_INT32, 0 }, 8043 { "entry6Size", KSTAT_DATA_INT32, 0 }, 8044 { "outErrors", KSTAT_DATA_UINT32, 0 }, 8045 }; 8046 8047 udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME, 8048 "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0); 8049 8050 if (udp_mibkp == NULL) 8051 return; 8052 8053 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 8054 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 8055 8056 bcopy(&template, udp_mibkp->ks_data, sizeof (template)); 8057 8058 udp_mibkp->ks_update = udp_kstat_update; 8059 8060 kstat_install(udp_mibkp); 8061 8062 if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat", 8063 "net", KSTAT_TYPE_NAMED, 8064 sizeof (udp_statistics) / sizeof (kstat_named_t), 8065 
KSTAT_FLAG_VIRTUAL)) != NULL) { 8066 udp_ksp->ks_data = &udp_statistics; 8067 kstat_install(udp_ksp); 8068 } 8069 } 8070 8071 static void 8072 udp_kstat_fini(void) 8073 { 8074 if (udp_ksp != NULL) { 8075 kstat_delete(udp_ksp); 8076 udp_ksp = NULL; 8077 } 8078 if (udp_mibkp != NULL) { 8079 kstat_delete(udp_mibkp); 8080 udp_mibkp = NULL; 8081 } 8082 } 8083 8084 static int 8085 udp_kstat_update(kstat_t *kp, int rw) 8086 { 8087 udp_named_kstat_t *udpkp; 8088 8089 if ((kp == NULL) || (kp->ks_data == NULL)) 8090 return (EIO); 8091 8092 if (rw == KSTAT_WRITE) 8093 return (EACCES); 8094 8095 udpkp = (udp_named_kstat_t *)kp->ks_data; 8096 8097 udpkp->inDatagrams.value.ui32 = udp_mib.udpInDatagrams; 8098 udpkp->inErrors.value.ui32 = udp_mib.udpInErrors; 8099 udpkp->outDatagrams.value.ui32 = udp_mib.udpOutDatagrams; 8100 udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors; 8101 8102 return (0); 8103 } 8104 8105 /* ARGSUSED */ 8106 static void 8107 udp_rput(queue_t *q, mblk_t *mp) 8108 { 8109 /* 8110 * We get here whenever we do qreply() from IP, 8111 * i.e as part of handlings ioctls, etc. 8112 */ 8113 putnext(q, mp); 8114 } 8115 8116 /* 8117 * Read-side synchronous stream info entry point, called as a 8118 * result of handling certain STREAMS ioctl operations. 8119 */ 8120 static int 8121 udp_rinfop(queue_t *q, infod_t *dp) 8122 { 8123 mblk_t *mp; 8124 uint_t cmd = dp->d_cmd; 8125 int res = 0; 8126 int error = 0; 8127 udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); 8128 struct stdata *stp = STREAM(q); 8129 8130 mutex_enter(&udp->udp_drain_lock); 8131 /* If shutdown on read has happened, return nothing */ 8132 mutex_enter(&stp->sd_lock); 8133 if (stp->sd_flag & STREOF) { 8134 mutex_exit(&stp->sd_lock); 8135 goto done; 8136 } 8137 mutex_exit(&stp->sd_lock); 8138 8139 if ((mp = udp->udp_rcv_list_head) == NULL) 8140 goto done; 8141 8142 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 8143 8144 if (cmd & INFOD_COUNT) { 8145 /* 8146 * Return the number of messages. 
8147 */ 8148 dp->d_count += udp->udp_rcv_msgcnt; 8149 res |= INFOD_COUNT; 8150 } 8151 if (cmd & INFOD_BYTES) { 8152 /* 8153 * Return size of all data messages. 8154 */ 8155 dp->d_bytes += udp->udp_rcv_cnt; 8156 res |= INFOD_BYTES; 8157 } 8158 if (cmd & INFOD_FIRSTBYTES) { 8159 /* 8160 * Return size of first data message. 8161 */ 8162 dp->d_bytes = msgdsize(mp); 8163 res |= INFOD_FIRSTBYTES; 8164 dp->d_cmd &= ~INFOD_FIRSTBYTES; 8165 } 8166 if (cmd & INFOD_COPYOUT) { 8167 mblk_t *mp1 = mp->b_cont; 8168 int n; 8169 /* 8170 * Return data contents of first message. 8171 */ 8172 ASSERT(DB_TYPE(mp1) == M_DATA); 8173 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 8174 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 8175 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 8176 UIO_READ, dp->d_uiop)) != 0) { 8177 goto done; 8178 } 8179 mp1 = mp1->b_cont; 8180 } 8181 res |= INFOD_COPYOUT; 8182 dp->d_cmd &= ~INFOD_COPYOUT; 8183 } 8184 done: 8185 mutex_exit(&udp->udp_drain_lock); 8186 8187 dp->d_res |= res; 8188 8189 return (error); 8190 } 8191 8192 /* 8193 * Read-side synchronous stream entry point. This is called as a result 8194 * of recv/read operation done at sockfs, and is guaranteed to execute 8195 * outside of the interrupt thread context. It returns a single datagram 8196 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 8197 */ 8198 static int 8199 udp_rrw(queue_t *q, struiod_t *dp) 8200 { 8201 mblk_t *mp; 8202 udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); 8203 8204 /* We should never get here when we're in SNMP mode */ 8205 ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); 8206 8207 /* 8208 * Dequeue datagram from the head of the list and return 8209 * it to caller; also ensure that RSLEEP sd_wakeq flag is 8210 * set/cleared depending on whether or not there's data 8211 * remaining in the list. 
8212 */ 8213 mutex_enter(&udp->udp_drain_lock); 8214 if (!udp->udp_direct_sockfs) { 8215 mutex_exit(&udp->udp_drain_lock); 8216 UDP_STAT(udp_rrw_busy); 8217 return (EBUSY); 8218 } 8219 if ((mp = udp->udp_rcv_list_head) != NULL) { 8220 uint_t size = msgdsize(mp); 8221 8222 /* Last datagram in the list? */ 8223 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 8224 udp->udp_rcv_list_tail = NULL; 8225 mp->b_next = NULL; 8226 8227 udp->udp_rcv_cnt -= size; 8228 udp->udp_rcv_msgcnt--; 8229 UDP_STAT(udp_rrw_msgcnt); 8230 8231 /* No longer flow-controlling? */ 8232 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 8233 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 8234 udp->udp_drain_qfull = B_FALSE; 8235 } 8236 if (udp->udp_rcv_list_head == NULL) { 8237 /* 8238 * Either we just dequeued the last datagram or 8239 * we get here from sockfs and have nothing to 8240 * return; in this case clear RSLEEP. 8241 */ 8242 ASSERT(udp->udp_rcv_cnt == 0); 8243 ASSERT(udp->udp_rcv_msgcnt == 0); 8244 ASSERT(udp->udp_rcv_list_tail == NULL); 8245 STR_WAKEUP_CLEAR(STREAM(q)); 8246 } else { 8247 /* 8248 * More data follows; we need udp_rrw() to be 8249 * called in future to pick up the rest. 8250 */ 8251 STR_WAKEUP_SET(STREAM(q)); 8252 } 8253 mutex_exit(&udp->udp_drain_lock); 8254 dp->d_mp = mp; 8255 return (0); 8256 } 8257 8258 /* 8259 * Enqueue a completely-built T_UNITDATA_IND message into the receive 8260 * list; this is typically executed within the interrupt thread context 8261 * and so we do things as quickly as possible. 
8262 */ 8263 static void 8264 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 8265 { 8266 ASSERT(q == RD(q)); 8267 ASSERT(pkt_len == msgdsize(mp)); 8268 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 8269 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 8270 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 8271 8272 mutex_enter(&udp->udp_drain_lock); 8273 /* 8274 * Wake up and signal the receiving app; it is okay to do this 8275 * before enqueueing the mp because we are holding the drain lock. 8276 * One of the advantages of synchronous stream is the ability for 8277 * us to find out when the application performs a read on the 8278 * socket by way of udp_rrw() entry point being called. We need 8279 * to generate SIGPOLL/SIGIO for each received data in the case 8280 * of asynchronous socket just as in the strrput() case. However, 8281 * we only wake the application up when necessary, i.e. during the 8282 * first enqueue. When udp_rrw() is called, we send up a single 8283 * datagram upstream and call STR_WAKEUP_SET() again when there 8284 * are still data remaining in our receive queue. 8285 */ 8286 if (udp->udp_rcv_list_head == NULL) { 8287 STR_WAKEUP_SET(STREAM(q)); 8288 udp->udp_rcv_list_head = mp; 8289 } else { 8290 udp->udp_rcv_list_tail->b_next = mp; 8291 } 8292 udp->udp_rcv_list_tail = mp; 8293 udp->udp_rcv_cnt += pkt_len; 8294 udp->udp_rcv_msgcnt++; 8295 8296 /* Need to flow-control? */ 8297 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 8298 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 8299 udp->udp_drain_qfull = B_TRUE; 8300 8301 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 8302 STR_SENDSIG(STREAM(q)); 8303 mutex_exit(&udp->udp_drain_lock); 8304 } 8305 8306 /* 8307 * Drain the contents of receive list to the module upstream; we do 8308 * this during close or when we fallback to the slow mode due to 8309 * sockmod being popped or a module being pushed on top of us. 
8310 */ 8311 static void 8312 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 8313 { 8314 mblk_t *mp; 8315 8316 ASSERT(q == RD(q)); 8317 8318 mutex_enter(&udp->udp_drain_lock); 8319 /* 8320 * There is no race with a concurrent udp_input() sending 8321 * up packets using putnext() after we have cleared the 8322 * udp_direct_sockfs flag but before we have completed 8323 * sending up the packets in udp_rcv_list, since we are 8324 * either a writer or we have quiesced the conn. 8325 */ 8326 udp->udp_direct_sockfs = B_FALSE; 8327 mutex_exit(&udp->udp_drain_lock); 8328 8329 if (udp->udp_rcv_list_head != NULL) 8330 UDP_STAT(udp_drain); 8331 8332 /* 8333 * Send up everything via putnext(); note here that we 8334 * don't need the udp_drain_lock to protect us since 8335 * nothing can enter udp_rrw() and that we currently 8336 * have exclusive access to this udp. 8337 */ 8338 while ((mp = udp->udp_rcv_list_head) != NULL) { 8339 udp->udp_rcv_list_head = mp->b_next; 8340 mp->b_next = NULL; 8341 udp->udp_rcv_cnt -= msgdsize(mp); 8342 udp->udp_rcv_msgcnt--; 8343 if (closing) { 8344 freemsg(mp); 8345 } else { 8346 putnext(q, mp); 8347 } 8348 } 8349 ASSERT(udp->udp_rcv_cnt == 0); 8350 ASSERT(udp->udp_rcv_msgcnt == 0); 8351 ASSERT(udp->udp_rcv_list_head == NULL); 8352 udp->udp_rcv_list_tail = NULL; 8353 udp->udp_drain_qfull = B_FALSE; 8354 } 8355 8356 static size_t 8357 udp_set_rcv_hiwat(udp_t *udp, size_t size) 8358 { 8359 /* We add a bit of extra buffering */ 8360 size += size >> 1; 8361 if (size > udp_max_buf) 8362 size = udp_max_buf; 8363 8364 udp->udp_rcv_hiwat = size; 8365 return (size); 8366 } 8367 8368 /* 8369 * Little helper for IPsec's NAT-T processing. 8370 */ 8371 boolean_t 8372 udp_compute_checksum(void) 8373 { 8374 return (udp_do_checksum); 8375 } 8376