1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/debug.h> 57 #include <sys/isa_defs.h> 58 #include <sys/random.h> 59 #include <netinet/in.h> 60 #include <netinet/ip6.h> 61 #include <netinet/icmp6.h> 62 #include <netinet/udp.h> 63 #include <net/if.h> 64 #include <net/route.h> 65 66 #include <inet/common.h> 67 #include <inet/ip.h> 68 #include <inet/ip_impl.h> 69 #include <inet/ip6.h> 70 #include <inet/ip_ire.h> 71 #include <inet/ip_if.h> 72 #include <inet/ip_multi.h> 73 #include <inet/ip_ndp.h> 74 #include <inet/mi.h> 75 #include <inet/mib2.h> 76 #include <inet/nd.h> 77 #include <inet/optcom.h> 78 #include <inet/snmpcom.h> 79 #include <inet/kstatcom.h> 80 #include <inet/udp_impl.h> 81 #include <inet/ipclassifier.h> 82 #include <inet/ipsec_impl.h> 83 #include <inet/ipp_common.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 #include <sys/tsol/label.h> 94 #include <sys/tsol/tnet.h> 95 #include <rpc/pmap_prot.h> 96 97 /* 98 * Synchronization notes: 99 * 100 * UDP uses a combination of its internal perimeter, a global lock and 101 * a set of bind hash locks to protect its data structures. Please see 102 * the note above udp_mode_assertions for details about the internal 103 * perimeter. 104 * 105 * When a UDP endpoint is bound to a local port, it is inserted into 106 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 107 * The size of the array is controlled by the udp_bind_fanout_size variable. 108 * This variable can be changed in /etc/system if the default value is 109 * not large enough. Each bind hash bucket is protected by a per bucket 110 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 111 * structure. An UDP endpoint is removed from the bind hash list only 112 * when it is being unbound or being closed. The per bucket lock also 113 * protects a UDP endpoint's state changes. 114 * 115 * Plumbing notes: 116 * 117 * Both udp and ip are merged, but the streams plumbing is kept unchanged 118 * in that udp is always pushed atop /dev/ip. This is done to preserve 119 * backwards compatibility for certain applications which rely on such 120 * plumbing geometry to do things such as issuing I_POP on the stream 121 * in order to obtain direct access to /dev/ip, etc. 122 * 123 * All UDP processings happen in the /dev/ip instance; the udp module 124 * instance does not possess any state about the endpoint, and merely 125 * acts as a dummy module whose presence is to keep the streams plumbing 126 * appearance unchanged. At open time /dev/ip allocates a conn_t that 127 * happens to embed a udp_t. This stays dormant until the time udp is 128 * pushed, which indicates to /dev/ip that it must convert itself from 129 * an IP to a UDP endpoint. 
130 * 131 * We only allow for the following plumbing cases: 132 * 133 * Normal: 134 * /dev/ip is first opened and later udp is pushed directly on top. 135 * This is the default action that happens when a udp socket or 136 * /dev/udp is opened. The conn_t created by /dev/ip instance is 137 * now shared and is marked with IPCL_UDP. 138 * 139 * SNMP-only: 140 * udp is pushed on top of a module other than /dev/ip. When this 141 * happens it will support only SNMP semantics. A new conn_t is 142 * allocated and marked with IPCL_UDPMOD. 143 * 144 * The above cases imply that we don't support any intermediate module to 145 * reside in between /dev/ip and udp -- in fact, we never supported such 146 * scenario in the past as the inter-layer communication semantics have 147 * always been private. Also note that the normal case allows for SNMP 148 * requests to be processed in addition to the rest of UDP operations. 149 * 150 * The normal case plumbing is depicted by the following diagram: 151 * 152 * +---------------+---------------+ 153 * | | | udp 154 * | udp_wq | udp_rq | 155 * | | UDP_RD | 156 * | | | 157 * +---------------+---------------+ 158 * | ^ 159 * v | 160 * +---------------+---------------+ 161 * | | | /dev/ip 162 * | ip_wq | ip_rq | conn_t 163 * | UDP_WR | | 164 * | | | 165 * +---------------+---------------+ 166 * 167 * Messages arriving at udp_wq from above will end up in ip_wq before 168 * it gets processed, i.e. udp write entry points will advance udp_wq 169 * and use its q_next value as ip_wq in order to use the conn_t that 170 * is stored in its q_ptr. Likewise, messages generated by ip to the 171 * module above udp will appear as if they are originated from udp_rq, 172 * i.e. putnext() calls to the module above udp is done using the 173 * udp_rq instead of ip_rq in order to avoid udp_rput() which does 174 * nothing more than calling putnext(). 175 * 176 * The above implies the following rule of thumb: 177 * 178 * 1. 
udp_t is obtained from conn_t, which is created by the /dev/ip
 *	instance and is stored in q_ptr of both ip_wq and ip_rq.  There
 *	is no direct reference to conn_t from either udp_wq or udp_rq.
 *
 *   2. Write-side entry points of udp can obtain the conn_t via the
 *	Q_TO_CONN() macro, using the queue value obtain from UDP_WR().
 *
 *   3. While in /dev/ip context, putnext() to the module above udp can
 *	be done by supplying the queue value obtained from UDP_RD().
 *
 */

/*
 * Translators between the udp queue pair and the /dev/ip queue pair
 * beneath it; see the plumbing notes diagram above.
 */
static queue_t *UDP_WR(queue_t *);
static queue_t *UDP_RD(queue_t *);

/*
 * Named kstat counters exported by this module (see udp_kstat_init()).
 * The DEBUG-only entries at the tail must stay last so the non-DEBUG
 * layout is a prefix of the DEBUG layout.
 */
udp_stat_t udp_statistics = {
	{ "udp_ip_send",		KSTAT_DATA_UINT64 },
	{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
	{ "udp_ire_null",		KSTAT_DATA_UINT64 },
	{ "udp_drain",			KSTAT_DATA_UINT64 },
	{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_busy",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_msgcnt",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum_bytes",	KSTAT_DATA_UINT64 },
	{ "udp_out_opt",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
	{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
	{ "udp_in_pktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstaddr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvif",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvslla",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvucred",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvttl",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhopopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhoplimit",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvrtdstopts",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvrthdr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvpktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvtclass",		KSTAT_DATA_UINT64 },
	{ "udp_in_timestamp",		KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "udp_data_conn",		KSTAT_DATA_UINT64 },
	{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
#endif
};

static kstat_t *udp_ksp;	/* kstat exporting udp_statistics above */
struct kmem_cache *udp_cache;	/* kmem cache for udp endpoint allocations */

/*
 * Bind hash list size and hash function.  It has to be a power of 2 for
 * hashing.
 */
#define	UDP_BIND_FANOUT_SIZE	512
#define	UDP_BIND_HASH(lport) \
	((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1))

/* UDP bind fanout hash structure. */
typedef struct udp_fanout_s {
	udp_t *uf_udp;		/* head of this bucket's bind hash chain */
	kmutex_t uf_lock;	/* protects chain & endpoint state changes */
	/*
	 * Pad each bucket out to 64 bytes, presumably to keep buckets on
	 * separate cache lines -- NOTE(review): confirm intent.
	 */
#if defined(_LP64) || defined(_I32LPx)
	char	uf_pad[48];
#else
	char	uf_pad[56];
#endif
} udp_fanout_t;

/* Number of buckets; tunable via /etc/system, must stay a power of 2. */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
/* udp_fanout_t *udp_bind_fanout. */
static udp_fanout_t *udp_bind_fanout;

/*
 * This controls the rate some ndd info report functions can be used
 * by non-privileged users.  It stores the last time such info is
 * requested.  When those report functions are called again, this
 * is checked with the current time and compare with the ndd param
 * udp_ndd_get_info_interval.
 */
static clock_t udp_last_ndd_get_info_time;	/* lbolt of last ndd report */
#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"

/* Option processing attrs */
typedef struct udpattrs_s {
	ip6_pkt_t	*udpattr_ipp;	/* extension header/option state */
	mblk_t		*udpattr_mb;	/* message being processed */
	boolean_t	udpattr_credset; /* B_TRUE if cred was attached */
} udpattrs_t;

/* Forward declarations of this file's static routines. */
static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static int	udp_build_hdrs(queue_t *q, udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_close(queue_t *q);
static void	udp_connect(queue_t *q, mblk_t *mp);
static void	udp_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(queue_t *q, mblk_t *mp);
static void	udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
		    t_scalar_t addr_length);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(udpparam_t *udppa, int cnt);
static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
		    cred_t *cr);
static void	udp_report_item(mblk_t *mp, udp_t *udp);
static void	udp_rput(queue_t *q, mblk_t *mp);
static void	udp_rput_other(queue_t *, mblk_t *);
static int	udp_rinfop(queue_t *q, infod_t *dp);
static int	udp_rrw(queue_t *q, struiod_t *dp);
static void	udp_rput_bind_ack(queue_t *q, mblk_t *mp);
static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha);
static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
		    t_scalar_t destlen, t_scalar_t err);
static void	udp_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
    boolean_t random);
static void	udp_wput(queue_t *q, mblk_t *mp);
static mblk_t	*udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst,
		    uint16_t port, uint_t srcid, int *error);
static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
		    int *error);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
		    socklen_t addrlen);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

static void	udp_kstat_init(void);
static void	udp_kstat_fini(void);
static int	udp_kstat_update(kstat_t *kp, int rw);
static void	udp_input_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2);

static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
		    uint_t pkt_len);
static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void	udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t);
static void	udp_exit(conn_t *);
static void	udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t);
#ifdef DEBUG
static void	udp_mode_assertions(udp_t *, int);
#endif /* DEBUG */

major_t UDP6_MAJ;		/* major number of the udp6 device */
#define	UDP6 "udp6"

/* Default STREAMS flow-control watermarks for the udp queues. */
#define	UDP_RECV_HIWATER (56 * 1024)
#define	UDP_RECV_LOWATER 128
#define	UDP_XMIT_HIWATER (56 * 1024)
#define	UDP_XMIT_LOWATER 1024

static struct module_info udp_info = {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/* Read side used when udp is pushed directly over /dev/ip (normal case). */
static struct qinit udp_rinit = {
	(pfi_t)udp_rput, NULL, udp_open, udp_close, NULL,
	&udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_winit = {
	(pfi_t)udp_wput, NULL, NULL, NULL, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Pass-through write side; udp write processing happens in /dev/ip. */
static struct qinit winit = {
	(pfi_t)putnext, NULL, NULL, NULL, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Support for just SNMP if UDP is not pushed directly over device IP */
struct qinit udp_snmp_rinit = {
	(pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

struct qinit udp_snmp_winit = {
	(pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

struct streamtab udpinfo = {
	&udp_rinit, &winit
};

static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

/* Hint not protected by any lock */
static	in_port_t	udp_g_next_port_to_try;

/*
 * Extra privileged ports. In host byte order.
 */
#define	UDP_NUM_EPRIV_PORTS	64
static int	udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
/* Only the first two slots are populated by default (NFS, lockd). */
static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 };

/* Only modified during _init and _fini thus no locking is needed. */
static IDP udp_g_nd;	/* Points to table of UDP ND variables. */

/* MIB-2 stuff for SNMP */
static mib2_udp_t	udp_mib;	/* SNMP fixed size info */
static kstat_t		*udp_mibkp;	/* kstat exporting udp_mib data */

/* Maximum UDP payload carried in a simple (option-less) IPv4 datagram. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* Maximum UDP payload carried in an extension-header-less IPv6 datagram. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into udp_g_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
	/*min	max		value		name */
	{ 0L,	256,		32,		"udp_wroff_extra" },
	{ 1L,	255,		255,		"udp_ipv4_ttl" },
	{ 0,	IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
	{ 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
	{ 0,	1,		1,		"udp_do_checksum" },
	{ 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
	{ 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
	{ UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
	{ 0,	(1<<30),	UDP_XMIT_LOWATER,	"udp_xmit_lowat"},
	{ UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
	{ 65536, (1<<30),	2*1024*1024,	"udp_max_buf"},
	{ 100,	60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/*
 * The smallest anonymous port in the privileged port range which UDP
 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
 */
static in_port_t udp_min_anonpriv_port = 512;

/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
* On non-clustered systems these vectors must always be NULL
 */

/*
 * NOTE(review): cl_inet_bind declares its first argument as uchar_t while
 * cl_inet_unbind uses uint8_t; the types are equivalent on Solaris but the
 * declarations should probably be made consistent.
 */
void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Append mp to the endpoint's internal queue, stashing the intended squeue
 * handler in b_prev and the squeue tag in b_queue for later replay by
 * udp_switch_to_squeue().  Caller must hold conn_lock.
 */
#define	UDP_ENQUEUE_MP(udp, mp, proc, tag) {			\
	ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL);	\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(mp)->b_queue = (queue_t *)((uintptr_t)tag);		\
	(mp)->b_prev = (mblk_t *)proc;				\
	if ((udp)->udp_mphead == NULL)				\
		(udp)->udp_mphead = (mp);			\
	else							\
		(udp)->udp_mptail->b_next = (mp);		\
	(udp)->udp_mptail = (mp);				\
	(udp)->udp_mpcount++;					\
}

/* Account for a new reader inside the perimeter; conn_lock must be held. */
#define	UDP_READERS_INCREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count++;				\
}

/*
 * Drop a reader reference; wake any thread (e.g. in close) waiting on
 * conn_cv for the perimeter to quiesce.  conn_lock must be held.
 */
#define	UDP_READERS_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count--;				\
	if ((udp)->udp_reader_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

/* As above but for the squeue-mode thread count.  conn_lock must be held. */
#define	UDP_SQUEUE_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_squeue_count--;				\
	if ((udp)->udp_squeue_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

/*
 * Notes on UDP endpoint synchronization:
 *
 * UDP needs exclusive operation on a per endpoint basis, when executing
 * functions that modify the endpoint state.  udp_rput_other() deals with
 * packets with IP options, and processing these packets end up having
 * to update the endpoint's option related state.  udp_wput_other() deals
 * with control operations from the top, e.g. connect() that needs to
 * update the endpoint state.  These could be synchronized using locks,
 * but the current version uses squeues for this purpose.  squeues may
 * give performance improvement for certain cases such as connected UDP
 * sockets; thus the framework allows for using squeues.
 *
 * The perimeter routines are described as follows:
 *
 * udp_enter():
 *	Enter the UDP endpoint perimeter.
 *
 * udp_become_writer():
 *	Become exclusive on the UDP endpoint.  Specifies a function
 *	that will be called exclusively either immediately or later
 *	when the perimeter is available exclusively.
 *
 * udp_exit():
 *	Exit the UDP perimeter.
 *
 * Entering UDP from the top or from the bottom must be done using
 * udp_enter().  No lock must be held while attempting to enter the UDP
 * perimeter.  When finished, udp_exit() must be called to get out of
 * the perimeter.
 *
 * UDP operates in either MT_HOT mode or in SQUEUE mode.  In MT_HOT mode,
 * multiple threads may enter a UDP endpoint concurrently.  This is used
 * for sending and/or receiving normal data.  Control operations and other
 * special cases call udp_become_writer() to become exclusive on a per
 * endpoint basis and this results in transitioning to SQUEUE mode.  squeue
 * by definition serializes access to the conn_t.  When there are no more
 * pending messages on the squeue for the UDP connection, the endpoint
 * reverts to MT_HOT mode.  During the interregnum when not all MT threads
 * of an endpoint have finished, messages are queued in the UDP endpoint
 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode.
 *
 * These modes have the following analogs:
 *
 *	UDP_MT_HOT/udp_reader_count==0		none
 *	UDP_MT_HOT/udp_reader_count>0		RW_READ_LOCK
 *	UDP_MT_QUEUED				RW_WRITE_WANTED
 *	UDP_SQUEUE or UDP_QUEUED_SQUEUE		RW_WRITE_LOCKED
 *
 * Stable modes:	UDP_MT_HOT, UDP_SQUEUE
 * Transient modes:	UDP_MT_QUEUED, UDP_QUEUED_SQUEUE
 *
 * While in stable modes, UDP keeps track of the number of threads
 * operating on the endpoint.
The udp_reader_count variable represents 578 * the number of threads entering the endpoint as readers while it is 579 * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there 580 * is only a single reader, i.e. when this counter drops to 1. Likewise, 581 * udp_squeue_count represents the number of threads operating on the 582 * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition 583 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e. 584 * when this counter drops to 0. 585 * 586 * The default mode is set to UDP_MT_HOT and UDP alternates between 587 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below. 588 * 589 * Mode transition: 590 * ---------------------------------------------------------------- 591 * old mode Event New mode 592 * ---------------------------------------------------------------- 593 * UDP_MT_HOT Call to udp_become_writer() UDP_SQUEUE 594 * and udp_reader_count == 1 595 * 596 * UDP_MT_HOT Call to udp_become_writer() UDP_MT_QUEUED 597 * and udp_reader_count > 1 598 * 599 * UDP_MT_QUEUED udp_reader_count drops to zero UDP_QUEUED_SQUEUE 600 * 601 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_SQUEUE 602 * internal UDP queue successfully 603 * moved to squeue AND udp_squeue_count != 0 604 * 605 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_MT_HOT 606 * internal UDP queue successfully 607 * moved to squeue AND udp_squeue_count 608 * drops to zero 609 * 610 * UDP_SQUEUE udp_squeue_count drops to zero UDP_MT_HOT 611 * ---------------------------------------------------------------- 612 */ 613 614 static queue_t * 615 UDP_WR(queue_t *q) 616 { 617 ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL); 618 ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL); 619 ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next))); 620 621 return (_WR(q)->q_next); 622 } 623 624 static queue_t * 625 UDP_RD(queue_t *q) 626 { 627 ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL); 628 
ASSERT(IPCL_IS_UDP(Q_TO_CONN(q))); 629 ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL); 630 631 return (_RD(q)->q_next); 632 } 633 634 #ifdef DEBUG 635 #define UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller) 636 #else 637 #define UDP_MODE_ASSERTIONS(udp, caller) 638 #endif 639 640 /* Invariants */ 641 #ifdef DEBUG 642 643 uint32_t udp_count[4]; 644 645 /* Context of udp_mode_assertions */ 646 #define UDP_ENTER 1 647 #define UDP_BECOME_WRITER 2 648 #define UDP_EXIT 3 649 650 static void 651 udp_mode_assertions(udp_t *udp, int caller) 652 { 653 ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock)); 654 655 switch (udp->udp_mode) { 656 case UDP_MT_HOT: 657 /* 658 * Messages have not yet been enqueued on the internal queue, 659 * otherwise we would have switched to UDP_MT_QUEUED. Likewise 660 * by definition, there can't be any messages enqueued on the 661 * squeue. The UDP could be quiescent, so udp_reader_count 662 * could be zero at entry. 663 */ 664 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 && 665 udp->udp_squeue_count == 0); 666 ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0); 667 udp_count[0]++; 668 break; 669 670 case UDP_MT_QUEUED: 671 /* 672 * The last MT thread to exit the udp perimeter empties the 673 * internal queue and then switches the UDP to 674 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED 675 * mode, it means there must be at least 1 MT thread still in 676 * the perimeter and at least 1 message on the internal queue. 677 */ 678 ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL && 679 udp->udp_mpcount != 0 && udp->udp_squeue_count == 0); 680 udp_count[1]++; 681 break; 682 683 case UDP_QUEUED_SQUEUE: 684 /* 685 * The switch has happened from MT to SQUEUE. So there can't 686 * any MT threads. Messages could still pile up on the internal 687 * queue until the transition is complete and we move to 688 * UDP_SQUEUE mode. 
We can't assert on nonzero udp_squeue_count 689 * since the squeue could drain any time. 690 */ 691 ASSERT(udp->udp_reader_count == 0); 692 udp_count[2]++; 693 break; 694 695 case UDP_SQUEUE: 696 /* 697 * The transition is complete. Thre can't be any messages on 698 * the internal queue. The udp could be quiescent or the squeue 699 * could drain any time, so we can't assert on nonzero 700 * udp_squeue_count during entry. Nor can we assert that 701 * udp_reader_count is zero, since, a reader thread could have 702 * directly become writer in line by calling udp_become_writer 703 * without going through the queued states. 704 */ 705 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0); 706 ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0); 707 udp_count[3]++; 708 break; 709 } 710 } 711 #endif 712 713 #define _UDP_ENTER(connp, mp, proc, tag) { \ 714 udp_t *_udp = (connp)->conn_udp; \ 715 \ 716 mutex_enter(&(connp)->conn_lock); \ 717 if ((connp)->conn_state_flags & CONN_CLOSING) { \ 718 mutex_exit(&(connp)->conn_lock); \ 719 freemsg(mp); \ 720 } else { \ 721 UDP_MODE_ASSERTIONS(_udp, UDP_ENTER); \ 722 \ 723 switch (_udp->udp_mode) { \ 724 case UDP_MT_HOT: \ 725 /* We can execute as reader right away. */ \ 726 UDP_READERS_INCREF(_udp); \ 727 mutex_exit(&(connp)->conn_lock); \ 728 (*(proc))(connp, mp, (connp)->conn_sqp); \ 729 break; \ 730 \ 731 case UDP_SQUEUE: \ 732 /* \ 733 * We are in squeue mode, send the \ 734 * packet to the squeue \ 735 */ \ 736 _udp->udp_squeue_count++; \ 737 CONN_INC_REF_LOCKED(connp); \ 738 mutex_exit(&(connp)->conn_lock); \ 739 squeue_enter((connp)->conn_sqp, mp, proc, \ 740 connp, tag); \ 741 break; \ 742 \ 743 case UDP_MT_QUEUED: \ 744 case UDP_QUEUED_SQUEUE: \ 745 /* \ 746 * Some messages may have been enqueued \ 747 * ahead of us. Enqueue the new message \ 748 * at the tail of the internal queue to \ 749 * preserve message ordering. 
\ 750 */ \ 751 UDP_ENQUEUE_MP(_udp, mp, proc, tag); \ 752 mutex_exit(&(connp)->conn_lock); \ 753 break; \ 754 } \ 755 } \ 756 } 757 758 static void 759 udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 760 { 761 _UDP_ENTER(connp, mp, proc, tag); 762 } 763 764 static void 765 udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 766 { 767 udp_t *udp; 768 769 udp = connp->conn_udp; 770 771 mutex_enter(&connp->conn_lock); 772 773 UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER); 774 775 switch (udp->udp_mode) { 776 case UDP_MT_HOT: 777 if (udp->udp_reader_count == 1) { 778 /* 779 * We are the only MT thread. Switch to squeue mode 780 * immediately. 781 */ 782 udp->udp_mode = UDP_SQUEUE; 783 udp->udp_squeue_count = 1; 784 CONN_INC_REF_LOCKED(connp); 785 mutex_exit(&connp->conn_lock); 786 squeue_enter(connp->conn_sqp, mp, proc, connp, tag); 787 return; 788 } 789 /* FALLTHRU */ 790 791 case UDP_MT_QUEUED: 792 /* Enqueue the packet internally in UDP */ 793 udp->udp_mode = UDP_MT_QUEUED; 794 UDP_ENQUEUE_MP(udp, mp, proc, tag); 795 mutex_exit(&connp->conn_lock); 796 return; 797 798 case UDP_SQUEUE: 799 case UDP_QUEUED_SQUEUE: 800 /* 801 * We are already exclusive. i.e. we are already 802 * writer. Simply call the desired function. 803 */ 804 udp->udp_squeue_count++; 805 mutex_exit(&connp->conn_lock); 806 (*proc)(connp, mp, connp->conn_sqp); 807 return; 808 } 809 } 810 811 /* 812 * Transition from MT mode to SQUEUE mode, when the last MT thread 813 * is exiting the UDP perimeter. Move all messages from the internal 814 * udp queue to the squeue. 
A better way would be to move all the 815 * messages in one shot, this needs more support from the squeue framework 816 */ 817 static void 818 udp_switch_to_squeue(udp_t *udp) 819 { 820 mblk_t *mp; 821 mblk_t *mp_next; 822 sqproc_t proc; 823 uint8_t tag; 824 conn_t *connp = udp->udp_connp; 825 826 ASSERT(MUTEX_HELD(&connp->conn_lock)); 827 ASSERT(udp->udp_mode == UDP_MT_QUEUED); 828 while (udp->udp_mphead != NULL) { 829 mp = udp->udp_mphead; 830 udp->udp_mphead = NULL; 831 udp->udp_mptail = NULL; 832 udp->udp_mpcount = 0; 833 udp->udp_mode = UDP_QUEUED_SQUEUE; 834 mutex_exit(&connp->conn_lock); 835 /* 836 * It is best not to hold any locks across the calls 837 * to squeue functions. Since we drop the lock we 838 * need to go back and check the udp_mphead once again 839 * after the squeue_fill and hence the while loop at 840 * the top of this function 841 */ 842 for (; mp != NULL; mp = mp_next) { 843 mp_next = mp->b_next; 844 proc = (sqproc_t)mp->b_prev; 845 tag = (uint8_t)((uintptr_t)mp->b_queue); 846 mp->b_next = NULL; 847 mp->b_prev = NULL; 848 mp->b_queue = NULL; 849 CONN_INC_REF(connp); 850 udp->udp_squeue_count++; 851 squeue_fill(connp->conn_sqp, mp, proc, connp, 852 tag); 853 } 854 mutex_enter(&connp->conn_lock); 855 } 856 /* 857 * udp_squeue_count of zero implies that the squeue has drained 858 * even before we arrived here (i.e. after the squeue_fill above) 859 */ 860 udp->udp_mode = (udp->udp_squeue_count != 0) ? 
861 UDP_SQUEUE : UDP_MT_HOT; 862 } 863 864 #define _UDP_EXIT(connp) { \ 865 udp_t *_udp = (connp)->conn_udp; \ 866 \ 867 mutex_enter(&(connp)->conn_lock); \ 868 UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \ 869 \ 870 switch (_udp->udp_mode) { \ 871 case UDP_MT_HOT: \ 872 UDP_READERS_DECREF(_udp); \ 873 mutex_exit(&(connp)->conn_lock); \ 874 break; \ 875 \ 876 case UDP_SQUEUE: \ 877 UDP_SQUEUE_DECREF(_udp); \ 878 if (_udp->udp_squeue_count == 0) \ 879 _udp->udp_mode = UDP_MT_HOT; \ 880 mutex_exit(&(connp)->conn_lock); \ 881 break; \ 882 \ 883 case UDP_MT_QUEUED: \ 884 /* \ 885 * If this is the last MT thread, we need to \ 886 * switch to squeue mode \ 887 */ \ 888 UDP_READERS_DECREF(_udp); \ 889 if (_udp->udp_reader_count == 0) \ 890 udp_switch_to_squeue(_udp); \ 891 mutex_exit(&(connp)->conn_lock); \ 892 break; \ 893 \ 894 case UDP_QUEUED_SQUEUE: \ 895 UDP_SQUEUE_DECREF(_udp); \ 896 /* \ 897 * Even if the udp_squeue_count drops to zero, we \ 898 * don't want to change udp_mode to UDP_MT_HOT here. \ 899 * The thread in udp_switch_to_squeue will take care \ 900 * of the transition to UDP_MT_HOT, after emptying \ 901 * any more new messages that have been enqueued in \ 902 * udp_mphead. \ 903 */ \ 904 mutex_exit(&(connp)->conn_lock); \ 905 break; \ 906 } \ 907 } 908 909 static void 910 udp_exit(conn_t *connp) 911 { 912 _UDP_EXIT(connp); 913 } 914 915 /* 916 * Return the next anonymous port in the privileged port range for 917 * bind checking. 918 * 919 * Trusted Extension (TX) notes: TX allows administrator to mark or 920 * reserve ports as Multilevel ports (MLP). MLP has special function 921 * on TX systems. Once a port is made MLP, it's not available as 922 * ordinary port. This creates "holes" in the port name space. It 923 * may be necessary to skip the "holes" find a suitable anon port. 
 */
static in_port_t
udp_get_next_priv_port(udp_t *udp)
{
    /* Shared across all endpoints; races only cost a retry. */
    static in_port_t next_priv_port = IPPORT_RESERVED - 1;
    in_port_t nextport;
    boolean_t restart = B_FALSE;

retry:
    /* Wrap back to the top of the range; give up after one full pass. */
    if (next_priv_port < udp_min_anonpriv_port ||
        next_priv_port >= IPPORT_RESERVED) {
        next_priv_port = IPPORT_RESERVED - 1;
        if (restart)
            return (0);
        restart = B_TRUE;
    }

    /* On labeled systems skip MLP "holes"; see block comment above. */
    if (is_system_labeled() &&
        (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
        next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
        next_priv_port = nextport;
        goto retry;
    }

    return (next_priv_port--);
}

/* UDP bind hash report triggered via the Named Dispatch mechanism. */
/* ARGSUSED */
static int
udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
    udp_fanout_t *udpf;
    int i;
    zoneid_t zoneid;
    conn_t *connp;
    udp_t *udp;

    connp = Q_TO_CONN(q);
    udp = connp->conn_udp;

    /*
     * Refer to comments in udp_status_report(): unprivileged callers
     * are rate-limited to one report per udp_ndd_get_info_interval ms.
     */
    if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) {
        if (ddi_get_lbolt() - udp_last_ndd_get_info_time <
            drv_usectohz(udp_ndd_get_info_interval * 1000)) {
            (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
            return (0);
        }
    }
    if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
        /* The following may work even if we cannot get a large buf. */
        (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
        return (0);
    }

    (void) mi_mpprintf(mp,
        "UDP     " MI_COL_HDRPAD_STR
    /*   12345678[89ABCDEF] */
        " zone lport src addr        dest addr       port  state");
    /*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */

    zoneid = connp->conn_zoneid;

    for (i = 0; i < udp_bind_fanout_size; i++) {
        udpf = &udp_bind_fanout[i];
        mutex_enter(&udpf->uf_lock);

        /* Print the hash index. */
        udp = udpf->uf_udp;
        if (zoneid != GLOBAL_ZONEID) {
            /* skip to first entry in this zone; might be none */
            while (udp != NULL &&
                udp->udp_connp->conn_zoneid != zoneid)
                udp = udp->udp_bind_hash;
        }
        if (udp != NULL) {
            uint_t print_len, buf_len;

            buf_len = mp->b_cont->b_datap->db_lim -
                mp->b_cont->b_wptr;
            print_len = snprintf((char *)mp->b_cont->b_wptr,
                buf_len, "%d\n", i);
            /* Advance wptr, truncating output if the buffer filled. */
            if (print_len < buf_len) {
                mp->b_cont->b_wptr += print_len;
            } else {
                mp->b_cont->b_wptr += buf_len;
            }
            for (; udp != NULL; udp = udp->udp_bind_hash) {
                if (zoneid == GLOBAL_ZONEID ||
                    zoneid == udp->udp_connp->conn_zoneid)
                    udp_report_item(mp->b_cont, udp);
            }
        }
        mutex_exit(&udpf->uf_lock);
    }
    udp_last_ndd_get_info_time = ddi_get_lbolt();
    return (0);
}

/*
 * Hash list removal routine for udp_t structures.
 * The entry is unlinked via its back-pointer (udp_ptpbhn), so the
 * fanout head pointer and interior links are handled uniformly.
 */
static void
udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
{
    udp_t *udpnext;
    kmutex_t *lockp;

    /* udp_ptpbhn == NULL means the endpoint is not hashed. */
    if (udp->udp_ptpbhn == NULL)
        return;

    /*
     * Extract the lock pointer in case there are concurrent
     * hash_remove's for this instance.
     */
    ASSERT(udp->udp_port != 0);
    if (!caller_holds_lock) {
        lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock;
        ASSERT(lockp != NULL);
        mutex_enter(lockp);
    }
    /* Re-check under the lock: a racing remover may have unlinked us. */
    if (udp->udp_ptpbhn != NULL) {
        udpnext = udp->udp_bind_hash;
        if (udpnext != NULL) {
            udpnext->udp_ptpbhn = udp->udp_ptpbhn;
            udp->udp_bind_hash = NULL;
        }
        *udp->udp_ptpbhn = udpnext;
        udp->udp_ptpbhn = NULL;
    }
    if (!caller_holds_lock) {
        mutex_exit(lockp);
    }
}

/*
 * Insert the udp_t into the bind fanout bucket, keeping endpoints bound
 * to a specific address ahead of those bound to INADDR_ANY.  The fanout
 * lock must be held by the caller.
 */
static void
udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
{
    udp_t **udpp;
    udp_t *udpnext;

    ASSERT(MUTEX_HELD(&uf->uf_lock));
    /* Unlink from any previous bucket first (lock already held). */
    if (udp->udp_ptpbhn != NULL) {
        udp_bind_hash_remove(udp, B_TRUE);
    }
    udpp = &uf->uf_udp;
    udpnext = udpp[0];
    if (udpnext != NULL) {
        /*
         * If the new udp bound to the INADDR_ANY address
         * and the first one in the list is not bound to
         * INADDR_ANY we skip all entries until we find the
         * first one bound to INADDR_ANY.
         * This makes sure that applications binding to a
         * specific address get preference over those binding to
         * INADDR_ANY.
         */
        if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
            !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
            while ((udpnext = udpp[0]) != NULL &&
                !V6_OR_V4_INADDR_ANY(
                udpnext->udp_bound_v6src)) {
                udpp = &(udpnext->udp_bind_hash);
            }
            if (udpnext != NULL)
                udpnext->udp_ptpbhn = &udp->udp_bind_hash;
        } else {
            udpnext->udp_ptpbhn = &udp->udp_bind_hash;
        }
    }
    udp->udp_bind_hash = udpnext;
    udp->udp_ptpbhn = udpp;
    udpp[0] = udp;
}

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the udp_g_next_port_to_try.
 */
static void
udp_bind(queue_t *q, mblk_t *mp)
{
    sin_t *sin;
    sin6_t *sin6;
    mblk_t *mp1;
    in_port_t port;             /* Host byte order */
    in_port_t requested_port;   /* Host byte order */
    struct T_bind_req *tbr;
    int count;
    in6_addr_t v6src;
    boolean_t bind_to_req_port_only;
    int loopmax;
    udp_fanout_t *udpf;
    in_port_t lport;            /* Network byte order */
    zoneid_t zoneid;
    conn_t *connp;
    udp_t *udp;
    boolean_t is_inaddr_any;
    mlp_type_t addrtype, mlptype;

    connp = Q_TO_CONN(q);
    udp = connp->conn_udp;
    /* Reject requests too short to hold a T_bind_req. */
    if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
        (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
            "udp_bind: bad req, len %u",
            (uint_t)(mp->b_wptr - mp->b_rptr));
        udp_err_ack(q, mp, TPROTO, 0);
        return;
    }

    /* Bind is only legal from the unbound TPI state. */
    if (udp->udp_state != TS_UNBND) {
        (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
            "udp_bind: bad state, %u", udp->udp_state);
        udp_err_ack(q, mp, TOUTSTATE, 0);
        return;
    }
    /*
     * Reallocate the message to make sure we have enough room for an
     * address and the protocol type.
     */
    mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
    if (!mp1) {
        udp_err_ack(q, mp, TSYSERR, ENOMEM);
        return;
    }

    mp = mp1;
    tbr = (struct T_bind_req *)mp->b_rptr;
    switch (tbr->ADDR_length) {
    case 0:                     /* Request for a generic port */
        /* Synthesize a wildcard sockaddr of the endpoint's family. */
        tbr->ADDR_offset = sizeof (struct T_bind_req);
        if (udp->udp_family == AF_INET) {
            tbr->ADDR_length = sizeof (sin_t);
            sin = (sin_t *)&tbr[1];
            *sin = sin_null;
            sin->sin_family = AF_INET;
            mp->b_wptr = (uchar_t *)&sin[1];
        } else {
            ASSERT(udp->udp_family == AF_INET6);
            tbr->ADDR_length = sizeof (sin6_t);
            sin6 = (sin6_t *)&tbr[1];
            *sin6 = sin6_null;
            sin6->sin6_family = AF_INET6;
            mp->b_wptr = (uchar_t *)&sin6[1];
        }
        port = 0;
        break;

    case sizeof (sin_t):        /* Complete IPv4 address */
        sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
            sizeof (sin_t));
        if (sin == NULL || !OK_32PTR((char *)sin)) {
            udp_err_ack(q, mp, TSYSERR, EINVAL);
            return;
        }
        if (udp->udp_family != AF_INET ||
            sin->sin_family != AF_INET) {
            udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
            return;
        }
        port = ntohs(sin->sin_port);
        break;

    case sizeof (sin6_t):       /* complete IPv6 address */
        sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
            sizeof (sin6_t));
        if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
            udp_err_ack(q, mp, TSYSERR, EINVAL);
            return;
        }
        if (udp->udp_family != AF_INET6 ||
            sin6->sin6_family != AF_INET6) {
            udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
            return;
        }
        port = ntohs(sin6->sin6_port);
        break;

    default:                    /* Invalid request */
        (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
            "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
        udp_err_ack(q, mp, TBADADDR, 0);
        return;
    }

    requested_port = port;

    /* O_T_BIND_REQ semantics allow falling back to another port. */
    if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ)
        bind_to_req_port_only = B_FALSE;
    else                        /* T_BIND_REQ and requested_port != 0 */
        bind_to_req_port_only = B_TRUE;

    if (requested_port == 0) {
        /*
         * If the application passed in zero for the port number, it
         * doesn't care which port number we bind to. Get one in the
         * valid range.
         */
        if (udp->udp_anon_priv_bind) {
            port = udp_get_next_priv_port(udp);
        } else {
            port = udp_update_next_port(udp,
                udp_g_next_port_to_try, B_TRUE);
        }
    } else {
        /*
         * If the port is in the well-known privileged range,
         * make sure the caller was privileged.
         */
        int i;
        boolean_t priv = B_FALSE;

        if (port < udp_smallest_nonpriv_port) {
            priv = B_TRUE;
        } else {
            /* Extra privileged ports configured via ndd. */
            for (i = 0; i < udp_g_num_epriv_ports; i++) {
                if (port == udp_g_epriv_ports[i]) {
                    priv = B_TRUE;
                    break;
                }
            }
        }

        if (priv) {
            cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);

            if (secpolicy_net_privaddr(cr, port) != 0) {
                udp_err_ack(q, mp, TACCES, 0);
                return;
            }
        }
    }

    /* port == 0 here means the anon allocator found nothing. */
    if (port == 0) {
        udp_err_ack(q, mp, TNOADDR, 0);
        return;
    }

    /*
     * Copy the source address into our udp structure. This address
     * may still be zero; if so, IP will fill in the correct address
     * each time an outbound packet is passed to it.
     */
    if (udp->udp_family == AF_INET) {
        ASSERT(sin != NULL);
        ASSERT(udp->udp_ipversion == IPV4_VERSION);
        udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
            udp->udp_ip_snd_options_len;
        IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
    } else {
        ASSERT(sin6 != NULL);
        v6src = sin6->sin6_addr;
        /* A v4-mapped bind address flips the endpoint to IPv4. */
        if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
            udp->udp_ipversion = IPV4_VERSION;
            udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
                UDPH_SIZE + udp->udp_ip_snd_options_len;
        } else {
            udp->udp_ipversion = IPV6_VERSION;
            udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
        }
    }

    /*
     * If udp_reuseaddr is not set, then we have to make sure that
     * the IP address and port number the application requested
     * (or we selected for the application) is not being used by
     * another stream. If another stream is already using the
     * requested IP address and port, the behavior depends on
     * "bind_to_req_port_only". If set the bind fails; otherwise we
     * search for an unused port to bind to the stream.
     *
     * As per the BSD semantics, as modified by the Deering multicast
     * changes, if udp_reuseaddr is set, then we allow multiple binds
     * to the same port independent of the local IP address.
     *
     * This is slightly different than in SunOS 4.X which did not
     * support IP multicast. Note that the change implemented by the
     * Deering multicast code affects all binds - not only binding
     * to IP multicast addresses.
     *
     * Note that when binding to port zero we ignore SO_REUSEADDR in
     * order to guarantee a unique port.
     */

    count = 0;
    if (udp->udp_anon_priv_bind) {
        /* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */
        loopmax = IPPORT_RESERVED - udp_min_anonpriv_port;
    } else {
        loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1;
    }

    is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
    zoneid = connp->conn_zoneid;

    for (;;) {
        udp_t *udp1;
        boolean_t found_exclbind = B_FALSE;

        /*
         * Walk through the list of udp streams bound to
         * requested port with the same IP address.
         */
        lport = htons(port);
        udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)];
        mutex_enter(&udpf->uf_lock);
        for (udp1 = udpf->uf_udp; udp1 != NULL;
            udp1 = udp1->udp_bind_hash) {
            if (lport != udp1->udp_port)
                continue;

            /*
             * On a labeled system, we must treat bindings to ports
             * on shared IP addresses by sockets with MAC exemption
             * privilege as being in all zones, as there's
             * otherwise no way to identify the right receiver.
             */
            if (zoneid != udp1->udp_connp->conn_zoneid &&
                !udp->udp_mac_exempt && !udp1->udp_mac_exempt)
                continue;

            /*
             * If UDP_EXCLBIND is set for either the bound or
             * binding endpoint, the semantics of bind
             * is changed according to the following chart.
             *
             * spec = specified address (v4 or v6)
             * unspec = unspecified address (v4 or v6)
             * A = specified addresses are different for endpoints
             *
             * bound    bind to     allowed?
             * -------------------------------------
             * unspec   unspec      no
             * unspec   spec        no
             * spec     unspec      no
             * spec     spec        yes if A
             *
             * For labeled systems, SO_MAC_EXEMPT behaves the same
             * as UDP_EXCLBIND, except that zoneid is ignored.
             */
            if (udp1->udp_exclbind || udp->udp_exclbind ||
                udp1->udp_mac_exempt || udp->udp_mac_exempt) {
                if (V6_OR_V4_INADDR_ANY(
                    udp1->udp_bound_v6src) ||
                    is_inaddr_any ||
                    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
                    &v6src)) {
                    found_exclbind = B_TRUE;
                    break;
                }
                continue;
            }

            /*
             * Check ipversion to allow IPv4 and IPv6 sockets to
             * have disjoint port number spaces.
             */
            if (udp->udp_ipversion != udp1->udp_ipversion) {

                /*
                 * On the first time through the loop, if the
                 * user intentionally specified a
                 * particular port number, then ignore any
                 * bindings of the other protocol that may
                 * conflict. This allows the user to bind IPv6
                 * alone and get both v4 and v6, or bind both
                 * and get each separately. On subsequent
                 * times through the loop, we're checking a
                 * port that we chose (not the user) and thus
                 * we do not allow casual duplicate bindings.
                 */
                if (count == 0 && requested_port != 0)
                    continue;
            }

            /*
             * No difference depending on SO_REUSEADDR.
             *
             * If existing port is bound to a
             * non-wildcard IP address and
             * the requesting stream is bound to
             * a distinct different IP addresses
             * (non-wildcard, also), keep going.
             */
            if (!is_inaddr_any &&
                !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
                !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
                &v6src)) {
                continue;
            }
            break;
        }

        /*
         * SO_REUSEADDR with an explicit port permits the duplicate
         * bind unless an UDP_EXCLBIND endpoint blocked it.
         * NOTE: breaks out of the loop with uf_lock still held.
         */
        if (!found_exclbind &&
            (udp->udp_reuseaddr && requested_port != 0)) {
            break;
        }

        if (udp1 == NULL) {
            /*
             * No other stream has this IP address
             * and port number. We can use it.
             */
            break;
        }
        mutex_exit(&udpf->uf_lock);
        if (bind_to_req_port_only) {
            /*
             * We get here only when requested port
             * is bound (and only first of the for()
             * loop iteration).
             *
             * The semantics of this bind request
             * require it to fail so we return from
             * the routine (and exit the loop).
             */
            udp_err_ack(q, mp, TADDRBUSY, 0);
            return;
        }

        if (udp->udp_anon_priv_bind) {
            port = udp_get_next_priv_port(udp);
        } else {
            if ((count == 0) && (requested_port != 0)) {
                /*
                 * If the application wants us to find
                 * a port, get one to start with. Set
                 * requested_port to 0, so that we will
                 * update udp_g_next_port_to_try below.
                 */
                port = udp_update_next_port(udp,
                    udp_g_next_port_to_try, B_TRUE);
                requested_port = 0;
            } else {
                port = udp_update_next_port(udp, port + 1,
                    B_FALSE);
            }
        }

        if (port == 0 || ++count >= loopmax) {
            /*
             * We've tried every possible port number and
             * there are none available, so send an error
             * to the user.
             */
            udp_err_ack(q, mp, TNOADDR, 0);
            return;
        }
    }

    /*
     * Copy the source address into our udp structure. This address
     * may still be zero; if so, ip will fill in the correct address
     * each time an outbound packet is passed to it.
     * If we are binding to a broadcast or multicast address udp_rput
     * will clear the source address when it receives the T_BIND_ACK.
     */
    udp->udp_v6src = udp->udp_bound_v6src = v6src;
    udp->udp_port = lport;
    /*
     * Now reset the next anonymous port if the application requested
     * an anonymous port, or we handed out the next anonymous port.
     */
    if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
        udp_g_next_port_to_try = port + 1;
    }

    /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
    if (udp->udp_family == AF_INET) {
        sin->sin_port = udp->udp_port;
    } else {
        int error;

        sin6->sin6_port = udp->udp_port;
        /* Rebuild the header template */
        error = udp_build_hdrs(q, udp);
        if (error != 0) {
            /* uf_lock is still held from the search loop. */
            mutex_exit(&udpf->uf_lock);
            udp_err_ack(q, mp, TSYSERR, error);
            return;
        }
    }
    udp->udp_state = TS_IDLE;
    udp_bind_hash_insert(udpf, udp);
    mutex_exit(&udpf->uf_lock);

    if (cl_inet_bind) {
        /*
         * Running in cluster mode - register bind information
         */
        if (udp->udp_ipversion == IPV4_VERSION) {
            (*cl_inet_bind)(IPPROTO_UDP, AF_INET,
                (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
                (in_port_t)udp->udp_port);
        } else {
            (*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
                (uint8_t *)&(udp->udp_v6src),
                (in_port_t)udp->udp_port);
        }

    }

    /* Trusted Extensions MLP handling; no-op on unlabeled systems. */
    connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
    if (is_system_labeled() && (!connp->conn_anon_port ||
        connp->conn_anon_mlp)) {
        uint16_t mlpport;
        cred_t *cr = connp->conn_cred;
        zone_t *zone;

        connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
            mlptSingle;
        addrtype = tsol_mlp_addr_type(zoneid, IPV6_VERSION, &v6src);
        if (addrtype == mlptSingle) {
            udp_err_ack(q, mp, TNOADDR, 0);
            connp->conn_anon_port = B_FALSE;
            connp->conn_mlp_type = mlptSingle;
            return;
        }
        mlpport = connp->conn_anon_port ? PMAPPORT : port;
        zone = crgetzone(cr);
        mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
            addrtype);
        if (mlptype != mlptSingle &&
            (connp->conn_mlp_type == mlptSingle ||
            secpolicy_net_bindmlp(cr) != 0)) {
            if (udp->udp_debug) {
                (void) strlog(UDP_MOD_ID, 0, 1,
                    SL_ERROR|SL_TRACE,
                    "udp_bind: no priv for multilevel port %d",
                    mlpport);
            }
            udp_err_ack(q, mp, TACCES, 0);
            connp->conn_anon_port = B_FALSE;
            connp->conn_mlp_type = mlptSingle;
            return;
        }

        /*
         * If we're specifically binding a shared IP address and the
         * port is MLP on shared addresses, then check to see if this
         * zone actually owns the MLP. Reject if not.
         */
        if (mlptype == mlptShared && addrtype == mlptShared) {
            zoneid_t mlpzone;

            mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
                htons(mlpport));
            if (connp->conn_zoneid != mlpzone) {
                if (udp->udp_debug) {
                    (void) strlog(UDP_MOD_ID, 0, 1,
                        SL_ERROR|SL_TRACE,
                        "udp_bind: attempt to bind port "
                        "%d on shared addr in zone %d "
                        "(should be %d)",
                        mlpport, connp->conn_zoneid,
                        mlpzone);
                }
                udp_err_ack(q, mp, TACCES, 0);
                connp->conn_anon_port = B_FALSE;
                connp->conn_mlp_type = mlptSingle;
                return;
            }
        }
        if (connp->conn_anon_port) {
            int error;

            error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
                port, B_TRUE);
            if (error != 0) {
                if (udp->udp_debug) {
                    (void) strlog(UDP_MOD_ID, 0, 1,
                        SL_ERROR|SL_TRACE,
                        "udp_bind: cannot establish anon "
                        "MLP for port %d", port);
                }
                udp_err_ack(q, mp, TACCES, 0);
                connp->conn_anon_port = B_FALSE;
                connp->conn_mlp_type = mlptSingle;
                return;
            }
        }
        connp->conn_mlp_type = mlptype;
    }

    /* Pass the protocol number in the message following the address. */
    *mp->b_wptr++ = IPPROTO_UDP;
    if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
        /*
         * Append a request for an IRE if udp_v6src not
         * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
         */
        mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
        if (!mp->b_cont) {
            udp_err_ack(q, mp, TSYSERR, ENOMEM);
            return;
        }
        mp->b_cont->b_wptr += sizeof (ire_t);
        mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
    }
    if (udp->udp_family == AF_INET6)
        mp = ip_bind_v6(q, mp, connp, NULL);
    else
        mp = ip_bind_v4(q, mp, connp);

    /*
     * NULL return means IP deferred the bind; it holds the message
     * and will call udp_resume_bind later, so keep a conn reference.
     */
    if (mp != NULL)
        udp_rput_other(_RD(q), mp);
    else
        CONN_INC_REF(connp);
}


/* Re-enter the endpoint to complete a bind that IP deferred. */
void
udp_resume_bind(conn_t *connp, mblk_t *mp)
{
    udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY);
}

/*
 * This is called from ip_wput_nondata to resume a deferred UDP bind.
 */
/* ARGSUSED */
static void
udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2)
{
    conn_t *connp = arg;

    ASSERT(connp != NULL && IPCL_IS_UDP(connp));

    udp_rput_other(connp->conn_rq, mp);

    /* Release the pending-operation hold, then drop our entry ref. */
    CONN_OPER_PENDING_DONE(connp);
    udp_exit(connp);
}

/*
 * This routine handles each T_CONN_REQ message passed to udp. It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *  T_BIND_REQ      - specifying local and remote address/port
 *  IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src
 *  T_OK_ACK        - for the T_CONN_REQ
 *  T_CONN_CON      - to keep the TPI user happy
 *
 * The connect completes in udp_rput.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
 */
static void
udp_connect(queue_t *q, mblk_t *mp)
{
    sin6_t *sin6;
    sin_t *sin;
    struct T_conn_req *tcr;
    in6_addr_t v6dst;
    ipaddr_t v4dst;
    uint16_t dstport;
    uint32_t flowinfo;
    mblk_t *mp1, *mp2;
    udp_fanout_t *udpf;
    udp_t *udp, *udp1;

    udp = Q_TO_UDP(q);

    tcr = (struct T_conn_req *)mp->b_rptr;

    /* A bit of sanity checking */
    if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
        udp_err_ack(q, mp, TPROTO, 0);
        return;
    }
    /*
     * This UDP must have bound to a port already before doing
     * a connect.
     */
    if (udp->udp_state == TS_UNBND) {
        (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
            "udp_connect: bad state, %u", udp->udp_state);
        udp_err_ack(q, mp, TOUTSTATE, 0);
        return;
    }
    ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);

    udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)];

    if (udp->udp_state == TS_DATA_XFER) {
        /* Already connected - clear out state */
        mutex_enter(&udpf->uf_lock);
        udp->udp_v6src = udp->udp_bound_v6src;
        udp->udp_state = TS_IDLE;
        mutex_exit(&udpf->uf_lock);
    }

    /* UDP connect carries no TPI options. */
    if (tcr->OPT_length != 0) {
        udp_err_ack(q, mp, TBADOPT, 0);
        return;
    }

    /*
     * Determine packet type based on type of address passed in
     * the request should contain an IPv4 or IPv6 address.
     * Make sure that address family matches the type of
     * family of the address passed down.
     */
    switch (tcr->DEST_length) {
    default:
        udp_err_ack(q, mp, TBADADDR, 0);
        return;

    case sizeof (sin_t):
        sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
            sizeof (sin_t));
        if (sin == NULL || !OK_32PTR((char *)sin)) {
            udp_err_ack(q, mp, TSYSERR, EINVAL);
            return;
        }
        if (udp->udp_family != AF_INET ||
            sin->sin_family != AF_INET) {
            udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
            return;
        }
        v4dst = sin->sin_addr.s_addr;
        dstport = sin->sin_port;
        IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
        ASSERT(udp->udp_ipversion == IPV4_VERSION);
        udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
            udp->udp_ip_snd_options_len;
        break;

    case sizeof (sin6_t):
        sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
            sizeof (sin6_t));
        if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
            udp_err_ack(q, mp, TSYSERR, EINVAL);
            return;
        }
        if (udp->udp_family != AF_INET6 ||
            sin6->sin6_family != AF_INET6) {
            udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
            return;
        }
        v6dst = sin6->sin6_addr;
        /* A v4-mapped destination switches the endpoint to IPv4. */
        if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
            IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
            udp->udp_ipversion = IPV4_VERSION;
            udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
                UDPH_SIZE + udp->udp_ip_snd_options_len;
            flowinfo = 0;
        } else {
            udp->udp_ipversion = IPV6_VERSION;
            udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
            flowinfo = sin6->sin6_flowinfo;
        }
        dstport = sin6->sin6_port;
        break;
    }
    if (dstport == 0) {
        udp_err_ack(q, mp, TBADADDR, 0);
        return;
    }

    /*
     * Create a default IP header with no IP options.
     */
    udp->udp_dstport = dstport;
    if (udp->udp_ipversion == IPV4_VERSION) {
        /*
         * Interpret a zero destination to mean loopback.
         * Update the T_CONN_REQ (sin/sin6) since it is used to
         * generate the T_CONN_CON.
         */
        if (v4dst == INADDR_ANY) {
            v4dst = htonl(INADDR_LOOPBACK);
            IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
            if (udp->udp_family == AF_INET) {
                sin->sin_addr.s_addr = v4dst;
            } else {
                sin6->sin6_addr = v6dst;
            }
        }
        udp->udp_v6dst = v6dst;
        udp->udp_flowinfo = 0;

        /*
         * If the destination address is multicast and
         * an outgoing multicast interface has been set,
         * use the address of that interface as our
         * source address if no source address has been set.
         */
        if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
            CLASSD(v4dst) &&
            udp->udp_multicast_if_addr != INADDR_ANY) {
            IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
                &udp->udp_v6src);
        }
    } else {
        ASSERT(udp->udp_ipversion == IPV6_VERSION);
        /*
         * Interpret a zero destination to mean loopback.
         * Update the T_CONN_REQ (sin/sin6) since it is used to
         * generate the T_CONN_CON.
         */
        if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
            v6dst = ipv6_loopback;
            sin6->sin6_addr = v6dst;
        }
        udp->udp_v6dst = v6dst;
        udp->udp_flowinfo = flowinfo;
        /*
         * If the destination address is multicast and
         * an outgoing multicast interface has been set,
         * then the ip bind logic will pick the correct source
         * address (i.e. matching the outgoing multicast interface).
         */
    }

    /*
     * Verify that the src/port/dst/port is unique for all
     * connections in TS_DATA_XFER
     */
    mutex_enter(&udpf->uf_lock);
    for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
        if (udp1->udp_state != TS_DATA_XFER)
            continue;
        if (udp->udp_port != udp1->udp_port ||
            udp->udp_ipversion != udp1->udp_ipversion ||
            dstport != udp1->udp_dstport ||
            !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
            !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst))
            continue;
        mutex_exit(&udpf->uf_lock);
        udp_err_ack(q, mp, TBADADDR, 0);
        return;
    }
    udp->udp_state = TS_DATA_XFER;
    mutex_exit(&udpf->uf_lock);

    /*
     * Send down bind to IP to verify that there is a route
     * and to determine the source address.
     * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
     */
    if (udp->udp_family == AF_INET)
        mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
    else
        mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
    if (mp1 == NULL) {
        udp_err_ack(q, mp, TSYSERR, ENOMEM);
bind_failed:
        /* Common unwind: revert the TS_DATA_XFER transition above. */
        mutex_enter(&udpf->uf_lock);
        udp->udp_state = TS_IDLE;
        mutex_exit(&udpf->uf_lock);
        return;
    }

    /*
     * We also have to send a connection confirmation to
     * keep TLI happy. Prepare it for udp_rput.
     */
    if (udp->udp_family == AF_INET)
        mp2 = mi_tpi_conn_con(NULL, (char *)sin,
            sizeof (*sin), NULL, 0);
    else
        mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
            sizeof (*sin6), NULL, 0);
    if (mp2 == NULL) {
        freemsg(mp1);
        udp_err_ack(q, mp, TSYSERR, ENOMEM);
        goto bind_failed;
    }

    mp = mi_tpi_ok_ack_alloc(mp);
    if (mp == NULL) {
        /* Unable to reuse the T_CONN_REQ for the ack. */
        freemsg(mp2);
        udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
        goto bind_failed;
    }

    /* Hang onto the T_OK_ACK and T_CONN_CON for later. */
    linkb(mp1, mp);
    linkb(mp1, mp2);

    mblk_setcred(mp1, udp->udp_connp->conn_cred);
    if (udp->udp_family == AF_INET)
        mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
    else
        mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);

    /* NULL means IP deferred the bind; hold a conn ref until resumed. */
    if (mp1 != NULL)
        udp_rput_other(_RD(q), mp1);
    else
        CONN_INC_REF(udp->udp_connp);
}

/*
 * STREAMS close routine: quiesce the conn, drain queued receive data,
 * detach the udp_t and return the queue pair to plain IP service.
 */
static int
udp_close(queue_t *q)
{
    conn_t *connp = Q_TO_CONN(UDP_WR(q));
    udp_t *udp;
    queue_t *ip_rq = RD(UDP_WR(q));

    ASSERT(connp != NULL && IPCL_IS_UDP(connp));
    udp = connp->conn_udp;

    ip_quiesce_conn(connp);
    /*
     * Disable read-side synchronous stream
     * interface and drain any queued data.
     */
    udp_rcv_drain(q, udp, B_TRUE);
    ASSERT(!udp->udp_direct_sockfs);

    qprocsoff(q);

    /* restore IP module's high and low water marks to default values */
    ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat;
    WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat;
    WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat;

    /* The drain above must have left the receive list empty. */
    ASSERT(udp->udp_rcv_cnt == 0);
    ASSERT(udp->udp_rcv_msgcnt == 0);
    ASSERT(udp->udp_rcv_list_head == NULL);
    ASSERT(udp->udp_rcv_list_tail == NULL);

    udp_close_free(connp);

    /*
     * Restore connp as an IP endpoint.
     * Locking required to prevent a race with udp_snmp_get()/
     * ipcl_get_next_conn(), which selects conn_t which are
     * IPCL_UDP and not CONN_CONDEMNED.
     */
    mutex_enter(&connp->conn_lock);
    connp->conn_flags &= ~IPCL_UDP;
    connp->conn_state_flags &=
        ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED);
    connp->conn_ulp_labeled = B_FALSE;
    mutex_exit(&connp->conn_lock);

    return (0);
}

/*
 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn
 */
void
udp_quiesce_conn(conn_t *connp)
{
    udp_t *udp = connp->conn_udp;

    if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
        /*
         * Running in cluster mode - register unbind information
         */
        if (udp->udp_ipversion == IPV4_VERSION) {
            (*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
                (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
                (in_port_t)udp->udp_port);
        } else {
            (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
                (uint8_t *)(&(udp->udp_v6src)),
                (in_port_t)udp->udp_port);
        }
    }

    udp_bind_hash_remove(udp, B_FALSE);

    /*
     * Wait until every reader and squeue thread has left the endpoint
     * and it has settled back to UDP_MT_HOT before declaring it quiesced.
     */
    mutex_enter(&connp->conn_lock);
    while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 ||
        udp->udp_mode != UDP_MT_HOT) {
        cv_wait(&connp->conn_cv, &connp->conn_lock);
    }
    mutex_exit(&connp->conn_lock);
}

/*
 * Free all per-endpoint state hanging off the udp_t and detach it
 * from the conn_t.  Called from udp_close() after quiesce.
 */
void
udp_close_free(conn_t *connp)
{
    udp_t *udp = connp->conn_udp;

    /* If there are any options associated with the stream, free them. */
    if (udp->udp_ip_snd_options) {
        mi_free((char *)udp->udp_ip_snd_options);
        udp->udp_ip_snd_options = NULL;
    }

    if (udp->udp_ip_rcv_options) {
        mi_free((char *)udp->udp_ip_rcv_options);
        udp->udp_ip_rcv_options = NULL;
    }

    /* Free memory associated with sticky options */
    if (udp->udp_sticky_hdrs_len != 0) {
        kmem_free(udp->udp_sticky_hdrs,
            udp->udp_sticky_hdrs_len);
        udp->udp_sticky_hdrs = NULL;
        udp->udp_sticky_hdrs_len = 0;
    }

    ip6_pkt_free(&udp->udp_sticky_ipp);

    /* Sever the udp_t <-> conn_t cross links before freeing. */
    udp->udp_connp = NULL;
    connp->conn_udp = NULL;
    kmem_cache_free(udp_cache, udp);
}

/*
 * This routine handles each T_DISCON_REQ message passed to udp
 * as an indication that UDP is no longer connected. This results
 * in sending a T_BIND_REQ to IP to restore the binding to just
 * the local address/port.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *  T_BIND_REQ - specifying just the local address/port
 *  T_OK_ACK   - for the T_DISCON_REQ
 *
 * The disconnect completes in udp_rput.
 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
 */
static void
udp_disconnect(queue_t *q, mblk_t *mp)
{
	udp_t	*udp = Q_TO_UDP(q);
	mblk_t	*mp1;
	udp_fanout_t *udpf;

	/* Disconnect is only valid on a connected (TS_DATA_XFER) endpoint. */
	if (udp->udp_state != TS_DATA_XFER) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_disconnect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Revert to the bound-only local address and TS_IDLE under the
	 * bind-hash fanout lock for this port.
	 */
	udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)];
	mutex_enter(&udpf->uf_lock);
	udp->udp_v6src = udp->udp_bound_v6src;
	udp->udp_state = TS_IDLE;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to remove the full binding and revert
	 * to the local address binding.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t));
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}
	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_DISCON_REQ for the ack. */
		udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM);
		return;
	}

	if (udp->udp_family == AF_INET6) {
		int error;

		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error);
			freemsg(mp1);
			return;
		}
	}
	mutex_enter(&udpf->uf_lock);
	udp->udp_discon_pending = 1;
	mutex_exit(&udpf->uf_lock);

	/* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */
	linkb(mp1, mp);

	if (udp->udp_family == AF_INET6)
		mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);
	else
		mp1 = ip_bind_v4(q, mp1, udp->udp_connp);

	/*
	 * Non-NULL mp1 means IP answered synchronously; otherwise the reply
	 * is deferred and a conn reference is taken (presumably dropped when
	 * the deferred reply arrives - see udp_rput_other).
	 */
	if (mp1 != NULL)
		udp_rput_other(_RD(q), mp1);
	else
		CONN_INC_REF(udp->udp_connp);
}

/* This routine creates a T_ERROR_ACK message and passes it upstream. */
static void
udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
{
	/* mp is reused for the ack; on allocation failure nothing is sent. */
	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
		putnext(UDP_RD(q), mp);
}

/* Shorthand to generate and send TPI error acks to our client */
static void
udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error,
    int sys_error)
{
	struct T_error_ack	*teackp;

	/* Reuse mp as an M_PCPROTO T_ERROR_ACK naming the failed primitive. */
	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
		teackp = (struct T_error_ack *)mp->b_rptr;
		teackp->ERROR_prim = primitive;
		teackp->TLI_error = t_error;
		teackp->UNIX_error = sys_error;
		putnext(UDP_RD(q), mp);
	}
}

/*
 * ndd "get" callback: print the list of extra privileged ports
 * (non-zero entries in udp_g_epriv_ports) into mp.
 */
/*ARGSUSED*/
static int
udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	int i;

	for (i = 0; i < udp_g_num_epriv_ports; i++) {
		if (udp_g_epriv_ports[i] != 0)
			(void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]);
	}
	return (0);
}

/*
 * ndd "set" callback: add a port to the extra privileged port list.
 * Returns 0 on success, EINVAL/EEXIST/EOVERFLOW on failure.
 */
/* ARGSUSED */
static int
udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	long	new_value;
	int	i;

	/*
	 * Fail the request if the new value does not lie within the
	 * port number limits.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value <= 0 || new_value >= 65536) {
		return (EINVAL);
	}

	/* Check if the value is already in the list */
	for (i = 0; i < udp_g_num_epriv_ports; i++) {
		if (new_value == udp_g_epriv_ports[i]) {
			return (EEXIST);
		}
	}
	/* Find an empty slot */
	for (i = 0; i < udp_g_num_epriv_ports; i++) {
		if (udp_g_epriv_ports[i] == 0)
			break;
	}
	if (i == udp_g_num_epriv_ports) {
		/* Table is full. */
		return (EOVERFLOW);
	}

	/* Set the new value */
	udp_g_epriv_ports[i] = (in_port_t)new_value;
	return (0);
}

/*
 * ndd "set" callback: remove a port from the extra privileged port list.
 * Returns 0 on success, EINVAL for a malformed/out-of-range port,
 * ESRCH if the port is not currently in the list.
 */
/* ARGSUSED */
static int
udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    cred_t *cr)
{
	long	new_value;
	int	i;

	/*
	 * Fail the request if the new value does not lie within the
	 * port number limits.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value <= 0 || new_value >= 65536) {
		return (EINVAL);
	}

	/* Check that the value is already in the list */
	for (i = 0; i < udp_g_num_epriv_ports; i++) {
		if (udp_g_epriv_ports[i] == new_value)
			break;
	}
	if (i == udp_g_num_epriv_ports) {
		/* Port was not in the list. */
		return (ESRCH);
	}

	/* Clear the value */
	udp_g_epriv_ports[i] = 0;
	return (0);
}

/* At minimum we need 4 bytes of UDP header */
#define	ICMP_MIN_UDP_HDR	4

/*
 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP.
 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
 * Assumes that IP has pulled up everything up to and including the ICMP header.
 * An M_CTL could potentially come here from some other module (i.e. if UDP
 * is pushed on some module other than IP).
 * Thus, if we find that the M_CTL
 * does not have enough ICMP information, following STREAMS conventions,
 * we send it upstream assuming it is an M_CTL we don't understand.
 */
static void
udp_icmp_error(queue_t *q, mblk_t *mp)
{
	icmph_t	*icmph;
	ipha_t	*ipha;
	int	iph_hdr_length;
	udpha_t	*udpha;
	sin_t	sin;
	sin6_t	sin6;
	mblk_t	*mp1;
	int	error = 0;
	size_t	mp_size = MBLKL(mp);
	udp_t	*udp = Q_TO_UDP(q);

	/*
	 * Assume IP provides aligned packets - otherwise toss
	 */
	if (!OK_32PTR(mp->b_rptr)) {
		freemsg(mp);
		return;
	}

	/*
	 * Verify that we have a complete IP header and the application has
	 * asked for errors. If not, send it upstream.
	 */
	if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) {
noticmpv4:
		putnext(UDP_RD(q), mp);
		return;
	}

	ipha = (ipha_t *)mp->b_rptr;
	/*
	 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent
	 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6.
	 */
	switch (IPH_HDR_VERSION(ipha)) {
	case IPV6_VERSION:
		udp_icmp_error_ipv6(q, mp);
		return;
	case IPV4_VERSION:
		break;
	default:
		goto noticmpv4;
	}

	/* Skip past the outer IP and ICMP headers */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
	/*
	 * If we don't have the correct outer IP header length or if the ULP
	 * is not IPPROTO_ICMP or if we don't have a complete inner IP header
	 * send the packet upstream.
	 */
	if (iph_hdr_length < sizeof (ipha_t) ||
	    ipha->ipha_protocol != IPPROTO_ICMP ||
	    (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) {
		goto noticmpv4;
	}
	/* ipha now points at the inner (offending datagram's) IP header. */
	ipha = (ipha_t *)&icmph[1];

	/* Skip past the inner IP and find the ULP header */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
	/*
	 * If we don't have the correct inner IP header length or if the ULP
	 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR
	 * bytes of UDP header, send it upstream.
	 */
	if (iph_hdr_length < sizeof (ipha_t) ||
	    ipha->ipha_protocol != IPPROTO_UDP ||
	    (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) {
		goto noticmpv4;
	}

	/* Map the ICMP type/code to a permanent error; 0 means transient. */
	switch (icmph->icmph_type) {
	case ICMP_DEST_UNREACHABLE:
		switch (icmph->icmph_code) {
		case ICMP_FRAGMENTATION_NEEDED:
			/*
			 * IP has already adjusted the path MTU.
			 * XXX Somehow pass MTU indication to application?
			 */
			break;
		case ICMP_PORT_UNREACHABLE:
		case ICMP_PROTOCOL_UNREACHABLE:
			error = ECONNREFUSED;
			break;
		default:
			/* Transient errors */
			break;
		}
		break;
	default:
		/* Transient errors */
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Permanent error: build a T_UDERROR_IND naming the destination of
	 * the offending datagram and send it upstream.
	 */
	switch (udp->udp_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = ipha->ipha_dst;
		sin.sin_port = udpha->uha_dst_port;
		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
		    error);
		break;
	case AF_INET6:
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
		sin6.sin6_port = udpha->uha_dst_port;

		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
		    NULL, 0, error);
		break;
	}
	if (mp1)
		putnext(UDP_RD(q), mp1);
	freemsg(mp);
}

/*
 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
 * Assumes that IP has pulled up all the extension headers as well as the
 * ICMPv6 header.
 * An M_CTL could potentially come here from some other module (i.e. if UDP
 * is pushed on some module other than IP). Thus, if we find that the M_CTL
 * does not have enough ICMP information, following STREAMS conventions,
 * we send it upstream assuming it is an M_CTL we don't understand. The reason
 * it might get here is if the non-ICMP M_CTL accidentally has 6 in the version
 * field (when cast to ipha_t in udp_icmp_error).
 */
static void
udp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
{
	icmp6_t		*icmp6;
	ip6_t		*ip6h, *outer_ip6h;
	uint16_t	hdr_length;
	uint8_t		*nexthdrp;
	udpha_t		*udpha;
	sin6_t		sin6;
	mblk_t		*mp1;
	int		error = 0;
	size_t		mp_size = MBLKL(mp);
	udp_t		*udp = Q_TO_UDP(q);

	/*
	 * Verify that we have a complete IP header. If not, send it upstream.
	 */
	if (mp_size < sizeof (ip6_t)) {
noticmpv6:
		putnext(UDP_RD(q), mp);
		return;
	}

	outer_ip6h = (ip6_t *)mp->b_rptr;
	/*
	 * Verify this is an ICMPV6 packet, else send it upstream
	 */
	if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) {
		hdr_length = IPV6_HDR_LEN;
	} else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length,
	    &nexthdrp) ||
	    *nexthdrp != IPPROTO_ICMPV6) {
		goto noticmpv6;
	}
	icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length];
	/* ip6h is the inner (offending datagram's) IPv6 header. */
	ip6h = (ip6_t *)&icmp6[1];
	/*
	 * Verify we have a complete ICMP and inner IP header.
	 */
	if ((uchar_t *)&ip6h[1] > mp->b_wptr)
		goto noticmpv6;

	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
		goto noticmpv6;
	udpha = (udpha_t *)((char *)ip6h + hdr_length);
	/*
	 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't
	 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the
	 * packet upstream.
	 */
	if ((*nexthdrp != IPPROTO_UDP) ||
	    ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) {
		goto noticmpv6;
	}

	/* Map the ICMPv6 type/code to a permanent error; 0 means transient. */
	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		switch (icmp6->icmp6_code) {
		case ICMP6_DST_UNREACH_NOPORT:
			error = ECONNREFUSED;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
		case ICMP6_DST_UNREACH_NOROUTE:
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
		case ICMP6_DST_UNREACH_ADDR:
			/* Transient errors */
			break;
		default:
			break;
		}
		break;
	case ICMP6_PACKET_TOO_BIG: {
		struct T_unitdata_ind	*tudi;
		struct T_opthdr		*toh;
		size_t			udi_size;
		mblk_t			*newmp;
		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
		    sizeof (struct ip6_mtuinfo);
		sin6_t			*sin6;
		struct ip6_mtuinfo	*mtuinfo;

		/*
		 * If the application has requested to receive path mtu
		 * information, send up an empty message containing an
		 * IPV6_PATHMTU ancillary data item.
		 */
		if (!udp->udp_ipv6_recvpathmtu)
			break;

		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
		    opt_length;
		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
			BUMP_MIB(&udp_mib, udpInErrors);
			break;
		}

		/*
		 * newmp->b_cont is left to NULL on purpose. This is an
		 * empty message containing only ancillary data.
		 */
		newmp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
		newmp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
		tudi->OPT_length = opt_length;

		/* Source address: the connected peer (udp_v6dst). */
		sin6 = (sin6_t *)&tudi[1];
		bzero(sin6, sizeof (sin6_t));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = udp->udp_v6dst;

		toh = (struct T_opthdr *)&sin6[1];
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_PATHMTU;
		toh->len = opt_length;
		toh->status = 0;

		/* The IPV6_PATHMTU payload follows the option header. */
		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
		/*
		 * We've consumed everything we need from the original
		 * message. Free it, then send our empty message.
		 */
		freemsg(mp);
		putnext(UDP_RD(q), newmp);
		return;
	}
	case ICMP6_TIME_EXCEEDED:
		/* Transient errors */
		break;
	case ICMP6_PARAM_PROB:
		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
		    (uchar_t *)nexthdrp) {
			error = ECONNREFUSED;
			break;
		}
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/* Permanent error: tell the application via T_UDERROR_IND. */
	sin6 = sin6_null;
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = ip6h->ip6_dst;
	sin6.sin6_port = udpha->uha_dst_port;
	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;

	mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0,
	    error);
	if (mp1)
		putnext(UDP_RD(q), mp1);
	freemsg(mp);
}

/*
 * This routine responds to T_ADDR_REQ messages.
 * It is called by udp_wput.
 * The local address is filled in if endpoint is bound. The remote address
 * is filled in if remote address has been specified ("connected endpoint")
 * (The concept of connected CLTS sockets is alien to published TPI
 *  but we support it anyway).
 */
static void
udp_addr_req(queue_t *q, mblk_t *mp)
{
	sin_t	*sin;
	sin6_t	*sin6;
	mblk_t	*ackmp;
	struct T_addr_ack *taa;
	udp_t	*udp = Q_TO_UDP(q);

	/* Make it large enough for worst case */
	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
	    2 * sizeof (sin6_t), 1);
	if (ackmp == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}
	taa = (struct T_addr_ack *)ackmp->b_rptr;

	bzero(taa, sizeof (struct T_addr_ack));
	ackmp->b_wptr = (uchar_t *)&taa[1];

	taa->PRIM_type = T_ADDR_ACK;
	ackmp->b_datap->db_type = M_PCPROTO;
	/*
	 * Note: Following code assumes 32 bit alignment of basic
	 * data structures like sin_t and struct T_addr_ack.
	 */
	if (udp->udp_state != TS_UNBND) {
		/*
		 * Fill in local address first
		 */
		taa->LOCADDR_offset = sizeof (*taa);
		if (udp->udp_family == AF_INET) {
			taa->LOCADDR_length = sizeof (sin_t);
			sin = (sin_t *)&taa[1];
			/* Fill zeroes and then initialize non-zero fields */
			*sin = sin_null;
			sin->sin_family = AF_INET;
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src,
				    sin->sin_addr.s_addr);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src,
				    sin->sin_addr.s_addr);
			}
			sin->sin_port = udp->udp_port;
			ackmp->b_wptr = (uchar_t *)&sin[1];
			if (udp->udp_state == TS_DATA_XFER) {
				/*
				 * connected, fill remote address too
				 */
				taa->REMADDR_length = sizeof (sin_t);
				/* assumed 32-bit alignment */
				taa->REMADDR_offset = taa->LOCADDR_offset +
				    taa->LOCADDR_length;

				sin = (sin_t *)(ackmp->b_rptr +
				    taa->REMADDR_offset);
				/* initialize */
				*sin = sin_null;
				sin->sin_family = AF_INET;
				sin->sin_addr.s_addr =
				    V4_PART_OF_V6(udp->udp_v6dst);
				sin->sin_port = udp->udp_dstport;
				ackmp->b_wptr = (uchar_t *)&sin[1];
			}
		} else {
			taa->LOCADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&taa[1];
			/* Fill zeroes and then initialize non-zero fields */
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				sin6->sin6_addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				sin6->sin6_addr =
				    udp->udp_bound_v6src;
			}
			sin6->sin6_port = udp->udp_port;
			ackmp->b_wptr = (uchar_t *)&sin6[1];
			if (udp->udp_state == TS_DATA_XFER) {
				/*
				 * connected, fill remote address too
				 */
				taa->REMADDR_length = sizeof (sin6_t);
				/* assumed 32-bit alignment */
				taa->REMADDR_offset = taa->LOCADDR_offset +
				    taa->LOCADDR_length;

				sin6 = (sin6_t *)(ackmp->b_rptr +
				    taa->REMADDR_offset);
				/* initialize */
				*sin6 = sin6_null;
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = udp->udp_v6dst;
				sin6->sin6_port = udp->udp_dstport;
				ackmp->b_wptr = (uchar_t *)&sin6[1];
			}
			/*
			 * Redundant when connected (sin6 then points at the
			 * remote sockaddr, so &sin6[1] is unchanged), but
			 * needed for the bound-only case.
			 */
			ackmp->b_wptr = (uchar_t *)&sin6[1];
		}
	}
	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
	putnext(UDP_RD(q), ackmp);
}

/*
 * Copy the per-family T_INFO_ACK template into *tap and fill in the
 * fields that vary per endpoint (current TPI state, max option size).
 */
static void
udp_copy_info(struct T_info_ack *tap, udp_t *udp)
{
	if (udp->udp_family == AF_INET) {
		*tap = udp_g_t_info_ack_ipv4;
	} else {
		*tap = udp_g_t_info_ack_ipv6;
	}
	tap->CURRENT_state = udp->udp_state;
	tap->OPT_size = udp_max_optsize;
}

/*
 * This routine responds to T_CAPABILITY_REQ messages.  It is called by
 * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
 * udp_g_t_info_ack.  The current state of the stream is copied from
 * udp_state.
 */
static void
udp_capability_req(queue_t *q, mblk_t *mp)
{
	t_uscalar_t		cap_bits1;
	struct T_capability_ack	*tcap;
	udp_t			*udp = Q_TO_UDP(q);

	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;

	/* Reuse the request mblk for the ack; bail silently if that fails. */
	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
	    mp->b_datap->db_type, T_CAPABILITY_ACK);
	if (!mp)
		return;

	tcap = (struct T_capability_ack *)mp->b_rptr;
	tcap->CAP_bits1 = 0;

	/* Only the TC1_INFO capability is supported here. */
	if (cap_bits1 & TC1_INFO) {
		udp_copy_info(&tcap->INFO_ack, udp);
		tcap->CAP_bits1 |= TC1_INFO;
	}

	putnext(UDP_RD(q), mp);
}

/*
 * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
 * The current state of the stream is copied from udp_state.
 */
static void
udp_info_req(queue_t *q, mblk_t *mp)
{
	udp_t *udp = Q_TO_UDP(q);

	/* Create a T_INFO_ACK message. */
	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
	    T_INFO_ACK);
	if (!mp)
		return;
	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
	putnext(UDP_RD(q), mp);
}

/*
 * IP recognizes seven kinds of bind requests:
 *
 * - A zero-length address binds only to the protocol number.
 *
 * - A 4-byte address is treated as a request to
 *   validate that the address is a valid local IPv4
 *   address, appropriate for an application to bind to.
 *   IP does the verification, but does not make any note
 *   of the address at this time.
 *
 * - A 16-byte address is treated as a request
 *   to validate a local IPv6 address, as the 4-byte
 *   address case above.
 *
 * - A 16-byte sockaddr_in to validate the local IPv4 address and also
 *   use it for the inbound fanout of packets.
 *
 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also
 *   use it for the inbound fanout of packets.
 *
 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout
 *   information consisting of local and remote addresses
 *   and ports.  In this case, the addresses are both
 *   validated as appropriate for this operation, and, if
 *   so, the information is retained for use in the
 *   inbound fanout.
 *
 * - A 36-byte address (ipa6_conn_t) containing complete IPv6
 *   fanout information, like the 12-byte case above.
 *
 * IP will also fill in the IRE request mblk with information
 * regarding our peer.  In all cases, we notify IP of our protocol
 * type by appending a single protocol byte to the bind request.
 */
static mblk_t *
udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length)
{
	char	*cp;
	mblk_t	*mp;
	struct T_bind_req *tbr;
	ipa_conn_t	*ac;
	ipa6_conn_t	*ac6;
	sin_t	*sin;
	sin6_t	*sin6;

	ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ);

	/* Room for the T_bind_req, the address, and the trailing proto byte */
	mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI);
	if (!mp)
		return (mp);
	mp->b_datap->db_type = M_PROTO;
	tbr = (struct T_bind_req *)mp->b_rptr;
	tbr->PRIM_type = bind_prim;
	tbr->ADDR_offset = sizeof (*tbr);
	tbr->CONIND_number = 0;
	tbr->ADDR_length = addr_length;
	cp = (char *)&tbr[1];
	/* addr_length selects which of the address forms above is built. */
	switch (addr_length) {
	case sizeof (ipa_conn_t):
		ASSERT(udp->udp_family == AF_INET);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		/* cp known to be 32 bit aligned */
		ac = (ipa_conn_t *)cp;
		ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src);
		ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst);
		ac->ac_fport = udp->udp_dstport;
		ac->ac_lport = udp->udp_port;
		break;

	case sizeof (ipa6_conn_t):
		ASSERT(udp->udp_family == AF_INET6);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		/* cp known to be 32 bit aligned */
		ac6 = (ipa6_conn_t *)cp;
		ac6->ac6_laddr = udp->udp_v6src;
		ac6->ac6_faddr = udp->udp_v6dst;
		ac6->ac6_fport = udp->udp_dstport;
		ac6->ac6_lport = udp->udp_port;
		break;

	case sizeof (sin_t):
		ASSERT(udp->udp_family == AF_INET);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		sin = (sin_t *)cp;
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src);
		sin->sin_port = udp->udp_port;
		break;

	case sizeof (sin6_t):
		ASSERT(udp->udp_family == AF_INET6);
		/* Append a request for an IRE */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			freemsg(mp);
			return (NULL);
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;

		sin6 = (sin6_t *)cp;
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = udp->udp_bound_v6src;
		sin6->sin6_port = udp->udp_port;
		break;
	}
	/* Add protocol number to end */
	cp[addr_length] = (char)IPPROTO_UDP;
	mp->b_wptr = (uchar_t *)&cp[addr_length + 1];
	return (mp);
}

/*
 * This is the open routine for udp.
 * It allocates a udp_t structure for
 * the stream and, on the first open of the module, creates an ND table.
 */
/* ARGSUSED */
static int
udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	int	err;
	udp_t	*udp;
	conn_t	*connp;
	zoneid_t zoneid = getzoneid();
	queue_t	*ip_wq;
	char	*name;

	TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q);

	/* If the stream is already open, return immediately. */
	if (q->q_ptr != NULL)
		return (0);

	/* If this is not a push of udp as a module, fail. */
	if (sflag != MODOPEN)
		return (EINVAL);

	q->q_hiwat = udp_recv_hiwat;
	WR(q)->q_hiwat = udp_xmit_hiwat;
	WR(q)->q_lowat = udp_xmit_lowat;

	/* Insert ourselves in the stream since we're about to walk q_next */
	qprocson(q);

	udp = kmem_cache_alloc(udp_cache, KM_SLEEP);
	bzero(udp, sizeof (*udp));

	/*
	 * UDP is supported only as a module and it has to be pushed directly
	 * above the device instance of IP. If UDP is pushed anywhere else
	 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the
	 * sake of MIB browsers and fail everything else.
	 */
	ip_wq = WR(q)->q_next;
	if (ip_wq->q_next != NULL ||
	    (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL ||
	    strcmp(name, IP_MOD_NAME) != 0 ||
	    ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) {
		/* Support just SNMP for MIB browsers */
		connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP);
		connp->conn_rq = q;
		connp->conn_wq = WR(q);
		connp->conn_flags |= IPCL_UDPMOD;
		connp->conn_cred = credp;
		connp->conn_zoneid = zoneid;
		connp->conn_udp = udp;
		udp->udp_connp = connp;
		q->q_ptr = WR(q)->q_ptr = connp;
		crhold(credp);
		/* Switch to the SNMP-only qinit vectors. */
		q->q_qinfo = &udp_snmp_rinit;
		WR(q)->q_qinfo = &udp_snmp_winit;
		return (0);
	}

	/*
	 * Initialize the udp_t structure for this stream.
	 * Note: from here on q refers to IP's read queue, not our own.
	 */
	q = RD(ip_wq);
	connp = Q_TO_CONN(q);
	mutex_enter(&connp->conn_lock);
	connp->conn_proto = IPPROTO_UDP;
	connp->conn_flags |= IPCL_UDP;
	connp->conn_sqp = IP_SQUEUE_GET(lbolt);
	connp->conn_udp = udp;

	/* Set the initial state of the stream and the privilege status. */
	udp->udp_connp = connp;
	udp->udp_state = TS_UNBND;
	udp->udp_mode = UDP_MT_HOT;
	/* The device major number determines the address family. */
	if (getmajor(*devp) == (major_t)UDP6_MAJ) {
		udp->udp_family = AF_INET6;
		udp->udp_ipversion = IPV6_VERSION;
		udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
		udp->udp_ttl = udp_ipv6_hoplimit;
		connp->conn_af_isv6 = B_TRUE;
		connp->conn_flags |= IPCL_ISV6;
	} else {
		udp->udp_family = AF_INET;
		udp->udp_ipversion = IPV4_VERSION;
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
		udp->udp_ttl = udp_ipv4_ttl;
		connp->conn_af_isv6 = B_FALSE;
		connp->conn_flags &= ~IPCL_ISV6;
	}

	udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
	connp->conn_zoneid = zoneid;

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode.  This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0)
		udp->udp_mac_exempt = B_TRUE;

	if (connp->conn_flags & IPCL_SOCKET) {
		udp->udp_issocket = B_TRUE;
		udp->udp_direct_sockfs = B_TRUE;
	}

	connp->conn_ulp_labeled = is_system_labeled();

	mutex_exit(&connp->conn_lock);

	/*
	 * The transmit hiwat/lowat is only looked at on IP's queue.
	 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF
	 * getsockopts.
	 */
	q->q_hiwat = udp_recv_hiwat;
	WR(q)->q_hiwat = udp_xmit_hiwat;
	WR(q)->q_lowat = udp_xmit_lowat;

	if (udp->udp_family == AF_INET6) {
		/* Build initial header template for transmit */
		if ((err = udp_build_hdrs(q, udp)) != 0) {
error:
			/* Undo qprocson() and the conn/udp cross links. */
			qprocsoff(UDP_RD(q));
			udp->udp_connp = NULL;
			connp->conn_udp = NULL;
			kmem_cache_free(udp_cache, udp);
			return (err);
		}
	}

	/* Set the Stream head write offset and high watermark. */
	(void) mi_set_sth_wroff(UDP_RD(q),
	    udp->udp_max_hdr_len + udp_wroff_extra);
	(void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat));

	WR(UDP_RD(q))->q_qinfo = &udp_winit;

	return (0);
}

/*
 * Which UDP options OK to set through T_UNITDATA_REQ...
 */
/* ARGSUSED */
static boolean_t
udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
{
	/* All options are permitted via T_UNITDATA_REQ. */
	return (B_TRUE);
}

/*
 * This routine gets default values of certain options whose default
 * values are maintained by protocol specific code
 */
/* ARGSUSED */
int
udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
	int *i1 = (int *)ptr;

	switch (level) {
	case IPPROTO_IP:
		switch (name) {
		case IP_MULTICAST_TTL:
			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
			return (sizeof (uchar_t));
		case IP_MULTICAST_LOOP:
			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
			return (sizeof (uchar_t));
		}
		break;
	case IPPROTO_IPV6:
		switch (name) {
		case IPV6_MULTICAST_HOPS:
			*i1 = IP_DEFAULT_MULTICAST_TTL;
			return (sizeof (int));
		case IPV6_MULTICAST_LOOP:
			*i1 = IP_DEFAULT_MULTICAST_LOOP;
			return (sizeof (int));
		case IPV6_UNICAST_HOPS:
			*i1 = udp_ipv6_hoplimit;
			return (sizeof (int));
		}
		break;
	}
	/* -1 => no protocol-maintained default for this level/name pair. */
	return (-1);
}

/*
 * This routine retrieves the current status
of socket options
 * and expects the caller to pass in the queue pointer of the
 * upper instance.  It returns the size of the option retrieved.
 *
 * Return conventions (as used by the cases below):
 *	> 0	number of bytes stored through 'ptr'
 *	-1	option not supported / not gettable at this level
 *	-EINVAL	"soft" error: option is handled at the IP level instead
 */
int
udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
	int	*i1 = (int *)ptr;
	conn_t	*connp;
	udp_t	*udp;
	ip6_pkt_t	*ipp;
	int	len;

	/* Normalize to the upper instance's write queue to find the conn. */
	q = UDP_WR(q);
	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	ipp = &udp->udp_sticky_ipp;

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_DEBUG:
			*i1 = udp->udp_debug;
			break;	/* goto sizeof (int) option return */
		case SO_REUSEADDR:
			*i1 = udp->udp_reuseaddr;
			break;	/* goto sizeof (int) option return */
		case SO_TYPE:
			*i1 = SOCK_DGRAM;
			break;	/* goto sizeof (int) option return */

		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			*i1 = udp->udp_dontroute;
			break;	/* goto sizeof (int) option return */
		case SO_USELOOPBACK:
			*i1 = udp->udp_useloopback;
			break;	/* goto sizeof (int) option return */
		case SO_BROADCAST:
			*i1 = udp->udp_broadcast;
			break;	/* goto sizeof (int) option return */

		case SO_SNDBUF:
			/* Send buffer is the write queue's high water mark. */
			*i1 = q->q_hiwat;
			break;	/* goto sizeof (int) option return */
		case SO_RCVBUF:
			*i1 = RD(q)->q_hiwat;
			break;	/* goto sizeof (int) option return */
		case SO_DGRAM_ERRIND:
			*i1 = udp->udp_dgram_errind;
			break;	/* goto sizeof (int) option return */
		case SO_RECVUCRED:
			*i1 = udp->udp_recvucred;
			break;	/* goto sizeof (int) option return */
		case SO_TIMESTAMP:
			*i1 = udp->udp_timestamp;
			break;	/* goto sizeof (int) option return */
		case SO_ANON_MLP:
			*i1 = udp->udp_anon_mlp;
			break;	/* goto sizeof (int) option return */
		case SO_MAC_EXEMPT:
			*i1 = udp->udp_mac_exempt;
			break;	/* goto sizeof (int) option return */
		case SO_ALLZONES:
			*i1 = connp->conn_allzones;
			break;	/* goto sizeof (int) option return */
		case SO_EXCLBIND:
			*i1 = udp->udp_exclbind ? SO_EXCLBIND : 0;
			break;
		default:
			return (-1);
		}
		break;
	case IPPROTO_IP:
		/* IPv4-level options only make sense on AF_INET endpoints. */
		if (udp->udp_family != AF_INET)
			return (-1);
		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/*
			 * Received IP options, with any kernel-inserted
			 * label prefix (udp_label_len bytes) stripped off.
			 */
			len = udp->udp_ip_rcv_options_len - udp->udp_label_len;
			if (len > 0) {
				bcopy(udp->udp_ip_rcv_options +
				    udp->udp_label_len, ptr, len);
			}
			return (len);
		case IP_TOS:
		case T_IP_TOS:
			*i1 = (int)udp->udp_type_of_service;
			break;	/* goto sizeof (int) option return */
		case IP_TTL:
			*i1 = (int)udp->udp_ttl;
			break;	/* goto sizeof (int) option return */
		case IP_NEXTHOP:
			/* Handled at IP level */
			return (-EINVAL);
		case IP_MULTICAST_IF:
			/* 0 address if not set */
			*(ipaddr_t *)ptr = udp->udp_multicast_if_addr;
			return (sizeof (ipaddr_t));
		case IP_MULTICAST_TTL:
			*(uchar_t *)ptr = udp->udp_multicast_ttl;
			return (sizeof (uchar_t));
		case IP_MULTICAST_LOOP:
			*ptr = connp->conn_multicast_loop;
			return (sizeof (uint8_t));
		case IP_RECVOPTS:
			*i1 = udp->udp_recvopts;
			break;	/* goto sizeof (int) option return */
		case IP_RECVDSTADDR:
			*i1 = udp->udp_recvdstaddr;
			break;	/* goto sizeof (int) option return */
		case IP_RECVIF:
			*i1 = udp->udp_recvif;
			break;	/* goto sizeof (int) option return */
		case IP_RECVSLLA:
			*i1 = udp->udp_recvslla;
			break;	/* goto sizeof (int) option return */
		case IP_RECVTTL:
			*i1 = udp->udp_recvttl;
			break;	/* goto sizeof (int) option return */
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case IP_DONTFAILOVER_IF:
			/* cannot "get" the value for these */
			return (-1);
		case IP_BOUND_IF:
			/* Zero if not set */
			*i1 = udp->udp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IP_UNSPEC_SRC:
			*i1 = udp->udp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IP_XMIT_IF:
			*i1 = udp->udp_xmit_if;
			break;	/* goto sizeof (int) option return */
		default:
			return (-1);
		}
		break;
	case IPPROTO_IPV6:
		/* IPv6-level options only make sense on AF_INET6 endpoints. */
		if (udp->udp_family != AF_INET6)
			return (-1);
		switch (name) {
		case IPV6_UNICAST_HOPS:
			*i1 = (unsigned int)udp->udp_ttl;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_IF:
			/* 0 index if not set */
			*i1 = udp->udp_multicast_if_index;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_HOPS:
			*i1 = udp->udp_multicast_ttl;
			break;	/* goto sizeof (int) option return */
		case IPV6_MULTICAST_LOOP:
			*i1 = connp->conn_multicast_loop;
			break;	/* goto sizeof (int) option return */
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/* cannot "get" the value for these */
			return (-1);
		case IPV6_BOUND_IF:
			/* Zero if not set */
			*i1 = udp->udp_bound_if;
			break;	/* goto sizeof (int) option return */
		case IPV6_UNSPEC_SRC:
			*i1 = udp->udp_unspec_source;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVPKTINFO:
			*i1 = udp->udp_ipv6_recvpktinfo;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVTCLASS:
			*i1 = udp->udp_ipv6_recvtclass;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVPATHMTU:
			*i1 = udp->udp_ipv6_recvpathmtu;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVHOPLIMIT:
			*i1 = udp->udp_ipv6_recvhoplimit;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVHOPOPTS:
			*i1 = udp->udp_ipv6_recvhopopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVDSTOPTS:
			*i1 = udp->udp_ipv6_recvdstopts;
			break;	/* goto sizeof (int) option return */
		case _OLD_IPV6_RECVDSTOPTS:
			*i1 = udp->udp_old_ipv6_recvdstopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVRTHDRDSTOPTS:
			*i1 = udp->udp_ipv6_recvrthdrdstopts;
			break;	/* goto sizeof (int) option return */
		case IPV6_RECVRTHDR:
			*i1 = udp->udp_ipv6_recvrthdr;
			break;	/* goto sizeof (int) option return */
		case IPV6_PKTINFO: {
			/* XXX assumes that caller has room for max size! */
			struct in6_pktinfo *pkti;

			pkti = (struct in6_pktinfo *)ptr;
			if (ipp->ipp_fields & IPPF_IFINDEX)
				pkti->ipi6_ifindex = ipp->ipp_ifindex;
			else
				pkti->ipi6_ifindex = 0;
			if (ipp->ipp_fields & IPPF_ADDR)
				pkti->ipi6_addr = ipp->ipp_addr;
			else
				pkti->ipi6_addr = ipv6_all_zeros;
			return (sizeof (struct in6_pktinfo));
		}
		case IPV6_TCLASS:
			if (ipp->ipp_fields & IPPF_TCLASS)
				*i1 = ipp->ipp_tclass;
			else
				*i1 = IPV6_FLOW_TCLASS(
				    IPV6_DEFAULT_VERS_AND_FLOW);
			break;	/* goto sizeof (int) option return */
		case IPV6_NEXTHOP: {
			sin6_t *sin6 = (sin6_t *)ptr;

			if (!(ipp->ipp_fields & IPPF_NEXTHOP))
				return (0);
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = ipp->ipp_nexthop;
			return (sizeof (sin6_t));
		}
		case IPV6_HOPOPTS:
			if (!(ipp->ipp_fields & IPPF_HOPOPTS))
				return (0);
			/* Nothing besides the label option?  Report empty. */
			if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6)
				return (0);
			/*
			 * The cipso/label option is added by kernel.
			 * User is not usually aware of this option.
			 * We copy out the hbh opt after the label option.
			 */
			bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
			    ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
			if (udp->udp_label_len_v6 > 0) {
				/*
				 * Rewrite the HBH header: keep the original
				 * next-header byte and recompute ip6h_len
				 * (units of 8 bytes, minus 1) for the
				 * shortened, label-free option block.
				 */
				ptr[0] = ((char *)ipp->ipp_hopopts)[0];
				ptr[1] = (ipp->ipp_hopoptslen -
				    udp->udp_label_len_v6 + 7) / 8 - 1;
			}
			return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
		case IPV6_RTHDRDSTOPTS:
			if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
				return (0);
			bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
			return (ipp->ipp_rtdstoptslen);
		case IPV6_RTHDR:
			if (!(ipp->ipp_fields & IPPF_RTHDR))
				return (0);
			bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
			return (ipp->ipp_rthdrlen);
		case IPV6_DSTOPTS:
			if (!(ipp->ipp_fields & IPPF_DSTOPTS))
				return (0);
			bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
			return (ipp->ipp_dstoptslen);
		case IPV6_PATHMTU:
			return (ip_fill_mtuinfo(&udp->udp_v6dst,
			    udp->udp_dstport, (struct ip6_mtuinfo *)ptr));
		default:
			return (-1);
		}
		break;
	case IPPROTO_UDP:
		switch (name) {
		case UDP_ANONPRIVBIND:
			*i1 = udp->udp_anon_priv_bind;
			break;
		case UDP_EXCLBIND:
			*i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
			break;
		case UDP_RCVHDR:
			*i1 = udp->udp_rcvhdr ? 1 : 0;
			break;
		default:
			return (-1);
		}
		break;
	default:
		return (-1);
	}
	/* Cases that "break" out of the switch stored an int through ptr. */
	return (sizeof (int));
}

/*
 * This routine sets socket options; it expects the caller
 * to pass in the queue pointer of the upper instance.
 */
/*
 * Return conventions:
 *	0	success; the value is echoed to outvalp/*outlenp
 *	> 0	errno-style failure (EINVAL, ENOBUFS, EACCES, ...)
 *	-EINVAL	"soft" error: option must be handled at the IP level;
 *		*outlenp is deliberately left unmodified in that case.
 * When optset_context requests T_CHECK semantics, 'checkonly' is set and
 * the option is validated without changing any state.
 */
/* ARGSUSED */
int
udp_opt_set(queue_t *q, uint_t optset_context, int level,
    int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
    uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk)
{
	udpattrs_t	*attrs = thisdg_attrs;
	int	*i1 = (int *)invalp;
	boolean_t onoff = (*i1 == 0) ? 0 : 1;
	boolean_t checkonly;
	int	error;
	conn_t	*connp;
	udp_t	*udp;
	uint_t	newlen;

	/* Normalize to the upper instance's write queue to find the conn. */
	q = UDP_WR(q);
	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;

	switch (optset_context) {
	case SETFN_OPTCOM_CHECKONLY:
		checkonly = B_TRUE;
		/*
		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
		 * inlen != 0 implies value supplied and
		 * 	we have to "pretend" to set it.
		 * inlen == 0 implies that there is no
		 * 	value part in T_CHECK request and just validation
		 * done elsewhere should be enough, we just return here.
		 */
		if (inlen == 0) {
			*outlenp = 0;
			return (0);
		}
		break;
	case SETFN_OPTCOM_NEGOTIATE:
		checkonly = B_FALSE;
		break;
	case SETFN_UD_NEGOTIATE:
	case SETFN_CONN_NEGOTIATE:
		checkonly = B_FALSE;
		/*
		 * Negotiating local and "association-related" options
		 * through T_UNITDATA_REQ.
		 *
		 * Following routine can filter out ones we do not
		 * want to be "set" this way.
		 */
		if (!udp_opt_allow_udr_set(level, name)) {
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		/*
		 * We should never get here
		 */
		*outlenp = 0;
		return (EINVAL);
	}

	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));

	/*
	 * For fixed length options, no sanity check
	 * of passed in length is done. It is assumed *_optcom_req()
	 * routines do the right thing.
	 */

	switch (level) {
	case SOL_SOCKET:
		switch (name) {
		case SO_REUSEADDR:
			if (!checkonly)
				udp->udp_reuseaddr = onoff;
			break;
		case SO_DEBUG:
			if (!checkonly)
				udp->udp_debug = onoff;
			break;
		/*
		 * The following three items are available here,
		 * but are only meaningful to IP.
		 */
		case SO_DONTROUTE:
			if (!checkonly)
				udp->udp_dontroute = onoff;
			break;
		case SO_USELOOPBACK:
			if (!checkonly)
				udp->udp_useloopback = onoff;
			break;
		case SO_BROADCAST:
			if (!checkonly)
				udp->udp_broadcast = onoff;
			break;

		case SO_SNDBUF:
			/* Cap at the tunable udp_max_buf. */
			if (*i1 > udp_max_buf) {
				*outlenp = 0;
				return (ENOBUFS);
			}
			if (!checkonly) {
				q->q_hiwat = *i1;
				WR(UDP_RD(q))->q_hiwat = *i1;
			}
			break;
		case SO_RCVBUF:
			if (*i1 > udp_max_buf) {
				*outlenp = 0;
				return (ENOBUFS);
			}
			if (!checkonly) {
				RD(q)->q_hiwat = *i1;
				UDP_RD(q)->q_hiwat = *i1;
				(void) mi_set_sth_hiwat(UDP_RD(q),
				    udp_set_rcv_hiwat(udp, *i1));
			}
			break;
		case SO_DGRAM_ERRIND:
			if (!checkonly)
				udp->udp_dgram_errind = onoff;
			break;
		case SO_RECVUCRED:
			if (!checkonly)
				udp->udp_recvucred = onoff;
			break;
		case SO_ALLZONES:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Do not modify *outlenp.
			 */
			return (-EINVAL);
		case SO_TIMESTAMP:
			if (!checkonly)
				udp->udp_timestamp = onoff;
			break;
		case SO_ANON_MLP:
			if (!checkonly)
				udp->udp_anon_mlp = onoff;
			break;
		case SO_MAC_EXEMPT:
			/* Privileged, and only before the socket is bound. */
			if (secpolicy_net_mac_aware(cr) != 0 ||
			    udp->udp_state != TS_UNBND)
				return (EACCES);
			if (!checkonly)
				udp->udp_mac_exempt = onoff;
			break;
		case SCM_UCRED: {
			struct ucred_s *ucr;
			/* NOTE: this local 'cr' shadows the 'cr' parameter. */
			cred_t *cr, *newcr;
			ts_label_t *tsl;

			/*
			 * Only sockets that have proper privileges and are
			 * bound to MLPs will have any other value here, so
			 * this implicitly tests for privilege to set label.
			 */
			if (connp->conn_mlp_type == mlptSingle)
				break;
			ucr = (struct ucred_s *)invalp;
			if (inlen != ucredsize ||
			    ucr->uc_labeloff < sizeof (*ucr) ||
			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
				return (EINVAL);
			if (!checkonly) {
				mblk_t *mb;

				if (attrs == NULL ||
				    (mb = attrs->udpattr_mb) == NULL)
					return (EINVAL);
				if ((cr = DB_CRED(mb)) == NULL)
					cr = udp->udp_connp->conn_cred;
				ASSERT(cr != NULL);
				if ((tsl = crgetlabel(cr)) == NULL)
					return (EINVAL);
				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
				    tsl->tsl_doi, KM_NOSLEEP);
				if (newcr == NULL)
					return (ENOSR);
				/* mblk_setcred holds its own reference. */
				mblk_setcred(mb, newcr);
				attrs->udpattr_credset = B_TRUE;
				crfree(newcr);
			}
			break;
		}
		case SO_EXCLBIND:
			if (!checkonly)
				udp->udp_exclbind = onoff;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	case IPPROTO_IP:
		if (udp->udp_family != AF_INET) {
			*outlenp = 0;
			return (ENOPROTOOPT);
		}
		switch (name) {
		case IP_OPTIONS:
		case T_IP_OPTIONS:
			/* Save options for use by IP. */
			newlen = inlen + udp->udp_label_len;
			if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (checkonly)
				break;

			if (!tsol_option_set(&udp->udp_ip_snd_options,
			    &udp->udp_ip_snd_options_len,
			    udp->udp_label_len, invalp, inlen)) {
				*outlenp = 0;
				return (ENOMEM);
			}

			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			(void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len +
			    udp_wroff_extra);
			break;

		case IP_TTL:
			if (!checkonly) {
				udp->udp_ttl = (uchar_t)*i1;
			}
			break;
		case IP_TOS:
		case T_IP_TOS:
			if (!checkonly) {
				udp->udp_type_of_service = (uchar_t)*i1;
			}
			break;
		case IP_MULTICAST_IF: {
			/*
			 * TODO should check OPTMGMT reply and undo this if
			 * there is an error.
			 */
			struct in_addr *inap = (struct in_addr *)invalp;
			if (!checkonly) {
				udp->udp_multicast_if_addr =
				    inap->s_addr;
			}
			break;
		}
		case IP_MULTICAST_TTL:
			if (!checkonly)
				udp->udp_multicast_ttl = *invalp;
			break;
		case IP_MULTICAST_LOOP:
			if (!checkonly)
				connp->conn_multicast_loop = *invalp;
			break;
		case IP_RECVOPTS:
			if (!checkonly)
				udp->udp_recvopts = onoff;
			break;
		case IP_RECVDSTADDR:
			if (!checkonly)
				udp->udp_recvdstaddr = onoff;
			break;
		case IP_RECVIF:
			if (!checkonly)
				udp->udp_recvif = onoff;
			break;
		case IP_RECVSLLA:
			if (!checkonly)
				udp->udp_recvslla = onoff;
			break;
		case IP_RECVTTL:
			if (!checkonly)
				udp->udp_recvttl = onoff;
			break;
		case IP_ADD_MEMBERSHIP:
		case IP_DROP_MEMBERSHIP:
		case IP_BLOCK_SOURCE:
		case IP_UNBLOCK_SOURCE:
		case IP_ADD_SOURCE_MEMBERSHIP:
		case IP_DROP_SOURCE_MEMBERSHIP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
		case IP_SEC_OPT:
		case IP_NEXTHOP:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Do not modify *outlenp.
			 */
			return (-EINVAL);
		case IP_BOUND_IF:
			if (!checkonly)
				udp->udp_bound_if = *i1;
			break;
		case IP_UNSPEC_SRC:
			if (!checkonly)
				udp->udp_unspec_source = onoff;
			break;
		case IP_XMIT_IF:
			if (!checkonly)
				udp->udp_xmit_if = *i1;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	case IPPROTO_IPV6: {
		ip6_pkt_t *ipp;
		boolean_t sticky;

		if (udp->udp_family != AF_INET6) {
			*outlenp = 0;
			return (ENOPROTOOPT);
		}
		/*
		 * Deal with both sticky options and ancillary data
		 */
		sticky = B_FALSE;
		if (attrs == NULL || (ipp = attrs->udpattr_ipp) == NULL) {
			/* sticky options, or none */
			ipp = &udp->udp_sticky_ipp;
			sticky = B_TRUE;
		}

		switch (name) {
		case IPV6_MULTICAST_IF:
			if (!checkonly)
				udp->udp_multicast_if_index = *i1;
			break;
		case IPV6_UNICAST_HOPS:
			/* -1 means use default */
			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				if (*i1 == -1) {
					udp->udp_ttl = ipp->ipp_unicast_hops =
					    udp_ipv6_hoplimit;
					ipp->ipp_fields &= ~IPPF_UNICAST_HOPS;
					/* Pass modified value to IP. */
					*i1 = udp->udp_ttl;
				} else {
					udp->udp_ttl = ipp->ipp_unicast_hops =
					    (uint8_t)*i1;
					ipp->ipp_fields |= IPPF_UNICAST_HOPS;
				}
				/* Rebuild the header template */
				error = udp_build_hdrs(q, udp);
				if (error != 0) {
					*outlenp = 0;
					return (error);
				}
			}
			break;
		case IPV6_MULTICAST_HOPS:
			/* -1 means use default */
			if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly) {
				if (*i1 == -1) {
					udp->udp_multicast_ttl =
					    ipp->ipp_multicast_hops =
					    IP_DEFAULT_MULTICAST_TTL;
					ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS;
					/* Pass modified value to IP. */
					*i1 = udp->udp_multicast_ttl;
				} else {
					udp->udp_multicast_ttl =
					    ipp->ipp_multicast_hops =
					    (uint8_t)*i1;
					ipp->ipp_fields |= IPPF_MULTICAST_HOPS;
				}
			}
			break;
		case IPV6_MULTICAST_LOOP:
			if (*i1 != 0 && *i1 != 1) {
				*outlenp = 0;
				return (EINVAL);
			}
			if (!checkonly)
				connp->conn_multicast_loop = *i1;
			break;
		case IPV6_JOIN_GROUP:
		case IPV6_LEAVE_GROUP:
		case MCAST_JOIN_GROUP:
		case MCAST_LEAVE_GROUP:
		case MCAST_BLOCK_SOURCE:
		case MCAST_UNBLOCK_SOURCE:
		case MCAST_JOIN_SOURCE_GROUP:
		case MCAST_LEAVE_SOURCE_GROUP:
			/*
			 * "soft" error (negative)
			 * option not handled at this level
			 * Note: Do not modify *outlenp
			 */
			return (-EINVAL);
		case IPV6_BOUND_IF:
			if (!checkonly)
				udp->udp_bound_if = *i1;
			break;
		case IPV6_UNSPEC_SRC:
			if (!checkonly)
				udp->udp_unspec_source = onoff;
			break;
		/*
		 * Set boolean switches for ancillary data delivery
		 */
		case IPV6_RECVPKTINFO:
			if (!checkonly)
				udp->udp_ipv6_recvpktinfo = onoff;
			break;
		case IPV6_RECVTCLASS:
			if (!checkonly) {
				udp->udp_ipv6_recvtclass = onoff;
			}
			break;
		case IPV6_RECVPATHMTU:
			if (!checkonly) {
				udp->udp_ipv6_recvpathmtu = onoff;
			}
			break;
		case IPV6_RECVHOPLIMIT:
			if (!checkonly)
				udp->udp_ipv6_recvhoplimit = onoff;
			break;
		case IPV6_RECVHOPOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvhopopts = onoff;
			break;
		case IPV6_RECVDSTOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvdstopts = onoff;
			break;
		case _OLD_IPV6_RECVDSTOPTS:
			if (!checkonly)
				udp->udp_old_ipv6_recvdstopts = onoff;
			break;
		case IPV6_RECVRTHDRDSTOPTS:
			if (!checkonly)
				udp->udp_ipv6_recvrthdrdstopts = onoff;
			break;
		case IPV6_RECVRTHDR:
			if (!checkonly)
				udp->udp_ipv6_recvrthdr = onoff;
			break;
		/*
		 * Set sticky options or ancillary data.
		 * If sticky options, (re)build any extension headers
		 * that might be needed as a result.
		 */
		case IPV6_PKTINFO:
			/*
			 * The source address and ifindex are verified
			 * in ip_opt_set(). For ancillary data the
			 * source address is checked in ip_wput_v6.
			 */
			if (inlen != 0 && inlen != sizeof (struct in6_pktinfo))
				return (EINVAL);
			if (checkonly)
				break;

			/* inlen == 0 clears the option. */
			if (inlen == 0) {
				ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR);
				ipp->ipp_sticky_ignored |=
				    (IPPF_IFINDEX|IPPF_ADDR);
			} else {
				struct in6_pktinfo *pkti;

				pkti = (struct in6_pktinfo *)invalp;
				ipp->ipp_ifindex = pkti->ipi6_ifindex;
				ipp->ipp_addr = pkti->ipi6_addr;
				if (ipp->ipp_ifindex != 0)
					ipp->ipp_fields |= IPPF_IFINDEX;
				else
					ipp->ipp_fields &= ~IPPF_IFINDEX;
				if (!IN6_IS_ADDR_UNSPECIFIED(
				    &ipp->ipp_addr))
					ipp->ipp_fields |= IPPF_ADDR;
				else
					ipp->ipp_fields &= ~IPPF_ADDR;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_HOPLIMIT:
			/* Ancillary-data only; not valid as a sticky option. */
			if (sticky)
				return (EINVAL);
			if (inlen != 0 && inlen != sizeof (int))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_HOPLIMIT;
				ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT;
			} else {
				if (*i1 > 255 || *i1 < -1)
					return (EINVAL);
				if (*i1 == -1)
					ipp->ipp_hoplimit = udp_ipv6_hoplimit;
				else
					ipp->ipp_hoplimit = *i1;
				ipp->ipp_fields |= IPPF_HOPLIMIT;
			}
			break;
		case IPV6_TCLASS:
			if (inlen != 0 && inlen != sizeof (int))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_TCLASS;
				ipp->ipp_sticky_ignored |= IPPF_TCLASS;
			} else {
				if (*i1 > 255 || *i1 < -1)
					return (EINVAL);
				if (*i1 == -1)
					ipp->ipp_tclass = 0;
				else
					ipp->ipp_tclass = *i1;
				ipp->ipp_fields |= IPPF_TCLASS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_NEXTHOP:
			/*
			 * IP will verify that the nexthop is reachable
			 * and fail for sticky options.
			 */
			if (inlen != 0 && inlen != sizeof (sin6_t))
				return (EINVAL);
			if (checkonly)
				break;

			if (inlen == 0) {
				ipp->ipp_fields &= ~IPPF_NEXTHOP;
				ipp->ipp_sticky_ignored |= IPPF_NEXTHOP;
			} else {
				sin6_t *sin6 = (sin6_t *)invalp;

				if (sin6->sin6_family != AF_INET6)
					return (EAFNOSUPPORT);
				if (IN6_IS_ADDR_V4MAPPED(
				    &sin6->sin6_addr))
					return (EADDRNOTAVAIL);
				ipp->ipp_nexthop = sin6->sin6_addr;
				if (!IN6_IS_ADDR_UNSPECIFIED(
				    &ipp->ipp_nexthop))
					ipp->ipp_fields |= IPPF_NEXTHOP;
				else
					ipp->ipp_fields &= ~IPPF_NEXTHOP;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		case IPV6_HOPOPTS: {
			ip6_hbh_t *hopts = (ip6_hbh_t *)invalp;
			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (hopts->ip6h_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			/*
			 * For sticky options a label prefix of
			 * udp_label_len_v6 bytes is preserved in front of
			 * the user-supplied option data.
			 */
			error = optcom_pkt_set(invalp, inlen, sticky,
			    (uchar_t **)&ipp->ipp_hopopts,
			    &ipp->ipp_hopoptslen,
			    sticky ? udp->udp_label_len_v6 : 0);
			if (error != 0)
				return (error);
			if (ipp->ipp_hopoptslen == 0) {
				ipp->ipp_fields &= ~IPPF_HOPOPTS;
				ipp->ipp_sticky_ignored |= IPPF_HOPOPTS;
			} else {
				ipp->ipp_fields |= IPPF_HOPOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_RTHDRDSTOPTS: {
			ip6_dest_t *dopts = (ip6_dest_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (dopts->ip6d_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Clearing: free any sticky copy we hold. */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) {
					kmem_free(ipp->ipp_rtdstopts,
					    ipp->ipp_rtdstoptslen);
					ipp->ipp_rtdstopts = NULL;
					ipp->ipp_rtdstoptslen = 0;
				}

				ipp->ipp_fields &= ~IPPF_RTDSTOPTS;
				ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_rtdstopts,
				    &ipp->ipp_rtdstoptslen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_RTDSTOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_DSTOPTS: {
			ip6_dest_t *dopts = (ip6_dest_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (dopts->ip6d_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Clearing: free any sticky copy we hold. */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_DSTOPTS) != 0) {
					kmem_free(ipp->ipp_dstopts,
					    ipp->ipp_dstoptslen);
					ipp->ipp_dstopts = NULL;
					ipp->ipp_dstoptslen = 0;
				}
				ipp->ipp_fields &= ~IPPF_DSTOPTS;
				ipp->ipp_sticky_ignored |= IPPF_DSTOPTS;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_dstopts,
				    &ipp->ipp_dstoptslen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_DSTOPTS;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}
		case IPV6_RTHDR: {
			ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp;

			/*
			 * Sanity checks - minimum size, size a multiple of
			 * eight bytes, and matching size passed in.
			 */
			if (inlen != 0 &&
			    inlen != (8 * (rt->ip6r_len + 1)))
				return (EINVAL);

			if (checkonly)
				break;

			if (inlen == 0) {
				/* Clearing: free any sticky copy we hold. */
				if (sticky &&
				    (ipp->ipp_fields & IPPF_RTHDR) != 0) {
					kmem_free(ipp->ipp_rthdr,
					    ipp->ipp_rthdrlen);
					ipp->ipp_rthdr = NULL;
					ipp->ipp_rthdrlen = 0;
				}
				ipp->ipp_fields &= ~IPPF_RTHDR;
				ipp->ipp_sticky_ignored |= IPPF_RTHDR;
			} else {
				error = optcom_pkt_set(invalp, inlen, sticky,
				    (uchar_t **)&ipp->ipp_rthdr,
				    &ipp->ipp_rthdrlen, 0);
				if (error != 0)
					return (error);
				ipp->ipp_fields |= IPPF_RTHDR;
			}
			if (sticky) {
				error = udp_build_hdrs(q, udp);
				if (error != 0)
					return (error);
			}
			break;
		}

		case IPV6_DONTFRAG:
			if (checkonly)
				break;

			if (onoff) {
				ipp->ipp_fields |= IPPF_DONTFRAG;
			} else {
				ipp->ipp_fields &= ~IPPF_DONTFRAG;
			}
			break;

		case IPV6_USE_MIN_MTU:
			if (inlen != sizeof (int))
				return (EINVAL);

			if (*i1 < -1 || *i1 > 1)
				return (EINVAL);

			if (checkonly)
				break;

			ipp->ipp_fields |= IPPF_USE_MIN_MTU;
			ipp->ipp_use_min_mtu = *i1;
			break;

		case IPV6_BOUND_PIF:
		case IPV6_SEC_OPT:
		case IPV6_DONTFAILOVER_IF:
		case IPV6_SRC_PREFERENCES:
		case IPV6_V6ONLY:
			/* Handled at the IP level */
			return (-EINVAL);
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	} /* end IPPROTO_IPV6 */
	case IPPROTO_UDP:
		switch (name) {
		case UDP_ANONPRIVBIND:
			if ((error = secpolicy_net_privaddr(cr, 0)) != 0) {
				*outlenp = 0;
				return (error);
			}
			if (!checkonly) {
				udp->udp_anon_priv_bind = onoff;
			}
			break;
		case UDP_EXCLBIND:
			if (!checkonly)
				udp->udp_exclbind = onoff;
			break;
		case UDP_RCVHDR:
			if (!checkonly)
				udp->udp_rcvhdr = onoff;
			break;
		default:
			*outlenp = 0;
			return (EINVAL);
		}
		break;
	default:
		*outlenp = 0;
		return (EINVAL);
	}
	/*
	 * Common case of OK return with outval same as inval.
	 */
	if (invalp != outvalp) {
		/* don't trust bcopy for identical src/dst */
		(void) bcopy(invalp, outvalp, inlen);
	}
	*outlenp = inlen;
	return (0);
}

/*
 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl.
 * The headers include ip6i_t (if needed), ip6_t, any sticky extension
 * headers, and the udp header.
 * Returns failure if can't allocate memory.
 */
static int
udp_build_hdrs(queue_t *q, udp_t *udp)
{
	uchar_t	*hdrs;
	uint_t	hdrs_len;
	ip6_t	*ip6h;
	ip6i_t	*ip6i;
	udpha_t	*udpha;
	ip6_pkt_t *ipp = &udp->udp_sticky_ipp;

	hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE;
	ASSERT(hdrs_len != 0);
	if (hdrs_len != udp->udp_sticky_hdrs_len) {
		/* Need to reallocate */
		hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP);
		if (hdrs == NULL)
			return (ENOMEM);

		if (udp->udp_sticky_hdrs_len != 0) {
			kmem_free(udp->udp_sticky_hdrs,
			    udp->udp_sticky_hdrs_len);
		}
		udp->udp_sticky_hdrs = hdrs;
		udp->udp_sticky_hdrs_len = hdrs_len;
	}
	ip_build_hdrs_v6(udp->udp_sticky_hdrs,
	    udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP);

	/* Set header fields not in ipp */
	if (ipp->ipp_fields & IPPF_HAS_IP6I) {
		/* An ip6i_t precedes the ip6_t in the template. */
		ip6i = (ip6i_t *)udp->udp_sticky_hdrs;
		ip6h = (ip6_t *)&ip6i[1];
	} else {
		ip6h = (ip6_t *)udp->udp_sticky_hdrs;
	}

	if (!(ipp->ipp_fields & IPPF_ADDR))
		ip6h->ip6_src = udp->udp_v6src;

	/* The UDP header sits at the very end of the template. */
	udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE);
	udpha->uha_src_port = udp->udp_port;

	/* Try to get everything in a single mblk */
	if (hdrs_len > udp->udp_max_hdr_len) {
		udp->udp_max_hdr_len = hdrs_len;
		(void) mi_set_sth_wroff(RD(q),
		    udp->udp_max_hdr_len +
		    udp_wroff_extra);
	}
	return (0);
}

/*
 * This routine retrieves the value of an ND variable in a udpparam_t
 * structure. It is called through nd_getset when a user reads the
 * variable.  Always succeeds (returns 0).
 */
/* ARGSUSED */
static int
udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udpparam_t *udppa = (udpparam_t *)cp;

	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
	return (0);
}

/*
 * Walk through the param array specified registering each element with the
 * named dispatch (ND) handler.  Also registers the fixed-name ND entries
 * (extra privileged ports and the status/bind-hash reports).  On any
 * nd_load() failure the whole ND table is torn down and B_FALSE returned.
 */
static boolean_t
udp_param_register(udpparam_t *udppa, int cnt)
{
	for (; cnt-- > 0; udppa++) {
		/* Entries with an empty name are placeholders; skip them. */
		if (udppa->udp_param_name && udppa->udp_param_name[0]) {
			if (!nd_load(&udp_g_nd, udppa->udp_param_name,
			    udp_param_get, udp_param_set,
			    (caddr_t)udppa)) {
				nd_free(&udp_g_nd);
				return (B_FALSE);
			}
		}
	}
	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports",
	    udp_extra_priv_ports_get, NULL, NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add",
	    NULL, udp_extra_priv_ports_add, NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del",
	    NULL, udp_extra_priv_ports_del, NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL,
	    NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL,
	    NULL)) {
		nd_free(&udp_g_nd);
		return (B_FALSE);
	}
	return (B_TRUE);
}

/* This routine sets an ND variable in a udpparam_t structure.
 */
/* ARGSUSED */
static int
udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
{
	long		new_value;
	udpparam_t	*udppa = (udpparam_t *)cp;

	/*
	 * Fail the request if the new value does not lie within the
	 * required bounds.  The value is parsed as base-10 via ddi_strtol.
	 */
	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
	    new_value < udppa->udp_param_min ||
	    new_value > udppa->udp_param_max) {
		return (EINVAL);
	}

	/* Set the new value */
	udppa->udp_param_value = new_value;
	return (0);
}

/*
 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with
 * T_opthdr) and return the number of bytes copied.  'dbuf' may be NULL to
 * just count the length needed for allocation.  If 'dbuf' is non-NULL,
 * then it's assumed to be allocated to be large enough.
 *
 * Returns zero if trimming of the security option causes all options to go
 * away.
 */
static size_t
copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
{
	struct T_opthdr *toh;
	size_t hol = ipp->ipp_hopoptslen;
	ip6_hbh_t *dstopt = NULL;
	const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
	size_t tlen, olen, plen;
	boolean_t deleting;
	const struct ip6_opt *sopt, *lastpad;
	struct ip6_opt *dopt;

	if ((toh = (struct T_opthdr *)dbuf) != NULL) {
		/* Counting-only mode when dbuf is NULL; fill header here. */
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_HOPOPTS;
		toh->status = 0;
		dstopt = (ip6_hbh_t *)(toh + 1);
	}

	/*
	 * If labeling is enabled, then skip the label option
	 * but get other options if there are any.
4356 */ 4357 if (is_system_labeled()) { 4358 dopt = NULL; 4359 if (dstopt != NULL) { 4360 /* will fill in ip6h_len later */ 4361 dstopt->ip6h_nxt = srcopt->ip6h_nxt; 4362 dopt = (struct ip6_opt *)(dstopt + 1); 4363 } 4364 sopt = (const struct ip6_opt *)(srcopt + 1); 4365 hol -= sizeof (*srcopt); 4366 tlen = sizeof (*dstopt); 4367 lastpad = NULL; 4368 deleting = B_FALSE; 4369 /* 4370 * This loop finds the first (lastpad pointer) of any number of 4371 * pads that preceeds the security option, then treats the 4372 * security option as though it were a pad, and then finds the 4373 * next non-pad option (or end of list). 4374 * 4375 * It then treats the entire block as one big pad. To preserve 4376 * alignment of any options that follow, or just the end of the 4377 * list, it computes a minimal new padding size that keeps the 4378 * same alignment for the next option. 4379 * 4380 * If it encounters just a sequence of pads with no security 4381 * option, those are copied as-is rather than collapsed. 4382 * 4383 * Note that to handle the end of list case, the code makes one 4384 * loop with 'hol' set to zero. 
4385 */ 4386 for (;;) { 4387 if (hol > 0) { 4388 if (sopt->ip6o_type == IP6OPT_PAD1) { 4389 if (lastpad == NULL) 4390 lastpad = sopt; 4391 sopt = (const struct ip6_opt *) 4392 &sopt->ip6o_len; 4393 hol--; 4394 continue; 4395 } 4396 olen = sopt->ip6o_len + sizeof (*sopt); 4397 if (olen > hol) 4398 olen = hol; 4399 if (sopt->ip6o_type == IP6OPT_PADN || 4400 sopt->ip6o_type == ip6opt_ls) { 4401 if (sopt->ip6o_type == ip6opt_ls) 4402 deleting = B_TRUE; 4403 if (lastpad == NULL) 4404 lastpad = sopt; 4405 sopt = (const struct ip6_opt *) 4406 ((const char *)sopt + olen); 4407 hol -= olen; 4408 continue; 4409 } 4410 } else { 4411 /* if nothing was copied at all, then delete */ 4412 if (tlen == sizeof (*dstopt)) 4413 return (0); 4414 /* last pass; pick up any trailing padding */ 4415 olen = 0; 4416 } 4417 if (deleting) { 4418 /* 4419 * compute aligning effect of deleted material 4420 * to reproduce with pad. 4421 */ 4422 plen = ((const char *)sopt - 4423 (const char *)lastpad) & 7; 4424 tlen += plen; 4425 if (dopt != NULL) { 4426 if (plen == 1) { 4427 dopt->ip6o_type = IP6OPT_PAD1; 4428 } else if (plen > 1) { 4429 plen -= sizeof (*dopt); 4430 dopt->ip6o_type = IP6OPT_PADN; 4431 dopt->ip6o_len = plen; 4432 if (plen > 0) 4433 bzero(dopt + 1, plen); 4434 } 4435 dopt = (struct ip6_opt *) 4436 ((char *)dopt + plen); 4437 } 4438 deleting = B_FALSE; 4439 lastpad = NULL; 4440 } 4441 /* if there's uncopied padding, then copy that now */ 4442 if (lastpad != NULL) { 4443 olen += (const char *)sopt - 4444 (const char *)lastpad; 4445 sopt = lastpad; 4446 lastpad = NULL; 4447 } 4448 if (dopt != NULL && olen > 0) { 4449 bcopy(sopt, dopt, olen); 4450 dopt = (struct ip6_opt *)((char *)dopt + olen); 4451 } 4452 if (hol == 0) 4453 break; 4454 tlen += olen; 4455 sopt = (const struct ip6_opt *) 4456 ((const char *)sopt + olen); 4457 hol -= olen; 4458 } 4459 /* go back and patch up the length value, rounded upward */ 4460 if (dstopt != NULL) 4461 dstopt->ip6h_len = (tlen - 1) >> 3; 4462 } else { 
4463 tlen = hol; 4464 if (dstopt != NULL) 4465 bcopy(srcopt, dstopt, hol); 4466 } 4467 4468 tlen += sizeof (*toh); 4469 if (toh != NULL) 4470 toh->len = tlen; 4471 4472 return (tlen); 4473 } 4474 4475 static void 4476 udp_input(conn_t *connp, mblk_t *mp) 4477 { 4478 struct T_unitdata_ind *tudi; 4479 uchar_t *rptr; /* Pointer to IP header */ 4480 int hdr_length; /* Length of IP+UDP headers */ 4481 int udi_size; /* Size of T_unitdata_ind */ 4482 int mp_len; 4483 udp_t *udp; 4484 udpha_t *udpha; 4485 int ipversion; 4486 ip6_pkt_t ipp; 4487 ip6_t *ip6h; 4488 ip6i_t *ip6i; 4489 mblk_t *mp1; 4490 mblk_t *options_mp = NULL; 4491 in_pktinfo_t *pinfo = NULL; 4492 cred_t *cr = NULL; 4493 queue_t *q = connp->conn_rq; 4494 pid_t cpid; 4495 cred_t *rcr = connp->conn_cred; 4496 4497 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4498 "udp_rput_start: q %p mp %p", q, mp); 4499 4500 udp = connp->conn_udp; 4501 rptr = mp->b_rptr; 4502 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4503 ASSERT(OK_32PTR(rptr)); 4504 4505 /* 4506 * IP should have prepended the options data in an M_CTL 4507 * Check M_CTL "type" to make sure are not here bcos of 4508 * a valid ICMP message 4509 */ 4510 if (DB_TYPE(mp) == M_CTL) { 4511 if (MBLKL(mp) == sizeof (in_pktinfo_t) && 4512 ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type == 4513 IN_PKTINFO) { 4514 /* 4515 * IP_RECVIF or IP_RECVSLLA information has been 4516 * appended to the packet by IP. We need to 4517 * extract the mblk and adjust the rptr 4518 */ 4519 pinfo = (in_pktinfo_t *)mp->b_rptr; 4520 options_mp = mp; 4521 mp = mp->b_cont; 4522 rptr = mp->b_rptr; 4523 UDP_STAT(udp_in_pktinfo); 4524 } else { 4525 /* 4526 * ICMP messages. 4527 */ 4528 udp_icmp_error(q, mp); 4529 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4530 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4531 return; 4532 } 4533 } 4534 4535 mp_len = msgdsize(mp); 4536 /* 4537 * This is the inbound data path. 
4538 * First, we check to make sure the IP version number is correct, 4539 * and then pull the IP and UDP headers into the first mblk. 4540 * Assume IP provides aligned packets - otherwise toss. 4541 * Also, check if we have a complete IP header. 4542 */ 4543 4544 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4545 ipp.ipp_fields = 0; 4546 4547 ipversion = IPH_HDR_VERSION(rptr); 4548 switch (ipversion) { 4549 case IPV4_VERSION: 4550 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4551 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4552 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4553 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4554 (udp->udp_ip_rcv_options_len)) { 4555 /* 4556 * Handle IPv4 packets with options outside of the 4557 * main data path. Not needed for AF_INET6 sockets 4558 * since they don't support a getsockopt of IP_OPTIONS. 4559 */ 4560 if (udp->udp_family == AF_INET6) 4561 break; 4562 /* 4563 * UDP length check performed for IPv4 packets with 4564 * options to check whether UDP length specified in 4565 * the header is the same as the physical length of 4566 * the packet. 4567 */ 4568 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4569 if (mp_len != (ntohs(udpha->uha_length) + 4570 hdr_length - UDPH_SIZE)) { 4571 goto tossit; 4572 } 4573 /* 4574 * Handle the case where the packet has IP options 4575 * and the IP_RECVSLLA & IP_RECVIF are set 4576 */ 4577 if (pinfo != NULL) 4578 mp = options_mp; 4579 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4580 SQTAG_UDP_INPUT); 4581 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4582 "udp_rput_end: q %p (%S)", q, "end"); 4583 return; 4584 } 4585 4586 /* Handle IPV6_RECVHOPLIMIT. 
*/ 4587 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4588 udp->udp_ipv6_recvpktinfo) { 4589 if (pinfo->in_pkt_flags & IPF_RECVIF) { 4590 ipp.ipp_fields |= IPPF_IFINDEX; 4591 ipp.ipp_ifindex = pinfo->in_pkt_ifindex; 4592 } 4593 } 4594 break; 4595 case IPV6_VERSION: 4596 /* 4597 * IPv6 packets can only be received by applications 4598 * that are prepared to receive IPv6 addresses. 4599 * The IP fanout must ensure this. 4600 */ 4601 ASSERT(udp->udp_family == AF_INET6); 4602 4603 ip6h = (ip6_t *)rptr; 4604 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4605 4606 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4607 uint8_t nexthdrp; 4608 /* Look for ifindex information */ 4609 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4610 ip6i = (ip6i_t *)ip6h; 4611 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4612 goto tossit; 4613 4614 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4615 ASSERT(ip6i->ip6i_ifindex != 0); 4616 ipp.ipp_fields |= IPPF_IFINDEX; 4617 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4618 } 4619 rptr = (uchar_t *)&ip6i[1]; 4620 mp->b_rptr = rptr; 4621 if (rptr == mp->b_wptr) { 4622 mp1 = mp->b_cont; 4623 freeb(mp); 4624 mp = mp1; 4625 rptr = mp->b_rptr; 4626 } 4627 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4628 goto tossit; 4629 ip6h = (ip6_t *)rptr; 4630 mp_len = msgdsize(mp); 4631 } 4632 /* 4633 * Find any potentially interesting extension headers 4634 * as well as the length of the IPv6 + extension 4635 * headers. 4636 */ 4637 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4638 UDPH_SIZE; 4639 ASSERT(nexthdrp == IPPROTO_UDP); 4640 } else { 4641 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4642 ip6i = NULL; 4643 } 4644 break; 4645 default: 4646 ASSERT(0); 4647 } 4648 4649 /* 4650 * IP inspected the UDP header thus all of it must be in the mblk. 4651 * UDP length check is performed for IPv6 packets and IPv4 packets 4652 * without options to check if the size of the packet as specified 4653 * by the header is the same as the physical size of the packet. 
4654 */ 4655 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4656 if ((MBLKL(mp) < hdr_length) || 4657 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4658 goto tossit; 4659 } 4660 4661 /* Walk past the headers. */ 4662 if (!udp->udp_rcvhdr) { 4663 mp->b_rptr = rptr + hdr_length; 4664 mp_len -= hdr_length; 4665 } 4666 4667 /* 4668 * This is the inbound data path. Packets are passed upstream as 4669 * T_UNITDATA_IND messages with full IP headers still attached. 4670 */ 4671 if (udp->udp_family == AF_INET) { 4672 sin_t *sin; 4673 4674 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4675 4676 /* 4677 * Normally only send up the address. 4678 * If IP_RECVDSTADDR is set we include the destination IP 4679 * address as an option. With IP_RECVOPTS we include all 4680 * the IP options. Only ip_rput_other() handles packets 4681 * that contain IP options. 4682 */ 4683 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4684 if (udp->udp_recvdstaddr) { 4685 udi_size += sizeof (struct T_opthdr) + 4686 sizeof (struct in_addr); 4687 UDP_STAT(udp_in_recvdstaddr); 4688 } 4689 4690 /* 4691 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4692 * space accordingly 4693 */ 4694 if (udp->udp_recvif && (pinfo != NULL) && 4695 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4696 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4697 UDP_STAT(udp_in_recvif); 4698 } 4699 4700 if (udp->udp_recvslla && (pinfo != NULL) && 4701 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4702 udi_size += sizeof (struct T_opthdr) + 4703 sizeof (struct sockaddr_dl); 4704 UDP_STAT(udp_in_recvslla); 4705 } 4706 4707 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4708 udi_size += sizeof (struct T_opthdr) + ucredsize; 4709 cpid = DB_CPID(mp); 4710 UDP_STAT(udp_in_recvucred); 4711 } 4712 4713 /* 4714 * If SO_TIMESTAMP is set allocate the appropriate sized 4715 * buffer. 
Since gethrestime() expects a pointer aligned 4716 * argument, we allocate space necessary for extra 4717 * alignment (even though it might not be used). 4718 */ 4719 if (udp->udp_timestamp) { 4720 udi_size += sizeof (struct T_opthdr) + 4721 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4722 UDP_STAT(udp_in_timestamp); 4723 } 4724 4725 /* 4726 * If IP_RECVTTL is set allocate the appropriate sized buffer 4727 */ 4728 if (udp->udp_recvttl) { 4729 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4730 UDP_STAT(udp_in_recvttl); 4731 } 4732 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4733 4734 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4735 mp1 = allocb(udi_size, BPRI_MED); 4736 if (mp1 == NULL) { 4737 freemsg(mp); 4738 if (options_mp != NULL) 4739 freeb(options_mp); 4740 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4741 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4742 BUMP_MIB(&udp_mib, udpInErrors); 4743 return; 4744 } 4745 mp1->b_cont = mp; 4746 mp = mp1; 4747 mp->b_datap->db_type = M_PROTO; 4748 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4749 mp->b_wptr = (uchar_t *)tudi + udi_size; 4750 tudi->PRIM_type = T_UNITDATA_IND; 4751 tudi->SRC_length = sizeof (sin_t); 4752 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4753 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4754 sizeof (sin_t); 4755 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4756 tudi->OPT_length = udi_size; 4757 sin = (sin_t *)&tudi[1]; 4758 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4759 sin->sin_port = udpha->uha_src_port; 4760 sin->sin_family = udp->udp_family; 4761 *(uint32_t *)&sin->sin_zero[0] = 0; 4762 *(uint32_t *)&sin->sin_zero[4] = 0; 4763 4764 /* 4765 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4766 * IP_RECVTTL has been set. 4767 */ 4768 if (udi_size != 0) { 4769 /* 4770 * Copy in destination address before options to avoid 4771 * any padding issues. 
4772 */ 4773 char *dstopt; 4774 4775 dstopt = (char *)&sin[1]; 4776 if (udp->udp_recvdstaddr) { 4777 struct T_opthdr *toh; 4778 ipaddr_t *dstptr; 4779 4780 toh = (struct T_opthdr *)dstopt; 4781 toh->level = IPPROTO_IP; 4782 toh->name = IP_RECVDSTADDR; 4783 toh->len = sizeof (struct T_opthdr) + 4784 sizeof (ipaddr_t); 4785 toh->status = 0; 4786 dstopt += sizeof (struct T_opthdr); 4787 dstptr = (ipaddr_t *)dstopt; 4788 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4789 dstopt = (char *)toh + toh->len; 4790 udi_size -= toh->len; 4791 } 4792 4793 if (udp->udp_recvslla && (pinfo != NULL) && 4794 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4795 4796 struct T_opthdr *toh; 4797 struct sockaddr_dl *dstptr; 4798 4799 toh = (struct T_opthdr *)dstopt; 4800 toh->level = IPPROTO_IP; 4801 toh->name = IP_RECVSLLA; 4802 toh->len = sizeof (struct T_opthdr) + 4803 sizeof (struct sockaddr_dl); 4804 toh->status = 0; 4805 dstopt += sizeof (struct T_opthdr); 4806 dstptr = (struct sockaddr_dl *)dstopt; 4807 bcopy(&pinfo->in_pkt_slla, dstptr, 4808 sizeof (struct sockaddr_dl)); 4809 dstopt = (char *)toh + toh->len; 4810 udi_size -= toh->len; 4811 } 4812 4813 if (udp->udp_recvif && (pinfo != NULL) && 4814 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4815 4816 struct T_opthdr *toh; 4817 uint_t *dstptr; 4818 4819 toh = (struct T_opthdr *)dstopt; 4820 toh->level = IPPROTO_IP; 4821 toh->name = IP_RECVIF; 4822 toh->len = sizeof (struct T_opthdr) + 4823 sizeof (uint_t); 4824 toh->status = 0; 4825 dstopt += sizeof (struct T_opthdr); 4826 dstptr = (uint_t *)dstopt; 4827 *dstptr = pinfo->in_pkt_ifindex; 4828 dstopt = (char *)toh + toh->len; 4829 udi_size -= toh->len; 4830 } 4831 4832 if (cr != NULL) { 4833 struct T_opthdr *toh; 4834 4835 toh = (struct T_opthdr *)dstopt; 4836 toh->level = SOL_SOCKET; 4837 toh->name = SCM_UCRED; 4838 toh->len = sizeof (struct T_opthdr) + ucredsize; 4839 toh->status = 0; 4840 (void) cred2ucred(cr, cpid, &toh[1], rcr); 4841 dstopt = (char *)toh + toh->len; 4842 udi_size -= toh->len; 4843 } 
/*
 * Continuation of udp_input(): SO_TIMESTAMP / IP_RECVTTL option emission
 * for AF_INET, followed by the AF_INET6 ancillary-data size reservation.
 */
			if (udp->udp_timestamp) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = SOL_SOCKET;
				toh->name = SCM_TIMESTAMP;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				/* Align for gethrestime() */
				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
				    sizeof (intptr_t));
				gethrestime((timestruc_t *)dstopt);
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			/*
			 * CAUTION:
			 * Due to aligment issues
			 * Processing of IP_RECVTTL option
			 * should always be the last. Adding
			 * any option processing after this will
			 * cause alignment panic.
			 */
			if (udp->udp_recvttl) {
				struct T_opthdr *toh;
				uint8_t *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVTTL;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint8_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (uint8_t *)dstopt;
				*dstptr = ((ipha_t *)rptr)->ipha_ttl;
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			/* Consumed all of allocated space */
			ASSERT(udi_size == 0);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 * [ Only udp_rput_other() handles packets that contain IP
		 * options so code to account for does not appear immediately
		 * below but elsewhere ]
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		/*
		 * Size-reservation pass: account for every ancillary option
		 * that will be emitted further below so a single allocb()
		 * suffices.  The emission code must stay in sync with this.
		 */
		if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
		    IPPF_RTHDR|IPPF_IFINDEX)) {
			if (udp->udp_ipv6_recvhopopts &&
			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
				size_t hlen;

				UDP_STAT(udp_in_recvhopopts);
				/* NULL dbuf: count bytes only */
				hlen = copy_hop_opts(&ipp, NULL);
				if (hlen == 0)
					ipp.ipp_fields &= ~IPPF_HOPOPTS;
				udi_size += hlen;
			}
			if ((udp->udp_ipv6_recvdstopts ||
			    udp->udp_old_ipv6_recvdstopts) &&
			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
				udi_size += sizeof (struct T_opthdr) +
				    ipp.ipp_dstoptslen;
				UDP_STAT(udp_in_recvdstopts);
			}
			if (((udp->udp_ipv6_recvdstopts &&
			    udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR)) ||
			    udp->udp_ipv6_recvrthdrdstopts) &&
			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
				udi_size += sizeof (struct T_opthdr) +
				    ipp.ipp_rtdstoptslen;
				UDP_STAT(udp_in_recvrtdstopts);
			}
			if (udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR)) {
				udi_size += sizeof (struct T_opthdr) +
				    ipp.ipp_rthdrlen;
				UDP_STAT(udp_in_recvrthdr);
			}
			if (udp->udp_ipv6_recvpktinfo &&
			    (ipp.ipp_fields & IPPF_IFINDEX)) {
				udi_size += sizeof (struct T_opthdr) +
				    sizeof (struct in6_pktinfo);
				UDP_STAT(udp_in_recvpktinfo);
			}

		}
		if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
			udi_size += sizeof (struct T_opthdr) + ucredsize;
			cpid = DB_CPID(mp);
			UDP_STAT(udp_in_recvucred);
		}

		if (udp->udp_ipv6_recvhoplimit) {
			udi_size += sizeof (struct T_opthdr) + sizeof (int);
			UDP_STAT(udp_in_recvhoplimit);
		}

		if (udp->udp_ipv6_recvtclass) {
			udi_size += sizeof (struct T_opthdr) + sizeof (int);
			UDP_STAT(udp_in_recvtclass);
		}
/*
 * Continuation of udp_input(): AF_INET6 T_unitdata_ind assembly and
 * final delivery upstream.
 */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			if (options_mp != NULL)
				freeb(options_mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
			    "udp_rput_end: q %p (%S)", q, "allocbfail");
			BUMP_MIB(&udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp = mp1;
		mp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp->b_rptr;
		mp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ipversion == IPV4_VERSION) {
			in6_addr_t v6dst;

			/* v4-mapped source/destination for AF_INET6 socket */
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    connp->conn_zoneid);
		} else {
			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope source pass up scope id */
			if ((ipp.ipp_fields & IPPF_IFINDEX) &&
			    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ipp.ipp_ifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, connp->conn_zoneid);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = udp->udp_family;

		/* Emit the ancillary options reserved for above, in order */
		if (udi_size != 0) {
			uchar_t *dstopt;

			dstopt = (uchar_t *)&sin6[1];
			if (udp->udp_ipv6_recvpktinfo &&
			    (ipp.ipp_fields & IPPF_IFINDEX)) {
				struct T_opthdr *toh;
				struct in6_pktinfo *pkti;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_PKTINFO;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (*pkti);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				pkti = (struct in6_pktinfo *)dstopt;
				if (ipversion == IPV6_VERSION)
					pkti->ipi6_addr = ip6h->ip6_dst;
				else
					IN6_IPADDR_TO_V4MAPPED(
					    ((ipha_t *)rptr)->ipha_dst,
					    &pkti->ipi6_addr);
				pkti->ipi6_ifindex = ipp.ipp_ifindex;
				dstopt += sizeof (*pkti);
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvhoplimit) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_HOPLIMIT;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				if (ipversion == IPV6_VERSION)
					*(uint_t *)dstopt = ip6h->ip6_hops;
				else
					*(uint_t *)dstopt =
					    ((ipha_t *)rptr)->ipha_ttl;
				dstopt += sizeof (uint_t);
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvtclass) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_TCLASS;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				if (ipversion == IPV6_VERSION) {
					*(uint_t *)dstopt =
					    IPV6_FLOW_TCLASS(ip6h->ip6_flow);
				} else {
					/* IPv4 TOS maps to traffic class */
					ipha_t *ipha = (ipha_t *)rptr;
					*(uint_t *)dstopt =
					    ipha->ipha_type_of_service;
				}
				dstopt += sizeof (uint_t);
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvhopopts &&
			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
				size_t hlen;

				hlen = copy_hop_opts(&ipp, dstopt);
				dstopt += hlen;
				udi_size -= hlen;
			}
			/*
			 * NOTE(review): the size reservation earlier also
			 * honors udp_ipv6_recvrthdrdstopts for RTDSTOPTS,
			 * but this emission test does not — confirm the two
			 * conditions are intended to differ, as a mismatch
			 * would leave udi_size nonzero at the ASSERT below.
			 */
			if (udp->udp_ipv6_recvdstopts &&
			    udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR) &&
			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_DSTOPTS;
				toh->len = sizeof (struct T_opthdr) +
				    ipp.ipp_rtdstoptslen;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				bcopy(ipp.ipp_rtdstopts, dstopt,
				    ipp.ipp_rtdstoptslen);
				dstopt += ipp.ipp_rtdstoptslen;
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR)) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_RTHDR;
				toh->len = sizeof (struct T_opthdr) +
				    ipp.ipp_rthdrlen;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
				dstopt += ipp.ipp_rthdrlen;
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvdstopts &&
			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_DSTOPTS;
				toh->len = sizeof (struct T_opthdr) +
				    ipp.ipp_dstoptslen;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				bcopy(ipp.ipp_dstopts, dstopt,
				    ipp.ipp_dstoptslen);
				dstopt += ipp.ipp_dstoptslen;
				udi_size -= toh->len;
			}

			/* SO_RECVUCRED: attach the sender's credentials */
			if (cr != NULL) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = SOL_SOCKET;
				toh->name = SCM_UCRED;
				toh->len = sizeof (struct T_opthdr) + ucredsize;
				toh->status = 0;
				(void) cred2ucred(cr, cpid, &toh[1], rcr);
				dstopt += toh->len;
				udi_size -= toh->len;
			}
			/* Consumed all of allocated space */
			ASSERT(udi_size == 0);
		}
#undef	sin6
		/* No IP_RECVDSTADDR for IPv6. */
	}

	BUMP_MIB(&udp_mib, udpInDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
	    "udp_rput_end: q %p (%S)", q, "end");
	if (options_mp != NULL)
		freeb(options_mp);

	if (udp->udp_direct_sockfs) {
		/*
		 * There is nothing above us except for the stream head;
		 * use the read-side synchronous stream interface in
		 * order to reduce the time spent in interrupt thread.
		 */
		ASSERT(udp->udp_issocket);
		udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len);
	} else {
		/*
		 * Use regular STREAMS interface to pass data upstream
		 * if this is not a socket endpoint, or if we have
		 * switched over to the slow mode due to sockmod being
		 * popped or a module being pushed on top of us.
		 */
		putnext(UDP_RD(q), mp);
	}
	return;

tossit:
	/* Malformed packet: free everything and count the error. */
	freemsg(mp);
	if (options_mp != NULL)
		freeb(options_mp);
	BUMP_MIB(&udp_mib, udpInErrors);
}

/*
 * Fanout entry point: enter the conn's serialization context and hand
 * the message to udp_input() via udp_input_wrapper().
 */
void
udp_conn_recv(conn_t *connp, mblk_t *mp)
{
	_UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT);
}

/* Squeue-style adapter: run udp_input() then drop the conn reference. */
/* ARGSUSED */
static void
udp_input_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_input((conn_t *)arg, mp);
	_UDP_EXIT((conn_t *)arg);
}

/*
 * Process non-M_DATA messages as well as M_DATA messages that require
 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options.
5206 */ 5207 static void 5208 udp_rput_other(queue_t *q, mblk_t *mp) 5209 { 5210 struct T_unitdata_ind *tudi; 5211 mblk_t *mp1; 5212 uchar_t *rptr; 5213 uchar_t *new_rptr; 5214 int hdr_length; 5215 int udi_size; /* Size of T_unitdata_ind */ 5216 int opt_len; /* Length of IP options */ 5217 sin_t *sin; 5218 struct T_error_ack *tea; 5219 mblk_t *options_mp = NULL; 5220 in_pktinfo_t *pinfo; 5221 boolean_t recv_on = B_FALSE; 5222 cred_t *cr = NULL; 5223 udp_t *udp = Q_TO_UDP(q); 5224 pid_t cpid; 5225 cred_t *rcr = udp->udp_connp->conn_cred; 5226 5227 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 5228 "udp_rput_other: q %p mp %p", q, mp); 5229 5230 ASSERT(OK_32PTR(mp->b_rptr)); 5231 rptr = mp->b_rptr; 5232 5233 switch (mp->b_datap->db_type) { 5234 case M_CTL: 5235 /* 5236 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 5237 */ 5238 recv_on = B_TRUE; 5239 options_mp = mp; 5240 pinfo = (in_pktinfo_t *)options_mp->b_rptr; 5241 5242 /* 5243 * The actual data is in mp->b_cont 5244 */ 5245 mp = mp->b_cont; 5246 ASSERT(OK_32PTR(mp->b_rptr)); 5247 rptr = mp->b_rptr; 5248 break; 5249 case M_DATA: 5250 /* 5251 * M_DATA messages contain IPv4 datagrams. They are handled 5252 * after this switch. 5253 */ 5254 break; 5255 case M_PROTO: 5256 case M_PCPROTO: 5257 /* M_PROTO messages contain some type of TPI message. */ 5258 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 5259 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5260 freemsg(mp); 5261 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5262 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 5263 return; 5264 } 5265 tea = (struct T_error_ack *)rptr; 5266 5267 switch (tea->PRIM_type) { 5268 case T_ERROR_ACK: 5269 switch (tea->ERROR_prim) { 5270 case O_T_BIND_REQ: 5271 case T_BIND_REQ: { 5272 /* 5273 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5274 * clear out the associated port and source 5275 * address before passing the message 5276 * upstream. If this was caused by a T_CONN_REQ 5277 * revert back to bound state. 
5278 */ 5279 udp_fanout_t *udpf; 5280 5281 udpf = &udp_bind_fanout[ 5282 UDP_BIND_HASH(udp->udp_port)]; 5283 mutex_enter(&udpf->uf_lock); 5284 if (udp->udp_state == TS_DATA_XFER) { 5285 /* Connect failed */ 5286 tea->ERROR_prim = T_CONN_REQ; 5287 /* Revert back to the bound source */ 5288 udp->udp_v6src = udp->udp_bound_v6src; 5289 udp->udp_state = TS_IDLE; 5290 mutex_exit(&udpf->uf_lock); 5291 if (udp->udp_family == AF_INET6) 5292 (void) udp_build_hdrs(q, udp); 5293 break; 5294 } 5295 5296 if (udp->udp_discon_pending) { 5297 tea->ERROR_prim = T_DISCON_REQ; 5298 udp->udp_discon_pending = 0; 5299 } 5300 V6_SET_ZERO(udp->udp_v6src); 5301 V6_SET_ZERO(udp->udp_bound_v6src); 5302 udp->udp_state = TS_UNBND; 5303 udp_bind_hash_remove(udp, B_TRUE); 5304 udp->udp_port = 0; 5305 mutex_exit(&udpf->uf_lock); 5306 if (udp->udp_family == AF_INET6) 5307 (void) udp_build_hdrs(q, udp); 5308 break; 5309 } 5310 default: 5311 break; 5312 } 5313 break; 5314 case T_BIND_ACK: 5315 udp_rput_bind_ack(q, mp); 5316 return; 5317 5318 case T_OPTMGMT_ACK: 5319 case T_OK_ACK: 5320 break; 5321 default: 5322 freemsg(mp); 5323 return; 5324 } 5325 putnext(UDP_RD(q), mp); 5326 return; 5327 } 5328 5329 /* 5330 * This is the inbound data path. 5331 * First, we make sure the data contains both IP and UDP headers. 5332 * 5333 * This handle IPv4 packets for only AF_INET sockets. 5334 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 5335 * is no need saving the options. 5336 */ 5337 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 5338 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 5339 if (mp->b_wptr - rptr < hdr_length) { 5340 if (!pullupmsg(mp, hdr_length)) { 5341 freemsg(mp); 5342 if (options_mp != NULL) 5343 freeb(options_mp); 5344 BUMP_MIB(&udp_mib, udpInErrors); 5345 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5346 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 5347 BUMP_MIB(&udp_mib, udpInErrors); 5348 return; 5349 } 5350 rptr = mp->b_rptr; 5351 } 5352 /* Walk past the headers. 
*/ 5353 new_rptr = rptr + hdr_length; 5354 if (!udp->udp_rcvhdr) 5355 mp->b_rptr = new_rptr; 5356 5357 /* Save the options if any */ 5358 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 5359 if (opt_len > 0) { 5360 if (opt_len > udp->udp_ip_rcv_options_len) { 5361 if (udp->udp_ip_rcv_options_len) 5362 mi_free((char *)udp->udp_ip_rcv_options); 5363 udp->udp_ip_rcv_options_len = 0; 5364 udp->udp_ip_rcv_options = 5365 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 5366 if (udp->udp_ip_rcv_options) 5367 udp->udp_ip_rcv_options_len = opt_len; 5368 } 5369 if (udp->udp_ip_rcv_options_len) { 5370 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 5371 udp->udp_ip_rcv_options, opt_len); 5372 /* Adjust length if we are resusing the space */ 5373 udp->udp_ip_rcv_options_len = opt_len; 5374 } 5375 } else if (udp->udp_ip_rcv_options_len) { 5376 mi_free((char *)udp->udp_ip_rcv_options); 5377 udp->udp_ip_rcv_options = NULL; 5378 udp->udp_ip_rcv_options_len = 0; 5379 } 5380 5381 /* 5382 * Normally only send up the address. 5383 * If IP_RECVDSTADDR is set we include the destination IP 5384 * address as an option. With IP_RECVOPTS we include all 5385 * the IP options. 
5386 */ 5387 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5388 if (udp->udp_recvdstaddr) { 5389 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5390 UDP_STAT(udp_in_recvdstaddr); 5391 } 5392 if (udp->udp_recvopts && opt_len > 0) { 5393 udi_size += sizeof (struct T_opthdr) + opt_len; 5394 UDP_STAT(udp_in_recvopts); 5395 } 5396 5397 /* 5398 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5399 * space accordingly 5400 */ 5401 if (udp->udp_recvif && recv_on && 5402 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5403 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5404 UDP_STAT(udp_in_recvif); 5405 } 5406 5407 if (udp->udp_recvslla && recv_on && 5408 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5409 udi_size += sizeof (struct T_opthdr) + 5410 sizeof (struct sockaddr_dl); 5411 UDP_STAT(udp_in_recvslla); 5412 } 5413 5414 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5415 udi_size += sizeof (struct T_opthdr) + ucredsize; 5416 cpid = DB_CPID(mp); 5417 UDP_STAT(udp_in_recvucred); 5418 } 5419 /* 5420 * If IP_RECVTTL is set allocate the appropriate sized buffer 5421 */ 5422 if (udp->udp_recvttl) { 5423 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5424 UDP_STAT(udp_in_recvttl); 5425 } 5426 5427 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 5428 mp1 = allocb(udi_size, BPRI_MED); 5429 if (mp1 == NULL) { 5430 freemsg(mp); 5431 if (options_mp != NULL) 5432 freeb(options_mp); 5433 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5434 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5435 BUMP_MIB(&udp_mib, udpInErrors); 5436 return; 5437 } 5438 mp1->b_cont = mp; 5439 mp = mp1; 5440 mp->b_datap->db_type = M_PROTO; 5441 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5442 mp->b_wptr = (uchar_t *)tudi + udi_size; 5443 tudi->PRIM_type = T_UNITDATA_IND; 5444 tudi->SRC_length = sizeof (sin_t); 5445 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5446 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5447 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5448 tudi->OPT_length = udi_size; 5449 5450 sin = (sin_t *)&tudi[1]; 5451 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5452 sin->sin_port = ((in_port_t *) 5453 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5454 sin->sin_family = AF_INET; 5455 *(uint32_t *)&sin->sin_zero[0] = 0; 5456 *(uint32_t *)&sin->sin_zero[4] = 0; 5457 5458 /* 5459 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5460 * IP_RECVTTL has been set. 5461 */ 5462 if (udi_size != 0) { 5463 /* 5464 * Copy in destination address before options to avoid any 5465 * padding issues. 
5466 */ 5467 char *dstopt; 5468 5469 dstopt = (char *)&sin[1]; 5470 if (udp->udp_recvdstaddr) { 5471 struct T_opthdr *toh; 5472 ipaddr_t *dstptr; 5473 5474 toh = (struct T_opthdr *)dstopt; 5475 toh->level = IPPROTO_IP; 5476 toh->name = IP_RECVDSTADDR; 5477 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5478 toh->status = 0; 5479 dstopt += sizeof (struct T_opthdr); 5480 dstptr = (ipaddr_t *)dstopt; 5481 *dstptr = (((ipaddr_t *)rptr)[4]); 5482 dstopt += sizeof (ipaddr_t); 5483 udi_size -= toh->len; 5484 } 5485 if (udp->udp_recvopts && udi_size != 0) { 5486 struct T_opthdr *toh; 5487 5488 toh = (struct T_opthdr *)dstopt; 5489 toh->level = IPPROTO_IP; 5490 toh->name = IP_RECVOPTS; 5491 toh->len = sizeof (struct T_opthdr) + opt_len; 5492 toh->status = 0; 5493 dstopt += sizeof (struct T_opthdr); 5494 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5495 dstopt += opt_len; 5496 udi_size -= toh->len; 5497 } 5498 5499 if (udp->udp_recvslla && recv_on && 5500 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5501 5502 struct T_opthdr *toh; 5503 struct sockaddr_dl *dstptr; 5504 5505 toh = (struct T_opthdr *)dstopt; 5506 toh->level = IPPROTO_IP; 5507 toh->name = IP_RECVSLLA; 5508 toh->len = sizeof (struct T_opthdr) + 5509 sizeof (struct sockaddr_dl); 5510 toh->status = 0; 5511 dstopt += sizeof (struct T_opthdr); 5512 dstptr = (struct sockaddr_dl *)dstopt; 5513 bcopy(&pinfo->in_pkt_slla, dstptr, 5514 sizeof (struct sockaddr_dl)); 5515 dstopt += sizeof (struct sockaddr_dl); 5516 udi_size -= toh->len; 5517 } 5518 5519 if (udp->udp_recvif && recv_on && 5520 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5521 5522 struct T_opthdr *toh; 5523 uint_t *dstptr; 5524 5525 toh = (struct T_opthdr *)dstopt; 5526 toh->level = IPPROTO_IP; 5527 toh->name = IP_RECVIF; 5528 toh->len = sizeof (struct T_opthdr) + 5529 sizeof (uint_t); 5530 toh->status = 0; 5531 dstopt += sizeof (struct T_opthdr); 5532 dstptr = (uint_t *)dstopt; 5533 *dstptr = pinfo->in_pkt_ifindex; 5534 dstopt += sizeof (uint_t); 5535 
udi_size -= toh->len; 5536 } 5537 5538 if (cr != NULL) { 5539 struct T_opthdr *toh; 5540 5541 toh = (struct T_opthdr *)dstopt; 5542 toh->level = SOL_SOCKET; 5543 toh->name = SCM_UCRED; 5544 toh->len = sizeof (struct T_opthdr) + ucredsize; 5545 toh->status = 0; 5546 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5547 dstopt += toh->len; 5548 udi_size -= toh->len; 5549 } 5550 5551 if (udp->udp_recvttl) { 5552 struct T_opthdr *toh; 5553 uint8_t *dstptr; 5554 5555 toh = (struct T_opthdr *)dstopt; 5556 toh->level = IPPROTO_IP; 5557 toh->name = IP_RECVTTL; 5558 toh->len = sizeof (struct T_opthdr) + 5559 sizeof (uint8_t); 5560 toh->status = 0; 5561 dstopt += sizeof (struct T_opthdr); 5562 dstptr = (uint8_t *)dstopt; 5563 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5564 dstopt += sizeof (uint8_t); 5565 udi_size -= toh->len; 5566 } 5567 5568 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5569 } 5570 BUMP_MIB(&udp_mib, udpInDatagrams); 5571 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5572 "udp_rput_other_end: q %p (%S)", q, "end"); 5573 if (options_mp != NULL) 5574 freeb(options_mp); 5575 5576 if (udp->udp_direct_sockfs) { 5577 /* 5578 * There is nothing above us except for the stream head; 5579 * use the read-side synchronous stream interface in 5580 * order to reduce the time spent in interrupt thread. 5581 */ 5582 ASSERT(udp->udp_issocket); 5583 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5584 } else { 5585 /* 5586 * Use regular STREAMS interface to pass data upstream 5587 * if this is not a socket endpoint, or if we have 5588 * switched over to the slow mode due to sockmod being 5589 * popped or a module being pushed on top of us. 
5590 */ 5591 putnext(UDP_RD(q), mp); 5592 } 5593 } 5594 5595 /* ARGSUSED */ 5596 static void 5597 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5598 { 5599 conn_t *connp = arg; 5600 5601 udp_rput_other(connp->conn_rq, mp); 5602 udp_exit(connp); 5603 } 5604 5605 /* 5606 * Process a T_BIND_ACK 5607 */ 5608 static void 5609 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5610 { 5611 udp_t *udp = Q_TO_UDP(q); 5612 mblk_t *mp1; 5613 ire_t *ire; 5614 struct T_bind_ack *tba; 5615 uchar_t *addrp; 5616 ipa_conn_t *ac; 5617 ipa6_conn_t *ac6; 5618 5619 if (udp->udp_discon_pending) 5620 udp->udp_discon_pending = 0; 5621 5622 /* 5623 * If a broadcast/multicast address was bound set 5624 * the source address to 0. 5625 * This ensures no datagrams with broadcast address 5626 * as source address are emitted (which would violate 5627 * RFC1122 - Hosts requirements) 5628 * 5629 * Note that when connecting the returned IRE is 5630 * for the destination address and we only perform 5631 * the broadcast check for the source address (it 5632 * is OK to connect to a broadcast/multicast address.) 5633 */ 5634 mp1 = mp->b_cont; 5635 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5636 ire = (ire_t *)mp1->b_rptr; 5637 5638 /* 5639 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5640 * local address. 
5641 */ 5642 if (ire->ire_type == IRE_BROADCAST && 5643 udp->udp_state != TS_DATA_XFER) { 5644 /* This was just a local bind to a broadcast addr */ 5645 V6_SET_ZERO(udp->udp_v6src); 5646 if (udp->udp_family == AF_INET6) 5647 (void) udp_build_hdrs(q, udp); 5648 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5649 /* 5650 * Local address not yet set - pick it from the 5651 * T_bind_ack 5652 */ 5653 tba = (struct T_bind_ack *)mp->b_rptr; 5654 addrp = &mp->b_rptr[tba->ADDR_offset]; 5655 switch (udp->udp_family) { 5656 case AF_INET: 5657 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5658 ac = (ipa_conn_t *)addrp; 5659 } else { 5660 ASSERT(tba->ADDR_length == 5661 sizeof (ipa_conn_x_t)); 5662 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5663 } 5664 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5665 &udp->udp_v6src); 5666 break; 5667 case AF_INET6: 5668 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5669 ac6 = (ipa6_conn_t *)addrp; 5670 } else { 5671 ASSERT(tba->ADDR_length == 5672 sizeof (ipa6_conn_x_t)); 5673 ac6 = &((ipa6_conn_x_t *) 5674 addrp)->ac6x_conn; 5675 } 5676 udp->udp_v6src = ac6->ac6_laddr; 5677 (void) udp_build_hdrs(q, udp); 5678 break; 5679 } 5680 } 5681 mp1 = mp1->b_cont; 5682 } 5683 /* 5684 * Look for one or more appended ACK message added by 5685 * udp_connect or udp_disconnect. 5686 * If none found just send up the T_BIND_ACK. 5687 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5688 * udp_disconnect has appended a T_OK_ACK. 
*/
	if (mp1 != NULL) {
		/* Detach the appended ACK chain from the bind ack */
		if (mp->b_cont == mp1)
			mp->b_cont = NULL;
		else {
			ASSERT(mp->b_cont->b_cont == mp1);
			mp->b_cont->b_cont = NULL;
		}
		freemsg(mp);
		mp = mp1;
		/* Send each appended ACK upstream as its own message */
		while (mp != NULL) {
			mp1 = mp->b_cont;
			mp->b_cont = NULL;
			putnext(UDP_RD(q), mp);
			mp = mp1;
		}
		return;
	}
	/* No appended ACKs: drop any IRE mblk and pass the bind ack up */
	freemsg(mp->b_cont);
	mp->b_cont = NULL;
	putnext(UDP_RD(q), mp);
}

/*
 * return SNMP stuff in buffer in mpdata
 *
 * Replies with four messages via qreply(): the fixed UDP MIB counters,
 * the IPv4 connection table, the IPv6 connection table, and (only when
 * non-empty) the MLP attribute tables for each.  Entries are restricted
 * to the caller's zone.  Returns 1 on success, 0 if the control message
 * or the copies of it could not be set up.
 */
int
udp_snmp_get(queue_t *q, mblk_t *mpctl)
{
	mblk_t	*mpdata;
	mblk_t	*mp_conn_ctl;
	mblk_t	*mp_attr_ctl;
	mblk_t	*mp6_conn_ctl;
	mblk_t	*mp6_attr_ctl;
	mblk_t	*mp_conn_tail;
	mblk_t	*mp_attr_tail;
	mblk_t	*mp6_conn_tail;
	mblk_t	*mp6_attr_tail;
	struct opthdr	*optp;
	mib2_udpEntry_t	ude;
	mib2_udp6Entry_t	ude6;
	mib2_transportMLPEntry_t mlp;
	int	state;
	zoneid_t	zoneid;
	int	i;
	connf_t	*connfp;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;
	int	v4_conn_idx;
	int	v6_conn_idx;
	boolean_t needattr;

	/*
	 * NULL these out so the error path below can freemsg() them safely
	 * even when the short-circuit chain fails early (mp6_attr_ctl is
	 * never freed on error, so it needs no pre-initialization).
	 */
	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
	if (mpctl == NULL ||
	    (mpdata = mpctl->b_cont) == NULL ||
	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
		freemsg(mp_conn_ctl);
		freemsg(mp_attr_ctl);
		freemsg(mp6_conn_ctl);
		return (0);
	}

	zoneid = connp->conn_zoneid;

	/* fixed length structure for IPv4 and IPv6 counters */
	SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
	SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every UDP conn in the global hash, same zone only */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDP))) {
			udp = connp->conn_udp;
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/* Build MLP attribute entry only for MLP conns */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (udp->udp_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress =
				    V4_PART_OF_V6(udp->udp_v6src);
				ude.udpLocalPort = ntohs(udp->udp_port);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    V4_PART_OF_V6(udp->udp_v6dst);
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}
				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (udp->udp_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state = state;
				ude6.udp6LocalAddress = udp->udp_v6src;
				ude6.udp6LocalPort = ntohs(udp->udp_port);
				ude6.udp6IfIndex = udp->udp_bound_if;
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    udp->udp_v6dst;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	/* Empty attribute tables are dropped rather than sent up */
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (1);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to be
 * changed to do the appropriate locking.
*/
/* ARGSUSED */
int
udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
    uchar_t *ptr, int len)
{
	switch (level) {
	case MIB2_UDP:
		/* UDP MIB data is read-only: reject all sets */
		return (0);
	default:
		/* Not our level; report success so others can handle it */
		return (1);
	}
}

/*
 * Format one line describing a single UDP endpoint (state, zone, local
 * and remote address/port) into the buffer mblk for the ndd report.
 * Output is truncated to the space remaining between b_wptr and db_lim.
 */
static void
udp_report_item(mblk_t *mp, udp_t *udp)
{
	char *state;
	char addrbuf1[INET6_ADDRSTRLEN];
	char addrbuf2[INET6_ADDRSTRLEN];
	uint_t print_len, buf_len;

	/*
	 * NOTE(review): buf_len is unsigned, so the ASSERT below is
	 * vacuously true; it presumably guards against b_wptr running
	 * past db_lim — confirm intent before changing it.
	 */
	buf_len = mp->b_datap->db_lim - mp->b_wptr;
	ASSERT(buf_len >= 0);
	if (buf_len == 0)
		return;

	if (udp->udp_state == TS_UNBND)
		state = "UNBOUND";
	else if (udp->udp_state == TS_IDLE)
		state = "IDLE";
	else if (udp->udp_state == TS_DATA_XFER)
		state = "CONNECTED";
	else
		state = "UnkState";
	print_len = snprintf((char *)mp->b_wptr, buf_len,
	    MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n",
	    (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port),
	    inet_ntop(AF_INET6, &udp->udp_v6src,
	    addrbuf1, sizeof (addrbuf1)),
	    inet_ntop(AF_INET6, &udp->udp_v6dst,
	    addrbuf2, sizeof (addrbuf2)),
	    ntohs(udp->udp_dstport), state);
	/* Advance b_wptr by what was written, capped at the buffer end */
	if (print_len < buf_len) {
		mp->b_wptr += print_len;
	} else {
		mp->b_wptr += buf_len;
	}
}

/* Report for ndd "udp_status" */
/* ARGSUSED */
static int
udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	zoneid_t zoneid;
	connf_t *connfp;
	conn_t *connp = Q_TO_CONN(q);
	udp_t *udp = connp->conn_udp;
	int i;

	/*
	 * Because of the ndd constraint, at most we can have 64K buffer
	 * to put in all UDP info. So to be more efficient, just
	 * allocate a 64K buffer here, assuming we need that large buffer.
	 * This may be a problem as any user can read udp_status. Therefore
	 * we limit the rate of doing this using udp_ndd_get_info_interval.
	 * This should be OK as normal users should not do this too often.
	 */
	if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) {
		/* Unprivileged caller: enforce the rate limit */
		if (ddi_get_lbolt() - udp_last_ndd_get_info_time <
		    drv_usectohz(udp_ndd_get_info_interval * 1000)) {
			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
			return (0);
		}
	}
	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
		/* The following may work even if we cannot get a large buf. */
		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
		return (0);
	}
	(void) mi_mpprintf(mp,
	    "UDP " MI_COL_HDRPAD_STR
	/* 12345678[89ABCDEF] */
	    " zone lport src addr dest addr port state");
	/* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */

	zoneid = connp->conn_zoneid;

	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDP))) {
			udp = connp->conn_udp;
			/* Global zone sees all; others only their own */
			if (zoneid != GLOBAL_ZONEID &&
			    zoneid != connp->conn_zoneid)
				continue;

			udp_report_item(mp->b_cont, udp);
		}
	}
	/* Record the time of this report for the rate limiter above */
	udp_last_ndd_get_info_time = ddi_get_lbolt();
	return (0);
}

/*
 * This routine creates a T_UDERROR_IND message and passes it upstream.
 * The address and options are copied from the T_UNITDATA_REQ message
 * passed in mp. This message is freed.
*/
static void
udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
    t_scalar_t err)
{
	struct T_unitdata_req *tudr;
	mblk_t	*mp1;
	uchar_t	*optaddr;
	t_scalar_t optlen;

	if (DB_TYPE(mp) == M_DATA) {
		/* Raw data: caller must supply the destination address */
		ASSERT(destaddr != NULL && destlen != 0);
		optaddr = NULL;
		optlen = 0;
	} else {
		/*
		 * T_UNITDATA_REQ: validate the header and the address and
		 * option regions it describes before dereferencing them.
		 * A malformed message is silently dropped.
		 */
		if ((mp->b_wptr < mp->b_rptr) ||
		    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
			goto done;
		}
		tudr = (struct T_unitdata_req *)mp->b_rptr;
		destaddr = mp->b_rptr + tudr->DEST_offset;
		if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
		    destaddr + tudr->DEST_length < mp->b_rptr ||
		    destaddr + tudr->DEST_length > mp->b_wptr) {
			goto done;
		}
		optaddr = mp->b_rptr + tudr->OPT_offset;
		if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
		    optaddr + tudr->OPT_length < mp->b_rptr ||
		    optaddr + tudr->OPT_length > mp->b_wptr) {
			goto done;
		}
		destlen = tudr->DEST_length;
		optlen = tudr->OPT_length;
	}

	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
	    (char *)optaddr, optlen, err);
	if (mp1 != NULL)
		putnext(UDP_RD(q), mp1);

done:
	freemsg(mp);
}

/*
 * This routine removes a port number association from a stream. It
 * is called by udp_wput to handle T_UNBIND_REQ messages.
 */
static void
udp_unbind(queue_t *q, mblk_t *mp)
{
	udp_t *udp = Q_TO_UDP(q);

	/* If a bind has not been done, we can't unbind. */
	if (udp->udp_state == TS_UNBND) {
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	if (cl_inet_unbind != NULL) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}
	}

	/* Drop out of the bind hash and reset the local naming state */
	udp_bind_hash_remove(udp, B_FALSE);
	V6_SET_ZERO(udp->udp_v6src);
	V6_SET_ZERO(udp->udp_bound_v6src);
	udp->udp_port = 0;
	udp->udp_state = TS_UNBND;

	if (udp->udp_family == AF_INET6) {
		int error;

		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	/*
	 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
	 * and therefore ip_unbind must never return NULL.
	 */
	mp = ip_unbind(q, mp);
	ASSERT(mp != NULL);
	putnext(UDP_RD(q), mp);
}

/*
 * Don't let port fall into the privileged range.
 * Since the extra privileged ports can be arbitrary we also
 * ensure that we exclude those from consideration.
 * udp_g_epriv_ports is not sorted thus we loop over it until
 * there are no changes.
 *
 * Returns the adjusted port, or 0 if the whole anon range was
 * exhausted without finding an acceptable port.
 */
static in_port_t
udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
{
	int i;
	in_port_t nextport;
	boolean_t restart = B_FALSE;

	if (random && udp_random_anon_port != 0) {
		(void) random_get_pseudo_bytes((uint8_t *)&port,
		    sizeof (in_port_t));
		/*
		 * Unless changed by a sys admin, the smallest anon port
		 * is 32768 and the largest anon port is 65535. It is
		 * very likely (50%) for the random port to be smaller
		 * than the smallest anon port. When that happens,
		 * add port % (anon port range) to the smallest anon
		 * port to get the random port. It should fall into the
		 * valid anon port range.
		 */
		if (port < udp_smallest_anon_port) {
			port = udp_smallest_anon_port +
			    port % (udp_largest_anon_port -
			    udp_smallest_anon_port);
		}
	}

retry:
	/* Clamp into the anon range; wrap (once) when we run off the top */
	if (port < udp_smallest_anon_port)
		port = udp_smallest_anon_port;

	if (port > udp_largest_anon_port) {
		port = udp_smallest_anon_port;
		if (restart)
			return (0);
		restart = B_TRUE;
	}

	if (port < udp_smallest_nonpriv_port)
		port = udp_smallest_nonpriv_port;

	/* Skip past any extra-privileged port; re-validate after bumping */
	for (i = 0; i < udp_g_num_epriv_ports; i++) {
		if (port == udp_g_epriv_ports[i]) {
			port++;
			/*
			 * Make sure that the port is in the
			 * valid range.
			 */
			goto retry;
		}
	}

	/* On labeled systems let the MLP policy pick the next usable port */
	if (is_system_labeled() &&
	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
	    port, IPPROTO_UDP, B_TRUE)) != 0) {
		port = nextport;
		goto retry;
	}

	return (port);
}

/*
 * Recompute the Trusted Solaris label option for destination dst and
 * install it in the endpoint's saved IP send options.  On success the
 * destination is cached in udp_v6lastdst so the work can be skipped
 * for repeated sends to the same address.  Returns 0 or an errno.
 * NOTE(review): callers appear to hold conn_lock around this — confirm.
 */
static int
udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst)
{
	int err;
	uchar_t opt_storage[IP_MAX_OPT_LENGTH];
	udp_t *udp = Q_TO_UDP(wq);

	err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst,
	    opt_storage, udp->udp_mac_exempt);
	if (err == 0) {
		err = tsol_update_options(&udp->udp_ip_snd_options,
		    &udp->udp_ip_snd_options_len, &udp->udp_label_len,
		    opt_storage);
	}
	if (err != 0) {
		DTRACE_PROBE4(
		    tx__ip__log__info__updatelabel__udp,
		    char *, "queue(1) failed to update options(2) on mp(3)",
		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
	} else {
		IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst);
	}
	return (err);
}

/*
 * Build and transmit an IPv4 UDP datagram for the payload in mp.
 * On success the message is consumed and NULL is returned; on failure
 * the (unconsumed) message is returned with *error set to an errno.
 */
static mblk_t *
udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port,
    uint_t srcid, int
*error)
{
	udp_t		*udp = connp->conn_udp;
	queue_t		*q = connp->conn_wq;
	mblk_t		*mp1 = mp;
	mblk_t		*mp2;
	ipha_t		*ipha;
	int		ip_hdr_length;
	uint32_t	ip_len;
	udpha_t		*udpha;
	udpattrs_t	attrs;
	uchar_t		ip_snd_opt[IP_MAX_OPT_LENGTH];
	uint32_t	ip_snd_opt_len = 0;

	*error = 0;

	/* Sending to 0.0.0.0 is treated as sending to loopback */
	if (v4dst == INADDR_ANY)
		v4dst = htonl(INADDR_LOOPBACK);

	/*
	 * If options passed in, feed it for verification and handling
	 */
	attrs.udpattr_credset = B_FALSE;
	if (DB_TYPE(mp) != M_DATA) {
		mp1 = mp->b_cont;
		if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) {
			attrs.udpattr_ipp = NULL;
			attrs.udpattr_mb = mp;
			if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0)
				goto done;
			/*
			 * Note: success in processing options.
			 * mp option buffer represented by
			 * OPT_length/offset now potentially modified
			 * and contain option setting results
			 */
			ASSERT(*error == 0);
		}
	}

	/* mp1 points to the M_DATA mblk carrying the packet */
	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);

	/*
	 * Check if our saved options are valid; update if not
	 * TSOL Note: Since we are not in WRITER mode, UDP packets
	 * to different destination may require different labels.
	 * We use conn_lock to ensure that lastdst, ip_snd_options,
	 * and ip_snd_options_len are consistent for the current
	 * destination and are updated atomically.
	 */
	mutex_enter(&connp->conn_lock);
	if (is_system_labeled()) {
		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !attrs.udpattr_credset) {
			mutex_exit(&connp->conn_lock);
			DTRACE_PROBE4(
			    tx__ip__log__info__output__udp,
			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
			*error = ECONNREFUSED;
			goto done;
		}
		/* Destination changed since last send: recompute the label */
		if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
		    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst) &&
		    (*error = udp_update_label(q, mp, v4dst)) != 0) {
			mutex_exit(&connp->conn_lock);
			goto done;
		}
	}
	/* Snapshot the send options under the lock */
	if (udp->udp_ip_snd_options_len > 0) {
		ip_snd_opt_len = udp->udp_ip_snd_options_len;
		bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
	}
	mutex_exit(&connp->conn_lock);

	/* Add an IP header */
	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len;
	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
	/*
	 * Prepend the header in place when the dblk is exclusively ours,
	 * has enough headroom, and is 32-bit aligned; otherwise allocate
	 * a fresh header mblk and chain the payload behind it.
	 */
	if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
	    !OK_32PTR(ipha)) {
		mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO);
		if (mp2 == NULL) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "allocbfail2");
			*error = ENOMEM;
			goto done;
		}
		mp2->b_wptr = DB_LIM(mp2);
		mp2->b_cont = mp1;
		mp1 = mp2;
		if (DB_TYPE(mp) != M_DATA)
			mp->b_cont = mp1;
		else
			mp = mp1;

		ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
	}
	ip_hdr_length -= UDPH_SIZE;
#ifdef _BIG_ENDIAN
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
	    udp->udp_type_of_service);
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
#else
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((udp->udp_type_of_service << 8) |
	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
#endif
	/*
	 * Copy our address into the packet. If this is zero,
	 * first look at __sin6_src_id for a hint. If we leave the source
	 * as INADDR_ANY then ip will fill in the real source address.
	 */
	IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
	if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
		in6_addr_t v6src;

		ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid);
		IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
	}

	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ident = 0;

	mp1->b_rptr = (uchar_t *)ipha;

	ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
	    (uintptr_t)UINT_MAX);

	/* Determine length of packet */
	ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
	if ((mp2 = mp1->b_cont) != NULL) {
		do {
			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
			ip_len += (uint32_t)MBLKL(mp2);
		} while ((mp2 = mp2->b_cont) != NULL);
	}
	/*
	 * If the size of the packet is greater than the maximum allowed by
	 * ip, return an error. Passing this down could cause panics because
	 * the size will have wrapped and be inconsistent with the msg size.
	 */
	if (ip_len > IP_MAXPACKET) {
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
		    "udp_wput_end: q %p (%S)", q, "IP length exceeded");
		*error = EMSGSIZE;
		goto done;
	}
	ipha->ipha_length = htons((uint16_t)ip_len);
	/* From here on ip_len holds the UDP length, in network byte order */
	ip_len -= ip_hdr_length;
	ip_len = htons((uint16_t)ip_len);
	udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);

	/*
	 * Copy in the destination address
	 */
	ipha->ipha_dst = v4dst;

	/*
	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
	 */
	if (CLASSD(v4dst))
		ipha->ipha_ttl = udp->udp_multicast_ttl;

	udpha->uha_dst_port = port;
	udpha->uha_src_port = udp->udp_port;

	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
		uint32_t	cksum;

		bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
		/*
		 * Massage source route putting first source route in ipha_dst.
		 * Ignore the destination in T_unitdata_req.
		 * Create a checksum adjustment for a source route, if any.
		 */
		cksum = ip_massage_options(ipha);
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
		    (ipha->ipha_dst & 0xFFFF);
		if ((int)cksum < 0)
			cksum--;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		cksum += ip_len;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/* There might be a carry. */
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
#ifdef _LITTLE_ENDIAN
		if (udp_do_checksum)
			ip_len = (cksum << 16) | ip_len;
#else
		if (udp_do_checksum)
			ip_len = (ip_len << 16) | cksum;
		else
			ip_len <<= 16;
#endif
	} else {
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		if (udp_do_checksum)
			ip_len |= (ip_len << 16);
#ifndef _LITTLE_ENDIAN
		else
			ip_len <<= 16;
#endif
	}
	/* Set UDP length and checksum with a single 32-bit store */
	*((uint32_t *)&udpha->uha_length) = ip_len;
	if (DB_CRED(mp) != NULL)
		mblk_setcred(mp1, DB_CRED(mp));

	if (DB_TYPE(mp) != M_DATA) {
		/* Done with the T_UNITDATA_REQ header mblk */
		ASSERT(mp != mp1);
		freeb(mp);
	}

	/* mp has been consumed and we'll return success */
	ASSERT(*error == 0);
	mp = NULL;

	/* We're done. Pass the packet to ip.
	 */
	BUMP_MIB(&udp_mib, udpOutDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
	    "udp_wput_end: q %p (%S)", q, "end");

	/*
	 * Hand the finished datagram to IP.  The slow path (ip_output) is
	 * required whenever policy, special routing, a non-simple IP
	 * header, IPP processing or multicast routing may apply;
	 * otherwise try the UDP fast path below.
	 */
	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
	    CONN_OUTBOUND_POLICY_PRESENT(connp) ||
	    connp->conn_dontroute || connp->conn_xmit_if_ill != NULL ||
	    connp->conn_nofailover_ill != NULL ||
	    connp->conn_outgoing_ill != NULL ||
	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
	    IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) {
		UDP_STAT(udp_ip_send);
		ip_output(connp, mp1, connp->conn_wq, IP_WPUT);
	} else {
		udp_send_data(udp, connp->conn_wq, mp1, ipha);
	}

done:
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&udp_mib, udpOutErrors);
	}
	return (mp);
}

/*
 * Fast-path transmit of an IPv4 UDP datagram whose IP header (ipha) has
 * already been built by the caller.  Looks up an IRE for the destination
 * (caching it on the conn under conn_lock), and when the IRE qualifies,
 * fills in the IP ident, source address and checksums here, prepends the
 * cached link-layer fast-path header from nce_fp_mp, and transmits
 * directly.  Anything the fast path cannot handle -- unusable multicast
 * ipif, missing/condemned IRE, broadcast/local/loopback/multirt routes,
 * packets larger than ire_max_frag, explicit nexthop, insufficient mblk
 * headroom, or a flow-controlled driver queue -- is punted to ip_output()
 * or queued.  'q' is the conn's write queue (see callers).
 */
static void
udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
{
	conn_t		*connp = udp->udp_connp;
	ipaddr_t	src, dst;
	ill_t		*ill;
	ire_t		*ire;
	ipif_t		*ipif = NULL;
	mblk_t		*ire_fp_mp;
	uint_t		ire_fp_mp_len;
	uint16_t	*up;
	uint32_t	cksum, hcksum_txflags;
	queue_t		*dev_q;
	boolean_t	retry_caching;

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;
	ASSERT(ipha->ipha_ident == 0);	/* ident assigned below from the ire */

	if (CLASSD(dst)) {
		int err;

		/* Multicast: use the conn's chosen outgoing ipif. */
		ipif = conn_get_held_ipif(connp,
		    &connp->conn_multicast_ipif, &err);

		/* Fall back to ip_output if the ipif is unusable here. */
		if (ipif == NULL || ipif->ipif_isv6 ||
		    (ipif->ipif_ill->ill_phyint->phyint_flags &
		    PHYI_LOOPBACK)) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(udp_ip_send);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
	}

	retry_caching = B_FALSE;
	mutex_enter(&connp->conn_lock);
	ire = connp->conn_ire_cache;
	ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));

	/*
	 * Decide whether the cached IRE can be reused: it must exist,
	 * match the destination, not be condemned, and for multicast it
	 * must still send via the chosen ipif's ill (or its ill group).
	 */
	if (ire == NULL || ire->ire_addr != dst ||
	    (ire->ire_marks & IRE_MARK_CONDEMNED)) {
		retry_caching = B_TRUE;
	} else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) {
		ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr;

		ASSERT(ipif != NULL);
		if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL ||
		    stq_ill->ill_group != ipif->ipif_ill->ill_group))
			retry_caching = B_TRUE;
	}

	if (!retry_caching) {
		ASSERT(ire != NULL);
		IRE_REFHOLD(ire);
		mutex_exit(&connp->conn_lock);
	} else {
		boolean_t cached = B_FALSE;

		connp->conn_ire_cache = NULL;
		mutex_exit(&connp->conn_lock);

		/* Release the old ire */
		if (ire != NULL) {
			IRE_REFRELE_NOTR(ire);
			ire = NULL;
		}

		if (CLASSD(dst)) {
			ASSERT(ipif != NULL);
			ire = ire_ctable_lookup(dst, 0, 0, ipif,
			    connp->conn_zoneid, MBLK_GETLABEL(mp),
			    MATCH_IRE_ILL_GROUP);
		} else {
			ASSERT(ipif == NULL);
			ire = ire_cache_lookup(dst, connp->conn_zoneid,
			    MBLK_GETLABEL(mp));
		}

		if (ire == NULL) {
			/* No route cached yet; let IP resolve it. */
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(udp_ire_null);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
		IRE_REFHOLD_NOTR(ire);

		/*
		 * Re-check under conn_lock that the conn is not closing
		 * and nobody raced us into the cache slot; only cache an
		 * IRE that is not condemned (checked under irb_lock).
		 */
		mutex_enter(&connp->conn_lock);
		if (!(connp->conn_state_flags & CONN_CLOSING) &&
		    connp->conn_ire_cache == NULL) {
			rw_enter(&ire->ire_bucket->irb_lock, RW_READER);
			if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) {
				connp->conn_ire_cache = ire;
				cached = B_TRUE;
			}
			rw_exit(&ire->ire_bucket->irb_lock);
		}
		mutex_exit(&connp->conn_lock);

		/*
		 * We can continue to use the ire but since it was not
		 * cached, we should drop the extra reference.
		 */
		if (!cached)
			IRE_REFRELE_NOTR(ire);
	}
	ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION);
	ASSERT(!CLASSD(dst) || ipif != NULL);

	/*
	 * Check if we can take the fast-path.
	 * Note that "incomplete" ire's (where the link-layer for next hop
	 * is not resolved, or where the fast-path header in nce_fp_mp is not
	 * available yet) are sent down the legacy (slow) path.
	 *
	 * NOTE(review): if ire_nce were NULL, ire_fp_mp would be read
	 * uninitialized by the MBLKL() term below -- presumably ire_nce is
	 * always non-NULL once we get here; confirm against IRE invariants.
	 */
	if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
	    (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL ||
	    ire->ire_max_frag < ntohs(ipha->ipha_length) ||
	    (ire->ire_nce != NULL &&
	    (ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) ||
	    (connp->conn_nexthop_set) ||
	    (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) {
		if (ipif != NULL)
			ipif_refrele(ipif);
		UDP_STAT(udp_ip_ire_send);
		IRE_REFRELE(ire);
		ip_output(connp, mp, q, IP_WPUT);
		return;
	}

	BUMP_MIB(&ip_mib, ipOutRequests);

	ill = ire_to_ill(ire);
	ASSERT(ill != NULL);

	dev_q = ire->ire_stq->q_next;
	ASSERT(dev_q != NULL);
	/*
	 * If the service thread is already running, or if the driver
	 * queue is currently flow-controlled, queue this packet.
	 */
	if ((q->q_first != NULL || connp->conn_draining) ||
	    ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) {
		if (ip_output_queue) {
			(void) putq(q, mp);
		} else {
			BUMP_MIB(&ip_mib, ipOutDiscards);
			freemsg(mp);
		}
		if (ipif != NULL)
			ipif_refrele(ipif);
		IRE_REFRELE(ire);
		return;
	}

	/* Assign the IP ident; byte-swap it on little-endian machines. */
	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
#ifndef _BIG_ENDIAN
	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
#endif

	/* Pick a source address if the caller left it unspecified. */
	if (src == INADDR_ANY && !connp->conn_unspec_src) {
		if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
			src = ipha->ipha_src = ipif->ipif_src_addr;
		else
			src = ipha->ipha_src = ire->ire_src_addr;
	}

	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		ASSERT(ill->ill_hcksum_capab != NULL);
		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
	} else {
		hcksum_txflags = 0;
	}

	/* pseudo-header checksum (do it in parts for IP header checksum) */
	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);

	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
	/* A zero UDP checksum field means "no checksum"; leave it alone. */
	if (*up != 0) {
		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
		    ntohs(ipha->ipha_length), cksum);

		/* Software checksum? */
		if (DB_CKSUMFLAGS(mp) == 0) {
			UDP_STAT(udp_out_sw_cksum);
			UDP_STAT_UPDATE(udp_out_sw_cksum_bytes,
			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
		}
	}

	ipha->ipha_fragment_offset_and_flags |=
	    (uint32_t)htons(ire->ire_frag_flag);

	/* Calculate IP header checksum if hardware isn't capable */
	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
		    ((uint16_t *)ipha)[4]);
	}

	if (CLASSD(dst)) {
		ilm_t *ilm;

		/* Loop the packet back if we are a member of the group. */
		ILM_WALKER_HOLD(ill);
		ilm = ilm_lookup_ill(ill, dst, ALL_ZONES);
		ILM_WALKER_RELE(ill);
		if (ilm != NULL) {
			ip_multicast_loopback(q, ill, mp,
			    connp->conn_multicast_loop ? 0 :
			    IP_FF_NO_MCAST_LOOP, connp->conn_zoneid);
		}

		/* If multicast TTL is 0 then we are done */
		if (ipha->ipha_ttl == 0) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			freemsg(mp);
			IRE_REFRELE(ire);
			return;
		}
	}

	/* Prepend the cached link-layer (fast-path) header. */
	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);

	UPDATE_OB_PKT_COUNT(ire);
	ire->ire_last_used_time = lbolt;

	if (ILL_DLS_CAPABLE(ill)) {
		/*
		 * Send the packet directly to DLD, where it may be queued
		 * depending on the availability of transmit resources at
		 * the media layer.
		 */
		IP_DLS_ILL_TX(ill, mp);
	} else {
		putnext(ire->ire_stq, mp);
	}

	if (ipif != NULL)
		ipif_refrele(ipif);
	IRE_REFRELE(ire);
}

/*
 * Recompute the TSOL security label for a new IPv6 destination and store
 * it in the conn's sticky options; on success the destination is recorded
 * in udp_v6lastdst so the work can be skipped while it stays the same.
 * Returns 0 on success or an errno value on failure.
 * NOTE(review): declared boolean_t but returns an errno; the caller
 * assigns the result to an int error -- confirm the type is intentional.
 */
static boolean_t
udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst)
{
	udp_t	*udp = Q_TO_UDP(wq);
	int	err;
	uchar_t	opt_storage[TSOL_MAX_IPV6_OPTION];

	err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred),
	    dst, opt_storage, udp->udp_mac_exempt);
	if (err == 0) {
		err = tsol_update_sticky(&udp->udp_sticky_ipp,
		    &udp->udp_label_len_v6, opt_storage);
	}
	if (err != 0) {
		DTRACE_PROBE4(
		    tx__ip__log__drop__updatelabel__udp6,
		    char *, "queue(1) failed to update options(2) on mp(3)",
		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
	} else {
		udp->udp_v6lastdst = *dst;
	}
	return (err);
}

/*
 * This routine handles all messages passed downstream.  It either
 * consumes the message or passes it downstream; it never queues a
 * message.  M_DATA (connected, or with an address supplied by sockfs)
 * and well-formed T_UNITDATA_REQ messages are sent inline through
 * udp_output_v4/udp_output_v6; everything else is deferred to
 * udp_wput_other executing as writer.
 */
static void
udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
{
	sin6_t		*sin6;
	sin_t		*sin;
	ipaddr_t	v4dst;
	uint16_t	port;
	uint_t		srcid;
	queue_t		*q = connp->conn_wq;
	udp_t		*udp = connp->conn_udp;
	int		error = 0;
	struct sockaddr_storage ss;	/* scratch addr for connected M_DATA */

	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
	    "udp_wput_start: connp %p mp %p", connp, mp);

	/*
	 * We directly handle several cases here: T_UNITDATA_REQ message
	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both
	 * connected and non-connected socket.  The latter carries the
	 * address structure along when this routine gets called.
6785 */ 6786 switch (DB_TYPE(mp)) { 6787 case M_DATA: 6788 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6789 if (!udp->udp_direct_sockfs || 6790 addr == NULL || addrlen == 0) { 6791 /* Not connected; address is required */ 6792 BUMP_MIB(&udp_mib, udpOutErrors); 6793 UDP_STAT(udp_out_err_notconn); 6794 freemsg(mp); 6795 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6796 "udp_wput_end: connp %p (%S)", connp, 6797 "not-connected; address required"); 6798 return; 6799 } 6800 ASSERT(udp->udp_issocket); 6801 UDP_DBGSTAT(udp_data_notconn); 6802 /* Not connected; do some more checks below */ 6803 break; 6804 } 6805 /* M_DATA for connected socket */ 6806 UDP_DBGSTAT(udp_data_conn); 6807 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6808 6809 /* Initialize addr and addrlen as if they're passed in */ 6810 if (udp->udp_family == AF_INET) { 6811 sin = (sin_t *)&ss; 6812 sin->sin_family = AF_INET; 6813 sin->sin_port = udp->udp_dstport; 6814 sin->sin_addr.s_addr = v4dst; 6815 addr = (struct sockaddr *)sin; 6816 addrlen = sizeof (*sin); 6817 } else { 6818 sin6 = (sin6_t *)&ss; 6819 sin6->sin6_family = AF_INET6; 6820 sin6->sin6_port = udp->udp_dstport; 6821 sin6->sin6_flowinfo = udp->udp_flowinfo; 6822 sin6->sin6_addr = udp->udp_v6dst; 6823 sin6->sin6_scope_id = 0; 6824 sin6->__sin6_src_id = 0; 6825 addr = (struct sockaddr *)sin6; 6826 addrlen = sizeof (*sin6); 6827 } 6828 6829 if (udp->udp_family == AF_INET || 6830 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 6831 /* 6832 * Handle both AF_INET and AF_INET6; the latter 6833 * for IPV4 mapped destination addresses. Note 6834 * here that both addr and addrlen point to the 6835 * corresponding struct depending on the address 6836 * family of the socket. 
6837 */ 6838 mp = udp_output_v4(connp, mp, v4dst, 6839 udp->udp_dstport, 0, &error); 6840 } else { 6841 mp = udp_output_v6(connp, mp, sin6, &error); 6842 } 6843 if (error != 0) { 6844 ASSERT(addr != NULL && addrlen != 0); 6845 goto ud_error; 6846 } 6847 return; 6848 case M_PROTO: 6849 case M_PCPROTO: { 6850 struct T_unitdata_req *tudr; 6851 6852 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6853 tudr = (struct T_unitdata_req *)mp->b_rptr; 6854 6855 /* Handle valid T_UNITDATA_REQ here */ 6856 if (MBLKL(mp) >= sizeof (*tudr) && 6857 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6858 if (mp->b_cont == NULL) { 6859 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6860 "udp_wput_end: q %p (%S)", q, "badaddr"); 6861 error = EPROTO; 6862 goto ud_error; 6863 } 6864 6865 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6866 tudr->DEST_length)) { 6867 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6868 "udp_wput_end: q %p (%S)", q, "badaddr"); 6869 error = EADDRNOTAVAIL; 6870 goto ud_error; 6871 } 6872 /* 6873 * If a port has not been bound to the stream, fail. 6874 * This is not a problem when sockfs is directly 6875 * above us, because it will ensure that the socket 6876 * is first bound before allowing data to be sent. 
6877 */ 6878 if (udp->udp_state == TS_UNBND) { 6879 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6880 "udp_wput_end: q %p (%S)", q, "outstate"); 6881 error = EPROTO; 6882 goto ud_error; 6883 } 6884 addr = (struct sockaddr *) 6885 &mp->b_rptr[tudr->DEST_offset]; 6886 addrlen = tudr->DEST_length; 6887 if (tudr->OPT_length != 0) 6888 UDP_STAT(udp_out_opt); 6889 break; 6890 } 6891 /* FALLTHRU */ 6892 } 6893 default: 6894 udp_become_writer(connp, mp, udp_wput_other_wrapper, 6895 SQTAG_UDP_OUTPUT); 6896 return; 6897 } 6898 ASSERT(addr != NULL); 6899 6900 switch (udp->udp_family) { 6901 case AF_INET6: 6902 sin6 = (sin6_t *)addr; 6903 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 6904 sin6->sin6_family != AF_INET6) { 6905 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6906 "udp_wput_end: q %p (%S)", q, "badaddr"); 6907 error = EADDRNOTAVAIL; 6908 goto ud_error; 6909 } 6910 6911 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6912 /* 6913 * Destination is a non-IPv4-compatible IPv6 address. 6914 * Send out an IPv6 format packet. 6915 */ 6916 mp = udp_output_v6(connp, mp, sin6, &error); 6917 if (error != 0) 6918 goto ud_error; 6919 6920 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6921 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6922 return; 6923 } 6924 /* 6925 * If the local address is not zero or a mapped address 6926 * return an error. It would be possible to send an IPv4 6927 * packet but the response would never make it back to the 6928 * application since it is bound to a non-mapped address. 
6929 */ 6930 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6931 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6932 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6933 "udp_wput_end: q %p (%S)", q, "badaddr"); 6934 error = EADDRNOTAVAIL; 6935 goto ud_error; 6936 } 6937 /* Send IPv4 packet without modifying udp_ipversion */ 6938 /* Extract port and ipaddr */ 6939 port = sin6->sin6_port; 6940 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6941 srcid = sin6->__sin6_src_id; 6942 break; 6943 6944 case AF_INET: 6945 sin = (sin_t *)addr; 6946 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 6947 sin->sin_family != AF_INET) { 6948 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6949 "udp_wput_end: q %p (%S)", q, "badaddr"); 6950 error = EADDRNOTAVAIL; 6951 goto ud_error; 6952 } 6953 /* Extract port and ipaddr */ 6954 port = sin->sin_port; 6955 v4dst = sin->sin_addr.s_addr; 6956 srcid = 0; 6957 break; 6958 } 6959 6960 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); 6961 if (error != 0) { 6962 ud_error: 6963 UDP_STAT(udp_out_err_output); 6964 ASSERT(mp != NULL); 6965 /* mp is freed by the following routine */ 6966 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6967 (t_scalar_t)error); 6968 } 6969 } 6970 6971 /* ARGSUSED */ 6972 static void 6973 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 6974 { 6975 udp_output((conn_t *)arg, mp, NULL, 0); 6976 _UDP_EXIT((conn_t *)arg); 6977 } 6978 6979 static void 6980 udp_wput(queue_t *q, mblk_t *mp) 6981 { 6982 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 6983 SQTAG_UDP_WPUT); 6984 } 6985 6986 /* 6987 * Allocate and prepare a T_UNITDATA_REQ message. 
6988 */ 6989 static mblk_t * 6990 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 6991 { 6992 struct T_unitdata_req *tudr; 6993 mblk_t *mp; 6994 6995 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 6996 if (mp != NULL) { 6997 mp->b_wptr += sizeof (*tudr) + addrlen; 6998 DB_TYPE(mp) = M_PROTO; 6999 7000 tudr = (struct T_unitdata_req *)mp->b_rptr; 7001 tudr->PRIM_type = T_UNITDATA_REQ; 7002 tudr->DEST_length = addrlen; 7003 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 7004 tudr->OPT_length = 0; 7005 tudr->OPT_offset = 0; 7006 bcopy(addr, tudr+1, addrlen); 7007 } 7008 return (mp); 7009 } 7010 7011 /* 7012 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 7013 * is valid when we are directly beneath the stream head, and thus sockfs 7014 * is able to bypass STREAMS and directly call us, passing along the sockaddr 7015 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 7016 * this is done for both connected and non-connected endpoint. 7017 */ 7018 void 7019 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 7020 { 7021 conn_t *connp; 7022 udp_t *udp; 7023 7024 q = UDP_WR(q); 7025 connp = Q_TO_CONN(q); 7026 udp = connp->conn_udp; 7027 7028 /* udpsockfs should only send down M_DATA for this entry point */ 7029 ASSERT(DB_TYPE(mp) == M_DATA); 7030 7031 mutex_enter(&connp->conn_lock); 7032 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 7033 7034 if (udp->udp_mode != UDP_MT_HOT) { 7035 /* 7036 * We can't enter this conn right away because another 7037 * thread is currently executing as writer; therefore we 7038 * need to deposit the message into the squeue to be 7039 * drained later. If a socket address is present, we 7040 * need to create a T_UNITDATA_REQ message as placeholder. 
7041 */ 7042 if (addr != NULL && addrlen != 0) { 7043 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 7044 7045 if (tudr_mp == NULL) { 7046 mutex_exit(&connp->conn_lock); 7047 BUMP_MIB(&udp_mib, udpOutErrors); 7048 UDP_STAT(udp_out_err_tudr); 7049 freemsg(mp); 7050 return; 7051 } 7052 /* Tag the packet with T_UNITDATA_REQ */ 7053 tudr_mp->b_cont = mp; 7054 mp = tudr_mp; 7055 } 7056 mutex_exit(&connp->conn_lock); 7057 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 7058 return; 7059 } 7060 7061 /* We can execute as reader right away. */ 7062 UDP_READERS_INCREF(udp); 7063 mutex_exit(&connp->conn_lock); 7064 7065 udp_output(connp, mp, addr, addrlen); 7066 7067 udp_exit(connp); 7068 } 7069 7070 /* 7071 * udp_output_v6(): 7072 * Assumes that udp_wput did some sanity checking on the destination 7073 * address. 7074 */ 7075 static mblk_t * 7076 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error) 7077 { 7078 ip6_t *ip6h; 7079 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 7080 mblk_t *mp1 = mp; 7081 mblk_t *mp2; 7082 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 7083 size_t ip_len; 7084 udpha_t *udph; 7085 udp_t *udp = connp->conn_udp; 7086 queue_t *q = connp->conn_wq; 7087 ip6_pkt_t ipp_s; /* For ancillary data options */ 7088 ip6_pkt_t *ipp = &ipp_s; 7089 ip6_pkt_t *tipp; /* temporary ipp */ 7090 uint32_t csum = 0; 7091 uint_t ignore = 0; 7092 uint_t option_exists = 0, is_sticky = 0; 7093 uint8_t *cp; 7094 uint8_t *nxthdr_ptr; 7095 in6_addr_t ip6_dst; 7096 udpattrs_t attrs; 7097 boolean_t opt_present; 7098 ip6_hbh_t *hopoptsptr = NULL; 7099 uint_t hopoptslen = 0; 7100 boolean_t is_ancillary = B_FALSE; 7101 7102 *error = 0; 7103 7104 /* 7105 * If the local address is a mapped address return 7106 * an error. 7107 * It would be possible to send an IPv6 packet but the 7108 * response would never make it back to the application 7109 * since it is bound to a mapped address. 
7110 */ 7111 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 7112 *error = EADDRNOTAVAIL; 7113 goto done; 7114 } 7115 7116 ipp->ipp_fields = 0; 7117 ipp->ipp_sticky_ignored = 0; 7118 7119 /* 7120 * If TPI options passed in, feed it for verification and handling 7121 */ 7122 attrs.udpattr_credset = B_FALSE; 7123 opt_present = B_FALSE; 7124 if (DB_TYPE(mp) != M_DATA) { 7125 mp1 = mp->b_cont; 7126 if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) { 7127 attrs.udpattr_ipp = ipp; 7128 attrs.udpattr_mb = mp; 7129 if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0) 7130 goto done; 7131 ASSERT(*error == 0); 7132 opt_present = B_TRUE; 7133 } 7134 } 7135 ignore = ipp->ipp_sticky_ignored; 7136 7137 /* mp1 points to the M_DATA mblk carrying the packet */ 7138 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 7139 7140 if (sin6->sin6_scope_id != 0 && 7141 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 7142 /* 7143 * IPPF_SCOPE_ID is special. It's neither a sticky 7144 * option nor ancillary data. It needs to be 7145 * explicitly set in options_exists. 7146 */ 7147 option_exists |= IPPF_SCOPE_ID; 7148 } 7149 7150 /* 7151 * Compute the destination address 7152 */ 7153 ip6_dst = sin6->sin6_addr; 7154 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 7155 ip6_dst = ipv6_loopback; 7156 7157 /* 7158 * If we're not going to the same destination as last time, then 7159 * recompute the label required. This is done in a separate routine to 7160 * avoid blowing up our stack here. 7161 * 7162 * TSOL Note: Since we are not in WRITER mode, UDP packets 7163 * to different destination may require different labels. 7164 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts, 7165 * and sticky ipp_hopoptslen are consistent for the current 7166 * destination and are updated atomically. 
7167 */ 7168 mutex_enter(&connp->conn_lock); 7169 if (is_system_labeled()) { 7170 /* Using UDP MLP requires SCM_UCRED from user */ 7171 if (connp->conn_mlp_type != mlptSingle && 7172 !attrs.udpattr_credset) { 7173 DTRACE_PROBE4( 7174 tx__ip__log__info__output__udp6, 7175 char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)", 7176 mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q); 7177 *error = ECONNREFUSED; 7178 mutex_exit(&connp->conn_lock); 7179 goto done; 7180 } 7181 if ((opt_present || 7182 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst)) && 7183 (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) { 7184 mutex_exit(&connp->conn_lock); 7185 goto done; 7186 } 7187 } 7188 7189 /* 7190 * If there's a security label here, then we ignore any options the 7191 * user may try to set. We keep the peer's label as a hidden sticky 7192 * option. We make a private copy of this label before releasing the 7193 * lock so that label is kept consistent with the destination addr. 7194 */ 7195 if (udp->udp_label_len_v6 > 0) { 7196 ignore &= ~IPPF_HOPOPTS; 7197 ipp->ipp_fields &= ~IPPF_HOPOPTS; 7198 } 7199 7200 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 7201 /* No sticky options nor ancillary data. */ 7202 mutex_exit(&connp->conn_lock); 7203 goto no_options; 7204 } 7205 7206 /* 7207 * Go through the options figuring out where each is going to 7208 * come from and build two masks. The first mask indicates if 7209 * the option exists at all. The second mask indicates if the 7210 * option is sticky or ancillary. 
7211 */ 7212 if (!(ignore & IPPF_HOPOPTS)) { 7213 if (ipp->ipp_fields & IPPF_HOPOPTS) { 7214 option_exists |= IPPF_HOPOPTS; 7215 udp_ip_hdr_len += ipp->ipp_hopoptslen; 7216 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 7217 option_exists |= IPPF_HOPOPTS; 7218 is_sticky |= IPPF_HOPOPTS; 7219 ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0); 7220 hopoptsptr = kmem_alloc( 7221 udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP); 7222 if (hopoptsptr == NULL) { 7223 *error = ENOMEM; 7224 mutex_exit(&connp->conn_lock); 7225 goto done; 7226 } 7227 hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen; 7228 bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr, 7229 hopoptslen); 7230 udp_ip_hdr_len += hopoptslen; 7231 } 7232 } 7233 mutex_exit(&connp->conn_lock); 7234 7235 if (!(ignore & IPPF_RTHDR)) { 7236 if (ipp->ipp_fields & IPPF_RTHDR) { 7237 option_exists |= IPPF_RTHDR; 7238 udp_ip_hdr_len += ipp->ipp_rthdrlen; 7239 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 7240 option_exists |= IPPF_RTHDR; 7241 is_sticky |= IPPF_RTHDR; 7242 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 7243 } 7244 } 7245 7246 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 7247 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 7248 option_exists |= IPPF_RTDSTOPTS; 7249 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 7250 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 7251 option_exists |= IPPF_RTDSTOPTS; 7252 is_sticky |= IPPF_RTDSTOPTS; 7253 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 7254 } 7255 } 7256 7257 if (!(ignore & IPPF_DSTOPTS)) { 7258 if (ipp->ipp_fields & IPPF_DSTOPTS) { 7259 option_exists |= IPPF_DSTOPTS; 7260 udp_ip_hdr_len += ipp->ipp_dstoptslen; 7261 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 7262 option_exists |= IPPF_DSTOPTS; 7263 is_sticky |= IPPF_DSTOPTS; 7264 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 7265 } 7266 } 7267 7268 if (!(ignore & IPPF_IFINDEX)) { 7269 if (ipp->ipp_fields & IPPF_IFINDEX) { 7270 
option_exists |= IPPF_IFINDEX; 7271 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 7272 option_exists |= IPPF_IFINDEX; 7273 is_sticky |= IPPF_IFINDEX; 7274 } 7275 } 7276 7277 if (!(ignore & IPPF_ADDR)) { 7278 if (ipp->ipp_fields & IPPF_ADDR) { 7279 option_exists |= IPPF_ADDR; 7280 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 7281 option_exists |= IPPF_ADDR; 7282 is_sticky |= IPPF_ADDR; 7283 } 7284 } 7285 7286 if (!(ignore & IPPF_DONTFRAG)) { 7287 if (ipp->ipp_fields & IPPF_DONTFRAG) { 7288 option_exists |= IPPF_DONTFRAG; 7289 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 7290 option_exists |= IPPF_DONTFRAG; 7291 is_sticky |= IPPF_DONTFRAG; 7292 } 7293 } 7294 7295 if (!(ignore & IPPF_USE_MIN_MTU)) { 7296 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 7297 option_exists |= IPPF_USE_MIN_MTU; 7298 } else if (udp->udp_sticky_ipp.ipp_fields & 7299 IPPF_USE_MIN_MTU) { 7300 option_exists |= IPPF_USE_MIN_MTU; 7301 is_sticky |= IPPF_USE_MIN_MTU; 7302 } 7303 } 7304 7305 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 7306 option_exists |= IPPF_HOPLIMIT; 7307 /* IPV6_HOPLIMIT can never be sticky */ 7308 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 7309 7310 if (!(ignore & IPPF_UNICAST_HOPS) && 7311 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 7312 option_exists |= IPPF_UNICAST_HOPS; 7313 is_sticky |= IPPF_UNICAST_HOPS; 7314 } 7315 7316 if (!(ignore & IPPF_MULTICAST_HOPS) && 7317 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 7318 option_exists |= IPPF_MULTICAST_HOPS; 7319 is_sticky |= IPPF_MULTICAST_HOPS; 7320 } 7321 7322 if (!(ignore & IPPF_TCLASS)) { 7323 if (ipp->ipp_fields & IPPF_TCLASS) { 7324 option_exists |= IPPF_TCLASS; 7325 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 7326 option_exists |= IPPF_TCLASS; 7327 is_sticky |= IPPF_TCLASS; 7328 } 7329 } 7330 7331 if (!(ignore & IPPF_NEXTHOP) && 7332 (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) { 7333 option_exists |= 
IPPF_NEXTHOP; 7334 is_sticky |= IPPF_NEXTHOP; 7335 } 7336 7337 no_options: 7338 7339 /* 7340 * If any options carried in the ip6i_t were specified, we 7341 * need to account for the ip6i_t in the data we'll be sending 7342 * down. 7343 */ 7344 if (option_exists & IPPF_HAS_IP6I) 7345 udp_ip_hdr_len += sizeof (ip6i_t); 7346 7347 /* check/fix buffer config, setup pointers into it */ 7348 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 7349 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 7350 !OK_32PTR(ip6h)) { 7351 /* Try to get everything in a single mblk next time */ 7352 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 7353 udp->udp_max_hdr_len = udp_ip_hdr_len; 7354 (void) mi_set_sth_wroff(UDP_RD(q), 7355 udp->udp_max_hdr_len + udp_wroff_extra); 7356 } 7357 mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); 7358 if (mp2 == NULL) { 7359 *error = ENOMEM; 7360 goto done; 7361 } 7362 mp2->b_wptr = DB_LIM(mp2); 7363 mp2->b_cont = mp1; 7364 mp1 = mp2; 7365 if (DB_TYPE(mp) != M_DATA) 7366 mp->b_cont = mp1; 7367 else 7368 mp = mp1; 7369 7370 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 7371 } 7372 mp1->b_rptr = (unsigned char *)ip6h; 7373 ip6i = (ip6i_t *)ip6h; 7374 7375 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? 
&udp->udp_sticky_ipp : ipp) 7376 if (option_exists & IPPF_HAS_IP6I) { 7377 ip6h = (ip6_t *)&ip6i[1]; 7378 ip6i->ip6i_flags = 0; 7379 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7380 7381 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 7382 if (option_exists & IPPF_SCOPE_ID) { 7383 ip6i->ip6i_flags |= IP6I_IFINDEX; 7384 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 7385 } else if (option_exists & IPPF_IFINDEX) { 7386 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 7387 ASSERT(tipp->ipp_ifindex != 0); 7388 ip6i->ip6i_flags |= IP6I_IFINDEX; 7389 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 7390 } 7391 7392 if (option_exists & IPPF_ADDR) { 7393 /* 7394 * Enable per-packet source address verification if 7395 * IPV6_PKTINFO specified the source address. 7396 * ip6_src is set in the transport's _wput function. 7397 */ 7398 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 7399 } 7400 7401 if (option_exists & IPPF_DONTFRAG) { 7402 ip6i->ip6i_flags |= IP6I_DONTFRAG; 7403 } 7404 7405 if (option_exists & IPPF_USE_MIN_MTU) { 7406 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 7407 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 7408 } 7409 7410 if (option_exists & IPPF_NEXTHOP) { 7411 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 7412 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 7413 ip6i->ip6i_flags |= IP6I_NEXTHOP; 7414 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 7415 } 7416 7417 /* 7418 * tell IP this is an ip6i_t private header 7419 */ 7420 ip6i->ip6i_nxt = IPPROTO_RAW; 7421 } 7422 7423 /* Initialize IPv6 header */ 7424 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 7425 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 7426 7427 /* Set the hoplimit of the outgoing packet. */ 7428 if (option_exists & IPPF_HOPLIMIT) { 7429 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
*/ 7430 ip6h->ip6_hops = ipp->ipp_hoplimit; 7431 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7432 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 7433 ip6h->ip6_hops = udp->udp_multicast_ttl; 7434 if (option_exists & IPPF_MULTICAST_HOPS) 7435 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7436 } else { 7437 ip6h->ip6_hops = udp->udp_ttl; 7438 if (option_exists & IPPF_UNICAST_HOPS) 7439 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 7440 } 7441 7442 if (option_exists & IPPF_ADDR) { 7443 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 7444 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 7445 ip6h->ip6_src = tipp->ipp_addr; 7446 } else { 7447 /* 7448 * The source address was not set using IPV6_PKTINFO. 7449 * First look at the bound source. 7450 * If unspecified fallback to __sin6_src_id. 7451 */ 7452 ip6h->ip6_src = udp->udp_v6src; 7453 if (sin6->__sin6_src_id != 0 && 7454 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 7455 ip_srcid_find_id(sin6->__sin6_src_id, 7456 &ip6h->ip6_src, connp->conn_zoneid); 7457 } 7458 } 7459 7460 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 7461 cp = (uint8_t *)&ip6h[1]; 7462 7463 /* 7464 * Here's where we have to start stringing together 7465 * any extension headers in the right order: 7466 * Hop-by-hop, destination, routing, and final destination opts. 
7467 */ 7468 if (option_exists & IPPF_HOPOPTS) { 7469 /* Hop-by-hop options */ 7470 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 7471 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 7472 if (hopoptslen == 0) { 7473 hopoptsptr = tipp->ipp_hopopts; 7474 hopoptslen = tipp->ipp_hopoptslen; 7475 is_ancillary = B_TRUE; 7476 } 7477 7478 *nxthdr_ptr = IPPROTO_HOPOPTS; 7479 nxthdr_ptr = &hbh->ip6h_nxt; 7480 7481 bcopy(hopoptsptr, cp, hopoptslen); 7482 cp += hopoptslen; 7483 7484 if (hopoptsptr != NULL && !is_ancillary) { 7485 kmem_free(hopoptsptr, hopoptslen); 7486 hopoptsptr = NULL; 7487 hopoptslen = 0; 7488 } 7489 } 7490 /* 7491 * En-route destination options 7492 * Only do them if there's a routing header as well 7493 */ 7494 if (option_exists & IPPF_RTDSTOPTS) { 7495 ip6_dest_t *dst = (ip6_dest_t *)cp; 7496 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 7497 7498 *nxthdr_ptr = IPPROTO_DSTOPTS; 7499 nxthdr_ptr = &dst->ip6d_nxt; 7500 7501 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 7502 cp += tipp->ipp_rtdstoptslen; 7503 } 7504 /* 7505 * Routing header next 7506 */ 7507 if (option_exists & IPPF_RTHDR) { 7508 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 7509 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 7510 7511 *nxthdr_ptr = IPPROTO_ROUTING; 7512 nxthdr_ptr = &rt->ip6r_nxt; 7513 7514 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 7515 cp += tipp->ipp_rthdrlen; 7516 } 7517 /* 7518 * Do ultimate destination options 7519 */ 7520 if (option_exists & IPPF_DSTOPTS) { 7521 ip6_dest_t *dest = (ip6_dest_t *)cp; 7522 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 7523 7524 *nxthdr_ptr = IPPROTO_DSTOPTS; 7525 nxthdr_ptr = &dest->ip6d_nxt; 7526 7527 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 7528 cp += tipp->ipp_dstoptslen; 7529 } 7530 /* 7531 * Now set the last header pointer to the proto passed in 7532 */ 7533 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 7534 *nxthdr_ptr = IPPROTO_UDP; 7535 7536 /* Update UDP header */ 7537 udph = (udpha_t *)((uchar_t *)ip6i + 
udp_ip_hdr_len - UDPH_SIZE); 7538 udph->uha_dst_port = sin6->sin6_port; 7539 udph->uha_src_port = udp->udp_port; 7540 7541 /* 7542 * Copy in the destination address 7543 */ 7544 ip6h->ip6_dst = ip6_dst; 7545 7546 ip6h->ip6_vcf = 7547 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 7548 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 7549 7550 if (option_exists & IPPF_TCLASS) { 7551 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 7552 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 7553 tipp->ipp_tclass); 7554 } 7555 7556 if (option_exists & IPPF_RTHDR) { 7557 ip6_rthdr_t *rth; 7558 7559 /* 7560 * Perform any processing needed for source routing. 7561 * We know that all extension headers will be in the same mblk 7562 * as the IPv6 header. 7563 */ 7564 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 7565 if (rth != NULL && rth->ip6r_segleft != 0) { 7566 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 7567 /* 7568 * Drop packet - only support Type 0 routing. 7569 * Notify the application as well. 7570 */ 7571 *error = EPROTO; 7572 goto done; 7573 } 7574 7575 /* 7576 * rth->ip6r_len is twice the number of 7577 * addresses in the header. Thus it must be even. 7578 */ 7579 if (rth->ip6r_len & 0x1) { 7580 *error = EPROTO; 7581 goto done; 7582 } 7583 /* 7584 * Shuffle the routing header and ip6_dst 7585 * addresses, and get the checksum difference 7586 * between the first hop (in ip6_dst) and 7587 * the destination (in the last routing hdr entry). 7588 */ 7589 csum = ip_massage_options_v6(ip6h, rth); 7590 /* 7591 * Verify that the first hop isn't a mapped address. 7592 * Routers along the path need to do this verification 7593 * for subsequent hops. 
7594 */ 7595 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7596 *error = EADDRNOTAVAIL; 7597 goto done; 7598 } 7599 7600 cp += (rth->ip6r_len + 1)*8; 7601 } 7602 } 7603 7604 /* count up length of UDP packet */ 7605 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7606 if ((mp2 = mp1->b_cont) != NULL) { 7607 do { 7608 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7609 ip_len += (uint32_t)MBLKL(mp2); 7610 } while ((mp2 = mp2->b_cont) != NULL); 7611 } 7612 7613 /* 7614 * If the size of the packet is greater than the maximum allowed by 7615 * ip, return an error. Passing this down could cause panics because 7616 * the size will have wrapped and be inconsistent with the msg size. 7617 */ 7618 if (ip_len > IP_MAXPACKET) { 7619 *error = EMSGSIZE; 7620 goto done; 7621 } 7622 7623 /* Store the UDP length. Subtract length of extension hdrs */ 7624 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7625 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7626 7627 /* 7628 * We make it easy for IP to include our pseudo header 7629 * by putting our length in uh_checksum, modified (if 7630 * we have a routing header) by the checksum difference 7631 * between the ultimate destination and first hop addresses. 7632 * Note: UDP over IPv6 must always checksum the packet. 7633 */ 7634 csum += udph->uha_length; 7635 csum = (csum & 0xFFFF) + (csum >> 16); 7636 udph->uha_checksum = (uint16_t)csum; 7637 7638 #ifdef _LITTLE_ENDIAN 7639 ip_len = htons(ip_len); 7640 #endif 7641 ip6h->ip6_plen = ip_len; 7642 if (DB_CRED(mp) != NULL) 7643 mblk_setcred(mp1, DB_CRED(mp)); 7644 7645 if (DB_TYPE(mp) != M_DATA) { 7646 ASSERT(mp != mp1); 7647 freeb(mp); 7648 } 7649 7650 /* mp has been consumed and we'll return success */ 7651 ASSERT(*error == 0); 7652 mp = NULL; 7653 7654 /* We're done. 
Pass the packet to IP */
	BUMP_MIB(&udp_mib, udpOutDatagrams);
	ip_output_v6(connp, mp1, q, IP_WPUT);

done:
	/* Free a copied-in hop-by-hop options buffer unless it is ancillary */
	if (hopoptsptr != NULL && !is_ancillary) {
		kmem_free(hopoptsptr, hopoptslen);
		hopoptsptr = NULL;
	}
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&udp_mib, udpOutErrors);
	}
	/* On success mp was set to NULL above; on failure caller gets it back */
	return (mp);
}

/*
 * Handle all write-side messages other than the fast-path M_DATA and
 * valid T_UNITDATA_REQ cases: TPI primitives (bind, connect, option
 * management, etc.), M_IOCTL, M_IOCDATA and M_FLUSH.  Anything not
 * recognized here is passed through unchanged to IP via ip_output().
 */
static void
udp_wput_other(queue_t *q, mblk_t *mp)
{
	uchar_t		*rptr = mp->b_rptr;
	struct datab	*db;
	struct iocblk	*iocp;
	cred_t		*cr;
	conn_t		*connp = Q_TO_CONN(q);
	udp_t		*udp = connp->conn_udp;

	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
	    "udp_wput_other_start: q %p", q);

	db = mp->b_datap;

	/* Use the message's credential if present, else the conn's */
	cr = DB_CREDDEF(mp, connp->conn_cred);

	switch (db->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		/* Must at least contain the TPI primitive type */
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			freemsg(mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "protoshort");
			return;
		}
		switch (((t_primp_t)rptr)->type) {
		case T_ADDR_REQ:
			udp_addr_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "addrreq");
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			udp_bind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "bindreq");
			return;
		case T_CONN_REQ:
			udp_connect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "connreq");
			return;
		case T_CAPABILITY_REQ:
			udp_capability_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "capabreq");
			return;
		case T_INFO_REQ:
			udp_info_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "unitdatareq");
			return;
		case T_UNBIND_REQ:
			udp_unbind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
			return;
		case T_SVR4_OPTMGMT_REQ:
			if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr))
				/*
				 * Use upper queue for option processing in
				 * case the request is not handled at this
				 * level and needs to be passed down to IP.
				 */
				(void) svr4_optcom_req(_WR(UDP_RD(q)),
				    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "optmgmtreq");
			return;

		case T_OPTMGMT_REQ:
			/*
			 * Use upper queue for option processing in
			 * case the request is not handled at this
			 * level and needs to be passed down to IP.
			 */
			(void) tpi_optcom_req(_WR(UDP_RD(q)),
			    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "optmgmtreq");
			return;

		case T_DISCON_REQ:
			udp_disconnect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "disconreq");
			return;

		/* The following TPI message is not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "connres/disconreq");
			return;

		/* The following 3 TPI messages are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "data/exdata/ordrel");
			return;
		default:
			/* Unknown primitive falls through to ip_output() */
			break;
		}
		break;
	case M_FLUSH:
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)",
				    q, "getpeername");
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME: {
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "getmyname");
			return;
			}
		case ND_SET:
			/* nd_getset performs the necessary checking */
		case ND_GET:
			if (nd_getset(q, udp_g_nd, mp)) {
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)",
				    q, "get");
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we could
			 * no longer use read-side synchronous stream.
			 * Drain any queued data and disable direct sockfs
			 * interface from now on.
			 */
			if (!udp->udp_issocket) {
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp->udp_issocket = B_FALSE;
				if (udp->udp_direct_sockfs) {
					/*
					 * Disable read-side synchronous
					 * stream interface and drain any
					 * queued data.
					 */
					udp_rcv_drain(UDP_RD(q), udp,
					    B_FALSE);
					ASSERT(!udp->udp_direct_sockfs);
					UDP_STAT(udp_sock_fallback);
				}
				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			putnext(UDP_RD(q), mp);
			return;
		default:
			/* Unrecognized ioctl falls through to ip_output() */
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
	    "udp_wput_other_end: q %p (%S)", q, "end");
	ip_output(connp, mp, q, IP_WPUT);
}

/*
 * Squeue/perimeter entry point wrapper for udp_wput_other(); arg is the
 * conn_t.  Drops the conn reference via udp_exit() when done.
 */
/* ARGSUSED */
static void
udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_wput_other(((conn_t *)arg)->conn_wq, mp);
	udp_exit((conn_t *)arg);
}

/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages.
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t		*mp1;
	STRUCT_HANDLE(strbuf, sb);
	uint16_t	port;
	in6_addr_t	v6addr;
	ipaddr_t	v4addr;
	uint32_t	flowinfo = 0;
	int		addrlen;
	udp_t		*udp = Q_TO_UDP(q);

	/* Make sure it is one of ours. */
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		/* Not ours; pass it through to IP */
		ip_output(udp->udp_connp, mp, q, IP_WPUT);
		return;
	}

	q = WR(UDP_RD(q));
	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		/* strbuf copied in; continue below to copy out the address */
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME.  Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
	    (void *)mp1->b_rptr);
	if (udp->udp_family == AF_INET)
		addrlen = sizeof (sin_t);
	else
		addrlen = sizeof (sin6_t);

	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v4addr = V4_PART_OF_V6(udp->udp_v6src);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				v4addr = V4_PART_OF_V6(udp->udp_bound_v6src);
			}
		} else {
			/* udp->udp_family == AF_INET6 */
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v6addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				v6addr = udp->udp_bound_v6src;
			}
		}
		port = udp->udp_port;
		break;
	case TI_GETPEERNAME:
		if (udp->udp_state != TS_DATA_XFER) {
			/* Not connected; no peer name to report */
			mi_copy_done(q, mp, ENOTCONN);
			return;
		}
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			v4addr = V4_PART_OF_V6(udp->udp_v6dst);
		} else {
			/* udp->udp_family == AF_INET6) */
			v6addr = udp->udp_v6dst;
			flowinfo = udp->udp_flowinfo;
		}
		port = udp->udp_dstport;
		break;
	default:
		mi_copy_done(q, mp, EPROTO);
		return;
	}
	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
	if (!mp1)
		return;

	if (udp->udp_family == AF_INET) {
		sin_t *sin;

		STRUCT_FSET(sb, len, (int)sizeof (sin_t));
		sin = (sin_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin[1];
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = v4addr;
		sin->sin_port = port;
	} else {
		/* udp->udp_family == AF_INET6 */
		sin6_t *sin6;

		STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
		sin6 = (sin6_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin6[1];
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_flowinfo = flowinfo;
		sin6->sin6_addr = v6addr;
		sin6->sin6_port = port;
	}
	/* Copy out the address */
	mi_copyout(q, mp);
}


static int
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
    udpattrs_t *udpattrs)
{
	struct T_unitdata_req *udreqp;
	int is_absreq_failure;
	cred_t *cr;
	conn_t	*connp = Q_TO_CONN(q);

	ASSERT(((t_primp_t)mp->b_rptr)->type);

	cr = DB_CREDDEF(mp, connp->conn_cred);

	udreqp = (struct T_unitdata_req *)mp->b_rptr;

	/*
	 * Use upper queue for option processing since the callback
	 * routines expect to be called in UDP instance instead of IP.
	 */
	*errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length,
	    udreqp->OPT_offset, cr, &udp_opt_obj,
	    udpattrs, &is_absreq_failure);

	if (*errorp != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		return (-1);		/* failure */
	}
	ASSERT(is_absreq_failure == 0);
	return (0);	/* success */
}

/*
 * Module load-time initialization: sizes and allocates the bind fanout
 * hash, registers tunables, sets up kstats and the udp_t kmem cache.
 */
void
udp_ddi_init(void)
{
	int i;

	UDP6_MAJ = ddi_name_to_major(UDP6);

	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) {
		/* Not a power of two. Round up to nearest power of two */
		for (i = 0; i < 31; i++) {
			if (udp_bind_fanout_size < (1 << i))
				break;
		}
		udp_bind_fanout_size = 1 << i;
	}
	udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size *
	    sizeof (udp_fanout_t), KM_SLEEP);
	for (i = 0; i < udp_bind_fanout_size; i++) {
		mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
		    NULL);
	}
	(void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr));

	udp_kstat_init();

	udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t),
	    CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0);
}

/*
 * Module unload-time teardown; undoes everything udp_ddi_init() set up.
 */
void
udp_ddi_destroy(void)
{
	int i;

	nd_free(&udp_g_nd);

	for (i = 0; i < udp_bind_fanout_size; i++) {
		mutex_destroy(&udp_bind_fanout[i].uf_lock);
	}

	kmem_free(udp_bind_fanout, udp_bind_fanout_size *
	    sizeof (udp_fanout_t));

	udp_kstat_fini();

	kmem_cache_destroy(udp_cache);
}

/*
 * Create and install the MIB-II ("mib2") named kstat and the internal
 * "udpstat" virtual kstat backed directly by udp_statistics.
 */
static void
udp_kstat_init(void)
{
	udp_named_kstat_t template = {
		{ "inDatagrams",	KSTAT_DATA_UINT32, 0 },
		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
		{ "outDatagrams",	KSTAT_DATA_UINT32, 0 },
		{ "entrySize",		KSTAT_DATA_INT32, 0 },
		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
	};

	udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME,
	    "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0);

	if (udp_mibkp == NULL)
		return;

	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);

	bcopy(&template, udp_mibkp->ks_data, sizeof (template));

	/* udp_kstat_update() refreshes the counters on each read */
	udp_mibkp->ks_update = udp_kstat_update;

	kstat_install(udp_mibkp);

	if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat",
	    "net", KSTAT_TYPE_NAMED,
	    sizeof (udp_statistics) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL)) != NULL) {
		udp_ksp->ks_data = &udp_statistics;
		kstat_install(udp_ksp);
	}
}

/* Delete the kstats created by udp_kstat_init(). */
static void
udp_kstat_fini(void)
{
	if (udp_ksp != NULL) {
		kstat_delete(udp_ksp);
		udp_ksp = NULL;
	}
	if (udp_mibkp != NULL) {
		kstat_delete(udp_mibkp);
		udp_mibkp = NULL;
	}
}

/*
 * ks_update callback for the mib2 kstat: copies the current udp_mib
 * counters into the kstat's named-data area.  Read-only (EACCES on write).
 */
static int
udp_kstat_update(kstat_t *kp, int rw)
{
	udp_named_kstat_t *udpkp;

	if ((kp == NULL) || (kp->ks_data == NULL))
		return (EIO);

	if (rw == KSTAT_WRITE)
		return (EACCES);

	udpkp = (udp_named_kstat_t *)kp->ks_data;

	udpkp->inDatagrams.value.ui32 =	udp_mib.udpInDatagrams;
	udpkp->inErrors.value.ui32 =	udp_mib.udpInErrors;
	udpkp->outDatagrams.value.ui32 = udp_mib.udpOutDatagrams;
	udpkp->outErrors.value.ui32 =	udp_mib.udpOutErrors;

	return (0);
}

/* ARGSUSED */
static void
udp_rput(queue_t *q, mblk_t *mp)
{
	/*
	 * We get here whenever we do qreply() from IP,
	 * i.e as part of handlings ioctls, etc.
	 */
	putnext(q, mp);
}

/*
 * Read-side synchronous stream info entry point, called as a
 * result of handling certain STREAMS ioctl operations.
 */
static int
udp_rinfop(queue_t *q, infod_t *dp)
{
	mblk_t	*mp;
	uint_t	cmd = dp->d_cmd;
	int	res = 0;
	int	error = 0;
	udp_t	*udp = Q_TO_UDP(RD(UDP_WR(q)));
	struct stdata  *stp = STREAM(q);

	mutex_enter(&udp->udp_drain_lock);
	/* If shutdown on read has happened, return nothing */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_flag & STREOF) {
		mutex_exit(&stp->sd_lock);
		goto done;
	}
	mutex_exit(&stp->sd_lock);

	if ((mp = udp->udp_rcv_list_head) == NULL)
		goto done;

	ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);

	if (cmd & INFOD_COUNT) {
		/*
		 * Return the number of messages.
		 */
		dp->d_count += udp->udp_rcv_msgcnt;
		res |= INFOD_COUNT;
	}
	if (cmd & INFOD_BYTES) {
		/*
		 * Return size of all data messages.
		 */
		dp->d_bytes += udp->udp_rcv_cnt;
		res |= INFOD_BYTES;
	}
	if (cmd & INFOD_FIRSTBYTES) {
		/*
		 * Return size of first data message.
		 */
		dp->d_bytes = msgdsize(mp);
		res |= INFOD_FIRSTBYTES;
		dp->d_cmd &= ~INFOD_FIRSTBYTES;
	}
	if (cmd & INFOD_COPYOUT) {
		mblk_t *mp1 = mp->b_cont;
		int n;
		/*
		 * Return data contents of first message.
		 */
		ASSERT(DB_TYPE(mp1) == M_DATA);
		while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
			n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
			if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
			    UIO_READ, dp->d_uiop)) != 0) {
				goto done;
			}
			mp1 = mp1->b_cont;
		}
		res |= INFOD_COPYOUT;
		dp->d_cmd &= ~INFOD_COPYOUT;
	}
done:
	mutex_exit(&udp->udp_drain_lock);

	dp->d_res |= res;

	return (error);
}

/*
 * Read-side synchronous stream entry point.  This is called as a result
 * of recv/read operation done at sockfs, and is guaranteed to execute
 * outside of the interrupt thread context.  It returns a single datagram
 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
 */
static int
udp_rrw(queue_t *q, struiod_t *dp)
{
	mblk_t	*mp;
	udp_t	*udp = Q_TO_UDP(_RD(UDP_WR(q)));

	/* We should never get here when we're in SNMP mode */
	ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD));

	/*
	 * Dequeue datagram from the head of the list and return
	 * it to caller; also ensure that RSLEEP sd_wakeq flag is
	 * set/cleared depending on whether or not there's data
	 * remaining in the list.
	 */
	mutex_enter(&udp->udp_drain_lock);
	if (!udp->udp_direct_sockfs) {
		/* Fallback in progress; tell sockfs to retry via STREAMS */
		mutex_exit(&udp->udp_drain_lock);
		UDP_STAT(udp_rrw_busy);
		return (EBUSY);
	}
	if ((mp = udp->udp_rcv_list_head) != NULL) {
		uint_t size = msgdsize(mp);

		/* Last datagram in the list? */
		if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
			udp->udp_rcv_list_tail = NULL;
		mp->b_next = NULL;

		udp->udp_rcv_cnt -= size;
		udp->udp_rcv_msgcnt--;
		UDP_STAT(udp_rrw_msgcnt);

		/* No longer flow-controlling? */
		if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
		    udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
			udp->udp_drain_qfull = B_FALSE;
	}
	if (udp->udp_rcv_list_head == NULL) {
		/*
		 * Either we just dequeued the last datagram or
		 * we get here from sockfs and have nothing to
		 * return; in this case clear RSLEEP.
		 */
		ASSERT(udp->udp_rcv_cnt == 0);
		ASSERT(udp->udp_rcv_msgcnt == 0);
		ASSERT(udp->udp_rcv_list_tail == NULL);
		STR_WAKEUP_CLEAR(STREAM(q));
	} else {
		/*
		 * More data follows; we need udp_rrw() to be
		 * called in future to pick up the rest.
		 */
		STR_WAKEUP_SET(STREAM(q));
	}
	mutex_exit(&udp->udp_drain_lock);
	dp->d_mp = mp;
	return (0);
}

/*
 * Enqueue a completely-built T_UNITDATA_IND message into the receive
 * list; this is typically executed within the interrupt thread context
 * and so we do things as quickly as possible.
 */
static void
udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
{
	ASSERT(q == RD(q));
	ASSERT(pkt_len == msgdsize(mp));
	ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
	ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
	ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));

	mutex_enter(&udp->udp_drain_lock);
	/*
	 * Wake up and signal the receiving app; it is okay to do this
	 * before enqueueing the mp because we are holding the drain lock.
	 * One of the advantages of synchronous stream is the ability for
	 * us to find out when the application performs a read on the
	 * socket by way of udp_rrw() entry point being called.  We need
	 * to generate SIGPOLL/SIGIO for each received data in the case
	 * of asynchronous socket just as in the strrput() case.  However,
	 * we only wake the application up when necessary, i.e. during the
	 * first enqueue.  When udp_rrw() is called, we send up a single
	 * datagram upstream and call STR_WAKEUP_SET() again when there
	 * are still data remaining in our receive queue.
	 */
	if (udp->udp_rcv_list_head == NULL) {
		STR_WAKEUP_SET(STREAM(q));
		udp->udp_rcv_list_head = mp;
	} else {
		udp->udp_rcv_list_tail->b_next = mp;
	}
	udp->udp_rcv_list_tail = mp;
	udp->udp_rcv_cnt += pkt_len;
	udp->udp_rcv_msgcnt++;

	/* Need to flow-control? */
	if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
	    udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
		udp->udp_drain_qfull = B_TRUE;

	/* Update poll events and send SIGPOLL/SIGIO if necessary */
	STR_SENDSIG(STREAM(q));
	mutex_exit(&udp->udp_drain_lock);
}

/*
 * Drain the contents of receive list to the module upstream; we do
 * this during close or when we fallback to the slow mode due to
 * sockmod being popped or a module being pushed on top of us.
 */
static void
udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
{
	mblk_t *mp;

	ASSERT(q == RD(q));

	mutex_enter(&udp->udp_drain_lock);
	/*
	 * There is no race with a concurrent udp_input() sending
	 * up packets using putnext() after we have cleared the
	 * udp_direct_sockfs flag but before we have completed
	 * sending up the packets in udp_rcv_list, since we are
	 * either a writer or we have quiesced the conn.
	 */
	udp->udp_direct_sockfs = B_FALSE;
	mutex_exit(&udp->udp_drain_lock);

	if (udp->udp_rcv_list_head != NULL)
		UDP_STAT(udp_drain);

	/*
	 * Send up everything via putnext(); note here that we
	 * don't need the udp_drain_lock to protect us since
	 * nothing can enter udp_rrw() and that we currently
	 * have exclusive access to this udp.
	 */
	while ((mp = udp->udp_rcv_list_head) != NULL) {
		udp->udp_rcv_list_head = mp->b_next;
		mp->b_next = NULL;
		udp->udp_rcv_cnt -= msgdsize(mp);
		udp->udp_rcv_msgcnt--;
		if (closing) {
			/* Stream is going away; discard instead of sending */
			freemsg(mp);
		} else {
			putnext(q, mp);
		}
	}
	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	udp->udp_rcv_list_tail = NULL;
	udp->udp_drain_qfull = B_FALSE;
}

/*
 * Set the receive high-water mark, padded by 50% for slop and clamped
 * to udp_max_buf; returns the value actually used.
 */
static size_t
udp_set_rcv_hiwat(udp_t *udp, size_t size)
{
	/* We add a bit of extra buffering */
	size += size >> 1;
	if (size > udp_max_buf)
		size = udp_max_buf;

	udp->udp_rcv_hiwat = size;
	return (size);
}

/*
 * Little helper for IPsec's NAT-T processing.
 */
boolean_t
udp_compute_checksum(void)
{
	return (udp_do_checksum);
}