1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/tiuser.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/strsubr.h> 46 #include <sys/suntpi.h> 47 #include <sys/xti_inet.h> 48 #include <sys/cmn_err.h> 49 #include <sys/kmem.h> 50 #include <sys/policy.h> 51 #include <sys/ucred.h> 52 #include <sys/zone.h> 53 54 #include <sys/socket.h> 55 #include <sys/sockio.h> 56 #include <sys/vtrace.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 67 #include <inet/common.h> 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_if.h> 73 #include <inet/ip_multi.h> 74 #include <inet/mi.h> 75 #include <inet/mib2.h> 76 #include <inet/nd.h> 77 #include <inet/optcom.h> 78 #include <inet/snmpcom.h> 79 #include <inet/kstatcom.h> 80 #include <inet/udp_impl.h> 81 #include <inet/ipclassifier.h> 82 #include <inet/ipsec_impl.h> 83 #include <inet/ipp_common.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latters presence. 89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 /* 94 * Synchronization notes: 95 * 96 * UDP uses a combination of its internal perimeter, a global lock and 97 * a set of bind hash locks to protect its data structures. Please see 98 * the note above udp_mode_assertions for details about the internal 99 * perimeter. 100 * 101 * When a UDP endpoint is bound to a local port, it is inserted into 102 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 103 * The size of the array is controlled by the udp_bind_fanout_size variable. 104 * This variable can be changed in /etc/system if the default value is 105 * not large enough. Each bind hash bucket is protected by a per bucket 106 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 107 * structure. 
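 * (As an illustration of the /etc/system tuning mentioned above, a busy
 * system could raise the fanout to the next power of 2 with a line such
 * as "set udp:udp_bind_fanout_size = 1024"; the udp module prefix is an
 * assumption here.)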
 * A UDP endpoint is removed from the bind hash list only
 * when it is being unbound or being closed. The per bucket lock also
 * protects a UDP endpoint's state changes.
 *
 * Plumbing notes:
 *
 * Both udp and ip are merged, but the streams plumbing is kept unchanged
 * in that udp is always pushed atop /dev/ip. This is done to preserve
 * backwards compatibility for certain applications which rely on such
 * plumbing geometry to do things such as issuing I_POP on the stream
 * in order to obtain direct access to /dev/ip, etc.
 *
 * All UDP processing happens in the /dev/ip instance; the udp module
 * instance does not possess any state about the endpoint, and merely
 * acts as a dummy module whose presence is to keep the streams plumbing
 * appearance unchanged. At open time /dev/ip allocates a conn_t that
 * happens to embed a udp_t. This stays dormant until the time udp is
 * pushed, which indicates to /dev/ip that it must convert itself from
 * an IP to a UDP endpoint.
 *
 * We only allow for the following plumbing cases:
 *
 * Normal:
 *	/dev/ip is first opened and later udp is pushed directly on top.
 *	This is the default action that happens when a udp socket or
 *	/dev/udp is opened. The conn_t created by the /dev/ip instance
 *	is now shared and is marked with IPCL_UDP.
 *
 * SNMP-only:
 *	udp is pushed on top of a module other than /dev/ip. When this
 *	happens it will support only SNMP semantics. A new conn_t is
 *	allocated and marked with IPCL_UDPMOD.
 *
 * The above cases imply that we don't support any intermediate module
 * between /dev/ip and udp -- in fact, we never supported such a
 * scenario in the past as the inter-layer communication semantics have
 * always been private. Also note that the normal case allows for SNMP
 * requests to be processed in addition to the rest of UDP operations.
 *
 * The normal case plumbing is depicted by the following diagram:
 *
 *	+---------------+---------------+
 *	|		|		| udp
 *	| udp_wq	| udp_rq	|
 *	|		| UDP_RD	|
 *	|		|		|
 *	+---------------+---------------+
 *		|		^
 *		v		|
 *	+---------------+---------------+
 *	|		|		| /dev/ip
 *	| ip_wq		| ip_rq		| conn_t
 *	| UDP_WR	|		|
 *	|		|		|
 *	+---------------+---------------+
 *
 * Messages arriving at udp_wq from above will end up in ip_wq before
 * they get processed, i.e. udp write entry points will advance udp_wq
 * and use its q_next value as ip_wq in order to use the conn_t that
 * is stored in its q_ptr. Likewise, messages generated by ip to the
 * module above udp will appear as if they originated from udp_rq,
 * i.e. putnext() calls to the module above udp are done using the
 * udp_rq instead of ip_rq in order to avoid udp_rput(), which does
 * nothing more than calling putnext().
 *
 * The above implies the following rules of thumb:
 *
 * 1. udp_t is obtained from conn_t, which is created by the /dev/ip
 *    instance and is stored in q_ptr of both ip_wq and ip_rq. There
 *    is no direct reference to conn_t from either udp_wq or udp_rq.
 *
 * 2. Write-side entry points of udp can obtain the conn_t via the
 *    Q_TO_CONN() macro, using the queue value obtained from UDP_WR().
 *
 * 3. While in /dev/ip context, putnext() to the module above udp can
 *    be done by supplying the queue value obtained from UDP_RD().
 *
 */
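
/*
 * The rules above in practice: a write-side entry point of udp hops to
 * ip_wq to find the shared conn_t (rule 2), and a reply destined to the
 * module above udp is sent via udp_rq (rule 3). This is an illustrative
 * sketch only; the function name and the message handling are
 * hypothetical.
 */
#if 0
static void
udp_example_wput(queue_t *q, mblk_t *mp)
{
	queue_t	*ip_wq = UDP_WR(q);	/* rule 2: advance udp_wq to ip_wq */
	conn_t	*connp = Q_TO_CONN(ip_wq);
	udp_t	*udp = connp->conn_udp;	/* rule 1: udp_t hangs off conn_t */

	/* ... process mp using the state in udp and connp ... */

	putnext(UDP_RD(ip_wq), mp);	/* rule 3: upstream via udp_rq */
}
#endif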

static queue_t *UDP_WR(queue_t *);
static queue_t *UDP_RD(queue_t *);

udp_stat_t udp_statistics = {
	{ "udp_ip_send",		KSTAT_DATA_UINT64 },
	{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
	{ "udp_ire_null",		KSTAT_DATA_UINT64 },
	{ "udp_drain",			KSTAT_DATA_UINT64 },
	{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_busy",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_msgcnt",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum_bytes",	KSTAT_DATA_UINT64 },
	{ "udp_out_opt",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
	{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
	{ "udp_in_pktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstaddr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvif",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvslla",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvucred",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvttl",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhopopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhoplimit",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvrtdstopts",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvrthdr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvpktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvtclass",		KSTAT_DATA_UINT64 },
	{ "udp_in_timestamp",		KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "udp_data_conn",		KSTAT_DATA_UINT64 },
	{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
#endif
};

static kstat_t *udp_ksp;
struct kmem_cache *udp_cache;

/*
 * Bind hash list size and hash function. It has to be a power of 2 for
 * hashing.
 */
#define	UDP_BIND_FANOUT_SIZE	512
#define	UDP_BIND_HASH(lport) \
	((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1))

/* UDP bind fanout hash structure. */
typedef struct udp_fanout_s {
	udp_t		*uf_udp;
	kmutex_t	uf_lock;
#if defined(_LP64) || defined(_I32LPx)
	char		uf_pad[48];
#else
	char		uf_pad[56];
#endif
} udp_fanout_t;

uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
/* udp_fanout_t *udp_bind_fanout. */
static udp_fanout_t *udp_bind_fanout;

/*
 * This controls the rate at which some ndd info report functions can be
 * used by non-privileged users. It stores the last time such info is
 * requested. When those report functions are called again, this
 * is checked against the current time and compared with the ndd param
 * udp_ndd_get_info_interval.
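 * The report functions below (e.g. udp_bind_hash_report()) implement
 * this with a check of roughly the following form:
 *
 *	if (ddi_get_lbolt() - udp_last_ndd_get_info_time <
 *	    drv_usectohz(udp_ndd_get_info_interval * 1000)) {
 *		(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
 *		return (0);
 *	}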
256 */ 257 static clock_t udp_last_ndd_get_info_time; 258 #define NDD_TOO_QUICK_MSG \ 259 "ndd get info rate too high for non-priviledged users, try again " \ 260 "later.\n" 261 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 262 263 static void udp_addr_req(queue_t *q, mblk_t *mp); 264 static void udp_bind(queue_t *q, mblk_t *mp); 265 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 266 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 267 static int udp_build_hdrs(queue_t *q, udp_t *udp); 268 static void udp_capability_req(queue_t *q, mblk_t *mp); 269 static int udp_close(queue_t *q); 270 static void udp_connect(queue_t *q, mblk_t *mp); 271 static void udp_disconnect(queue_t *q, mblk_t *mp); 272 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 273 int sys_error); 274 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 275 t_scalar_t tlierr, int unixerr); 276 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 277 cred_t *cr); 278 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 279 char *value, caddr_t cp, cred_t *cr); 280 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 281 char *value, caddr_t cp, cred_t *cr); 282 static void udp_icmp_error(queue_t *q, mblk_t *mp); 283 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 284 static void udp_info_req(queue_t *q, mblk_t *mp); 285 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 286 t_scalar_t addr_length); 287 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 288 cred_t *credp); 289 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 290 int *errorp, void *thisdg_attrs); 291 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 292 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 293 static boolean_t udp_param_register(udpparam_t *udppa, int cnt); 294 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 295 cred_t *cr); 296 static int udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 297 uchar_t **optbufp, uint_t *optlenp); 298 static void udp_report_item(mblk_t *mp, udp_t *udp); 299 static void udp_rput(queue_t *q, mblk_t *mp); 300 static void udp_rput_other(queue_t *, mblk_t *); 301 static int udp_rinfop(queue_t *q, infod_t *dp); 302 static int udp_rrw(queue_t *q, struiod_t *dp); 303 static void udp_rput_bind_ack(queue_t *q, mblk_t *mp); 304 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 305 cred_t *cr); 306 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha); 307 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 308 t_scalar_t destlen, t_scalar_t err); 309 static void udp_unbind(queue_t *q, mblk_t *mp); 310 static in_port_t udp_update_next_port(in_port_t port, boolean_t random); 311 static void udp_wput(queue_t *q, mblk_t *mp); 312 static mblk_t *udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst, 313 uint16_t port, uint_t srcid, int *error); 314 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 315 t_scalar_t tudr_optlen, int *error); 316 static void udp_wput_other(queue_t *q, mblk_t *mp); 317 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 318 static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 319 socklen_t addrlen); 320 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 321 322 static void udp_kstat_init(void); 323 static void udp_kstat_fini(void); 324 static int 
udp_kstat_update(kstat_t *kp, int rw); 325 static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2); 326 static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 327 static void udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 328 static void udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2); 329 330 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 331 uint_t pkt_len); 332 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 333 static void udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t); 334 static void udp_exit(conn_t *); 335 static void udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t); 336 #ifdef DEBUG 337 static void udp_mode_assertions(udp_t *, int); 338 #endif /* DEBUG */ 339 340 major_t UDP6_MAJ; 341 #define UDP6 "udp6" 342 343 #define UDP_RECV_HIWATER (56 * 1024) 344 #define UDP_RECV_LOWATER 128 345 #define UDP_XMIT_HIWATER (56 * 1024) 346 #define UDP_XMIT_LOWATER 1024 347 348 static struct module_info udp_info = { 349 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 350 }; 351 352 static struct qinit udp_rinit = { 353 (pfi_t)udp_rput, NULL, udp_open, udp_close, NULL, 354 &udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 355 }; 356 357 static struct qinit udp_winit = { 358 (pfi_t)udp_wput, NULL, NULL, NULL, NULL, 359 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 360 }; 361 362 static struct qinit winit = { 363 (pfi_t)putnext, NULL, NULL, NULL, NULL, 364 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 365 }; 366 367 /* Support for just SNMP if UDP is not pushed directly over device IP */ 368 struct qinit udp_snmp_rinit = { 369 (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL, 370 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 371 }; 372 373 struct qinit udp_snmp_winit = { 374 (pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL, 375 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 376 }; 377 378 struct streamtab udpinfo = { 379 &udp_rinit, &winit 380 }; 381 382 static sin_t sin_null; /* Zero address for quick clears */ 383 static sin6_t sin6_null; /* Zero address for quick clears */ 384 385 /* Hint not protected by any lock */ 386 static in_port_t udp_g_next_port_to_try; 387 388 /* 389 * Extra privileged ports. In host byte order. 390 */ 391 #define UDP_NUM_EPRIV_PORTS 64 392 static int udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 393 static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 }; 394 395 /* Only modified during _init and _fini thus no locking is needed. */ 396 static IDP udp_g_nd; /* Points to table of UDP ND variables. */ 397 398 /* MIB-2 stuff for SNMP */ 399 static mib2_udp_t udp_mib; /* SNMP fixed size info */ 400 static kstat_t *udp_mibkp; /* kstat exporting udp_mib data */ 401 402 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 403 404 /* Default structure copied into T_INFO_ACK messages */ 405 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 406 T_INFO_ACK, 407 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 408 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 409 T_INVALID, /* CDATA_size. udp does not support connect data. */ 410 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 411 sizeof (sin_t), /* ADDR_size. */ 412 0, /* OPT_size - not initialized here */ 413 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 414 T_CLTS, /* SERV_type. udp supports connection-less. */ 415 TS_UNBND, /* CURRENT_state. This is set from udp_state. 
*/ 416 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 417 }; 418 419 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 420 421 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 422 T_INFO_ACK, 423 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 424 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 425 T_INVALID, /* CDATA_size. udp does not support connect data. */ 426 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 427 sizeof (sin6_t), /* ADDR_size. */ 428 0, /* OPT_size - not initialized here */ 429 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 430 T_CLTS, /* SERV_type. udp supports connection-less. */ 431 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 432 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 433 }; 434 435 /* largest UDP port number */ 436 #define UDP_MAX_PORT 65535 437 438 /* 439 * Table of ND variables supported by udp. These are loaded into udp_g_nd 440 * in udp_open. 441 * All of these are alterable, within the min/max values given, at run time. 442 */ 443 /* BEGIN CSTYLED */ 444 udpparam_t udp_param_arr[] = { 445 /*min max value name */ 446 { 0L, 256, 32, "udp_wroff_extra" }, 447 { 1L, 255, 255, "udp_ipv4_ttl" }, 448 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 449 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 450 { 0, 1, 1, "udp_do_checksum" }, 451 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 452 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 453 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 454 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 455 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 456 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 457 { 100, 60000, 1000, "udp_ndd_get_info_interval"}, 458 }; 459 /* END CSTYLED */ 460 461 /* 462 * The smallest anonymous port in the priviledged port range which UDP 463 * looks for free port. Use in the option UDP_ANONPRIVBIND. 464 */ 465 static in_port_t udp_min_anonpriv_port = 512; 466 467 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 468 uint32_t udp_random_anon_port = 1; 469 470 /* 471 * Hook functions to enable cluster networking. 
472 * On non-clustered systems these vectors must always be NULL 473 */ 474 475 void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family, 476 uint8_t *laddrp, in_port_t lport) = NULL; 477 void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, 478 uint8_t *laddrp, in_port_t lport) = NULL; 479 480 typedef union T_primitives *t_primp_t; 481 482 #define UDP_ENQUEUE_MP(udp, mp, proc, tag) { \ 483 ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL); \ 484 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 485 (mp)->b_queue = (queue_t *)((uintptr_t)tag); \ 486 (mp)->b_prev = (mblk_t *)proc; \ 487 if ((udp)->udp_mphead == NULL) \ 488 (udp)->udp_mphead = (mp); \ 489 else \ 490 (udp)->udp_mptail->b_next = (mp); \ 491 (udp)->udp_mptail = (mp); \ 492 (udp)->udp_mpcount++; \ 493 } 494 495 #define UDP_READERS_INCREF(udp) { \ 496 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 497 (udp)->udp_reader_count++; \ 498 } 499 500 #define UDP_READERS_DECREF(udp) { \ 501 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 502 (udp)->udp_reader_count--; \ 503 if ((udp)->udp_reader_count == 0) \ 504 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 505 } 506 507 #define UDP_SQUEUE_DECREF(udp) { \ 508 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 509 (udp)->udp_squeue_count--; \ 510 if ((udp)->udp_squeue_count == 0) \ 511 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 512 } 513 514 /* 515 * Notes on UDP endpoint synchronization: 516 * 517 * UDP needs exclusive operation on a per endpoint basis, when executing 518 * functions that modify the endpoint state. udp_rput_other() deals with 519 * packets with IP options, and processing these packets end up having 520 * to update the endpoint's option related state. udp_wput_other() deals 521 * with control operations from the top, e.g. connect() that needs to 522 * update the endpoint state. These could be synchronized using locks, 523 * but the current version uses squeues for this purpose. squeues may 524 * give performance improvement for certain cases such as connected UDP 525 * sockets; thus the framework allows for using squeues. 526 * 527 * The perimeter routines are described as follows: 528 * 529 * udp_enter(): 530 * Enter the UDP endpoint perimeter. 531 * 532 * udp_become_writer(): 533 * Become exclusive on the UDP endpoint. Specifies a function 534 * that will be called exclusively either immediately or later 535 * when the perimeter is available exclusively. 536 * 537 * udp_exit(): 538 * Exit the UDP perimeter. 539 * 540 * Entering UDP from the top or from the bottom must be done using 541 * udp_enter(). No lock must be held while attempting to enter the UDP 542 * perimeter. When finished, udp_exit() must be called to get out of 543 * the perimeter. 544 * 545 * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode, 546 * multiple threads may enter a UDP endpoint concurrently. This is used 547 * for sending and/or receiving normal data. Control operations and other 548 * special cases call udp_become_writer() to become exclusive on a per 549 * endpoint basis and this results in transitioning to SQUEUE mode. squeue 550 * by definition serializes access to the conn_t. When there are no more 551 * pending messages on the squeue for the UDP connection, the endpoint 552 * reverts to MT_HOT mode. During the interregnum when not all MT threads 553 * of an endpoint have finished, messages are queued in the UDP endpoint 554 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode. 
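 *
 * A typical caller therefore looks roughly like the following sketch
 * (see, for instance, udp_resume_bind() and udp_resume_bind_cb() below);
 * the entry point hands off via udp_enter() and the callback is
 * responsible for the matching udp_exit():
 *
 *	udp_enter(connp, mp, proc, tag);
 *
 *	static void
 *	proc(void *arg, mblk_t *mp, void *arg2)
 *	{
 *		conn_t *connp = arg;
 *
 *		... operate on connp->conn_udp ...
 *		udp_exit(connp);
 *	}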
555 * 556 * These modes have the following analogs: 557 * 558 * UDP_MT_HOT/udp_reader_count==0 none 559 * UDP_MT_HOT/udp_reader_count>0 RW_READ_LOCK 560 * UDP_MT_QUEUED RW_WRITE_WANTED 561 * UDP_SQUEUE or UDP_QUEUED_SQUEUE RW_WRITE_LOCKED 562 * 563 * Stable modes: UDP_MT_HOT, UDP_SQUEUE 564 * Transient modes: UDP_MT_QUEUED, UDP_QUEUED_SQUEUE 565 * 566 * While in stable modes, UDP keeps track of the number of threads 567 * operating on the endpoint. The udp_reader_count variable represents 568 * the number of threads entering the endpoint as readers while it is 569 * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there 570 * is only a single reader, i.e. when this counter drops to 1. Likewise, 571 * udp_squeue_count represents the number of threads operating on the 572 * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition 573 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e. 574 * when this counter drops to 0. 575 * 576 * The default mode is set to UDP_MT_HOT and UDP alternates between 577 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below. 578 * 579 * Mode transition: 580 * ---------------------------------------------------------------- 581 * old mode Event New mode 582 * ---------------------------------------------------------------- 583 * UDP_MT_HOT Call to udp_become_writer() UDP_SQUEUE 584 * and udp_reader_count == 1 585 * 586 * UDP_MT_HOT Call to udp_become_writer() UDP_MT_QUEUED 587 * and udp_reader_count > 1 588 * 589 * UDP_MT_QUEUED udp_reader_count drops to zero UDP_QUEUED_SQUEUE 590 * 591 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_SQUEUE 592 * internal UDP queue successfully 593 * moved to squeue AND udp_squeue_count != 0 594 * 595 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_MT_HOT 596 * internal UDP queue successfully 597 * moved to squeue AND udp_squeue_count 598 * drops to zero 599 * 600 * UDP_SQUEUE udp_squeue_count drops to zero UDP_MT_HOT 601 * ---------------------------------------------------------------- 602 */ 603 604 static queue_t * 605 UDP_WR(queue_t *q) 606 { 607 ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL); 608 ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL); 609 ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next))); 610 611 return (_WR(q)->q_next); 612 } 613 614 static queue_t * 615 UDP_RD(queue_t *q) 616 { 617 ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL); 618 ASSERT(IPCL_IS_UDP(Q_TO_CONN(q))); 619 ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL); 620 621 return (_RD(q)->q_next); 622 } 623 624 #ifdef DEBUG 625 #define UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller) 626 #else 627 #define UDP_MODE_ASSERTIONS(udp, caller) 628 #endif 629 630 /* Invariants */ 631 #ifdef DEBUG 632 633 uint32_t udp_count[4]; 634 635 /* Context of udp_mode_assertions */ 636 #define UDP_ENTER 1 637 #define UDP_BECOME_WRITER 2 638 #define UDP_EXIT 3 639 640 static void 641 udp_mode_assertions(udp_t *udp, int caller) 642 { 643 ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock)); 644 645 switch (udp->udp_mode) { 646 case UDP_MT_HOT: 647 /* 648 * Messages have not yet been enqueued on the internal queue, 649 * otherwise we would have switched to UDP_MT_QUEUED. Likewise 650 * by definition, there can't be any messages enqueued on the 651 * squeue. The UDP could be quiescent, so udp_reader_count 652 * could be zero at entry. 
653 */ 654 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 && 655 udp->udp_squeue_count == 0); 656 ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0); 657 udp_count[0]++; 658 break; 659 660 case UDP_MT_QUEUED: 661 /* 662 * The last MT thread to exit the udp perimeter empties the 663 * internal queue and then switches the UDP to 664 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED 665 * mode, it means there must be at least 1 MT thread still in 666 * the perimeter and at least 1 message on the internal queue. 667 */ 668 ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL && 669 udp->udp_mpcount != 0 && udp->udp_squeue_count == 0); 670 udp_count[1]++; 671 break; 672 673 case UDP_QUEUED_SQUEUE: 674 /* 675 * The switch has happened from MT to SQUEUE. So there can't 676 * any MT threads. Messages could still pile up on the internal 677 * queue until the transition is complete and we move to 678 * UDP_SQUEUE mode. We can't assert on nonzero udp_squeue_count 679 * since the squeue could drain any time. 680 */ 681 ASSERT(udp->udp_reader_count == 0); 682 udp_count[2]++; 683 break; 684 685 case UDP_SQUEUE: 686 /* 687 * The transition is complete. Thre can't be any messages on 688 * the internal queue. The udp could be quiescent or the squeue 689 * could drain any time, so we can't assert on nonzero 690 * udp_squeue_count during entry. Nor can we assert that 691 * udp_reader_count is zero, since, a reader thread could have 692 * directly become writer in line by calling udp_become_writer 693 * without going through the queued states. 694 */ 695 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0); 696 ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0); 697 udp_count[3]++; 698 break; 699 } 700 } 701 #endif 702 703 #define _UDP_ENTER(connp, mp, proc, tag) { \ 704 udp_t *_udp = (connp)->conn_udp; \ 705 \ 706 mutex_enter(&(connp)->conn_lock); \ 707 if ((connp)->conn_state_flags & CONN_CLOSING) { \ 708 mutex_exit(&(connp)->conn_lock); \ 709 freemsg(mp); \ 710 } else { \ 711 UDP_MODE_ASSERTIONS(_udp, UDP_ENTER); \ 712 \ 713 switch (_udp->udp_mode) { \ 714 case UDP_MT_HOT: \ 715 /* We can execute as reader right away. */ \ 716 UDP_READERS_INCREF(_udp); \ 717 mutex_exit(&(connp)->conn_lock); \ 718 (*(proc))(connp, mp, (connp)->conn_sqp); \ 719 break; \ 720 \ 721 case UDP_SQUEUE: \ 722 /* \ 723 * We are in squeue mode, send the \ 724 * packet to the squeue \ 725 */ \ 726 _udp->udp_squeue_count++; \ 727 CONN_INC_REF_LOCKED(connp); \ 728 mutex_exit(&(connp)->conn_lock); \ 729 squeue_enter((connp)->conn_sqp, mp, proc, \ 730 connp, tag); \ 731 break; \ 732 \ 733 case UDP_MT_QUEUED: \ 734 case UDP_QUEUED_SQUEUE: \ 735 /* \ 736 * Some messages may have been enqueued \ 737 * ahead of us. Enqueue the new message \ 738 * at the tail of the internal queue to \ 739 * preserve message ordering. \ 740 */ \ 741 UDP_ENQUEUE_MP(_udp, mp, proc, tag); \ 742 mutex_exit(&(connp)->conn_lock); \ 743 break; \ 744 } \ 745 } \ 746 } 747 748 static void 749 udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 750 { 751 _UDP_ENTER(connp, mp, proc, tag); 752 } 753 754 static void 755 udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 756 { 757 udp_t *udp; 758 759 udp = connp->conn_udp; 760 761 mutex_enter(&connp->conn_lock); 762 763 UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER); 764 765 switch (udp->udp_mode) { 766 case UDP_MT_HOT: 767 if (udp->udp_reader_count == 1) { 768 /* 769 * We are the only MT thread. Switch to squeue mode 770 * immediately. 
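			 * This is the direct UDP_MT_HOT -> UDP_SQUEUE
			 * transition in the mode table above: with no
			 * other readers there is nothing to queue, so the
			 * message goes straight to the squeue.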
771 */ 772 udp->udp_mode = UDP_SQUEUE; 773 udp->udp_squeue_count = 1; 774 CONN_INC_REF_LOCKED(connp); 775 mutex_exit(&connp->conn_lock); 776 squeue_enter(connp->conn_sqp, mp, proc, connp, tag); 777 return; 778 } 779 /* FALLTHRU */ 780 781 case UDP_MT_QUEUED: 782 /* Enqueue the packet internally in UDP */ 783 udp->udp_mode = UDP_MT_QUEUED; 784 UDP_ENQUEUE_MP(udp, mp, proc, tag); 785 mutex_exit(&connp->conn_lock); 786 return; 787 788 case UDP_SQUEUE: 789 case UDP_QUEUED_SQUEUE: 790 /* 791 * We are already exclusive. i.e. we are already 792 * writer. Simply call the desired function. 793 */ 794 udp->udp_squeue_count++; 795 mutex_exit(&connp->conn_lock); 796 (*proc)(connp, mp, connp->conn_sqp); 797 return; 798 } 799 } 800 801 /* 802 * Transition from MT mode to SQUEUE mode, when the last MT thread 803 * is exiting the UDP perimeter. Move all messages from the internal 804 * udp queue to the squeue. A better way would be to move all the 805 * messages in one shot, this needs more support from the squeue framework 806 */ 807 static void 808 udp_switch_to_squeue(udp_t *udp) 809 { 810 mblk_t *mp; 811 mblk_t *mp_next; 812 sqproc_t proc; 813 uint8_t tag; 814 conn_t *connp = udp->udp_connp; 815 816 ASSERT(MUTEX_HELD(&connp->conn_lock)); 817 ASSERT(udp->udp_mode == UDP_MT_QUEUED); 818 while (udp->udp_mphead != NULL) { 819 mp = udp->udp_mphead; 820 udp->udp_mphead = NULL; 821 udp->udp_mptail = NULL; 822 udp->udp_mpcount = 0; 823 udp->udp_mode = UDP_QUEUED_SQUEUE; 824 mutex_exit(&connp->conn_lock); 825 /* 826 * It is best not to hold any locks across the calls 827 * to squeue functions. Since we drop the lock we 828 * need to go back and check the udp_mphead once again 829 * after the squeue_fill and hence the while loop at 830 * the top of this function 831 */ 832 for (; mp != NULL; mp = mp_next) { 833 mp_next = mp->b_next; 834 proc = (sqproc_t)mp->b_prev; 835 tag = (uint8_t)((uintptr_t)mp->b_queue); 836 mp->b_next = NULL; 837 mp->b_prev = NULL; 838 mp->b_queue = NULL; 839 CONN_INC_REF(connp); 840 udp->udp_squeue_count++; 841 squeue_fill(connp->conn_sqp, mp, proc, connp, 842 tag); 843 } 844 mutex_enter(&connp->conn_lock); 845 } 846 /* 847 * udp_squeue_count of zero implies that the squeue has drained 848 * even before we arrived here (i.e. after the squeue_fill above) 849 */ 850 udp->udp_mode = (udp->udp_squeue_count != 0) ? 851 UDP_SQUEUE : UDP_MT_HOT; 852 } 853 854 #define _UDP_EXIT(connp) { \ 855 udp_t *_udp = (connp)->conn_udp; \ 856 \ 857 mutex_enter(&(connp)->conn_lock); \ 858 UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \ 859 \ 860 switch (_udp->udp_mode) { \ 861 case UDP_MT_HOT: \ 862 UDP_READERS_DECREF(_udp); \ 863 mutex_exit(&(connp)->conn_lock); \ 864 break; \ 865 \ 866 case UDP_SQUEUE: \ 867 UDP_SQUEUE_DECREF(_udp); \ 868 if (_udp->udp_squeue_count == 0) \ 869 _udp->udp_mode = UDP_MT_HOT; \ 870 mutex_exit(&(connp)->conn_lock); \ 871 break; \ 872 \ 873 case UDP_MT_QUEUED: \ 874 /* \ 875 * If this is the last MT thread, we need to \ 876 * switch to squeue mode \ 877 */ \ 878 UDP_READERS_DECREF(_udp); \ 879 if (_udp->udp_reader_count == 0) \ 880 udp_switch_to_squeue(_udp); \ 881 mutex_exit(&(connp)->conn_lock); \ 882 break; \ 883 \ 884 case UDP_QUEUED_SQUEUE: \ 885 UDP_SQUEUE_DECREF(_udp); \ 886 /* \ 887 * Even if the udp_squeue_count drops to zero, we \ 888 * don't want to change udp_mode to UDP_MT_HOT here. \ 889 * The thread in udp_switch_to_squeue will take care \ 890 * of the transition to UDP_MT_HOT, after emptying \ 891 * any more new messages that have been enqueued in \ 892 * udp_mphead. 
\ 893 */ \ 894 mutex_exit(&(connp)->conn_lock); \ 895 break; \ 896 } \ 897 } 898 899 static void 900 udp_exit(conn_t *connp) 901 { 902 _UDP_EXIT(connp); 903 } 904 905 /* 906 * Return the next anonymous port in the priviledged port range for 907 * bind checking. 908 */ 909 static in_port_t 910 udp_get_next_priv_port(void) 911 { 912 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 913 914 if (next_priv_port < udp_min_anonpriv_port) { 915 next_priv_port = IPPORT_RESERVED - 1; 916 } 917 return (next_priv_port--); 918 } 919 920 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 921 /* ARGSUSED */ 922 static int 923 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 924 { 925 udp_fanout_t *udpf; 926 int i; 927 zoneid_t zoneid; 928 conn_t *connp; 929 udp_t *udp; 930 931 connp = Q_TO_CONN(q); 932 udp = connp->conn_udp; 933 934 /* Refer to comments in udp_status_report(). */ 935 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 936 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 937 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 938 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 939 return (0); 940 } 941 } 942 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 943 /* The following may work even if we cannot get a large buf. */ 944 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 945 return (0); 946 } 947 948 (void) mi_mpprintf(mp, 949 "UDP " MI_COL_HDRPAD_STR 950 /* 12345678[89ABCDEF] */ 951 " zone lport src addr dest addr port state"); 952 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 953 954 zoneid = connp->conn_zoneid; 955 956 for (i = 0; i < udp_bind_fanout_size; i++) { 957 udpf = &udp_bind_fanout[i]; 958 mutex_enter(&udpf->uf_lock); 959 960 /* Print the hash index. */ 961 udp = udpf->uf_udp; 962 if (zoneid != GLOBAL_ZONEID) { 963 /* skip to first entry in this zone; might be none */ 964 while (udp != NULL && 965 udp->udp_connp->conn_zoneid != zoneid) 966 udp = udp->udp_bind_hash; 967 } 968 if (udp != NULL) { 969 uint_t print_len, buf_len; 970 971 buf_len = mp->b_cont->b_datap->db_lim - 972 mp->b_cont->b_wptr; 973 print_len = snprintf((char *)mp->b_cont->b_wptr, 974 buf_len, "%d\n", i); 975 if (print_len < buf_len) { 976 mp->b_cont->b_wptr += print_len; 977 } else { 978 mp->b_cont->b_wptr += buf_len; 979 } 980 for (; udp != NULL; udp = udp->udp_bind_hash) { 981 if (zoneid == GLOBAL_ZONEID || 982 zoneid == udp->udp_connp->conn_zoneid) 983 udp_report_item(mp->b_cont, udp); 984 } 985 } 986 mutex_exit(&udpf->uf_lock); 987 } 988 udp_last_ndd_get_info_time = ddi_get_lbolt(); 989 return (0); 990 } 991 992 /* 993 * Hash list removal routine for udp_t structures. 994 */ 995 static void 996 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 997 { 998 udp_t *udpnext; 999 kmutex_t *lockp; 1000 1001 if (udp->udp_ptpbhn == NULL) 1002 return; 1003 1004 /* 1005 * Extract the lock pointer in case there are concurrent 1006 * hash_remove's for this instance. 
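	 * The unlink itself uses udp_ptpbhn, which points back at whatever
	 * pointer currently references this udp_t (either the bucket's
	 * uf_udp or the previous entry's udp_bind_hash), so removal is
	 * simply *udp->udp_ptpbhn = udp->udp_bind_hash.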
1007 */ 1008 ASSERT(udp->udp_port != 0); 1009 if (!caller_holds_lock) { 1010 lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock; 1011 ASSERT(lockp != NULL); 1012 mutex_enter(lockp); 1013 } 1014 if (udp->udp_ptpbhn != NULL) { 1015 udpnext = udp->udp_bind_hash; 1016 if (udpnext != NULL) { 1017 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 1018 udp->udp_bind_hash = NULL; 1019 } 1020 *udp->udp_ptpbhn = udpnext; 1021 udp->udp_ptpbhn = NULL; 1022 } 1023 if (!caller_holds_lock) { 1024 mutex_exit(lockp); 1025 } 1026 } 1027 1028 static void 1029 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 1030 { 1031 udp_t **udpp; 1032 udp_t *udpnext; 1033 1034 ASSERT(MUTEX_HELD(&uf->uf_lock)); 1035 if (udp->udp_ptpbhn != NULL) { 1036 udp_bind_hash_remove(udp, B_TRUE); 1037 } 1038 udpp = &uf->uf_udp; 1039 udpnext = udpp[0]; 1040 if (udpnext != NULL) { 1041 /* 1042 * If the new udp bound to the INADDR_ANY address 1043 * and the first one in the list is not bound to 1044 * INADDR_ANY we skip all entries until we find the 1045 * first one bound to INADDR_ANY. 1046 * This makes sure that applications binding to a 1047 * specific address get preference over those binding to 1048 * INADDR_ANY. 1049 */ 1050 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 1051 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 1052 while ((udpnext = udpp[0]) != NULL && 1053 !V6_OR_V4_INADDR_ANY( 1054 udpnext->udp_bound_v6src)) { 1055 udpp = &(udpnext->udp_bind_hash); 1056 } 1057 if (udpnext != NULL) 1058 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1059 } else { 1060 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1061 } 1062 } 1063 udp->udp_bind_hash = udpnext; 1064 udp->udp_ptpbhn = udpp; 1065 udpp[0] = udp; 1066 } 1067 1068 /* 1069 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 1070 * passed to udp_wput. 1071 * It associates a port number and local address with the stream. 1072 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 1073 * protocol type (IPPROTO_UDP) placed in the message following the address. 1074 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 1075 * (Called as writer.) 1076 * 1077 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 1078 * without setting SO_REUSEADDR. This is needed so that they 1079 * can be viewed as two independent transport protocols. 1080 * However, anonymouns ports are allocated from the same range to avoid 1081 * duplicating the udp_g_next_port_to_try. 
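 *
 * After the address is filled in below, the request handed to IP is laid
 * out roughly as follows (the IRE request is appended only when udp_v6src
 * is not the unspecified address):
 *
 *	T_bind_req | sin_t or sin6_t | IPPROTO_UDP (one byte)
 *	    b_cont -> IRE_DB_REQ_TYPE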
1082 */ 1083 static void 1084 udp_bind(queue_t *q, mblk_t *mp) 1085 { 1086 sin_t *sin; 1087 sin6_t *sin6; 1088 mblk_t *mp1; 1089 in_port_t port; /* Host byte order */ 1090 in_port_t requested_port; /* Host byte order */ 1091 struct T_bind_req *tbr; 1092 int count; 1093 in6_addr_t v6src; 1094 boolean_t bind_to_req_port_only; 1095 int loopmax; 1096 udp_fanout_t *udpf; 1097 in_port_t lport; /* Network byte order */ 1098 zoneid_t zoneid; 1099 conn_t *connp; 1100 udp_t *udp; 1101 1102 connp = Q_TO_CONN(q); 1103 udp = connp->conn_udp; 1104 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 1105 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1106 "udp_bind: bad req, len %u", 1107 (uint_t)(mp->b_wptr - mp->b_rptr)); 1108 udp_err_ack(q, mp, TPROTO, 0); 1109 return; 1110 } 1111 1112 if (udp->udp_state != TS_UNBND) { 1113 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1114 "udp_bind: bad state, %u", udp->udp_state); 1115 udp_err_ack(q, mp, TOUTSTATE, 0); 1116 return; 1117 } 1118 /* 1119 * Reallocate the message to make sure we have enough room for an 1120 * address and the protocol type. 1121 */ 1122 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 1123 if (!mp1) { 1124 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1125 return; 1126 } 1127 1128 mp = mp1; 1129 tbr = (struct T_bind_req *)mp->b_rptr; 1130 switch (tbr->ADDR_length) { 1131 case 0: /* Request for a generic port */ 1132 tbr->ADDR_offset = sizeof (struct T_bind_req); 1133 if (udp->udp_family == AF_INET) { 1134 tbr->ADDR_length = sizeof (sin_t); 1135 sin = (sin_t *)&tbr[1]; 1136 *sin = sin_null; 1137 sin->sin_family = AF_INET; 1138 mp->b_wptr = (uchar_t *)&sin[1]; 1139 } else { 1140 ASSERT(udp->udp_family == AF_INET6); 1141 tbr->ADDR_length = sizeof (sin6_t); 1142 sin6 = (sin6_t *)&tbr[1]; 1143 *sin6 = sin6_null; 1144 sin6->sin6_family = AF_INET6; 1145 mp->b_wptr = (uchar_t *)&sin6[1]; 1146 } 1147 port = 0; 1148 break; 1149 1150 case sizeof (sin_t): /* Complete IPv4 address */ 1151 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 1152 sizeof (sin_t)); 1153 if (sin == NULL || !OK_32PTR((char *)sin)) { 1154 udp_err_ack(q, mp, TSYSERR, EINVAL); 1155 return; 1156 } 1157 if (udp->udp_family != AF_INET || 1158 sin->sin_family != AF_INET) { 1159 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1160 return; 1161 } 1162 port = ntohs(sin->sin_port); 1163 break; 1164 1165 case sizeof (sin6_t): /* complete IPv6 address */ 1166 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 1167 sizeof (sin6_t)); 1168 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1169 udp_err_ack(q, mp, TSYSERR, EINVAL); 1170 return; 1171 } 1172 if (udp->udp_family != AF_INET6 || 1173 sin6->sin6_family != AF_INET6) { 1174 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1175 return; 1176 } 1177 port = ntohs(sin6->sin6_port); 1178 break; 1179 1180 default: /* Invalid request */ 1181 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1182 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 1183 udp_err_ack(q, mp, TBADADDR, 0); 1184 return; 1185 } 1186 1187 requested_port = port; 1188 1189 if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ) 1190 bind_to_req_port_only = B_FALSE; 1191 else /* T_BIND_REQ and requested_port != 0 */ 1192 bind_to_req_port_only = B_TRUE; 1193 1194 if (requested_port == 0) { 1195 /* 1196 * If the application passed in zero for the port number, it 1197 * doesn't care which port number we bind to. Get one in the 1198 * valid range. 
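		 * The valid range depends on UDP_ANONPRIVBIND: with
		 * udp_anon_priv_bind set the port comes from
		 * [udp_min_anonpriv_port, IPPORT_RESERVED), otherwise from
		 * the anonymous range bounded by udp_smallest_anon_port and
		 * udp_largest_anon_port.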
1199 */ 1200 if (udp->udp_anon_priv_bind) { 1201 port = udp_get_next_priv_port(); 1202 } else { 1203 port = udp_update_next_port(udp_g_next_port_to_try, 1204 B_TRUE); 1205 } 1206 } else { 1207 /* 1208 * If the port is in the well-known privileged range, 1209 * make sure the caller was privileged. 1210 */ 1211 int i; 1212 boolean_t priv = B_FALSE; 1213 1214 if (port < udp_smallest_nonpriv_port) { 1215 priv = B_TRUE; 1216 } else { 1217 for (i = 0; i < udp_g_num_epriv_ports; i++) { 1218 if (port == udp_g_epriv_ports[i]) { 1219 priv = B_TRUE; 1220 break; 1221 } 1222 } 1223 } 1224 1225 if (priv) { 1226 cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); 1227 1228 if (secpolicy_net_privaddr(cr, port) != 0) { 1229 udp_err_ack(q, mp, TACCES, 0); 1230 return; 1231 } 1232 } 1233 } 1234 1235 /* 1236 * Copy the source address into our udp structure. This address 1237 * may still be zero; if so, IP will fill in the correct address 1238 * each time an outbound packet is passed to it. 1239 */ 1240 if (udp->udp_family == AF_INET) { 1241 ASSERT(sin != NULL); 1242 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1243 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1244 udp->udp_ip_snd_options_len; 1245 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 1246 } else { 1247 ASSERT(sin6 != NULL); 1248 v6src = sin6->sin6_addr; 1249 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 1250 udp->udp_ipversion = IPV4_VERSION; 1251 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1252 UDPH_SIZE + udp->udp_ip_snd_options_len; 1253 } else { 1254 udp->udp_ipversion = IPV6_VERSION; 1255 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1256 } 1257 } 1258 1259 /* 1260 * If udp_reuseaddr is not set, then we have to make sure that 1261 * the IP address and port number the application requested 1262 * (or we selected for the application) is not being used by 1263 * another stream. If another stream is already using the 1264 * requested IP address and port, the behavior depends on 1265 * "bind_to_req_port_only". If set the bind fails; otherwise we 1266 * search for any an unused port to bind to the the stream. 1267 * 1268 * As per the BSD semantics, as modified by the Deering multicast 1269 * changes, if udp_reuseaddr is set, then we allow multiple binds 1270 * to the same port independent of the local IP address. 1271 * 1272 * This is slightly different than in SunOS 4.X which did not 1273 * support IP multicast. Note that the change implemented by the 1274 * Deering multicast code effects all binds - not only binding 1275 * to IP multicast addresses. 1276 * 1277 * Note that when binding to port zero we ignore SO_REUSEADDR in 1278 * order to guarantee a unique port. 1279 */ 1280 1281 count = 0; 1282 if (udp->udp_anon_priv_bind) { 1283 /* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */ 1284 loopmax = IPPORT_RESERVED - udp_min_anonpriv_port; 1285 } else { 1286 loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1; 1287 } 1288 1289 zoneid = connp->conn_zoneid; 1290 for (;;) { 1291 udp_t *udp1; 1292 boolean_t is_inaddr_any; 1293 boolean_t found_exclbind = B_FALSE; 1294 1295 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 1296 /* 1297 * Walk through the list of udp streams bound to 1298 * requested port with the same IP address. 
1299 */ 1300 lport = htons(port); 1301 udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)]; 1302 mutex_enter(&udpf->uf_lock); 1303 for (udp1 = udpf->uf_udp; udp1 != NULL; 1304 udp1 = udp1->udp_bind_hash) { 1305 if (lport != udp1->udp_port || 1306 zoneid != udp1->udp_connp->conn_zoneid) 1307 continue; 1308 1309 /* 1310 * If UDP_EXCLBIND is set for either the bound or 1311 * binding endpoint, the semantics of bind 1312 * is changed according to the following chart. 1313 * 1314 * spec = specified address (v4 or v6) 1315 * unspec = unspecified address (v4 or v6) 1316 * A = specified addresses are different for endpoints 1317 * 1318 * bound bind to allowed? 1319 * ------------------------------------- 1320 * unspec unspec no 1321 * unspec spec no 1322 * spec unspec no 1323 * spec spec yes if A 1324 */ 1325 if (udp1->udp_exclbind || udp->udp_exclbind) { 1326 if (V6_OR_V4_INADDR_ANY( 1327 udp1->udp_bound_v6src) || 1328 is_inaddr_any || 1329 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1330 &v6src)) { 1331 found_exclbind = B_TRUE; 1332 break; 1333 } 1334 continue; 1335 } 1336 1337 /* 1338 * Check ipversion to allow IPv4 and IPv6 sockets to 1339 * have disjoint port number spaces. 1340 */ 1341 if (udp->udp_ipversion != udp1->udp_ipversion) 1342 continue; 1343 1344 /* 1345 * No difference depending on SO_REUSEADDR. 1346 * 1347 * If existing port is bound to a 1348 * non-wildcard IP address and 1349 * the requesting stream is bound to 1350 * a distinct different IP addresses 1351 * (non-wildcard, also), keep going. 1352 */ 1353 if (!is_inaddr_any && 1354 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 1355 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1356 &v6src)) { 1357 continue; 1358 } 1359 break; 1360 } 1361 1362 if (!found_exclbind && 1363 (udp->udp_reuseaddr && requested_port != 0)) { 1364 break; 1365 } 1366 1367 if (udp1 == NULL) { 1368 /* 1369 * No other stream has this IP address 1370 * and port number. We can use it. 1371 */ 1372 break; 1373 } 1374 mutex_exit(&udpf->uf_lock); 1375 if (bind_to_req_port_only) { 1376 /* 1377 * We get here only when requested port 1378 * is bound (and only first of the for() 1379 * loop iteration). 1380 * 1381 * The semantics of this bind request 1382 * require it to fail so we return from 1383 * the routine (and exit the loop). 1384 * 1385 */ 1386 udp_err_ack(q, mp, TADDRBUSY, 0); 1387 return; 1388 } 1389 1390 if (udp->udp_anon_priv_bind) { 1391 port = udp_get_next_priv_port(); 1392 } else { 1393 if ((count == 0) && (requested_port != 0)) { 1394 /* 1395 * If the application wants us to find 1396 * a port, get one to start with. Set 1397 * requested_port to 0, so that we will 1398 * update udp_g_next_port_to_try below. 1399 */ 1400 port = udp_update_next_port( 1401 udp_g_next_port_to_try, B_TRUE); 1402 requested_port = 0; 1403 } else { 1404 port = udp_update_next_port(port + 1, B_FALSE); 1405 } 1406 } 1407 1408 if (++count >= loopmax) { 1409 /* 1410 * We've tried every possible port number and 1411 * there are none available, so send an error 1412 * to the user. 1413 */ 1414 udp_err_ack(q, mp, TNOADDR, 0); 1415 return; 1416 } 1417 } 1418 1419 /* 1420 * Copy the source address into our udp structure. This address 1421 * may still be zero; if so, ip will fill in the correct address 1422 * each time an outbound packet is passed to it. 1423 * If we are binding to a broadcast or multicast address udp_rput 1424 * will clear the source address when it receives the T_BIND_ACK. 
1425 */ 1426 udp->udp_v6src = udp->udp_bound_v6src = v6src; 1427 udp->udp_port = lport; 1428 /* 1429 * Now reset the the next anonymous port if the application requested 1430 * an anonymous port, or we handed out the next anonymous port. 1431 */ 1432 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 1433 udp_g_next_port_to_try = port + 1; 1434 } 1435 1436 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 1437 if (udp->udp_family == AF_INET) { 1438 sin->sin_port = udp->udp_port; 1439 } else { 1440 int error; 1441 1442 sin6->sin6_port = udp->udp_port; 1443 /* Rebuild the header template */ 1444 error = udp_build_hdrs(q, udp); 1445 if (error != 0) { 1446 mutex_exit(&udpf->uf_lock); 1447 udp_err_ack(q, mp, TSYSERR, error); 1448 return; 1449 } 1450 } 1451 udp->udp_state = TS_IDLE; 1452 udp_bind_hash_insert(udpf, udp); 1453 mutex_exit(&udpf->uf_lock); 1454 1455 if (cl_inet_bind) { 1456 /* 1457 * Running in cluster mode - register bind information 1458 */ 1459 if (udp->udp_ipversion == IPV4_VERSION) { 1460 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1461 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1462 (in_port_t)udp->udp_port); 1463 } else { 1464 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1465 (uint8_t *)&(udp->udp_v6src), 1466 (in_port_t)udp->udp_port); 1467 } 1468 1469 } 1470 1471 /* Pass the protocol number in the message following the address. */ 1472 *mp->b_wptr++ = IPPROTO_UDP; 1473 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1474 /* 1475 * Append a request for an IRE if udp_v6src not 1476 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1477 */ 1478 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1479 if (!mp->b_cont) { 1480 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1481 return; 1482 } 1483 mp->b_cont->b_wptr += sizeof (ire_t); 1484 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1485 } 1486 if (udp->udp_family == AF_INET6) 1487 mp = ip_bind_v6(q, mp, connp, NULL); 1488 else 1489 mp = ip_bind_v4(q, mp, connp); 1490 1491 if (mp != NULL) 1492 udp_rput_other(_RD(q), mp); 1493 else 1494 CONN_INC_REF(connp); 1495 } 1496 1497 1498 void 1499 udp_resume_bind(conn_t *connp, mblk_t *mp) 1500 { 1501 udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY); 1502 } 1503 1504 /* 1505 * This is called from ip_wput_nondata to resume a deferred UDP bind. 1506 */ 1507 /* ARGSUSED */ 1508 static void 1509 udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2) 1510 { 1511 conn_t *connp = arg; 1512 1513 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1514 1515 udp_rput_other(connp->conn_rq, mp); 1516 1517 CONN_OPER_PENDING_DONE(connp); 1518 udp_exit(connp); 1519 } 1520 1521 /* 1522 * This routine handles each T_CONN_REQ message passed to udp. It 1523 * associates a default destination address with the stream. 1524 * 1525 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1526 * T_BIND_REQ - specifying local and remote address/port 1527 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 1528 * T_OK_ACK - for the T_CONN_REQ 1529 * T_CONN_CON - to keep the TPI user happy 1530 * 1531 * The connect completes in udp_rput. 1532 * When a T_BIND_ACK is received information is extracted from the IRE 1533 * and the two appended messages are sent to the TPI user. 1534 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1535 * it to an error ack for the appropriate primitive. 
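 *
 * After the linkb() calls below, the chain sent to IP is thus b_cont-linked
 * roughly as:
 *
 *	T_BIND_REQ -> IRE_DB_REQ_TYPE -> T_OK_ACK -> T_CONN_CON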
1536 */ 1537 static void 1538 udp_connect(queue_t *q, mblk_t *mp) 1539 { 1540 sin6_t *sin6; 1541 sin_t *sin; 1542 struct T_conn_req *tcr; 1543 in6_addr_t v6dst; 1544 ipaddr_t v4dst; 1545 uint16_t dstport; 1546 uint32_t flowinfo; 1547 mblk_t *mp1, *mp2; 1548 udp_fanout_t *udpf; 1549 udp_t *udp, *udp1; 1550 1551 udp = Q_TO_UDP(q); 1552 1553 tcr = (struct T_conn_req *)mp->b_rptr; 1554 1555 /* A bit of sanity checking */ 1556 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 1557 udp_err_ack(q, mp, TPROTO, 0); 1558 return; 1559 } 1560 /* 1561 * This UDP must have bound to a port already before doing 1562 * a connect. 1563 */ 1564 if (udp->udp_state == TS_UNBND) { 1565 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1566 "udp_connect: bad state, %u", udp->udp_state); 1567 udp_err_ack(q, mp, TOUTSTATE, 0); 1568 return; 1569 } 1570 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 1571 1572 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1573 1574 if (udp->udp_state == TS_DATA_XFER) { 1575 /* Already connected - clear out state */ 1576 mutex_enter(&udpf->uf_lock); 1577 udp->udp_v6src = udp->udp_bound_v6src; 1578 udp->udp_state = TS_IDLE; 1579 mutex_exit(&udpf->uf_lock); 1580 } 1581 1582 if (tcr->OPT_length != 0) { 1583 udp_err_ack(q, mp, TBADOPT, 0); 1584 return; 1585 } 1586 1587 /* 1588 * Determine packet type based on type of address passed in 1589 * the request should contain an IPv4 or IPv6 address. 1590 * Make sure that address family matches the type of 1591 * family of the the address passed down 1592 */ 1593 switch (tcr->DEST_length) { 1594 default: 1595 udp_err_ack(q, mp, TBADADDR, 0); 1596 return; 1597 1598 case sizeof (sin_t): 1599 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 1600 sizeof (sin_t)); 1601 if (sin == NULL || !OK_32PTR((char *)sin)) { 1602 udp_err_ack(q, mp, TSYSERR, EINVAL); 1603 return; 1604 } 1605 if (udp->udp_family != AF_INET || 1606 sin->sin_family != AF_INET) { 1607 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1608 return; 1609 } 1610 v4dst = sin->sin_addr.s_addr; 1611 dstport = sin->sin_port; 1612 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1613 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1614 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1615 udp->udp_ip_snd_options_len; 1616 break; 1617 1618 case sizeof (sin6_t): 1619 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 1620 sizeof (sin6_t)); 1621 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1622 udp_err_ack(q, mp, TSYSERR, EINVAL); 1623 return; 1624 } 1625 if (udp->udp_family != AF_INET6 || 1626 sin6->sin6_family != AF_INET6) { 1627 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1628 return; 1629 } 1630 v6dst = sin6->sin6_addr; 1631 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 1632 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 1633 udp->udp_ipversion = IPV4_VERSION; 1634 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1635 UDPH_SIZE + udp->udp_ip_snd_options_len; 1636 flowinfo = 0; 1637 } else { 1638 udp->udp_ipversion = IPV6_VERSION; 1639 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1640 flowinfo = sin6->sin6_flowinfo; 1641 } 1642 dstport = sin6->sin6_port; 1643 break; 1644 } 1645 if (dstport == 0) { 1646 udp_err_ack(q, mp, TBADADDR, 0); 1647 return; 1648 } 1649 1650 /* 1651 * Create a default IP header with no IP options. 1652 */ 1653 udp->udp_dstport = dstport; 1654 if (udp->udp_ipversion == IPV4_VERSION) { 1655 /* 1656 * Interpret a zero destination to mean loopback. 1657 * Update the T_CONN_REQ (sin/sin6) since it is used to 1658 * generate the T_CONN_CON. 
1659 */ 1660 if (v4dst == INADDR_ANY) { 1661 v4dst = htonl(INADDR_LOOPBACK); 1662 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1663 if (udp->udp_family == AF_INET) { 1664 sin->sin_addr.s_addr = v4dst; 1665 } else { 1666 sin6->sin6_addr = v6dst; 1667 } 1668 } 1669 udp->udp_v6dst = v6dst; 1670 udp->udp_flowinfo = 0; 1671 1672 /* 1673 * If the destination address is multicast and 1674 * an outgoing multicast interface has been set, 1675 * use the address of that interface as our 1676 * source address if no source address has been set. 1677 */ 1678 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 1679 CLASSD(v4dst) && 1680 udp->udp_multicast_if_addr != INADDR_ANY) { 1681 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 1682 &udp->udp_v6src); 1683 } 1684 } else { 1685 ASSERT(udp->udp_ipversion == IPV6_VERSION); 1686 /* 1687 * Interpret a zero destination to mean loopback. 1688 * Update the T_CONN_REQ (sin/sin6) since it is used to 1689 * generate the T_CONN_CON. 1690 */ 1691 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 1692 v6dst = ipv6_loopback; 1693 sin6->sin6_addr = v6dst; 1694 } 1695 udp->udp_v6dst = v6dst; 1696 udp->udp_flowinfo = flowinfo; 1697 /* 1698 * If the destination address is multicast and 1699 * an outgoing multicast interface has been set, 1700 * then the ip bind logic will pick the correct source 1701 * address (i.e. matching the outgoing multicast interface). 1702 */ 1703 } 1704 1705 /* 1706 * Verify that the src/port/dst/port is unique for all 1707 * connections in TS_DATA_XFER 1708 */ 1709 mutex_enter(&udpf->uf_lock); 1710 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 1711 if (udp1->udp_state != TS_DATA_XFER) 1712 continue; 1713 if (udp->udp_port != udp1->udp_port || 1714 udp->udp_ipversion != udp1->udp_ipversion || 1715 dstport != udp1->udp_dstport || 1716 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 1717 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst)) 1718 continue; 1719 mutex_exit(&udpf->uf_lock); 1720 udp_err_ack(q, mp, TBADADDR, 0); 1721 return; 1722 } 1723 udp->udp_state = TS_DATA_XFER; 1724 mutex_exit(&udpf->uf_lock); 1725 1726 /* 1727 * Send down bind to IP to verify that there is a route 1728 * and to determine the source address. 1729 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 1730 */ 1731 if (udp->udp_family == AF_INET) 1732 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t)); 1733 else 1734 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); 1735 if (mp1 == NULL) { 1736 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1737 bind_failed: 1738 mutex_enter(&udpf->uf_lock); 1739 udp->udp_state = TS_IDLE; 1740 mutex_exit(&udpf->uf_lock); 1741 return; 1742 } 1743 1744 /* 1745 * We also have to send a connection confirmation to 1746 * keep TLI happy. Prepare it for udp_rput. 1747 */ 1748 if (udp->udp_family == AF_INET) 1749 mp2 = mi_tpi_conn_con(NULL, (char *)sin, 1750 sizeof (*sin), NULL, 0); 1751 else 1752 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, 1753 sizeof (*sin6), NULL, 0); 1754 if (mp2 == NULL) { 1755 freemsg(mp1); 1756 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1757 goto bind_failed; 1758 } 1759 1760 mp = mi_tpi_ok_ack_alloc(mp); 1761 if (mp == NULL) { 1762 /* Unable to reuse the T_CONN_REQ for the ack. */ 1763 freemsg(mp2); 1764 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 1765 goto bind_failed; 1766 } 1767 1768 /* Hang onto the T_OK_ACK and T_CONN_CON for later. 
*/ 1769 linkb(mp1, mp); 1770 linkb(mp1, mp2); 1771 1772 if (udp->udp_family == AF_INET) 1773 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1774 else 1775 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1776 1777 if (mp1 != NULL) 1778 udp_rput_other(_RD(q), mp1); 1779 else 1780 CONN_INC_REF(udp->udp_connp); 1781 } 1782 1783 static int 1784 udp_close(queue_t *q) 1785 { 1786 conn_t *connp = Q_TO_CONN(UDP_WR(q)); 1787 udp_t *udp; 1788 queue_t *ip_rq = RD(UDP_WR(q)); 1789 1790 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1791 udp = connp->conn_udp; 1792 1793 ip_quiesce_conn(connp); 1794 /* 1795 * Disable read-side synchronous stream 1796 * interface and drain any queued data. 1797 */ 1798 udp_rcv_drain(q, udp, B_TRUE); 1799 ASSERT(!udp->udp_direct_sockfs); 1800 1801 qprocsoff(q); 1802 1803 /* restore IP module's high and low water marks to default values */ 1804 ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat; 1805 WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat; 1806 WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat; 1807 1808 ASSERT(udp->udp_rcv_cnt == 0); 1809 ASSERT(udp->udp_rcv_msgcnt == 0); 1810 ASSERT(udp->udp_rcv_list_head == NULL); 1811 ASSERT(udp->udp_rcv_list_tail == NULL); 1812 1813 /* connp is now single threaded. */ 1814 udp_close_free(connp); 1815 /* 1816 * Restore connp as an IP endpoint. We don't need 1817 * any locks since we are now single threaded 1818 */ 1819 connp->conn_flags &= ~IPCL_UDP; 1820 connp->conn_state_flags &= 1821 ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED); 1822 return (0); 1823 } 1824 1825 /* 1826 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn 1827 */ 1828 void 1829 udp_quiesce_conn(conn_t *connp) 1830 { 1831 udp_t *udp = connp->conn_udp; 1832 1833 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 1834 /* 1835 * Running in cluster mode - register unbind information 1836 */ 1837 if (udp->udp_ipversion == IPV4_VERSION) { 1838 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 1839 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 1840 (in_port_t)udp->udp_port); 1841 } else { 1842 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 1843 (uint8_t *)(&(udp->udp_v6src)), 1844 (in_port_t)udp->udp_port); 1845 } 1846 } 1847 1848 udp_bind_hash_remove(udp, B_FALSE); 1849 1850 mutex_enter(&connp->conn_lock); 1851 while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 || 1852 udp->udp_mode != UDP_MT_HOT) { 1853 cv_wait(&connp->conn_cv, &connp->conn_lock); 1854 } 1855 mutex_exit(&connp->conn_lock); 1856 } 1857 1858 void 1859 udp_close_free(conn_t *connp) 1860 { 1861 udp_t *udp = connp->conn_udp; 1862 1863 /* If there are any options associated with the stream, free them. 
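 * The IPv4 send/receive options below are released with mi_free(), while
 * the sticky IPv6 extension headers that follow are released with
 * kmem_free() using the lengths recorded in udp_sticky_ipp.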
*/ 1864 if (udp->udp_ip_snd_options) { 1865 mi_free((char *)udp->udp_ip_snd_options); 1866 udp->udp_ip_snd_options = NULL; 1867 } 1868 1869 if (udp->udp_ip_rcv_options) { 1870 mi_free((char *)udp->udp_ip_rcv_options); 1871 udp->udp_ip_rcv_options = NULL; 1872 } 1873 1874 /* Free memory associated with sticky options */ 1875 if (udp->udp_sticky_hdrs_len != 0) { 1876 kmem_free(udp->udp_sticky_hdrs, 1877 udp->udp_sticky_hdrs_len); 1878 udp->udp_sticky_hdrs = NULL; 1879 udp->udp_sticky_hdrs_len = 0; 1880 } 1881 1882 if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 1883 kmem_free(udp->udp_sticky_ipp.ipp_hopopts, 1884 udp->udp_sticky_ipp.ipp_hopoptslen); 1885 udp->udp_sticky_ipp.ipp_hopopts = NULL; 1886 } 1887 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 1888 kmem_free(udp->udp_sticky_ipp.ipp_rtdstopts, 1889 udp->udp_sticky_ipp.ipp_rtdstoptslen); 1890 udp->udp_sticky_ipp.ipp_rtdstopts = NULL; 1891 } 1892 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 1893 kmem_free(udp->udp_sticky_ipp.ipp_rthdr, 1894 udp->udp_sticky_ipp.ipp_rthdrlen); 1895 udp->udp_sticky_ipp.ipp_rthdr = NULL; 1896 } 1897 if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 1898 kmem_free(udp->udp_sticky_ipp.ipp_dstopts, 1899 udp->udp_sticky_ipp.ipp_dstoptslen); 1900 udp->udp_sticky_ipp.ipp_dstopts = NULL; 1901 } 1902 udp->udp_sticky_ipp.ipp_fields &= 1903 ~(IPPF_HOPOPTS|IPPF_RTDSTOPTS|IPPF_RTHDR|IPPF_DSTOPTS); 1904 1905 udp->udp_connp = NULL; 1906 connp->conn_udp = NULL; 1907 kmem_cache_free(udp_cache, udp); 1908 } 1909 1910 /* 1911 * This routine handles each T_DISCON_REQ message passed to udp 1912 * as an indication that UDP is no longer connected. This results 1913 * in sending a T_BIND_REQ to IP to restore the binding to just 1914 * the local address/port. 1915 * 1916 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1917 * T_BIND_REQ - specifying just the local address/port 1918 * T_OK_ACK - for the T_DISCON_REQ 1919 * 1920 * The disconnect completes in udp_rput. 1921 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 1922 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1923 * it to an error ack for the appropriate primitive. 1924 */ 1925 static void 1926 udp_disconnect(queue_t *q, mblk_t *mp) 1927 { 1928 udp_t *udp = Q_TO_UDP(q); 1929 mblk_t *mp1; 1930 udp_fanout_t *udpf; 1931 1932 if (udp->udp_state != TS_DATA_XFER) { 1933 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1934 "udp_disconnect: bad state, %u", udp->udp_state); 1935 udp_err_ack(q, mp, TOUTSTATE, 0); 1936 return; 1937 } 1938 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1939 mutex_enter(&udpf->uf_lock); 1940 udp->udp_v6src = udp->udp_bound_v6src; 1941 udp->udp_state = TS_IDLE; 1942 mutex_exit(&udpf->uf_lock); 1943 1944 /* 1945 * Send down bind to IP to remove the full binding and revert 1946 * to the local address binding. 1947 */ 1948 if (udp->udp_family == AF_INET) 1949 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 1950 else 1951 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 1952 if (mp1 == NULL) { 1953 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1954 return; 1955 } 1956 mp = mi_tpi_ok_ack_alloc(mp); 1957 if (mp == NULL) { 1958 /* Unable to reuse the T_DISCON_REQ for the ack.
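 * In that case the T_BIND_REQ mblk (mp1) is reused to generate a
 * T_ERROR_ACK naming T_DISCON_REQ.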
*/ 1959 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 1960 return; 1961 } 1962 1963 if (udp->udp_family == AF_INET6) { 1964 int error; 1965 1966 /* Rebuild the header template */ 1967 error = udp_build_hdrs(q, udp); 1968 if (error != 0) { 1969 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 1970 freemsg(mp1); 1971 return; 1972 } 1973 } 1974 mutex_enter(&udpf->uf_lock); 1975 udp->udp_discon_pending = 1; 1976 mutex_exit(&udpf->uf_lock); 1977 1978 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 1979 linkb(mp1, mp); 1980 1981 if (udp->udp_family == AF_INET6) 1982 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1983 else 1984 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1985 1986 if (mp1 != NULL) 1987 udp_rput_other(_RD(q), mp1); 1988 else 1989 CONN_INC_REF(udp->udp_connp); 1990 } 1991 1992 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1993 static void 1994 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1995 { 1996 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1997 putnext(UDP_RD(q), mp); 1998 } 1999 2000 /* Shorthand to generate and send TPI error acks to our client */ 2001 static void 2002 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2003 int sys_error) 2004 { 2005 struct T_error_ack *teackp; 2006 2007 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2008 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2009 teackp = (struct T_error_ack *)mp->b_rptr; 2010 teackp->ERROR_prim = primitive; 2011 teackp->TLI_error = t_error; 2012 teackp->UNIX_error = sys_error; 2013 putnext(UDP_RD(q), mp); 2014 } 2015 } 2016 2017 /*ARGSUSED*/ 2018 static int 2019 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2020 { 2021 int i; 2022 2023 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2024 if (udp_g_epriv_ports[i] != 0) 2025 (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); 2026 } 2027 return (0); 2028 } 2029 2030 /* ARGSUSED */ 2031 static int 2032 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2033 cred_t *cr) 2034 { 2035 long new_value; 2036 int i; 2037 2038 /* 2039 * Fail the request if the new value does not lie within the 2040 * port number limits. 2041 */ 2042 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2043 new_value <= 0 || new_value >= 65536) { 2044 return (EINVAL); 2045 } 2046 2047 /* Check if the value is already in the list */ 2048 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2049 if (new_value == udp_g_epriv_ports[i]) { 2050 return (EEXIST); 2051 } 2052 } 2053 /* Find an empty slot */ 2054 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2055 if (udp_g_epriv_ports[i] == 0) 2056 break; 2057 } 2058 if (i == udp_g_num_epriv_ports) { 2059 return (EOVERFLOW); 2060 } 2061 2062 /* Set the new value */ 2063 udp_g_epriv_ports[i] = (in_port_t)new_value; 2064 return (0); 2065 } 2066 2067 /* ARGSUSED */ 2068 static int 2069 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2070 cred_t *cr) 2071 { 2072 long new_value; 2073 int i; 2074 2075 /* 2076 * Fail the request if the new value does not lie within the 2077 * port number limits. 
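 * (i.e. it must parse as a decimal port number in the range 1-65535).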
2078 */ 2079 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2080 new_value <= 0 || new_value >= 65536) { 2081 return (EINVAL); 2082 } 2083 2084 /* Check that the value is already in the list */ 2085 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2086 if (udp_g_epriv_ports[i] == new_value) 2087 break; 2088 } 2089 if (i == udp_g_num_epriv_ports) { 2090 return (ESRCH); 2091 } 2092 2093 /* Clear the value */ 2094 udp_g_epriv_ports[i] = 0; 2095 return (0); 2096 } 2097 2098 /* At minimum we need 4 bytes of UDP header */ 2099 #define ICMP_MIN_UDP_HDR 4 2100 2101 /* 2102 * udp_icmp_error is called by udp_rput to process ICMP messages passed up by IP. 2103 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2104 * Assumes that IP has pulled up everything up to and including the ICMP header. 2105 * An M_CTL could potentially come here from some other module (i.e. if UDP 2106 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2107 * does not have enough ICMP information, following STREAMS conventions, 2108 * we send it upstream assuming it is an M_CTL we don't understand. 2109 */ 2110 static void 2111 udp_icmp_error(queue_t *q, mblk_t *mp) 2112 { 2113 icmph_t *icmph; 2114 ipha_t *ipha; 2115 int iph_hdr_length; 2116 udpha_t *udpha; 2117 sin_t sin; 2118 sin6_t sin6; 2119 mblk_t *mp1; 2120 int error = 0; 2121 size_t mp_size = MBLKL(mp); 2122 udp_t *udp = Q_TO_UDP(q); 2123 2124 /* 2125 * Assume IP provides aligned packets - otherwise toss 2126 */ 2127 if (!OK_32PTR(mp->b_rptr)) { 2128 freemsg(mp); 2129 return; 2130 } 2131 2132 /* 2133 * Verify that we have a complete IP header and the application has 2134 * asked for errors. If not, send it upstream. 2135 */ 2136 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2137 noticmpv4: 2138 putnext(UDP_RD(q), mp); 2139 return; 2140 } 2141 2142 ipha = (ipha_t *)mp->b_rptr; 2143 /* 2144 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2145 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6. 2146 */ 2147 switch (IPH_HDR_VERSION(ipha)) { 2148 case IPV6_VERSION: 2149 udp_icmp_error_ipv6(q, mp); 2150 return; 2151 case IPV4_VERSION: 2152 break; 2153 default: 2154 goto noticmpv4; 2155 } 2156 2157 /* Skip past the outer IP and ICMP headers */ 2158 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2159 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2160 /* 2161 * If we don't have the correct outer IP header length or if the ULP 2162 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2163 * send the packet upstream. 2164 */ 2165 if (iph_hdr_length < sizeof (ipha_t) || 2166 ipha->ipha_protocol != IPPROTO_ICMP || 2167 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2168 goto noticmpv4; 2169 } 2170 ipha = (ipha_t *)&icmph[1]; 2171 2172 /* Skip past the inner IP and find the ULP header */ 2173 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2174 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2175 /* 2176 * If we don't have the correct inner IP header length or if the ULP 2177 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2178 * bytes of UDP header, send it upstream. 2179 */ 2180 if (iph_hdr_length < sizeof (ipha_t) || 2181 ipha->ipha_protocol != IPPROTO_UDP || 2182 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2183 goto noticmpv4; 2184 } 2185 2186 switch (icmph->icmph_type) { 2187 case ICMP_DEST_UNREACHABLE: 2188 switch (icmph->icmph_code) { 2189 case ICMP_FRAGMENTATION_NEEDED: 2190 /* 2191 * IP has already adjusted the path MTU.
2192 * XXX Somehow pass MTU indication to application? 2193 */ 2194 break; 2195 case ICMP_PORT_UNREACHABLE: 2196 case ICMP_PROTOCOL_UNREACHABLE: 2197 error = ECONNREFUSED; 2198 break; 2199 default: 2200 /* Transient errors */ 2201 break; 2202 } 2203 break; 2204 default: 2205 /* Transient errors */ 2206 break; 2207 } 2208 if (error == 0) { 2209 freemsg(mp); 2210 return; 2211 } 2212 2213 switch (udp->udp_family) { 2214 case AF_INET: 2215 sin = sin_null; 2216 sin.sin_family = AF_INET; 2217 sin.sin_addr.s_addr = ipha->ipha_dst; 2218 sin.sin_port = udpha->uha_dst_port; 2219 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2220 error); 2221 break; 2222 case AF_INET6: 2223 sin6 = sin6_null; 2224 sin6.sin6_family = AF_INET6; 2225 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2226 sin6.sin6_port = udpha->uha_dst_port; 2227 2228 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2229 NULL, 0, error); 2230 break; 2231 } 2232 if (mp1) 2233 putnext(UDP_RD(q), mp1); 2234 freemsg(mp); 2235 } 2236 2237 /* 2238 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2239 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2240 * Assumes that IP has pulled up all the extension headers as well as the 2241 * ICMPv6 header. 2242 * An M_CTL could potentially come here from some other module (i.e. if UDP 2243 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2244 * does not have enough ICMP information, following STREAMS conventions, 2245 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2246 * it might get here is if the non-ICMP M_CTL accidentally has 6 in the version 2247 * field (when cast to ipha_t in udp_icmp_error). 2248 */ 2249 static void 2250 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2251 { 2252 icmp6_t *icmp6; 2253 ip6_t *ip6h, *outer_ip6h; 2254 uint16_t hdr_length; 2255 uint8_t *nexthdrp; 2256 udpha_t *udpha; 2257 sin6_t sin6; 2258 mblk_t *mp1; 2259 int error = 0; 2260 size_t mp_size = MBLKL(mp); 2261 udp_t *udp = Q_TO_UDP(q); 2262 2263 /* 2264 * Verify that we have a complete IP header. If not, send it upstream. 2265 */ 2266 if (mp_size < sizeof (ip6_t)) { 2267 noticmpv6: 2268 putnext(UDP_RD(q), mp); 2269 return; 2270 } 2271 2272 outer_ip6h = (ip6_t *)mp->b_rptr; 2273 /* 2274 * Verify this is an ICMPV6 packet, else send it upstream 2275 */ 2276 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2277 hdr_length = IPV6_HDR_LEN; 2278 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2279 &nexthdrp) || 2280 *nexthdrp != IPPROTO_ICMPV6) { 2281 goto noticmpv6; 2282 } 2283 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2284 ip6h = (ip6_t *)&icmp6[1]; 2285 /* 2286 * Verify we have a complete ICMP and inner IP header. 2287 */ 2288 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2289 goto noticmpv6; 2290 2291 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2292 goto noticmpv6; 2293 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2294 /* 2295 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2296 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2297 * packet upstream.
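 * (ICMP_MIN_UDP_HDR is 4 bytes, just enough to recover the source and
 * destination ports.)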
2298 */ 2299 if ((*nexthdrp != IPPROTO_UDP) || 2300 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2301 goto noticmpv6; 2302 } 2303 2304 switch (icmp6->icmp6_type) { 2305 case ICMP6_DST_UNREACH: 2306 switch (icmp6->icmp6_code) { 2307 case ICMP6_DST_UNREACH_NOPORT: 2308 error = ECONNREFUSED; 2309 break; 2310 case ICMP6_DST_UNREACH_ADMIN: 2311 case ICMP6_DST_UNREACH_NOROUTE: 2312 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2313 case ICMP6_DST_UNREACH_ADDR: 2314 /* Transient errors */ 2315 break; 2316 default: 2317 break; 2318 } 2319 break; 2320 case ICMP6_PACKET_TOO_BIG: { 2321 struct T_unitdata_ind *tudi; 2322 struct T_opthdr *toh; 2323 size_t udi_size; 2324 mblk_t *newmp; 2325 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2326 sizeof (struct ip6_mtuinfo); 2327 sin6_t *sin6; 2328 struct ip6_mtuinfo *mtuinfo; 2329 2330 /* 2331 * If the application has requested to receive path mtu 2332 * information, send up an empty message containing an 2333 * IPV6_PATHMTU ancillary data item. 2334 */ 2335 if (!udp->udp_ipv6_recvpathmtu) 2336 break; 2337 2338 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2339 opt_length; 2340 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2341 BUMP_MIB(&udp_mib, udpInErrors); 2342 break; 2343 } 2344 2345 /* 2346 * newmp->b_cont is left to NULL on purpose. This is an 2347 * empty message containing only ancillary data. 2348 */ 2349 newmp->b_datap->db_type = M_PROTO; 2350 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2351 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2352 tudi->PRIM_type = T_UNITDATA_IND; 2353 tudi->SRC_length = sizeof (sin6_t); 2354 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2355 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2356 tudi->OPT_length = opt_length; 2357 2358 sin6 = (sin6_t *)&tudi[1]; 2359 bzero(sin6, sizeof (sin6_t)); 2360 sin6->sin6_family = AF_INET6; 2361 sin6->sin6_addr = udp->udp_v6dst; 2362 2363 toh = (struct T_opthdr *)&sin6[1]; 2364 toh->level = IPPROTO_IPV6; 2365 toh->name = IPV6_PATHMTU; 2366 toh->len = opt_length; 2367 toh->status = 0; 2368 2369 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2370 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2371 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2372 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2373 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2374 /* 2375 * We've consumed everything we need from the original 2376 * message. Free it, then send our empty message. 2377 */ 2378 freemsg(mp); 2379 putnext(UDP_RD(q), newmp); 2380 return; 2381 } 2382 case ICMP6_TIME_EXCEEDED: 2383 /* Transient errors */ 2384 break; 2385 case ICMP6_PARAM_PROB: 2386 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2387 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2388 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2389 (uchar_t *)nexthdrp) { 2390 error = ECONNREFUSED; 2391 break; 2392 } 2393 break; 2394 } 2395 if (error == 0) { 2396 freemsg(mp); 2397 return; 2398 } 2399 2400 sin6 = sin6_null; 2401 sin6.sin6_family = AF_INET6; 2402 sin6.sin6_addr = ip6h->ip6_dst; 2403 sin6.sin6_port = udpha->uha_dst_port; 2404 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2405 2406 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2407 error); 2408 if (mp1) 2409 putnext(UDP_RD(q), mp1); 2410 freemsg(mp); 2411 } 2412 2413 /* 2414 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 2415 * The local address is filled in if endpoint is bound. 
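 * (that is, when udp_state != TS_UNBND).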
The remote address 2416 * is filled in if the remote address has been specified ("connected endpoint") 2417 * (The concept of connected CLTS sockets is alien to published TPI 2418 * but we support it anyway). 2419 */ 2420 static void 2421 udp_addr_req(queue_t *q, mblk_t *mp) 2422 { 2423 sin_t *sin; 2424 sin6_t *sin6; 2425 mblk_t *ackmp; 2426 struct T_addr_ack *taa; 2427 udp_t *udp = Q_TO_UDP(q); 2428 2429 /* Make it large enough for worst case */ 2430 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2431 2 * sizeof (sin6_t), 1); 2432 if (ackmp == NULL) { 2433 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2434 return; 2435 } 2436 taa = (struct T_addr_ack *)ackmp->b_rptr; 2437 2438 bzero(taa, sizeof (struct T_addr_ack)); 2439 ackmp->b_wptr = (uchar_t *)&taa[1]; 2440 2441 taa->PRIM_type = T_ADDR_ACK; 2442 ackmp->b_datap->db_type = M_PCPROTO; 2443 /* 2444 * Note: Following code assumes 32 bit alignment of basic 2445 * data structures like sin_t and struct T_addr_ack. 2446 */ 2447 if (udp->udp_state != TS_UNBND) { 2448 /* 2449 * Fill in local address first 2450 */ 2451 taa->LOCADDR_offset = sizeof (*taa); 2452 if (udp->udp_family == AF_INET) { 2453 taa->LOCADDR_length = sizeof (sin_t); 2454 sin = (sin_t *)&taa[1]; 2455 /* Fill zeroes and then initialize non-zero fields */ 2456 *sin = sin_null; 2457 sin->sin_family = AF_INET; 2458 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2459 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2460 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2461 sin->sin_addr.s_addr); 2462 } else { 2463 /* 2464 * INADDR_ANY 2465 * udp_v6src is not set, we might be bound to 2466 * broadcast/multicast. Use udp_bound_v6src as 2467 * local address instead (that could 2468 * also still be INADDR_ANY) 2469 */ 2470 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2471 sin->sin_addr.s_addr); 2472 } 2473 sin->sin_port = udp->udp_port; 2474 ackmp->b_wptr = (uchar_t *)&sin[1]; 2475 if (udp->udp_state == TS_DATA_XFER) { 2476 /* 2477 * connected, fill remote address too 2478 */ 2479 taa->REMADDR_length = sizeof (sin_t); 2480 /* assumed 32-bit alignment */ 2481 taa->REMADDR_offset = taa->LOCADDR_offset + 2482 taa->LOCADDR_length; 2483 2484 sin = (sin_t *)(ackmp->b_rptr + 2485 taa->REMADDR_offset); 2486 /* initialize */ 2487 *sin = sin_null; 2488 sin->sin_family = AF_INET; 2489 sin->sin_addr.s_addr = 2490 V4_PART_OF_V6(udp->udp_v6dst); 2491 sin->sin_port = udp->udp_dstport; 2492 ackmp->b_wptr = (uchar_t *)&sin[1]; 2493 } 2494 } else { 2495 taa->LOCADDR_length = sizeof (sin6_t); 2496 sin6 = (sin6_t *)&taa[1]; 2497 /* Fill zeroes and then initialize non-zero fields */ 2498 *sin6 = sin6_null; 2499 sin6->sin6_family = AF_INET6; 2500 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2501 sin6->sin6_addr = udp->udp_v6src; 2502 } else { 2503 /* 2504 * UNSPECIFIED 2505 * udp_v6src is not set, we might be bound to 2506 * broadcast/multicast.
Use udp_bound_v6src as 2507 * local address instead (that could 2508 * also still be UNSPECIFIED) 2509 */ 2510 sin6->sin6_addr = 2511 udp->udp_bound_v6src; 2512 } 2513 sin6->sin6_port = udp->udp_port; 2514 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2515 if (udp->udp_state == TS_DATA_XFER) { 2516 /* 2517 * connected, fill remote address too 2518 */ 2519 taa->REMADDR_length = sizeof (sin6_t); 2520 /* assumed 32-bit alignment */ 2521 taa->REMADDR_offset = taa->LOCADDR_offset + 2522 taa->LOCADDR_length; 2523 2524 sin6 = (sin6_t *)(ackmp->b_rptr + 2525 taa->REMADDR_offset); 2526 /* initialize */ 2527 *sin6 = sin6_null; 2528 sin6->sin6_family = AF_INET6; 2529 sin6->sin6_addr = udp->udp_v6dst; 2530 sin6->sin6_port = udp->udp_dstport; 2531 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2532 } 2533 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2534 } 2535 } 2536 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2537 putnext(UDP_RD(q), ackmp); 2538 } 2539 2540 static void 2541 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2542 { 2543 if (udp->udp_family == AF_INET) { 2544 *tap = udp_g_t_info_ack_ipv4; 2545 } else { 2546 *tap = udp_g_t_info_ack_ipv6; 2547 } 2548 tap->CURRENT_state = udp->udp_state; 2549 tap->OPT_size = udp_max_optsize; 2550 } 2551 2552 /* 2553 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2554 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2555 * udp_g_t_info_ack. The current state of the stream is copied from 2556 * udp_state. 2557 */ 2558 static void 2559 udp_capability_req(queue_t *q, mblk_t *mp) 2560 { 2561 t_uscalar_t cap_bits1; 2562 struct T_capability_ack *tcap; 2563 udp_t *udp = Q_TO_UDP(q); 2564 2565 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2566 2567 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2568 mp->b_datap->db_type, T_CAPABILITY_ACK); 2569 if (!mp) 2570 return; 2571 2572 tcap = (struct T_capability_ack *)mp->b_rptr; 2573 tcap->CAP_bits1 = 0; 2574 2575 if (cap_bits1 & TC1_INFO) { 2576 udp_copy_info(&tcap->INFO_ack, udp); 2577 tcap->CAP_bits1 |= TC1_INFO; 2578 } 2579 2580 putnext(UDP_RD(q), mp); 2581 } 2582 2583 /* 2584 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2585 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2586 * The current state of the stream is copied from udp_state. 2587 */ 2588 static void 2589 udp_info_req(queue_t *q, mblk_t *mp) 2590 { 2591 udp_t *udp = Q_TO_UDP(q); 2592 2593 /* Create a T_INFO_ACK message. */ 2594 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2595 T_INFO_ACK); 2596 if (!mp) 2597 return; 2598 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2599 putnext(UDP_RD(q), mp); 2600 } 2601 2602 /* 2603 * IP recognizes seven kinds of bind requests: 2604 * 2605 * - A zero-length address binds only to the protocol number. 2606 * 2607 * - A 4-byte address is treated as a request to 2608 * validate that the address is a valid local IPv4 2609 * address, appropriate for an application to bind to. 2610 * IP does the verification, but does not make any note 2611 * of the address at this time. 2612 * 2613 * - A 16-byte address is treated as a request 2614 * to validate a local IPv6 address, as the 4-byte 2615 * address case above. 2616 * 2617 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2618 * use it for the inbound fanout of packets. 2619 * 2620 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2621 * use it for the inbound fanout of packets.
2622 * 2623 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2624 * information consisting of local and remote addresses 2625 * and ports. In this case, the addresses are both 2626 * validated as appropriate for this operation, and, if 2627 * so, the information is retained for use in the 2628 * inbound fanout. 2629 * 2630 * - A 36-byte address (ipa6_conn_t) containing complete IPv6 2631 * fanout information, like the 12-byte case above. 2632 * 2633 * IP will also fill in the IRE request mblk with information 2634 * regarding our peer. In all cases, we notify IP of our protocol 2635 * type by appending a single protocol byte to the bind request. 2636 */ 2637 static mblk_t * 2638 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2639 { 2640 char *cp; 2641 mblk_t *mp; 2642 struct T_bind_req *tbr; 2643 ipa_conn_t *ac; 2644 ipa6_conn_t *ac6; 2645 sin_t *sin; 2646 sin6_t *sin6; 2647 2648 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2649 2650 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2651 if (!mp) 2652 return (mp); 2653 mp->b_datap->db_type = M_PROTO; 2654 tbr = (struct T_bind_req *)mp->b_rptr; 2655 tbr->PRIM_type = bind_prim; 2656 tbr->ADDR_offset = sizeof (*tbr); 2657 tbr->CONIND_number = 0; 2658 tbr->ADDR_length = addr_length; 2659 cp = (char *)&tbr[1]; 2660 switch (addr_length) { 2661 case sizeof (ipa_conn_t): 2662 ASSERT(udp->udp_family == AF_INET); 2663 /* Append a request for an IRE */ 2664 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2665 if (!mp->b_cont) { 2666 freemsg(mp); 2667 return (NULL); 2668 } 2669 mp->b_cont->b_wptr += sizeof (ire_t); 2670 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2671 2672 /* cp known to be 32 bit aligned */ 2673 ac = (ipa_conn_t *)cp; 2674 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2675 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 2676 ac->ac_fport = udp->udp_dstport; 2677 ac->ac_lport = udp->udp_port; 2678 break; 2679 2680 case sizeof (ipa6_conn_t): 2681 ASSERT(udp->udp_family == AF_INET6); 2682 /* Append a request for an IRE */ 2683 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2684 if (!mp->b_cont) { 2685 freemsg(mp); 2686 return (NULL); 2687 } 2688 mp->b_cont->b_wptr += sizeof (ire_t); 2689 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2690 2691 /* cp known to be 32 bit aligned */ 2692 ac6 = (ipa6_conn_t *)cp; 2693 ac6->ac6_laddr = udp->udp_v6src; 2694 ac6->ac6_faddr = udp->udp_v6dst; 2695 ac6->ac6_fport = udp->udp_dstport; 2696 ac6->ac6_lport = udp->udp_port; 2697 break; 2698 2699 case sizeof (sin_t): 2700 ASSERT(udp->udp_family == AF_INET); 2701 /* Append a request for an IRE */ 2702 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2703 if (!mp->b_cont) { 2704 freemsg(mp); 2705 return (NULL); 2706 } 2707 mp->b_cont->b_wptr += sizeof (ire_t); 2708 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2709 2710 sin = (sin_t *)cp; 2711 *sin = sin_null; 2712 sin->sin_family = AF_INET; 2713 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2714 sin->sin_port = udp->udp_port; 2715 break; 2716 2717 case sizeof (sin6_t): 2718 ASSERT(udp->udp_family == AF_INET6); 2719 /* Append a request for an IRE */ 2720 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2721 if (!mp->b_cont) { 2722 freemsg(mp); 2723 return (NULL); 2724 } 2725 mp->b_cont->b_wptr += sizeof (ire_t); 2726 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2727 2728 sin6 = (sin6_t *)cp; 2729 *sin6 = sin6_null; 2730 sin6->sin6_family = AF_INET6; 2731 sin6->sin6_addr = udp->udp_bound_v6src; 2732 sin6->sin6_port =
udp->udp_port; 2733 break; 2734 } 2735 /* Add protocol number to end */ 2736 cp[addr_length] = (char)IPPROTO_UDP; 2737 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2738 return (mp); 2739 } 2740 2741 /* 2742 * This is the open routine for udp. It allocates a udp_t structure for 2743 * the stream and, on the first open of the module, creates an ND table. 2744 */ 2745 /* ARGSUSED */ 2746 static int 2747 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2748 { 2749 int err; 2750 udp_t *udp; 2751 conn_t *connp; 2752 zoneid_t zoneid = getzoneid(); 2753 queue_t *ip_wq; 2754 char *name; 2755 2756 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2757 2758 /* If the stream is already open, return immediately. */ 2759 if (q->q_ptr != NULL) 2760 return (0); 2761 2762 /* If this is not a push of udp as a module, fail. */ 2763 if (sflag != MODOPEN) 2764 return (EINVAL); 2765 2766 q->q_hiwat = udp_recv_hiwat; 2767 WR(q)->q_hiwat = udp_xmit_hiwat; 2768 WR(q)->q_lowat = udp_xmit_lowat; 2769 2770 /* Insert ourselves in the stream since we're about to walk q_next */ 2771 qprocson(q); 2772 2773 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2774 bzero(udp, sizeof (*udp)); 2775 2776 /* 2777 * UDP is supported only as a module and it has to be pushed directly 2778 * above the device instance of IP. If UDP is pushed anywhere else 2779 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2780 * sake of MIB browsers and fail everything else. 2781 */ 2782 ip_wq = WR(q)->q_next; 2783 if (ip_wq->q_next != NULL || 2784 (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL || 2785 strcmp(name, IP_MOD_NAME) != 0 || 2786 ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) { 2787 /* Support just SNMP for MIB browsers */ 2788 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); 2789 connp->conn_rq = q; 2790 connp->conn_wq = WR(q); 2791 connp->conn_flags |= IPCL_UDPMOD; 2792 connp->conn_cred = credp; 2793 connp->conn_zoneid = zoneid; 2794 connp->conn_udp = udp; 2795 udp->udp_connp = connp; 2796 q->q_ptr = WR(q)->q_ptr = connp; 2797 crhold(credp); 2798 q->q_qinfo = &udp_snmp_rinit; 2799 WR(q)->q_qinfo = &udp_snmp_winit; 2800 return (0); 2801 } 2802 2803 /* 2804 * Initialize the udp_t structure for this stream. 2805 */ 2806 q = RD(ip_wq); 2807 connp = Q_TO_CONN(q); 2808 mutex_enter(&connp->conn_lock); 2809 connp->conn_proto = IPPROTO_UDP; 2810 connp->conn_flags |= IPCL_UDP; 2811 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2812 connp->conn_udp = udp; 2813 2814 /* Set the initial state of the stream and the privilege status. 
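 * The endpoint starts out unbound (TS_UNBND) and fully multi-threaded
 * (UDP_MT_HOT); whether it becomes an IPv4 or an IPv6 endpoint is decided
 * by the major device that was originally opened (UDP6_MAJ => AF_INET6).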
*/ 2815 udp->udp_connp = connp; 2816 udp->udp_state = TS_UNBND; 2817 udp->udp_mode = UDP_MT_HOT; 2818 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2819 udp->udp_family = AF_INET6; 2820 udp->udp_ipversion = IPV6_VERSION; 2821 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2822 udp->udp_ttl = udp_ipv6_hoplimit; 2823 connp->conn_af_isv6 = B_TRUE; 2824 connp->conn_flags |= IPCL_ISV6; 2825 } else { 2826 udp->udp_family = AF_INET; 2827 udp->udp_ipversion = IPV4_VERSION; 2828 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2829 udp->udp_ttl = udp_ipv4_ttl; 2830 connp->conn_af_isv6 = B_FALSE; 2831 connp->conn_flags &= ~IPCL_ISV6; 2832 } 2833 2834 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2835 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2836 connp->conn_zoneid = zoneid; 2837 2838 if (connp->conn_flags & IPCL_SOCKET) { 2839 udp->udp_issocket = B_TRUE; 2840 udp->udp_direct_sockfs = B_TRUE; 2841 } 2842 mutex_exit(&connp->conn_lock); 2843 2844 /* 2845 * The transmit hiwat/lowat is only looked at on IP's queue. 2846 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2847 * getsockopts. 2848 */ 2849 q->q_hiwat = udp_recv_hiwat; 2850 WR(q)->q_hiwat = udp_xmit_hiwat; 2851 WR(q)->q_lowat = udp_xmit_lowat; 2852 2853 if (udp->udp_family == AF_INET6) { 2854 /* Build initial header template for transmit */ 2855 if ((err = udp_build_hdrs(q, udp)) != 0) { 2856 qprocsoff(UDP_RD(q)); 2857 udp->udp_connp = NULL; 2858 connp->conn_udp = NULL; 2859 kmem_cache_free(udp_cache, udp); 2860 return (err); 2861 } 2862 } 2863 2864 /* Set the Stream head write offset and high watermark. */ 2865 (void) mi_set_sth_wroff(UDP_RD(q), 2866 udp->udp_max_hdr_len + udp_wroff_extra); 2867 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 2868 2869 WR(UDP_RD(q))->q_qinfo = &udp_winit; 2870 2871 return (0); 2872 } 2873 2874 /* 2875 * Which UDP options OK to set through T_UNITDATA_REQ... 2876 */ 2877 /* ARGSUSED */ 2878 static boolean_t 2879 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2880 { 2881 return (B_TRUE); 2882 } 2883 2884 /* 2885 * This routine gets default values of certain options whose default 2886 * values are maintained by protcol specific code 2887 */ 2888 /* ARGSUSED */ 2889 int 2890 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2891 { 2892 int *i1 = (int *)ptr; 2893 2894 switch (level) { 2895 case IPPROTO_IP: 2896 switch (name) { 2897 case IP_MULTICAST_TTL: 2898 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2899 return (sizeof (uchar_t)); 2900 case IP_MULTICAST_LOOP: 2901 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2902 return (sizeof (uchar_t)); 2903 } 2904 break; 2905 case IPPROTO_IPV6: 2906 switch (name) { 2907 case IPV6_MULTICAST_HOPS: 2908 *i1 = IP_DEFAULT_MULTICAST_TTL; 2909 return (sizeof (int)); 2910 case IPV6_MULTICAST_LOOP: 2911 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2912 return (sizeof (int)); 2913 case IPV6_UNICAST_HOPS: 2914 *i1 = udp_ipv6_hoplimit; 2915 return (sizeof (int)); 2916 } 2917 break; 2918 } 2919 return (-1); 2920 } 2921 2922 /* 2923 * This routine retrieves the current status of socket options 2924 * and expects the caller to pass in the queue pointer of the 2925 * upper instance. It returns the size of the option retrieved. 
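 * A return value of -1 means the option is not supported at this level,
 * while a negative errno such as -EINVAL means the option is handled by
 * IP instead.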
2926 */ 2927 int 2928 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2929 { 2930 int *i1 = (int *)ptr; 2931 conn_t *connp; 2932 udp_t *udp; 2933 ip6_pkt_t *ipp; 2934 2935 q = UDP_WR(q); 2936 connp = Q_TO_CONN(q); 2937 udp = connp->conn_udp; 2938 ipp = &udp->udp_sticky_ipp; 2939 2940 switch (level) { 2941 case SOL_SOCKET: 2942 switch (name) { 2943 case SO_DEBUG: 2944 *i1 = udp->udp_debug; 2945 break; /* goto sizeof (int) option return */ 2946 case SO_REUSEADDR: 2947 *i1 = udp->udp_reuseaddr; 2948 break; /* goto sizeof (int) option return */ 2949 case SO_TYPE: 2950 *i1 = SOCK_DGRAM; 2951 break; /* goto sizeof (int) option return */ 2952 2953 /* 2954 * The following three items are available here, 2955 * but are only meaningful to IP. 2956 */ 2957 case SO_DONTROUTE: 2958 *i1 = udp->udp_dontroute; 2959 break; /* goto sizeof (int) option return */ 2960 case SO_USELOOPBACK: 2961 *i1 = udp->udp_useloopback; 2962 break; /* goto sizeof (int) option return */ 2963 case SO_BROADCAST: 2964 *i1 = udp->udp_broadcast; 2965 break; /* goto sizeof (int) option return */ 2966 2967 case SO_SNDBUF: 2968 *i1 = q->q_hiwat; 2969 break; /* goto sizeof (int) option return */ 2970 case SO_RCVBUF: 2971 *i1 = RD(q)->q_hiwat; 2972 break; /* goto sizeof (int) option return */ 2973 case SO_DGRAM_ERRIND: 2974 *i1 = udp->udp_dgram_errind; 2975 break; /* goto sizeof (int) option return */ 2976 case SO_RECVUCRED: 2977 *i1 = udp->udp_recvucred; 2978 break; /* goto sizeof (int) option return */ 2979 case SO_TIMESTAMP: 2980 *i1 = udp->udp_timestamp; 2981 break; 2982 default: 2983 return (-1); 2984 } 2985 break; 2986 case IPPROTO_IP: 2987 if (udp->udp_family != AF_INET) 2988 return (-1); 2989 switch (name) { 2990 case IP_OPTIONS: 2991 case T_IP_OPTIONS: 2992 if (udp->udp_ip_rcv_options_len) 2993 bcopy(udp->udp_ip_rcv_options, ptr, 2994 udp->udp_ip_rcv_options_len); 2995 return (udp->udp_ip_rcv_options_len); 2996 case IP_TOS: 2997 case T_IP_TOS: 2998 *i1 = (int)udp->udp_type_of_service; 2999 break; /* goto sizeof (int) option return */ 3000 case IP_TTL: 3001 *i1 = (int)udp->udp_ttl; 3002 break; /* goto sizeof (int) option return */ 3003 case IP_NEXTHOP: 3004 /* Handled at IP level */ 3005 return (-EINVAL); 3006 case IP_MULTICAST_IF: 3007 /* 0 address if not set */ 3008 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 3009 return (sizeof (ipaddr_t)); 3010 case IP_MULTICAST_TTL: 3011 *(uchar_t *)ptr = udp->udp_multicast_ttl; 3012 return (sizeof (uchar_t)); 3013 case IP_MULTICAST_LOOP: 3014 *ptr = connp->conn_multicast_loop; 3015 return (sizeof (uint8_t)); 3016 case IP_RECVOPTS: 3017 *i1 = udp->udp_recvopts; 3018 break; /* goto sizeof (int) option return */ 3019 case IP_RECVDSTADDR: 3020 *i1 = udp->udp_recvdstaddr; 3021 break; /* goto sizeof (int) option return */ 3022 case IP_RECVIF: 3023 *i1 = udp->udp_recvif; 3024 break; /* goto sizeof (int) option return */ 3025 case IP_RECVSLLA: 3026 *i1 = udp->udp_recvslla; 3027 break; /* goto sizeof (int) option return */ 3028 case IP_RECVTTL: 3029 *i1 = udp->udp_recvttl; 3030 break; /* goto sizeof (int) option return */ 3031 case IP_ADD_MEMBERSHIP: 3032 case IP_DROP_MEMBERSHIP: 3033 case IP_BLOCK_SOURCE: 3034 case IP_UNBLOCK_SOURCE: 3035 case IP_ADD_SOURCE_MEMBERSHIP: 3036 case IP_DROP_SOURCE_MEMBERSHIP: 3037 case MCAST_JOIN_GROUP: 3038 case MCAST_LEAVE_GROUP: 3039 case MCAST_BLOCK_SOURCE: 3040 case MCAST_UNBLOCK_SOURCE: 3041 case MCAST_JOIN_SOURCE_GROUP: 3042 case MCAST_LEAVE_SOURCE_GROUP: 3043 case IP_DONTFAILOVER_IF: 3044 /* cannot "get" the value for these */ 3045 
return (-1); 3046 case IP_BOUND_IF: 3047 /* Zero if not set */ 3048 *i1 = udp->udp_bound_if; 3049 break; /* goto sizeof (int) option return */ 3050 case IP_UNSPEC_SRC: 3051 *i1 = udp->udp_unspec_source; 3052 break; /* goto sizeof (int) option return */ 3053 case IP_XMIT_IF: 3054 *i1 = udp->udp_xmit_if; 3055 break; /* goto sizeof (int) option return */ 3056 default: 3057 return (-1); 3058 } 3059 break; 3060 case IPPROTO_IPV6: 3061 if (udp->udp_family != AF_INET6) 3062 return (-1); 3063 switch (name) { 3064 case IPV6_UNICAST_HOPS: 3065 *i1 = (unsigned int)udp->udp_ttl; 3066 break; /* goto sizeof (int) option return */ 3067 case IPV6_MULTICAST_IF: 3068 /* 0 index if not set */ 3069 *i1 = udp->udp_multicast_if_index; 3070 break; /* goto sizeof (int) option return */ 3071 case IPV6_MULTICAST_HOPS: 3072 *i1 = udp->udp_multicast_ttl; 3073 break; /* goto sizeof (int) option return */ 3074 case IPV6_MULTICAST_LOOP: 3075 *i1 = connp->conn_multicast_loop; 3076 break; /* goto sizeof (int) option return */ 3077 case IPV6_JOIN_GROUP: 3078 case IPV6_LEAVE_GROUP: 3079 case MCAST_JOIN_GROUP: 3080 case MCAST_LEAVE_GROUP: 3081 case MCAST_BLOCK_SOURCE: 3082 case MCAST_UNBLOCK_SOURCE: 3083 case MCAST_JOIN_SOURCE_GROUP: 3084 case MCAST_LEAVE_SOURCE_GROUP: 3085 /* cannot "get" the value for these */ 3086 return (-1); 3087 case IPV6_BOUND_IF: 3088 /* Zero if not set */ 3089 *i1 = udp->udp_bound_if; 3090 break; /* goto sizeof (int) option return */ 3091 case IPV6_UNSPEC_SRC: 3092 *i1 = udp->udp_unspec_source; 3093 break; /* goto sizeof (int) option return */ 3094 case IPV6_RECVPKTINFO: 3095 *i1 = udp->udp_ipv6_recvpktinfo; 3096 break; /* goto sizeof (int) option return */ 3097 case IPV6_RECVTCLASS: 3098 *i1 = udp->udp_ipv6_recvtclass; 3099 break; /* goto sizeof (int) option return */ 3100 case IPV6_RECVPATHMTU: 3101 *i1 = udp->udp_ipv6_recvpathmtu; 3102 break; /* goto sizeof (int) option return */ 3103 case IPV6_RECVHOPLIMIT: 3104 *i1 = udp->udp_ipv6_recvhoplimit; 3105 break; /* goto sizeof (int) option return */ 3106 case IPV6_RECVHOPOPTS: 3107 *i1 = udp->udp_ipv6_recvhopopts; 3108 break; /* goto sizeof (int) option return */ 3109 case IPV6_RECVDSTOPTS: 3110 *i1 = udp->udp_ipv6_recvdstopts; 3111 break; /* goto sizeof (int) option return */ 3112 case _OLD_IPV6_RECVDSTOPTS: 3113 *i1 = udp->udp_old_ipv6_recvdstopts; 3114 break; /* goto sizeof (int) option return */ 3115 case IPV6_RECVRTHDRDSTOPTS: 3116 *i1 = udp->udp_ipv6_recvrthdrdstopts; 3117 break; /* goto sizeof (int) option return */ 3118 case IPV6_RECVRTHDR: 3119 *i1 = udp->udp_ipv6_recvrthdr; 3120 break; /* goto sizeof (int) option return */ 3121 case IPV6_PKTINFO: { 3122 /* XXX assumes that caller has room for max size! 
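 * i.e. at least sizeof (struct in6_pktinfo) bytes at ptr.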
*/ 3123 struct in6_pktinfo *pkti; 3124 3125 pkti = (struct in6_pktinfo *)ptr; 3126 if (ipp->ipp_fields & IPPF_IFINDEX) 3127 pkti->ipi6_ifindex = ipp->ipp_ifindex; 3128 else 3129 pkti->ipi6_ifindex = 0; 3130 if (ipp->ipp_fields & IPPF_ADDR) 3131 pkti->ipi6_addr = ipp->ipp_addr; 3132 else 3133 pkti->ipi6_addr = ipv6_all_zeros; 3134 return (sizeof (struct in6_pktinfo)); 3135 } 3136 case IPV6_TCLASS: 3137 if (ipp->ipp_fields & IPPF_TCLASS) 3138 *i1 = ipp->ipp_tclass; 3139 else 3140 *i1 = IPV6_FLOW_TCLASS( 3141 IPV6_DEFAULT_VERS_AND_FLOW); 3142 break; /* goto sizeof (int) option return */ 3143 case IPV6_NEXTHOP: { 3144 sin6_t *sin6 = (sin6_t *)ptr; 3145 3146 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 3147 return (0); 3148 *sin6 = sin6_null; 3149 sin6->sin6_family = AF_INET6; 3150 sin6->sin6_addr = ipp->ipp_nexthop; 3151 return (sizeof (sin6_t)); 3152 } 3153 case IPV6_HOPOPTS: 3154 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 3155 return (0); 3156 bcopy(ipp->ipp_hopopts, ptr, ipp->ipp_hopoptslen); 3157 return (ipp->ipp_hopoptslen); 3158 case IPV6_RTHDRDSTOPTS: 3159 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 3160 return (0); 3161 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 3162 return (ipp->ipp_rtdstoptslen); 3163 case IPV6_RTHDR: 3164 if (!(ipp->ipp_fields & IPPF_RTHDR)) 3165 return (0); 3166 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 3167 return (ipp->ipp_rthdrlen); 3168 case IPV6_DSTOPTS: 3169 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 3170 return (0); 3171 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 3172 return (ipp->ipp_dstoptslen); 3173 case IPV6_PATHMTU: 3174 return (ip_fill_mtuinfo(&udp->udp_v6dst, 3175 udp->udp_dstport, (struct ip6_mtuinfo *)ptr)); 3176 default: 3177 return (-1); 3178 } 3179 break; 3180 case IPPROTO_UDP: 3181 switch (name) { 3182 case UDP_ANONPRIVBIND: 3183 *i1 = udp->udp_anon_priv_bind; 3184 break; 3185 case UDP_EXCLBIND: 3186 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 3187 break; 3188 case UDP_RCVHDR: 3189 *i1 = udp->udp_rcvhdr ? 1 : 0; 3190 break; 3191 default: 3192 return (-1); 3193 } 3194 break; 3195 default: 3196 return (-1); 3197 } 3198 return (sizeof (int)); 3199 } 3200 3201 /* 3202 * This routine sets socket options; it expects the caller 3203 * to pass in the queue pointer of the upper instance. 3204 */ 3205 /* ARGSUSED */ 3206 int 3207 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3208 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3209 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3210 { 3211 int *i1 = (int *)invalp; 3212 boolean_t onoff = (*i1 == 0) ? 0 : 1; 3213 boolean_t checkonly; 3214 int error; 3215 conn_t *connp; 3216 udp_t *udp; 3217 3218 q = UDP_WR(q); 3219 connp = Q_TO_CONN(q); 3220 udp = connp->conn_udp; 3221 3222 switch (optset_context) { 3223 case SETFN_OPTCOM_CHECKONLY: 3224 checkonly = B_TRUE; 3225 /* 3226 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3227 * inlen != 0 implies value supplied and 3228 * we have to "pretend" to set it. 3229 * inlen == 0 implies that there is no 3230 * value part in T_CHECK request and just validation 3231 * done elsewhere should be enough, we just return here. 3232 */ 3233 if (inlen == 0) { 3234 *outlenp = 0; 3235 return (0); 3236 } 3237 break; 3238 case SETFN_OPTCOM_NEGOTIATE: 3239 checkonly = B_FALSE; 3240 break; 3241 case SETFN_UD_NEGOTIATE: 3242 case SETFN_CONN_NEGOTIATE: 3243 checkonly = B_FALSE; 3244 /* 3245 * Negotiating local and "association-related" options 3246 * through T_UNITDATA_REQ. 
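 * (i.e. options supplied as ancillary data along with an individual
 * datagram rather than via an explicit option management request).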
3247 * 3248 * Following routine can filter out ones we do not 3249 * want to be "set" this way. 3250 */ 3251 if (!udp_opt_allow_udr_set(level, name)) { 3252 *outlenp = 0; 3253 return (EINVAL); 3254 } 3255 break; 3256 default: 3257 /* 3258 * We should never get here 3259 */ 3260 *outlenp = 0; 3261 return (EINVAL); 3262 } 3263 3264 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3265 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3266 3267 /* 3268 * For fixed length options, no sanity check 3269 * of passed in length is done. It is assumed *_optcom_req() 3270 * routines do the right thing. 3271 */ 3272 3273 switch (level) { 3274 case SOL_SOCKET: 3275 switch (name) { 3276 case SO_REUSEADDR: 3277 if (!checkonly) 3278 udp->udp_reuseaddr = onoff; 3279 break; 3280 case SO_DEBUG: 3281 if (!checkonly) 3282 udp->udp_debug = onoff; 3283 break; 3284 /* 3285 * The following three items are available here, 3286 * but are only meaningful to IP. 3287 */ 3288 case SO_DONTROUTE: 3289 if (!checkonly) 3290 udp->udp_dontroute = onoff; 3291 break; 3292 case SO_USELOOPBACK: 3293 if (!checkonly) 3294 udp->udp_useloopback = onoff; 3295 break; 3296 case SO_BROADCAST: 3297 if (!checkonly) 3298 udp->udp_broadcast = onoff; 3299 break; 3300 3301 case SO_SNDBUF: 3302 if (*i1 > udp_max_buf) { 3303 *outlenp = 0; 3304 return (ENOBUFS); 3305 } 3306 if (!checkonly) { 3307 q->q_hiwat = *i1; 3308 WR(UDP_RD(q))->q_hiwat = *i1; 3309 } 3310 break; 3311 case SO_RCVBUF: 3312 if (*i1 > udp_max_buf) { 3313 *outlenp = 0; 3314 return (ENOBUFS); 3315 } 3316 if (!checkonly) { 3317 RD(q)->q_hiwat = *i1; 3318 UDP_RD(q)->q_hiwat = *i1; 3319 (void) mi_set_sth_hiwat(UDP_RD(q), 3320 udp_set_rcv_hiwat(udp, *i1)); 3321 } 3322 break; 3323 case SO_DGRAM_ERRIND: 3324 if (!checkonly) 3325 udp->udp_dgram_errind = onoff; 3326 break; 3327 case SO_RECVUCRED: 3328 if (!checkonly) 3329 udp->udp_recvucred = onoff; 3330 break; 3331 case SO_TIMESTAMP: 3332 if (!checkonly) 3333 udp->udp_timestamp = onoff; 3334 break; 3335 default: 3336 *outlenp = 0; 3337 return (EINVAL); 3338 } 3339 break; 3340 case IPPROTO_IP: 3341 if (udp->udp_family != AF_INET) { 3342 *outlenp = 0; 3343 return (ENOPROTOOPT); 3344 } 3345 switch (name) { 3346 case IP_OPTIONS: 3347 case T_IP_OPTIONS: 3348 /* Save options for use by IP. */ 3349 if (inlen & 0x3) { 3350 *outlenp = 0; 3351 return (EINVAL); 3352 } 3353 if (checkonly) 3354 break; 3355 3356 if (udp->udp_ip_snd_options) { 3357 mi_free((char *)udp->udp_ip_snd_options); 3358 udp->udp_ip_snd_options_len = 0; 3359 udp->udp_ip_snd_options = NULL; 3360 } 3361 if (inlen) { 3362 udp->udp_ip_snd_options = 3363 (uchar_t *)mi_alloc(inlen, BPRI_HI); 3364 if (udp->udp_ip_snd_options) { 3365 bcopy(invalp, udp->udp_ip_snd_options, 3366 inlen); 3367 udp->udp_ip_snd_options_len = inlen; 3368 } 3369 } 3370 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3371 UDPH_SIZE + udp->udp_ip_snd_options_len; 3372 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3373 udp_wroff_extra); 3374 break; 3375 case IP_TTL: 3376 if (!checkonly) { 3377 udp->udp_ttl = (uchar_t)*i1; 3378 } 3379 break; 3380 case IP_TOS: 3381 case T_IP_TOS: 3382 if (!checkonly) { 3383 udp->udp_type_of_service = (uchar_t)*i1; 3384 } 3385 break; 3386 case IP_MULTICAST_IF: { 3387 /* 3388 * TODO should check OPTMGMT reply and undo this if 3389 * there is an error. 
3390 */ 3391 struct in_addr *inap = (struct in_addr *)invalp; 3392 if (!checkonly) { 3393 udp->udp_multicast_if_addr = 3394 inap->s_addr; 3395 } 3396 break; 3397 } 3398 case IP_MULTICAST_TTL: 3399 if (!checkonly) 3400 udp->udp_multicast_ttl = *invalp; 3401 break; 3402 case IP_MULTICAST_LOOP: 3403 if (!checkonly) 3404 connp->conn_multicast_loop = *invalp; 3405 break; 3406 case IP_RECVOPTS: 3407 if (!checkonly) 3408 udp->udp_recvopts = onoff; 3409 break; 3410 case IP_RECVDSTADDR: 3411 if (!checkonly) 3412 udp->udp_recvdstaddr = onoff; 3413 break; 3414 case IP_RECVIF: 3415 if (!checkonly) 3416 udp->udp_recvif = onoff; 3417 break; 3418 case IP_RECVSLLA: 3419 if (!checkonly) 3420 udp->udp_recvslla = onoff; 3421 break; 3422 case IP_RECVTTL: 3423 if (!checkonly) 3424 udp->udp_recvttl = onoff; 3425 break; 3426 case IP_ADD_MEMBERSHIP: 3427 case IP_DROP_MEMBERSHIP: 3428 case IP_BLOCK_SOURCE: 3429 case IP_UNBLOCK_SOURCE: 3430 case IP_ADD_SOURCE_MEMBERSHIP: 3431 case IP_DROP_SOURCE_MEMBERSHIP: 3432 case MCAST_JOIN_GROUP: 3433 case MCAST_LEAVE_GROUP: 3434 case MCAST_BLOCK_SOURCE: 3435 case MCAST_UNBLOCK_SOURCE: 3436 case MCAST_JOIN_SOURCE_GROUP: 3437 case MCAST_LEAVE_SOURCE_GROUP: 3438 case IP_SEC_OPT: 3439 case IP_NEXTHOP: 3440 /* 3441 * "soft" error (negative) 3442 * option not handled at this level 3443 * Do not modify *outlenp. 3444 */ 3445 return (-EINVAL); 3446 case IP_BOUND_IF: 3447 if (!checkonly) 3448 udp->udp_bound_if = *i1; 3449 break; 3450 case IP_UNSPEC_SRC: 3451 if (!checkonly) 3452 udp->udp_unspec_source = onoff; 3453 break; 3454 case IP_XMIT_IF: 3455 if (!checkonly) 3456 udp->udp_xmit_if = *i1; 3457 break; 3458 default: 3459 *outlenp = 0; 3460 return (EINVAL); 3461 } 3462 break; 3463 case IPPROTO_IPV6: { 3464 ip6_pkt_t *ipp; 3465 boolean_t sticky; 3466 3467 if (udp->udp_family != AF_INET6) { 3468 *outlenp = 0; 3469 return (ENOPROTOOPT); 3470 } 3471 /* 3472 * Deal with both sticky options and ancillary data 3473 */ 3474 if (thisdg_attrs == NULL) { 3475 /* sticky options, or none */ 3476 ipp = &udp->udp_sticky_ipp; 3477 sticky = B_TRUE; 3478 } else { 3479 /* ancillary data */ 3480 ipp = (ip6_pkt_t *)thisdg_attrs; 3481 sticky = B_FALSE; 3482 } 3483 3484 switch (name) { 3485 case IPV6_MULTICAST_IF: 3486 if (!checkonly) 3487 udp->udp_multicast_if_index = *i1; 3488 break; 3489 case IPV6_UNICAST_HOPS: 3490 /* -1 means use default */ 3491 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3492 *outlenp = 0; 3493 return (EINVAL); 3494 } 3495 if (!checkonly) { 3496 if (*i1 == -1) { 3497 udp->udp_ttl = ipp->ipp_unicast_hops = 3498 udp_ipv6_hoplimit; 3499 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3500 /* Pass modified value to IP. */ 3501 *i1 = udp->udp_ttl; 3502 } else { 3503 udp->udp_ttl = ipp->ipp_unicast_hops = 3504 (uint8_t)*i1; 3505 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3506 } 3507 /* Rebuild the header template */ 3508 error = udp_build_hdrs(q, udp); 3509 if (error != 0) { 3510 *outlenp = 0; 3511 return (error); 3512 } 3513 } 3514 break; 3515 case IPV6_MULTICAST_HOPS: 3516 /* -1 means use default */ 3517 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3518 *outlenp = 0; 3519 return (EINVAL); 3520 } 3521 if (!checkonly) { 3522 if (*i1 == -1) { 3523 udp->udp_multicast_ttl = 3524 ipp->ipp_multicast_hops = 3525 IP_DEFAULT_MULTICAST_TTL; 3526 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3527 /* Pass modified value to IP. 
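 * (the resolved default TTL rather than -1).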
*/ 3528 *i1 = udp->udp_multicast_ttl; 3529 } else { 3530 udp->udp_multicast_ttl = 3531 ipp->ipp_multicast_hops = 3532 (uint8_t)*i1; 3533 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3534 } 3535 } 3536 break; 3537 case IPV6_MULTICAST_LOOP: 3538 if (*i1 != 0 && *i1 != 1) { 3539 *outlenp = 0; 3540 return (EINVAL); 3541 } 3542 if (!checkonly) 3543 connp->conn_multicast_loop = *i1; 3544 break; 3545 case IPV6_JOIN_GROUP: 3546 case IPV6_LEAVE_GROUP: 3547 case MCAST_JOIN_GROUP: 3548 case MCAST_LEAVE_GROUP: 3549 case MCAST_BLOCK_SOURCE: 3550 case MCAST_UNBLOCK_SOURCE: 3551 case MCAST_JOIN_SOURCE_GROUP: 3552 case MCAST_LEAVE_SOURCE_GROUP: 3553 /* 3554 * "soft" error (negative) 3555 * option not handled at this level 3556 * Note: Do not modify *outlenp 3557 */ 3558 return (-EINVAL); 3559 case IPV6_BOUND_IF: 3560 if (!checkonly) 3561 udp->udp_bound_if = *i1; 3562 break; 3563 case IPV6_UNSPEC_SRC: 3564 if (!checkonly) 3565 udp->udp_unspec_source = onoff; 3566 break; 3567 /* 3568 * Set boolean switches for ancillary data delivery 3569 */ 3570 case IPV6_RECVPKTINFO: 3571 if (!checkonly) 3572 udp->udp_ipv6_recvpktinfo = onoff; 3573 break; 3574 case IPV6_RECVTCLASS: 3575 if (!checkonly) { 3576 udp->udp_ipv6_recvtclass = onoff; 3577 } 3578 break; 3579 case IPV6_RECVPATHMTU: 3580 if (!checkonly) { 3581 udp->udp_ipv6_recvpathmtu = onoff; 3582 } 3583 break; 3584 case IPV6_RECVHOPLIMIT: 3585 if (!checkonly) 3586 udp->udp_ipv6_recvhoplimit = onoff; 3587 break; 3588 case IPV6_RECVHOPOPTS: 3589 if (!checkonly) 3590 udp->udp_ipv6_recvhopopts = onoff; 3591 break; 3592 case IPV6_RECVDSTOPTS: 3593 if (!checkonly) 3594 udp->udp_ipv6_recvdstopts = onoff; 3595 break; 3596 case _OLD_IPV6_RECVDSTOPTS: 3597 if (!checkonly) 3598 udp->udp_old_ipv6_recvdstopts = onoff; 3599 break; 3600 case IPV6_RECVRTHDRDSTOPTS: 3601 if (!checkonly) 3602 udp->udp_ipv6_recvrthdrdstopts = onoff; 3603 break; 3604 case IPV6_RECVRTHDR: 3605 if (!checkonly) 3606 udp->udp_ipv6_recvrthdr = onoff; 3607 break; 3608 /* 3609 * Set sticky options or ancillary data. 3610 * If sticky options, (re)build any extension headers 3611 * that might be needed as a result. 3612 */ 3613 case IPV6_PKTINFO: 3614 /* 3615 * The source address and ifindex are verified 3616 * in ip_opt_set(). For ancillary data the 3617 * source address is checked in ip_wput_v6. 
3618 */ 3619 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3620 return (EINVAL); 3621 if (checkonly) 3622 break; 3623 3624 if (inlen == 0) { 3625 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3626 ipp->ipp_sticky_ignored |= 3627 (IPPF_IFINDEX|IPPF_ADDR); 3628 } else { 3629 struct in6_pktinfo *pkti; 3630 3631 pkti = (struct in6_pktinfo *)invalp; 3632 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3633 ipp->ipp_addr = pkti->ipi6_addr; 3634 if (ipp->ipp_ifindex != 0) 3635 ipp->ipp_fields |= IPPF_IFINDEX; 3636 else 3637 ipp->ipp_fields &= ~IPPF_IFINDEX; 3638 if (!IN6_IS_ADDR_UNSPECIFIED( 3639 &ipp->ipp_addr)) 3640 ipp->ipp_fields |= IPPF_ADDR; 3641 else 3642 ipp->ipp_fields &= ~IPPF_ADDR; 3643 } 3644 if (sticky) { 3645 error = udp_build_hdrs(q, udp); 3646 if (error != 0) 3647 return (error); 3648 } 3649 break; 3650 case IPV6_HOPLIMIT: 3651 if (sticky) 3652 return (EINVAL); 3653 if (inlen != 0 && inlen != sizeof (int)) 3654 return (EINVAL); 3655 if (checkonly) 3656 break; 3657 3658 if (inlen == 0) { 3659 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3660 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3661 } else { 3662 if (*i1 > 255 || *i1 < -1) 3663 return (EINVAL); 3664 if (*i1 == -1) 3665 ipp->ipp_hoplimit = udp_ipv6_hoplimit; 3666 else 3667 ipp->ipp_hoplimit = *i1; 3668 ipp->ipp_fields |= IPPF_HOPLIMIT; 3669 } 3670 break; 3671 case IPV6_TCLASS: 3672 if (inlen != 0 && inlen != sizeof (int)) 3673 return (EINVAL); 3674 if (checkonly) 3675 break; 3676 3677 if (inlen == 0) { 3678 ipp->ipp_fields &= ~IPPF_TCLASS; 3679 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3680 } else { 3681 if (*i1 > 255 || *i1 < -1) 3682 return (EINVAL); 3683 if (*i1 == -1) 3684 ipp->ipp_tclass = 0; 3685 else 3686 ipp->ipp_tclass = *i1; 3687 ipp->ipp_fields |= IPPF_TCLASS; 3688 } 3689 if (sticky) { 3690 error = udp_build_hdrs(q, udp); 3691 if (error != 0) 3692 return (error); 3693 } 3694 break; 3695 case IPV6_NEXTHOP: 3696 /* 3697 * IP will verify that the nexthop is reachable 3698 * and fail for sticky options. 3699 */ 3700 if (inlen != 0 && inlen != sizeof (sin6_t)) 3701 return (EINVAL); 3702 if (checkonly) 3703 break; 3704 3705 if (inlen == 0) { 3706 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3707 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3708 } else { 3709 sin6_t *sin6 = (sin6_t *)invalp; 3710 3711 if (sin6->sin6_family != AF_INET6) 3712 return (EAFNOSUPPORT); 3713 if (IN6_IS_ADDR_V4MAPPED( 3714 &sin6->sin6_addr)) 3715 return (EADDRNOTAVAIL); 3716 ipp->ipp_nexthop = sin6->sin6_addr; 3717 if (!IN6_IS_ADDR_UNSPECIFIED( 3718 &ipp->ipp_nexthop)) 3719 ipp->ipp_fields |= IPPF_NEXTHOP; 3720 else 3721 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3722 } 3723 if (sticky) { 3724 error = udp_build_hdrs(q, udp); 3725 if (error != 0) 3726 return (error); 3727 } 3728 break; 3729 case IPV6_HOPOPTS: { 3730 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3731 /* 3732 * Sanity checks - minimum size, size a multiple of 3733 * eight bytes, and matching size passed in. 
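 * The extension header length field (ip6h_len) counts 8-byte units beyond
 * the first 8 bytes, which is why the expected size below is
 * 8 * (ip6h_len + 1).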
3734 */ 3735 if (inlen != 0 && 3736 inlen != (8 * (hopts->ip6h_len + 1))) 3737 return (EINVAL); 3738 3739 if (checkonly) 3740 break; 3741 3742 if (inlen == 0) { 3743 if (sticky && 3744 (ipp->ipp_fields & IPPF_HOPOPTS) != 0) { 3745 kmem_free(ipp->ipp_hopopts, 3746 ipp->ipp_hopoptslen); 3747 ipp->ipp_hopopts = NULL; 3748 ipp->ipp_hopoptslen = 0; 3749 } 3750 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3751 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3752 } else { 3753 error = udp_pkt_set(invalp, inlen, sticky, 3754 (uchar_t **)&ipp->ipp_hopopts, 3755 &ipp->ipp_hopoptslen); 3756 if (error != 0) 3757 return (error); 3758 ipp->ipp_fields |= IPPF_HOPOPTS; 3759 } 3760 if (sticky) { 3761 error = udp_build_hdrs(q, udp); 3762 if (error != 0) 3763 return (error); 3764 } 3765 break; 3766 } 3767 case IPV6_RTHDRDSTOPTS: { 3768 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3769 3770 /* 3771 * Sanity checks - minimum size, size a multiple of 3772 * eight bytes, and matching size passed in. 3773 */ 3774 if (inlen != 0 && 3775 inlen != (8 * (dopts->ip6d_len + 1))) 3776 return (EINVAL); 3777 3778 if (checkonly) 3779 break; 3780 3781 if (inlen == 0) { 3782 if (sticky && 3783 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3784 kmem_free(ipp->ipp_rtdstopts, 3785 ipp->ipp_rtdstoptslen); 3786 ipp->ipp_rtdstopts = NULL; 3787 ipp->ipp_rtdstoptslen = 0; 3788 } 3789 3790 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3791 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3792 } else { 3793 error = udp_pkt_set(invalp, inlen, sticky, 3794 (uchar_t **)&ipp->ipp_rtdstopts, 3795 &ipp->ipp_rtdstoptslen); 3796 if (error != 0) 3797 return (error); 3798 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3799 } 3800 if (sticky) { 3801 error = udp_build_hdrs(q, udp); 3802 if (error != 0) 3803 return (error); 3804 } 3805 break; 3806 } 3807 case IPV6_DSTOPTS: { 3808 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3809 3810 /* 3811 * Sanity checks - minimum size, size a multiple of 3812 * eight bytes, and matching size passed in. 3813 */ 3814 if (inlen != 0 && 3815 inlen != (8 * (dopts->ip6d_len + 1))) 3816 return (EINVAL); 3817 3818 if (checkonly) 3819 break; 3820 3821 if (inlen == 0) { 3822 if (sticky && 3823 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3824 kmem_free(ipp->ipp_dstopts, 3825 ipp->ipp_dstoptslen); 3826 ipp->ipp_dstopts = NULL; 3827 ipp->ipp_dstoptslen = 0; 3828 } 3829 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3830 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3831 } else { 3832 error = udp_pkt_set(invalp, inlen, sticky, 3833 (uchar_t **)&ipp->ipp_dstopts, 3834 &ipp->ipp_dstoptslen); 3835 if (error != 0) 3836 return (error); 3837 ipp->ipp_fields |= IPPF_DSTOPTS; 3838 } 3839 if (sticky) { 3840 error = udp_build_hdrs(q, udp); 3841 if (error != 0) 3842 return (error); 3843 } 3844 break; 3845 } 3846 case IPV6_RTHDR: { 3847 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3848 3849 /* 3850 * Sanity checks - minimum size, size a multiple of 3851 * eight bytes, and matching size passed in. 
3852 */ 3853 if (inlen != 0 && 3854 inlen != (8 * (rt->ip6r_len + 1))) 3855 return (EINVAL); 3856 3857 if (checkonly) 3858 break; 3859 3860 if (inlen == 0) { 3861 if (sticky && 3862 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3863 kmem_free(ipp->ipp_rthdr, 3864 ipp->ipp_rthdrlen); 3865 ipp->ipp_rthdr = NULL; 3866 ipp->ipp_rthdrlen = 0; 3867 } 3868 ipp->ipp_fields &= ~IPPF_RTHDR; 3869 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3870 } else { 3871 error = udp_pkt_set(invalp, inlen, sticky, 3872 (uchar_t **)&ipp->ipp_rthdr, 3873 &ipp->ipp_rthdrlen); 3874 if (error != 0) 3875 return (error); 3876 ipp->ipp_fields |= IPPF_RTHDR; 3877 } 3878 if (sticky) { 3879 error = udp_build_hdrs(q, udp); 3880 if (error != 0) 3881 return (error); 3882 } 3883 break; 3884 } 3885 3886 case IPV6_DONTFRAG: 3887 if (checkonly) 3888 break; 3889 3890 if (onoff) { 3891 ipp->ipp_fields |= IPPF_DONTFRAG; 3892 } else { 3893 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3894 } 3895 break; 3896 3897 case IPV6_USE_MIN_MTU: 3898 if (inlen != sizeof (int)) 3899 return (EINVAL); 3900 3901 if (*i1 < -1 || *i1 > 1) 3902 return (EINVAL); 3903 3904 if (checkonly) 3905 break; 3906 3907 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3908 ipp->ipp_use_min_mtu = *i1; 3909 break; 3910 3911 case IPV6_BOUND_PIF: 3912 case IPV6_SEC_OPT: 3913 case IPV6_DONTFAILOVER_IF: 3914 case IPV6_SRC_PREFERENCES: 3915 case IPV6_V6ONLY: 3916 /* Handled at the IP level */ 3917 return (-EINVAL); 3918 default: 3919 *outlenp = 0; 3920 return (EINVAL); 3921 } 3922 break; 3923 } /* end IPPROTO_IPV6 */ 3924 case IPPROTO_UDP: 3925 switch (name) { 3926 case UDP_ANONPRIVBIND: 3927 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 3928 *outlenp = 0; 3929 return (error); 3930 } 3931 if (!checkonly) { 3932 udp->udp_anon_priv_bind = onoff; 3933 } 3934 break; 3935 case UDP_EXCLBIND: 3936 if (!checkonly) 3937 udp->udp_exclbind = onoff; 3938 break; 3939 case UDP_RCVHDR: 3940 if (!checkonly) 3941 udp->udp_rcvhdr = onoff; 3942 break; 3943 default: 3944 *outlenp = 0; 3945 return (EINVAL); 3946 } 3947 break; 3948 default: 3949 *outlenp = 0; 3950 return (EINVAL); 3951 } 3952 /* 3953 * Common case of OK return with outval same as inval. 3954 */ 3955 if (invalp != outvalp) { 3956 /* don't trust bcopy for identical src/dst */ 3957 (void) bcopy(invalp, outvalp, inlen); 3958 } 3959 *outlenp = inlen; 3960 return (0); 3961 } 3962 3963 /* 3964 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3965 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3966 * headers, and the udp header. 3967 * Returns failure if can't allocate memory. 
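 * The rebuilt template is laid out roughly as
 *	[ip6i_t (only with IPPF_HAS_IP6I)][ip6_t][sticky ext. headers][udpha_t]
 * which is how the code below locates ip6h at the front and udpha at the
 * very end of the buffer.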
3968 */ 3969 static int 3970 udp_build_hdrs(queue_t *q, udp_t *udp) 3971 { 3972 uchar_t *hdrs; 3973 uint_t hdrs_len; 3974 ip6_t *ip6h;
3975 ip6i_t *ip6i; 3976 udpha_t *udpha; 3977 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3978
3979 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3980 ASSERT(hdrs_len != 0); 3981 if (hdrs_len != udp->udp_sticky_hdrs_len) {
3982 /* Need to reallocate */ 3983 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3984 if (hdrs == NULL) 3985 return (ENOMEM); 3986
3987 if (udp->udp_sticky_hdrs_len != 0) { 3988 kmem_free(udp->udp_sticky_hdrs, 3989 udp->udp_sticky_hdrs_len); 3990 }
3991 udp->udp_sticky_hdrs = hdrs; 3992 udp->udp_sticky_hdrs_len = hdrs_len; 3993 }
3994 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3995 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3996
3997 /* Set header fields not in ipp */ 3998 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3999 ip6i = (ip6i_t *)udp->udp_sticky_hdrs;
4000 ip6h = (ip6_t *)&ip6i[1]; 4001 } else { 4002 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 4003 } 4004
4005 if (!(ipp->ipp_fields & IPPF_ADDR)) 4006 ip6h->ip6_src = udp->udp_v6src; 4007
4008 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 4009 udpha->uha_src_port = udp->udp_port; 4010
4011 /* Try to get everything in a single mblk */ 4012 if (hdrs_len > udp->udp_max_hdr_len) { 4013 udp->udp_max_hdr_len = hdrs_len;
4014 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 4015 udp_wroff_extra); 4016 } 4017 return (0); 4018 } 4019
4020 /* 4021 * Set optbuf and optlen for the option. 4022 * If sticky is set allocate memory (if not already present).
4023 * Otherwise just point optbuf and optlen at invalp and inlen. 4024 * Returns failure if memory cannot be allocated. 4025 */
4026 static int 4027 udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 4028 uchar_t **optbufp, uint_t *optlenp) 4029 {
4030 uchar_t *optbuf; 4031 4032 if (!sticky) { 4033 *optbufp = invalp; 4034 *optlenp = inlen; 4035 return (0); 4036 }
4037 if (inlen == *optlenp) { 4038 /* Unchanged length - no need to reallocate */ 4039 bcopy(invalp, *optbufp, inlen); 4040 return (0); 4041 }
4042 if (inlen != 0) { 4043 /* Allocate new buffer before free */ 4044 optbuf = kmem_alloc(inlen, KM_NOSLEEP); 4045 if (optbuf == NULL)
4046 return (ENOMEM); 4047 } else { 4048 optbuf = NULL; 4049 } 4050 /* Free old buffer */ 4051 if (*optlenp != 0)
4052 kmem_free(*optbufp, *optlenp); 4053 4054 bcopy(invalp, optbuf, inlen); 4055 *optbufp = optbuf; 4056 *optlenp = inlen; 4057 return (0); 4058 } 4059
4060 /* 4061 * This routine retrieves the value of an ND variable in a udpparam_t 4062 * structure. It is called through nd_getset when a user reads the
4063 * variable. 4064 */ 4065 /* ARGSUSED */ 4066 static int 4067 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4068 {
4069 udpparam_t *udppa = (udpparam_t *)cp; 4070 4071 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4072 return (0); 4073 } 4074
4075 /* 4076 * Walk through the specified param array, registering each element with the 4077 * named dispatch (ND) handler.
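 * If any nd_load() fails, the partially built udp_g_nd list is released
 * with nd_free() and B_FALSE is returned; B_TRUE means all parameters and
 * the extra udp_* ndd hooks were registered.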
4078 */ 4079 static boolean_t 4080 udp_param_register(udpparam_t *udppa, int cnt) 4081 { 4082 for (; cnt-- > 0; udppa++) {
4083 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4084 if (!nd_load(&udp_g_nd, udppa->udp_param_name,
4085 udp_param_get, udp_param_set, 4086 (caddr_t)udppa)) { 4087 nd_free(&udp_g_nd); 4088 return (B_FALSE); 4089 } 4090 } 4091 }
4092 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports", 4093 udp_extra_priv_ports_get, NULL, NULL)) { 4094 nd_free(&udp_g_nd); 4095 return (B_FALSE); 4096 }
4097 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add", 4098 NULL, udp_extra_priv_ports_add, NULL)) { 4099 nd_free(&udp_g_nd); 4100 return (B_FALSE); 4101 }
4102 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del", 4103 NULL, udp_extra_priv_ports_del, NULL)) { 4104 nd_free(&udp_g_nd); 4105 return (B_FALSE); 4106 }
4107 if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL, 4108 NULL)) { 4109 nd_free(&udp_g_nd); 4110 return (B_FALSE); 4111 }
4112 if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL, 4113 NULL)) { 4114 nd_free(&udp_g_nd); 4115 return (B_FALSE); 4116 }
4117 return (B_TRUE); 4118 } 4119
4120 /* This routine sets an ND variable in a udpparam_t structure. */ 4121 /* ARGSUSED */ 4122 static int
4123 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 4124 { 4125 long new_value; 4126 udpparam_t *udppa = (udpparam_t *)cp; 4127
4128 /* 4129 * Fail the request if the new value does not lie within the 4130 * required bounds. 4131 */
4132 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 4133 new_value < udppa->udp_param_min || 4134 new_value > udppa->udp_param_max) {
4135 return (EINVAL); 4136 } 4137 4138 /* Set the new value */ 4139 udppa->udp_param_value = new_value; 4140 return (0); 4141 } 4142
4143 static void 4144 udp_input(conn_t *connp, mblk_t *mp) 4145 { 4146 struct T_unitdata_ind *tudi; 4147 uchar_t *rptr; /* Pointer to IP header */
4148 int hdr_length; /* Length of IP+UDP headers */ 4149 int udi_size; /* Size of T_unitdata_ind */ 4150 int mp_len; 4151 udp_t *udp;
4152 udpha_t *udpha; 4153 int ipversion; 4154 ip6_pkt_t ipp; 4155 ip6_t *ip6h; 4156 ip6i_t *ip6i; 4157 mblk_t *mp1;
4158 mblk_t *options_mp = NULL; 4159 in_pktinfo_t *pinfo = NULL; 4160 cred_t *cr = NULL; 4161 queue_t *q = connp->conn_rq; 4162 pid_t cpid; 4163
4164 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4165 "udp_rput_start: q %p mp %p", q, mp); 4166
4167 udp = connp->conn_udp; 4168 rptr = mp->b_rptr; 4169 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4170 ASSERT(OK_32PTR(rptr)); 4171
4172 /* 4173 * IP should have prepended the options data in an M_CTL. 4174 * Check the M_CTL "type" to make sure we are not here because of
4175 * a valid ICMP message. 4176 */ 4177 if (DB_TYPE(mp) == M_CTL) { 4178 if (MBLKL(mp) == sizeof (in_pktinfo_t) &&
4179 ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type == 4180 IN_PKTINFO) {
4181 /* 4182 * IP_RECVIF or IP_RECVSLLA information has been 4183 * appended to the packet by IP. We need to
4184 * extract the mblk and adjust the rptr 4185 */ 4186 pinfo = (in_pktinfo_t *)mp->b_rptr; 4187 options_mp = mp;
4188 mp = mp->b_cont; 4189 rptr = mp->b_rptr; 4190 UDP_STAT(udp_in_pktinfo); 4191 } else {
4192 /* 4193 * ICMP messages. 4194 */ 4195 udp_icmp_error(q, mp); 4196 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
4197 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4198 return; 4199 } 4200 } 4201
4202 mp_len = msgdsize(mp); 4203 /* 4204 * This is the inbound data path.
4205 * First, we check to make sure the IP version number is correct, 4206 * and then pull the IP and UDP headers into the first mblk. 4207 * Assume IP provides aligned packets - otherwise toss. 4208 * Also, check if we have a complete IP header. 4209 */ 4210 4211 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4212 ipp.ipp_fields = 0; 4213 4214 ipversion = IPH_HDR_VERSION(rptr); 4215 switch (ipversion) { 4216 case IPV4_VERSION: 4217 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4218 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4219 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4220 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4221 (udp->udp_ip_rcv_options_len)) { 4222 /* 4223 * Handle IPv4 packets with options outside of the 4224 * main data path. Not needed for AF_INET6 sockets 4225 * since they don't support a getsockopt of IP_OPTIONS. 4226 */ 4227 if (udp->udp_family == AF_INET6) 4228 break; 4229 /* 4230 * UDP length check performed for IPv4 packets with 4231 * options to check whether UDP length specified in 4232 * the header is the same as the physical length of 4233 * the packet. 4234 */ 4235 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4236 if (mp_len != (ntohs(udpha->uha_length) + 4237 hdr_length - UDPH_SIZE)) { 4238 goto tossit; 4239 } 4240 /* 4241 * Handle the case where the packet has IP options 4242 * and the IP_RECVSLLA & IP_RECVIF are set 4243 */ 4244 if (pinfo != NULL) 4245 mp = options_mp; 4246 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4247 SQTAG_UDP_INPUT); 4248 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4249 "udp_rput_end: q %p (%S)", q, "end"); 4250 return; 4251 } 4252 4253 /* Handle IPV6_RECVHOPLIMIT. */ 4254 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4255 udp->udp_ipv6_recvpktinfo) { 4256 if (pinfo->in_pkt_flags & IPF_RECVIF) { 4257 ipp.ipp_fields |= IPPF_IFINDEX; 4258 ipp.ipp_ifindex = pinfo->in_pkt_ifindex; 4259 } 4260 } 4261 break; 4262 case IPV6_VERSION: 4263 /* 4264 * IPv6 packets can only be received by applications 4265 * that are prepared to receive IPv6 addresses. 4266 * The IP fanout must ensure this. 4267 */ 4268 ASSERT(udp->udp_family == AF_INET6); 4269 4270 ip6h = (ip6_t *)rptr; 4271 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4272 4273 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4274 uint8_t nexthdrp; 4275 /* Look for ifindex information */ 4276 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4277 ip6i = (ip6i_t *)ip6h; 4278 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4279 goto tossit; 4280 4281 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4282 ASSERT(ip6i->ip6i_ifindex != 0); 4283 ipp.ipp_fields |= IPPF_IFINDEX; 4284 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4285 } 4286 rptr = (uchar_t *)&ip6i[1]; 4287 mp->b_rptr = rptr; 4288 if (rptr == mp->b_wptr) { 4289 mp1 = mp->b_cont; 4290 freeb(mp); 4291 mp = mp1; 4292 rptr = mp->b_rptr; 4293 } 4294 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4295 goto tossit; 4296 ip6h = (ip6_t *)rptr; 4297 mp_len = msgdsize(mp); 4298 } 4299 /* 4300 * Find any potentially interesting extension headers 4301 * as well as the length of the IPv6 + extension 4302 * headers. 4303 */ 4304 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4305 UDPH_SIZE; 4306 ASSERT(nexthdrp == IPPROTO_UDP); 4307 } else { 4308 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4309 ip6i = NULL; 4310 } 4311 break; 4312 default: 4313 ASSERT(0); 4314 } 4315 4316 /* 4317 * IP inspected the UDP header thus all of it must be in the mblk. 
4318 * UDP length check is performed for IPv6 packets and IPv4 packets 4319 * without options to check if the size of the packet as specified 4320 * by the header is the same as the physical size of the packet. 4321 */ 4322 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4323 if ((MBLKL(mp) < hdr_length) || 4324 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4325 goto tossit; 4326 } 4327 4328 /* Walk past the headers. */ 4329 if (!udp->udp_rcvhdr) { 4330 mp->b_rptr = rptr + hdr_length; 4331 mp_len -= hdr_length; 4332 } 4333 4334 /* 4335 * This is the inbound data path. Packets are passed upstream as 4336 * T_UNITDATA_IND messages with full IP headers still attached. 4337 */ 4338 if (udp->udp_family == AF_INET) { 4339 sin_t *sin; 4340 4341 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4342 4343 /* 4344 * Normally only send up the address. 4345 * If IP_RECVDSTADDR is set we include the destination IP 4346 * address as an option. With IP_RECVOPTS we include all 4347 * the IP options. Only ip_rput_other() handles packets 4348 * that contain IP options. 4349 */ 4350 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4351 if (udp->udp_recvdstaddr) { 4352 udi_size += sizeof (struct T_opthdr) + 4353 sizeof (struct in_addr); 4354 UDP_STAT(udp_in_recvdstaddr); 4355 } 4356 4357 /* 4358 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4359 * space accordingly 4360 */ 4361 if (udp->udp_recvif && (pinfo != NULL) && 4362 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4363 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4364 UDP_STAT(udp_in_recvif); 4365 } 4366 4367 if (udp->udp_recvslla && (pinfo != NULL) && 4368 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4369 udi_size += sizeof (struct T_opthdr) + 4370 sizeof (struct sockaddr_dl); 4371 UDP_STAT(udp_in_recvslla); 4372 } 4373 4374 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4375 udi_size += sizeof (struct T_opthdr) + ucredsize; 4376 cpid = DB_CPID(mp); 4377 UDP_STAT(udp_in_recvucred); 4378 } 4379 /* 4380 * If IP_RECVTTL is set allocate the appropriate sized buffer 4381 */ 4382 if (udp->udp_recvttl) { 4383 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4384 UDP_STAT(udp_in_recvttl); 4385 } 4386 /* 4387 * If SO_TIMESTAMP is set allocate the appropriate sized 4388 * buffer. Since gethrestime() expects a pointer aligned 4389 * argument, we allocate space necessary for extra 4390 * alignment (even though it might not be used). 4391 */ 4392 if (udp->udp_timestamp) { 4393 udi_size += sizeof (struct T_opthdr) + 4394 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4395 UDP_STAT(udp_in_timestamp); 4396 } 4397 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4398 4399 /* Allocate a message block for the T_UNITDATA_IND structure. 
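The new mblk is filled in below as a struct T_unitdata_ind, followed by the
sin_t source address and then any T_opthdr-framed options; the original
datagram remains attached via b_cont.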
*/ 4400 mp1 = allocb(udi_size, BPRI_MED); 4401 if (mp1 == NULL) { 4402 freemsg(mp); 4403 if (options_mp != NULL) 4404 freeb(options_mp); 4405 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4406 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4407 BUMP_MIB(&udp_mib, udpInErrors); 4408 return; 4409 } 4410 mp1->b_cont = mp; 4411 mp = mp1; 4412 mp->b_datap->db_type = M_PROTO; 4413 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4414 mp->b_wptr = (uchar_t *)tudi + udi_size; 4415 tudi->PRIM_type = T_UNITDATA_IND; 4416 tudi->SRC_length = sizeof (sin_t); 4417 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4418 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4419 sizeof (sin_t); 4420 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4421 tudi->OPT_length = udi_size; 4422 sin = (sin_t *)&tudi[1]; 4423 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4424 sin->sin_port = udpha->uha_src_port; 4425 sin->sin_family = udp->udp_family; 4426 *(uint32_t *)&sin->sin_zero[0] = 0; 4427 *(uint32_t *)&sin->sin_zero[4] = 0; 4428 4429 /* 4430 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4431 * IP_RECVTTL has been set. 4432 */ 4433 if (udi_size != 0) { 4434 /* 4435 * Copy in destination address before options to avoid 4436 * any padding issues. 4437 */ 4438 char *dstopt; 4439 4440 dstopt = (char *)&sin[1]; 4441 if (udp->udp_recvdstaddr) { 4442 struct T_opthdr *toh; 4443 ipaddr_t *dstptr; 4444 4445 toh = (struct T_opthdr *)dstopt; 4446 toh->level = IPPROTO_IP; 4447 toh->name = IP_RECVDSTADDR; 4448 toh->len = sizeof (struct T_opthdr) + 4449 sizeof (ipaddr_t); 4450 toh->status = 0; 4451 dstopt += sizeof (struct T_opthdr); 4452 dstptr = (ipaddr_t *)dstopt; 4453 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4454 dstopt += sizeof (ipaddr_t); 4455 udi_size -= toh->len; 4456 } 4457 4458 if (udp->udp_recvslla && (pinfo != NULL) && 4459 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4460 4461 struct T_opthdr *toh; 4462 struct sockaddr_dl *dstptr; 4463 4464 toh = (struct T_opthdr *)dstopt; 4465 toh->level = IPPROTO_IP; 4466 toh->name = IP_RECVSLLA; 4467 toh->len = sizeof (struct T_opthdr) + 4468 sizeof (struct sockaddr_dl); 4469 toh->status = 0; 4470 dstopt += sizeof (struct T_opthdr); 4471 dstptr = (struct sockaddr_dl *)dstopt; 4472 bcopy(&pinfo->in_pkt_slla, dstptr, 4473 sizeof (struct sockaddr_dl)); 4474 dstopt += sizeof (struct sockaddr_dl); 4475 udi_size -= toh->len; 4476 } 4477 4478 if (udp->udp_recvif && (pinfo != NULL) && 4479 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4480 4481 struct T_opthdr *toh; 4482 uint_t *dstptr; 4483 4484 toh = (struct T_opthdr *)dstopt; 4485 toh->level = IPPROTO_IP; 4486 toh->name = IP_RECVIF; 4487 toh->len = sizeof (struct T_opthdr) + 4488 sizeof (uint_t); 4489 toh->status = 0; 4490 dstopt += sizeof (struct T_opthdr); 4491 dstptr = (uint_t *)dstopt; 4492 *dstptr = pinfo->in_pkt_ifindex; 4493 dstopt += sizeof (uint_t); 4494 udi_size -= toh->len; 4495 } 4496 4497 if (cr != NULL) { 4498 struct T_opthdr *toh; 4499 4500 toh = (struct T_opthdr *)dstopt; 4501 toh->level = SOL_SOCKET; 4502 toh->name = SCM_UCRED; 4503 toh->len = sizeof (struct T_opthdr) + ucredsize; 4504 toh->status = 0; 4505 (void) cred2ucred(cr, cpid, &toh[1]); 4506 dstopt += toh->len; 4507 udi_size -= toh->len; 4508 } 4509 4510 if (udp->udp_recvttl) { 4511 struct T_opthdr *toh; 4512 uint8_t *dstptr; 4513 4514 toh = (struct T_opthdr *)dstopt; 4515 toh->level = IPPROTO_IP; 4516 toh->name = IP_RECVTTL; 4517 toh->len = sizeof (struct T_opthdr) + 4518 sizeof (uint8_t); 4519 toh->status = 0; 4520 dstopt += sizeof (struct T_opthdr); 
4521 dstptr = (uint8_t *)dstopt; 4522 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4523 dstopt += sizeof (uint8_t); 4524 udi_size -= toh->len; 4525 } 4526 if (udp->udp_timestamp) { 4527 struct T_opthdr *toh; 4528 4529 toh = (struct T_opthdr *)dstopt; 4530 toh->level = SOL_SOCKET; 4531 toh->name = SCM_TIMESTAMP; 4532 toh->len = sizeof (struct T_opthdr) + 4533 sizeof (timestruc_t) + _POINTER_ALIGNMENT; 4534 toh->status = 0; 4535 dstopt += sizeof (struct T_opthdr); 4536 /* Align for gethrestime() */ 4537 dstopt = (char *)P2ROUNDUP((intptr_t)dstopt, 4538 sizeof (intptr_t)); 4539 gethrestime((timestruc_t *)dstopt); 4540 dstopt += sizeof (timestruc_t); 4541 udi_size -= toh->len; 4542 } 4543 4544 /* Consumed all of allocated space */ 4545 ASSERT(udi_size == 0); 4546 } 4547 } else { 4548 sin6_t *sin6; 4549 4550 /* 4551 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4552 * 4553 * Normally we only send up the address. If receiving of any 4554 * optional receive side information is enabled, we also send 4555 * that up as options. 4556 * [ Only udp_rput_other() handles packets that contain IP 4557 * options so code to account for does not appear immediately 4558 * below but elsewhere ] 4559 */ 4560 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4561 4562 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4563 IPPF_RTHDR|IPPF_IFINDEX)) { 4564 if (udp->udp_ipv6_recvhopopts && 4565 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4566 udi_size += sizeof (struct T_opthdr) + 4567 ipp.ipp_hopoptslen; 4568 UDP_STAT(udp_in_recvhopopts); 4569 } 4570 if ((udp->udp_ipv6_recvdstopts || 4571 udp->udp_old_ipv6_recvdstopts) && 4572 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4573 udi_size += sizeof (struct T_opthdr) + 4574 ipp.ipp_dstoptslen; 4575 UDP_STAT(udp_in_recvdstopts); 4576 } 4577 if (((udp->udp_ipv6_recvdstopts && 4578 udp->udp_ipv6_recvrthdr && 4579 (ipp.ipp_fields & IPPF_RTHDR)) || 4580 udp->udp_ipv6_recvrthdrdstopts) && 4581 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4582 udi_size += sizeof (struct T_opthdr) + 4583 ipp.ipp_rtdstoptslen; 4584 UDP_STAT(udp_in_recvrtdstopts); 4585 } 4586 if (udp->udp_ipv6_recvrthdr && 4587 (ipp.ipp_fields & IPPF_RTHDR)) { 4588 udi_size += sizeof (struct T_opthdr) + 4589 ipp.ipp_rthdrlen; 4590 UDP_STAT(udp_in_recvrthdr); 4591 } 4592 if (udp->udp_ipv6_recvpktinfo && 4593 (ipp.ipp_fields & IPPF_IFINDEX)) { 4594 udi_size += sizeof (struct T_opthdr) + 4595 sizeof (struct in6_pktinfo); 4596 UDP_STAT(udp_in_recvpktinfo); 4597 } 4598 4599 } 4600 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4601 udi_size += sizeof (struct T_opthdr) + ucredsize; 4602 cpid = DB_CPID(mp); 4603 UDP_STAT(udp_in_recvucred); 4604 } 4605 4606 if (udp->udp_ipv6_recvhoplimit) { 4607 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4608 UDP_STAT(udp_in_recvhoplimit); 4609 } 4610 4611 if (udp->udp_ipv6_recvtclass) { 4612 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4613 UDP_STAT(udp_in_recvtclass); 4614 } 4615 4616 mp1 = allocb(udi_size, BPRI_MED); 4617 if (mp1 == NULL) { 4618 freemsg(mp); 4619 if (options_mp != NULL) 4620 freeb(options_mp); 4621 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4622 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4623 BUMP_MIB(&udp_mib, udpInErrors); 4624 return; 4625 } 4626 mp1->b_cont = mp; 4627 mp = mp1; 4628 mp->b_datap->db_type = M_PROTO; 4629 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4630 mp->b_wptr = (uchar_t *)tudi + udi_size; 4631 tudi->PRIM_type = T_UNITDATA_IND; 4632 tudi->SRC_length = sizeof (sin6_t); 4633 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 
4634 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4635 sizeof (sin6_t); 4636 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4637 tudi->OPT_length = udi_size; 4638 sin6 = (sin6_t *)&tudi[1]; 4639 if (ipversion == IPV4_VERSION) { 4640 in6_addr_t v6dst; 4641 4642 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4643 &sin6->sin6_addr); 4644 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4645 &v6dst); 4646 sin6->sin6_flowinfo = 0; 4647 sin6->sin6_scope_id = 0; 4648 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4649 connp->conn_zoneid); 4650 } else { 4651 sin6->sin6_addr = ip6h->ip6_src; 4652 /* No sin6_flowinfo per API */ 4653 sin6->sin6_flowinfo = 0; 4654 /* For link-scope source pass up scope id */ 4655 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4656 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4657 sin6->sin6_scope_id = ipp.ipp_ifindex; 4658 else 4659 sin6->sin6_scope_id = 0; 4660 sin6->__sin6_src_id = ip_srcid_find_addr( 4661 &ip6h->ip6_dst, connp->conn_zoneid); 4662 } 4663 sin6->sin6_port = udpha->uha_src_port; 4664 sin6->sin6_family = udp->udp_family; 4665 4666 if (udi_size != 0) { 4667 uchar_t *dstopt; 4668 4669 dstopt = (uchar_t *)&sin6[1]; 4670 if (udp->udp_ipv6_recvpktinfo && 4671 (ipp.ipp_fields & IPPF_IFINDEX)) { 4672 struct T_opthdr *toh; 4673 struct in6_pktinfo *pkti; 4674 4675 toh = (struct T_opthdr *)dstopt; 4676 toh->level = IPPROTO_IPV6; 4677 toh->name = IPV6_PKTINFO; 4678 toh->len = sizeof (struct T_opthdr) + 4679 sizeof (*pkti); 4680 toh->status = 0; 4681 dstopt += sizeof (struct T_opthdr); 4682 pkti = (struct in6_pktinfo *)dstopt; 4683 if (ipversion == IPV6_VERSION) 4684 pkti->ipi6_addr = ip6h->ip6_dst; 4685 else 4686 IN6_IPADDR_TO_V4MAPPED( 4687 ((ipha_t *)rptr)->ipha_dst, 4688 &pkti->ipi6_addr); 4689 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4690 dstopt += sizeof (*pkti); 4691 udi_size -= toh->len; 4692 } 4693 if (udp->udp_ipv6_recvhoplimit) { 4694 struct T_opthdr *toh; 4695 4696 toh = (struct T_opthdr *)dstopt; 4697 toh->level = IPPROTO_IPV6; 4698 toh->name = IPV6_HOPLIMIT; 4699 toh->len = sizeof (struct T_opthdr) + 4700 sizeof (uint_t); 4701 toh->status = 0; 4702 dstopt += sizeof (struct T_opthdr); 4703 if (ipversion == IPV6_VERSION) 4704 *(uint_t *)dstopt = ip6h->ip6_hops; 4705 else 4706 *(uint_t *)dstopt = 4707 ((ipha_t *)rptr)->ipha_ttl; 4708 dstopt += sizeof (uint_t); 4709 udi_size -= toh->len; 4710 } 4711 if (udp->udp_ipv6_recvtclass) { 4712 struct T_opthdr *toh; 4713 4714 toh = (struct T_opthdr *)dstopt; 4715 toh->level = IPPROTO_IPV6; 4716 toh->name = IPV6_TCLASS; 4717 toh->len = sizeof (struct T_opthdr) + 4718 sizeof (uint_t); 4719 toh->status = 0; 4720 dstopt += sizeof (struct T_opthdr); 4721 if (ipversion == IPV6_VERSION) { 4722 *(uint_t *)dstopt = 4723 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4724 } else { 4725 ipha_t *ipha = (ipha_t *)rptr; 4726 *(uint_t *)dstopt = 4727 ipha->ipha_type_of_service; 4728 } 4729 dstopt += sizeof (uint_t); 4730 udi_size -= toh->len; 4731 } 4732 if (udp->udp_ipv6_recvhopopts && 4733 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4734 struct T_opthdr *toh; 4735 4736 toh = (struct T_opthdr *)dstopt; 4737 toh->level = IPPROTO_IPV6; 4738 toh->name = IPV6_HOPOPTS; 4739 toh->len = sizeof (struct T_opthdr) + 4740 ipp.ipp_hopoptslen; 4741 toh->status = 0; 4742 dstopt += sizeof (struct T_opthdr); 4743 bcopy(ipp.ipp_hopopts, dstopt, 4744 ipp.ipp_hopoptslen); 4745 dstopt += ipp.ipp_hopoptslen; 4746 udi_size -= toh->len; 4747 } 4748 if (udp->udp_ipv6_recvdstopts && 4749 udp->udp_ipv6_recvrthdr && 4750 (ipp.ipp_fields & IPPF_RTHDR) && 4751 
(ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4752 struct T_opthdr *toh; 4753 4754 toh = (struct T_opthdr *)dstopt; 4755 toh->level = IPPROTO_IPV6; 4756 toh->name = IPV6_DSTOPTS; 4757 toh->len = sizeof (struct T_opthdr) + 4758 ipp.ipp_rtdstoptslen; 4759 toh->status = 0; 4760 dstopt += sizeof (struct T_opthdr); 4761 bcopy(ipp.ipp_rtdstopts, dstopt, 4762 ipp.ipp_rtdstoptslen); 4763 dstopt += ipp.ipp_rtdstoptslen; 4764 udi_size -= toh->len; 4765 } 4766 if (udp->udp_ipv6_recvrthdr && 4767 (ipp.ipp_fields & IPPF_RTHDR)) { 4768 struct T_opthdr *toh; 4769 4770 toh = (struct T_opthdr *)dstopt; 4771 toh->level = IPPROTO_IPV6; 4772 toh->name = IPV6_RTHDR; 4773 toh->len = sizeof (struct T_opthdr) + 4774 ipp.ipp_rthdrlen; 4775 toh->status = 0; 4776 dstopt += sizeof (struct T_opthdr); 4777 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4778 dstopt += ipp.ipp_rthdrlen; 4779 udi_size -= toh->len; 4780 } 4781 if (udp->udp_ipv6_recvdstopts && 4782 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4783 struct T_opthdr *toh; 4784 4785 toh = (struct T_opthdr *)dstopt; 4786 toh->level = IPPROTO_IPV6; 4787 toh->name = IPV6_DSTOPTS; 4788 toh->len = sizeof (struct T_opthdr) + 4789 ipp.ipp_dstoptslen; 4790 toh->status = 0; 4791 dstopt += sizeof (struct T_opthdr); 4792 bcopy(ipp.ipp_dstopts, dstopt, 4793 ipp.ipp_dstoptslen); 4794 dstopt += ipp.ipp_dstoptslen; 4795 udi_size -= toh->len; 4796 } 4797 4798 if (cr != NULL) { 4799 struct T_opthdr *toh; 4800 4801 toh = (struct T_opthdr *)dstopt; 4802 toh->level = SOL_SOCKET; 4803 toh->name = SCM_UCRED; 4804 toh->len = sizeof (struct T_opthdr) + ucredsize; 4805 toh->status = 0; 4806 (void) cred2ucred(cr, cpid, &toh[1]); 4807 dstopt += toh->len; 4808 udi_size -= toh->len; 4809 } 4810 /* Consumed all of allocated space */ 4811 ASSERT(udi_size == 0); 4812 } 4813 #undef sin6 4814 /* No IP_RECVDSTADDR for IPv6. */ 4815 } 4816 4817 BUMP_MIB(&udp_mib, udpInDatagrams); 4818 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4819 "udp_rput_end: q %p (%S)", q, "end"); 4820 if (options_mp != NULL) 4821 freeb(options_mp); 4822 4823 if (udp->udp_direct_sockfs) { 4824 /* 4825 * There is nothing above us except for the stream head; 4826 * use the read-side synchronous stream interface in 4827 * order to reduce the time spent in interrupt thread. 4828 */ 4829 ASSERT(udp->udp_issocket); 4830 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 4831 } else { 4832 /* 4833 * Use regular STREAMS interface to pass data upstream 4834 * if this is not a socket endpoint, or if we have 4835 * switched over to the slow mode due to sockmod being 4836 * popped or a module being pushed on top of us. 4837 */ 4838 putnext(UDP_RD(q), mp); 4839 } 4840 return; 4841 4842 tossit: 4843 freemsg(mp); 4844 if (options_mp != NULL) 4845 freeb(options_mp); 4846 BUMP_MIB(&udp_mib, udpInErrors); 4847 } 4848 4849 void 4850 udp_conn_recv(conn_t *connp, mblk_t *mp) 4851 { 4852 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 4853 } 4854 4855 /* ARGSUSED */ 4856 static void 4857 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 4858 { 4859 udp_input((conn_t *)arg, mp); 4860 _UDP_EXIT((conn_t *)arg); 4861 } 4862 4863 /* 4864 * Process non-M_DATA messages as well as M_DATA messages that requires 4865 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 
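 * M_DATA packets carrying IPv4 options are diverted here from udp_input()
 * via udp_become_writer(..., udp_rput_other_wrapper, SQTAG_UDP_INPUT), so
 * that udp_ip_rcv_options can be updated while the endpoint is held as
 * writer.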
4866 */ 4867 static void 4868 udp_rput_other(queue_t *q, mblk_t *mp) 4869 { 4870 struct T_unitdata_ind *tudi; 4871 mblk_t *mp1;
4872 uchar_t *rptr; 4873 uchar_t *new_rptr; 4874 int hdr_length; 4875 int udi_size; /* Size of T_unitdata_ind */
4876 int opt_len; /* Length of IP options */ 4877 sin_t *sin; 4878 struct T_error_ack *tea; 4879 mblk_t *options_mp = NULL;
4880 in_pktinfo_t *pinfo; 4881 boolean_t recv_on = B_FALSE; 4882 cred_t *cr = NULL; 4883 udp_t *udp = Q_TO_UDP(q); 4884 pid_t cpid; 4885
4886 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4887 "udp_rput_other: q %p mp %p", q, mp); 4888
4889 ASSERT(OK_32PTR(mp->b_rptr)); 4890 rptr = mp->b_rptr; 4891
4892 switch (mp->b_datap->db_type) { 4893 case M_CTL: 4894 /* 4895 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 4896 */
4897 recv_on = B_TRUE; 4898 options_mp = mp; 4899 pinfo = (in_pktinfo_t *)options_mp->b_rptr; 4900
4901 /* 4902 * The actual data is in mp->b_cont 4903 */ 4904 mp = mp->b_cont; 4905 ASSERT(OK_32PTR(mp->b_rptr)); 4906 rptr = mp->b_rptr; 4907 break;
4908 case M_DATA: 4909 /* 4910 * M_DATA messages contain IPv4 datagrams. They are handled 4911 * after this switch. 4912 */ 4913 break;
4914 case M_PROTO: 4915 case M_PCPROTO: 4916 /* M_PROTO messages contain some type of TPI message. */
4917 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 4918 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4919 freemsg(mp);
4920 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4921 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 4922 return; 4923 }
4924 tea = (struct T_error_ack *)rptr; 4925 4926 switch (tea->PRIM_type) { 4927 case T_ERROR_ACK: 4928 switch (tea->ERROR_prim) {
4929 case O_T_BIND_REQ: 4930 case T_BIND_REQ: { 4931 /* 4932 * If our O_T_BIND_REQ/T_BIND_REQ fails,
4933 * clear out the associated port and source 4934 * address before passing the message
4935 * upstream. If this was caused by a T_CONN_REQ, 4936 * revert back to the bound state. 4937 */ 4938 udp_fanout_t *udpf; 4939
4940 udpf = &udp_bind_fanout[ 4941 UDP_BIND_HASH(udp->udp_port)]; 4942 mutex_enter(&udpf->uf_lock);
4943 if (udp->udp_state == TS_DATA_XFER) { 4944 /* Connect failed */ 4945 tea->ERROR_prim = T_CONN_REQ;
4946 /* Revert back to the bound source */ 4947 udp->udp_v6src = udp->udp_bound_v6src; 4948 udp->udp_state = TS_IDLE;
4949 mutex_exit(&udpf->uf_lock); 4950 if (udp->udp_family == AF_INET6) 4951 (void) udp_build_hdrs(q, udp); 4952 break; 4953 } 4954
4955 if (udp->udp_discon_pending) { 4956 tea->ERROR_prim = T_DISCON_REQ; 4957 udp->udp_discon_pending = 0; 4958 }
4959 V6_SET_ZERO(udp->udp_v6src); 4960 V6_SET_ZERO(udp->udp_bound_v6src); 4961 udp->udp_state = TS_UNBND;
4962 udp_bind_hash_remove(udp, B_TRUE); 4963 udp->udp_port = 0; 4964 mutex_exit(&udpf->uf_lock);
4965 if (udp->udp_family == AF_INET6) 4966 (void) udp_build_hdrs(q, udp); 4967 break; 4968 }
4969 default: 4970 break; 4971 } 4972 break; 4973 case T_BIND_ACK: 4974 udp_rput_bind_ack(q, mp); 4975 return; 4976
4977 case T_OPTMGMT_ACK: 4978 case T_OK_ACK: 4979 break; 4980 default: 4981 freemsg(mp); 4982 return; 4983 }
4984 putnext(UDP_RD(q), mp); 4985 return; 4986 } 4987
4988 /* 4989 * This is the inbound data path. 4990 * First, we make sure the data contains both IP and UDP headers. 4991 *
4992 * This handles IPv4 packets only for AF_INET sockets. 4993 * AF_INET6 sockets can never access udp_ip_rcv_options thus there
4994 * is no need to save the options.
4995 */ 4996 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4997 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
4998 if (mp->b_wptr - rptr < hdr_length) { 4999 if (!pullupmsg(mp, hdr_length)) { 5000 freemsg(mp); 5001 if (options_mp != NULL)
5002 freeb(options_mp); 5003 5004 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
5005 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 5006 BUMP_MIB(&udp_mib, udpInErrors); 5007 return; 5008 }
5009 rptr = mp->b_rptr; 5010 } 5011 /* Walk past the headers. */ 5012 new_rptr = rptr + hdr_length; 5013 if (!udp->udp_rcvhdr)
5014 mp->b_rptr = new_rptr; 5015
5016 /* Save the options if any */ 5017 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 5018 if (opt_len > 0) {
5019 if (opt_len > udp->udp_ip_rcv_options_len) { 5020 if (udp->udp_ip_rcv_options_len) 5021 mi_free((char *)udp->udp_ip_rcv_options);
5022 udp->udp_ip_rcv_options_len = 0; 5023 udp->udp_ip_rcv_options = 5024 (uchar_t *)mi_alloc(opt_len, BPRI_HI);
5025 if (udp->udp_ip_rcv_options) 5026 udp->udp_ip_rcv_options_len = opt_len; 5027 }
5028 if (udp->udp_ip_rcv_options_len) { 5029 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 5030 udp->udp_ip_rcv_options, opt_len);
5031 /* Adjust length if we are reusing the space */ 5032 udp->udp_ip_rcv_options_len = opt_len; 5033 }
5034 } else if (udp->udp_ip_rcv_options_len) { 5035 mi_free((char *)udp->udp_ip_rcv_options); 5036 udp->udp_ip_rcv_options = NULL;
5037 udp->udp_ip_rcv_options_len = 0; 5038 } 5039
5040 /* 5041 * Normally only send up the address. 5042 * If IP_RECVDSTADDR is set we include the destination IP
5043 * address as an option. With IP_RECVOPTS we include all 5044 * the IP options. 5045 */
5046 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5047 if (udp->udp_recvdstaddr) {
5048 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5049 UDP_STAT(udp_in_recvdstaddr); 5050 }
5051 if (udp->udp_recvopts && opt_len > 0) { 5052 udi_size += sizeof (struct T_opthdr) + opt_len; 5053 UDP_STAT(udp_in_recvopts); 5054 } 5055
5056 /* 5057 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5058 * space accordingly 5059 */
5060 if (udp->udp_recvif && recv_on && 5061 (pinfo->in_pkt_flags & IPF_RECVIF)) {
5062 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5063 UDP_STAT(udp_in_recvif); 5064 } 5065
5066 if (udp->udp_recvslla && recv_on && 5067 (pinfo->in_pkt_flags & IPF_RECVSLLA)) {
5068 udi_size += sizeof (struct T_opthdr) + 5069 sizeof (struct sockaddr_dl); 5070 UDP_STAT(udp_in_recvslla); 5071 } 5072
5073 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5074 udi_size += sizeof (struct T_opthdr) + ucredsize;
5075 cpid = DB_CPID(mp); 5076 UDP_STAT(udp_in_recvucred); 5077 }
5078 /* 5079 * If IP_RECVTTL is set allocate the appropriate sized buffer 5080 */ 5081 if (udp->udp_recvttl) {
5082 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5083 UDP_STAT(udp_in_recvttl); 5084 } 5085
5086 /* Allocate a message block for the T_UNITDATA_IND structure.
*/ 5087 mp1 = allocb(udi_size, BPRI_MED); 5088 if (mp1 == NULL) { 5089 freemsg(mp); 5090 if (options_mp != NULL) 5091 freeb(options_mp); 5092 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5093 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5094 BUMP_MIB(&udp_mib, udpInErrors); 5095 return; 5096 } 5097 mp1->b_cont = mp; 5098 mp = mp1; 5099 mp->b_datap->db_type = M_PROTO; 5100 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5101 mp->b_wptr = (uchar_t *)tudi + udi_size; 5102 tudi->PRIM_type = T_UNITDATA_IND; 5103 tudi->SRC_length = sizeof (sin_t); 5104 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5105 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5106 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5107 tudi->OPT_length = udi_size; 5108 5109 sin = (sin_t *)&tudi[1]; 5110 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5111 sin->sin_port = ((in_port_t *) 5112 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5113 sin->sin_family = AF_INET; 5114 *(uint32_t *)&sin->sin_zero[0] = 0; 5115 *(uint32_t *)&sin->sin_zero[4] = 0; 5116 5117 /* 5118 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5119 * IP_RECVTTL has been set. 5120 */ 5121 if (udi_size != 0) { 5122 /* 5123 * Copy in destination address before options to avoid any 5124 * padding issues. 5125 */ 5126 char *dstopt; 5127 5128 dstopt = (char *)&sin[1]; 5129 if (udp->udp_recvdstaddr) { 5130 struct T_opthdr *toh; 5131 ipaddr_t *dstptr; 5132 5133 toh = (struct T_opthdr *)dstopt; 5134 toh->level = IPPROTO_IP; 5135 toh->name = IP_RECVDSTADDR; 5136 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5137 toh->status = 0; 5138 dstopt += sizeof (struct T_opthdr); 5139 dstptr = (ipaddr_t *)dstopt; 5140 *dstptr = (((ipaddr_t *)rptr)[4]); 5141 dstopt += sizeof (ipaddr_t); 5142 udi_size -= toh->len; 5143 } 5144 if (udp->udp_recvopts && udi_size != 0) { 5145 struct T_opthdr *toh; 5146 5147 toh = (struct T_opthdr *)dstopt; 5148 toh->level = IPPROTO_IP; 5149 toh->name = IP_RECVOPTS; 5150 toh->len = sizeof (struct T_opthdr) + opt_len; 5151 toh->status = 0; 5152 dstopt += sizeof (struct T_opthdr); 5153 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5154 dstopt += opt_len; 5155 udi_size -= toh->len; 5156 } 5157 5158 if (udp->udp_recvslla && recv_on && 5159 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5160 5161 struct T_opthdr *toh; 5162 struct sockaddr_dl *dstptr; 5163 5164 toh = (struct T_opthdr *)dstopt; 5165 toh->level = IPPROTO_IP; 5166 toh->name = IP_RECVSLLA; 5167 toh->len = sizeof (struct T_opthdr) + 5168 sizeof (struct sockaddr_dl); 5169 toh->status = 0; 5170 dstopt += sizeof (struct T_opthdr); 5171 dstptr = (struct sockaddr_dl *)dstopt; 5172 bcopy(&pinfo->in_pkt_slla, dstptr, 5173 sizeof (struct sockaddr_dl)); 5174 dstopt += sizeof (struct sockaddr_dl); 5175 udi_size -= toh->len; 5176 } 5177 5178 if (udp->udp_recvif && recv_on && 5179 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5180 5181 struct T_opthdr *toh; 5182 uint_t *dstptr; 5183 5184 toh = (struct T_opthdr *)dstopt; 5185 toh->level = IPPROTO_IP; 5186 toh->name = IP_RECVIF; 5187 toh->len = sizeof (struct T_opthdr) + 5188 sizeof (uint_t); 5189 toh->status = 0; 5190 dstopt += sizeof (struct T_opthdr); 5191 dstptr = (uint_t *)dstopt; 5192 *dstptr = pinfo->in_pkt_ifindex; 5193 dstopt += sizeof (uint_t); 5194 udi_size -= toh->len; 5195 } 5196 5197 if (cr != NULL) { 5198 struct T_opthdr *toh; 5199 5200 toh = (struct T_opthdr *)dstopt; 5201 toh->level = SOL_SOCKET; 5202 toh->name = SCM_UCRED; 5203 toh->len = sizeof (struct T_opthdr) + ucredsize; 5204 toh->status 
= 0; 5205 (void) cred2ucred(cr, cpid, &toh[1]); 5206 dstopt += toh->len; 5207 udi_size -= toh->len; 5208 } 5209 5210 if (udp->udp_recvttl) { 5211 struct T_opthdr *toh; 5212 uint8_t *dstptr; 5213 5214 toh = (struct T_opthdr *)dstopt; 5215 toh->level = IPPROTO_IP; 5216 toh->name = IP_RECVTTL; 5217 toh->len = sizeof (struct T_opthdr) + 5218 sizeof (uint8_t); 5219 toh->status = 0; 5220 dstopt += sizeof (struct T_opthdr); 5221 dstptr = (uint8_t *)dstopt; 5222 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5223 dstopt += sizeof (uint8_t); 5224 udi_size -= toh->len; 5225 } 5226 5227 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5228 } 5229 BUMP_MIB(&udp_mib, udpInDatagrams); 5230 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5231 "udp_rput_other_end: q %p (%S)", q, "end"); 5232 if (options_mp != NULL) 5233 freeb(options_mp); 5234 5235 if (udp->udp_direct_sockfs) { 5236 /* 5237 * There is nothing above us except for the stream head; 5238 * use the read-side synchronous stream interface in 5239 * order to reduce the time spent in interrupt thread. 5240 */ 5241 ASSERT(udp->udp_issocket); 5242 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5243 } else { 5244 /* 5245 * Use regular STREAMS interface to pass data upstream 5246 * if this is not a socket endpoint, or if we have 5247 * switched over to the slow mode due to sockmod being 5248 * popped or a module being pushed on top of us. 5249 */ 5250 putnext(UDP_RD(q), mp); 5251 } 5252 } 5253 5254 /* ARGSUSED */ 5255 static void 5256 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5257 { 5258 conn_t *connp = arg; 5259 5260 udp_rput_other(connp->conn_rq, mp); 5261 udp_exit(connp); 5262 } 5263 5264 /* 5265 * Process a T_BIND_ACK 5266 */ 5267 static void 5268 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5269 { 5270 udp_t *udp = Q_TO_UDP(q); 5271 mblk_t *mp1; 5272 ire_t *ire; 5273 struct T_bind_ack *tba; 5274 uchar_t *addrp; 5275 ipa_conn_t *ac; 5276 ipa6_conn_t *ac6; 5277 5278 if (udp->udp_discon_pending) 5279 udp->udp_discon_pending = 0; 5280 5281 /* 5282 * If a broadcast/multicast address was bound set 5283 * the source address to 0. 5284 * This ensures no datagrams with broadcast address 5285 * as source address are emitted (which would violate 5286 * RFC1122 - Hosts requirements) 5287 * 5288 * Note that when connecting the returned IRE is 5289 * for the destination address and we only perform 5290 * the broadcast check for the source address (it 5291 * is OK to connect to a broadcast/multicast address.) 5292 */ 5293 mp1 = mp->b_cont; 5294 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5295 ire = (ire_t *)mp1->b_rptr; 5296 5297 /* 5298 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5299 * local address. 
5300 */ 5301 if (ire->ire_type == IRE_BROADCAST && 5302 udp->udp_state != TS_DATA_XFER) { 5303 /* This was just a local bind to a broadcast addr */ 5304 V6_SET_ZERO(udp->udp_v6src); 5305 if (udp->udp_family == AF_INET6) 5306 (void) udp_build_hdrs(q, udp); 5307 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5308 /* 5309 * Local address not yet set - pick it from the 5310 * T_bind_ack 5311 */ 5312 tba = (struct T_bind_ack *)mp->b_rptr; 5313 addrp = &mp->b_rptr[tba->ADDR_offset]; 5314 switch (udp->udp_family) { 5315 case AF_INET: 5316 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5317 ac = (ipa_conn_t *)addrp; 5318 } else { 5319 ASSERT(tba->ADDR_length == 5320 sizeof (ipa_conn_x_t)); 5321 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5322 } 5323 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5324 &udp->udp_v6src); 5325 break; 5326 case AF_INET6: 5327 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5328 ac6 = (ipa6_conn_t *)addrp; 5329 } else { 5330 ASSERT(tba->ADDR_length == 5331 sizeof (ipa6_conn_x_t)); 5332 ac6 = &((ipa6_conn_x_t *) 5333 addrp)->ac6x_conn; 5334 } 5335 udp->udp_v6src = ac6->ac6_laddr; 5336 (void) udp_build_hdrs(q, udp); 5337 break; 5338 } 5339 } 5340 mp1 = mp1->b_cont; 5341 } 5342 /* 5343 * Look for one or more appended ACK message added by 5344 * udp_connect or udp_disconnect. 5345 * If none found just send up the T_BIND_ACK. 5346 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5347 * udp_disconnect has appended a T_OK_ACK. 5348 */ 5349 if (mp1 != NULL) { 5350 if (mp->b_cont == mp1) 5351 mp->b_cont = NULL; 5352 else { 5353 ASSERT(mp->b_cont->b_cont == mp1); 5354 mp->b_cont->b_cont = NULL; 5355 } 5356 freemsg(mp); 5357 mp = mp1; 5358 while (mp != NULL) { 5359 mp1 = mp->b_cont; 5360 mp->b_cont = NULL; 5361 putnext(UDP_RD(q), mp); 5362 mp = mp1; 5363 } 5364 return; 5365 } 5366 freemsg(mp->b_cont); 5367 mp->b_cont = NULL; 5368 putnext(UDP_RD(q), mp); 5369 } 5370 5371 /* 5372 * return SNMP stuff in buffer in mpdata 5373 */ 5374 int 5375 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5376 { 5377 mblk_t *mpdata; 5378 mblk_t *mp_conn_ctl; 5379 mblk_t *mp6_conn_ctl; 5380 mblk_t *mp_conn_data; 5381 mblk_t *mp6_conn_data; 5382 mblk_t *mp_conn_tail = NULL; 5383 mblk_t *mp6_conn_tail = NULL; 5384 struct opthdr *optp; 5385 mib2_udpEntry_t ude; 5386 mib2_udp6Entry_t ude6; 5387 int state; 5388 zoneid_t zoneid; 5389 int i; 5390 connf_t *connfp; 5391 conn_t *connp = Q_TO_CONN(q); 5392 udp_t *udp = connp->conn_udp; 5393 5394 if (mpctl == NULL || 5395 (mpdata = mpctl->b_cont) == NULL || 5396 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5397 (mp6_conn_ctl = copymsg(mpctl)) == NULL) { 5398 freemsg(mp_conn_ctl); 5399 return (0); 5400 } 5401 5402 mp_conn_data = mp_conn_ctl->b_cont; 5403 mp6_conn_data = mp6_conn_ctl->b_cont; 5404 5405 zoneid = connp->conn_zoneid; 5406 5407 /* fixed length structure for IPv4 and IPv6 counters */ 5408 SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5409 SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5410 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5411 optp->level = MIB2_UDP; 5412 optp->name = 0; 5413 (void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib)); 5414 optp->len = msgdsize(mpdata); 5415 qreply(q, mpctl); 5416 5417 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5418 connfp = &ipcl_globalhash_fanout[i]; 5419 connp = NULL; 5420 5421 while ((connp = ipcl_get_next_conn(connfp, connp, 5422 IPCL_UDP))) { 5423 udp = connp->conn_udp; 5424 if (zoneid != connp->conn_zoneid) 5425 continue; 5426 5427 /* 5428 * Note that the port 
numbers are sent in 5429 * host byte order 5430 */ 5431 5432 if (udp->udp_state == TS_UNBND) 5433 state = MIB2_UDP_unbound;
5434 else if (udp->udp_state == TS_IDLE) 5435 state = MIB2_UDP_idle; 5436 else if (udp->udp_state == TS_DATA_XFER)
5437 state = MIB2_UDP_connected; 5438 else 5439 state = MIB2_UDP_unknown; 5440
5441 /* 5442 * Create an IPv4 table entry for IPv4 entries and also 5443 * any IPv6 entries which are bound to in6addr_any
5444 * (i.e. anything an IPv4 peer could connect/send to). 5445 */ 5446 if (udp->udp_ipversion == IPV4_VERSION ||
5447 (udp->udp_state <= TS_IDLE && 5448 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5449 ude.udpEntryInfo.ue_state = state;
5450 /* 5451 * If in6addr_any this will set it to 5452 * INADDR_ANY 5453 */ 5454 ude.udpLocalAddress =
5455 V4_PART_OF_V6(udp->udp_v6src); 5456 ude.udpLocalPort = ntohs(udp->udp_port); 5457 if (udp->udp_state == TS_DATA_XFER) {
5458 /* 5459 * Can potentially get here for 5460 * a v6 socket if another process 5461 * (say, ping) has just done a
5462 * sendto(), changing the state 5463 * from the TS_IDLE above to 5464 * TS_DATA_XFER by the time we hit
5465 * this part of the code. 5466 */ 5467 ude.udpEntryInfo.ue_RemoteAddress = 5468 V4_PART_OF_V6(udp->udp_v6dst);
5469 ude.udpEntryInfo.ue_RemotePort = 5470 ntohs(udp->udp_dstport); 5471 } else {
5472 ude.udpEntryInfo.ue_RemoteAddress = 0; 5473 ude.udpEntryInfo.ue_RemotePort = 0; 5474 }
5475 (void) snmp_append_data2(mp_conn_data, 5476 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5477 }
5478 if (udp->udp_ipversion == IPV6_VERSION) { 5479 ude6.udp6EntryInfo.ue_state = state; 5480 ude6.udp6LocalAddress = udp->udp_v6src;
5481 ude6.udp6LocalPort = ntohs(udp->udp_port); 5482 ude6.udp6IfIndex = udp->udp_bound_if; 5483 if (udp->udp_state == TS_DATA_XFER) {
5484 ude6.udp6EntryInfo.ue_RemoteAddress = 5485 udp->udp_v6dst; 5486 ude6.udp6EntryInfo.ue_RemotePort =
5487 ntohs(udp->udp_dstport); 5488 } else { 5489 ude6.udp6EntryInfo.ue_RemoteAddress = 5490 sin6_null.sin6_addr;
5491 ude6.udp6EntryInfo.ue_RemotePort = 0; 5492 } 5493 (void) snmp_append_data2(mp6_conn_data,
5494 &mp6_conn_tail, (char *)&ude6, 5495 sizeof (ude6)); 5496 } 5497 } 5498 } 5499
5500 /* IPv4 UDP endpoints */ 5501 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5502 sizeof (struct T_optmgmt_ack)];
5503 optp->level = MIB2_UDP; 5504 optp->name = MIB2_UDP_ENTRY; 5505 optp->len = msgdsize(mp_conn_data); 5506 qreply(q, mp_conn_ctl); 5507
5508 /* IPv6 UDP endpoints */ 5509 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5510 sizeof (struct T_optmgmt_ack)];
5511 optp->level = MIB2_UDP6; 5512 optp->name = MIB2_UDP6_ENTRY; 5513 optp->len = msgdsize(mp6_conn_data); 5514 qreply(q, mp6_conn_ctl); 5515
5516 return (1); 5517 } 5518
5519 /* 5520 * Return 0 for an invalid set request, 1 otherwise, including non-udp requests. 5521 * NOTE: Per MIB-II, UDP has no writable data.
5522 * TODO: If this ever actually tries to set anything, it needs 5523 * to do the appropriate locking.
5524 */ 5525 /* ARGSUSED */ 5526 int 5527 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5528 uchar_t *ptr, int len) 5529 { 5530 switch (level) { 5531 case MIB2_UDP: 5532 return (0); 5533 default: 5534 return (1); 5535 } 5536 } 5537 5538 static void 5539 udp_report_item(mblk_t *mp, udp_t *udp) 5540 { 5541 char *state; 5542 char addrbuf1[INET6_ADDRSTRLEN]; 5543 char addrbuf2[INET6_ADDRSTRLEN]; 5544 uint_t print_len, buf_len; 5545 5546 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5547 ASSERT(buf_len >= 0); 5548 if (buf_len == 0) 5549 return; 5550 5551 if (udp->udp_state == TS_UNBND) 5552 state = "UNBOUND"; 5553 else if (udp->udp_state == TS_IDLE) 5554 state = "IDLE"; 5555 else if (udp->udp_state == TS_DATA_XFER) 5556 state = "CONNECTED"; 5557 else 5558 state = "UnkState"; 5559 print_len = snprintf((char *)mp->b_wptr, buf_len, 5560 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5561 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5562 inet_ntop(AF_INET6, &udp->udp_v6src, 5563 addrbuf1, sizeof (addrbuf1)), 5564 inet_ntop(AF_INET6, &udp->udp_v6dst, 5565 addrbuf2, sizeof (addrbuf2)), 5566 ntohs(udp->udp_dstport), state); 5567 if (print_len < buf_len) { 5568 mp->b_wptr += print_len; 5569 } else { 5570 mp->b_wptr += buf_len; 5571 } 5572 } 5573 5574 /* Report for ndd "udp_status" */ 5575 /* ARGSUSED */ 5576 static int 5577 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5578 { 5579 zoneid_t zoneid; 5580 connf_t *connfp; 5581 conn_t *connp = Q_TO_CONN(q); 5582 udp_t *udp = connp->conn_udp; 5583 int i; 5584 5585 /* 5586 * Because of the ndd constraint, at most we can have 64K buffer 5587 * to put in all UDP info. So to be more efficient, just 5588 * allocate a 64K buffer here, assuming we need that large buffer. 5589 * This may be a problem as any user can read udp_status. Therefore 5590 * we limit the rate of doing this using udp_ndd_get_info_interval. 5591 * This should be OK as normal users should not do this too often. 5592 */ 5593 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 5594 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 5595 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 5596 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5597 return (0); 5598 } 5599 } 5600 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5601 /* The following may work even if we cannot get a large buf. */ 5602 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5603 return (0); 5604 } 5605 (void) mi_mpprintf(mp, 5606 "UDP " MI_COL_HDRPAD_STR 5607 /* 12345678[89ABCDEF] */ 5608 " zone lport src addr dest addr port state"); 5609 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5610 5611 zoneid = connp->conn_zoneid; 5612 5613 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5614 connfp = &ipcl_globalhash_fanout[i]; 5615 connp = NULL; 5616 5617 while ((connp = ipcl_get_next_conn(connfp, connp, 5618 IPCL_UDP))) { 5619 udp = connp->conn_udp; 5620 if (zoneid != GLOBAL_ZONEID && 5621 zoneid != connp->conn_zoneid) 5622 continue; 5623 5624 udp_report_item(mp->b_cont, udp); 5625 } 5626 } 5627 udp_last_ndd_get_info_time = ddi_get_lbolt(); 5628 return (0); 5629 } 5630 5631 /* 5632 * This routine creates a T_UDERROR_IND message and passes it upstream. 5633 * The address and options are copied from the T_UNITDATA_REQ message 5634 * passed in mp. This message is freed. 
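 * For M_DATA messages there is no T_unitdata_req to copy from, so the
 * caller must supply destaddr and destlen itself (see the ASSERT at the
 * top of the routine).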
5635 */ 5636 static void 5637 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5638 t_scalar_t err) 5639 {
5640 struct T_unitdata_req *tudr; 5641 mblk_t *mp1; 5642 uchar_t *optaddr; 5643 t_scalar_t optlen; 5644
5645 if (DB_TYPE(mp) == M_DATA) { 5646 ASSERT(destaddr != NULL && destlen != 0); 5647 optaddr = NULL; 5648 optlen = 0; 5649 } else {
5650 if ((mp->b_wptr < mp->b_rptr) || 5651 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5652 goto done; 5653 }
5654 tudr = (struct T_unitdata_req *)mp->b_rptr; 5655 destaddr = mp->b_rptr + tudr->DEST_offset;
5656 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5657 destaddr + tudr->DEST_length < mp->b_rptr ||
5658 destaddr + tudr->DEST_length > mp->b_wptr) { 5659 goto done; 5660 }
5661 optaddr = mp->b_rptr + tudr->OPT_offset; 5662 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
5663 optaddr + tudr->OPT_length < mp->b_rptr || 5664 optaddr + tudr->OPT_length > mp->b_wptr) { 5665 goto done; 5666 }
5667 destlen = tudr->DEST_length; 5668 optlen = tudr->OPT_length; 5669 } 5670
5671 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5672 (char *)optaddr, optlen, err); 5673 if (mp1 != NULL)
5674 putnext(UDP_RD(q), mp1); 5675 5676 done: 5677 freemsg(mp); 5678 } 5679
5680 /* 5681 * This routine removes a port number association from a stream. It 5682 * is called by udp_wput to handle T_UNBIND_REQ messages. 5683 */
5684 static void 5685 udp_unbind(queue_t *q, mblk_t *mp) 5686 { 5687 udp_t *udp = Q_TO_UDP(q); 5688
5689 /* If a bind has not been done, we can't unbind. */ 5690 if (udp->udp_state == TS_UNBND) { 5691 udp_err_ack(q, mp, TOUTSTATE, 0); 5692 return; 5693 }
5694 if (cl_inet_unbind != NULL) { 5695 /* 5696 * Running in cluster mode - register unbind information 5697 */
5698 if (udp->udp_ipversion == IPV4_VERSION) { 5699 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
5700 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5701 (in_port_t)udp->udp_port); 5702 } else {
5703 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5704 (uint8_t *)&(udp->udp_v6src), 5705 (in_port_t)udp->udp_port); 5706 } 5707 } 5708
5709 udp_bind_hash_remove(udp, B_FALSE); 5710 V6_SET_ZERO(udp->udp_v6src); 5711 V6_SET_ZERO(udp->udp_bound_v6src);
5712 udp->udp_port = 0; 5713 udp->udp_state = TS_UNBND; 5714
5715 if (udp->udp_family == AF_INET6) { 5716 int error; 5717 5718 /* Rebuild the header template */
5719 error = udp_build_hdrs(q, udp); 5720 if (error != 0) { 5721 udp_err_ack(q, mp, TSYSERR, error); 5722 return; 5723 } 5724 }
5725 /* 5726 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5727 * and therefore ip_unbind must never return NULL. 5728 */
5729 mp = ip_unbind(q, mp); 5730 ASSERT(mp != NULL); 5731 putnext(UDP_RD(q), mp); 5732 } 5733
5734 /* 5735 * Don't let the port fall into the privileged range. 5736 * Since the extra privileged ports can be arbitrary, we also
5737 * ensure that we exclude those from consideration. 5738 * udp_g_epriv_ports is not sorted, thus we loop over it until
5739 * there are no changes. 5740 */ 5741 static in_port_t 5742 udp_update_next_port(in_port_t port, boolean_t random) 5743 { 5744 int i; 5745
5746 if (random && udp_random_anon_port != 0) { 5747 (void) random_get_pseudo_bytes((uint8_t *)&port, 5748 sizeof (in_port_t));
5749 /* 5750 * Unless changed by a sys admin, the smallest anon port 5751 * is 32768 and the largest anon port is 65535. It is
5752 * quite likely (a 50% chance) that the random port will be smaller 5753 * than the smallest anon port.
When that happens, 5754 * add port % (anon port range) to the smallest anon 5755 * port to get the random port. It should fall into the 5756 * valid anon port range. 5757 */ 5758 if (port < udp_smallest_anon_port) { 5759 port = udp_smallest_anon_port + 5760 port % (udp_largest_anon_port - 5761 udp_smallest_anon_port); 5762 } 5763 } 5764 5765 retry: 5766 if (port < udp_smallest_anon_port || port > udp_largest_anon_port) 5767 port = udp_smallest_anon_port; 5768 5769 if (port < udp_smallest_nonpriv_port) 5770 port = udp_smallest_nonpriv_port; 5771 5772 for (i = 0; i < udp_g_num_epriv_ports; i++) { 5773 if (port == udp_g_epriv_ports[i]) { 5774 port++; 5775 /* 5776 * Make sure that the port is in the 5777 * valid range. 5778 */ 5779 goto retry; 5780 } 5781 } 5782 return (port); 5783 } 5784 5785 static mblk_t * 5786 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5787 uint_t srcid, int *error) 5788 { 5789 udp_t *udp = connp->conn_udp; 5790 queue_t *q = connp->conn_wq; 5791 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 5792 mblk_t *mp2; 5793 ipha_t *ipha; 5794 int ip_hdr_length; 5795 uint32_t ip_len; 5796 udpha_t *udpha; 5797 5798 *error = 0; 5799 5800 /* mp1 points to the M_DATA mblk carrying the packet */ 5801 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5802 5803 /* Add an IP header */ 5804 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 5805 udp->udp_ip_snd_options_len; 5806 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5807 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5808 !OK_32PTR(ipha)) { 5809 mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO); 5810 if (mp2 == NULL) { 5811 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5812 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5813 *error = ENOMEM; 5814 goto done; 5815 } 5816 mp2->b_wptr = DB_LIM(mp2); 5817 mp2->b_cont = mp1; 5818 mp1 = mp2; 5819 if (DB_TYPE(mp) != M_DATA) 5820 mp->b_cont = mp1; 5821 else 5822 mp = mp1; 5823 5824 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5825 } 5826 ip_hdr_length -= UDPH_SIZE; 5827 #ifdef _BIG_ENDIAN 5828 /* Set version, header length, and tos */ 5829 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5830 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5831 udp->udp_type_of_service); 5832 /* Set ttl and protocol */ 5833 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5834 #else 5835 /* Set version, header length, and tos */ 5836 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5837 ((udp->udp_type_of_service << 8) | 5838 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5839 /* Set ttl and protocol */ 5840 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5841 #endif 5842 /* 5843 * Copy our address into the packet. If this is zero, 5844 * first look at __sin6_src_id for a hint. If we leave the source 5845 * as INADDR_ANY then ip will fill in the real source address. 
5846 */ 5847 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5848 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5849 in6_addr_t v6src; 5850 5851 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid); 5852 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5853 } 5854 5855 ipha->ipha_fragment_offset_and_flags = 0; 5856 ipha->ipha_ident = 0; 5857 5858 mp1->b_rptr = (uchar_t *)ipha; 5859 5860 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5861 (uintptr_t)UINT_MAX); 5862 5863 /* Determine length of packet */ 5864 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5865 if ((mp2 = mp1->b_cont) != NULL) { 5866 do { 5867 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5868 ip_len += (uint32_t)MBLKL(mp2); 5869 } while ((mp2 = mp2->b_cont) != NULL); 5870 } 5871 /* 5872 * If the size of the packet is greater than the maximum allowed by 5873 * ip, return an error. Passing this down could cause panics because 5874 * the size will have wrapped and be inconsistent with the msg size. 5875 */ 5876 if (ip_len > IP_MAXPACKET) { 5877 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5878 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5879 *error = EMSGSIZE; 5880 goto done; 5881 } 5882 ipha->ipha_length = htons((uint16_t)ip_len); 5883 ip_len -= ip_hdr_length; 5884 ip_len = htons((uint16_t)ip_len); 5885 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5886 5887 /* 5888 * Copy in the destination address 5889 */ 5890 if (v4dst == INADDR_ANY) 5891 ipha->ipha_dst = htonl(INADDR_LOOPBACK); 5892 else 5893 ipha->ipha_dst = v4dst; 5894 5895 /* 5896 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5897 */ 5898 if (CLASSD(v4dst)) 5899 ipha->ipha_ttl = udp->udp_multicast_ttl; 5900 5901 udpha->uha_dst_port = port; 5902 udpha->uha_src_port = udp->udp_port; 5903 5904 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 5905 uint32_t cksum; 5906 5907 bcopy(udp->udp_ip_snd_options, &ipha[1], 5908 udp->udp_ip_snd_options_len); 5909 /* 5910 * Massage source route putting first source route in ipha_dst. 5911 * Ignore the destination in T_unitdata_req. 5912 * Create a checksum adjustment for a source route, if any. 5913 */ 5914 cksum = ip_massage_options(ipha); 5915 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5916 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5917 (ipha->ipha_dst & 0xFFFF); 5918 if ((int)cksum < 0) 5919 cksum--; 5920 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5921 /* 5922 * IP does the checksum if uha_checksum is non-zero, 5923 * We make it easy for IP to include our pseudo header 5924 * by putting our length in uha_checksum. 5925 */ 5926 cksum += ip_len; 5927 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5928 /* There might be a carry. */ 5929 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5930 #ifdef _LITTLE_ENDIAN 5931 if (udp_do_checksum) 5932 ip_len = (cksum << 16) | ip_len; 5933 #else 5934 if (udp_do_checksum) 5935 ip_len = (ip_len << 16) | cksum; 5936 else 5937 ip_len <<= 16; 5938 #endif 5939 } else { 5940 /* 5941 * IP does the checksum if uha_checksum is non-zero, 5942 * We make it easy for IP to include our pseudo header 5943 * by putting our length in uha_checksum. 
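 * (Added clarification, not in the original comment: the 32-bit store
 * into uha_length below writes uha_length and uha_checksum together.
 * With udp_do_checksum set, the UDP length lands in both halves, so the
 * non-zero uha_checksum tells IP to compute the real UDP checksum over
 * the pseudo header; otherwise uha_checksum is left zero and the UDP
 * checksum is skipped.)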
5944 */ 5945 if (udp_do_checksum) 5946 ip_len |= (ip_len << 16); 5947 #ifndef _LITTLE_ENDIAN 5948 else 5949 ip_len <<= 16; 5950 #endif 5951 } 5952 /* Set UDP length and checksum */ 5953 *((uint32_t *)&udpha->uha_length) = ip_len; 5954 5955 if (DB_TYPE(mp) != M_DATA) { 5956 ASSERT(mp != mp1); 5957 freeb(mp); 5958 } 5959 5960 /* mp has been consumed and we'll return success */ 5961 ASSERT(*error == 0); 5962 mp = NULL; 5963 5964 /* We're done. Pass the packet to ip. */ 5965 BUMP_MIB(&udp_mib, udpOutDatagrams); 5966 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5967 "udp_wput_end: q %p (%S)", q, "end"); 5968 5969 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5970 CONN_OUTBOUND_POLICY_PRESENT(connp) || 5971 connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || 5972 connp->conn_nofailover_ill != NULL || 5973 connp->conn_outgoing_ill != NULL || 5974 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5975 IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) { 5976 UDP_STAT(udp_ip_send); 5977 ip_output(connp, mp1, connp->conn_wq, IP_WPUT); 5978 } else { 5979 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5980 } 5981 5982 done: 5983 if (*error != 0) { 5984 ASSERT(mp != NULL); 5985 BUMP_MIB(&udp_mib, udpOutErrors); 5986 } 5987 return (mp); 5988 } 5989 5990 static void 5991 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5992 { 5993 conn_t *connp = udp->udp_connp; 5994 ipaddr_t src, dst; 5995 ill_t *ill; 5996 ire_t *ire; 5997 ipif_t *ipif = NULL; 5998 mblk_t *ire_fp_mp; 5999 uint_t ire_fp_mp_len; 6000 uint16_t *up; 6001 uint32_t cksum, hcksum_txflags; 6002 queue_t *dev_q; 6003 boolean_t retry_caching; 6004 6005 dst = ipha->ipha_dst; 6006 src = ipha->ipha_src; 6007 ASSERT(ipha->ipha_ident == 0); 6008 6009 if (CLASSD(dst)) { 6010 int err; 6011 6012 ipif = conn_get_held_ipif(connp, 6013 &connp->conn_multicast_ipif, &err); 6014 6015 if (ipif == NULL || ipif->ipif_isv6 || 6016 (ipif->ipif_ill->ill_phyint->phyint_flags & 6017 PHYI_LOOPBACK)) { 6018 if (ipif != NULL) 6019 ipif_refrele(ipif); 6020 UDP_STAT(udp_ip_send); 6021 ip_output(connp, mp, q, IP_WPUT); 6022 return; 6023 } 6024 } 6025 6026 retry_caching = B_FALSE; 6027 mutex_enter(&connp->conn_lock); 6028 ire = connp->conn_ire_cache; 6029 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 6030 6031 if (ire == NULL || ire->ire_addr != dst || 6032 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 6033 retry_caching = B_TRUE; 6034 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 6035 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6036 6037 ASSERT(ipif != NULL); 6038 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6039 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6040 retry_caching = B_TRUE; 6041 } 6042 6043 if (!retry_caching) { 6044 ASSERT(ire != NULL); 6045 IRE_REFHOLD(ire); 6046 mutex_exit(&connp->conn_lock); 6047 } else { 6048 boolean_t cached = B_FALSE; 6049 6050 connp->conn_ire_cache = NULL; 6051 mutex_exit(&connp->conn_lock); 6052 6053 /* Release the old ire */ 6054 if (ire != NULL) { 6055 IRE_REFRELE_NOTR(ire); 6056 ire = NULL; 6057 } 6058 6059 if (CLASSD(dst)) { 6060 ASSERT(ipif != NULL); 6061 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6062 connp->conn_zoneid, MATCH_IRE_ILL_GROUP); 6063 } else { 6064 ASSERT(ipif == NULL); 6065 ire = ire_cache_lookup(dst, connp->conn_zoneid); 6066 } 6067 6068 if (ire == NULL) { 6069 if (ipif != NULL) 6070 ipif_refrele(ipif); 6071 UDP_STAT(udp_ire_null); 6072 ip_output(connp, mp, q, IP_WPUT); 6073 return; 6074 } 6075 IRE_REFHOLD_NOTR(ire); 6076 6077 
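/*
 * Added note (not in the original source): attempt to stash the
 * freshly looked-up IRE back into conn_ire_cache so the next send
 * can skip the lookup. The attempt is abandoned if the conn is
 * closing, another thread has cached an IRE in the meantime, or
 * this IRE has been marked condemned.
 */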
mutex_enter(&connp->conn_lock); 6078 if (!(connp->conn_state_flags & CONN_CLOSING) && 6079 connp->conn_ire_cache == NULL) { 6080 rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6081 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6082 connp->conn_ire_cache = ire; 6083 cached = B_TRUE; 6084 } 6085 rw_exit(&ire->ire_bucket->irb_lock); 6086 } 6087 mutex_exit(&connp->conn_lock); 6088 6089 /* 6090 * We can continue to use the ire but since it was not 6091 * cached, we should drop the extra reference. 6092 */ 6093 if (!cached) 6094 IRE_REFRELE_NOTR(ire); 6095 } 6096 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6097 ASSERT(!CLASSD(dst) || ipif != NULL); 6098 6099 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6100 (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL || 6101 ire->ire_max_frag < ntohs(ipha->ipha_length) || 6102 (ire_fp_mp = ire->ire_fp_mp) == NULL || 6103 (connp->conn_nexthop_set) || 6104 (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) { 6105 if (ipif != NULL) 6106 ipif_refrele(ipif); 6107 UDP_STAT(udp_ip_ire_send); 6108 IRE_REFRELE(ire); 6109 ip_output(connp, mp, q, IP_WPUT); 6110 return; 6111 } 6112 6113 BUMP_MIB(&ip_mib, ipOutRequests); 6114 6115 ill = ire_to_ill(ire); 6116 ASSERT(ill != NULL); 6117 6118 dev_q = ire->ire_stq->q_next; 6119 ASSERT(dev_q != NULL); 6120 /* 6121 * If the service thread is already running, or if the driver 6122 * queue is currently flow-controlled, queue this packet. 6123 */ 6124 if ((q->q_first != NULL || connp->conn_draining) || 6125 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 6126 if (ip_output_queue) { 6127 (void) putq(q, mp); 6128 } else { 6129 BUMP_MIB(&ip_mib, ipOutDiscards); 6130 freemsg(mp); 6131 } 6132 if (ipif != NULL) 6133 ipif_refrele(ipif); 6134 IRE_REFRELE(ire); 6135 return; 6136 } 6137 6138 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6139 #ifndef _BIG_ENDIAN 6140 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6141 #endif 6142 6143 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6144 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6145 src = ipha->ipha_src = ipif->ipif_src_addr; 6146 else 6147 src = ipha->ipha_src = ire->ire_src_addr; 6148 } 6149 6150 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6151 ASSERT(ill->ill_hcksum_capab != NULL); 6152 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6153 } else { 6154 hcksum_txflags = 0; 6155 } 6156 6157 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6158 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6159 6160 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6161 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6162 if (*up != 0) { 6163 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6164 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6165 ntohs(ipha->ipha_length), cksum); 6166 6167 /* Software checksum? 
*/ 6168 if (DB_CKSUMFLAGS(mp) == 0) { 6169 UDP_STAT(udp_out_sw_cksum); 6170 UDP_STAT_UPDATE(udp_out_sw_cksum_bytes, 6171 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6172 } 6173 } 6174 6175 ipha->ipha_fragment_offset_and_flags |= 6176 (uint32_t)htons(ire->ire_frag_flag); 6177 6178 /* Calculate IP header checksum if hardware isn't capable */ 6179 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6180 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6181 ((uint16_t *)ipha)[4]); 6182 } 6183 6184 if (CLASSD(dst)) { 6185 ilm_t *ilm; 6186 6187 ILM_WALKER_HOLD(ill); 6188 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6189 ILM_WALKER_RELE(ill); 6190 if (ilm != NULL) { 6191 ip_multicast_loopback(q, ill, mp, 6192 connp->conn_multicast_loop ? 0 : 6193 IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); 6194 } 6195 6196 /* If multicast TTL is 0 then we are done */ 6197 if (ipha->ipha_ttl == 0) { 6198 if (ipif != NULL) 6199 ipif_refrele(ipif); 6200 freemsg(mp); 6201 IRE_REFRELE(ire); 6202 return; 6203 } 6204 } 6205 6206 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6207 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6208 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6209 6210 UPDATE_OB_PKT_COUNT(ire); 6211 ire->ire_last_used_time = lbolt; 6212 6213 if (ILL_DLS_CAPABLE(ill)) { 6214 /* 6215 * Send the packet directly to DLD, where it may be queued 6216 * depending on the availability of transmit resources at 6217 * the media layer. 6218 */ 6219 IP_DLS_ILL_TX(ill, mp); 6220 } else { 6221 putnext(ire->ire_stq, mp); 6222 } 6223 6224 if (ipif != NULL) 6225 ipif_refrele(ipif); 6226 IRE_REFRELE(ire); 6227 } 6228 6229 /* 6230 * This routine handles all messages passed downstream. It either 6231 * consumes the message or passes it downstream; it never queues 6232 * a message. 6233 */ 6234 static void 6235 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6236 { 6237 sin6_t *sin6; 6238 sin_t *sin; 6239 ipaddr_t v4dst; 6240 uint16_t port; 6241 uint_t srcid; 6242 queue_t *q = connp->conn_wq; 6243 udp_t *udp = connp->conn_udp; 6244 t_scalar_t optlen; 6245 int error = 0; 6246 struct sockaddr_storage ss; 6247 6248 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6249 "udp_wput_start: connp %p mp %p", connp, mp); 6250 6251 /* 6252 * We directly handle several cases here: T_UNITDATA_REQ messages 6253 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 6254 * connected and non-connected sockets. The latter carries the 6255 * address structure along when this routine gets called.
6256 */ 6257 switch (DB_TYPE(mp)) { 6258 case M_DATA: 6259 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6260 if (!udp->udp_direct_sockfs || 6261 addr == NULL || addrlen == 0) { 6262 /* Not connected; address is required */ 6263 BUMP_MIB(&udp_mib, udpOutErrors); 6264 UDP_STAT(udp_out_err_notconn); 6265 freemsg(mp); 6266 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6267 "udp_wput_end: connp %p (%S)", connp, 6268 "not-connected; address required"); 6269 return; 6270 } 6271 ASSERT(udp->udp_issocket); 6272 UDP_DBGSTAT(udp_data_notconn); 6273 /* Not connected; do some more checks below */ 6274 optlen = 0; 6275 break; 6276 } 6277 /* M_DATA for connected socket */ 6278 UDP_DBGSTAT(udp_data_conn); 6279 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6280 6281 /* Initialize addr and addrlen as if they're passed in */ 6282 if (udp->udp_family == AF_INET) { 6283 sin = (sin_t *)&ss; 6284 sin->sin_family = AF_INET; 6285 sin->sin_port = udp->udp_dstport; 6286 sin->sin_addr.s_addr = v4dst; 6287 addr = (struct sockaddr *)sin; 6288 addrlen = sizeof (*sin); 6289 } else { 6290 sin6 = (sin6_t *)&ss; 6291 sin6->sin6_family = AF_INET6; 6292 sin6->sin6_port = udp->udp_dstport; 6293 sin6->sin6_flowinfo = udp->udp_flowinfo; 6294 sin6->sin6_addr = udp->udp_v6dst; 6295 sin6->sin6_scope_id = 0; 6296 sin6->__sin6_src_id = 0; 6297 addr = (struct sockaddr *)sin6; 6298 addrlen = sizeof (*sin6); 6299 } 6300 6301 if (udp->udp_family == AF_INET || 6302 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 6303 /* 6304 * Handle both AF_INET and AF_INET6; the latter 6305 * for IPV4 mapped destination addresses. Note 6306 * here that both addr and addrlen point to the 6307 * corresponding struct depending on the address 6308 * family of the socket. 6309 */ 6310 mp = udp_output_v4(connp, mp, v4dst, 6311 udp->udp_dstport, 0, &error); 6312 } else { 6313 mp = udp_output_v6(connp, mp, sin6, 0, &error); 6314 } 6315 if (error != 0) { 6316 ASSERT(addr != NULL && addrlen != 0); 6317 goto ud_error; 6318 } 6319 return; 6320 case M_PROTO: 6321 case M_PCPROTO: { 6322 struct T_unitdata_req *tudr; 6323 6324 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6325 tudr = (struct T_unitdata_req *)mp->b_rptr; 6326 6327 /* Handle valid T_UNITDATA_REQ here */ 6328 if (MBLKL(mp) >= sizeof (*tudr) && 6329 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6330 if (mp->b_cont == NULL) { 6331 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6332 "udp_wput_end: q %p (%S)", q, "badaddr"); 6333 error = EPROTO; 6334 goto ud_error; 6335 } 6336 6337 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6338 tudr->DEST_length)) { 6339 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6340 "udp_wput_end: q %p (%S)", q, "badaddr"); 6341 error = EADDRNOTAVAIL; 6342 goto ud_error; 6343 } 6344 /* 6345 * If a port has not been bound to the stream, fail. 6346 * This is not a problem when sockfs is directly 6347 * above us, because it will ensure that the socket 6348 * is first bound before allowing data to be sent. 
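 * (Added note: a TPI user that is not mediated by sockfs can still
 * reach this point without having bound a local port, hence the
 * explicit TS_UNBND check below.)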
6349 */ 6350 if (udp->udp_state == TS_UNBND) { 6351 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6352 "udp_wput_end: q %p (%S)", q, "outstate"); 6353 error = EPROTO; 6354 goto ud_error; 6355 } 6356 addr = (struct sockaddr *) 6357 &mp->b_rptr[tudr->DEST_offset]; 6358 addrlen = tudr->DEST_length; 6359 optlen = tudr->OPT_length; 6360 if (optlen != 0) 6361 UDP_STAT(udp_out_opt); 6362 break; 6363 } 6364 /* FALLTHRU */ 6365 } 6366 default: 6367 udp_become_writer(connp, mp, udp_wput_other_wrapper, 6368 SQTAG_UDP_OUTPUT); 6369 return; 6370 } 6371 ASSERT(addr != NULL); 6372 6373 switch (udp->udp_family) { 6374 case AF_INET6: 6375 sin6 = (sin6_t *)addr; 6376 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 6377 sin6->sin6_family != AF_INET6) { 6378 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6379 "udp_wput_end: q %p (%S)", q, "badaddr"); 6380 error = EADDRNOTAVAIL; 6381 goto ud_error; 6382 } 6383 6384 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6385 /* 6386 * Destination is a non-IPv4-compatible IPv6 address. 6387 * Send out an IPv6 format packet. 6388 */ 6389 mp = udp_output_v6(connp, mp, sin6, optlen, &error); 6390 if (error != 0) 6391 goto ud_error; 6392 6393 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6394 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6395 return; 6396 } 6397 /* 6398 * If the local address is not zero or a mapped address 6399 * return an error. It would be possible to send an IPv4 6400 * packet but the response would never make it back to the 6401 * application since it is bound to a non-mapped address. 6402 */ 6403 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6404 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6405 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6406 "udp_wput_end: q %p (%S)", q, "badaddr"); 6407 error = EADDRNOTAVAIL; 6408 goto ud_error; 6409 } 6410 /* Send IPv4 packet without modifying udp_ipversion */ 6411 /* Extract port and ipaddr */ 6412 port = sin6->sin6_port; 6413 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6414 srcid = sin6->__sin6_src_id; 6415 break; 6416 6417 case AF_INET: 6418 sin = (sin_t *)addr; 6419 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 6420 sin->sin_family != AF_INET) { 6421 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6422 "udp_wput_end: q %p (%S)", q, "badaddr"); 6423 error = EADDRNOTAVAIL; 6424 goto ud_error; 6425 } 6426 /* Extract port and ipaddr */ 6427 port = sin->sin_port; 6428 v4dst = sin->sin_addr.s_addr; 6429 srcid = 0; 6430 break; 6431 } 6432 6433 /* 6434 * If options passed in, feed it for verification and handling 6435 */ 6436 if (optlen != 0) { 6437 ASSERT(DB_TYPE(mp) != M_DATA); 6438 if (udp_unitdata_opt_process(q, mp, &error, NULL) < 0) { 6439 /* failure */ 6440 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6441 "udp_wput_end: q %p (%S)", q, 6442 "udp_unitdata_opt_process"); 6443 goto ud_error; 6444 } 6445 /* 6446 * Note: success in processing options. 
6447 * mp option buffer represented by 6448 * OPT_length/offset now potentially modified 6449 * and contain option setting results 6450 */ 6451 } 6452 ASSERT(error == 0); 6453 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); 6454 if (error != 0) { 6455 ud_error: 6456 UDP_STAT(udp_out_err_output); 6457 ASSERT(mp != NULL); 6458 /* mp is freed by the following routine */ 6459 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6460 (t_scalar_t)error); 6461 } 6462 } 6463 6464 /* ARGSUSED */ 6465 static void 6466 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 6467 { 6468 udp_output((conn_t *)arg, mp, NULL, 0); 6469 _UDP_EXIT((conn_t *)arg); 6470 } 6471 6472 static void 6473 udp_wput(queue_t *q, mblk_t *mp) 6474 { 6475 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 6476 SQTAG_UDP_WPUT); 6477 } 6478 6479 /* 6480 * Allocate and prepare a T_UNITDATA_REQ message. 6481 */ 6482 static mblk_t * 6483 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 6484 { 6485 struct T_unitdata_req *tudr; 6486 mblk_t *mp; 6487 6488 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 6489 if (mp != NULL) { 6490 mp->b_wptr += sizeof (*tudr) + addrlen; 6491 DB_TYPE(mp) = M_PROTO; 6492 6493 tudr = (struct T_unitdata_req *)mp->b_rptr; 6494 tudr->PRIM_type = T_UNITDATA_REQ; 6495 tudr->DEST_length = addrlen; 6496 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 6497 tudr->OPT_length = 0; 6498 tudr->OPT_offset = 0; 6499 bcopy(addr, tudr+1, addrlen); 6500 } 6501 return (mp); 6502 } 6503 6504 /* 6505 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 6506 * is valid when we are directly beneath the stream head, and thus sockfs 6507 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6508 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 6509 * this is done for both connected and non-connected endpoint. 6510 */ 6511 void 6512 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6513 { 6514 conn_t *connp; 6515 udp_t *udp; 6516 6517 q = UDP_WR(q); 6518 connp = Q_TO_CONN(q); 6519 udp = connp->conn_udp; 6520 6521 /* udpsockfs should only send down M_DATA for this entry point */ 6522 ASSERT(DB_TYPE(mp) == M_DATA); 6523 6524 mutex_enter(&connp->conn_lock); 6525 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 6526 6527 if (udp->udp_mode != UDP_MT_HOT) { 6528 /* 6529 * We can't enter this conn right away because another 6530 * thread is currently executing as writer; therefore we 6531 * need to deposit the message into the squeue to be 6532 * drained later. If a socket address is present, we 6533 * need to create a T_UNITDATA_REQ message as placeholder. 6534 */ 6535 if (addr != NULL && addrlen != 0) { 6536 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 6537 6538 if (tudr_mp == NULL) { 6539 mutex_exit(&connp->conn_lock); 6540 BUMP_MIB(&udp_mib, udpOutErrors); 6541 UDP_STAT(udp_out_err_tudr); 6542 freemsg(mp); 6543 return; 6544 } 6545 /* Tag the packet with T_UNITDATA_REQ */ 6546 tudr_mp->b_cont = mp; 6547 mp = tudr_mp; 6548 } 6549 mutex_exit(&connp->conn_lock); 6550 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 6551 return; 6552 } 6553 6554 /* We can execute as reader right away. */ 6555 UDP_READERS_INCREF(udp); 6556 mutex_exit(&connp->conn_lock); 6557 6558 udp_output(connp, mp, addr, addrlen); 6559 6560 udp_exit(connp); 6561 } 6562 6563 /* 6564 * udp_output_v6(): 6565 * Assumes that udp_wput did some sanity checking on the destination 6566 * address. 
6567 */ 6568 static mblk_t * 6569 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen, 6570 int *error) 6571 { 6572 ip6_t *ip6h; 6573 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6574 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 6575 mblk_t *mp2; 6576 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6577 size_t ip_len; 6578 udpha_t *udph; 6579 udp_t *udp = connp->conn_udp; 6580 queue_t *q = connp->conn_wq; 6581 ip6_pkt_t ipp_s; /* For ancillary data options */ 6582 ip6_pkt_t *ipp = &ipp_s; 6583 ip6_pkt_t *tipp; /* temporary ipp */ 6584 uint32_t csum = 0; 6585 uint_t ignore = 0; 6586 uint_t option_exists = 0, is_sticky = 0; 6587 uint8_t *cp; 6588 uint8_t *nxthdr_ptr; 6589 6590 *error = 0; 6591 6592 /* mp1 points to the M_DATA mblk carrying the packet */ 6593 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6594 ASSERT(tudr_optlen == 0 || DB_TYPE(mp) != M_DATA); 6595 6596 /* 6597 * If the local address is a mapped address return 6598 * an error. 6599 * It would be possible to send an IPv6 packet but the 6600 * response would never make it back to the application 6601 * since it is bound to a mapped address. 6602 */ 6603 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6604 *error = EADDRNOTAVAIL; 6605 goto done; 6606 } 6607 6608 ipp->ipp_fields = 0; 6609 ipp->ipp_sticky_ignored = 0; 6610 6611 /* 6612 * If TPI options passed in, feed it for verification and handling 6613 */ 6614 if (tudr_optlen != 0) { 6615 if (udp_unitdata_opt_process(q, mp, error, (void *)ipp) < 0) { 6616 /* failure */ 6617 goto done; 6618 } 6619 ignore = ipp->ipp_sticky_ignored; 6620 ASSERT(*error == 0); 6621 } 6622 6623 if (sin6->sin6_scope_id != 0 && 6624 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6625 /* 6626 * IPPF_SCOPE_ID is special. It's neither a sticky 6627 * option nor ancillary data. It needs to be 6628 * explicitly set in options_exists. 6629 */ 6630 option_exists |= IPPF_SCOPE_ID; 6631 } 6632 6633 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6634 /* No sticky options nor ancillary data. */ 6635 goto no_options; 6636 } 6637 6638 /* 6639 * Go through the options figuring out where each is going to 6640 * come from and build two masks. The first mask indicates if 6641 * the option exists at all. The second mask indicates if the 6642 * option is sticky or ancillary. 
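 * For example (illustrative only, not from the original source): a
 * routing header made sticky earlier via setsockopt(IPV6_RTHDR)
 * combined with an IPV6_HOPLIMIT ancillary option on this particular
 * send would yield option_exists = IPPF_RTHDR|IPPF_HOPLIMIT and
 * is_sticky = IPPF_RTHDR.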
6643 */ 6644 if (!(ignore & IPPF_HOPOPTS)) { 6645 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6646 option_exists |= IPPF_HOPOPTS; 6647 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6648 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6649 option_exists |= IPPF_HOPOPTS; 6650 is_sticky |= IPPF_HOPOPTS; 6651 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_hopoptslen; 6652 } 6653 } 6654 6655 if (!(ignore & IPPF_RTHDR)) { 6656 if (ipp->ipp_fields & IPPF_RTHDR) { 6657 option_exists |= IPPF_RTHDR; 6658 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6659 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6660 option_exists |= IPPF_RTHDR; 6661 is_sticky |= IPPF_RTHDR; 6662 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6663 } 6664 } 6665 6666 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6667 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6668 option_exists |= IPPF_RTDSTOPTS; 6669 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6670 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6671 option_exists |= IPPF_RTDSTOPTS; 6672 is_sticky |= IPPF_RTDSTOPTS; 6673 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6674 } 6675 } 6676 6677 if (!(ignore & IPPF_DSTOPTS)) { 6678 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6679 option_exists |= IPPF_DSTOPTS; 6680 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6681 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6682 option_exists |= IPPF_DSTOPTS; 6683 is_sticky |= IPPF_DSTOPTS; 6684 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6685 } 6686 } 6687 6688 if (!(ignore & IPPF_IFINDEX)) { 6689 if (ipp->ipp_fields & IPPF_IFINDEX) { 6690 option_exists |= IPPF_IFINDEX; 6691 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6692 option_exists |= IPPF_IFINDEX; 6693 is_sticky |= IPPF_IFINDEX; 6694 } 6695 } 6696 6697 if (!(ignore & IPPF_ADDR)) { 6698 if (ipp->ipp_fields & IPPF_ADDR) { 6699 option_exists |= IPPF_ADDR; 6700 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6701 option_exists |= IPPF_ADDR; 6702 is_sticky |= IPPF_ADDR; 6703 } 6704 } 6705 6706 if (!(ignore & IPPF_DONTFRAG)) { 6707 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6708 option_exists |= IPPF_DONTFRAG; 6709 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6710 option_exists |= IPPF_DONTFRAG; 6711 is_sticky |= IPPF_DONTFRAG; 6712 } 6713 } 6714 6715 if (!(ignore & IPPF_USE_MIN_MTU)) { 6716 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6717 option_exists |= IPPF_USE_MIN_MTU; 6718 } else if (udp->udp_sticky_ipp.ipp_fields & 6719 IPPF_USE_MIN_MTU) { 6720 option_exists |= IPPF_USE_MIN_MTU; 6721 is_sticky |= IPPF_USE_MIN_MTU; 6722 } 6723 } 6724 6725 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6726 option_exists |= IPPF_HOPLIMIT; 6727 /* IPV6_HOPLIMIT can never be sticky */ 6728 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6729 6730 if (!(ignore & IPPF_UNICAST_HOPS) && 6731 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6732 option_exists |= IPPF_UNICAST_HOPS; 6733 is_sticky |= IPPF_UNICAST_HOPS; 6734 } 6735 6736 if (!(ignore & IPPF_MULTICAST_HOPS) && 6737 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6738 option_exists |= IPPF_MULTICAST_HOPS; 6739 is_sticky |= IPPF_MULTICAST_HOPS; 6740 } 6741 6742 if (!(ignore & IPPF_TCLASS)) { 6743 if (ipp->ipp_fields & IPPF_TCLASS) { 6744 option_exists |= IPPF_TCLASS; 6745 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6746 option_exists |= IPPF_TCLASS; 6747 is_sticky |= IPPF_TCLASS; 6748 } 6749 } 6750 6751 no_options: 6752 6753 /* 6754 * If any options carried 
in the ip6i_t were specified, we 6755 * need to account for the ip6i_t in the data we'll be sending 6756 * down. 6757 */ 6758 if (option_exists & IPPF_HAS_IP6I) 6759 udp_ip_hdr_len += sizeof (ip6i_t); 6760 6761 /* check/fix buffer config, setup pointers into it */ 6762 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6763 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6764 !OK_32PTR(ip6h)) { 6765 /* Try to get everything in a single mblk next time */ 6766 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6767 udp->udp_max_hdr_len = udp_ip_hdr_len; 6768 (void) mi_set_sth_wroff(UDP_RD(q), 6769 udp->udp_max_hdr_len + udp_wroff_extra); 6770 } 6771 mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); 6772 if (mp2 == NULL) { 6773 *error = ENOMEM; 6774 goto done; 6775 } 6776 mp2->b_wptr = DB_LIM(mp2); 6777 mp2->b_cont = mp1; 6778 mp1 = mp2; 6779 if (DB_TYPE(mp) != M_DATA) 6780 mp->b_cont = mp1; 6781 else 6782 mp = mp1; 6783 6784 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6785 } 6786 mp1->b_rptr = (unsigned char *)ip6h; 6787 ip6i = (ip6i_t *)ip6h; 6788 6789 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6790 if (option_exists & IPPF_HAS_IP6I) { 6791 ip6h = (ip6_t *)&ip6i[1]; 6792 ip6i->ip6i_flags = 0; 6793 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6794 6795 /* sin6_scope_id takes precedence over IPPF_IFINDEX */ 6796 if (option_exists & IPPF_SCOPE_ID) { 6797 ip6i->ip6i_flags |= IP6I_IFINDEX; 6798 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6799 } else if (option_exists & IPPF_IFINDEX) { 6800 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6801 ASSERT(tipp->ipp_ifindex != 0); 6802 ip6i->ip6i_flags |= IP6I_IFINDEX; 6803 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6804 } 6805 6806 if (option_exists & IPPF_ADDR) { 6807 /* 6808 * Enable per-packet source address verification if 6809 * IPV6_PKTINFO specified the source address. 6810 * ip6_src is set in the transport's _wput function. 6811 */ 6812 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6813 } 6814 6815 if (option_exists & IPPF_DONTFRAG) { 6816 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6817 } 6818 6819 if (option_exists & IPPF_USE_MIN_MTU) { 6820 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6821 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6822 } 6823 6824 if (option_exists & IPPF_NEXTHOP) { 6825 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6826 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6827 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6828 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6829 } 6830 6831 /* 6832 * tell IP this is an ip6i_t private header 6833 */ 6834 ip6i->ip6i_nxt = IPPROTO_RAW; 6835 } 6836 6837 /* Initialize IPv6 header */ 6838 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6839 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6840 6841 /* Set the hoplimit of the outgoing packet. */ 6842 if (option_exists & IPPF_HOPLIMIT) { 6843 /* IPV6_HOPLIMIT ancillary data overrides all other settings.
*/ 6844 ip6h->ip6_hops = ipp->ipp_hoplimit; 6845 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6846 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6847 ip6h->ip6_hops = udp->udp_multicast_ttl; 6848 if (option_exists & IPPF_MULTICAST_HOPS) 6849 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6850 } else { 6851 ip6h->ip6_hops = udp->udp_ttl; 6852 if (option_exists & IPPF_UNICAST_HOPS) 6853 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6854 } 6855 6856 if (option_exists & IPPF_ADDR) { 6857 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6858 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6859 ip6h->ip6_src = tipp->ipp_addr; 6860 } else { 6861 /* 6862 * The source address was not set using IPV6_PKTINFO. 6863 * First look at the bound source. 6864 * If unspecified fallback to __sin6_src_id. 6865 */ 6866 ip6h->ip6_src = udp->udp_v6src; 6867 if (sin6->__sin6_src_id != 0 && 6868 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6869 ip_srcid_find_id(sin6->__sin6_src_id, 6870 &ip6h->ip6_src, connp->conn_zoneid); 6871 } 6872 } 6873 6874 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6875 cp = (uint8_t *)&ip6h[1]; 6876 6877 /* 6878 * Here's where we have to start stringing together 6879 * any extension headers in the right order: 6880 * Hop-by-hop, destination, routing, and final destination opts. 6881 */ 6882 if (option_exists & IPPF_HOPOPTS) { 6883 /* Hop-by-hop options */ 6884 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6885 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6886 6887 *nxthdr_ptr = IPPROTO_HOPOPTS; 6888 nxthdr_ptr = &hbh->ip6h_nxt; 6889 6890 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 6891 cp += tipp->ipp_hopoptslen; 6892 } 6893 /* 6894 * En-route destination options 6895 * Only do them if there's a routing header as well 6896 */ 6897 if (option_exists & IPPF_RTDSTOPTS) { 6898 ip6_dest_t *dst = (ip6_dest_t *)cp; 6899 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6900 6901 *nxthdr_ptr = IPPROTO_DSTOPTS; 6902 nxthdr_ptr = &dst->ip6d_nxt; 6903 6904 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6905 cp += tipp->ipp_rtdstoptslen; 6906 } 6907 /* 6908 * Routing header next 6909 */ 6910 if (option_exists & IPPF_RTHDR) { 6911 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6912 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6913 6914 *nxthdr_ptr = IPPROTO_ROUTING; 6915 nxthdr_ptr = &rt->ip6r_nxt; 6916 6917 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6918 cp += tipp->ipp_rthdrlen; 6919 } 6920 /* 6921 * Do ultimate destination options 6922 */ 6923 if (option_exists & IPPF_DSTOPTS) { 6924 ip6_dest_t *dest = (ip6_dest_t *)cp; 6925 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6926 6927 *nxthdr_ptr = IPPROTO_DSTOPTS; 6928 nxthdr_ptr = &dest->ip6d_nxt; 6929 6930 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6931 cp += tipp->ipp_dstoptslen; 6932 } 6933 /* 6934 * Now set the last header pointer to the proto passed in 6935 */ 6936 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6937 *nxthdr_ptr = IPPROTO_UDP; 6938 6939 /* Update UDP header */ 6940 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6941 udph->uha_dst_port = sin6->sin6_port; 6942 udph->uha_src_port = udp->udp_port; 6943 6944 /* 6945 * Copy in the destination address 6946 */ 6947 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6948 ip6h->ip6_dst = ipv6_loopback; 6949 else 6950 ip6h->ip6_dst = sin6->sin6_addr; 6951 6952 ip6h->ip6_vcf = 6953 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6954 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6955 6956 if (option_exists & IPPF_TCLASS) { 6957 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6958 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6959 tipp->ipp_tclass); 6960 } 6961 6962 if (option_exists & IPPF_RTHDR) { 6963 ip6_rthdr_t *rth; 6964 6965 /* 6966 * Perform any processing needed for source routing. 6967 * We know that all extension headers will be in the same mblk 6968 * as the IPv6 header. 6969 */ 6970 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6971 if (rth != NULL && rth->ip6r_segleft != 0) { 6972 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6973 /* 6974 * Drop packet - only support Type 0 routing. 6975 * Notify the application as well. 6976 */ 6977 *error = EPROTO; 6978 goto done; 6979 } 6980 6981 /* 6982 * rth->ip6r_len is twice the number of 6983 * addresses in the header. Thus it must be even. 6984 */ 6985 if (rth->ip6r_len & 0x1) { 6986 *error = EPROTO; 6987 goto done; 6988 } 6989 /* 6990 * Shuffle the routing header and ip6_dst 6991 * addresses, and get the checksum difference 6992 * between the first hop (in ip6_dst) and 6993 * the destination (in the last routing hdr entry). 6994 */ 6995 csum = ip_massage_options_v6(ip6h, rth); 6996 /* 6997 * Verify that the first hop isn't a mapped address. 6998 * Routers along the path need to do this verification 6999 * for subsequent hops. 7000 */ 7001 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 7002 *error = EADDRNOTAVAIL; 7003 goto done; 7004 } 7005 7006 cp += (rth->ip6r_len + 1)*8; 7007 } 7008 } 7009 7010 /* count up length of UDP packet */ 7011 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 7012 if ((mp2 = mp1->b_cont) != NULL) { 7013 do { 7014 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 7015 ip_len += (uint32_t)MBLKL(mp2); 7016 } while ((mp2 = mp2->b_cont) != NULL); 7017 } 7018 7019 /* 7020 * If the size of the packet is greater than the maximum allowed by 7021 * ip, return an error. Passing this down could cause panics because 7022 * the size will have wrapped and be inconsistent with the msg size. 7023 */ 7024 if (ip_len > IP_MAXPACKET) { 7025 *error = EMSGSIZE; 7026 goto done; 7027 } 7028 7029 /* Store the UDP length. Subtract length of extension hdrs */ 7030 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7031 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7032 7033 /* 7034 * We make it easy for IP to include our pseudo header 7035 * by putting our length in uh_checksum, modified (if 7036 * we have a routing header) by the checksum difference 7037 * between the ultimate destination and first hop addresses. 7038 * Note: UDP over IPv6 must always checksum the packet. 7039 */ 7040 csum += udph->uha_length; 7041 csum = (csum & 0xFFFF) + (csum >> 16); 7042 udph->uha_checksum = (uint16_t)csum; 7043 7044 #ifdef _LITTLE_ENDIAN 7045 ip_len = htons(ip_len); 7046 #endif 7047 ip6h->ip6_plen = ip_len; 7048 7049 if (DB_TYPE(mp) != M_DATA) { 7050 ASSERT(mp != mp1); 7051 freeb(mp); 7052 } 7053 7054 /* mp has been consumed and we'll return success */ 7055 ASSERT(*error == 0); 7056 mp = NULL; 7057 7058 /* We're done. 
Pass the packet to IP */ 7059 BUMP_MIB(&udp_mib, udpOutDatagrams); 7060 ip_output_v6(connp, mp1, q, IP_WPUT); 7061 7062 done: 7063 if (*error != 0) { 7064 ASSERT(mp != NULL); 7065 BUMP_MIB(&udp_mib, udpOutErrors); 7066 } 7067 return (mp); 7068 } 7069 7070 static void 7071 udp_wput_other(queue_t *q, mblk_t *mp) 7072 { 7073 uchar_t *rptr = mp->b_rptr; 7074 struct datab *db; 7075 struct iocblk *iocp; 7076 cred_t *cr; 7077 conn_t *connp = Q_TO_CONN(q); 7078 udp_t *udp = connp->conn_udp; 7079 7080 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7081 "udp_wput_other_start: q %p", q); 7082 7083 db = mp->b_datap; 7084 7085 cr = DB_CREDDEF(mp, connp->conn_cred); 7086 7087 switch (db->db_type) { 7088 case M_PROTO: 7089 case M_PCPROTO: 7090 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7091 freemsg(mp); 7092 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7093 "udp_wput_other_end: q %p (%S)", 7094 q, "protoshort"); 7095 return; 7096 } 7097 switch (((t_primp_t)rptr)->type) { 7098 case T_ADDR_REQ: 7099 udp_addr_req(q, mp); 7100 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7101 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7102 return; 7103 case O_T_BIND_REQ: 7104 case T_BIND_REQ: 7105 udp_bind(q, mp); 7106 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7107 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7108 return; 7109 case T_CONN_REQ: 7110 udp_connect(q, mp); 7111 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7112 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7113 return; 7114 case T_CAPABILITY_REQ: 7115 udp_capability_req(q, mp); 7116 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7117 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7118 return; 7119 case T_INFO_REQ: 7120 udp_info_req(q, mp); 7121 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7122 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7123 return; 7124 case T_UNITDATA_REQ: 7125 /* 7126 * If a T_UNITDATA_REQ gets here, the address must 7127 * be bad. Valid T_UNITDATA_REQs are handled 7128 * in udp_wput. 7129 */ 7130 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7131 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7132 "udp_wput_other_end: q %p (%S)", 7133 q, "unitdatareq"); 7134 return; 7135 case T_UNBIND_REQ: 7136 udp_unbind(q, mp); 7137 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7138 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7139 return; 7140 case T_SVR4_OPTMGMT_REQ: 7141 if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr)) 7142 /* 7143 * Use upper queue for option processing in 7144 * case the request is not handled at this 7145 * level and needs to be passed down to IP. 7146 */ 7147 (void) svr4_optcom_req(_WR(UDP_RD(q)), 7148 mp, cr, &udp_opt_obj); 7149 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7150 "udp_wput_other_end: q %p (%S)", 7151 q, "optmgmtreq"); 7152 return; 7153 7154 case T_OPTMGMT_REQ: 7155 /* 7156 * Use upper queue for option processing in 7157 * case the request is not handled at this 7158 * level and needs to be passed down to IP. 7159 */ 7160 (void) tpi_optcom_req(_WR(UDP_RD(q)), 7161 mp, cr, &udp_opt_obj); 7162 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7163 "udp_wput_other_end: q %p (%S)", 7164 q, "optmgmtreq"); 7165 return; 7166 7167 case T_DISCON_REQ: 7168 udp_disconnect(q, mp); 7169 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7170 "udp_wput_other_end: q %p (%S)", 7171 q, "disconreq"); 7172 return; 7173 7174 /* The following TPI message is not supported by udp. 
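 * Connection acceptance has no meaning for a connectionless transport,
 * so both the old and the current form of the primitive are rejected
 * with TNOTSUPPORT.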
*/ 7175 case O_T_CONN_RES: 7176 case T_CONN_RES: 7177 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7178 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7179 "udp_wput_other_end: q %p (%S)", 7180 q, "connres/disconreq"); 7181 return; 7182 7183 /* The following 3 TPI messages are illegal for udp. */ 7184 case T_DATA_REQ: 7185 case T_EXDATA_REQ: 7186 case T_ORDREL_REQ: 7187 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7188 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7189 "udp_wput_other_end: q %p (%S)", 7190 q, "data/exdata/ordrel"); 7191 return; 7192 default: 7193 break; 7194 } 7195 break; 7196 case M_FLUSH: 7197 if (*rptr & FLUSHW) 7198 flushq(q, FLUSHDATA); 7199 break; 7200 case M_IOCTL: 7201 iocp = (struct iocblk *)mp->b_rptr; 7202 switch (iocp->ioc_cmd) { 7203 case TI_GETPEERNAME: 7204 if (udp->udp_state != TS_DATA_XFER) { 7205 /* 7206 * If a default destination address has not 7207 * been associated with the stream, then we 7208 * don't know the peer's name. 7209 */ 7210 iocp->ioc_error = ENOTCONN; 7211 iocp->ioc_count = 0; 7212 mp->b_datap->db_type = M_IOCACK; 7213 putnext(UDP_RD(q), mp); 7214 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7215 "udp_wput_other_end: q %p (%S)", 7216 q, "getpeername"); 7217 return; 7218 } 7219 /* FALLTHRU */ 7220 case TI_GETMYNAME: { 7221 /* 7222 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7223 * need to copyin the user's strbuf structure. 7224 * Processing will continue in the M_IOCDATA case 7225 * below. 7226 */ 7227 mi_copyin(q, mp, NULL, 7228 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7229 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7230 "udp_wput_other_end: q %p (%S)", 7231 q, "getmyname"); 7232 return; 7233 } 7234 case ND_SET: 7235 /* nd_getset performs the necessary checking */ 7236 case ND_GET: 7237 if (nd_getset(q, udp_g_nd, mp)) { 7238 putnext(UDP_RD(q), mp); 7239 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7240 "udp_wput_other_end: q %p (%S)", 7241 q, "get"); 7242 return; 7243 } 7244 break; 7245 case _SIOCSOCKFALLBACK: 7246 /* 7247 * Either sockmod is about to be popped and the 7248 * socket would now be treated as a plain stream, 7249 * or a module is about to be pushed so we could 7250 * no longer use read-side synchronous stream. 7251 * Drain any queued data and disable direct sockfs 7252 * interface from now on. 7253 */ 7254 if (!udp->udp_issocket) { 7255 DB_TYPE(mp) = M_IOCNAK; 7256 iocp->ioc_error = EINVAL; 7257 } else { 7258 udp->udp_issocket = B_FALSE; 7259 if (udp->udp_direct_sockfs) { 7260 /* 7261 * Disable read-side synchronous 7262 * stream interface and drain any 7263 * queued data. 7264 */ 7265 udp_rcv_drain(UDP_RD(q), udp, 7266 B_FALSE); 7267 ASSERT(!udp->udp_direct_sockfs); 7268 UDP_STAT(udp_sock_fallback); 7269 } 7270 DB_TYPE(mp) = M_IOCACK; 7271 iocp->ioc_error = 0; 7272 } 7273 iocp->ioc_count = 0; 7274 iocp->ioc_rval = 0; 7275 putnext(UDP_RD(q), mp); 7276 return; 7277 default: 7278 break; 7279 } 7280 break; 7281 case M_IOCDATA: 7282 udp_wput_iocdata(q, mp); 7283 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7284 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7285 return; 7286 default: 7287 /* Unrecognized messages are passed through without change. 
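 * They fall through to the ip_output() call at the bottom of this
 * routine.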
*/ 7288 break; 7289 } 7290 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7291 "udp_wput_other_end: q %p (%S)", q, "end"); 7292 ip_output(connp, mp, q, IP_WPUT); 7293 } 7294 7295 /* ARGSUSED */ 7296 static void 7297 udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 7298 { 7299 udp_wput_other(((conn_t *)arg)->conn_wq, mp); 7300 udp_exit((conn_t *)arg); 7301 } 7302 7303 /* 7304 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7305 * messages. 7306 */ 7307 static void 7308 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7309 { 7310 mblk_t *mp1; 7311 STRUCT_HANDLE(strbuf, sb); 7312 uint16_t port; 7313 in6_addr_t v6addr; 7314 ipaddr_t v4addr; 7315 uint32_t flowinfo = 0; 7316 int addrlen; 7317 udp_t *udp = Q_TO_UDP(q); 7318 7319 /* Make sure it is one of ours. */ 7320 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7321 case TI_GETMYNAME: 7322 case TI_GETPEERNAME: 7323 break; 7324 default: 7325 ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); 7326 return; 7327 } 7328 7329 q = WR(UDP_RD(q)); 7330 switch (mi_copy_state(q, mp, &mp1)) { 7331 case -1: 7332 return; 7333 case MI_COPY_CASE(MI_COPY_IN, 1): 7334 break; 7335 case MI_COPY_CASE(MI_COPY_OUT, 1): 7336 /* 7337 * The address has been copied out, so now 7338 * copyout the strbuf. 7339 */ 7340 mi_copyout(q, mp); 7341 return; 7342 case MI_COPY_CASE(MI_COPY_OUT, 2): 7343 /* 7344 * The address and strbuf have been copied out. 7345 * We're done, so just acknowledge the original 7346 * M_IOCTL. 7347 */ 7348 mi_copy_done(q, mp, 0); 7349 return; 7350 default: 7351 /* 7352 * Something strange has happened, so acknowledge 7353 * the original M_IOCTL with an EPROTO error. 7354 */ 7355 mi_copy_done(q, mp, EPROTO); 7356 return; 7357 } 7358 7359 /* 7360 * Now we have the strbuf structure for TI_GETMYNAME 7361 * and TI_GETPEERNAME. Next we copyout the requested 7362 * address and then we'll copyout the strbuf. 7363 */ 7364 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 7365 (void *)mp1->b_rptr); 7366 if (udp->udp_family == AF_INET) 7367 addrlen = sizeof (sin_t); 7368 else 7369 addrlen = sizeof (sin6_t); 7370 7371 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7372 mi_copy_done(q, mp, EINVAL); 7373 return; 7374 } 7375 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7376 case TI_GETMYNAME: 7377 if (udp->udp_family == AF_INET) { 7378 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7379 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7380 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7381 v4addr = V4_PART_OF_V6(udp->udp_v6src); 7382 } else { 7383 /* 7384 * INADDR_ANY 7385 * udp_v6src is not set, we might be bound to 7386 * broadcast/multicast. Use udp_bound_v6src as 7387 * local address instead (that could 7388 * also still be INADDR_ANY) 7389 */ 7390 v4addr = V4_PART_OF_V6(udp->udp_bound_v6src); 7391 } 7392 } else { 7393 /* udp->udp_family == AF_INET6 */ 7394 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7395 v6addr = udp->udp_v6src; 7396 } else { 7397 /* 7398 * UNSPECIFIED 7399 * udp_v6src is not set, we might be bound to 7400 * broadcast/multicast. 
Use udp_bound_v6src as 7401 * local address instead (that could 7402 * also still be UNSPECIFIED) 7403 */ 7404 v6addr = udp->udp_bound_v6src; 7405 } 7406 } 7407 port = udp->udp_port; 7408 break; 7409 case TI_GETPEERNAME: 7410 if (udp->udp_state != TS_DATA_XFER) { 7411 mi_copy_done(q, mp, ENOTCONN); 7412 return; 7413 } 7414 if (udp->udp_family == AF_INET) { 7415 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7416 v4addr = V4_PART_OF_V6(udp->udp_v6dst); 7417 } else { 7418 /* udp->udp_family == AF_INET6) */ 7419 v6addr = udp->udp_v6dst; 7420 flowinfo = udp->udp_flowinfo; 7421 } 7422 port = udp->udp_dstport; 7423 break; 7424 default: 7425 mi_copy_done(q, mp, EPROTO); 7426 return; 7427 } 7428 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7429 if (!mp1) 7430 return; 7431 7432 if (udp->udp_family == AF_INET) { 7433 sin_t *sin; 7434 7435 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 7436 sin = (sin_t *)mp1->b_rptr; 7437 mp1->b_wptr = (uchar_t *)&sin[1]; 7438 *sin = sin_null; 7439 sin->sin_family = AF_INET; 7440 sin->sin_addr.s_addr = v4addr; 7441 sin->sin_port = port; 7442 } else { 7443 /* udp->udp_family == AF_INET6 */ 7444 sin6_t *sin6; 7445 7446 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 7447 sin6 = (sin6_t *)mp1->b_rptr; 7448 mp1->b_wptr = (uchar_t *)&sin6[1]; 7449 *sin6 = sin6_null; 7450 sin6->sin6_family = AF_INET6; 7451 sin6->sin6_flowinfo = flowinfo; 7452 sin6->sin6_addr = v6addr; 7453 sin6->sin6_port = port; 7454 } 7455 /* Copy out the address */ 7456 mi_copyout(q, mp); 7457 } 7458 7459 7460 static int 7461 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7462 void *thisdg_attrs) 7463 { 7464 struct T_unitdata_req *udreqp; 7465 int is_absreq_failure; 7466 cred_t *cr; 7467 conn_t *connp = Q_TO_CONN(q); 7468 7469 ASSERT(((t_primp_t)mp->b_rptr)->type); 7470 7471 cr = DB_CREDDEF(mp, connp->conn_cred); 7472 7473 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7474 *errorp = 0; 7475 7476 /* 7477 * Use upper queue for option processing since the callback 7478 * routines expect to be called in UDP instance instead of IP. 7479 */ 7480 *errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length, 7481 udreqp->OPT_offset, cr, &udp_opt_obj, 7482 thisdg_attrs, &is_absreq_failure); 7483 7484 if (*errorp != 0) { 7485 /* 7486 * Note: No special action needed in this 7487 * module for "is_absreq_failure" 7488 */ 7489 return (-1); /* failure */ 7490 } 7491 ASSERT(is_absreq_failure == 0); 7492 return (0); /* success */ 7493 } 7494 7495 void 7496 udp_ddi_init(void) 7497 { 7498 int i; 7499 7500 UDP6_MAJ = ddi_name_to_major(UDP6); 7501 7502 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7503 udp_opt_obj.odb_opt_arr_cnt); 7504 7505 if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) { 7506 /* Not a power of two. 
Round up to the nearest power of two (e.g. a setting of 600 becomes 1024) */ 7507 for (i = 0; i < 31; i++) { 7508 if (udp_bind_fanout_size < (1 << i)) 7509 break; 7510 } 7511 udp_bind_fanout_size = 1 << i; 7512 } 7513 udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size * 7514 sizeof (udp_fanout_t), KM_SLEEP); 7515 for (i = 0; i < udp_bind_fanout_size; i++) { 7516 mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7517 NULL); 7518 } 7519 (void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr)); 7520 7521 udp_kstat_init(); 7522 7523 udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), 7524 CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); 7525 } 7526 7527 void 7528 udp_ddi_destroy(void) 7529 { 7530 int i; 7531 7532 nd_free(&udp_g_nd); 7533 7534 for (i = 0; i < udp_bind_fanout_size; i++) { 7535 mutex_destroy(&udp_bind_fanout[i].uf_lock); 7536 } 7537 7538 kmem_free(udp_bind_fanout, udp_bind_fanout_size * 7539 sizeof (udp_fanout_t)); 7540 7541 udp_kstat_fini(); 7542 7543 kmem_cache_destroy(udp_cache); 7544 } 7545 7546 static void 7547 udp_kstat_init(void) 7548 { 7549 udp_named_kstat_t template = { 7550 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 7551 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7552 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 7553 { "entrySize", KSTAT_DATA_INT32, 0 }, 7554 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7555 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7556 }; 7557 7558 udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME, 7559 "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0); 7560 7561 if (udp_mibkp == NULL) 7562 return; 7563 7564 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7565 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7566 7567 bcopy(&template, udp_mibkp->ks_data, sizeof (template)); 7568 7569 udp_mibkp->ks_update = udp_kstat_update; 7570 7571 kstat_install(udp_mibkp); 7572 7573 if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat", 7574 "net", KSTAT_TYPE_NAMED, 7575 sizeof (udp_statistics) / sizeof (kstat_named_t), 7576 KSTAT_FLAG_VIRTUAL)) != NULL) { 7577 udp_ksp->ks_data = &udp_statistics; 7578 kstat_install(udp_ksp); 7579 } 7580 } 7581 7582 static void 7583 udp_kstat_fini(void) 7584 { 7585 if (udp_ksp != NULL) { 7586 kstat_delete(udp_ksp); 7587 udp_ksp = NULL; 7588 } 7589 if (udp_mibkp != NULL) { 7590 kstat_delete(udp_mibkp); 7591 udp_mibkp = NULL; 7592 } 7593 } 7594 7595 static int 7596 udp_kstat_update(kstat_t *kp, int rw) 7597 { 7598 udp_named_kstat_t *udpkp; 7599 7600 if ((kp == NULL) || (kp->ks_data == NULL)) 7601 return (EIO); 7602 7603 if (rw == KSTAT_WRITE) 7604 return (EACCES); 7605 7606 udpkp = (udp_named_kstat_t *)kp->ks_data; 7607 7608 udpkp->inDatagrams.value.ui32 = udp_mib.udpInDatagrams; 7609 udpkp->inErrors.value.ui32 = udp_mib.udpInErrors; 7610 udpkp->outDatagrams.value.ui32 = udp_mib.udpOutDatagrams; 7611 udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors; 7612 7613 return (0); 7614 } 7615 7616 /* ARGSUSED */ 7617 static void 7618 udp_rput(queue_t *q, mblk_t *mp) 7619 { 7620 /* 7621 * We get here whenever we do qreply() from IP, 7622 * i.e. as part of handling ioctls, etc. 7623 */ 7624 putnext(q, mp); 7625 } 7626 7627 /* 7628 * Read-side synchronous stream info entry point, called as a 7629 * result of handling certain STREAMS ioctl operations.
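 * It reports, without dequeueing anything, the number and byte counts
 * of the datagrams currently sitting on udp_rcv_list, and can copy out
 * the contents of the first one (see the INFOD_* cases below).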
7630 */ 7631 static int 7632 udp_rinfop(queue_t *q, infod_t *dp) 7633 { 7634 mblk_t *mp; 7635 uint_t cmd = dp->d_cmd; 7636 int res = 0; 7637 int error = 0; 7638 udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); 7639 struct stdata *stp = STREAM(q); 7640 7641 mutex_enter(&udp->udp_drain_lock); 7642 /* If shutdown on read has happened, return nothing */ 7643 mutex_enter(&stp->sd_lock); 7644 if (stp->sd_flag & STREOF) { 7645 mutex_exit(&stp->sd_lock); 7646 goto done; 7647 } 7648 mutex_exit(&stp->sd_lock); 7649 7650 if ((mp = udp->udp_rcv_list_head) == NULL) 7651 goto done; 7652 7653 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7654 7655 if (cmd & INFOD_COUNT) { 7656 /* 7657 * Return the number of messages. 7658 */ 7659 dp->d_count += udp->udp_rcv_msgcnt; 7660 res |= INFOD_COUNT; 7661 } 7662 if (cmd & INFOD_BYTES) { 7663 /* 7664 * Return size of all data messages. 7665 */ 7666 dp->d_bytes += udp->udp_rcv_cnt; 7667 res |= INFOD_BYTES; 7668 } 7669 if (cmd & INFOD_FIRSTBYTES) { 7670 /* 7671 * Return size of first data message. 7672 */ 7673 dp->d_bytes = msgdsize(mp); 7674 res |= INFOD_FIRSTBYTES; 7675 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7676 } 7677 if (cmd & INFOD_COPYOUT) { 7678 mblk_t *mp1 = mp->b_cont; 7679 int n; 7680 /* 7681 * Return data contents of first message. 7682 */ 7683 ASSERT(DB_TYPE(mp1) == M_DATA); 7684 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7685 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7686 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7687 UIO_READ, dp->d_uiop)) != 0) { 7688 goto done; 7689 } 7690 mp1 = mp1->b_cont; 7691 } 7692 res |= INFOD_COPYOUT; 7693 dp->d_cmd &= ~INFOD_COPYOUT; 7694 } 7695 done: 7696 mutex_exit(&udp->udp_drain_lock); 7697 7698 dp->d_res |= res; 7699 7700 return (error); 7701 } 7702 7703 /* 7704 * Read-side synchronous stream entry point. This is called as a result 7705 * of recv/read operation done at sockfs, and is guaranteed to execute 7706 * outside of the interrupt thread context. It returns a single datagram 7707 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7708 */ 7709 static int 7710 udp_rrw(queue_t *q, struiod_t *dp) 7711 { 7712 mblk_t *mp; 7713 udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); 7714 7715 /* We should never get here when we're in SNMP mode */ 7716 ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); 7717 7718 /* 7719 * Dequeue datagram from the head of the list and return 7720 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7721 * set/cleared depending on whether or not there's data 7722 * remaining in the list. 7723 */ 7724 mutex_enter(&udp->udp_drain_lock); 7725 if (!udp->udp_direct_sockfs) { 7726 mutex_exit(&udp->udp_drain_lock); 7727 UDP_STAT(udp_rrw_busy); 7728 return (EBUSY); 7729 } 7730 if ((mp = udp->udp_rcv_list_head) != NULL) { 7731 uint_t size = msgdsize(mp); 7732 7733 /* Last datagram in the list? */ 7734 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7735 udp->udp_rcv_list_tail = NULL; 7736 mp->b_next = NULL; 7737 7738 udp->udp_rcv_cnt -= size; 7739 udp->udp_rcv_msgcnt--; 7740 UDP_STAT(udp_rrw_msgcnt); 7741 7742 /* No longer flow-controlling? */ 7743 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7744 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7745 udp->udp_drain_qfull = B_FALSE; 7746 } 7747 if (udp->udp_rcv_list_head == NULL) { 7748 /* 7749 * Either we just dequeued the last datagram or 7750 * we get here from sockfs and have nothing to 7751 * return; in this case clear RSLEEP. 
7752 */ 7753 ASSERT(udp->udp_rcv_cnt == 0); 7754 ASSERT(udp->udp_rcv_msgcnt == 0); 7755 ASSERT(udp->udp_rcv_list_tail == NULL); 7756 STR_WAKEUP_CLEAR(STREAM(q)); 7757 } else { 7758 /* 7759 * More data follows; we need udp_rrw() to be 7760 * called in future to pick up the rest. 7761 */ 7762 STR_WAKEUP_SET(STREAM(q)); 7763 } 7764 mutex_exit(&udp->udp_drain_lock); 7765 dp->d_mp = mp; 7766 return (0); 7767 } 7768 7769 /* 7770 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7771 * list; this is typically executed within the interrupt thread context 7772 * and so we do things as quickly as possible. 7773 */ 7774 static void 7775 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7776 { 7777 ASSERT(q == RD(q)); 7778 ASSERT(pkt_len == msgdsize(mp)); 7779 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7780 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7781 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7782 7783 mutex_enter(&udp->udp_drain_lock); 7784 /* 7785 * Wake up and signal the receiving app; it is okay to do this 7786 * before enqueueing the mp because we are holding the drain lock. 7787 * One of the advantages of synchronous stream is the ability for 7788 * us to find out when the application performs a read on the 7789 * socket by way of udp_rrw() entry point being called. We need 7790 * to generate SIGPOLL/SIGIO for each received data in the case 7791 * of asynchronous socket just as in the strrput() case. However, 7792 * we only wake the application up when necessary, i.e. during the 7793 * first enqueue. When udp_rrw() is called, we send up a single 7794 * datagram upstream and call STR_WAKEUP_SET() again when there 7795 * are still data remaining in our receive queue. 7796 */ 7797 if (udp->udp_rcv_list_head == NULL) { 7798 STR_WAKEUP_SET(STREAM(q)); 7799 udp->udp_rcv_list_head = mp; 7800 } else { 7801 udp->udp_rcv_list_tail->b_next = mp; 7802 } 7803 udp->udp_rcv_list_tail = mp; 7804 udp->udp_rcv_cnt += pkt_len; 7805 udp->udp_rcv_msgcnt++; 7806 7807 /* Need to flow-control? */ 7808 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7809 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7810 udp->udp_drain_qfull = B_TRUE; 7811 7812 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 7813 STR_SENDSIG(STREAM(q)); 7814 mutex_exit(&udp->udp_drain_lock); 7815 } 7816 7817 /* 7818 * Drain the contents of receive list to the module upstream; we do 7819 * this during close or when we fallback to the slow mode due to 7820 * sockmod being popped or a module being pushed on top of us. 7821 */ 7822 static void 7823 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7824 { 7825 mblk_t *mp; 7826 7827 ASSERT(q == RD(q)); 7828 7829 mutex_enter(&udp->udp_drain_lock); 7830 /* 7831 * There is no race with a concurrent udp_input() sending 7832 * up packets using putnext() after we have cleared the 7833 * udp_direct_sockfs flag but before we have completed 7834 * sending up the packets in udp_rcv_list, since we are 7835 * either a writer or we have quiesced the conn. 7836 */ 7837 udp->udp_direct_sockfs = B_FALSE; 7838 mutex_exit(&udp->udp_drain_lock); 7839 7840 if (udp->udp_rcv_list_head != NULL) 7841 UDP_STAT(udp_drain); 7842 7843 /* 7844 * Send up everything via putnext(); note here that we 7845 * don't need the udp_drain_lock to protect us since 7846 * nothing can enter udp_rrw() and that we currently 7847 * have exclusive access to this udp. 
7848 */ 7849 while ((mp = udp->udp_rcv_list_head) != NULL) { 7850 udp->udp_rcv_list_head = mp->b_next; 7851 mp->b_next = NULL; 7852 udp->udp_rcv_cnt -= msgdsize(mp); 7853 udp->udp_rcv_msgcnt--; 7854 if (closing) { 7855 freemsg(mp); 7856 } else { 7857 putnext(q, mp); 7858 } 7859 } 7860 ASSERT(udp->udp_rcv_cnt == 0); 7861 ASSERT(udp->udp_rcv_msgcnt == 0); 7862 ASSERT(udp->udp_rcv_list_head == NULL); 7863 udp->udp_rcv_list_tail = NULL; 7864 udp->udp_drain_qfull = B_FALSE; 7865 } 7866 7867 static size_t 7868 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7869 { 7870 /* We add a bit of extra buffering */ 7871 size += size >> 1; 7872 if (size > udp_max_buf) 7873 size = udp_max_buf; 7874 7875 udp->udp_rcv_hiwat = size; 7876 return (size); 7877 } 7878 7879 /* 7880 * Little helper for IPsec's NAT-T processing. 7881 */ 7882 boolean_t 7883 udp_compute_checksum(void) 7884 { 7885 return (udp_do_checksum); 7886 } 7887
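/*
 * Illustrative sketch (added; not part of the original file): the
 * checksum adjustments in udp_output_v4() and udp_output_v6() above
 * repeatedly fold a 32-bit accumulator back into 16 bits with
 *
 *	cksum = (cksum & 0xFFFF) + (cksum >> 16);
 *
 * A standalone, user-level version of that folding step, assuming
 * nothing beyond <stdint.h>, could look like this:
 *
 *	#include <stdint.h>
 *
 *	static uint16_t
 *	fold16(uint32_t sum)
 *	{
 *		// Fold the carries out of the upper half; two rounds are
 *		// enough for any 32-bit input (0xFFFFFFFF -> 0x1FFFE -> 0xFFFF).
 *		sum = (sum & 0xFFFF) + (sum >> 16);
 *		sum = (sum & 0xFFFF) + (sum >> 16);
 *		return ((uint16_t)sum);
 *	}
 *
 * For instance fold16(0x1FFFE) is 0xFFFF and fold16(0x30003) is 0x0006,
 * matching the partial-sum handling seen in the send paths above.
 */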