1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #define _SUN_TPI_VERSION 2 39 #include <sys/tihdr.h> 40 #include <sys/timod.h> 41 #include <sys/tiuser.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/debug.h> 57 #include <sys/isa_defs.h> 58 #include <sys/random.h> 59 #include <netinet/in.h> 60 #include <netinet/ip6.h> 61 #include <netinet/icmp6.h> 62 #include <netinet/udp.h> 63 #include <net/if.h> 64 #include <net/route.h> 65 66 #include <inet/common.h> 67 #include <inet/ip.h> 68 #include <inet/ip_impl.h> 69 #include <inet/ip6.h> 70 #include <inet/ip_ire.h> 71 #include <inet/ip_if.h> 72 #include <inet/ip_multi.h> 73 #include <inet/mi.h> 74 #include <inet/mib2.h> 75 #include <inet/nd.h> 76 #include <inet/optcom.h> 77 #include <inet/snmpcom.h> 78 #include <inet/kstatcom.h> 79 #include <inet/udp_impl.h> 80 #include <inet/ipclassifier.h> 81 #include <inet/ipsec_impl.h> 82 #include <inet/ipp_common.h> 83 84 /* 85 * The ipsec_info.h header file is here since it has the definition for the 86 * M_CTL message types used by IP to convey information to the ULP. The 87 * ipsec_info.h needs the pfkeyv2.h, hence the latters presence. 88 */ 89 #include <net/pfkeyv2.h> 90 #include <inet/ipsec_info.h> 91 92 /* 93 * Synchronization notes: 94 * 95 * UDP uses a combination of its internal perimeter, a global lock and 96 * a set of bind hash locks to protect its data structures. Please see 97 * the note above udp_mode_assertions for details about the internal 98 * perimeter. 99 * 100 * When a UDP endpoint is bound to a local port, it is inserted into 101 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 102 * The size of the array is controlled by the udp_bind_fanout_size variable. 103 * This variable can be changed in /etc/system if the default value is 104 * not large enough. Each bind hash bucket is protected by a per bucket 105 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 106 * structure. 
A UDP endpoint is removed from the bind hash list only
 * when it is being unbound or being closed. The per bucket lock also
 * protects a UDP endpoint's state changes.
 *
 * Plumbing notes:
 *
 * Both udp and ip are merged, but the streams plumbing is kept unchanged
 * in that udp is always pushed atop /dev/ip. This is done to preserve
 * backwards compatibility for certain applications which rely on such
 * plumbing geometry to do things such as issuing I_POP on the stream
 * in order to obtain direct access to /dev/ip, etc.
 *
 * All UDP processing happens in the /dev/ip instance; the udp module
 * instance does not possess any state about the endpoint, and merely
 * acts as a dummy module whose presence is to keep the streams plumbing
 * appearance unchanged. At open time /dev/ip allocates a conn_t that
 * happens to embed a udp_t. This stays dormant until the time udp is
 * pushed, which indicates to /dev/ip that it must convert itself from
 * an IP to a UDP endpoint.
 *
 * We only allow for the following plumbing cases:
 *
 * Normal:
 *	/dev/ip is first opened and later udp is pushed directly on top.
 *	This is the default action that happens when a udp socket or
 *	/dev/udp is opened. The conn_t created by the /dev/ip instance is
 *	now shared and is marked with IPCL_UDP.
 *
 * SNMP-only:
 *	udp is pushed on top of a module other than /dev/ip. When this
 *	happens it will support only SNMP semantics. A new conn_t is
 *	allocated and marked with IPCL_UDPMOD.
 *
 * The above cases imply that we don't support any intermediate module to
 * reside in between /dev/ip and udp -- in fact, we have never supported
 * such a scenario in the past as the inter-layer communication semantics
 * have always been private. Also note that the normal case allows for SNMP
 * requests to be processed in addition to the rest of UDP operations.
 *
 * The normal case plumbing is depicted by the following diagram:
 *
 *	+---------------+---------------+
 *	|		|		| udp
 *	| udp_wq	| udp_rq	|
 *	|		| UDP_RD	|
 *	|		|		|
 *	+---------------+---------------+
 *		|		^
 *		v		|
 *	+---------------+---------------+
 *	|		|		| /dev/ip
 *	| ip_wq		| ip_rq		| conn_t
 *	| UDP_WR	|		|
 *	|		|		|
 *	+---------------+---------------+
 *
 * Messages arriving at udp_wq from above will end up in ip_wq before
 * they get processed, i.e. udp write entry points will advance udp_wq
 * and use its q_next value as ip_wq in order to use the conn_t that
 * is stored in its q_ptr. Likewise, messages generated by ip to the
 * module above udp will appear as if they are originated from udp_rq,
 * i.e. putnext() calls to the module above udp are done using the
 * udp_rq instead of ip_rq in order to avoid udp_rput(), which does
 * nothing more than calling putnext().
 *
 * The above implies the following rules of thumb:
 *
 * 1. udp_t is obtained from conn_t, which is created by the /dev/ip
 *    instance and is stored in q_ptr of both ip_wq and ip_rq. There
 *    is no direct reference to conn_t from either udp_wq or udp_rq.
 *
 * 2. Write-side entry points of udp can obtain the conn_t via the
 *    Q_TO_CONN() macro, using the queue value obtained from UDP_WR().
 *
 * 3. While in /dev/ip context, putnext() to the module above udp can
 *    be done by supplying the queue value obtained from UDP_RD().
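 *
 * As an illustrative sketch of rules 2 and 3 (hypothetical fragments, not
 * a routine in this file): a write-side entry point running in udp context
 * would reach the shared conn_t with
 *
 *	conn_t *connp = Q_TO_CONN(UDP_WR(q));
 *
 * (see udp_close() below for an actual use), while a putnext() to the
 * module above udp, issued while running in the /dev/ip instance, would
 * take the form
 *
 *	putnext(UDP_RD(q), mp);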
182 * 183 */ 184 185 static queue_t *UDP_WR(queue_t *); 186 static queue_t *UDP_RD(queue_t *); 187 188 udp_stat_t udp_statistics = { 189 { "udp_ip_send", KSTAT_DATA_UINT64 }, 190 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 191 { "udp_ire_null", KSTAT_DATA_UINT64 }, 192 { "udp_drain", KSTAT_DATA_UINT64 }, 193 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 194 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 195 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 196 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 197 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 198 { "udp_out_opt", KSTAT_DATA_UINT64 }, 199 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 200 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 201 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 202 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 203 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 204 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 205 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 206 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 207 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 208 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 209 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 210 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 211 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 212 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 213 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 214 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 215 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 216 #ifdef DEBUG 217 { "udp_data_conn", KSTAT_DATA_UINT64 }, 218 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 219 #endif 220 }; 221 222 static kstat_t *udp_ksp; 223 struct kmem_cache *udp_cache; 224 225 /* 226 * Bind hash list size and hash function. It has to be a power of 2 for 227 * hashing. 228 */ 229 #define UDP_BIND_FANOUT_SIZE 512 230 #define UDP_BIND_HASH(lport) \ 231 ((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1)) 232 233 /* UDP bind fanout hash structure. */ 234 typedef struct udp_fanout_s { 235 udp_t *uf_udp; 236 kmutex_t uf_lock; 237 #if defined(_LP64) || defined(_I32LPx) 238 char uf_pad[48]; 239 #else 240 char uf_pad[56]; 241 #endif 242 } udp_fanout_t; 243 244 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 245 /* udp_fanout_t *udp_bind_fanout. */ 246 static udp_fanout_t *udp_bind_fanout; 247 248 /* 249 * This controls the rate some ndd info report functions can be used 250 * by non-priviledged users. It stores the last time such info is 251 * requested. When those report functions are called again, this 252 * is checked with the current time and compare with the ndd param 253 * udp_ndd_get_info_interval. 
254 */ 255 static clock_t udp_last_ndd_get_info_time; 256 #define NDD_TOO_QUICK_MSG \ 257 "ndd get info rate too high for non-priviledged users, try again " \ 258 "later.\n" 259 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 260 261 static void udp_addr_req(queue_t *q, mblk_t *mp); 262 static void udp_bind(queue_t *q, mblk_t *mp); 263 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 264 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 265 static int udp_build_hdrs(queue_t *q, udp_t *udp); 266 static void udp_capability_req(queue_t *q, mblk_t *mp); 267 static int udp_close(queue_t *q); 268 static void udp_connect(queue_t *q, mblk_t *mp); 269 static void udp_disconnect(queue_t *q, mblk_t *mp); 270 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 271 int sys_error); 272 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 273 t_scalar_t tlierr, int unixerr); 274 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 275 cred_t *cr); 276 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 277 char *value, caddr_t cp, cred_t *cr); 278 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 279 char *value, caddr_t cp, cred_t *cr); 280 static void udp_icmp_error(queue_t *q, mblk_t *mp); 281 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 282 static void udp_info_req(queue_t *q, mblk_t *mp); 283 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 284 t_scalar_t addr_length); 285 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 286 cred_t *credp); 287 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 288 int *errorp, void *thisdg_attrs); 289 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 290 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 291 static boolean_t udp_param_register(udpparam_t *udppa, int cnt); 292 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 293 cred_t *cr); 294 static int udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 295 uchar_t **optbufp, uint_t *optlenp); 296 static void udp_report_item(mblk_t *mp, udp_t *udp); 297 static void udp_rput(queue_t *q, mblk_t *mp); 298 static void udp_rput_other(queue_t *, mblk_t *); 299 static int udp_rinfop(queue_t *q, infod_t *dp); 300 static int udp_rrw(queue_t *q, struiod_t *dp); 301 static void udp_rput_bind_ack(queue_t *q, mblk_t *mp); 302 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 303 cred_t *cr); 304 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha); 305 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 306 t_scalar_t destlen, t_scalar_t err); 307 static void udp_unbind(queue_t *q, mblk_t *mp); 308 static in_port_t udp_update_next_port(in_port_t port, boolean_t random); 309 static void udp_wput(queue_t *q, mblk_t *mp); 310 static mblk_t *udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst, 311 uint16_t port, uint_t srcid, int *error); 312 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 313 t_scalar_t tudr_optlen, int *error); 314 static void udp_wput_other(queue_t *q, mblk_t *mp); 315 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 316 static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 317 socklen_t addrlen); 318 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 319 320 static void udp_kstat_init(void); 321 static void udp_kstat_fini(void); 322 static int 
udp_kstat_update(kstat_t *kp, int rw); 323 static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2); 324 static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 325 static void udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 326 static void udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2); 327 328 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 329 uint_t pkt_len); 330 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 331 static void udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t); 332 static void udp_exit(conn_t *); 333 static void udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t); 334 #ifdef DEBUG 335 static void udp_mode_assertions(udp_t *, int); 336 #endif /* DEBUG */ 337 338 major_t UDP6_MAJ; 339 #define UDP6 "udp6" 340 341 #define UDP_RECV_HIWATER (56 * 1024) 342 #define UDP_RECV_LOWATER 128 343 #define UDP_XMIT_HIWATER (56 * 1024) 344 #define UDP_XMIT_LOWATER 1024 345 346 static struct module_info udp_info = { 347 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 348 }; 349 350 static struct qinit udp_rinit = { 351 (pfi_t)udp_rput, NULL, udp_open, udp_close, NULL, 352 &udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 353 }; 354 355 static struct qinit udp_winit = { 356 (pfi_t)udp_wput, NULL, NULL, NULL, NULL, 357 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 358 }; 359 360 static struct qinit winit = { 361 (pfi_t)putnext, NULL, NULL, NULL, NULL, 362 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 363 }; 364 365 /* Support for just SNMP if UDP is not pushed directly over device IP */ 366 struct qinit udp_snmp_rinit = { 367 (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL, 368 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 369 }; 370 371 struct qinit udp_snmp_winit = { 372 (pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL, 373 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 374 }; 375 376 struct streamtab udpinfo = { 377 &udp_rinit, &winit 378 }; 379 380 static sin_t sin_null; /* Zero address for quick clears */ 381 static sin6_t sin6_null; /* Zero address for quick clears */ 382 383 /* Hint not protected by any lock */ 384 static in_port_t udp_g_next_port_to_try; 385 386 /* 387 * Extra privileged ports. In host byte order. 388 */ 389 #define UDP_NUM_EPRIV_PORTS 64 390 static int udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 391 static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 }; 392 393 /* Only modified during _init and _fini thus no locking is needed. */ 394 static IDP udp_g_nd; /* Points to table of UDP ND variables. */ 395 396 /* MIB-2 stuff for SNMP */ 397 static mib2_udp_t udp_mib; /* SNMP fixed size info */ 398 static kstat_t *udp_mibkp; /* kstat exporting udp_mib data */ 399 400 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 401 402 /* Default structure copied into T_INFO_ACK messages */ 403 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 404 T_INFO_ACK, 405 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 406 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 407 T_INVALID, /* CDATA_size. udp does not support connect data. */ 408 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 409 sizeof (sin_t), /* ADDR_size. */ 410 0, /* OPT_size - not initialized here */ 411 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 412 T_CLTS, /* SERV_type. udp supports connection-less. */ 413 TS_UNBND, /* CURRENT_state. This is set from udp_state. 
*/
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into udp_g_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
	/* min		max		value		name */
	{ 0L,		256,		32,		"udp_wroff_extra" },
	{ 1L,		255,		255,		"udp_ipv4_ttl" },
	{ 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
	{ 1024,		(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
	{ 0,		1,		1,		"udp_do_checksum" },
	{ 1024,		UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
	{ 1024,		UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
	{ UDP_XMIT_LOWATER, (1<<30),	UDP_XMIT_HIWATER, "udp_xmit_hiwat"},
	{ 0,		(1<<30),	UDP_XMIT_LOWATER, "udp_xmit_lowat"},
	{ UDP_RECV_LOWATER, (1<<30),	UDP_RECV_HIWATER, "udp_recv_hiwat"},
	{ 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
	{ 100,		60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/*
 * The smallest anonymous port in the privileged port range in which UDP
 * looks for a free port.  Used with the option UDP_ANONPRIVBIND.
 */
static in_port_t udp_min_anonpriv_port = 512;

/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 */

void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

typedef union T_primitives *t_primp_t;

#define	UDP_ENQUEUE_MP(udp, mp, proc, tag) {			\
	ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL);	\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(mp)->b_queue = (queue_t *)((uintptr_t)tag);		\
	(mp)->b_prev = (mblk_t *)proc;				\
	if ((udp)->udp_mphead == NULL)				\
		(udp)->udp_mphead = (mp);			\
	else							\
		(udp)->udp_mptail->b_next = (mp);		\
	(udp)->udp_mptail = (mp);				\
	(udp)->udp_mpcount++;					\
}

#define	UDP_READERS_INCREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count++;				\
}

#define	UDP_READERS_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count--;				\
	if ((udp)->udp_reader_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

#define	UDP_SQUEUE_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_squeue_count--;				\
	if ((udp)->udp_squeue_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

/*
 * Notes on UDP endpoint synchronization:
 *
 * UDP needs exclusive operation on a per endpoint basis, when executing
 * functions that modify the endpoint state. udp_rput_other() deals with
 * packets with IP options, and processing these packets ends up having
 * to update the endpoint's option related state. udp_wput_other() deals
 * with control operations from the top, e.g. connect(), that need to
 * update the endpoint state. These could be synchronized using locks,
 * but the current version uses squeues for this purpose. squeues may
 * give performance improvement for certain cases such as connected UDP
 * sockets; thus the framework allows for using squeues.
 *
 * The perimeter routines are described as follows:
 *
 * udp_enter():
 *	Enter the UDP endpoint perimeter.
 *
 * udp_become_writer():
 *	Become exclusive on the UDP endpoint. Specifies a function
 *	that will be called exclusively either immediately or later
 *	when the perimeter is available exclusively.
 *
 * udp_exit():
 *	Exit the UDP perimeter.
 *
 * Entering UDP from the top or from the bottom must be done using
 * udp_enter(). No lock must be held while attempting to enter the UDP
 * perimeter. When finished, udp_exit() must be called to get out of
 * the perimeter.
 *
 * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode,
 * multiple threads may enter a UDP endpoint concurrently. This is used
 * for sending and/or receiving normal data. Control operations and other
 * special cases call udp_become_writer() to become exclusive on a per
 * endpoint basis and this results in transitioning to SQUEUE mode. squeue
 * by definition serializes access to the conn_t. When there are no more
 * pending messages on the squeue for the UDP connection, the endpoint
 * reverts to MT_HOT mode. During the interregnum when not all MT threads
 * of an endpoint have finished, messages are queued in the UDP endpoint
 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode.
 *
 * These modes have the following analogs:
 *
 *	UDP_MT_HOT/udp_reader_count==0		none
 *	UDP_MT_HOT/udp_reader_count>0		RW_READ_LOCK
 *	UDP_MT_QUEUED				RW_WRITE_WANTED
 *	UDP_SQUEUE or UDP_QUEUED_SQUEUE		RW_WRITE_LOCKED
 *
 * Stable modes:	UDP_MT_HOT, UDP_SQUEUE
 * Transient modes:	UDP_MT_QUEUED, UDP_QUEUED_SQUEUE
 *
 * While in stable modes, UDP keeps track of the number of threads
 * operating on the endpoint. The udp_reader_count variable represents
 * the number of threads entering the endpoint as readers while it is
 * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there
 * is only a single reader, i.e. when this counter drops to 1. Likewise,
 * udp_squeue_count represents the number of threads operating on the
 * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition
 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e.
 * when this counter drops to 0.
 *
 * The default mode is set to UDP_MT_HOT and UDP alternates between
 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below.
 *
 * Mode transition:
 * ----------------------------------------------------------------
 * old mode		Event				New mode
 * ----------------------------------------------------------------
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_SQUEUE
 *			and udp_reader_count == 1
 *
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_MT_QUEUED
 *			and udp_reader_count > 1
 *
 * UDP_MT_QUEUED	udp_reader_count drops to zero	UDP_QUEUED_SQUEUE
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_SQUEUE
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count != 0
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_MT_HOT
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count
 *			drops to zero
 *
 * UDP_SQUEUE		udp_squeue_count drops to zero	UDP_MT_HOT
 * ----------------------------------------------------------------
 */

static queue_t *
UDP_WR(queue_t *q)
{
	ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL);
	ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next)));

	return (_WR(q)->q_next);
}

static queue_t *
UDP_RD(queue_t *q)
{
	ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(q)));
	ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL);

	return (_RD(q)->q_next);
}

#ifdef DEBUG
#define	UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller)
#else
#define	UDP_MODE_ASSERTIONS(udp, caller)
#endif

/* Invariants */
#ifdef DEBUG

uint32_t udp_count[4];

/* Context of udp_mode_assertions */
#define	UDP_ENTER		1
#define	UDP_BECOME_WRITER	2
#define	UDP_EXIT		3

static void
udp_mode_assertions(udp_t *udp, int caller)
{
	ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock));

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		/*
		 * Messages have not yet been enqueued on the internal queue,
		 * otherwise we would have switched to UDP_MT_QUEUED. Likewise
		 * by definition, there can't be any messages enqueued on the
		 * squeue. The UDP could be quiescent, so udp_reader_count
		 * could be zero at entry.
651 */ 652 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 && 653 udp->udp_squeue_count == 0); 654 ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0); 655 udp_count[0]++; 656 break; 657 658 case UDP_MT_QUEUED: 659 /* 660 * The last MT thread to exit the udp perimeter empties the 661 * internal queue and then switches the UDP to 662 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED 663 * mode, it means there must be at least 1 MT thread still in 664 * the perimeter and at least 1 message on the internal queue. 665 */ 666 ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL && 667 udp->udp_mpcount != 0 && udp->udp_squeue_count == 0); 668 udp_count[1]++; 669 break; 670 671 case UDP_QUEUED_SQUEUE: 672 /* 673 * The switch has happened from MT to SQUEUE. So there can't 674 * any MT threads. Messages could still pile up on the internal 675 * queue until the transition is complete and we move to 676 * UDP_SQUEUE mode. We can't assert on nonzero udp_squeue_count 677 * since the squeue could drain any time. 678 */ 679 ASSERT(udp->udp_reader_count == 0); 680 udp_count[2]++; 681 break; 682 683 case UDP_SQUEUE: 684 /* 685 * The transition is complete. Thre can't be any messages on 686 * the internal queue. The udp could be quiescent or the squeue 687 * could drain any time, so we can't assert on nonzero 688 * udp_squeue_count during entry. Nor can we assert that 689 * udp_reader_count is zero, since, a reader thread could have 690 * directly become writer in line by calling udp_become_writer 691 * without going through the queued states. 692 */ 693 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0); 694 ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0); 695 udp_count[3]++; 696 break; 697 } 698 } 699 #endif 700 701 #define _UDP_ENTER(connp, mp, proc, tag) { \ 702 udp_t *_udp = (connp)->conn_udp; \ 703 \ 704 mutex_enter(&(connp)->conn_lock); \ 705 if ((connp)->conn_state_flags & CONN_CLOSING) { \ 706 mutex_exit(&(connp)->conn_lock); \ 707 freemsg(mp); \ 708 } else { \ 709 UDP_MODE_ASSERTIONS(_udp, UDP_ENTER); \ 710 \ 711 switch (_udp->udp_mode) { \ 712 case UDP_MT_HOT: \ 713 /* We can execute as reader right away. */ \ 714 UDP_READERS_INCREF(_udp); \ 715 mutex_exit(&(connp)->conn_lock); \ 716 (*(proc))(connp, mp, (connp)->conn_sqp); \ 717 break; \ 718 \ 719 case UDP_SQUEUE: \ 720 /* \ 721 * We are in squeue mode, send the \ 722 * packet to the squeue \ 723 */ \ 724 _udp->udp_squeue_count++; \ 725 CONN_INC_REF_LOCKED(connp); \ 726 mutex_exit(&(connp)->conn_lock); \ 727 squeue_enter((connp)->conn_sqp, mp, proc, \ 728 connp, tag); \ 729 break; \ 730 \ 731 case UDP_MT_QUEUED: \ 732 case UDP_QUEUED_SQUEUE: \ 733 /* \ 734 * Some messages may have been enqueued \ 735 * ahead of us. Enqueue the new message \ 736 * at the tail of the internal queue to \ 737 * preserve message ordering. \ 738 */ \ 739 UDP_ENQUEUE_MP(_udp, mp, proc, tag); \ 740 mutex_exit(&(connp)->conn_lock); \ 741 break; \ 742 } \ 743 } \ 744 } 745 746 static void 747 udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 748 { 749 _UDP_ENTER(connp, mp, proc, tag); 750 } 751 752 static void 753 udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 754 { 755 udp_t *udp; 756 757 udp = connp->conn_udp; 758 759 mutex_enter(&connp->conn_lock); 760 761 UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER); 762 763 switch (udp->udp_mode) { 764 case UDP_MT_HOT: 765 if (udp->udp_reader_count == 1) { 766 /* 767 * We are the only MT thread. Switch to squeue mode 768 * immediately. 
769 */ 770 udp->udp_mode = UDP_SQUEUE; 771 udp->udp_squeue_count = 1; 772 CONN_INC_REF_LOCKED(connp); 773 mutex_exit(&connp->conn_lock); 774 squeue_enter(connp->conn_sqp, mp, proc, connp, tag); 775 return; 776 } 777 /* FALLTHRU */ 778 779 case UDP_MT_QUEUED: 780 /* Enqueue the packet internally in UDP */ 781 udp->udp_mode = UDP_MT_QUEUED; 782 UDP_ENQUEUE_MP(udp, mp, proc, tag); 783 mutex_exit(&connp->conn_lock); 784 return; 785 786 case UDP_SQUEUE: 787 case UDP_QUEUED_SQUEUE: 788 /* 789 * We are already exclusive. i.e. we are already 790 * writer. Simply call the desired function. 791 */ 792 udp->udp_squeue_count++; 793 mutex_exit(&connp->conn_lock); 794 (*proc)(connp, mp, connp->conn_sqp); 795 return; 796 } 797 } 798 799 /* 800 * Transition from MT mode to SQUEUE mode, when the last MT thread 801 * is exiting the UDP perimeter. Move all messages from the internal 802 * udp queue to the squeue. A better way would be to move all the 803 * messages in one shot, this needs more support from the squeue framework 804 */ 805 static void 806 udp_switch_to_squeue(udp_t *udp) 807 { 808 mblk_t *mp; 809 mblk_t *mp_next; 810 sqproc_t proc; 811 uint8_t tag; 812 conn_t *connp = udp->udp_connp; 813 814 ASSERT(MUTEX_HELD(&connp->conn_lock)); 815 ASSERT(udp->udp_mode == UDP_MT_QUEUED); 816 while (udp->udp_mphead != NULL) { 817 mp = udp->udp_mphead; 818 udp->udp_mphead = NULL; 819 udp->udp_mptail = NULL; 820 udp->udp_mpcount = 0; 821 udp->udp_mode = UDP_QUEUED_SQUEUE; 822 mutex_exit(&connp->conn_lock); 823 /* 824 * It is best not to hold any locks across the calls 825 * to squeue functions. Since we drop the lock we 826 * need to go back and check the udp_mphead once again 827 * after the squeue_fill and hence the while loop at 828 * the top of this function 829 */ 830 for (; mp != NULL; mp = mp_next) { 831 mp_next = mp->b_next; 832 proc = (sqproc_t)mp->b_prev; 833 tag = (uint8_t)((uintptr_t)mp->b_queue); 834 mp->b_next = NULL; 835 mp->b_prev = NULL; 836 mp->b_queue = NULL; 837 CONN_INC_REF(connp); 838 udp->udp_squeue_count++; 839 squeue_fill(connp->conn_sqp, mp, proc, connp, 840 tag); 841 } 842 mutex_enter(&connp->conn_lock); 843 } 844 /* 845 * udp_squeue_count of zero implies that the squeue has drained 846 * even before we arrived here (i.e. after the squeue_fill above) 847 */ 848 udp->udp_mode = (udp->udp_squeue_count != 0) ? 849 UDP_SQUEUE : UDP_MT_HOT; 850 } 851 852 #define _UDP_EXIT(connp) { \ 853 udp_t *_udp = (connp)->conn_udp; \ 854 \ 855 mutex_enter(&(connp)->conn_lock); \ 856 UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \ 857 \ 858 switch (_udp->udp_mode) { \ 859 case UDP_MT_HOT: \ 860 UDP_READERS_DECREF(_udp); \ 861 mutex_exit(&(connp)->conn_lock); \ 862 break; \ 863 \ 864 case UDP_SQUEUE: \ 865 UDP_SQUEUE_DECREF(_udp); \ 866 if (_udp->udp_squeue_count == 0) \ 867 _udp->udp_mode = UDP_MT_HOT; \ 868 mutex_exit(&(connp)->conn_lock); \ 869 break; \ 870 \ 871 case UDP_MT_QUEUED: \ 872 /* \ 873 * If this is the last MT thread, we need to \ 874 * switch to squeue mode \ 875 */ \ 876 UDP_READERS_DECREF(_udp); \ 877 if (_udp->udp_reader_count == 0) \ 878 udp_switch_to_squeue(_udp); \ 879 mutex_exit(&(connp)->conn_lock); \ 880 break; \ 881 \ 882 case UDP_QUEUED_SQUEUE: \ 883 UDP_SQUEUE_DECREF(_udp); \ 884 /* \ 885 * Even if the udp_squeue_count drops to zero, we \ 886 * don't want to change udp_mode to UDP_MT_HOT here. \ 887 * The thread in udp_switch_to_squeue will take care \ 888 * of the transition to UDP_MT_HOT, after emptying \ 889 * any more new messages that have been enqueued in \ 890 * udp_mphead. 
\ 891 */ \ 892 mutex_exit(&(connp)->conn_lock); \ 893 break; \ 894 } \ 895 } 896 897 static void 898 udp_exit(conn_t *connp) 899 { 900 _UDP_EXIT(connp); 901 } 902 903 /* 904 * Return the next anonymous port in the priviledged port range for 905 * bind checking. 906 */ 907 static in_port_t 908 udp_get_next_priv_port(void) 909 { 910 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 911 912 if (next_priv_port < udp_min_anonpriv_port) { 913 next_priv_port = IPPORT_RESERVED - 1; 914 } 915 return (next_priv_port--); 916 } 917 918 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 919 /* ARGSUSED */ 920 static int 921 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 922 { 923 udp_fanout_t *udpf; 924 int i; 925 zoneid_t zoneid; 926 conn_t *connp; 927 udp_t *udp; 928 929 connp = Q_TO_CONN(q); 930 udp = connp->conn_udp; 931 932 /* Refer to comments in udp_status_report(). */ 933 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 934 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 935 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 936 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 937 return (0); 938 } 939 } 940 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 941 /* The following may work even if we cannot get a large buf. */ 942 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 943 return (0); 944 } 945 946 (void) mi_mpprintf(mp, 947 "UDP " MI_COL_HDRPAD_STR 948 /* 12345678[89ABCDEF] */ 949 " zone lport src addr dest addr port state"); 950 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 951 952 zoneid = connp->conn_zoneid; 953 954 for (i = 0; i < udp_bind_fanout_size; i++) { 955 udpf = &udp_bind_fanout[i]; 956 mutex_enter(&udpf->uf_lock); 957 958 /* Print the hash index. */ 959 udp = udpf->uf_udp; 960 if (zoneid != GLOBAL_ZONEID) { 961 /* skip to first entry in this zone; might be none */ 962 while (udp != NULL && 963 udp->udp_connp->conn_zoneid != zoneid) 964 udp = udp->udp_bind_hash; 965 } 966 if (udp != NULL) { 967 uint_t print_len, buf_len; 968 969 buf_len = mp->b_cont->b_datap->db_lim - 970 mp->b_cont->b_wptr; 971 print_len = snprintf((char *)mp->b_cont->b_wptr, 972 buf_len, "%d\n", i); 973 if (print_len < buf_len) { 974 mp->b_cont->b_wptr += print_len; 975 } else { 976 mp->b_cont->b_wptr += buf_len; 977 } 978 for (; udp != NULL; udp = udp->udp_bind_hash) { 979 if (zoneid == GLOBAL_ZONEID || 980 zoneid == udp->udp_connp->conn_zoneid) 981 udp_report_item(mp->b_cont, udp); 982 } 983 } 984 mutex_exit(&udpf->uf_lock); 985 } 986 udp_last_ndd_get_info_time = ddi_get_lbolt(); 987 return (0); 988 } 989 990 /* 991 * Hash list removal routine for udp_t structures. 992 */ 993 static void 994 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 995 { 996 udp_t *udpnext; 997 kmutex_t *lockp; 998 999 if (udp->udp_ptpbhn == NULL) 1000 return; 1001 1002 /* 1003 * Extract the lock pointer in case there are concurrent 1004 * hash_remove's for this instance. 
1005 */ 1006 ASSERT(udp->udp_port != 0); 1007 if (!caller_holds_lock) { 1008 lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock; 1009 ASSERT(lockp != NULL); 1010 mutex_enter(lockp); 1011 } 1012 if (udp->udp_ptpbhn != NULL) { 1013 udpnext = udp->udp_bind_hash; 1014 if (udpnext != NULL) { 1015 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 1016 udp->udp_bind_hash = NULL; 1017 } 1018 *udp->udp_ptpbhn = udpnext; 1019 udp->udp_ptpbhn = NULL; 1020 } 1021 if (!caller_holds_lock) { 1022 mutex_exit(lockp); 1023 } 1024 } 1025 1026 static void 1027 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 1028 { 1029 udp_t **udpp; 1030 udp_t *udpnext; 1031 1032 ASSERT(MUTEX_HELD(&uf->uf_lock)); 1033 if (udp->udp_ptpbhn != NULL) { 1034 udp_bind_hash_remove(udp, B_TRUE); 1035 } 1036 udpp = &uf->uf_udp; 1037 udpnext = udpp[0]; 1038 if (udpnext != NULL) { 1039 /* 1040 * If the new udp bound to the INADDR_ANY address 1041 * and the first one in the list is not bound to 1042 * INADDR_ANY we skip all entries until we find the 1043 * first one bound to INADDR_ANY. 1044 * This makes sure that applications binding to a 1045 * specific address get preference over those binding to 1046 * INADDR_ANY. 1047 */ 1048 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 1049 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 1050 while ((udpnext = udpp[0]) != NULL && 1051 !V6_OR_V4_INADDR_ANY( 1052 udpnext->udp_bound_v6src)) { 1053 udpp = &(udpnext->udp_bind_hash); 1054 } 1055 if (udpnext != NULL) 1056 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1057 } else { 1058 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1059 } 1060 } 1061 udp->udp_bind_hash = udpnext; 1062 udp->udp_ptpbhn = udpp; 1063 udpp[0] = udp; 1064 } 1065 1066 /* 1067 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 1068 * passed to udp_wput. 1069 * It associates a port number and local address with the stream. 1070 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 1071 * protocol type (IPPROTO_UDP) placed in the message following the address. 1072 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 1073 * (Called as writer.) 1074 * 1075 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 1076 * without setting SO_REUSEADDR. This is needed so that they 1077 * can be viewed as two independent transport protocols. 1078 * However, anonymouns ports are allocated from the same range to avoid 1079 * duplicating the udp_g_next_port_to_try. 
1080 */ 1081 static void 1082 udp_bind(queue_t *q, mblk_t *mp) 1083 { 1084 sin_t *sin; 1085 sin6_t *sin6; 1086 mblk_t *mp1; 1087 in_port_t port; /* Host byte order */ 1088 in_port_t requested_port; /* Host byte order */ 1089 struct T_bind_req *tbr; 1090 int count; 1091 in6_addr_t v6src; 1092 boolean_t bind_to_req_port_only; 1093 int loopmax; 1094 udp_fanout_t *udpf; 1095 in_port_t lport; /* Network byte order */ 1096 zoneid_t zoneid; 1097 conn_t *connp; 1098 udp_t *udp; 1099 1100 connp = Q_TO_CONN(q); 1101 udp = connp->conn_udp; 1102 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 1103 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1104 "udp_bind: bad req, len %u", 1105 (uint_t)(mp->b_wptr - mp->b_rptr)); 1106 udp_err_ack(q, mp, TPROTO, 0); 1107 return; 1108 } 1109 1110 if (udp->udp_state != TS_UNBND) { 1111 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1112 "udp_bind: bad state, %u", udp->udp_state); 1113 udp_err_ack(q, mp, TOUTSTATE, 0); 1114 return; 1115 } 1116 /* 1117 * Reallocate the message to make sure we have enough room for an 1118 * address and the protocol type. 1119 */ 1120 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 1121 if (!mp1) { 1122 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1123 return; 1124 } 1125 1126 mp = mp1; 1127 tbr = (struct T_bind_req *)mp->b_rptr; 1128 switch (tbr->ADDR_length) { 1129 case 0: /* Request for a generic port */ 1130 tbr->ADDR_offset = sizeof (struct T_bind_req); 1131 if (udp->udp_family == AF_INET) { 1132 tbr->ADDR_length = sizeof (sin_t); 1133 sin = (sin_t *)&tbr[1]; 1134 *sin = sin_null; 1135 sin->sin_family = AF_INET; 1136 mp->b_wptr = (uchar_t *)&sin[1]; 1137 } else { 1138 ASSERT(udp->udp_family == AF_INET6); 1139 tbr->ADDR_length = sizeof (sin6_t); 1140 sin6 = (sin6_t *)&tbr[1]; 1141 *sin6 = sin6_null; 1142 sin6->sin6_family = AF_INET6; 1143 mp->b_wptr = (uchar_t *)&sin6[1]; 1144 } 1145 port = 0; 1146 break; 1147 1148 case sizeof (sin_t): /* Complete IPv4 address */ 1149 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 1150 sizeof (sin_t)); 1151 if (sin == NULL || !OK_32PTR((char *)sin)) { 1152 udp_err_ack(q, mp, TSYSERR, EINVAL); 1153 return; 1154 } 1155 if (udp->udp_family != AF_INET || 1156 sin->sin_family != AF_INET) { 1157 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1158 return; 1159 } 1160 port = ntohs(sin->sin_port); 1161 break; 1162 1163 case sizeof (sin6_t): /* complete IPv6 address */ 1164 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 1165 sizeof (sin6_t)); 1166 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1167 udp_err_ack(q, mp, TSYSERR, EINVAL); 1168 return; 1169 } 1170 if (udp->udp_family != AF_INET6 || 1171 sin6->sin6_family != AF_INET6) { 1172 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1173 return; 1174 } 1175 port = ntohs(sin6->sin6_port); 1176 break; 1177 1178 default: /* Invalid request */ 1179 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1180 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 1181 udp_err_ack(q, mp, TBADADDR, 0); 1182 return; 1183 } 1184 1185 requested_port = port; 1186 1187 if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ) 1188 bind_to_req_port_only = B_FALSE; 1189 else /* T_BIND_REQ and requested_port != 0 */ 1190 bind_to_req_port_only = B_TRUE; 1191 1192 if (requested_port == 0) { 1193 /* 1194 * If the application passed in zero for the port number, it 1195 * doesn't care which port number we bind to. Get one in the 1196 * valid range. 
1197 */ 1198 if (udp->udp_anon_priv_bind) { 1199 port = udp_get_next_priv_port(); 1200 } else { 1201 port = udp_update_next_port(udp_g_next_port_to_try, 1202 B_TRUE); 1203 } 1204 } else { 1205 /* 1206 * If the port is in the well-known privileged range, 1207 * make sure the caller was privileged. 1208 */ 1209 int i; 1210 boolean_t priv = B_FALSE; 1211 1212 if (port < udp_smallest_nonpriv_port) { 1213 priv = B_TRUE; 1214 } else { 1215 for (i = 0; i < udp_g_num_epriv_ports; i++) { 1216 if (port == udp_g_epriv_ports[i]) { 1217 priv = B_TRUE; 1218 break; 1219 } 1220 } 1221 } 1222 1223 if (priv) { 1224 cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); 1225 1226 if (secpolicy_net_privaddr(cr, port) != 0) { 1227 udp_err_ack(q, mp, TACCES, 0); 1228 return; 1229 } 1230 } 1231 } 1232 1233 /* 1234 * Copy the source address into our udp structure. This address 1235 * may still be zero; if so, IP will fill in the correct address 1236 * each time an outbound packet is passed to it. 1237 */ 1238 if (udp->udp_family == AF_INET) { 1239 ASSERT(sin != NULL); 1240 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1241 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1242 udp->udp_ip_snd_options_len; 1243 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 1244 } else { 1245 ASSERT(sin6 != NULL); 1246 v6src = sin6->sin6_addr; 1247 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 1248 udp->udp_ipversion = IPV4_VERSION; 1249 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1250 UDPH_SIZE + udp->udp_ip_snd_options_len; 1251 } else { 1252 udp->udp_ipversion = IPV6_VERSION; 1253 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1254 } 1255 } 1256 1257 /* 1258 * If udp_reuseaddr is not set, then we have to make sure that 1259 * the IP address and port number the application requested 1260 * (or we selected for the application) is not being used by 1261 * another stream. If another stream is already using the 1262 * requested IP address and port, the behavior depends on 1263 * "bind_to_req_port_only". If set the bind fails; otherwise we 1264 * search for any an unused port to bind to the the stream. 1265 * 1266 * As per the BSD semantics, as modified by the Deering multicast 1267 * changes, if udp_reuseaddr is set, then we allow multiple binds 1268 * to the same port independent of the local IP address. 1269 * 1270 * This is slightly different than in SunOS 4.X which did not 1271 * support IP multicast. Note that the change implemented by the 1272 * Deering multicast code effects all binds - not only binding 1273 * to IP multicast addresses. 1274 * 1275 * Note that when binding to port zero we ignore SO_REUSEADDR in 1276 * order to guarantee a unique port. 1277 */ 1278 1279 count = 0; 1280 if (udp->udp_anon_priv_bind) { 1281 /* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */ 1282 loopmax = IPPORT_RESERVED - udp_min_anonpriv_port; 1283 } else { 1284 loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1; 1285 } 1286 1287 zoneid = connp->conn_zoneid; 1288 for (;;) { 1289 udp_t *udp1; 1290 boolean_t is_inaddr_any; 1291 boolean_t found_exclbind = B_FALSE; 1292 1293 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 1294 /* 1295 * Walk through the list of udp streams bound to 1296 * requested port with the same IP address. 
1297 */ 1298 lport = htons(port); 1299 udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)]; 1300 mutex_enter(&udpf->uf_lock); 1301 for (udp1 = udpf->uf_udp; udp1 != NULL; 1302 udp1 = udp1->udp_bind_hash) { 1303 if (lport != udp1->udp_port || 1304 zoneid != udp1->udp_connp->conn_zoneid) 1305 continue; 1306 1307 /* 1308 * If UDP_EXCLBIND is set for either the bound or 1309 * binding endpoint, the semantics of bind 1310 * is changed according to the following chart. 1311 * 1312 * spec = specified address (v4 or v6) 1313 * unspec = unspecified address (v4 or v6) 1314 * A = specified addresses are different for endpoints 1315 * 1316 * bound bind to allowed? 1317 * ------------------------------------- 1318 * unspec unspec no 1319 * unspec spec no 1320 * spec unspec no 1321 * spec spec yes if A 1322 */ 1323 if (udp1->udp_exclbind || udp->udp_exclbind) { 1324 if (V6_OR_V4_INADDR_ANY( 1325 udp1->udp_bound_v6src) || 1326 is_inaddr_any || 1327 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1328 &v6src)) { 1329 found_exclbind = B_TRUE; 1330 break; 1331 } 1332 continue; 1333 } 1334 1335 /* 1336 * Check ipversion to allow IPv4 and IPv6 sockets to 1337 * have disjoint port number spaces. 1338 */ 1339 if (udp->udp_ipversion != udp1->udp_ipversion) 1340 continue; 1341 1342 /* 1343 * No difference depending on SO_REUSEADDR. 1344 * 1345 * If existing port is bound to a 1346 * non-wildcard IP address and 1347 * the requesting stream is bound to 1348 * a distinct different IP addresses 1349 * (non-wildcard, also), keep going. 1350 */ 1351 if (!is_inaddr_any && 1352 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 1353 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1354 &v6src)) { 1355 continue; 1356 } 1357 break; 1358 } 1359 1360 if (!found_exclbind && 1361 (udp->udp_reuseaddr && requested_port != 0)) { 1362 break; 1363 } 1364 1365 if (udp1 == NULL) { 1366 /* 1367 * No other stream has this IP address 1368 * and port number. We can use it. 1369 */ 1370 break; 1371 } 1372 mutex_exit(&udpf->uf_lock); 1373 if (bind_to_req_port_only) { 1374 /* 1375 * We get here only when requested port 1376 * is bound (and only first of the for() 1377 * loop iteration). 1378 * 1379 * The semantics of this bind request 1380 * require it to fail so we return from 1381 * the routine (and exit the loop). 1382 * 1383 */ 1384 udp_err_ack(q, mp, TADDRBUSY, 0); 1385 return; 1386 } 1387 1388 if (udp->udp_anon_priv_bind) { 1389 port = udp_get_next_priv_port(); 1390 } else { 1391 if ((count == 0) && (requested_port != 0)) { 1392 /* 1393 * If the application wants us to find 1394 * a port, get one to start with. Set 1395 * requested_port to 0, so that we will 1396 * update udp_g_next_port_to_try below. 1397 */ 1398 port = udp_update_next_port( 1399 udp_g_next_port_to_try, B_TRUE); 1400 requested_port = 0; 1401 } else { 1402 port = udp_update_next_port(port + 1, B_FALSE); 1403 } 1404 } 1405 1406 if (++count >= loopmax) { 1407 /* 1408 * We've tried every possible port number and 1409 * there are none available, so send an error 1410 * to the user. 1411 */ 1412 udp_err_ack(q, mp, TNOADDR, 0); 1413 return; 1414 } 1415 } 1416 1417 /* 1418 * Copy the source address into our udp structure. This address 1419 * may still be zero; if so, ip will fill in the correct address 1420 * each time an outbound packet is passed to it. 1421 * If we are binding to a broadcast or multicast address udp_rput 1422 * will clear the source address when it receives the T_BIND_ACK. 
1423 */ 1424 udp->udp_v6src = udp->udp_bound_v6src = v6src; 1425 udp->udp_port = lport; 1426 /* 1427 * Now reset the the next anonymous port if the application requested 1428 * an anonymous port, or we handed out the next anonymous port. 1429 */ 1430 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 1431 udp_g_next_port_to_try = port + 1; 1432 } 1433 1434 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 1435 if (udp->udp_family == AF_INET) { 1436 sin->sin_port = udp->udp_port; 1437 } else { 1438 int error; 1439 1440 sin6->sin6_port = udp->udp_port; 1441 /* Rebuild the header template */ 1442 error = udp_build_hdrs(q, udp); 1443 if (error != 0) { 1444 mutex_exit(&udpf->uf_lock); 1445 udp_err_ack(q, mp, TSYSERR, error); 1446 return; 1447 } 1448 } 1449 udp->udp_state = TS_IDLE; 1450 udp_bind_hash_insert(udpf, udp); 1451 mutex_exit(&udpf->uf_lock); 1452 1453 if (cl_inet_bind) { 1454 /* 1455 * Running in cluster mode - register bind information 1456 */ 1457 if (udp->udp_ipversion == IPV4_VERSION) { 1458 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1459 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1460 (in_port_t)udp->udp_port); 1461 } else { 1462 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1463 (uint8_t *)&(udp->udp_v6src), 1464 (in_port_t)udp->udp_port); 1465 } 1466 1467 } 1468 1469 /* Pass the protocol number in the message following the address. */ 1470 *mp->b_wptr++ = IPPROTO_UDP; 1471 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1472 /* 1473 * Append a request for an IRE if udp_v6src not 1474 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1475 */ 1476 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1477 if (!mp->b_cont) { 1478 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1479 return; 1480 } 1481 mp->b_cont->b_wptr += sizeof (ire_t); 1482 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1483 } 1484 if (udp->udp_family == AF_INET6) 1485 mp = ip_bind_v6(q, mp, connp, NULL); 1486 else 1487 mp = ip_bind_v4(q, mp, connp); 1488 1489 if (mp != NULL) 1490 udp_rput_other(_RD(q), mp); 1491 else 1492 CONN_INC_REF(connp); 1493 } 1494 1495 1496 void 1497 udp_resume_bind(conn_t *connp, mblk_t *mp) 1498 { 1499 udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY); 1500 } 1501 1502 /* 1503 * This is called from ip_wput_nondata to resume a deferred UDP bind. 1504 */ 1505 /* ARGSUSED */ 1506 static void 1507 udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2) 1508 { 1509 conn_t *connp = arg; 1510 1511 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1512 1513 udp_rput_other(connp->conn_rq, mp); 1514 1515 CONN_OPER_PENDING_DONE(connp); 1516 udp_exit(connp); 1517 } 1518 1519 /* 1520 * This routine handles each T_CONN_REQ message passed to udp. It 1521 * associates a default destination address with the stream. 1522 * 1523 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1524 * T_BIND_REQ - specifying local and remote address/port 1525 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 1526 * T_OK_ACK - for the T_CONN_REQ 1527 * T_CONN_CON - to keep the TPI user happy 1528 * 1529 * The connect completes in udp_rput. 1530 * When a T_BIND_ACK is received information is extracted from the IRE 1531 * and the two appended messages are sent to the TPI user. 1532 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1533 * it to an error ack for the appropriate primitive. 
1534 */ 1535 static void 1536 udp_connect(queue_t *q, mblk_t *mp) 1537 { 1538 sin6_t *sin6; 1539 sin_t *sin; 1540 struct T_conn_req *tcr; 1541 in6_addr_t v6dst; 1542 ipaddr_t v4dst; 1543 uint16_t dstport; 1544 uint32_t flowinfo; 1545 mblk_t *mp1, *mp2; 1546 udp_fanout_t *udpf; 1547 udp_t *udp, *udp1; 1548 1549 udp = Q_TO_UDP(q); 1550 1551 tcr = (struct T_conn_req *)mp->b_rptr; 1552 1553 /* A bit of sanity checking */ 1554 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 1555 udp_err_ack(q, mp, TPROTO, 0); 1556 return; 1557 } 1558 /* 1559 * This UDP must have bound to a port already before doing 1560 * a connect. 1561 */ 1562 if (udp->udp_state == TS_UNBND) { 1563 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1564 "udp_connect: bad state, %u", udp->udp_state); 1565 udp_err_ack(q, mp, TOUTSTATE, 0); 1566 return; 1567 } 1568 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 1569 1570 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1571 1572 if (udp->udp_state == TS_DATA_XFER) { 1573 /* Already connected - clear out state */ 1574 mutex_enter(&udpf->uf_lock); 1575 udp->udp_v6src = udp->udp_bound_v6src; 1576 udp->udp_state = TS_IDLE; 1577 mutex_exit(&udpf->uf_lock); 1578 } 1579 1580 if (tcr->OPT_length != 0) { 1581 udp_err_ack(q, mp, TBADOPT, 0); 1582 return; 1583 } 1584 1585 /* 1586 * Determine packet type based on type of address passed in 1587 * the request should contain an IPv4 or IPv6 address. 1588 * Make sure that address family matches the type of 1589 * family of the the address passed down 1590 */ 1591 switch (tcr->DEST_length) { 1592 default: 1593 udp_err_ack(q, mp, TBADADDR, 0); 1594 return; 1595 1596 case sizeof (sin_t): 1597 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 1598 sizeof (sin_t)); 1599 if (sin == NULL || !OK_32PTR((char *)sin)) { 1600 udp_err_ack(q, mp, TSYSERR, EINVAL); 1601 return; 1602 } 1603 if (udp->udp_family != AF_INET || 1604 sin->sin_family != AF_INET) { 1605 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1606 return; 1607 } 1608 v4dst = sin->sin_addr.s_addr; 1609 dstport = sin->sin_port; 1610 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1611 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1612 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1613 udp->udp_ip_snd_options_len; 1614 break; 1615 1616 case sizeof (sin6_t): 1617 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 1618 sizeof (sin6_t)); 1619 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1620 udp_err_ack(q, mp, TSYSERR, EINVAL); 1621 return; 1622 } 1623 if (udp->udp_family != AF_INET6 || 1624 sin6->sin6_family != AF_INET6) { 1625 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1626 return; 1627 } 1628 v6dst = sin6->sin6_addr; 1629 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 1630 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 1631 udp->udp_ipversion = IPV4_VERSION; 1632 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1633 UDPH_SIZE + udp->udp_ip_snd_options_len; 1634 flowinfo = 0; 1635 } else { 1636 udp->udp_ipversion = IPV6_VERSION; 1637 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1638 flowinfo = sin6->sin6_flowinfo; 1639 } 1640 dstport = sin6->sin6_port; 1641 break; 1642 } 1643 if (dstport == 0) { 1644 udp_err_ack(q, mp, TBADADDR, 0); 1645 return; 1646 } 1647 1648 /* 1649 * Create a default IP header with no IP options. 1650 */ 1651 udp->udp_dstport = dstport; 1652 if (udp->udp_ipversion == IPV4_VERSION) { 1653 /* 1654 * Interpret a zero destination to mean loopback. 1655 * Update the T_CONN_REQ (sin/sin6) since it is used to 1656 * generate the T_CONN_CON. 
1657 */ 1658 if (v4dst == INADDR_ANY) { 1659 v4dst = htonl(INADDR_LOOPBACK); 1660 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1661 if (udp->udp_family == AF_INET) { 1662 sin->sin_addr.s_addr = v4dst; 1663 } else { 1664 sin6->sin6_addr = v6dst; 1665 } 1666 } 1667 udp->udp_v6dst = v6dst; 1668 udp->udp_flowinfo = 0; 1669 1670 /* 1671 * If the destination address is multicast and 1672 * an outgoing multicast interface has been set, 1673 * use the address of that interface as our 1674 * source address if no source address has been set. 1675 */ 1676 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 1677 CLASSD(v4dst) && 1678 udp->udp_multicast_if_addr != INADDR_ANY) { 1679 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 1680 &udp->udp_v6src); 1681 } 1682 } else { 1683 ASSERT(udp->udp_ipversion == IPV6_VERSION); 1684 /* 1685 * Interpret a zero destination to mean loopback. 1686 * Update the T_CONN_REQ (sin/sin6) since it is used to 1687 * generate the T_CONN_CON. 1688 */ 1689 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 1690 v6dst = ipv6_loopback; 1691 sin6->sin6_addr = v6dst; 1692 } 1693 udp->udp_v6dst = v6dst; 1694 udp->udp_flowinfo = flowinfo; 1695 /* 1696 * If the destination address is multicast and 1697 * an outgoing multicast interface has been set, 1698 * then the ip bind logic will pick the correct source 1699 * address (i.e. matching the outgoing multicast interface). 1700 */ 1701 } 1702 1703 /* 1704 * Verify that the src/port/dst/port is unique for all 1705 * connections in TS_DATA_XFER 1706 */ 1707 mutex_enter(&udpf->uf_lock); 1708 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 1709 if (udp1->udp_state != TS_DATA_XFER) 1710 continue; 1711 if (udp->udp_port != udp1->udp_port || 1712 udp->udp_ipversion != udp1->udp_ipversion || 1713 dstport != udp1->udp_dstport || 1714 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 1715 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst)) 1716 continue; 1717 mutex_exit(&udpf->uf_lock); 1718 udp_err_ack(q, mp, TBADADDR, 0); 1719 return; 1720 } 1721 udp->udp_state = TS_DATA_XFER; 1722 mutex_exit(&udpf->uf_lock); 1723 1724 /* 1725 * Send down bind to IP to verify that there is a route 1726 * and to determine the source address. 1727 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 1728 */ 1729 if (udp->udp_family == AF_INET) 1730 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t)); 1731 else 1732 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); 1733 if (mp1 == NULL) { 1734 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1735 bind_failed: 1736 mutex_enter(&udpf->uf_lock); 1737 udp->udp_state = TS_IDLE; 1738 mutex_exit(&udpf->uf_lock); 1739 return; 1740 } 1741 1742 /* 1743 * We also have to send a connection confirmation to 1744 * keep TLI happy. Prepare it for udp_rput. 1745 */ 1746 if (udp->udp_family == AF_INET) 1747 mp2 = mi_tpi_conn_con(NULL, (char *)sin, 1748 sizeof (*sin), NULL, 0); 1749 else 1750 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, 1751 sizeof (*sin6), NULL, 0); 1752 if (mp2 == NULL) { 1753 freemsg(mp1); 1754 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1755 goto bind_failed; 1756 } 1757 1758 mp = mi_tpi_ok_ack_alloc(mp); 1759 if (mp == NULL) { 1760 /* Unable to reuse the T_CONN_REQ for the ack. */ 1761 freemsg(mp2); 1762 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 1763 goto bind_failed; 1764 } 1765 1766 /* Hang onto the T_OK_ACK and T_CONN_CON for later. 
*/ 1767 linkb(mp1, mp); 1768 linkb(mp1, mp2); 1769 1770 if (udp->udp_family == AF_INET) 1771 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1772 else 1773 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1774 1775 if (mp1 != NULL) 1776 udp_rput_other(_RD(q), mp1); 1777 else 1778 CONN_INC_REF(udp->udp_connp); 1779 } 1780 1781 static int 1782 udp_close(queue_t *q) 1783 { 1784 conn_t *connp = Q_TO_CONN(UDP_WR(q)); 1785 udp_t *udp; 1786 queue_t *ip_rq = RD(UDP_WR(q)); 1787 1788 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1789 udp = connp->conn_udp; 1790 1791 ip_quiesce_conn(connp); 1792 /* 1793 * Disable read-side synchronous stream 1794 * interface and drain any queued data. 1795 */ 1796 udp_rcv_drain(q, udp, B_TRUE); 1797 ASSERT(!udp->udp_direct_sockfs); 1798 1799 qprocsoff(q); 1800 1801 /* restore IP module's high and low water marks to default values */ 1802 ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat; 1803 WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat; 1804 WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat; 1805 1806 ASSERT(udp->udp_rcv_cnt == 0); 1807 ASSERT(udp->udp_rcv_msgcnt == 0); 1808 ASSERT(udp->udp_rcv_list_head == NULL); 1809 ASSERT(udp->udp_rcv_list_tail == NULL); 1810 1811 /* connp is now single threaded. */ 1812 udp_close_free(connp); 1813 /* 1814 * Restore connp as an IP endpoint. We don't need 1815 * any locks since we are now single threaded 1816 */ 1817 connp->conn_flags &= ~IPCL_UDP; 1818 connp->conn_state_flags &= 1819 ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED); 1820 return (0); 1821 } 1822 1823 /* 1824 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn 1825 */ 1826 void 1827 udp_quiesce_conn(conn_t *connp) 1828 { 1829 udp_t *udp = connp->conn_udp; 1830 1831 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 1832 /* 1833 * Running in cluster mode - register unbind information 1834 */ 1835 if (udp->udp_ipversion == IPV4_VERSION) { 1836 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 1837 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 1838 (in_port_t)udp->udp_port); 1839 } else { 1840 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 1841 (uint8_t *)(&(udp->udp_v6src)), 1842 (in_port_t)udp->udp_port); 1843 } 1844 } 1845 1846 udp_bind_hash_remove(udp, B_FALSE); 1847 1848 mutex_enter(&connp->conn_lock); 1849 while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 || 1850 udp->udp_mode != UDP_MT_HOT) { 1851 cv_wait(&connp->conn_cv, &connp->conn_lock); 1852 } 1853 mutex_exit(&connp->conn_lock); 1854 } 1855 1856 void 1857 udp_close_free(conn_t *connp) 1858 { 1859 udp_t *udp = connp->conn_udp; 1860 1861 /* If there are any options associated with the stream, free them. 
*/ 1862 if (udp->udp_ip_snd_options) { 1863 mi_free((char *)udp->udp_ip_snd_options); 1864 udp->udp_ip_snd_options = NULL; 1865 } 1866 1867 if (udp->udp_ip_rcv_options) { 1868 mi_free((char *)udp->udp_ip_rcv_options); 1869 udp->udp_ip_rcv_options = NULL; 1870 } 1871 1872 /* Free memory associated with sticky options */ 1873 if (udp->udp_sticky_hdrs_len != 0) { 1874 kmem_free(udp->udp_sticky_hdrs, 1875 udp->udp_sticky_hdrs_len); 1876 udp->udp_sticky_hdrs = NULL; 1877 udp->udp_sticky_hdrs_len = 0; 1878 } 1879 1880 if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 1881 kmem_free(udp->udp_sticky_ipp.ipp_hopopts, 1882 udp->udp_sticky_ipp.ipp_hopoptslen); 1883 udp->udp_sticky_ipp.ipp_hopopts = NULL; 1884 } 1885 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 1886 kmem_free(udp->udp_sticky_ipp.ipp_rtdstopts, 1887 udp->udp_sticky_ipp.ipp_rtdstoptslen); 1888 udp->udp_sticky_ipp.ipp_rtdstopts = NULL; 1889 } 1890 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 1891 kmem_free(udp->udp_sticky_ipp.ipp_rthdr, 1892 udp->udp_sticky_ipp.ipp_rthdrlen); 1893 udp->udp_sticky_ipp.ipp_rthdr = NULL; 1894 } 1895 if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 1896 kmem_free(udp->udp_sticky_ipp.ipp_dstopts, 1897 udp->udp_sticky_ipp.ipp_dstoptslen); 1898 udp->udp_sticky_ipp.ipp_dstopts = NULL; 1899 } 1900 udp->udp_sticky_ipp.ipp_fields &= 1901 ~(IPPF_HOPOPTS|IPPF_RTDSTOPTS|IPPF_RTHDR|IPPF_DSTOPTS); 1902 1903 udp->udp_connp = NULL; 1904 connp->conn_udp = NULL; 1905 kmem_cache_free(udp_cache, udp); 1906 } 1907 1908 /* 1909 * This routine handles each T_DISCON_REQ message passed to udp 1910 * as an indication that UDP is no longer connected. This results 1911 * in sending a T_BIND_REQ to IP to restore the binding to just 1912 * the local address/port. 1913 * 1914 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1915 * T_BIND_REQ - specifying just the local address/port 1916 * T_OK_ACK - for the T_DISCON_REQ 1917 * 1918 * The disconnect completes in udp_rput. 1919 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 1920 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1921 * it to an error ack for the appropriate primitive. 1922 */ 1923 static void 1924 udp_disconnect(queue_t *q, mblk_t *mp) 1925 { 1926 udp_t *udp = Q_TO_UDP(q); 1927 mblk_t *mp1; 1928 udp_fanout_t *udpf; 1929 1930 if (udp->udp_state != TS_DATA_XFER) { 1931 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1932 "udp_disconnect: bad state, %u", udp->udp_state); 1933 udp_err_ack(q, mp, TOUTSTATE, 0); 1934 return; 1935 } 1936 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1937 mutex_enter(&udpf->uf_lock); 1938 udp->udp_v6src = udp->udp_bound_v6src; 1939 udp->udp_state = TS_IDLE; 1940 mutex_exit(&udpf->uf_lock); 1941 1942 /* 1943 * Send down bind to IP to remove the full binding and revert 1944 * to the local address binding. 1945 */ 1946 if (udp->udp_family == AF_INET) 1947 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 1948 else 1949 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 1950 if (mp1 == NULL) { 1951 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1952 return; 1953 } 1954 mp = mi_tpi_ok_ack_alloc(mp); 1955 if (mp == NULL) { 1956 /* Unable to reuse the T_DISCON_REQ for the ack.
*/ 1957 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 1958 return; 1959 } 1960 1961 if (udp->udp_family == AF_INET6) { 1962 int error; 1963 1964 /* Rebuild the header template */ 1965 error = udp_build_hdrs(q, udp); 1966 if (error != 0) { 1967 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 1968 freemsg(mp1); 1969 return; 1970 } 1971 } 1972 mutex_enter(&udpf->uf_lock); 1973 udp->udp_discon_pending = 1; 1974 mutex_exit(&udpf->uf_lock); 1975 1976 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 1977 linkb(mp1, mp); 1978 1979 if (udp->udp_family == AF_INET6) 1980 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1981 else 1982 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1983 1984 if (mp1 != NULL) 1985 udp_rput_other(_RD(q), mp1); 1986 else 1987 CONN_INC_REF(udp->udp_connp); 1988 } 1989 1990 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1991 static void 1992 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1993 { 1994 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1995 putnext(UDP_RD(q), mp); 1996 } 1997 1998 /* Shorthand to generate and send TPI error acks to our client */ 1999 static void 2000 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2001 int sys_error) 2002 { 2003 struct T_error_ack *teackp; 2004 2005 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2006 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2007 teackp = (struct T_error_ack *)mp->b_rptr; 2008 teackp->ERROR_prim = primitive; 2009 teackp->TLI_error = t_error; 2010 teackp->UNIX_error = sys_error; 2011 putnext(UDP_RD(q), mp); 2012 } 2013 } 2014 2015 /*ARGSUSED*/ 2016 static int 2017 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2018 { 2019 int i; 2020 2021 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2022 if (udp_g_epriv_ports[i] != 0) 2023 (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); 2024 } 2025 return (0); 2026 } 2027 2028 /* ARGSUSED */ 2029 static int 2030 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2031 cred_t *cr) 2032 { 2033 long new_value; 2034 int i; 2035 2036 /* 2037 * Fail the request if the new value does not lie within the 2038 * port number limits. 2039 */ 2040 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2041 new_value <= 0 || new_value >= 65536) { 2042 return (EINVAL); 2043 } 2044 2045 /* Check if the value is already in the list */ 2046 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2047 if (new_value == udp_g_epriv_ports[i]) { 2048 return (EEXIST); 2049 } 2050 } 2051 /* Find an empty slot */ 2052 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2053 if (udp_g_epriv_ports[i] == 0) 2054 break; 2055 } 2056 if (i == udp_g_num_epriv_ports) { 2057 return (EOVERFLOW); 2058 } 2059 2060 /* Set the new value */ 2061 udp_g_epriv_ports[i] = (in_port_t)new_value; 2062 return (0); 2063 } 2064 2065 /* ARGSUSED */ 2066 static int 2067 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2068 cred_t *cr) 2069 { 2070 long new_value; 2071 int i; 2072 2073 /* 2074 * Fail the request if the new value does not lie within the 2075 * port number limits. 
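 * (Both the add and the delete ND variables accept only values in the
 * range 1-65535; anything outside that range, or a value that does not
 * parse as a number, is rejected with EINVAL below.)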
2076 */ 2077 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2078 new_value <= 0 || new_value >= 65536) { 2079 return (EINVAL); 2080 } 2081 2082 /* Check that the value is already in the list */ 2083 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2084 if (udp_g_epriv_ports[i] == new_value) 2085 break; 2086 } 2087 if (i == udp_g_num_epriv_ports) { 2088 return (ESRCH); 2089 } 2090 2091 /* Clear the value */ 2092 udp_g_epriv_ports[i] = 0; 2093 return (0); 2094 } 2095 2096 /* At minimum we need 4 bytes of UDP header */ 2097 #define ICMP_MIN_UDP_HDR 4 2098 2099 /* 2100 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2101 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2102 * Assumes that IP has pulled up everything up to and including the ICMP header. 2103 * An M_CTL could potentially come here from some other module (i.e. if UDP 2104 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2105 * does not have enough ICMP information , following STREAMS conventions, 2106 * we send it upstream assuming it is an M_CTL we don't understand. 2107 */ 2108 static void 2109 udp_icmp_error(queue_t *q, mblk_t *mp) 2110 { 2111 icmph_t *icmph; 2112 ipha_t *ipha; 2113 int iph_hdr_length; 2114 udpha_t *udpha; 2115 sin_t sin; 2116 sin6_t sin6; 2117 mblk_t *mp1; 2118 int error = 0; 2119 size_t mp_size = MBLKL(mp); 2120 udp_t *udp = Q_TO_UDP(q); 2121 2122 /* 2123 * Assume IP provides aligned packets - otherwise toss 2124 */ 2125 if (!OK_32PTR(mp->b_rptr)) { 2126 freemsg(mp); 2127 return; 2128 } 2129 2130 /* 2131 * Verify that we have a complete IP header and the application has 2132 * asked for errors. If not, send it upstream. 2133 */ 2134 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2135 noticmpv4: 2136 putnext(UDP_RD(q), mp); 2137 return; 2138 } 2139 2140 ipha = (ipha_t *)mp->b_rptr; 2141 /* 2142 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2143 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6. 2144 */ 2145 switch (IPH_HDR_VERSION(ipha)) { 2146 case IPV6_VERSION: 2147 udp_icmp_error_ipv6(q, mp); 2148 return; 2149 case IPV4_VERSION: 2150 break; 2151 default: 2152 goto noticmpv4; 2153 } 2154 2155 /* Skip past the outer IP and ICMP headers */ 2156 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2157 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2158 /* 2159 * If we don't have the correct outer IP header length or if the ULP 2160 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2161 * send the packet upstream. 2162 */ 2163 if (iph_hdr_length < sizeof (ipha_t) || 2164 ipha->ipha_protocol != IPPROTO_ICMP || 2165 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2166 goto noticmpv4; 2167 } 2168 ipha = (ipha_t *)&icmph[1]; 2169 2170 /* Skip past the inner IP and find the ULP header */ 2171 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2172 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2173 /* 2174 * If we don't have the correct inner IP header length or if the ULP 2175 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2176 * bytes of UDP header, send it upstream. 2177 */ 2178 if (iph_hdr_length < sizeof (ipha_t) || 2179 ipha->ipha_protocol != IPPROTO_UDP || 2180 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2181 goto noticmpv4; 2182 } 2183 2184 switch (icmph->icmph_type) { 2185 case ICMP_DEST_UNREACHABLE: 2186 switch (icmph->icmph_code) { 2187 case ICMP_FRAGMENTATION_NEEDED: 2188 /* 2189 * IP has already adjusted the path MTU. 
2190 * XXX Somehow pass MTU indication to application? 2191 */ 2192 break; 2193 case ICMP_PORT_UNREACHABLE: 2194 case ICMP_PROTOCOL_UNREACHABLE: 2195 error = ECONNREFUSED; 2196 break; 2197 default: 2198 /* Transient errors */ 2199 break; 2200 } 2201 break; 2202 default: 2203 /* Transient errors */ 2204 break; 2205 } 2206 if (error == 0) { 2207 freemsg(mp); 2208 return; 2209 } 2210 2211 switch (udp->udp_family) { 2212 case AF_INET: 2213 sin = sin_null; 2214 sin.sin_family = AF_INET; 2215 sin.sin_addr.s_addr = ipha->ipha_dst; 2216 sin.sin_port = udpha->uha_dst_port; 2217 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2218 error); 2219 break; 2220 case AF_INET6: 2221 sin6 = sin6_null; 2222 sin6.sin6_family = AF_INET6; 2223 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2224 sin6.sin6_port = udpha->uha_dst_port; 2225 2226 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2227 NULL, 0, error); 2228 break; 2229 } 2230 if (mp1) 2231 putnext(UDP_RD(q), mp1); 2232 freemsg(mp); 2233 } 2234 2235 /* 2236 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2237 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2238 * Assumes that IP has pulled up all the extension headers as well as the 2239 * ICMPv6 header. 2240 * An M_CTL could potentially come here from some other module (i.e. if UDP 2241 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2242 * does not have enough ICMP information, following STREAMS conventions, 2243 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2244 * it might get here is if the non-ICMP M_CTL accidentally has 6 in the version 2245 * field (when cast to ipha_t in udp_icmp_error). 2246 */ 2247 static void 2248 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2249 { 2250 icmp6_t *icmp6; 2251 ip6_t *ip6h, *outer_ip6h; 2252 uint16_t hdr_length; 2253 uint8_t *nexthdrp; 2254 udpha_t *udpha; 2255 sin6_t sin6; 2256 mblk_t *mp1; 2257 int error = 0; 2258 size_t mp_size = MBLKL(mp); 2259 udp_t *udp = Q_TO_UDP(q); 2260 2261 /* 2262 * Verify that we have a complete IP header. If not, send it upstream. 2263 */ 2264 if (mp_size < sizeof (ip6_t)) { 2265 noticmpv6: 2266 putnext(UDP_RD(q), mp); 2267 return; 2268 } 2269 2270 outer_ip6h = (ip6_t *)mp->b_rptr; 2271 /* 2272 * Verify this is an ICMPV6 packet, else send it upstream 2273 */ 2274 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2275 hdr_length = IPV6_HDR_LEN; 2276 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2277 &nexthdrp) || 2278 *nexthdrp != IPPROTO_ICMPV6) { 2279 goto noticmpv6; 2280 } 2281 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2282 ip6h = (ip6_t *)&icmp6[1]; 2283 /* 2284 * Verify we have a complete ICMP and inner IP header. 2285 */ 2286 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2287 goto noticmpv6; 2288 2289 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2290 goto noticmpv6; 2291 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2292 /* 2293 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2294 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2295 * packet upstream.
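 * At this point the message being parsed is laid out as
 *
 *	[ outer ip6_t + ext. hdrs ][ icmp6_t ][ inner ip6_t + ext. hdrs ][ udpha_t ... ]
 *
 * with outer_ip6h, icmp6, ip6h and udpha pointing at the respective pieces.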
2296 */ 2297 if ((*nexthdrp != IPPROTO_UDP) || 2298 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2299 goto noticmpv6; 2300 } 2301 2302 switch (icmp6->icmp6_type) { 2303 case ICMP6_DST_UNREACH: 2304 switch (icmp6->icmp6_code) { 2305 case ICMP6_DST_UNREACH_NOPORT: 2306 error = ECONNREFUSED; 2307 break; 2308 case ICMP6_DST_UNREACH_ADMIN: 2309 case ICMP6_DST_UNREACH_NOROUTE: 2310 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2311 case ICMP6_DST_UNREACH_ADDR: 2312 /* Transient errors */ 2313 break; 2314 default: 2315 break; 2316 } 2317 break; 2318 case ICMP6_PACKET_TOO_BIG: { 2319 struct T_unitdata_ind *tudi; 2320 struct T_opthdr *toh; 2321 size_t udi_size; 2322 mblk_t *newmp; 2323 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2324 sizeof (struct ip6_mtuinfo); 2325 sin6_t *sin6; 2326 struct ip6_mtuinfo *mtuinfo; 2327 2328 /* 2329 * If the application has requested to receive path mtu 2330 * information, send up an empty message containing an 2331 * IPV6_PATHMTU ancillary data item. 2332 */ 2333 if (!udp->udp_ipv6_recvpathmtu) 2334 break; 2335 2336 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2337 opt_length; 2338 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2339 BUMP_MIB(&udp_mib, udpInErrors); 2340 break; 2341 } 2342 2343 /* 2344 * newmp->b_cont is left to NULL on purpose. This is an 2345 * empty message containing only ancillary data. 2346 */ 2347 newmp->b_datap->db_type = M_PROTO; 2348 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2349 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2350 tudi->PRIM_type = T_UNITDATA_IND; 2351 tudi->SRC_length = sizeof (sin6_t); 2352 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2353 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2354 tudi->OPT_length = opt_length; 2355 2356 sin6 = (sin6_t *)&tudi[1]; 2357 bzero(sin6, sizeof (sin6_t)); 2358 sin6->sin6_family = AF_INET6; 2359 sin6->sin6_addr = udp->udp_v6dst; 2360 2361 toh = (struct T_opthdr *)&sin6[1]; 2362 toh->level = IPPROTO_IPV6; 2363 toh->name = IPV6_PATHMTU; 2364 toh->len = opt_length; 2365 toh->status = 0; 2366 2367 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2368 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2369 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2370 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2371 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2372 /* 2373 * We've consumed everything we need from the original 2374 * message. Free it, then send our empty message. 2375 */ 2376 freemsg(mp); 2377 putnext(UDP_RD(q), newmp); 2378 return; 2379 } 2380 case ICMP6_TIME_EXCEEDED: 2381 /* Transient errors */ 2382 break; 2383 case ICMP6_PARAM_PROB: 2384 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2385 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2386 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2387 (uchar_t *)nexthdrp) { 2388 error = ECONNREFUSED; 2389 break; 2390 } 2391 break; 2392 } 2393 if (error == 0) { 2394 freemsg(mp); 2395 return; 2396 } 2397 2398 sin6 = sin6_null; 2399 sin6.sin6_family = AF_INET6; 2400 sin6.sin6_addr = ip6h->ip6_dst; 2401 sin6.sin6_port = udpha->uha_dst_port; 2402 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2403 2404 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2405 error); 2406 if (mp1) 2407 putnext(UDP_RD(q), mp1); 2408 freemsg(mp); 2409 } 2410 2411 /* 2412 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 2413 * The local address is filled in if endpoint is bound. 
The remote address 2414 * is filled in if the remote address has been specified ("connected endpoint") 2415 * (The concept of connected CLTS sockets is alien to published TPI 2416 * but we support it anyway). 2417 */ 2418 static void 2419 udp_addr_req(queue_t *q, mblk_t *mp) 2420 { 2421 sin_t *sin; 2422 sin6_t *sin6; 2423 mblk_t *ackmp; 2424 struct T_addr_ack *taa; 2425 udp_t *udp = Q_TO_UDP(q); 2426 2427 /* Make it large enough for worst case */ 2428 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2429 2 * sizeof (sin6_t), 1); 2430 if (ackmp == NULL) { 2431 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2432 return; 2433 } 2434 taa = (struct T_addr_ack *)ackmp->b_rptr; 2435 2436 bzero(taa, sizeof (struct T_addr_ack)); 2437 ackmp->b_wptr = (uchar_t *)&taa[1]; 2438 2439 taa->PRIM_type = T_ADDR_ACK; 2440 ackmp->b_datap->db_type = M_PCPROTO; 2441 /* 2442 * Note: Following code assumes 32 bit alignment of basic 2443 * data structures like sin_t and struct T_addr_ack. 2444 */ 2445 if (udp->udp_state != TS_UNBND) { 2446 /* 2447 * Fill in local address first 2448 */ 2449 taa->LOCADDR_offset = sizeof (*taa); 2450 if (udp->udp_family == AF_INET) { 2451 taa->LOCADDR_length = sizeof (sin_t); 2452 sin = (sin_t *)&taa[1]; 2453 /* Fill zeroes and then initialize non-zero fields */ 2454 *sin = sin_null; 2455 sin->sin_family = AF_INET; 2456 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2457 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2458 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2459 sin->sin_addr.s_addr); 2460 } else { 2461 /* 2462 * INADDR_ANY 2463 * udp_v6src is not set, we might be bound to 2464 * broadcast/multicast. Use udp_bound_v6src as 2465 * local address instead (that could 2466 * also still be INADDR_ANY) 2467 */ 2468 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2469 sin->sin_addr.s_addr); 2470 } 2471 sin->sin_port = udp->udp_port; 2472 ackmp->b_wptr = (uchar_t *)&sin[1]; 2473 if (udp->udp_state == TS_DATA_XFER) { 2474 /* 2475 * connected, fill remote address too 2476 */ 2477 taa->REMADDR_length = sizeof (sin_t); 2478 /* assumed 32-bit alignment */ 2479 taa->REMADDR_offset = taa->LOCADDR_offset + 2480 taa->LOCADDR_length; 2481 2482 sin = (sin_t *)(ackmp->b_rptr + 2483 taa->REMADDR_offset); 2484 /* initialize */ 2485 *sin = sin_null; 2486 sin->sin_family = AF_INET; 2487 sin->sin_addr.s_addr = 2488 V4_PART_OF_V6(udp->udp_v6dst); 2489 sin->sin_port = udp->udp_dstport; 2490 ackmp->b_wptr = (uchar_t *)&sin[1]; 2491 } 2492 } else { 2493 taa->LOCADDR_length = sizeof (sin6_t); 2494 sin6 = (sin6_t *)&taa[1]; 2495 /* Fill zeroes and then initialize non-zero fields */ 2496 *sin6 = sin6_null; 2497 sin6->sin6_family = AF_INET6; 2498 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2499 sin6->sin6_addr = udp->udp_v6src; 2500 } else { 2501 /* 2502 * UNSPECIFIED 2503 * udp_v6src is not set, we might be bound to 2504 * broadcast/multicast.
Use udp_bound_v6src as 2505 * local address instead (that could 2506 * also still be UNSPECIFIED) 2507 */ 2508 sin6->sin6_addr = 2509 udp->udp_bound_v6src; 2510 } 2511 sin6->sin6_port = udp->udp_port; 2512 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2513 if (udp->udp_state == TS_DATA_XFER) { 2514 /* 2515 * connected, fill remote address too 2516 */ 2517 taa->REMADDR_length = sizeof (sin6_t); 2518 /* assumed 32-bit alignment */ 2519 taa->REMADDR_offset = taa->LOCADDR_offset + 2520 taa->LOCADDR_length; 2521 2522 sin6 = (sin6_t *)(ackmp->b_rptr + 2523 taa->REMADDR_offset); 2524 /* initialize */ 2525 *sin6 = sin6_null; 2526 sin6->sin6_family = AF_INET6; 2527 sin6->sin6_addr = udp->udp_v6dst; 2528 sin6->sin6_port = udp->udp_dstport; 2529 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2530 } 2531 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2532 } 2533 } 2534 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2535 putnext(UDP_RD(q), ackmp); 2536 } 2537 2538 static void 2539 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2540 { 2541 if (udp->udp_family == AF_INET) { 2542 *tap = udp_g_t_info_ack_ipv4; 2543 } else { 2544 *tap = udp_g_t_info_ack_ipv6; 2545 } 2546 tap->CURRENT_state = udp->udp_state; 2547 tap->OPT_size = udp_max_optsize; 2548 } 2549 2550 /* 2551 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2552 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2553 * udp_g_t_info_ack. The current state of the stream is copied from 2554 * udp_state. 2555 */ 2556 static void 2557 udp_capability_req(queue_t *q, mblk_t *mp) 2558 { 2559 t_uscalar_t cap_bits1; 2560 struct T_capability_ack *tcap; 2561 udp_t *udp = Q_TO_UDP(q); 2562 2563 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2564 2565 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2566 mp->b_datap->db_type, T_CAPABILITY_ACK); 2567 if (!mp) 2568 return; 2569 2570 tcap = (struct T_capability_ack *)mp->b_rptr; 2571 tcap->CAP_bits1 = 0; 2572 2573 if (cap_bits1 & TC1_INFO) { 2574 udp_copy_info(&tcap->INFO_ack, udp); 2575 tcap->CAP_bits1 |= TC1_INFO; 2576 } 2577 2578 putnext(UDP_RD(q), mp); 2579 } 2580 2581 /* 2582 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2583 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2584 * The current state of the stream is copied from udp_state. 2585 */ 2586 static void 2587 udp_info_req(queue_t *q, mblk_t *mp) 2588 { 2589 udp_t *udp = Q_TO_UDP(q); 2590 2591 /* Create a T_INFO_ACK message. */ 2592 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2593 T_INFO_ACK); 2594 if (!mp) 2595 return; 2596 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2597 putnext(UDP_RD(q), mp); 2598 } 2599 2600 /* 2601 * IP recognizes seven kinds of bind requests: 2602 * 2603 * - A zero-length address binds only to the protocol number. 2604 * 2605 * - A 4-byte address is treated as a request to 2606 * validate that the address is a valid local IPv4 2607 * address, appropriate for an application to bind to. 2608 * IP does the verification, but does not make any note 2609 * of the address at this time. 2610 * 2611 * - A 16-byte address contains is treated as a request 2612 * to validate a local IPv6 address, as the 4-byte 2613 * address case above. 2614 * 2615 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2616 * use it for the inbound fanout of packets. 2617 * 2618 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2619 * use it for the inbound fanout of packets. 
2620 * 2621 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2622 * information consisting of local and remote addresses 2623 * and ports. In this case, the addresses are both 2624 * validated as appropriate for this operation, and, if 2625 * so, the information is retained for use in the 2626 * inbound fanout. 2627 * 2628 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2629 * fanout information, like the 12-byte case above. 2630 * 2631 * IP will also fill in the IRE request mblk with information 2632 * regarding our peer. In all cases, we notify IP of our protocol 2633 * type by appending a single protocol byte to the bind request. 2634 */ 2635 static mblk_t * 2636 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2637 { 2638 char *cp; 2639 mblk_t *mp; 2640 struct T_bind_req *tbr; 2641 ipa_conn_t *ac; 2642 ipa6_conn_t *ac6; 2643 sin_t *sin; 2644 sin6_t *sin6; 2645 2646 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2647 2648 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2649 if (!mp) 2650 return (mp); 2651 mp->b_datap->db_type = M_PROTO; 2652 tbr = (struct T_bind_req *)mp->b_rptr; 2653 tbr->PRIM_type = bind_prim; 2654 tbr->ADDR_offset = sizeof (*tbr); 2655 tbr->CONIND_number = 0; 2656 tbr->ADDR_length = addr_length; 2657 cp = (char *)&tbr[1]; 2658 switch (addr_length) { 2659 case sizeof (ipa_conn_t): 2660 ASSERT(udp->udp_family == AF_INET); 2661 /* Append a request for an IRE */ 2662 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2663 if (!mp->b_cont) { 2664 freemsg(mp); 2665 return (NULL); 2666 } 2667 mp->b_cont->b_wptr += sizeof (ire_t); 2668 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2669 2670 /* cp known to be 32 bit aligned */ 2671 ac = (ipa_conn_t *)cp; 2672 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2673 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 2674 ac->ac_fport = udp->udp_dstport; 2675 ac->ac_lport = udp->udp_port; 2676 break; 2677 2678 case sizeof (ipa6_conn_t): 2679 ASSERT(udp->udp_family == AF_INET6); 2680 /* Append a request for an IRE */ 2681 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2682 if (!mp->b_cont) { 2683 freemsg(mp); 2684 return (NULL); 2685 } 2686 mp->b_cont->b_wptr += sizeof (ire_t); 2687 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2688 2689 /* cp known to be 32 bit aligned */ 2690 ac6 = (ipa6_conn_t *)cp; 2691 ac6->ac6_laddr = udp->udp_v6src; 2692 ac6->ac6_faddr = udp->udp_v6dst; 2693 ac6->ac6_fport = udp->udp_dstport; 2694 ac6->ac6_lport = udp->udp_port; 2695 break; 2696 2697 case sizeof (sin_t): 2698 ASSERT(udp->udp_family == AF_INET); 2699 /* Append a request for an IRE */ 2700 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2701 if (!mp->b_cont) { 2702 freemsg(mp); 2703 return (NULL); 2704 } 2705 mp->b_cont->b_wptr += sizeof (ire_t); 2706 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2707 2708 sin = (sin_t *)cp; 2709 *sin = sin_null; 2710 sin->sin_family = AF_INET; 2711 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2712 sin->sin_port = udp->udp_port; 2713 break; 2714 2715 case sizeof (sin6_t): 2716 ASSERT(udp->udp_family == AF_INET6); 2717 /* Append a request for an IRE */ 2718 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2719 if (!mp->b_cont) { 2720 freemsg(mp); 2721 return (NULL); 2722 } 2723 mp->b_cont->b_wptr += sizeof (ire_t); 2724 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2725 2726 sin6 = (sin6_t *)cp; 2727 *sin6 = sin6_null; 2728 sin6->sin6_family = AF_INET6; 2729 sin6->sin6_addr = udp->udp_bound_v6src; 2730 sin6->sin6_port = 
udp->udp_port; 2731 break; 2732 } 2733 /* Add protocol number to end */ 2734 cp[addr_length] = (char)IPPROTO_UDP; 2735 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2736 return (mp); 2737 } 2738 2739 /* 2740 * This is the open routine for udp. It allocates a udp_t structure for 2741 * the stream and, on the first open of the module, creates an ND table. 2742 */ 2743 /* ARGSUSED */ 2744 static int 2745 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2746 { 2747 int err; 2748 udp_t *udp; 2749 conn_t *connp; 2750 zoneid_t zoneid = getzoneid(); 2751 queue_t *ip_wq; 2752 char *name; 2753 2754 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2755 2756 /* If the stream is already open, return immediately. */ 2757 if (q->q_ptr != NULL) 2758 return (0); 2759 2760 /* If this is not a push of udp as a module, fail. */ 2761 if (sflag != MODOPEN) 2762 return (EINVAL); 2763 2764 q->q_hiwat = udp_recv_hiwat; 2765 WR(q)->q_hiwat = udp_xmit_hiwat; 2766 WR(q)->q_lowat = udp_xmit_lowat; 2767 2768 /* Insert ourselves in the stream since we're about to walk q_next */ 2769 qprocson(q); 2770 2771 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2772 bzero(udp, sizeof (*udp)); 2773 2774 /* 2775 * UDP is supported only as a module and it has to be pushed directly 2776 * above the device instance of IP. If UDP is pushed anywhere else 2777 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2778 * sake of MIB browsers and fail everything else. 2779 */ 2780 ip_wq = WR(q)->q_next; 2781 if (ip_wq->q_next != NULL || 2782 (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL || 2783 strcmp(name, IP_MOD_NAME) != 0 || 2784 ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) { 2785 /* Support just SNMP for MIB browsers */ 2786 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); 2787 connp->conn_rq = q; 2788 connp->conn_wq = WR(q); 2789 connp->conn_flags |= IPCL_UDPMOD; 2790 connp->conn_cred = credp; 2791 connp->conn_zoneid = zoneid; 2792 connp->conn_udp = udp; 2793 udp->udp_connp = connp; 2794 q->q_ptr = WR(q)->q_ptr = connp; 2795 crhold(credp); 2796 q->q_qinfo = &udp_snmp_rinit; 2797 WR(q)->q_qinfo = &udp_snmp_winit; 2798 return (0); 2799 } 2800 2801 /* 2802 * Initialize the udp_t structure for this stream. 2803 */ 2804 q = RD(ip_wq); 2805 connp = Q_TO_CONN(q); 2806 mutex_enter(&connp->conn_lock); 2807 connp->conn_proto = IPPROTO_UDP; 2808 connp->conn_flags |= IPCL_UDP; 2809 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2810 connp->conn_udp = udp; 2811 2812 /* Set the initial state of the stream and the privilege status. 
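 * The address family is chosen from the device opened underneath this
 * stream: the UDP6_MAJ major (normally /dev/udp6) yields an AF_INET6
 * endpoint, anything else defaults to AF_INET.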
*/ 2813 udp->udp_connp = connp; 2814 udp->udp_state = TS_UNBND; 2815 udp->udp_mode = UDP_MT_HOT; 2816 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2817 udp->udp_family = AF_INET6; 2818 udp->udp_ipversion = IPV6_VERSION; 2819 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2820 udp->udp_ttl = udp_ipv6_hoplimit; 2821 connp->conn_af_isv6 = B_TRUE; 2822 connp->conn_flags |= IPCL_ISV6; 2823 } else { 2824 udp->udp_family = AF_INET; 2825 udp->udp_ipversion = IPV4_VERSION; 2826 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2827 udp->udp_ttl = udp_ipv4_ttl; 2828 connp->conn_af_isv6 = B_FALSE; 2829 connp->conn_flags &= ~IPCL_ISV6; 2830 } 2831 2832 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2833 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2834 connp->conn_zoneid = zoneid; 2835 2836 if (connp->conn_flags & IPCL_SOCKET) { 2837 udp->udp_issocket = B_TRUE; 2838 udp->udp_direct_sockfs = B_TRUE; 2839 } 2840 mutex_exit(&connp->conn_lock); 2841 2842 /* 2843 * The transmit hiwat/lowat is only looked at on IP's queue. 2844 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2845 * getsockopts. 2846 */ 2847 q->q_hiwat = udp_recv_hiwat; 2848 WR(q)->q_hiwat = udp_xmit_hiwat; 2849 WR(q)->q_lowat = udp_xmit_lowat; 2850 2851 if (udp->udp_family == AF_INET6) { 2852 /* Build initial header template for transmit */ 2853 if ((err = udp_build_hdrs(q, udp)) != 0) { 2854 qprocsoff(UDP_RD(q)); 2855 udp->udp_connp = NULL; 2856 connp->conn_udp = NULL; 2857 kmem_cache_free(udp_cache, udp); 2858 return (err); 2859 } 2860 } 2861 2862 /* Set the Stream head write offset and high watermark. */ 2863 (void) mi_set_sth_wroff(UDP_RD(q), 2864 udp->udp_max_hdr_len + udp_wroff_extra); 2865 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 2866 2867 WR(UDP_RD(q))->q_qinfo = &udp_winit; 2868 2869 return (0); 2870 } 2871 2872 /* 2873 * Which UDP options OK to set through T_UNITDATA_REQ... 2874 */ 2875 /* ARGSUSED */ 2876 static boolean_t 2877 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2878 { 2879 return (B_TRUE); 2880 } 2881 2882 /* 2883 * This routine gets default values of certain options whose default 2884 * values are maintained by protocol-specific code 2885 */ 2886 /* ARGSUSED */ 2887 int 2888 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2889 { 2890 int *i1 = (int *)ptr; 2891 2892 switch (level) { 2893 case IPPROTO_IP: 2894 switch (name) { 2895 case IP_MULTICAST_TTL: 2896 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2897 return (sizeof (uchar_t)); 2898 case IP_MULTICAST_LOOP: 2899 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2900 return (sizeof (uchar_t)); 2901 } 2902 break; 2903 case IPPROTO_IPV6: 2904 switch (name) { 2905 case IPV6_MULTICAST_HOPS: 2906 *i1 = IP_DEFAULT_MULTICAST_TTL; 2907 return (sizeof (int)); 2908 case IPV6_MULTICAST_LOOP: 2909 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2910 return (sizeof (int)); 2911 case IPV6_UNICAST_HOPS: 2912 *i1 = udp_ipv6_hoplimit; 2913 return (sizeof (int)); 2914 } 2915 break; 2916 } 2917 return (-1); 2918 } 2919 2920 /* 2921 * This routine retrieves the current status of socket options 2922 * and expects the caller to pass in the queue pointer of the 2923 * upper instance. It returns the size of the option retrieved.
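 * A minimal caller sketch (illustrative only; "upper_q" stands for a
 * hypothetical queue pointer of the upper udp instance):
 *
 *	int val;
 *
 *	if (udp_opt_get(upper_q, SOL_SOCKET, SO_RCVBUF,
 *	    (uchar_t *)&val) == sizeof (int)) {
 *		... val now holds the current SO_RCVBUF setting ...
 *	}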
2924 */ 2925 int 2926 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2927 { 2928 int *i1 = (int *)ptr; 2929 conn_t *connp; 2930 udp_t *udp; 2931 ip6_pkt_t *ipp; 2932 2933 q = UDP_WR(q); 2934 connp = Q_TO_CONN(q); 2935 udp = connp->conn_udp; 2936 ipp = &udp->udp_sticky_ipp; 2937 2938 switch (level) { 2939 case SOL_SOCKET: 2940 switch (name) { 2941 case SO_DEBUG: 2942 *i1 = udp->udp_debug; 2943 break; /* goto sizeof (int) option return */ 2944 case SO_REUSEADDR: 2945 *i1 = udp->udp_reuseaddr; 2946 break; /* goto sizeof (int) option return */ 2947 case SO_TYPE: 2948 *i1 = SOCK_DGRAM; 2949 break; /* goto sizeof (int) option return */ 2950 2951 /* 2952 * The following three items are available here, 2953 * but are only meaningful to IP. 2954 */ 2955 case SO_DONTROUTE: 2956 *i1 = udp->udp_dontroute; 2957 break; /* goto sizeof (int) option return */ 2958 case SO_USELOOPBACK: 2959 *i1 = udp->udp_useloopback; 2960 break; /* goto sizeof (int) option return */ 2961 case SO_BROADCAST: 2962 *i1 = udp->udp_broadcast; 2963 break; /* goto sizeof (int) option return */ 2964 2965 case SO_SNDBUF: 2966 *i1 = q->q_hiwat; 2967 break; /* goto sizeof (int) option return */ 2968 case SO_RCVBUF: 2969 *i1 = RD(q)->q_hiwat; 2970 break; /* goto sizeof (int) option return */ 2971 case SO_DGRAM_ERRIND: 2972 *i1 = udp->udp_dgram_errind; 2973 break; /* goto sizeof (int) option return */ 2974 case SO_RECVUCRED: 2975 *i1 = udp->udp_recvucred; 2976 break; /* goto sizeof (int) option return */ 2977 default: 2978 return (-1); 2979 } 2980 break; 2981 case IPPROTO_IP: 2982 if (udp->udp_family != AF_INET) 2983 return (-1); 2984 switch (name) { 2985 case IP_OPTIONS: 2986 case T_IP_OPTIONS: 2987 if (udp->udp_ip_rcv_options_len) 2988 bcopy(udp->udp_ip_rcv_options, ptr, 2989 udp->udp_ip_rcv_options_len); 2990 return (udp->udp_ip_rcv_options_len); 2991 case IP_TOS: 2992 case T_IP_TOS: 2993 *i1 = (int)udp->udp_type_of_service; 2994 break; /* goto sizeof (int) option return */ 2995 case IP_TTL: 2996 *i1 = (int)udp->udp_ttl; 2997 break; /* goto sizeof (int) option return */ 2998 case IP_NEXTHOP: 2999 /* Handled at IP level */ 3000 return (-EINVAL); 3001 case IP_MULTICAST_IF: 3002 /* 0 address if not set */ 3003 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 3004 return (sizeof (ipaddr_t)); 3005 case IP_MULTICAST_TTL: 3006 *(uchar_t *)ptr = udp->udp_multicast_ttl; 3007 return (sizeof (uchar_t)); 3008 case IP_MULTICAST_LOOP: 3009 *ptr = connp->conn_multicast_loop; 3010 return (sizeof (uint8_t)); 3011 case IP_RECVOPTS: 3012 *i1 = udp->udp_recvopts; 3013 break; /* goto sizeof (int) option return */ 3014 case IP_RECVDSTADDR: 3015 *i1 = udp->udp_recvdstaddr; 3016 break; /* goto sizeof (int) option return */ 3017 case IP_RECVIF: 3018 *i1 = udp->udp_recvif; 3019 break; /* goto sizeof (int) option return */ 3020 case IP_RECVSLLA: 3021 *i1 = udp->udp_recvslla; 3022 break; /* goto sizeof (int) option return */ 3023 case IP_RECVTTL: 3024 *i1 = udp->udp_recvttl; 3025 break; /* goto sizeof (int) option return */ 3026 case IP_ADD_MEMBERSHIP: 3027 case IP_DROP_MEMBERSHIP: 3028 case IP_BLOCK_SOURCE: 3029 case IP_UNBLOCK_SOURCE: 3030 case IP_ADD_SOURCE_MEMBERSHIP: 3031 case IP_DROP_SOURCE_MEMBERSHIP: 3032 case MCAST_JOIN_GROUP: 3033 case MCAST_LEAVE_GROUP: 3034 case MCAST_BLOCK_SOURCE: 3035 case MCAST_UNBLOCK_SOURCE: 3036 case MCAST_JOIN_SOURCE_GROUP: 3037 case MCAST_LEAVE_SOURCE_GROUP: 3038 case IP_DONTFAILOVER_IF: 3039 /* cannot "get" the value for these */ 3040 return (-1); 3041 case IP_BOUND_IF: 3042 /* Zero if not set */ 
3043 *i1 = udp->udp_bound_if; 3044 break; /* goto sizeof (int) option return */ 3045 case IP_UNSPEC_SRC: 3046 *i1 = udp->udp_unspec_source; 3047 break; /* goto sizeof (int) option return */ 3048 case IP_XMIT_IF: 3049 *i1 = udp->udp_xmit_if; 3050 break; /* goto sizeof (int) option return */ 3051 default: 3052 return (-1); 3053 } 3054 break; 3055 case IPPROTO_IPV6: 3056 if (udp->udp_family != AF_INET6) 3057 return (-1); 3058 switch (name) { 3059 case IPV6_UNICAST_HOPS: 3060 *i1 = (unsigned int)udp->udp_ttl; 3061 break; /* goto sizeof (int) option return */ 3062 case IPV6_MULTICAST_IF: 3063 /* 0 index if not set */ 3064 *i1 = udp->udp_multicast_if_index; 3065 break; /* goto sizeof (int) option return */ 3066 case IPV6_MULTICAST_HOPS: 3067 *i1 = udp->udp_multicast_ttl; 3068 break; /* goto sizeof (int) option return */ 3069 case IPV6_MULTICAST_LOOP: 3070 *i1 = connp->conn_multicast_loop; 3071 break; /* goto sizeof (int) option return */ 3072 case IPV6_JOIN_GROUP: 3073 case IPV6_LEAVE_GROUP: 3074 case MCAST_JOIN_GROUP: 3075 case MCAST_LEAVE_GROUP: 3076 case MCAST_BLOCK_SOURCE: 3077 case MCAST_UNBLOCK_SOURCE: 3078 case MCAST_JOIN_SOURCE_GROUP: 3079 case MCAST_LEAVE_SOURCE_GROUP: 3080 /* cannot "get" the value for these */ 3081 return (-1); 3082 case IPV6_BOUND_IF: 3083 /* Zero if not set */ 3084 *i1 = udp->udp_bound_if; 3085 break; /* goto sizeof (int) option return */ 3086 case IPV6_UNSPEC_SRC: 3087 *i1 = udp->udp_unspec_source; 3088 break; /* goto sizeof (int) option return */ 3089 case IPV6_RECVPKTINFO: 3090 *i1 = udp->udp_ipv6_recvpktinfo; 3091 break; /* goto sizeof (int) option return */ 3092 case IPV6_RECVTCLASS: 3093 *i1 = udp->udp_ipv6_recvtclass; 3094 break; /* goto sizeof (int) option return */ 3095 case IPV6_RECVPATHMTU: 3096 *i1 = udp->udp_ipv6_recvpathmtu; 3097 break; /* goto sizeof (int) option return */ 3098 case IPV6_RECVHOPLIMIT: 3099 *i1 = udp->udp_ipv6_recvhoplimit; 3100 break; /* goto sizeof (int) option return */ 3101 case IPV6_RECVHOPOPTS: 3102 *i1 = udp->udp_ipv6_recvhopopts; 3103 break; /* goto sizeof (int) option return */ 3104 case IPV6_RECVDSTOPTS: 3105 *i1 = udp->udp_ipv6_recvdstopts; 3106 break; /* goto sizeof (int) option return */ 3107 case _OLD_IPV6_RECVDSTOPTS: 3108 *i1 = udp->udp_old_ipv6_recvdstopts; 3109 break; /* goto sizeof (int) option return */ 3110 case IPV6_RECVRTHDRDSTOPTS: 3111 *i1 = udp->udp_ipv6_recvrthdrdstopts; 3112 break; /* goto sizeof (int) option return */ 3113 case IPV6_RECVRTHDR: 3114 *i1 = udp->udp_ipv6_recvrthdr; 3115 break; /* goto sizeof (int) option return */ 3116 case IPV6_PKTINFO: { 3117 /* XXX assumes that caller has room for max size! 
*/ 3118 struct in6_pktinfo *pkti; 3119 3120 pkti = (struct in6_pktinfo *)ptr; 3121 if (ipp->ipp_fields & IPPF_IFINDEX) 3122 pkti->ipi6_ifindex = ipp->ipp_ifindex; 3123 else 3124 pkti->ipi6_ifindex = 0; 3125 if (ipp->ipp_fields & IPPF_ADDR) 3126 pkti->ipi6_addr = ipp->ipp_addr; 3127 else 3128 pkti->ipi6_addr = ipv6_all_zeros; 3129 return (sizeof (struct in6_pktinfo)); 3130 } 3131 case IPV6_TCLASS: 3132 if (ipp->ipp_fields & IPPF_TCLASS) 3133 *i1 = ipp->ipp_tclass; 3134 else 3135 *i1 = IPV6_FLOW_TCLASS( 3136 IPV6_DEFAULT_VERS_AND_FLOW); 3137 break; /* goto sizeof (int) option return */ 3138 case IPV6_NEXTHOP: { 3139 sin6_t *sin6 = (sin6_t *)ptr; 3140 3141 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 3142 return (0); 3143 *sin6 = sin6_null; 3144 sin6->sin6_family = AF_INET6; 3145 sin6->sin6_addr = ipp->ipp_nexthop; 3146 return (sizeof (sin6_t)); 3147 } 3148 case IPV6_HOPOPTS: 3149 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 3150 return (0); 3151 bcopy(ipp->ipp_hopopts, ptr, ipp->ipp_hopoptslen); 3152 return (ipp->ipp_hopoptslen); 3153 case IPV6_RTHDRDSTOPTS: 3154 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 3155 return (0); 3156 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 3157 return (ipp->ipp_rtdstoptslen); 3158 case IPV6_RTHDR: 3159 if (!(ipp->ipp_fields & IPPF_RTHDR)) 3160 return (0); 3161 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 3162 return (ipp->ipp_rthdrlen); 3163 case IPV6_DSTOPTS: 3164 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 3165 return (0); 3166 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 3167 return (ipp->ipp_dstoptslen); 3168 case IPV6_PATHMTU: 3169 return (ip_fill_mtuinfo(&udp->udp_v6dst, 3170 udp->udp_dstport, (struct ip6_mtuinfo *)ptr)); 3171 default: 3172 return (-1); 3173 } 3174 break; 3175 case IPPROTO_UDP: 3176 switch (name) { 3177 case UDP_ANONPRIVBIND: 3178 *i1 = udp->udp_anon_priv_bind; 3179 break; 3180 case UDP_EXCLBIND: 3181 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 3182 break; 3183 case UDP_RCVHDR: 3184 *i1 = udp->udp_rcvhdr ? 1 : 0; 3185 break; 3186 default: 3187 return (-1); 3188 } 3189 break; 3190 default: 3191 return (-1); 3192 } 3193 return (sizeof (int)); 3194 } 3195 3196 /* 3197 * This routine sets socket options; it expects the caller 3198 * to pass in the queue pointer of the upper instance. 3199 */ 3200 /* ARGSUSED */ 3201 int 3202 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3203 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3204 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3205 { 3206 int *i1 = (int *)invalp; 3207 boolean_t onoff = (*i1 == 0) ? 0 : 1; 3208 boolean_t checkonly; 3209 int error; 3210 conn_t *connp; 3211 udp_t *udp; 3212 3213 q = UDP_WR(q); 3214 connp = Q_TO_CONN(q); 3215 udp = connp->conn_udp; 3216 3217 switch (optset_context) { 3218 case SETFN_OPTCOM_CHECKONLY: 3219 checkonly = B_TRUE; 3220 /* 3221 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3222 * inlen != 0 implies value supplied and 3223 * we have to "pretend" to set it. 3224 * inlen == 0 implies that there is no 3225 * value part in T_CHECK request and just validation 3226 * done elsewhere should be enough, we just return here. 3227 */ 3228 if (inlen == 0) { 3229 *outlenp = 0; 3230 return (0); 3231 } 3232 break; 3233 case SETFN_OPTCOM_NEGOTIATE: 3234 checkonly = B_FALSE; 3235 break; 3236 case SETFN_UD_NEGOTIATE: 3237 case SETFN_CONN_NEGOTIATE: 3238 checkonly = B_FALSE; 3239 /* 3240 * Negotiating local and "association-related" options 3241 * through T_UNITDATA_REQ. 
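 * (i.e. options supplied as ancillary data with an individual datagram,
 * as opposed to sticky options negotiated through T_OPTMGMT_REQ).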
3242 * 3243 * Following routine can filter out ones we do not 3244 * want to be "set" this way. 3245 */ 3246 if (!udp_opt_allow_udr_set(level, name)) { 3247 *outlenp = 0; 3248 return (EINVAL); 3249 } 3250 break; 3251 default: 3252 /* 3253 * We should never get here 3254 */ 3255 *outlenp = 0; 3256 return (EINVAL); 3257 } 3258 3259 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3260 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3261 3262 /* 3263 * For fixed length options, no sanity check 3264 * of passed in length is done. It is assumed *_optcom_req() 3265 * routines do the right thing. 3266 */ 3267 3268 switch (level) { 3269 case SOL_SOCKET: 3270 switch (name) { 3271 case SO_REUSEADDR: 3272 if (!checkonly) 3273 udp->udp_reuseaddr = onoff; 3274 break; 3275 case SO_DEBUG: 3276 if (!checkonly) 3277 udp->udp_debug = onoff; 3278 break; 3279 /* 3280 * The following three items are available here, 3281 * but are only meaningful to IP. 3282 */ 3283 case SO_DONTROUTE: 3284 if (!checkonly) 3285 udp->udp_dontroute = onoff; 3286 break; 3287 case SO_USELOOPBACK: 3288 if (!checkonly) 3289 udp->udp_useloopback = onoff; 3290 break; 3291 case SO_BROADCAST: 3292 if (!checkonly) 3293 udp->udp_broadcast = onoff; 3294 break; 3295 3296 case SO_SNDBUF: 3297 if (*i1 > udp_max_buf) { 3298 *outlenp = 0; 3299 return (ENOBUFS); 3300 } 3301 if (!checkonly) { 3302 q->q_hiwat = *i1; 3303 WR(UDP_RD(q))->q_hiwat = *i1; 3304 } 3305 break; 3306 case SO_RCVBUF: 3307 if (*i1 > udp_max_buf) { 3308 *outlenp = 0; 3309 return (ENOBUFS); 3310 } 3311 if (!checkonly) { 3312 RD(q)->q_hiwat = *i1; 3313 UDP_RD(q)->q_hiwat = *i1; 3314 (void) mi_set_sth_hiwat(UDP_RD(q), 3315 udp_set_rcv_hiwat(udp, *i1)); 3316 } 3317 break; 3318 case SO_DGRAM_ERRIND: 3319 if (!checkonly) 3320 udp->udp_dgram_errind = onoff; 3321 break; 3322 case SO_RECVUCRED: 3323 if (!checkonly) 3324 udp->udp_recvucred = onoff; 3325 break; 3326 default: 3327 *outlenp = 0; 3328 return (EINVAL); 3329 } 3330 break; 3331 case IPPROTO_IP: 3332 if (udp->udp_family != AF_INET) { 3333 *outlenp = 0; 3334 return (ENOPROTOOPT); 3335 } 3336 switch (name) { 3337 case IP_OPTIONS: 3338 case T_IP_OPTIONS: 3339 /* Save options for use by IP. */ 3340 if (inlen & 0x3) { 3341 *outlenp = 0; 3342 return (EINVAL); 3343 } 3344 if (checkonly) 3345 break; 3346 3347 if (udp->udp_ip_snd_options) { 3348 mi_free((char *)udp->udp_ip_snd_options); 3349 udp->udp_ip_snd_options_len = 0; 3350 udp->udp_ip_snd_options = NULL; 3351 } 3352 if (inlen) { 3353 udp->udp_ip_snd_options = 3354 (uchar_t *)mi_alloc(inlen, BPRI_HI); 3355 if (udp->udp_ip_snd_options) { 3356 bcopy(invalp, udp->udp_ip_snd_options, 3357 inlen); 3358 udp->udp_ip_snd_options_len = inlen; 3359 } 3360 } 3361 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3362 UDPH_SIZE + udp->udp_ip_snd_options_len; 3363 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3364 udp_wroff_extra); 3365 break; 3366 case IP_TTL: 3367 if (!checkonly) { 3368 udp->udp_ttl = (uchar_t)*i1; 3369 } 3370 break; 3371 case IP_TOS: 3372 case T_IP_TOS: 3373 if (!checkonly) { 3374 udp->udp_type_of_service = (uchar_t)*i1; 3375 } 3376 break; 3377 case IP_MULTICAST_IF: { 3378 /* 3379 * TODO should check OPTMGMT reply and undo this if 3380 * there is an error. 
3381 */ 3382 struct in_addr *inap = (struct in_addr *)invalp; 3383 if (!checkonly) { 3384 udp->udp_multicast_if_addr = 3385 inap->s_addr; 3386 } 3387 break; 3388 } 3389 case IP_MULTICAST_TTL: 3390 if (!checkonly) 3391 udp->udp_multicast_ttl = *invalp; 3392 break; 3393 case IP_MULTICAST_LOOP: 3394 if (!checkonly) 3395 connp->conn_multicast_loop = *invalp; 3396 break; 3397 case IP_RECVOPTS: 3398 if (!checkonly) 3399 udp->udp_recvopts = onoff; 3400 break; 3401 case IP_RECVDSTADDR: 3402 if (!checkonly) 3403 udp->udp_recvdstaddr = onoff; 3404 break; 3405 case IP_RECVIF: 3406 if (!checkonly) 3407 udp->udp_recvif = onoff; 3408 break; 3409 case IP_RECVSLLA: 3410 if (!checkonly) 3411 udp->udp_recvslla = onoff; 3412 break; 3413 case IP_RECVTTL: 3414 if (!checkonly) 3415 udp->udp_recvttl = onoff; 3416 break; 3417 case IP_ADD_MEMBERSHIP: 3418 case IP_DROP_MEMBERSHIP: 3419 case IP_BLOCK_SOURCE: 3420 case IP_UNBLOCK_SOURCE: 3421 case IP_ADD_SOURCE_MEMBERSHIP: 3422 case IP_DROP_SOURCE_MEMBERSHIP: 3423 case MCAST_JOIN_GROUP: 3424 case MCAST_LEAVE_GROUP: 3425 case MCAST_BLOCK_SOURCE: 3426 case MCAST_UNBLOCK_SOURCE: 3427 case MCAST_JOIN_SOURCE_GROUP: 3428 case MCAST_LEAVE_SOURCE_GROUP: 3429 case IP_SEC_OPT: 3430 case IP_NEXTHOP: 3431 /* 3432 * "soft" error (negative) 3433 * option not handled at this level 3434 * Do not modify *outlenp. 3435 */ 3436 return (-EINVAL); 3437 case IP_BOUND_IF: 3438 if (!checkonly) 3439 udp->udp_bound_if = *i1; 3440 break; 3441 case IP_UNSPEC_SRC: 3442 if (!checkonly) 3443 udp->udp_unspec_source = onoff; 3444 break; 3445 case IP_XMIT_IF: 3446 if (!checkonly) 3447 udp->udp_xmit_if = *i1; 3448 break; 3449 default: 3450 *outlenp = 0; 3451 return (EINVAL); 3452 } 3453 break; 3454 case IPPROTO_IPV6: { 3455 ip6_pkt_t *ipp; 3456 boolean_t sticky; 3457 3458 if (udp->udp_family != AF_INET6) { 3459 *outlenp = 0; 3460 return (ENOPROTOOPT); 3461 } 3462 /* 3463 * Deal with both sticky options and ancillary data 3464 */ 3465 if (thisdg_attrs == NULL) { 3466 /* sticky options, or none */ 3467 ipp = &udp->udp_sticky_ipp; 3468 sticky = B_TRUE; 3469 } else { 3470 /* ancillary data */ 3471 ipp = (ip6_pkt_t *)thisdg_attrs; 3472 sticky = B_FALSE; 3473 } 3474 3475 switch (name) { 3476 case IPV6_MULTICAST_IF: 3477 if (!checkonly) 3478 udp->udp_multicast_if_index = *i1; 3479 break; 3480 case IPV6_UNICAST_HOPS: 3481 /* -1 means use default */ 3482 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3483 *outlenp = 0; 3484 return (EINVAL); 3485 } 3486 if (!checkonly) { 3487 if (*i1 == -1) { 3488 udp->udp_ttl = ipp->ipp_unicast_hops = 3489 udp_ipv6_hoplimit; 3490 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3491 /* Pass modified value to IP. */ 3492 *i1 = udp->udp_ttl; 3493 } else { 3494 udp->udp_ttl = ipp->ipp_unicast_hops = 3495 (uint8_t)*i1; 3496 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3497 } 3498 /* Rebuild the header template */ 3499 error = udp_build_hdrs(q, udp); 3500 if (error != 0) { 3501 *outlenp = 0; 3502 return (error); 3503 } 3504 } 3505 break; 3506 case IPV6_MULTICAST_HOPS: 3507 /* -1 means use default */ 3508 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3509 *outlenp = 0; 3510 return (EINVAL); 3511 } 3512 if (!checkonly) { 3513 if (*i1 == -1) { 3514 udp->udp_multicast_ttl = 3515 ipp->ipp_multicast_hops = 3516 IP_DEFAULT_MULTICAST_TTL; 3517 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3518 /* Pass modified value to IP. 
*/ 3519 *i1 = udp->udp_multicast_ttl; 3520 } else { 3521 udp->udp_multicast_ttl = 3522 ipp->ipp_multicast_hops = 3523 (uint8_t)*i1; 3524 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3525 } 3526 } 3527 break; 3528 case IPV6_MULTICAST_LOOP: 3529 if (*i1 != 0 && *i1 != 1) { 3530 *outlenp = 0; 3531 return (EINVAL); 3532 } 3533 if (!checkonly) 3534 connp->conn_multicast_loop = *i1; 3535 break; 3536 case IPV6_JOIN_GROUP: 3537 case IPV6_LEAVE_GROUP: 3538 case MCAST_JOIN_GROUP: 3539 case MCAST_LEAVE_GROUP: 3540 case MCAST_BLOCK_SOURCE: 3541 case MCAST_UNBLOCK_SOURCE: 3542 case MCAST_JOIN_SOURCE_GROUP: 3543 case MCAST_LEAVE_SOURCE_GROUP: 3544 /* 3545 * "soft" error (negative) 3546 * option not handled at this level 3547 * Note: Do not modify *outlenp 3548 */ 3549 return (-EINVAL); 3550 case IPV6_BOUND_IF: 3551 if (!checkonly) 3552 udp->udp_bound_if = *i1; 3553 break; 3554 case IPV6_UNSPEC_SRC: 3555 if (!checkonly) 3556 udp->udp_unspec_source = onoff; 3557 break; 3558 /* 3559 * Set boolean switches for ancillary data delivery 3560 */ 3561 case IPV6_RECVPKTINFO: 3562 if (!checkonly) 3563 udp->udp_ipv6_recvpktinfo = onoff; 3564 break; 3565 case IPV6_RECVTCLASS: 3566 if (!checkonly) { 3567 udp->udp_ipv6_recvtclass = onoff; 3568 } 3569 break; 3570 case IPV6_RECVPATHMTU: 3571 if (!checkonly) { 3572 udp->udp_ipv6_recvpathmtu = onoff; 3573 } 3574 break; 3575 case IPV6_RECVHOPLIMIT: 3576 if (!checkonly) 3577 udp->udp_ipv6_recvhoplimit = onoff; 3578 break; 3579 case IPV6_RECVHOPOPTS: 3580 if (!checkonly) 3581 udp->udp_ipv6_recvhopopts = onoff; 3582 break; 3583 case IPV6_RECVDSTOPTS: 3584 if (!checkonly) 3585 udp->udp_ipv6_recvdstopts = onoff; 3586 break; 3587 case _OLD_IPV6_RECVDSTOPTS: 3588 if (!checkonly) 3589 udp->udp_old_ipv6_recvdstopts = onoff; 3590 break; 3591 case IPV6_RECVRTHDRDSTOPTS: 3592 if (!checkonly) 3593 udp->udp_ipv6_recvrthdrdstopts = onoff; 3594 break; 3595 case IPV6_RECVRTHDR: 3596 if (!checkonly) 3597 udp->udp_ipv6_recvrthdr = onoff; 3598 break; 3599 /* 3600 * Set sticky options or ancillary data. 3601 * If sticky options, (re)build any extension headers 3602 * that might be needed as a result. 3603 */ 3604 case IPV6_PKTINFO: 3605 /* 3606 * The source address and ifindex are verified 3607 * in ip_opt_set(). For ancillary data the 3608 * source address is checked in ip_wput_v6. 
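 * A user-level sketch of the sticky form that ends up here (illustrative
 * only; the interface index is made up):
 *
 *	struct in6_pktinfo pi;
 *
 *	bzero(&pi, sizeof (pi));
 *	pi.ipi6_ifindex = 2;
 *	(void) setsockopt(fd, IPPROTO_IPV6, IPV6_PKTINFO, &pi, sizeof (pi));
 *
 * arrives with inlen == sizeof (struct in6_pktinfo) and sticky == B_TRUE,
 * while the same option sent as ancillary data arrives with sticky ==
 * B_FALSE.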
3609 */ 3610 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3611 return (EINVAL); 3612 if (checkonly) 3613 break; 3614 3615 if (inlen == 0) { 3616 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3617 ipp->ipp_sticky_ignored |= 3618 (IPPF_IFINDEX|IPPF_ADDR); 3619 } else { 3620 struct in6_pktinfo *pkti; 3621 3622 pkti = (struct in6_pktinfo *)invalp; 3623 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3624 ipp->ipp_addr = pkti->ipi6_addr; 3625 if (ipp->ipp_ifindex != 0) 3626 ipp->ipp_fields |= IPPF_IFINDEX; 3627 else 3628 ipp->ipp_fields &= ~IPPF_IFINDEX; 3629 if (!IN6_IS_ADDR_UNSPECIFIED( 3630 &ipp->ipp_addr)) 3631 ipp->ipp_fields |= IPPF_ADDR; 3632 else 3633 ipp->ipp_fields &= ~IPPF_ADDR; 3634 } 3635 if (sticky) { 3636 error = udp_build_hdrs(q, udp); 3637 if (error != 0) 3638 return (error); 3639 } 3640 break; 3641 case IPV6_HOPLIMIT: 3642 if (sticky) 3643 return (EINVAL); 3644 if (inlen != 0 && inlen != sizeof (int)) 3645 return (EINVAL); 3646 if (checkonly) 3647 break; 3648 3649 if (inlen == 0) { 3650 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3651 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3652 } else { 3653 if (*i1 > 255 || *i1 < -1) 3654 return (EINVAL); 3655 if (*i1 == -1) 3656 ipp->ipp_hoplimit = udp_ipv6_hoplimit; 3657 else 3658 ipp->ipp_hoplimit = *i1; 3659 ipp->ipp_fields |= IPPF_HOPLIMIT; 3660 } 3661 break; 3662 case IPV6_TCLASS: 3663 if (inlen != 0 && inlen != sizeof (int)) 3664 return (EINVAL); 3665 if (checkonly) 3666 break; 3667 3668 if (inlen == 0) { 3669 ipp->ipp_fields &= ~IPPF_TCLASS; 3670 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3671 } else { 3672 if (*i1 > 255 || *i1 < -1) 3673 return (EINVAL); 3674 if (*i1 == -1) 3675 ipp->ipp_tclass = 0; 3676 else 3677 ipp->ipp_tclass = *i1; 3678 ipp->ipp_fields |= IPPF_TCLASS; 3679 } 3680 if (sticky) { 3681 error = udp_build_hdrs(q, udp); 3682 if (error != 0) 3683 return (error); 3684 } 3685 break; 3686 case IPV6_NEXTHOP: 3687 /* 3688 * IP will verify that the nexthop is reachable 3689 * and fail for sticky options. 3690 */ 3691 if (inlen != 0 && inlen != sizeof (sin6_t)) 3692 return (EINVAL); 3693 if (checkonly) 3694 break; 3695 3696 if (inlen == 0) { 3697 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3698 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3699 } else { 3700 sin6_t *sin6 = (sin6_t *)invalp; 3701 3702 if (sin6->sin6_family != AF_INET6) 3703 return (EAFNOSUPPORT); 3704 if (IN6_IS_ADDR_V4MAPPED( 3705 &sin6->sin6_addr)) 3706 return (EADDRNOTAVAIL); 3707 ipp->ipp_nexthop = sin6->sin6_addr; 3708 if (!IN6_IS_ADDR_UNSPECIFIED( 3709 &ipp->ipp_nexthop)) 3710 ipp->ipp_fields |= IPPF_NEXTHOP; 3711 else 3712 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3713 } 3714 if (sticky) { 3715 error = udp_build_hdrs(q, udp); 3716 if (error != 0) 3717 return (error); 3718 } 3719 break; 3720 case IPV6_HOPOPTS: { 3721 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3722 /* 3723 * Sanity checks - minimum size, size a multiple of 3724 * eight bytes, and matching size passed in. 
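 * (The ip6h_len field counts 8-octet units beyond the first 8 octets of
 * the header, per RFC 2460, which is why the expected size is
 * 8 * (ip6h_len + 1).)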
3725 */ 3726 if (inlen != 0 && 3727 inlen != (8 * (hopts->ip6h_len + 1))) 3728 return (EINVAL); 3729 3730 if (checkonly) 3731 break; 3732 3733 if (inlen == 0) { 3734 if (sticky && 3735 (ipp->ipp_fields & IPPF_HOPOPTS) != 0) { 3736 kmem_free(ipp->ipp_hopopts, 3737 ipp->ipp_hopoptslen); 3738 ipp->ipp_hopopts = NULL; 3739 ipp->ipp_hopoptslen = 0; 3740 } 3741 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3742 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3743 } else { 3744 error = udp_pkt_set(invalp, inlen, sticky, 3745 (uchar_t **)&ipp->ipp_hopopts, 3746 &ipp->ipp_hopoptslen); 3747 if (error != 0) 3748 return (error); 3749 ipp->ipp_fields |= IPPF_HOPOPTS; 3750 } 3751 if (sticky) { 3752 error = udp_build_hdrs(q, udp); 3753 if (error != 0) 3754 return (error); 3755 } 3756 break; 3757 } 3758 case IPV6_RTHDRDSTOPTS: { 3759 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3760 3761 /* 3762 * Sanity checks - minimum size, size a multiple of 3763 * eight bytes, and matching size passed in. 3764 */ 3765 if (inlen != 0 && 3766 inlen != (8 * (dopts->ip6d_len + 1))) 3767 return (EINVAL); 3768 3769 if (checkonly) 3770 break; 3771 3772 if (inlen == 0) { 3773 if (sticky && 3774 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3775 kmem_free(ipp->ipp_rtdstopts, 3776 ipp->ipp_rtdstoptslen); 3777 ipp->ipp_rtdstopts = NULL; 3778 ipp->ipp_rtdstoptslen = 0; 3779 } 3780 3781 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3782 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3783 } else { 3784 error = udp_pkt_set(invalp, inlen, sticky, 3785 (uchar_t **)&ipp->ipp_rtdstopts, 3786 &ipp->ipp_rtdstoptslen); 3787 if (error != 0) 3788 return (error); 3789 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3790 } 3791 if (sticky) { 3792 error = udp_build_hdrs(q, udp); 3793 if (error != 0) 3794 return (error); 3795 } 3796 break; 3797 } 3798 case IPV6_DSTOPTS: { 3799 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3800 3801 /* 3802 * Sanity checks - minimum size, size a multiple of 3803 * eight bytes, and matching size passed in. 3804 */ 3805 if (inlen != 0 && 3806 inlen != (8 * (dopts->ip6d_len + 1))) 3807 return (EINVAL); 3808 3809 if (checkonly) 3810 break; 3811 3812 if (inlen == 0) { 3813 if (sticky && 3814 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3815 kmem_free(ipp->ipp_dstopts, 3816 ipp->ipp_dstoptslen); 3817 ipp->ipp_dstopts = NULL; 3818 ipp->ipp_dstoptslen = 0; 3819 } 3820 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3821 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3822 } else { 3823 error = udp_pkt_set(invalp, inlen, sticky, 3824 (uchar_t **)&ipp->ipp_dstopts, 3825 &ipp->ipp_dstoptslen); 3826 if (error != 0) 3827 return (error); 3828 ipp->ipp_fields |= IPPF_DSTOPTS; 3829 } 3830 if (sticky) { 3831 error = udp_build_hdrs(q, udp); 3832 if (error != 0) 3833 return (error); 3834 } 3835 break; 3836 } 3837 case IPV6_RTHDR: { 3838 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3839 3840 /* 3841 * Sanity checks - minimum size, size a multiple of 3842 * eight bytes, and matching size passed in. 
3843 */ 3844 if (inlen != 0 && 3845 inlen != (8 * (rt->ip6r_len + 1))) 3846 return (EINVAL); 3847 3848 if (checkonly) 3849 break; 3850 3851 if (inlen == 0) { 3852 if (sticky && 3853 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3854 kmem_free(ipp->ipp_rthdr, 3855 ipp->ipp_rthdrlen); 3856 ipp->ipp_rthdr = NULL; 3857 ipp->ipp_rthdrlen = 0; 3858 } 3859 ipp->ipp_fields &= ~IPPF_RTHDR; 3860 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3861 } else { 3862 error = udp_pkt_set(invalp, inlen, sticky, 3863 (uchar_t **)&ipp->ipp_rthdr, 3864 &ipp->ipp_rthdrlen); 3865 if (error != 0) 3866 return (error); 3867 ipp->ipp_fields |= IPPF_RTHDR; 3868 } 3869 if (sticky) { 3870 error = udp_build_hdrs(q, udp); 3871 if (error != 0) 3872 return (error); 3873 } 3874 break; 3875 } 3876 3877 case IPV6_DONTFRAG: 3878 if (checkonly) 3879 break; 3880 3881 if (onoff) { 3882 ipp->ipp_fields |= IPPF_DONTFRAG; 3883 } else { 3884 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3885 } 3886 break; 3887 3888 case IPV6_USE_MIN_MTU: 3889 if (inlen != sizeof (int)) 3890 return (EINVAL); 3891 3892 if (*i1 < -1 || *i1 > 1) 3893 return (EINVAL); 3894 3895 if (checkonly) 3896 break; 3897 3898 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3899 ipp->ipp_use_min_mtu = *i1; 3900 break; 3901 3902 case IPV6_BOUND_PIF: 3903 case IPV6_SEC_OPT: 3904 case IPV6_DONTFAILOVER_IF: 3905 case IPV6_SRC_PREFERENCES: 3906 case IPV6_V6ONLY: 3907 /* Handled at the IP level */ 3908 return (-EINVAL); 3909 default: 3910 *outlenp = 0; 3911 return (EINVAL); 3912 } 3913 break; 3914 } /* end IPPROTO_IPV6 */ 3915 case IPPROTO_UDP: 3916 switch (name) { 3917 case UDP_ANONPRIVBIND: 3918 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 3919 *outlenp = 0; 3920 return (error); 3921 } 3922 if (!checkonly) { 3923 udp->udp_anon_priv_bind = onoff; 3924 } 3925 break; 3926 case UDP_EXCLBIND: 3927 if (!checkonly) 3928 udp->udp_exclbind = onoff; 3929 break; 3930 case UDP_RCVHDR: 3931 if (!checkonly) 3932 udp->udp_rcvhdr = onoff; 3933 break; 3934 default: 3935 *outlenp = 0; 3936 return (EINVAL); 3937 } 3938 break; 3939 default: 3940 *outlenp = 0; 3941 return (EINVAL); 3942 } 3943 /* 3944 * Common case of OK return with outval same as inval. 3945 */ 3946 if (invalp != outvalp) { 3947 /* don't trust bcopy for identical src/dst */ 3948 (void) bcopy(invalp, outvalp, inlen); 3949 } 3950 *outlenp = inlen; 3951 return (0); 3952 } 3953 3954 /* 3955 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3956 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3957 * headers, and the udp header. 3958 * Returns failure if can't allocate memory. 
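 *
 * Editor's sketch of the buffer this routine lays out (illustrative
 * only; the exact sizes depend on which sticky options are set):
 *
 *	+------------------+-------+--------------------+---------+
 *	| ip6i_t (only if  | ip6_t | sticky extension   | udpha_t |
 *	| IPPF_HAS_IP6I)   |       | headers, if any    |         |
 *	+------------------+-------+--------------------+---------+
 *	|<------- ip_total_hdrs_len_v6(ipp) ----------->|<-UDPH_SIZE->|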
3959 */ 3960 static int 3961 udp_build_hdrs(queue_t *q, udp_t *udp) 3962 { 3963 uchar_t *hdrs; 3964 uint_t hdrs_len; 3965 ip6_t *ip6h; 3966 ip6i_t *ip6i; 3967 udpha_t *udpha; 3968 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3969 3970 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3971 ASSERT(hdrs_len != 0); 3972 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3973 /* Need to reallocate */ 3974 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3975 if (hdrs == NULL) 3976 return (ENOMEM); 3977 3978 if (udp->udp_sticky_hdrs_len != 0) { 3979 kmem_free(udp->udp_sticky_hdrs, 3980 udp->udp_sticky_hdrs_len); 3981 } 3982 udp->udp_sticky_hdrs = hdrs; 3983 udp->udp_sticky_hdrs_len = hdrs_len; 3984 } 3985 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3986 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3987 3988 /* Set header fields not in ipp */ 3989 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3990 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3991 ip6h = (ip6_t *)&ip6i[1]; 3992 } else { 3993 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3994 } 3995 3996 if (!(ipp->ipp_fields & IPPF_ADDR)) 3997 ip6h->ip6_src = udp->udp_v6src; 3998 3999 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 4000 udpha->uha_src_port = udp->udp_port; 4001 4002 /* Try to get everything in a single mblk */ 4003 if (hdrs_len > udp->udp_max_hdr_len) { 4004 udp->udp_max_hdr_len = hdrs_len; 4005 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 4006 udp_wroff_extra); 4007 } 4008 return (0); 4009 } 4010 4011 /* 4012 * Set optbuf and optlen for the option. 4013 * If sticky is set allocate memory (if not already present). 4014 * Otherwise just point optbuf and optlen at invalp and inlen. 4015 * Returns failure if memory can not be allocated. 4016 */ 4017 static int 4018 udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 4019 uchar_t **optbufp, uint_t *optlenp) 4020 { 4021 uchar_t *optbuf; 4022 4023 if (!sticky) { 4024 *optbufp = invalp; 4025 *optlenp = inlen; 4026 return (0); 4027 } 4028 if (inlen == *optlenp) { 4029 /* Unchanged length - no need to realocate */ 4030 bcopy(invalp, *optbufp, inlen); 4031 return (0); 4032 } 4033 if (inlen != 0) { 4034 /* Allocate new buffer before free */ 4035 optbuf = kmem_alloc(inlen, KM_NOSLEEP); 4036 if (optbuf == NULL) 4037 return (ENOMEM); 4038 } else { 4039 optbuf = NULL; 4040 } 4041 /* Free old buffer */ 4042 if (*optlenp != 0) 4043 kmem_free(*optbufp, *optlenp); 4044 4045 bcopy(invalp, optbuf, inlen); 4046 *optbufp = optbuf; 4047 *optlenp = inlen; 4048 return (0); 4049 } 4050 4051 /* 4052 * This routine retrieves the value of an ND variable in a udpparam_t 4053 * structure. It is called through nd_getset when a user reads the 4054 * variable. 4055 */ 4056 /* ARGSUSED */ 4057 static int 4058 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4059 { 4060 udpparam_t *udppa = (udpparam_t *)cp; 4061 4062 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4063 return (0); 4064 } 4065 4066 /* 4067 * Walk through the param array specified registering each element with the 4068 * named dispatch (ND) handler. 
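 *
 * Once registered, these parameters are reachable through the usual
 * named dispatch interface.  As an illustrative example (editor's
 * addition, not part of the original source), an administrator
 * would typically read and write such a tunable with commands of
 * the form:
 *
 *	ndd -get /dev/udp udp_smallest_anon_port
 *	ndd -set /dev/udp udp_smallest_anon_port 40000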
4069 */ 4070 static boolean_t 4071 udp_param_register(udpparam_t *udppa, int cnt) 4072 { 4073 for (; cnt-- > 0; udppa++) { 4074 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4075 if (!nd_load(&udp_g_nd, udppa->udp_param_name, 4076 udp_param_get, udp_param_set, 4077 (caddr_t)udppa)) { 4078 nd_free(&udp_g_nd); 4079 return (B_FALSE); 4080 } 4081 } 4082 } 4083 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports", 4084 udp_extra_priv_ports_get, NULL, NULL)) { 4085 nd_free(&udp_g_nd); 4086 return (B_FALSE); 4087 } 4088 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add", 4089 NULL, udp_extra_priv_ports_add, NULL)) { 4090 nd_free(&udp_g_nd); 4091 return (B_FALSE); 4092 } 4093 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del", 4094 NULL, udp_extra_priv_ports_del, NULL)) { 4095 nd_free(&udp_g_nd); 4096 return (B_FALSE); 4097 } 4098 if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL, 4099 NULL)) { 4100 nd_free(&udp_g_nd); 4101 return (B_FALSE); 4102 } 4103 if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL, 4104 NULL)) { 4105 nd_free(&udp_g_nd); 4106 return (B_FALSE); 4107 } 4108 return (B_TRUE); 4109 } 4110 4111 /* This routine sets an ND variable in a udpparam_t structure. */ 4112 /* ARGSUSED */ 4113 static int 4114 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 4115 { 4116 long new_value; 4117 udpparam_t *udppa = (udpparam_t *)cp; 4118 4119 /* 4120 * Fail the request if the new value does not lie within the 4121 * required bounds. 4122 */ 4123 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 4124 new_value < udppa->udp_param_min || 4125 new_value > udppa->udp_param_max) { 4126 return (EINVAL); 4127 } 4128 4129 /* Set the new value */ 4130 udppa->udp_param_value = new_value; 4131 return (0); 4132 } 4133 4134 static void 4135 udp_input(conn_t *connp, mblk_t *mp) 4136 { 4137 struct T_unitdata_ind *tudi; 4138 uchar_t *rptr; /* Pointer to IP header */ 4139 int hdr_length; /* Length of IP+UDP headers */ 4140 int udi_size; /* Size of T_unitdata_ind */ 4141 int mp_len; 4142 udp_t *udp; 4143 udpha_t *udpha; 4144 int ipversion; 4145 ip6_pkt_t ipp; 4146 ip6_t *ip6h; 4147 ip6i_t *ip6i; 4148 mblk_t *mp1; 4149 mblk_t *options_mp = NULL; 4150 in_pktinfo_t *pinfo = NULL; 4151 cred_t *cr = NULL; 4152 queue_t *q = connp->conn_rq; 4153 pid_t cpid; 4154 4155 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4156 "udp_rput_start: q %p mp %p", q, mp); 4157 4158 udp = connp->conn_udp; 4159 rptr = mp->b_rptr; 4160 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4161 ASSERT(OK_32PTR(rptr)); 4162 4163 /* 4164 * IP should have prepended the options data in an M_CTL 4165 * Check M_CTL "type" to make sure are not here bcos of 4166 * a valid ICMP message 4167 */ 4168 if (DB_TYPE(mp) == M_CTL) { 4169 if (MBLKL(mp) == sizeof (in_pktinfo_t) && 4170 ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type == 4171 IN_PKTINFO) { 4172 /* 4173 * IP_RECVIF or IP_RECVSLLA information has been 4174 * appended to the packet by IP. We need to 4175 * extract the mblk and adjust the rptr 4176 */ 4177 pinfo = (in_pktinfo_t *)mp->b_rptr; 4178 options_mp = mp; 4179 mp = mp->b_cont; 4180 rptr = mp->b_rptr; 4181 UDP_STAT(udp_in_pktinfo); 4182 } else { 4183 /* 4184 * ICMP messages. 4185 */ 4186 udp_icmp_error(q, mp); 4187 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4188 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4189 return; 4190 } 4191 } 4192 4193 mp_len = msgdsize(mp); 4194 /* 4195 * This is the inbound data path. 
4196 * First, we check to make sure the IP version number is correct, 4197 * and then pull the IP and UDP headers into the first mblk. 4198 * Assume IP provides aligned packets - otherwise toss. 4199 * Also, check if we have a complete IP header. 4200 */ 4201 4202 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4203 ipp.ipp_fields = 0; 4204 4205 ipversion = IPH_HDR_VERSION(rptr); 4206 switch (ipversion) { 4207 case IPV4_VERSION: 4208 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4209 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4210 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4211 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4212 (udp->udp_ip_rcv_options_len)) { 4213 /* 4214 * Handle IPv4 packets with options outside of the 4215 * main data path. Not needed for AF_INET6 sockets 4216 * since they don't support a getsockopt of IP_OPTIONS. 4217 */ 4218 if (udp->udp_family == AF_INET6) 4219 break; 4220 /* 4221 * UDP length check performed for IPv4 packets with 4222 * options to check whether UDP length specified in 4223 * the header is the same as the physical length of 4224 * the packet. 4225 */ 4226 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4227 if (mp_len != (ntohs(udpha->uha_length) + 4228 hdr_length - UDPH_SIZE)) { 4229 goto tossit; 4230 } 4231 /* 4232 * Handle the case where the packet has IP options 4233 * and the IP_RECVSLLA & IP_RECVIF are set 4234 */ 4235 if (pinfo != NULL) 4236 mp = options_mp; 4237 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4238 SQTAG_UDP_INPUT); 4239 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4240 "udp_rput_end: q %p (%S)", q, "end"); 4241 return; 4242 } 4243 4244 /* Handle IPV6_RECVHOPLIMIT. */ 4245 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4246 udp->udp_ipv6_recvpktinfo) { 4247 if (pinfo->in_pkt_flags & IPF_RECVIF) { 4248 ipp.ipp_fields |= IPPF_IFINDEX; 4249 ipp.ipp_ifindex = pinfo->in_pkt_ifindex; 4250 } 4251 } 4252 break; 4253 case IPV6_VERSION: 4254 /* 4255 * IPv6 packets can only be received by applications 4256 * that are prepared to receive IPv6 addresses. 4257 * The IP fanout must ensure this. 4258 */ 4259 ASSERT(udp->udp_family == AF_INET6); 4260 4261 ip6h = (ip6_t *)rptr; 4262 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4263 4264 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4265 uint8_t nexthdrp; 4266 /* Look for ifindex information */ 4267 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4268 ip6i = (ip6i_t *)ip6h; 4269 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4270 goto tossit; 4271 4272 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4273 ASSERT(ip6i->ip6i_ifindex != 0); 4274 ipp.ipp_fields |= IPPF_IFINDEX; 4275 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4276 } 4277 rptr = (uchar_t *)&ip6i[1]; 4278 mp->b_rptr = rptr; 4279 if (rptr == mp->b_wptr) { 4280 mp1 = mp->b_cont; 4281 freeb(mp); 4282 mp = mp1; 4283 rptr = mp->b_rptr; 4284 } 4285 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4286 goto tossit; 4287 ip6h = (ip6_t *)rptr; 4288 mp_len = msgdsize(mp); 4289 } 4290 /* 4291 * Find any potentially interesting extension headers 4292 * as well as the length of the IPv6 + extension 4293 * headers. 4294 */ 4295 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4296 UDPH_SIZE; 4297 ASSERT(nexthdrp == IPPROTO_UDP); 4298 } else { 4299 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4300 ip6i = NULL; 4301 } 4302 break; 4303 default: 4304 ASSERT(0); 4305 } 4306 4307 /* 4308 * IP inspected the UDP header thus all of it must be in the mblk. 
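 *
 * Worked example (editor's illustration, not from the original
 * source): for a datagram carrying a 100-byte payload behind a
 * simple 20-byte IPv4 header, uha_length is 8 + 100 = 108 and
 * hdr_length is 20 + 8 = 28, so msgdsize(mp) must equal
 * 108 + 28 - 8 = 128 for the length check below to pass.
 *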
4309 * UDP length check is performed for IPv6 packets and IPv4 packets 4310 * without options to check if the size of the packet as specified 4311 * by the header is the same as the physical size of the packet. 4312 */ 4313 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4314 if ((MBLKL(mp) < hdr_length) || 4315 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4316 goto tossit; 4317 } 4318 4319 /* Walk past the headers. */ 4320 if (!udp->udp_rcvhdr) { 4321 mp->b_rptr = rptr + hdr_length; 4322 mp_len -= hdr_length; 4323 } 4324 4325 /* 4326 * This is the inbound data path. Packets are passed upstream as 4327 * T_UNITDATA_IND messages with full IP headers still attached. 4328 */ 4329 if (udp->udp_family == AF_INET) { 4330 sin_t *sin; 4331 4332 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4333 4334 /* 4335 * Normally only send up the address. 4336 * If IP_RECVDSTADDR is set we include the destination IP 4337 * address as an option. With IP_RECVOPTS we include all 4338 * the IP options. Only ip_rput_other() handles packets 4339 * that contain IP options. 4340 */ 4341 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4342 if (udp->udp_recvdstaddr) { 4343 udi_size += sizeof (struct T_opthdr) + 4344 sizeof (struct in_addr); 4345 UDP_STAT(udp_in_recvdstaddr); 4346 } 4347 4348 /* 4349 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4350 * space accordingly 4351 */ 4352 if (udp->udp_recvif && (pinfo != NULL) && 4353 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4354 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4355 UDP_STAT(udp_in_recvif); 4356 } 4357 4358 if (udp->udp_recvslla && (pinfo != NULL) && 4359 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4360 udi_size += sizeof (struct T_opthdr) + 4361 sizeof (struct sockaddr_dl); 4362 UDP_STAT(udp_in_recvslla); 4363 } 4364 4365 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4366 udi_size += sizeof (struct T_opthdr) + ucredsize; 4367 cpid = DB_CPID(mp); 4368 UDP_STAT(udp_in_recvucred); 4369 } 4370 /* 4371 * If IP_RECVTTL is set allocate the appropriate sized buffer 4372 */ 4373 if (udp->udp_recvttl) { 4374 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4375 UDP_STAT(udp_in_recvttl); 4376 } 4377 4378 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4379 4380 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4381 mp1 = allocb(udi_size, BPRI_MED); 4382 if (mp1 == NULL) { 4383 freemsg(mp); 4384 if (options_mp != NULL) 4385 freeb(options_mp); 4386 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4387 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4388 BUMP_MIB(&udp_mib, udpInErrors); 4389 return; 4390 } 4391 mp1->b_cont = mp; 4392 mp = mp1; 4393 mp->b_datap->db_type = M_PROTO; 4394 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4395 mp->b_wptr = (uchar_t *)tudi + udi_size; 4396 tudi->PRIM_type = T_UNITDATA_IND; 4397 tudi->SRC_length = sizeof (sin_t); 4398 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4399 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4400 sizeof (sin_t); 4401 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4402 tudi->OPT_length = udi_size; 4403 sin = (sin_t *)&tudi[1]; 4404 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4405 sin->sin_port = udpha->uha_src_port; 4406 sin->sin_family = udp->udp_family; 4407 *(uint32_t *)&sin->sin_zero[0] = 0; 4408 *(uint32_t *)&sin->sin_zero[4] = 0; 4409 4410 /* 4411 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4412 * IP_RECVTTL has been set. 
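 *
 * Each enabled option is appended as a struct T_opthdr immediately
 * followed by its value, e.g. (editor's illustration only, for the
 * case where IP_RECVDSTADDR and IP_RECVTTL are both enabled):
 *
 *	T_opthdr { IPPROTO_IP, IP_RECVDSTADDR, len } | ipaddr_t dst
 *	T_opthdr { IPPROTO_IP, IP_RECVTTL, len }     | uint8_t ttl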
4413 */ 4414 if (udi_size != 0) { 4415 /* 4416 * Copy in destination address before options to avoid 4417 * any padding issues. 4418 */ 4419 char *dstopt; 4420 4421 dstopt = (char *)&sin[1]; 4422 if (udp->udp_recvdstaddr) { 4423 struct T_opthdr *toh; 4424 ipaddr_t *dstptr; 4425 4426 toh = (struct T_opthdr *)dstopt; 4427 toh->level = IPPROTO_IP; 4428 toh->name = IP_RECVDSTADDR; 4429 toh->len = sizeof (struct T_opthdr) + 4430 sizeof (ipaddr_t); 4431 toh->status = 0; 4432 dstopt += sizeof (struct T_opthdr); 4433 dstptr = (ipaddr_t *)dstopt; 4434 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4435 dstopt += sizeof (ipaddr_t); 4436 udi_size -= toh->len; 4437 } 4438 4439 if (udp->udp_recvslla && (pinfo != NULL) && 4440 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4441 4442 struct T_opthdr *toh; 4443 struct sockaddr_dl *dstptr; 4444 4445 toh = (struct T_opthdr *)dstopt; 4446 toh->level = IPPROTO_IP; 4447 toh->name = IP_RECVSLLA; 4448 toh->len = sizeof (struct T_opthdr) + 4449 sizeof (struct sockaddr_dl); 4450 toh->status = 0; 4451 dstopt += sizeof (struct T_opthdr); 4452 dstptr = (struct sockaddr_dl *)dstopt; 4453 bcopy(&pinfo->in_pkt_slla, dstptr, 4454 sizeof (struct sockaddr_dl)); 4455 dstopt += sizeof (struct sockaddr_dl); 4456 udi_size -= toh->len; 4457 } 4458 4459 if (udp->udp_recvif && (pinfo != NULL) && 4460 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4461 4462 struct T_opthdr *toh; 4463 uint_t *dstptr; 4464 4465 toh = (struct T_opthdr *)dstopt; 4466 toh->level = IPPROTO_IP; 4467 toh->name = IP_RECVIF; 4468 toh->len = sizeof (struct T_opthdr) + 4469 sizeof (uint_t); 4470 toh->status = 0; 4471 dstopt += sizeof (struct T_opthdr); 4472 dstptr = (uint_t *)dstopt; 4473 *dstptr = pinfo->in_pkt_ifindex; 4474 dstopt += sizeof (uint_t); 4475 udi_size -= toh->len; 4476 } 4477 4478 if (cr != NULL) { 4479 struct T_opthdr *toh; 4480 4481 toh = (struct T_opthdr *)dstopt; 4482 toh->level = SOL_SOCKET; 4483 toh->name = SCM_UCRED; 4484 toh->len = sizeof (struct T_opthdr) + ucredsize; 4485 toh->status = 0; 4486 (void) cred2ucred(cr, cpid, &toh[1]); 4487 dstopt += toh->len; 4488 udi_size -= toh->len; 4489 } 4490 4491 if (udp->udp_recvttl) { 4492 struct T_opthdr *toh; 4493 uint8_t *dstptr; 4494 4495 toh = (struct T_opthdr *)dstopt; 4496 toh->level = IPPROTO_IP; 4497 toh->name = IP_RECVTTL; 4498 toh->len = sizeof (struct T_opthdr) + 4499 sizeof (uint8_t); 4500 toh->status = 0; 4501 dstopt += sizeof (struct T_opthdr); 4502 dstptr = (uint8_t *)dstopt; 4503 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4504 dstopt += sizeof (uint8_t); 4505 udi_size -= toh->len; 4506 } 4507 4508 /* Consumed all of allocated space */ 4509 ASSERT(udi_size == 0); 4510 } 4511 } else { 4512 sin6_t *sin6; 4513 4514 /* 4515 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4516 * 4517 * Normally we only send up the address. If receiving of any 4518 * optional receive side information is enabled, we also send 4519 * that up as options. 
4520 * [ Only udp_rput_other() handles packets that contain IP 4521 * options so code to account for does not appear immediately 4522 * below but elsewhere ] 4523 */ 4524 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4525 4526 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4527 IPPF_RTHDR|IPPF_IFINDEX)) { 4528 if (udp->udp_ipv6_recvhopopts && 4529 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4530 udi_size += sizeof (struct T_opthdr) + 4531 ipp.ipp_hopoptslen; 4532 UDP_STAT(udp_in_recvhopopts); 4533 } 4534 if ((udp->udp_ipv6_recvdstopts || 4535 udp->udp_old_ipv6_recvdstopts) && 4536 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4537 udi_size += sizeof (struct T_opthdr) + 4538 ipp.ipp_dstoptslen; 4539 UDP_STAT(udp_in_recvdstopts); 4540 } 4541 if (((udp->udp_ipv6_recvdstopts && 4542 udp->udp_ipv6_recvrthdr && 4543 (ipp.ipp_fields & IPPF_RTHDR)) || 4544 udp->udp_ipv6_recvrthdrdstopts) && 4545 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4546 udi_size += sizeof (struct T_opthdr) + 4547 ipp.ipp_rtdstoptslen; 4548 UDP_STAT(udp_in_recvrtdstopts); 4549 } 4550 if (udp->udp_ipv6_recvrthdr && 4551 (ipp.ipp_fields & IPPF_RTHDR)) { 4552 udi_size += sizeof (struct T_opthdr) + 4553 ipp.ipp_rthdrlen; 4554 UDP_STAT(udp_in_recvrthdr); 4555 } 4556 if (udp->udp_ipv6_recvpktinfo && 4557 (ipp.ipp_fields & IPPF_IFINDEX)) { 4558 udi_size += sizeof (struct T_opthdr) + 4559 sizeof (struct in6_pktinfo); 4560 UDP_STAT(udp_in_recvpktinfo); 4561 } 4562 4563 } 4564 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4565 udi_size += sizeof (struct T_opthdr) + ucredsize; 4566 cpid = DB_CPID(mp); 4567 UDP_STAT(udp_in_recvucred); 4568 } 4569 4570 if (udp->udp_ipv6_recvhoplimit) { 4571 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4572 UDP_STAT(udp_in_recvhoplimit); 4573 } 4574 4575 if (udp->udp_ipv6_recvtclass) { 4576 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4577 UDP_STAT(udp_in_recvtclass); 4578 } 4579 4580 mp1 = allocb(udi_size, BPRI_MED); 4581 if (mp1 == NULL) { 4582 freemsg(mp); 4583 if (options_mp != NULL) 4584 freeb(options_mp); 4585 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4586 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4587 BUMP_MIB(&udp_mib, udpInErrors); 4588 return; 4589 } 4590 mp1->b_cont = mp; 4591 mp = mp1; 4592 mp->b_datap->db_type = M_PROTO; 4593 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4594 mp->b_wptr = (uchar_t *)tudi + udi_size; 4595 tudi->PRIM_type = T_UNITDATA_IND; 4596 tudi->SRC_length = sizeof (sin6_t); 4597 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4598 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4599 sizeof (sin6_t); 4600 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4601 tudi->OPT_length = udi_size; 4602 sin6 = (sin6_t *)&tudi[1]; 4603 if (ipversion == IPV4_VERSION) { 4604 in6_addr_t v6dst; 4605 4606 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4607 &sin6->sin6_addr); 4608 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4609 &v6dst); 4610 sin6->sin6_flowinfo = 0; 4611 sin6->sin6_scope_id = 0; 4612 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4613 connp->conn_zoneid); 4614 } else { 4615 sin6->sin6_addr = ip6h->ip6_src; 4616 /* No sin6_flowinfo per API */ 4617 sin6->sin6_flowinfo = 0; 4618 /* For link-scope source pass up scope id */ 4619 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4620 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4621 sin6->sin6_scope_id = ipp.ipp_ifindex; 4622 else 4623 sin6->sin6_scope_id = 0; 4624 sin6->__sin6_src_id = ip_srcid_find_addr( 4625 &ip6h->ip6_dst, connp->conn_zoneid); 4626 } 4627 sin6->sin6_port = 
udpha->uha_src_port; 4628 sin6->sin6_family = udp->udp_family; 4629 4630 if (udi_size != 0) { 4631 uchar_t *dstopt; 4632 4633 dstopt = (uchar_t *)&sin6[1]; 4634 if (udp->udp_ipv6_recvpktinfo && 4635 (ipp.ipp_fields & IPPF_IFINDEX)) { 4636 struct T_opthdr *toh; 4637 struct in6_pktinfo *pkti; 4638 4639 toh = (struct T_opthdr *)dstopt; 4640 toh->level = IPPROTO_IPV6; 4641 toh->name = IPV6_PKTINFO; 4642 toh->len = sizeof (struct T_opthdr) + 4643 sizeof (*pkti); 4644 toh->status = 0; 4645 dstopt += sizeof (struct T_opthdr); 4646 pkti = (struct in6_pktinfo *)dstopt; 4647 if (ipversion == IPV6_VERSION) 4648 pkti->ipi6_addr = ip6h->ip6_dst; 4649 else 4650 IN6_IPADDR_TO_V4MAPPED( 4651 ((ipha_t *)rptr)->ipha_dst, 4652 &pkti->ipi6_addr); 4653 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4654 dstopt += sizeof (*pkti); 4655 udi_size -= toh->len; 4656 } 4657 if (udp->udp_ipv6_recvhoplimit) { 4658 struct T_opthdr *toh; 4659 4660 toh = (struct T_opthdr *)dstopt; 4661 toh->level = IPPROTO_IPV6; 4662 toh->name = IPV6_HOPLIMIT; 4663 toh->len = sizeof (struct T_opthdr) + 4664 sizeof (uint_t); 4665 toh->status = 0; 4666 dstopt += sizeof (struct T_opthdr); 4667 if (ipversion == IPV6_VERSION) 4668 *(uint_t *)dstopt = ip6h->ip6_hops; 4669 else 4670 *(uint_t *)dstopt = 4671 ((ipha_t *)rptr)->ipha_ttl; 4672 dstopt += sizeof (uint_t); 4673 udi_size -= toh->len; 4674 } 4675 if (udp->udp_ipv6_recvtclass) { 4676 struct T_opthdr *toh; 4677 4678 toh = (struct T_opthdr *)dstopt; 4679 toh->level = IPPROTO_IPV6; 4680 toh->name = IPV6_TCLASS; 4681 toh->len = sizeof (struct T_opthdr) + 4682 sizeof (uint_t); 4683 toh->status = 0; 4684 dstopt += sizeof (struct T_opthdr); 4685 if (ipversion == IPV6_VERSION) { 4686 *(uint_t *)dstopt = 4687 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4688 } else { 4689 ipha_t *ipha = (ipha_t *)rptr; 4690 *(uint_t *)dstopt = 4691 ipha->ipha_type_of_service; 4692 } 4693 dstopt += sizeof (uint_t); 4694 udi_size -= toh->len; 4695 } 4696 if (udp->udp_ipv6_recvhopopts && 4697 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4698 struct T_opthdr *toh; 4699 4700 toh = (struct T_opthdr *)dstopt; 4701 toh->level = IPPROTO_IPV6; 4702 toh->name = IPV6_HOPOPTS; 4703 toh->len = sizeof (struct T_opthdr) + 4704 ipp.ipp_hopoptslen; 4705 toh->status = 0; 4706 dstopt += sizeof (struct T_opthdr); 4707 bcopy(ipp.ipp_hopopts, dstopt, 4708 ipp.ipp_hopoptslen); 4709 dstopt += ipp.ipp_hopoptslen; 4710 udi_size -= toh->len; 4711 } 4712 if (udp->udp_ipv6_recvdstopts && 4713 udp->udp_ipv6_recvrthdr && 4714 (ipp.ipp_fields & IPPF_RTHDR) && 4715 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4716 struct T_opthdr *toh; 4717 4718 toh = (struct T_opthdr *)dstopt; 4719 toh->level = IPPROTO_IPV6; 4720 toh->name = IPV6_DSTOPTS; 4721 toh->len = sizeof (struct T_opthdr) + 4722 ipp.ipp_rtdstoptslen; 4723 toh->status = 0; 4724 dstopt += sizeof (struct T_opthdr); 4725 bcopy(ipp.ipp_rtdstopts, dstopt, 4726 ipp.ipp_rtdstoptslen); 4727 dstopt += ipp.ipp_rtdstoptslen; 4728 udi_size -= toh->len; 4729 } 4730 if (udp->udp_ipv6_recvrthdr && 4731 (ipp.ipp_fields & IPPF_RTHDR)) { 4732 struct T_opthdr *toh; 4733 4734 toh = (struct T_opthdr *)dstopt; 4735 toh->level = IPPROTO_IPV6; 4736 toh->name = IPV6_RTHDR; 4737 toh->len = sizeof (struct T_opthdr) + 4738 ipp.ipp_rthdrlen; 4739 toh->status = 0; 4740 dstopt += sizeof (struct T_opthdr); 4741 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4742 dstopt += ipp.ipp_rthdrlen; 4743 udi_size -= toh->len; 4744 } 4745 if (udp->udp_ipv6_recvdstopts && 4746 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4747 struct T_opthdr *toh; 4748 4749 toh = (struct T_opthdr 
*)dstopt; 4750 toh->level = IPPROTO_IPV6; 4751 toh->name = IPV6_DSTOPTS; 4752 toh->len = sizeof (struct T_opthdr) + 4753 ipp.ipp_dstoptslen; 4754 toh->status = 0; 4755 dstopt += sizeof (struct T_opthdr); 4756 bcopy(ipp.ipp_dstopts, dstopt, 4757 ipp.ipp_dstoptslen); 4758 dstopt += ipp.ipp_dstoptslen; 4759 udi_size -= toh->len; 4760 } 4761 4762 if (cr != NULL) { 4763 struct T_opthdr *toh; 4764 4765 toh = (struct T_opthdr *)dstopt; 4766 toh->level = SOL_SOCKET; 4767 toh->name = SCM_UCRED; 4768 toh->len = sizeof (struct T_opthdr) + ucredsize; 4769 toh->status = 0; 4770 (void) cred2ucred(cr, cpid, &toh[1]); 4771 dstopt += toh->len; 4772 udi_size -= toh->len; 4773 } 4774 /* Consumed all of allocated space */ 4775 ASSERT(udi_size == 0); 4776 } 4777 #undef sin6 4778 /* No IP_RECVDSTADDR for IPv6. */ 4779 } 4780 4781 BUMP_MIB(&udp_mib, udpInDatagrams); 4782 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4783 "udp_rput_end: q %p (%S)", q, "end"); 4784 if (options_mp != NULL) 4785 freeb(options_mp); 4786 4787 if (udp->udp_direct_sockfs) { 4788 /* 4789 * There is nothing above us except for the stream head; 4790 * use the read-side synchronous stream interface in 4791 * order to reduce the time spent in interrupt thread. 4792 */ 4793 ASSERT(udp->udp_issocket); 4794 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 4795 } else { 4796 /* 4797 * Use regular STREAMS interface to pass data upstream 4798 * if this is not a socket endpoint, or if we have 4799 * switched over to the slow mode due to sockmod being 4800 * popped or a module being pushed on top of us. 4801 */ 4802 putnext(UDP_RD(q), mp); 4803 } 4804 return; 4805 4806 tossit: 4807 freemsg(mp); 4808 if (options_mp != NULL) 4809 freeb(options_mp); 4810 BUMP_MIB(&udp_mib, udpInErrors); 4811 } 4812 4813 void 4814 udp_conn_recv(conn_t *connp, mblk_t *mp) 4815 { 4816 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 4817 } 4818 4819 /* ARGSUSED */ 4820 static void 4821 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 4822 { 4823 udp_input((conn_t *)arg, mp); 4824 _UDP_EXIT((conn_t *)arg); 4825 } 4826 4827 /* 4828 * Process non-M_DATA messages as well as M_DATA messages that requires 4829 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 4830 */ 4831 static void 4832 udp_rput_other(queue_t *q, mblk_t *mp) 4833 { 4834 struct T_unitdata_ind *tudi; 4835 mblk_t *mp1; 4836 uchar_t *rptr; 4837 uchar_t *new_rptr; 4838 int hdr_length; 4839 int udi_size; /* Size of T_unitdata_ind */ 4840 int opt_len; /* Length of IP options */ 4841 sin_t *sin; 4842 struct T_error_ack *tea; 4843 mblk_t *options_mp = NULL; 4844 in_pktinfo_t *pinfo; 4845 boolean_t recv_on = B_FALSE; 4846 cred_t *cr = NULL; 4847 udp_t *udp = Q_TO_UDP(q); 4848 pid_t cpid; 4849 4850 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4851 "udp_rput_other: q %p mp %p", q, mp); 4852 4853 ASSERT(OK_32PTR(mp->b_rptr)); 4854 rptr = mp->b_rptr; 4855 4856 switch (mp->b_datap->db_type) { 4857 case M_CTL: 4858 /* 4859 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 4860 */ 4861 recv_on = B_TRUE; 4862 options_mp = mp; 4863 pinfo = (in_pktinfo_t *)options_mp->b_rptr; 4864 4865 /* 4866 * The actual data is in mp->b_cont 4867 */ 4868 mp = mp->b_cont; 4869 ASSERT(OK_32PTR(mp->b_rptr)); 4870 rptr = mp->b_rptr; 4871 break; 4872 case M_DATA: 4873 /* 4874 * M_DATA messages contain IPv4 datagrams. They are handled 4875 * after this switch. 4876 */ 4877 break; 4878 case M_PROTO: 4879 case M_PCPROTO: 4880 /* M_PROTO messages contain some type of TPI message. 
*/ 4881 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX);
4882 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
4883 freemsg(mp);
4884 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
4885 "udp_rput_other_end: q %p (%S)", q, "protoshort");
4886 return;
4887 }
4888 tea = (struct T_error_ack *)rptr;
4889
4890 switch (tea->PRIM_type) {
4891 case T_ERROR_ACK:
4892 switch (tea->ERROR_prim) {
4893 case O_T_BIND_REQ:
4894 case T_BIND_REQ: {
4895 /*
4896 * If our O_T_BIND_REQ/T_BIND_REQ fails,
4897 * clear out the associated port and source
4898 * address before passing the message
4899 * upstream. If this was caused by a T_CONN_REQ,
4900 * revert back to bound state.
4901 */
4902 udp_fanout_t *udpf;
4903
4904 udpf = &udp_bind_fanout[
4905 UDP_BIND_HASH(udp->udp_port)];
4906 mutex_enter(&udpf->uf_lock);
4907 if (udp->udp_state == TS_DATA_XFER) {
4908 /* Connect failed */
4909 tea->ERROR_prim = T_CONN_REQ;
4910 /* Revert back to the bound source */
4911 udp->udp_v6src = udp->udp_bound_v6src;
4912 udp->udp_state = TS_IDLE;
4913 mutex_exit(&udpf->uf_lock);
4914 if (udp->udp_family == AF_INET6)
4915 (void) udp_build_hdrs(q, udp);
4916 break;
4917 }
4918
4919 if (udp->udp_discon_pending) {
4920 tea->ERROR_prim = T_DISCON_REQ;
4921 udp->udp_discon_pending = 0;
4922 }
4923 V6_SET_ZERO(udp->udp_v6src);
4924 V6_SET_ZERO(udp->udp_bound_v6src);
4925 udp->udp_state = TS_UNBND;
4926 udp_bind_hash_remove(udp, B_TRUE);
4927 udp->udp_port = 0;
4928 mutex_exit(&udpf->uf_lock);
4929 if (udp->udp_family == AF_INET6)
4930 (void) udp_build_hdrs(q, udp);
4931 break;
4932 }
4933 default:
4934 break;
4935 }
4936 break;
4937 case T_BIND_ACK:
4938 udp_rput_bind_ack(q, mp);
4939 return;
4940
4941 case T_OPTMGMT_ACK:
4942 case T_OK_ACK:
4943 break;
4944 default:
4945 freemsg(mp);
4946 return;
4947 }
4948 putnext(UDP_RD(q), mp);
4949 return;
4950 }
4951
4952 /*
4953 * This is the inbound data path.
4954 * First, we make sure the data contains both IP and UDP headers.
4955 *
4956 * This handles IPv4 packets for AF_INET sockets only.
4957 * AF_INET6 sockets can never access udp_ip_rcv_options, so there
4958 * is no need to save the options.
4959 */
4960 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
4961 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
4962 if (mp->b_wptr - rptr < hdr_length) {
4963 if (!pullupmsg(mp, hdr_length)) {
4964 freemsg(mp);
4965 if (options_mp != NULL)
4966 freeb(options_mp);
4967 BUMP_MIB(&udp_mib, udpInErrors);
4968 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
4969 "udp_rput_other_end: q %p (%S)", q, "hdrshort");
4970 BUMP_MIB(&udp_mib, udpInErrors);
4971 return;
4972 }
4973 rptr = mp->b_rptr;
4974 }
4975 /* Walk past the headers.
*/ 4976 new_rptr = rptr + hdr_length; 4977 if (!udp->udp_rcvhdr) 4978 mp->b_rptr = new_rptr; 4979 4980 /* Save the options if any */ 4981 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 4982 if (opt_len > 0) { 4983 if (opt_len > udp->udp_ip_rcv_options_len) { 4984 if (udp->udp_ip_rcv_options_len) 4985 mi_free((char *)udp->udp_ip_rcv_options); 4986 udp->udp_ip_rcv_options_len = 0; 4987 udp->udp_ip_rcv_options = 4988 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 4989 if (udp->udp_ip_rcv_options) 4990 udp->udp_ip_rcv_options_len = opt_len; 4991 } 4992 if (udp->udp_ip_rcv_options_len) { 4993 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 4994 udp->udp_ip_rcv_options, opt_len); 4995 /* Adjust length if we are resusing the space */ 4996 udp->udp_ip_rcv_options_len = opt_len; 4997 } 4998 } else if (udp->udp_ip_rcv_options_len) { 4999 mi_free((char *)udp->udp_ip_rcv_options); 5000 udp->udp_ip_rcv_options = NULL; 5001 udp->udp_ip_rcv_options_len = 0; 5002 } 5003 5004 /* 5005 * Normally only send up the address. 5006 * If IP_RECVDSTADDR is set we include the destination IP 5007 * address as an option. With IP_RECVOPTS we include all 5008 * the IP options. 5009 */ 5010 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5011 if (udp->udp_recvdstaddr) { 5012 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5013 UDP_STAT(udp_in_recvdstaddr); 5014 } 5015 if (udp->udp_recvopts && opt_len > 0) { 5016 udi_size += sizeof (struct T_opthdr) + opt_len; 5017 UDP_STAT(udp_in_recvopts); 5018 } 5019 5020 /* 5021 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5022 * space accordingly 5023 */ 5024 if (udp->udp_recvif && recv_on && 5025 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5026 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5027 UDP_STAT(udp_in_recvif); 5028 } 5029 5030 if (udp->udp_recvslla && recv_on && 5031 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5032 udi_size += sizeof (struct T_opthdr) + 5033 sizeof (struct sockaddr_dl); 5034 UDP_STAT(udp_in_recvslla); 5035 } 5036 5037 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5038 udi_size += sizeof (struct T_opthdr) + ucredsize; 5039 cpid = DB_CPID(mp); 5040 UDP_STAT(udp_in_recvucred); 5041 } 5042 /* 5043 * If IP_RECVTTL is set allocate the appropriate sized buffer 5044 */ 5045 if (udp->udp_recvttl) { 5046 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5047 UDP_STAT(udp_in_recvttl); 5048 } 5049 5050 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 5051 mp1 = allocb(udi_size, BPRI_MED); 5052 if (mp1 == NULL) { 5053 freemsg(mp); 5054 if (options_mp != NULL) 5055 freeb(options_mp); 5056 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5057 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5058 BUMP_MIB(&udp_mib, udpInErrors); 5059 return; 5060 } 5061 mp1->b_cont = mp; 5062 mp = mp1; 5063 mp->b_datap->db_type = M_PROTO; 5064 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5065 mp->b_wptr = (uchar_t *)tudi + udi_size; 5066 tudi->PRIM_type = T_UNITDATA_IND; 5067 tudi->SRC_length = sizeof (sin_t); 5068 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5069 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5070 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5071 tudi->OPT_length = udi_size; 5072 5073 sin = (sin_t *)&tudi[1]; 5074 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5075 sin->sin_port = ((in_port_t *) 5076 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5077 sin->sin_family = AF_INET; 5078 *(uint32_t *)&sin->sin_zero[0] = 0; 5079 *(uint32_t *)&sin->sin_zero[4] = 0; 5080 5081 /* 5082 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5083 * IP_RECVTTL has been set. 5084 */ 5085 if (udi_size != 0) { 5086 /* 5087 * Copy in destination address before options to avoid any 5088 * padding issues. 5089 */ 5090 char *dstopt; 5091 5092 dstopt = (char *)&sin[1]; 5093 if (udp->udp_recvdstaddr) { 5094 struct T_opthdr *toh; 5095 ipaddr_t *dstptr; 5096 5097 toh = (struct T_opthdr *)dstopt; 5098 toh->level = IPPROTO_IP; 5099 toh->name = IP_RECVDSTADDR; 5100 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5101 toh->status = 0; 5102 dstopt += sizeof (struct T_opthdr); 5103 dstptr = (ipaddr_t *)dstopt; 5104 *dstptr = (((ipaddr_t *)rptr)[4]); 5105 dstopt += sizeof (ipaddr_t); 5106 udi_size -= toh->len; 5107 } 5108 if (udp->udp_recvopts && udi_size != 0) { 5109 struct T_opthdr *toh; 5110 5111 toh = (struct T_opthdr *)dstopt; 5112 toh->level = IPPROTO_IP; 5113 toh->name = IP_RECVOPTS; 5114 toh->len = sizeof (struct T_opthdr) + opt_len; 5115 toh->status = 0; 5116 dstopt += sizeof (struct T_opthdr); 5117 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5118 dstopt += opt_len; 5119 udi_size -= toh->len; 5120 } 5121 5122 if (udp->udp_recvslla && recv_on && 5123 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5124 5125 struct T_opthdr *toh; 5126 struct sockaddr_dl *dstptr; 5127 5128 toh = (struct T_opthdr *)dstopt; 5129 toh->level = IPPROTO_IP; 5130 toh->name = IP_RECVSLLA; 5131 toh->len = sizeof (struct T_opthdr) + 5132 sizeof (struct sockaddr_dl); 5133 toh->status = 0; 5134 dstopt += sizeof (struct T_opthdr); 5135 dstptr = (struct sockaddr_dl *)dstopt; 5136 bcopy(&pinfo->in_pkt_slla, dstptr, 5137 sizeof (struct sockaddr_dl)); 5138 dstopt += sizeof (struct sockaddr_dl); 5139 udi_size -= toh->len; 5140 } 5141 5142 if (udp->udp_recvif && recv_on && 5143 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5144 5145 struct T_opthdr *toh; 5146 uint_t *dstptr; 5147 5148 toh = (struct T_opthdr *)dstopt; 5149 toh->level = IPPROTO_IP; 5150 toh->name = IP_RECVIF; 5151 toh->len = sizeof (struct T_opthdr) + 5152 sizeof (uint_t); 5153 toh->status = 0; 5154 dstopt += sizeof (struct T_opthdr); 5155 dstptr = (uint_t *)dstopt; 5156 *dstptr = pinfo->in_pkt_ifindex; 5157 dstopt += sizeof (uint_t); 5158 udi_size -= toh->len; 5159 } 5160 5161 if (cr != NULL) { 5162 struct T_opthdr *toh; 5163 5164 toh = (struct T_opthdr *)dstopt; 5165 toh->level = SOL_SOCKET; 5166 toh->name = SCM_UCRED; 5167 toh->len = sizeof (struct T_opthdr) + ucredsize; 5168 toh->status 
= 0; 5169 (void) cred2ucred(cr, cpid, &toh[1]); 5170 dstopt += toh->len; 5171 udi_size -= toh->len; 5172 } 5173 5174 if (udp->udp_recvttl) { 5175 struct T_opthdr *toh; 5176 uint8_t *dstptr; 5177 5178 toh = (struct T_opthdr *)dstopt; 5179 toh->level = IPPROTO_IP; 5180 toh->name = IP_RECVTTL; 5181 toh->len = sizeof (struct T_opthdr) + 5182 sizeof (uint8_t); 5183 toh->status = 0; 5184 dstopt += sizeof (struct T_opthdr); 5185 dstptr = (uint8_t *)dstopt; 5186 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5187 dstopt += sizeof (uint8_t); 5188 udi_size -= toh->len; 5189 } 5190 5191 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5192 } 5193 BUMP_MIB(&udp_mib, udpInDatagrams); 5194 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5195 "udp_rput_other_end: q %p (%S)", q, "end"); 5196 if (options_mp != NULL) 5197 freeb(options_mp); 5198 5199 if (udp->udp_direct_sockfs) { 5200 /* 5201 * There is nothing above us except for the stream head; 5202 * use the read-side synchronous stream interface in 5203 * order to reduce the time spent in interrupt thread. 5204 */ 5205 ASSERT(udp->udp_issocket); 5206 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5207 } else { 5208 /* 5209 * Use regular STREAMS interface to pass data upstream 5210 * if this is not a socket endpoint, or if we have 5211 * switched over to the slow mode due to sockmod being 5212 * popped or a module being pushed on top of us. 5213 */ 5214 putnext(UDP_RD(q), mp); 5215 } 5216 } 5217 5218 /* ARGSUSED */ 5219 static void 5220 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5221 { 5222 conn_t *connp = arg; 5223 5224 udp_rput_other(connp->conn_rq, mp); 5225 udp_exit(connp); 5226 } 5227 5228 /* 5229 * Process a T_BIND_ACK 5230 */ 5231 static void 5232 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5233 { 5234 udp_t *udp = Q_TO_UDP(q); 5235 mblk_t *mp1; 5236 ire_t *ire; 5237 struct T_bind_ack *tba; 5238 uchar_t *addrp; 5239 ipa_conn_t *ac; 5240 ipa6_conn_t *ac6; 5241 5242 if (udp->udp_discon_pending) 5243 udp->udp_discon_pending = 0; 5244 5245 /* 5246 * If a broadcast/multicast address was bound set 5247 * the source address to 0. 5248 * This ensures no datagrams with broadcast address 5249 * as source address are emitted (which would violate 5250 * RFC1122 - Hosts requirements) 5251 * 5252 * Note that when connecting the returned IRE is 5253 * for the destination address and we only perform 5254 * the broadcast check for the source address (it 5255 * is OK to connect to a broadcast/multicast address.) 5256 */ 5257 mp1 = mp->b_cont; 5258 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5259 ire = (ire_t *)mp1->b_rptr; 5260 5261 /* 5262 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5263 * local address. 
5264 */ 5265 if (ire->ire_type == IRE_BROADCAST && 5266 udp->udp_state != TS_DATA_XFER) { 5267 /* This was just a local bind to a broadcast addr */ 5268 V6_SET_ZERO(udp->udp_v6src); 5269 if (udp->udp_family == AF_INET6) 5270 (void) udp_build_hdrs(q, udp); 5271 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5272 /* 5273 * Local address not yet set - pick it from the 5274 * T_bind_ack 5275 */ 5276 tba = (struct T_bind_ack *)mp->b_rptr; 5277 addrp = &mp->b_rptr[tba->ADDR_offset]; 5278 switch (udp->udp_family) { 5279 case AF_INET: 5280 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5281 ac = (ipa_conn_t *)addrp; 5282 } else { 5283 ASSERT(tba->ADDR_length == 5284 sizeof (ipa_conn_x_t)); 5285 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5286 } 5287 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5288 &udp->udp_v6src); 5289 break; 5290 case AF_INET6: 5291 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5292 ac6 = (ipa6_conn_t *)addrp; 5293 } else { 5294 ASSERT(tba->ADDR_length == 5295 sizeof (ipa6_conn_x_t)); 5296 ac6 = &((ipa6_conn_x_t *) 5297 addrp)->ac6x_conn; 5298 } 5299 udp->udp_v6src = ac6->ac6_laddr; 5300 (void) udp_build_hdrs(q, udp); 5301 break; 5302 } 5303 } 5304 mp1 = mp1->b_cont; 5305 } 5306 /* 5307 * Look for one or more appended ACK message added by 5308 * udp_connect or udp_disconnect. 5309 * If none found just send up the T_BIND_ACK. 5310 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5311 * udp_disconnect has appended a T_OK_ACK. 5312 */ 5313 if (mp1 != NULL) { 5314 if (mp->b_cont == mp1) 5315 mp->b_cont = NULL; 5316 else { 5317 ASSERT(mp->b_cont->b_cont == mp1); 5318 mp->b_cont->b_cont = NULL; 5319 } 5320 freemsg(mp); 5321 mp = mp1; 5322 while (mp != NULL) { 5323 mp1 = mp->b_cont; 5324 mp->b_cont = NULL; 5325 putnext(UDP_RD(q), mp); 5326 mp = mp1; 5327 } 5328 return; 5329 } 5330 freemsg(mp->b_cont); 5331 mp->b_cont = NULL; 5332 putnext(UDP_RD(q), mp); 5333 } 5334 5335 /* 5336 * return SNMP stuff in buffer in mpdata 5337 */ 5338 int 5339 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5340 { 5341 mblk_t *mpdata; 5342 mblk_t *mp_conn_ctl; 5343 mblk_t *mp6_conn_ctl; 5344 mblk_t *mp_conn_data; 5345 mblk_t *mp6_conn_data; 5346 mblk_t *mp_conn_tail = NULL; 5347 mblk_t *mp6_conn_tail = NULL; 5348 struct opthdr *optp; 5349 mib2_udpEntry_t ude; 5350 mib2_udp6Entry_t ude6; 5351 int state; 5352 zoneid_t zoneid; 5353 int i; 5354 connf_t *connfp; 5355 conn_t *connp = Q_TO_CONN(q); 5356 udp_t *udp = connp->conn_udp; 5357 5358 if (mpctl == NULL || 5359 (mpdata = mpctl->b_cont) == NULL || 5360 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5361 (mp6_conn_ctl = copymsg(mpctl)) == NULL) { 5362 freemsg(mp_conn_ctl); 5363 return (0); 5364 } 5365 5366 mp_conn_data = mp_conn_ctl->b_cont; 5367 mp6_conn_data = mp6_conn_ctl->b_cont; 5368 5369 zoneid = connp->conn_zoneid; 5370 5371 /* fixed length structure for IPv4 and IPv6 counters */ 5372 SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5373 SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5374 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5375 optp->level = MIB2_UDP; 5376 optp->name = 0; 5377 (void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib)); 5378 optp->len = msgdsize(mpdata); 5379 qreply(q, mpctl); 5380 5381 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5382 connfp = &ipcl_globalhash_fanout[i]; 5383 connp = NULL; 5384 5385 while ((connp = ipcl_get_next_conn(connfp, connp, 5386 IPCL_UDP))) { 5387 udp = connp->conn_udp; 5388 if (zoneid != connp->conn_zoneid) 5389 continue; 5390 5391 /* 5392 * Note that the port 
numbers are sent in 5393 * host byte order 5394 */ 5395 5396 if (udp->udp_state == TS_UNBND) 5397 state = MIB2_UDP_unbound; 5398 else if (udp->udp_state == TS_IDLE) 5399 state = MIB2_UDP_idle; 5400 else if (udp->udp_state == TS_DATA_XFER) 5401 state = MIB2_UDP_connected; 5402 else 5403 state = MIB2_UDP_unknown; 5404 5405 /* 5406 * Create an IPv4 table entry for IPv4 entries and also 5407 * any IPv6 entries which are bound to in6addr_any 5408 * (i.e. anything a IPv4 peer could connect/send to). 5409 */ 5410 if (udp->udp_ipversion == IPV4_VERSION || 5411 (udp->udp_state <= TS_IDLE && 5412 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5413 ude.udpEntryInfo.ue_state = state; 5414 /* 5415 * If in6addr_any this will set it to 5416 * INADDR_ANY 5417 */ 5418 ude.udpLocalAddress = 5419 V4_PART_OF_V6(udp->udp_v6src); 5420 ude.udpLocalPort = ntohs(udp->udp_port); 5421 if (udp->udp_state == TS_DATA_XFER) { 5422 /* 5423 * Can potentially get here for 5424 * v6 socket if another process 5425 * (say, ping) has just done a 5426 * sendto(), changing the state 5427 * from the TS_IDLE above to 5428 * TS_DATA_XFER by the time we hit 5429 * this part of the code. 5430 */ 5431 ude.udpEntryInfo.ue_RemoteAddress = 5432 V4_PART_OF_V6(udp->udp_v6dst); 5433 ude.udpEntryInfo.ue_RemotePort = 5434 ntohs(udp->udp_dstport); 5435 } else { 5436 ude.udpEntryInfo.ue_RemoteAddress = 0; 5437 ude.udpEntryInfo.ue_RemotePort = 0; 5438 } 5439 (void) snmp_append_data2(mp_conn_data, 5440 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5441 } 5442 if (udp->udp_ipversion == IPV6_VERSION) { 5443 ude6.udp6EntryInfo.ue_state = state; 5444 ude6.udp6LocalAddress = udp->udp_v6src; 5445 ude6.udp6LocalPort = ntohs(udp->udp_port); 5446 ude6.udp6IfIndex = udp->udp_bound_if; 5447 if (udp->udp_state == TS_DATA_XFER) { 5448 ude6.udp6EntryInfo.ue_RemoteAddress = 5449 udp->udp_v6dst; 5450 ude6.udp6EntryInfo.ue_RemotePort = 5451 ntohs(udp->udp_dstport); 5452 } else { 5453 ude6.udp6EntryInfo.ue_RemoteAddress = 5454 sin6_null.sin6_addr; 5455 ude6.udp6EntryInfo.ue_RemotePort = 0; 5456 } 5457 (void) snmp_append_data2(mp6_conn_data, 5458 &mp6_conn_tail, (char *)&ude6, 5459 sizeof (ude6)); 5460 } 5461 } 5462 } 5463 5464 /* IPv4 UDP endpoints */ 5465 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5466 sizeof (struct T_optmgmt_ack)]; 5467 optp->level = MIB2_UDP; 5468 optp->name = MIB2_UDP_ENTRY; 5469 optp->len = msgdsize(mp_conn_data); 5470 qreply(q, mp_conn_ctl); 5471 5472 /* IPv6 UDP endpoints */ 5473 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5474 sizeof (struct T_optmgmt_ack)]; 5475 optp->level = MIB2_UDP6; 5476 optp->name = MIB2_UDP6_ENTRY; 5477 optp->len = msgdsize(mp6_conn_data); 5478 qreply(q, mp6_conn_ctl); 5479 5480 return (1); 5481 } 5482 5483 /* 5484 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 5485 * NOTE: Per MIB-II, UDP has no writable data. 5486 * TODO: If this ever actually tries to set anything, it needs to be 5487 * to do the appropriate locking. 
5488 */ 5489 /* ARGSUSED */ 5490 int 5491 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5492 uchar_t *ptr, int len) 5493 { 5494 switch (level) { 5495 case MIB2_UDP: 5496 return (0); 5497 default: 5498 return (1); 5499 } 5500 } 5501 5502 static void 5503 udp_report_item(mblk_t *mp, udp_t *udp) 5504 { 5505 char *state; 5506 char addrbuf1[INET6_ADDRSTRLEN]; 5507 char addrbuf2[INET6_ADDRSTRLEN]; 5508 uint_t print_len, buf_len; 5509 5510 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5511 ASSERT(buf_len >= 0); 5512 if (buf_len == 0) 5513 return; 5514 5515 if (udp->udp_state == TS_UNBND) 5516 state = "UNBOUND"; 5517 else if (udp->udp_state == TS_IDLE) 5518 state = "IDLE"; 5519 else if (udp->udp_state == TS_DATA_XFER) 5520 state = "CONNECTED"; 5521 else 5522 state = "UnkState"; 5523 print_len = snprintf((char *)mp->b_wptr, buf_len, 5524 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5525 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5526 inet_ntop(AF_INET6, &udp->udp_v6src, 5527 addrbuf1, sizeof (addrbuf1)), 5528 inet_ntop(AF_INET6, &udp->udp_v6dst, 5529 addrbuf2, sizeof (addrbuf2)), 5530 ntohs(udp->udp_dstport), state); 5531 if (print_len < buf_len) { 5532 mp->b_wptr += print_len; 5533 } else { 5534 mp->b_wptr += buf_len; 5535 } 5536 } 5537 5538 /* Report for ndd "udp_status" */ 5539 /* ARGSUSED */ 5540 static int 5541 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5542 { 5543 zoneid_t zoneid; 5544 connf_t *connfp; 5545 conn_t *connp = Q_TO_CONN(q); 5546 udp_t *udp = connp->conn_udp; 5547 int i; 5548 5549 /* 5550 * Because of the ndd constraint, at most we can have 64K buffer 5551 * to put in all UDP info. So to be more efficient, just 5552 * allocate a 64K buffer here, assuming we need that large buffer. 5553 * This may be a problem as any user can read udp_status. Therefore 5554 * we limit the rate of doing this using udp_ndd_get_info_interval. 5555 * This should be OK as normal users should not do this too often. 5556 */ 5557 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 5558 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 5559 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 5560 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5561 return (0); 5562 } 5563 } 5564 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5565 /* The following may work even if we cannot get a large buf. */ 5566 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5567 return (0); 5568 } 5569 (void) mi_mpprintf(mp, 5570 "UDP " MI_COL_HDRPAD_STR 5571 /* 12345678[89ABCDEF] */ 5572 " zone lport src addr dest addr port state"); 5573 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5574 5575 zoneid = connp->conn_zoneid; 5576 5577 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5578 connfp = &ipcl_globalhash_fanout[i]; 5579 connp = NULL; 5580 5581 while ((connp = ipcl_get_next_conn(connfp, connp, 5582 IPCL_UDP))) { 5583 udp = connp->conn_udp; 5584 if (zoneid != GLOBAL_ZONEID && 5585 zoneid != connp->conn_zoneid) 5586 continue; 5587 5588 udp_report_item(mp->b_cont, udp); 5589 } 5590 } 5591 udp_last_ndd_get_info_time = ddi_get_lbolt(); 5592 return (0); 5593 } 5594 5595 /* 5596 * This routine creates a T_UDERROR_IND message and passes it upstream. 5597 * The address and options are copied from the T_UNITDATA_REQ message 5598 * passed in mp. This message is freed. 
5599 */ 5600 static void 5601 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5602 t_scalar_t err) 5603 { 5604 struct T_unitdata_req *tudr; 5605 mblk_t *mp1; 5606 uchar_t *optaddr; 5607 t_scalar_t optlen; 5608 5609 if (DB_TYPE(mp) == M_DATA) { 5610 ASSERT(destaddr != NULL && destlen != 0); 5611 optaddr = NULL; 5612 optlen = 0; 5613 } else { 5614 if ((mp->b_wptr < mp->b_rptr) || 5615 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5616 goto done; 5617 } 5618 tudr = (struct T_unitdata_req *)mp->b_rptr; 5619 destaddr = mp->b_rptr + tudr->DEST_offset; 5620 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5621 destaddr + tudr->DEST_length < mp->b_rptr || 5622 destaddr + tudr->DEST_length > mp->b_wptr) { 5623 goto done; 5624 } 5625 optaddr = mp->b_rptr + tudr->OPT_offset; 5626 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 5627 optaddr + tudr->OPT_length < mp->b_rptr || 5628 optaddr + tudr->OPT_length > mp->b_wptr) { 5629 goto done; 5630 } 5631 destlen = tudr->DEST_length; 5632 optlen = tudr->OPT_length; 5633 } 5634 5635 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5636 (char *)optaddr, optlen, err); 5637 if (mp1 != NULL) 5638 putnext(UDP_RD(q), mp1); 5639 5640 done: 5641 freemsg(mp); 5642 } 5643 5644 /* 5645 * This routine removes a port number association from a stream. It 5646 * is called by udp_wput to handle T_UNBIND_REQ messages. 5647 */ 5648 static void 5649 udp_unbind(queue_t *q, mblk_t *mp) 5650 { 5651 udp_t *udp = Q_TO_UDP(q); 5652 5653 /* If a bind has not been done, we can't unbind. */ 5654 if (udp->udp_state == TS_UNBND) { 5655 udp_err_ack(q, mp, TOUTSTATE, 0); 5656 return; 5657 } 5658 if (cl_inet_unbind != NULL) { 5659 /* 5660 * Running in cluster mode - register unbind information 5661 */ 5662 if (udp->udp_ipversion == IPV4_VERSION) { 5663 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 5664 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5665 (in_port_t)udp->udp_port); 5666 } else { 5667 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5668 (uint8_t *)&(udp->udp_v6src), 5669 (in_port_t)udp->udp_port); 5670 } 5671 } 5672 5673 udp_bind_hash_remove(udp, B_FALSE); 5674 V6_SET_ZERO(udp->udp_v6src); 5675 V6_SET_ZERO(udp->udp_bound_v6src); 5676 udp->udp_port = 0; 5677 udp->udp_state = TS_UNBND; 5678 5679 if (udp->udp_family == AF_INET6) { 5680 int error; 5681 5682 /* Rebuild the header template */ 5683 error = udp_build_hdrs(q, udp); 5684 if (error != 0) { 5685 udp_err_ack(q, mp, TSYSERR, error); 5686 return; 5687 } 5688 } 5689 /* 5690 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5691 * and therefore ip_unbind must never return NULL. 5692 */ 5693 mp = ip_unbind(q, mp); 5694 ASSERT(mp != NULL); 5695 putnext(UDP_RD(q), mp); 5696 } 5697 5698 /* 5699 * Don't let port fall into the privileged range. 5700 * Since the extra priviledged ports can be arbitrary we also 5701 * ensure that we exclude those from consideration. 5702 * udp_g_epriv_ports is not sorted thus we loop over it until 5703 * there are no changes. 5704 */ 5705 static in_port_t 5706 udp_update_next_port(in_port_t port, boolean_t random) 5707 { 5708 int i; 5709 5710 if (random && udp_random_anon_port != 0) { 5711 (void) random_get_pseudo_bytes((uint8_t *)&port, 5712 sizeof (in_port_t)); 5713 /* 5714 * Unless changed by a sys admin, the smallest anon port 5715 * is 32768 and the largest anon port is 65535. It is 5716 * very likely (50%) for the random port to be smaller 5717 * than the smallest anon port. 
When that happens, 5718 * add port % (anon port range) to the smallest anon 5719 * port to get the random port. It should fall into the 5720 * valid anon port range. 5721 */ 5722 if (port < udp_smallest_anon_port) { 5723 port = udp_smallest_anon_port + 5724 port % (udp_largest_anon_port - 5725 udp_smallest_anon_port); 5726 } 5727 } 5728 5729 retry: 5730 if (port < udp_smallest_anon_port || port > udp_largest_anon_port) 5731 port = udp_smallest_anon_port; 5732 5733 if (port < udp_smallest_nonpriv_port) 5734 port = udp_smallest_nonpriv_port; 5735 5736 for (i = 0; i < udp_g_num_epriv_ports; i++) { 5737 if (port == udp_g_epriv_ports[i]) { 5738 port++; 5739 /* 5740 * Make sure that the port is in the 5741 * valid range. 5742 */ 5743 goto retry; 5744 } 5745 } 5746 return (port); 5747 } 5748 5749 static mblk_t * 5750 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5751 uint_t srcid, int *error) 5752 { 5753 udp_t *udp = connp->conn_udp; 5754 queue_t *q = connp->conn_wq; 5755 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 5756 mblk_t *mp2; 5757 ipha_t *ipha; 5758 int ip_hdr_length; 5759 uint32_t ip_len; 5760 udpha_t *udpha; 5761 5762 *error = 0; 5763 5764 /* mp1 points to the M_DATA mblk carrying the packet */ 5765 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5766 5767 /* Add an IP header */ 5768 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 5769 udp->udp_ip_snd_options_len; 5770 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5771 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5772 !OK_32PTR(ipha)) { 5773 mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO); 5774 if (mp2 == NULL) { 5775 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5776 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5777 *error = ENOMEM; 5778 goto done; 5779 } 5780 mp2->b_wptr = DB_LIM(mp2); 5781 mp2->b_cont = mp1; 5782 mp1 = mp2; 5783 if (DB_TYPE(mp) != M_DATA) 5784 mp->b_cont = mp1; 5785 else 5786 mp = mp1; 5787 5788 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5789 } 5790 ip_hdr_length -= UDPH_SIZE; 5791 #ifdef _BIG_ENDIAN 5792 /* Set version, header length, and tos */ 5793 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5794 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5795 udp->udp_type_of_service); 5796 /* Set ttl and protocol */ 5797 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5798 #else 5799 /* Set version, header length, and tos */ 5800 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5801 ((udp->udp_type_of_service << 8) | 5802 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5803 /* Set ttl and protocol */ 5804 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5805 #endif 5806 /* 5807 * Copy our address into the packet. If this is zero, 5808 * first look at __sin6_src_id for a hint. If we leave the source 5809 * as INADDR_ANY then ip will fill in the real source address. 
5810 */ 5811 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5812 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5813 in6_addr_t v6src; 5814 5815 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid); 5816 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5817 } 5818 5819 ipha->ipha_fragment_offset_and_flags = 0; 5820 ipha->ipha_ident = 0; 5821 5822 mp1->b_rptr = (uchar_t *)ipha; 5823 5824 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5825 (uintptr_t)UINT_MAX); 5826 5827 /* Determine length of packet */ 5828 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5829 if ((mp2 = mp1->b_cont) != NULL) { 5830 do { 5831 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5832 ip_len += (uint32_t)MBLKL(mp2); 5833 } while ((mp2 = mp2->b_cont) != NULL); 5834 } 5835 /* 5836 * If the size of the packet is greater than the maximum allowed by 5837 * ip, return an error. Passing this down could cause panics because 5838 * the size will have wrapped and be inconsistent with the msg size. 5839 */ 5840 if (ip_len > IP_MAXPACKET) { 5841 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5842 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5843 *error = EMSGSIZE; 5844 goto done; 5845 } 5846 ipha->ipha_length = htons((uint16_t)ip_len); 5847 ip_len -= ip_hdr_length; 5848 ip_len = htons((uint16_t)ip_len); 5849 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5850 5851 /* 5852 * Copy in the destination address 5853 */ 5854 if (v4dst == INADDR_ANY) 5855 ipha->ipha_dst = htonl(INADDR_LOOPBACK); 5856 else 5857 ipha->ipha_dst = v4dst; 5858 5859 /* 5860 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5861 */ 5862 if (CLASSD(v4dst)) 5863 ipha->ipha_ttl = udp->udp_multicast_ttl; 5864 5865 udpha->uha_dst_port = port; 5866 udpha->uha_src_port = udp->udp_port; 5867 5868 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 5869 uint32_t cksum; 5870 5871 bcopy(udp->udp_ip_snd_options, &ipha[1], 5872 udp->udp_ip_snd_options_len); 5873 /* 5874 * Massage source route putting first source route in ipha_dst. 5875 * Ignore the destination in T_unitdata_req. 5876 * Create a checksum adjustment for a source route, if any. 5877 */ 5878 cksum = ip_massage_options(ipha); 5879 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5880 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5881 (ipha->ipha_dst & 0xFFFF); 5882 if ((int)cksum < 0) 5883 cksum--; 5884 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5885 /* 5886 * IP does the checksum if uha_checksum is non-zero, 5887 * We make it easy for IP to include our pseudo header 5888 * by putting our length in uha_checksum. 5889 */ 5890 cksum += ip_len; 5891 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5892 /* There might be a carry. */ 5893 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5894 #ifdef _LITTLE_ENDIAN 5895 if (udp_do_checksum) 5896 ip_len = (cksum << 16) | ip_len; 5897 #else 5898 if (udp_do_checksum) 5899 ip_len = (ip_len << 16) | cksum; 5900 else 5901 ip_len <<= 16; 5902 #endif 5903 } else { 5904 /* 5905 * IP does the checksum if uha_checksum is non-zero, 5906 * We make it easy for IP to include our pseudo header 5907 * by putting our length in uha_checksum. 
5908 */ 5909 if (udp_do_checksum) 5910 ip_len |= (ip_len << 16); 5911 #ifndef _LITTLE_ENDIAN 5912 else 5913 ip_len <<= 16; 5914 #endif 5915 } 5916 /* Set UDP length and checksum */ 5917 *((uint32_t *)&udpha->uha_length) = ip_len; 5918 5919 if (DB_TYPE(mp) != M_DATA) { 5920 ASSERT(mp != mp1); 5921 freeb(mp); 5922 } 5923 5924 /* mp has been consumed and we'll return success */ 5925 ASSERT(*error == 0); 5926 mp = NULL; 5927 5928 /* We're done. Pass the packet to ip. */ 5929 BUMP_MIB(&udp_mib, udpOutDatagrams); 5930 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5931 "udp_wput_end: q %p (%S)", q, "end"); 5932 5933 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5934 CONN_OUTBOUND_POLICY_PRESENT(connp) || 5935 connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || 5936 connp->conn_nofailover_ill != NULL || 5937 connp->conn_outgoing_ill != NULL || 5938 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5939 IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) { 5940 UDP_STAT(udp_ip_send); 5941 ip_output(connp, mp1, connp->conn_wq, IP_WPUT); 5942 } else { 5943 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5944 } 5945 5946 done: 5947 if (*error != 0) { 5948 ASSERT(mp != NULL); 5949 BUMP_MIB(&udp_mib, udpOutErrors); 5950 } 5951 return (mp); 5952 } 5953 5954 static void 5955 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5956 { 5957 conn_t *connp = udp->udp_connp; 5958 ipaddr_t src, dst; 5959 ill_t *ill; 5960 ire_t *ire; 5961 ipif_t *ipif = NULL; 5962 mblk_t *ire_fp_mp; 5963 uint_t ire_fp_mp_len; 5964 uint16_t *up; 5965 uint32_t cksum, hcksum_txflags; 5966 queue_t *dev_q; 5967 boolean_t retry_caching; 5968 5969 dst = ipha->ipha_dst; 5970 src = ipha->ipha_src; 5971 ASSERT(ipha->ipha_ident == 0); 5972 5973 if (CLASSD(dst)) { 5974 int err; 5975 5976 ipif = conn_get_held_ipif(connp, 5977 &connp->conn_multicast_ipif, &err); 5978 5979 if (ipif == NULL || ipif->ipif_isv6 || 5980 (ipif->ipif_ill->ill_phyint->phyint_flags & 5981 PHYI_LOOPBACK)) { 5982 if (ipif != NULL) 5983 ipif_refrele(ipif); 5984 UDP_STAT(udp_ip_send); 5985 ip_output(connp, mp, q, IP_WPUT); 5986 return; 5987 } 5988 } 5989 5990 retry_caching = B_FALSE; 5991 mutex_enter(&connp->conn_lock); 5992 ire = connp->conn_ire_cache; 5993 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5994 5995 if (ire == NULL || ire->ire_addr != dst || 5996 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5997 retry_caching = B_TRUE; 5998 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5999 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6000 6001 ASSERT(ipif != NULL); 6002 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6003 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6004 retry_caching = B_TRUE; 6005 } 6006 6007 if (!retry_caching) { 6008 ASSERT(ire != NULL); 6009 IRE_REFHOLD(ire); 6010 mutex_exit(&connp->conn_lock); 6011 } else { 6012 boolean_t cached = B_FALSE; 6013 6014 connp->conn_ire_cache = NULL; 6015 mutex_exit(&connp->conn_lock); 6016 6017 /* Release the old ire */ 6018 if (ire != NULL) { 6019 IRE_REFRELE_NOTR(ire); 6020 ire = NULL; 6021 } 6022 6023 if (CLASSD(dst)) { 6024 ASSERT(ipif != NULL); 6025 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6026 connp->conn_zoneid, MATCH_IRE_ILL_GROUP); 6027 } else { 6028 ASSERT(ipif == NULL); 6029 ire = ire_cache_lookup(dst, connp->conn_zoneid); 6030 } 6031 6032 if (ire == NULL) { 6033 if (ipif != NULL) 6034 ipif_refrele(ipif); 6035 UDP_STAT(udp_ire_null); 6036 ip_output(connp, mp, q, IP_WPUT); 6037 return; 6038 } 6039 IRE_REFHOLD_NOTR(ire); 6040 6041 
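	/*
	 * Attempt to cache the freshly looked-up IRE on the conn so that
	 * later sends to the same destination can skip the lookup.  This
	 * is done under conn_lock, and only if the conn is not closing,
	 * no other thread has cached an IRE in the meantime, and the IRE
	 * has not been marked condemned (checked under the bucket lock).
	 */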
mutex_enter(&connp->conn_lock); 6042 if (!(connp->conn_state_flags & CONN_CLOSING) && 6043 connp->conn_ire_cache == NULL) { 6044 rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6045 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6046 connp->conn_ire_cache = ire; 6047 cached = B_TRUE; 6048 } 6049 rw_exit(&ire->ire_bucket->irb_lock); 6050 } 6051 mutex_exit(&connp->conn_lock); 6052 6053 /* 6054 * We can continue to use the ire but since it was not 6055 * cached, we should drop the extra reference. 6056 */ 6057 if (!cached) 6058 IRE_REFRELE_NOTR(ire); 6059 } 6060 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6061 ASSERT(!CLASSD(dst) || ipif != NULL); 6062 6063 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6064 (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL || 6065 ire->ire_max_frag < ntohs(ipha->ipha_length) || 6066 (ire_fp_mp = ire->ire_fp_mp) == NULL || 6067 (connp->conn_nexthop_set) || 6068 (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) { 6069 if (ipif != NULL) 6070 ipif_refrele(ipif); 6071 UDP_STAT(udp_ip_ire_send); 6072 IRE_REFRELE(ire); 6073 ip_output(connp, mp, q, IP_WPUT); 6074 return; 6075 } 6076 6077 BUMP_MIB(&ip_mib, ipOutRequests); 6078 6079 ill = ire_to_ill(ire); 6080 ASSERT(ill != NULL); 6081 6082 dev_q = ire->ire_stq->q_next; 6083 ASSERT(dev_q != NULL); 6084 /* 6085 * If the service thread is already running, or if the driver 6086 * queue is currently flow-controlled, queue this packet. 6087 */ 6088 if ((q->q_first != NULL || connp->conn_draining) || 6089 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 6090 if (ip_output_queue) { 6091 (void) putq(q, mp); 6092 } else { 6093 BUMP_MIB(&ip_mib, ipOutDiscards); 6094 freemsg(mp); 6095 } 6096 if (ipif != NULL) 6097 ipif_refrele(ipif); 6098 IRE_REFRELE(ire); 6099 return; 6100 } 6101 6102 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6103 #ifndef _BIG_ENDIAN 6104 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6105 #endif 6106 6107 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6108 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6109 src = ipha->ipha_src = ipif->ipif_src_addr; 6110 else 6111 src = ipha->ipha_src = ire->ire_src_addr; 6112 } 6113 6114 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6115 ASSERT(ill->ill_hcksum_capab != NULL); 6116 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6117 } else { 6118 hcksum_txflags = 0; 6119 } 6120 6121 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6122 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6123 6124 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6125 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6126 if (*up != 0) { 6127 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6128 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6129 ntohs(ipha->ipha_length), cksum); 6130 6131 /* Software checksum? 
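If IP_CKSUM_XMIT_FAST left no hardware-offload flag on the message, the checksum was computed in software; account for that in the statistics below.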
*/ 6132 if (DB_CKSUMFLAGS(mp) == 0) { 6133 UDP_STAT(udp_out_sw_cksum); 6134 UDP_STAT_UPDATE(udp_out_sw_cksum_bytes, 6135 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6136 } 6137 } 6138 6139 ipha->ipha_fragment_offset_and_flags |= 6140 (uint32_t)htons(ire->ire_frag_flag); 6141 6142 /* Calculate IP header checksum if hardware isn't capable */ 6143 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6144 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6145 ((uint16_t *)ipha)[4]); 6146 } 6147 6148 if (CLASSD(dst)) { 6149 ilm_t *ilm; 6150 6151 ILM_WALKER_HOLD(ill); 6152 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6153 ILM_WALKER_RELE(ill); 6154 if (ilm != NULL) { 6155 ip_multicast_loopback(q, ill, mp, 6156 connp->conn_multicast_loop ? 0 : 6157 IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); 6158 } 6159 6160 /* If multicast TTL is 0 then we are done */ 6161 if (ipha->ipha_ttl == 0) { 6162 if (ipif != NULL) 6163 ipif_refrele(ipif); 6164 freemsg(mp); 6165 IRE_REFRELE(ire); 6166 return; 6167 } 6168 } 6169 6170 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6171 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6172 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6173 6174 UPDATE_OB_PKT_COUNT(ire); 6175 ire->ire_last_used_time = lbolt; 6176 6177 if (ILL_DLS_CAPABLE(ill)) { 6178 /* 6179 * Send the packet directly to DLD, where it may be queued 6180 * depending on the availability of transmit resources at 6181 * the media layer. 6182 */ 6183 IP_DLS_ILL_TX(ill, mp); 6184 } else { 6185 putnext(ire->ire_stq, mp); 6186 } 6187 6188 if (ipif != NULL) 6189 ipif_refrele(ipif); 6190 IRE_REFRELE(ire); 6191 } 6192 6193 /* 6194 * This routine handles all messages passed downstream. It either 6195 * consumes the message or passes it downstream; it never queues a 6196 * a message. 6197 */ 6198 static void 6199 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6200 { 6201 sin6_t *sin6; 6202 sin_t *sin; 6203 ipaddr_t v4dst; 6204 uint16_t port; 6205 uint_t srcid; 6206 queue_t *q = connp->conn_wq; 6207 udp_t *udp = connp->conn_udp; 6208 t_scalar_t optlen; 6209 int error = 0; 6210 struct sockaddr_storage ss; 6211 6212 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6213 "udp_wput_start: connp %p mp %p", connp, mp); 6214 6215 /* 6216 * We directly handle several cases here: T_UNITDATA_REQ message 6217 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 6218 * connected and non-connected socket. The latter carries the 6219 * address structure along when this routine gets called. 
6220 */ 6221 switch (DB_TYPE(mp)) { 6222 case M_DATA: 6223 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6224 if (!udp->udp_direct_sockfs || 6225 addr == NULL || addrlen == 0) { 6226 /* Not connected; address is required */ 6227 BUMP_MIB(&udp_mib, udpOutErrors); 6228 UDP_STAT(udp_out_err_notconn); 6229 freemsg(mp); 6230 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6231 "udp_wput_end: connp %p (%S)", connp, 6232 "not-connected; address required"); 6233 return; 6234 } 6235 ASSERT(udp->udp_issocket); 6236 UDP_DBGSTAT(udp_data_notconn); 6237 /* Not connected; do some more checks below */ 6238 optlen = 0; 6239 break; 6240 } 6241 /* M_DATA for connected socket */ 6242 UDP_DBGSTAT(udp_data_conn); 6243 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6244 6245 /* Initialize addr and addrlen as if they're passed in */ 6246 if (udp->udp_family == AF_INET) { 6247 sin = (sin_t *)&ss; 6248 sin->sin_family = AF_INET; 6249 sin->sin_port = udp->udp_dstport; 6250 sin->sin_addr.s_addr = v4dst; 6251 addr = (struct sockaddr *)sin; 6252 addrlen = sizeof (*sin); 6253 } else { 6254 sin6 = (sin6_t *)&ss; 6255 sin6->sin6_family = AF_INET6; 6256 sin6->sin6_port = udp->udp_dstport; 6257 sin6->sin6_flowinfo = udp->udp_flowinfo; 6258 sin6->sin6_addr = udp->udp_v6dst; 6259 sin6->sin6_scope_id = 0; 6260 sin6->__sin6_src_id = 0; 6261 addr = (struct sockaddr *)sin6; 6262 addrlen = sizeof (*sin6); 6263 } 6264 6265 if (udp->udp_family == AF_INET || 6266 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 6267 /* 6268 * Handle both AF_INET and AF_INET6; the latter 6269 * for IPV4 mapped destination addresses. Note 6270 * here that both addr and addrlen point to the 6271 * corresponding struct depending on the address 6272 * family of the socket. 6273 */ 6274 mp = udp_output_v4(connp, mp, v4dst, 6275 udp->udp_dstport, 0, &error); 6276 } else { 6277 mp = udp_output_v6(connp, mp, sin6, 0, &error); 6278 } 6279 if (error != 0) { 6280 ASSERT(addr != NULL && addrlen != 0); 6281 goto ud_error; 6282 } 6283 return; 6284 case M_PROTO: 6285 case M_PCPROTO: { 6286 struct T_unitdata_req *tudr; 6287 6288 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6289 tudr = (struct T_unitdata_req *)mp->b_rptr; 6290 6291 /* Handle valid T_UNITDATA_REQ here */ 6292 if (MBLKL(mp) >= sizeof (*tudr) && 6293 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6294 if (mp->b_cont == NULL) { 6295 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6296 "udp_wput_end: q %p (%S)", q, "badaddr"); 6297 error = EPROTO; 6298 goto ud_error; 6299 } 6300 6301 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6302 tudr->DEST_length)) { 6303 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6304 "udp_wput_end: q %p (%S)", q, "badaddr"); 6305 error = EADDRNOTAVAIL; 6306 goto ud_error; 6307 } 6308 /* 6309 * If a port has not been bound to the stream, fail. 6310 * This is not a problem when sockfs is directly 6311 * above us, because it will ensure that the socket 6312 * is first bound before allowing data to be sent. 
6313 */ 6314 if (udp->udp_state == TS_UNBND) { 6315 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6316 "udp_wput_end: q %p (%S)", q, "outstate"); 6317 error = EPROTO; 6318 goto ud_error; 6319 } 6320 addr = (struct sockaddr *) 6321 &mp->b_rptr[tudr->DEST_offset]; 6322 addrlen = tudr->DEST_length; 6323 optlen = tudr->OPT_length; 6324 if (optlen != 0) 6325 UDP_STAT(udp_out_opt); 6326 break; 6327 } 6328 /* FALLTHRU */ 6329 } 6330 default: 6331 udp_become_writer(connp, mp, udp_wput_other_wrapper, 6332 SQTAG_UDP_OUTPUT); 6333 return; 6334 } 6335 ASSERT(addr != NULL); 6336 6337 switch (udp->udp_family) { 6338 case AF_INET6: 6339 sin6 = (sin6_t *)addr; 6340 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 6341 sin6->sin6_family != AF_INET6) { 6342 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6343 "udp_wput_end: q %p (%S)", q, "badaddr"); 6344 error = EADDRNOTAVAIL; 6345 goto ud_error; 6346 } 6347 6348 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6349 /* 6350 * Destination is a non-IPv4-compatible IPv6 address. 6351 * Send out an IPv6 format packet. 6352 */ 6353 mp = udp_output_v6(connp, mp, sin6, optlen, &error); 6354 if (error != 0) 6355 goto ud_error; 6356 6357 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6358 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6359 return; 6360 } 6361 /* 6362 * If the local address is not zero or a mapped address 6363 * return an error. It would be possible to send an IPv4 6364 * packet but the response would never make it back to the 6365 * application since it is bound to a non-mapped address. 6366 */ 6367 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6368 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6369 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6370 "udp_wput_end: q %p (%S)", q, "badaddr"); 6371 error = EADDRNOTAVAIL; 6372 goto ud_error; 6373 } 6374 /* Send IPv4 packet without modifying udp_ipversion */ 6375 /* Extract port and ipaddr */ 6376 port = sin6->sin6_port; 6377 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6378 srcid = sin6->__sin6_src_id; 6379 break; 6380 6381 case AF_INET: 6382 sin = (sin_t *)addr; 6383 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 6384 sin->sin_family != AF_INET) { 6385 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6386 "udp_wput_end: q %p (%S)", q, "badaddr"); 6387 error = EADDRNOTAVAIL; 6388 goto ud_error; 6389 } 6390 /* Extract port and ipaddr */ 6391 port = sin->sin_port; 6392 v4dst = sin->sin_addr.s_addr; 6393 srcid = 0; 6394 break; 6395 } 6396 6397 /* 6398 * If options passed in, feed it for verification and handling 6399 */ 6400 if (optlen != 0) { 6401 ASSERT(DB_TYPE(mp) != M_DATA); 6402 if (udp_unitdata_opt_process(q, mp, &error, NULL) < 0) { 6403 /* failure */ 6404 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6405 "udp_wput_end: q %p (%S)", q, 6406 "udp_unitdata_opt_process"); 6407 goto ud_error; 6408 } 6409 /* 6410 * Note: success in processing options. 
6411 * mp option buffer represented by 6412 * OPT_length/offset now potentially modified 6413 * and contain option setting results 6414 */ 6415 } 6416 ASSERT(error == 0); 6417 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); 6418 if (error != 0) { 6419 ud_error: 6420 UDP_STAT(udp_out_err_output); 6421 ASSERT(mp != NULL); 6422 /* mp is freed by the following routine */ 6423 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6424 (t_scalar_t)error); 6425 } 6426 } 6427 6428 /* ARGSUSED */ 6429 static void 6430 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 6431 { 6432 udp_output((conn_t *)arg, mp, NULL, 0); 6433 _UDP_EXIT((conn_t *)arg); 6434 } 6435 6436 static void 6437 udp_wput(queue_t *q, mblk_t *mp) 6438 { 6439 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 6440 SQTAG_UDP_WPUT); 6441 } 6442 6443 /* 6444 * Allocate and prepare a T_UNITDATA_REQ message. 6445 */ 6446 static mblk_t * 6447 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 6448 { 6449 struct T_unitdata_req *tudr; 6450 mblk_t *mp; 6451 6452 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 6453 if (mp != NULL) { 6454 mp->b_wptr += sizeof (*tudr) + addrlen; 6455 DB_TYPE(mp) = M_PROTO; 6456 6457 tudr = (struct T_unitdata_req *)mp->b_rptr; 6458 tudr->PRIM_type = T_UNITDATA_REQ; 6459 tudr->DEST_length = addrlen; 6460 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 6461 tudr->OPT_length = 0; 6462 tudr->OPT_offset = 0; 6463 bcopy(addr, tudr+1, addrlen); 6464 } 6465 return (mp); 6466 } 6467 6468 /* 6469 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 6470 * is valid when we are directly beneath the stream head, and thus sockfs 6471 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6472 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 6473 * this is done for both connected and non-connected endpoint. 6474 */ 6475 void 6476 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6477 { 6478 conn_t *connp; 6479 udp_t *udp; 6480 6481 q = UDP_WR(q); 6482 connp = Q_TO_CONN(q); 6483 udp = connp->conn_udp; 6484 6485 /* udpsockfs should only send down M_DATA for this entry point */ 6486 ASSERT(DB_TYPE(mp) == M_DATA); 6487 6488 mutex_enter(&connp->conn_lock); 6489 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 6490 6491 if (udp->udp_mode != UDP_MT_HOT) { 6492 /* 6493 * We can't enter this conn right away because another 6494 * thread is currently executing as writer; therefore we 6495 * need to deposit the message into the squeue to be 6496 * drained later. If a socket address is present, we 6497 * need to create a T_UNITDATA_REQ message as placeholder. 6498 */ 6499 if (addr != NULL && addrlen != 0) { 6500 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 6501 6502 if (tudr_mp == NULL) { 6503 mutex_exit(&connp->conn_lock); 6504 BUMP_MIB(&udp_mib, udpOutErrors); 6505 UDP_STAT(udp_out_err_tudr); 6506 freemsg(mp); 6507 return; 6508 } 6509 /* Tag the packet with T_UNITDATA_REQ */ 6510 tudr_mp->b_cont = mp; 6511 mp = tudr_mp; 6512 } 6513 mutex_exit(&connp->conn_lock); 6514 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 6515 return; 6516 } 6517 6518 /* We can execute as reader right away. */ 6519 UDP_READERS_INCREF(udp); 6520 mutex_exit(&connp->conn_lock); 6521 6522 udp_output(connp, mp, addr, addrlen); 6523 6524 udp_exit(connp); 6525 } 6526 6527 /* 6528 * udp_output_v6(): 6529 * Assumes that udp_wput did some sanity checking on the destination 6530 * address. 
6531 */ 6532 static mblk_t * 6533 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen, 6534 int *error) 6535 { 6536 ip6_t *ip6h; 6537 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6538 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 6539 mblk_t *mp2; 6540 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6541 size_t ip_len; 6542 udpha_t *udph; 6543 udp_t *udp = connp->conn_udp; 6544 queue_t *q = connp->conn_wq; 6545 ip6_pkt_t ipp_s; /* For ancillary data options */ 6546 ip6_pkt_t *ipp = &ipp_s; 6547 ip6_pkt_t *tipp; /* temporary ipp */ 6548 uint32_t csum = 0; 6549 uint_t ignore = 0; 6550 uint_t option_exists = 0, is_sticky = 0; 6551 uint8_t *cp; 6552 uint8_t *nxthdr_ptr; 6553 6554 *error = 0; 6555 6556 /* mp1 points to the M_DATA mblk carrying the packet */ 6557 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6558 ASSERT(tudr_optlen == 0 || DB_TYPE(mp) != M_DATA); 6559 6560 /* 6561 * If the local address is a mapped address return 6562 * an error. 6563 * It would be possible to send an IPv6 packet but the 6564 * response would never make it back to the application 6565 * since it is bound to a mapped address. 6566 */ 6567 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6568 *error = EADDRNOTAVAIL; 6569 goto done; 6570 } 6571 6572 ipp->ipp_fields = 0; 6573 ipp->ipp_sticky_ignored = 0; 6574 6575 /* 6576 * If TPI options passed in, feed it for verification and handling 6577 */ 6578 if (tudr_optlen != 0) { 6579 if (udp_unitdata_opt_process(q, mp, error, (void *)ipp) < 0) { 6580 /* failure */ 6581 goto done; 6582 } 6583 ignore = ipp->ipp_sticky_ignored; 6584 ASSERT(*error == 0); 6585 } 6586 6587 if (sin6->sin6_scope_id != 0 && 6588 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6589 /* 6590 * IPPF_SCOPE_ID is special. It's neither a sticky 6591 * option nor ancillary data. It needs to be 6592 * explicitly set in options_exists. 6593 */ 6594 option_exists |= IPPF_SCOPE_ID; 6595 } 6596 6597 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6598 /* No sticky options nor ancillary data. */ 6599 goto no_options; 6600 } 6601 6602 /* 6603 * Go through the options figuring out where each is going to 6604 * come from and build two masks. The first mask indicates if 6605 * the option exists at all. The second mask indicates if the 6606 * option is sticky or ancillary. 
6607 */ 6608 if (!(ignore & IPPF_HOPOPTS)) { 6609 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6610 option_exists |= IPPF_HOPOPTS; 6611 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6612 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6613 option_exists |= IPPF_HOPOPTS; 6614 is_sticky |= IPPF_HOPOPTS; 6615 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_hopoptslen; 6616 } 6617 } 6618 6619 if (!(ignore & IPPF_RTHDR)) { 6620 if (ipp->ipp_fields & IPPF_RTHDR) { 6621 option_exists |= IPPF_RTHDR; 6622 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6623 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6624 option_exists |= IPPF_RTHDR; 6625 is_sticky |= IPPF_RTHDR; 6626 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6627 } 6628 } 6629 6630 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6631 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6632 option_exists |= IPPF_RTDSTOPTS; 6633 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6634 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6635 option_exists |= IPPF_RTDSTOPTS; 6636 is_sticky |= IPPF_RTDSTOPTS; 6637 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6638 } 6639 } 6640 6641 if (!(ignore & IPPF_DSTOPTS)) { 6642 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6643 option_exists |= IPPF_DSTOPTS; 6644 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6645 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6646 option_exists |= IPPF_DSTOPTS; 6647 is_sticky |= IPPF_DSTOPTS; 6648 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6649 } 6650 } 6651 6652 if (!(ignore & IPPF_IFINDEX)) { 6653 if (ipp->ipp_fields & IPPF_IFINDEX) { 6654 option_exists |= IPPF_IFINDEX; 6655 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6656 option_exists |= IPPF_IFINDEX; 6657 is_sticky |= IPPF_IFINDEX; 6658 } 6659 } 6660 6661 if (!(ignore & IPPF_ADDR)) { 6662 if (ipp->ipp_fields & IPPF_ADDR) { 6663 option_exists |= IPPF_ADDR; 6664 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6665 option_exists |= IPPF_ADDR; 6666 is_sticky |= IPPF_ADDR; 6667 } 6668 } 6669 6670 if (!(ignore & IPPF_DONTFRAG)) { 6671 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6672 option_exists |= IPPF_DONTFRAG; 6673 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6674 option_exists |= IPPF_DONTFRAG; 6675 is_sticky |= IPPF_DONTFRAG; 6676 } 6677 } 6678 6679 if (!(ignore & IPPF_USE_MIN_MTU)) { 6680 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6681 option_exists |= IPPF_USE_MIN_MTU; 6682 } else if (udp->udp_sticky_ipp.ipp_fields & 6683 IPPF_USE_MIN_MTU) { 6684 option_exists |= IPPF_USE_MIN_MTU; 6685 is_sticky |= IPPF_USE_MIN_MTU; 6686 } 6687 } 6688 6689 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6690 option_exists |= IPPF_HOPLIMIT; 6691 /* IPV6_HOPLIMIT can never be sticky */ 6692 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6693 6694 if (!(ignore & IPPF_UNICAST_HOPS) && 6695 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6696 option_exists |= IPPF_UNICAST_HOPS; 6697 is_sticky |= IPPF_UNICAST_HOPS; 6698 } 6699 6700 if (!(ignore & IPPF_MULTICAST_HOPS) && 6701 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6702 option_exists |= IPPF_MULTICAST_HOPS; 6703 is_sticky |= IPPF_MULTICAST_HOPS; 6704 } 6705 6706 if (!(ignore & IPPF_TCLASS)) { 6707 if (ipp->ipp_fields & IPPF_TCLASS) { 6708 option_exists |= IPPF_TCLASS; 6709 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6710 option_exists |= IPPF_TCLASS; 6711 is_sticky |= IPPF_TCLASS; 6712 } 6713 } 6714 6715 no_options: 6716 6717 /* 6718 * If any options carried 
in the ip6i_t were specified, we 6719 * need to account for the ip6i_t in the data we'll be sending 6720 * down. 6721 */ 6722 if (option_exists & IPPF_HAS_IP6I) 6723 udp_ip_hdr_len += sizeof (ip6i_t); 6724 6725 /* check/fix buffer config, setup pointers into it */ 6726 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6727 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6728 !OK_32PTR(ip6h)) { 6729 /* Try to get everything in a single mblk next time */ 6730 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6731 udp->udp_max_hdr_len = udp_ip_hdr_len; 6732 (void) mi_set_sth_wroff(UDP_RD(q), 6733 udp->udp_max_hdr_len + udp_wroff_extra); 6734 } 6735 mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); 6736 if (mp2 == NULL) { 6737 *error = ENOMEM; 6738 goto done; 6739 } 6740 mp2->b_wptr = DB_LIM(mp2); 6741 mp2->b_cont = mp1; 6742 mp1 = mp2; 6743 if (DB_TYPE(mp) != M_DATA) 6744 mp->b_cont = mp1; 6745 else 6746 mp = mp1; 6747 6748 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6749 } 6750 mp1->b_rptr = (unsigned char *)ip6h; 6751 ip6i = (ip6i_t *)ip6h; 6752 6753 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6754 if (option_exists & IPPF_HAS_IP6I) { 6755 ip6h = (ip6_t *)&ip6i[1]; 6756 ip6i->ip6i_flags = 0; 6757 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6758 6759 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6760 if (option_exists & IPPF_SCOPE_ID) { 6761 ip6i->ip6i_flags |= IP6I_IFINDEX; 6762 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6763 } else if (option_exists & IPPF_IFINDEX) { 6764 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6765 ASSERT(tipp->ipp_ifindex != 0); 6766 ip6i->ip6i_flags |= IP6I_IFINDEX; 6767 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6768 } 6769 6770 if (option_exists & IPPF_ADDR) { 6771 /* 6772 * Enable per-packet source address verification if 6773 * IPV6_PKTINFO specified the source address. 6774 * ip6_src is set in the transport's _wput function. 6775 */ 6776 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6777 } 6778 6779 if (option_exists & IPPF_DONTFRAG) { 6780 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6781 } 6782 6783 if (option_exists & IPPF_USE_MIN_MTU) { 6784 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6785 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6786 } 6787 6788 if (option_exists & IPPF_NEXTHOP) { 6789 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6790 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6791 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6792 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6793 } 6794 6795 /* 6796 * tell IP this is an ip6i_t private header 6797 */ 6798 ip6i->ip6i_nxt = IPPROTO_RAW; 6799 } 6800 6801 /* Initialize IPv6 header */ 6802 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6803 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6804 6805 /* Set the hoplimit of the outgoing packet. */ 6806 if (option_exists & IPPF_HOPLIMIT) { 6807 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
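The value goes into ip6_hops and is flagged to IP via IP6I_HOPLIMIT in the ip6i_t.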
*/ 6808 ip6h->ip6_hops = ipp->ipp_hoplimit; 6809 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6810 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6811 ip6h->ip6_hops = udp->udp_multicast_ttl; 6812 if (option_exists & IPPF_MULTICAST_HOPS) 6813 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6814 } else { 6815 ip6h->ip6_hops = udp->udp_ttl; 6816 if (option_exists & IPPF_UNICAST_HOPS) 6817 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6818 } 6819 6820 if (option_exists & IPPF_ADDR) { 6821 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6822 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6823 ip6h->ip6_src = tipp->ipp_addr; 6824 } else { 6825 /* 6826 * The source address was not set using IPV6_PKTINFO. 6827 * First look at the bound source. 6828 * If unspecified fallback to __sin6_src_id. 6829 */ 6830 ip6h->ip6_src = udp->udp_v6src; 6831 if (sin6->__sin6_src_id != 0 && 6832 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6833 ip_srcid_find_id(sin6->__sin6_src_id, 6834 &ip6h->ip6_src, connp->conn_zoneid); 6835 } 6836 } 6837 6838 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6839 cp = (uint8_t *)&ip6h[1]; 6840 6841 /* 6842 * Here's where we have to start stringing together 6843 * any extension headers in the right order: 6844 * Hop-by-hop, destination, routing, and final destination opts. 6845 */ 6846 if (option_exists & IPPF_HOPOPTS) { 6847 /* Hop-by-hop options */ 6848 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6849 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6850 6851 *nxthdr_ptr = IPPROTO_HOPOPTS; 6852 nxthdr_ptr = &hbh->ip6h_nxt; 6853 6854 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 6855 cp += tipp->ipp_hopoptslen; 6856 } 6857 /* 6858 * En-route destination options 6859 * Only do them if there's a routing header as well 6860 */ 6861 if (option_exists & IPPF_RTDSTOPTS) { 6862 ip6_dest_t *dst = (ip6_dest_t *)cp; 6863 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6864 6865 *nxthdr_ptr = IPPROTO_DSTOPTS; 6866 nxthdr_ptr = &dst->ip6d_nxt; 6867 6868 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6869 cp += tipp->ipp_rtdstoptslen; 6870 } 6871 /* 6872 * Routing header next 6873 */ 6874 if (option_exists & IPPF_RTHDR) { 6875 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6876 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6877 6878 *nxthdr_ptr = IPPROTO_ROUTING; 6879 nxthdr_ptr = &rt->ip6r_nxt; 6880 6881 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6882 cp += tipp->ipp_rthdrlen; 6883 } 6884 /* 6885 * Do ultimate destination options 6886 */ 6887 if (option_exists & IPPF_DSTOPTS) { 6888 ip6_dest_t *dest = (ip6_dest_t *)cp; 6889 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6890 6891 *nxthdr_ptr = IPPROTO_DSTOPTS; 6892 nxthdr_ptr = &dest->ip6d_nxt; 6893 6894 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6895 cp += tipp->ipp_dstoptslen; 6896 } 6897 /* 6898 * Now set the last header pointer to the proto passed in 6899 */ 6900 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6901 *nxthdr_ptr = IPPROTO_UDP; 6902 6903 /* Update UDP header */ 6904 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6905 udph->uha_dst_port = sin6->sin6_port; 6906 udph->uha_src_port = udp->udp_port; 6907 6908 /* 6909 * Copy in the destination address 6910 */ 6911 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6912 ip6h->ip6_dst = ipv6_loopback; 6913 else 6914 ip6h->ip6_dst = sin6->sin6_addr; 6915 6916 ip6h->ip6_vcf = 6917 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6918 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6919 6920 if (option_exists & IPPF_TCLASS) { 6921 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6922 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6923 tipp->ipp_tclass); 6924 } 6925 6926 if (option_exists & IPPF_RTHDR) { 6927 ip6_rthdr_t *rth; 6928 6929 /* 6930 * Perform any processing needed for source routing. 6931 * We know that all extension headers will be in the same mblk 6932 * as the IPv6 header. 6933 */ 6934 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6935 if (rth != NULL && rth->ip6r_segleft != 0) { 6936 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6937 /* 6938 * Drop packet - only support Type 0 routing. 6939 * Notify the application as well. 6940 */ 6941 *error = EPROTO; 6942 goto done; 6943 } 6944 6945 /* 6946 * rth->ip6r_len is twice the number of 6947 * addresses in the header. Thus it must be even. 6948 */ 6949 if (rth->ip6r_len & 0x1) { 6950 *error = EPROTO; 6951 goto done; 6952 } 6953 /* 6954 * Shuffle the routing header and ip6_dst 6955 * addresses, and get the checksum difference 6956 * between the first hop (in ip6_dst) and 6957 * the destination (in the last routing hdr entry). 6958 */ 6959 csum = ip_massage_options_v6(ip6h, rth); 6960 /* 6961 * Verify that the first hop isn't a mapped address. 6962 * Routers along the path need to do this verification 6963 * for subsequent hops. 6964 */ 6965 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6966 *error = EADDRNOTAVAIL; 6967 goto done; 6968 } 6969 6970 cp += (rth->ip6r_len + 1)*8; 6971 } 6972 } 6973 6974 /* count up length of UDP packet */ 6975 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6976 if ((mp2 = mp1->b_cont) != NULL) { 6977 do { 6978 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6979 ip_len += (uint32_t)MBLKL(mp2); 6980 } while ((mp2 = mp2->b_cont) != NULL); 6981 } 6982 6983 /* 6984 * If the size of the packet is greater than the maximum allowed by 6985 * ip, return an error. Passing this down could cause panics because 6986 * the size will have wrapped and be inconsistent with the msg size. 6987 */ 6988 if (ip_len > IP_MAXPACKET) { 6989 *error = EMSGSIZE; 6990 goto done; 6991 } 6992 6993 /* Store the UDP length. Subtract length of extension hdrs */ 6994 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6995 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6996 6997 /* 6998 * We make it easy for IP to include our pseudo header 6999 * by putting our length in uh_checksum, modified (if 7000 * we have a routing header) by the checksum difference 7001 * between the ultimate destination and first hop addresses. 7002 * Note: UDP over IPv6 must always checksum the packet. 7003 */ 7004 csum += udph->uha_length; 7005 csum = (csum & 0xFFFF) + (csum >> 16); 7006 udph->uha_checksum = (uint16_t)csum; 7007 7008 #ifdef _LITTLE_ENDIAN 7009 ip_len = htons(ip_len); 7010 #endif 7011 ip6h->ip6_plen = ip_len; 7012 7013 if (DB_TYPE(mp) != M_DATA) { 7014 ASSERT(mp != mp1); 7015 freeb(mp); 7016 } 7017 7018 /* mp has been consumed and we'll return success */ 7019 ASSERT(*error == 0); 7020 mp = NULL; 7021 7022 /* We're done. 
Pass the packet to IP */ 7023 BUMP_MIB(&udp_mib, udpOutDatagrams); 7024 ip_output_v6(connp, mp1, q, IP_WPUT); 7025 7026 done: 7027 if (*error != 0) { 7028 ASSERT(mp != NULL); 7029 BUMP_MIB(&udp_mib, udpOutErrors); 7030 } 7031 return (mp); 7032 } 7033 7034 static void 7035 udp_wput_other(queue_t *q, mblk_t *mp) 7036 { 7037 uchar_t *rptr = mp->b_rptr; 7038 struct datab *db; 7039 struct iocblk *iocp; 7040 cred_t *cr; 7041 conn_t *connp = Q_TO_CONN(q); 7042 udp_t *udp = connp->conn_udp; 7043 7044 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7045 "udp_wput_other_start: q %p", q); 7046 7047 db = mp->b_datap; 7048 7049 cr = DB_CREDDEF(mp, connp->conn_cred); 7050 7051 switch (db->db_type) { 7052 case M_PROTO: 7053 case M_PCPROTO: 7054 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7055 freemsg(mp); 7056 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7057 "udp_wput_other_end: q %p (%S)", 7058 q, "protoshort"); 7059 return; 7060 } 7061 switch (((t_primp_t)rptr)->type) { 7062 case T_ADDR_REQ: 7063 udp_addr_req(q, mp); 7064 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7065 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7066 return; 7067 case O_T_BIND_REQ: 7068 case T_BIND_REQ: 7069 udp_bind(q, mp); 7070 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7071 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7072 return; 7073 case T_CONN_REQ: 7074 udp_connect(q, mp); 7075 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7076 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7077 return; 7078 case T_CAPABILITY_REQ: 7079 udp_capability_req(q, mp); 7080 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7081 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7082 return; 7083 case T_INFO_REQ: 7084 udp_info_req(q, mp); 7085 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7086 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7087 return; 7088 case T_UNITDATA_REQ: 7089 /* 7090 * If a T_UNITDATA_REQ gets here, the address must 7091 * be bad. Valid T_UNITDATA_REQs are handled 7092 * in udp_wput. 7093 */ 7094 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7095 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7096 "udp_wput_other_end: q %p (%S)", 7097 q, "unitdatareq"); 7098 return; 7099 case T_UNBIND_REQ: 7100 udp_unbind(q, mp); 7101 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7102 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7103 return; 7104 case T_SVR4_OPTMGMT_REQ: 7105 if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr)) 7106 /* 7107 * Use upper queue for option processing in 7108 * case the request is not handled at this 7109 * level and needs to be passed down to IP. 7110 */ 7111 (void) svr4_optcom_req(_WR(UDP_RD(q)), 7112 mp, cr, &udp_opt_obj); 7113 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7114 "udp_wput_other_end: q %p (%S)", 7115 q, "optmgmtreq"); 7116 return; 7117 7118 case T_OPTMGMT_REQ: 7119 /* 7120 * Use upper queue for option processing in 7121 * case the request is not handled at this 7122 * level and needs to be passed down to IP. 7123 */ 7124 (void) tpi_optcom_req(_WR(UDP_RD(q)), 7125 mp, cr, &udp_opt_obj); 7126 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7127 "udp_wput_other_end: q %p (%S)", 7128 q, "optmgmtreq"); 7129 return; 7130 7131 case T_DISCON_REQ: 7132 udp_disconnect(q, mp); 7133 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7134 "udp_wput_other_end: q %p (%S)", 7135 q, "disconreq"); 7136 return; 7137 7138 /* The following TPI message is not supported by udp. 
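Connection responses are meaningless for a connectionless transport and are rejected with TNOTSUPPORT.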
*/ 7139 case O_T_CONN_RES: 7140 case T_CONN_RES: 7141 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7142 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7143 "udp_wput_other_end: q %p (%S)", 7144 q, "connres/disconreq"); 7145 return; 7146 7147 /* The following 3 TPI messages are illegal for udp. */ 7148 case T_DATA_REQ: 7149 case T_EXDATA_REQ: 7150 case T_ORDREL_REQ: 7151 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7152 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7153 "udp_wput_other_end: q %p (%S)", 7154 q, "data/exdata/ordrel"); 7155 return; 7156 default: 7157 break; 7158 } 7159 break; 7160 case M_FLUSH: 7161 if (*rptr & FLUSHW) 7162 flushq(q, FLUSHDATA); 7163 break; 7164 case M_IOCTL: 7165 iocp = (struct iocblk *)mp->b_rptr; 7166 switch (iocp->ioc_cmd) { 7167 case TI_GETPEERNAME: 7168 if (udp->udp_state != TS_DATA_XFER) { 7169 /* 7170 * If a default destination address has not 7171 * been associated with the stream, then we 7172 * don't know the peer's name. 7173 */ 7174 iocp->ioc_error = ENOTCONN; 7175 iocp->ioc_count = 0; 7176 mp->b_datap->db_type = M_IOCACK; 7177 putnext(UDP_RD(q), mp); 7178 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7179 "udp_wput_other_end: q %p (%S)", 7180 q, "getpeername"); 7181 return; 7182 } 7183 /* FALLTHRU */ 7184 case TI_GETMYNAME: { 7185 /* 7186 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7187 * need to copyin the user's strbuf structure. 7188 * Processing will continue in the M_IOCDATA case 7189 * below. 7190 */ 7191 mi_copyin(q, mp, NULL, 7192 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7193 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7194 "udp_wput_other_end: q %p (%S)", 7195 q, "getmyname"); 7196 return; 7197 } 7198 case ND_SET: 7199 /* nd_getset performs the necessary checking */ 7200 case ND_GET: 7201 if (nd_getset(q, udp_g_nd, mp)) { 7202 putnext(UDP_RD(q), mp); 7203 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7204 "udp_wput_other_end: q %p (%S)", 7205 q, "get"); 7206 return; 7207 } 7208 break; 7209 case _SIOCSOCKFALLBACK: 7210 /* 7211 * Either sockmod is about to be popped and the 7212 * socket would now be treated as a plain stream, 7213 * or a module is about to be pushed so we could 7214 * no longer use read-side synchronous stream. 7215 * Drain any queued data and disable direct sockfs 7216 * interface from now on. 7217 */ 7218 if (!udp->udp_issocket) { 7219 DB_TYPE(mp) = M_IOCNAK; 7220 iocp->ioc_error = EINVAL; 7221 } else { 7222 udp->udp_issocket = B_FALSE; 7223 if (udp->udp_direct_sockfs) { 7224 /* 7225 * Disable read-side synchronous 7226 * stream interface and drain any 7227 * queued data. 7228 */ 7229 udp_rcv_drain(UDP_RD(q), udp, 7230 B_FALSE); 7231 ASSERT(!udp->udp_direct_sockfs); 7232 UDP_STAT(udp_sock_fallback); 7233 } 7234 DB_TYPE(mp) = M_IOCACK; 7235 iocp->ioc_error = 0; 7236 } 7237 iocp->ioc_count = 0; 7238 iocp->ioc_rval = 0; 7239 putnext(UDP_RD(q), mp); 7240 return; 7241 default: 7242 break; 7243 } 7244 break; 7245 case M_IOCDATA: 7246 udp_wput_iocdata(q, mp); 7247 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7248 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7249 return; 7250 default: 7251 /* Unrecognized messages are passed through without change. 
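They fall out of the switch and are handed to IP via the ip_output() call below.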
*/ 7252 break; 7253 } 7254 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7255 "udp_wput_other_end: q %p (%S)", q, "end"); 7256 ip_output(connp, mp, q, IP_WPUT); 7257 } 7258 7259 /* ARGSUSED */ 7260 static void 7261 udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 7262 { 7263 udp_wput_other(((conn_t *)arg)->conn_wq, mp); 7264 udp_exit((conn_t *)arg); 7265 } 7266 7267 /* 7268 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7269 * messages. 7270 */ 7271 static void 7272 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7273 { 7274 mblk_t *mp1; 7275 STRUCT_HANDLE(strbuf, sb); 7276 uint16_t port; 7277 in6_addr_t v6addr; 7278 ipaddr_t v4addr; 7279 uint32_t flowinfo = 0; 7280 int addrlen; 7281 udp_t *udp = Q_TO_UDP(q); 7282 7283 /* Make sure it is one of ours. */ 7284 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7285 case TI_GETMYNAME: 7286 case TI_GETPEERNAME: 7287 break; 7288 default: 7289 ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); 7290 return; 7291 } 7292 7293 q = WR(UDP_RD(q)); 7294 switch (mi_copy_state(q, mp, &mp1)) { 7295 case -1: 7296 return; 7297 case MI_COPY_CASE(MI_COPY_IN, 1): 7298 break; 7299 case MI_COPY_CASE(MI_COPY_OUT, 1): 7300 /* 7301 * The address has been copied out, so now 7302 * copyout the strbuf. 7303 */ 7304 mi_copyout(q, mp); 7305 return; 7306 case MI_COPY_CASE(MI_COPY_OUT, 2): 7307 /* 7308 * The address and strbuf have been copied out. 7309 * We're done, so just acknowledge the original 7310 * M_IOCTL. 7311 */ 7312 mi_copy_done(q, mp, 0); 7313 return; 7314 default: 7315 /* 7316 * Something strange has happened, so acknowledge 7317 * the original M_IOCTL with an EPROTO error. 7318 */ 7319 mi_copy_done(q, mp, EPROTO); 7320 return; 7321 } 7322 7323 /* 7324 * Now we have the strbuf structure for TI_GETMYNAME 7325 * and TI_GETPEERNAME. Next we copyout the requested 7326 * address and then we'll copyout the strbuf. 7327 */ 7328 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 7329 (void *)mp1->b_rptr); 7330 if (udp->udp_family == AF_INET) 7331 addrlen = sizeof (sin_t); 7332 else 7333 addrlen = sizeof (sin6_t); 7334 7335 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7336 mi_copy_done(q, mp, EINVAL); 7337 return; 7338 } 7339 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7340 case TI_GETMYNAME: 7341 if (udp->udp_family == AF_INET) { 7342 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7343 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7344 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7345 v4addr = V4_PART_OF_V6(udp->udp_v6src); 7346 } else { 7347 /* 7348 * INADDR_ANY 7349 * udp_v6src is not set, we might be bound to 7350 * broadcast/multicast. Use udp_bound_v6src as 7351 * local address instead (that could 7352 * also still be INADDR_ANY) 7353 */ 7354 v4addr = V4_PART_OF_V6(udp->udp_bound_v6src); 7355 } 7356 } else { 7357 /* udp->udp_family == AF_INET6 */ 7358 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7359 v6addr = udp->udp_v6src; 7360 } else { 7361 /* 7362 * UNSPECIFIED 7363 * udp_v6src is not set, we might be bound to 7364 * broadcast/multicast. 
Use udp_bound_v6src as 7365 * local address instead (that could 7366 * also still be UNSPECIFIED) 7367 */ 7368 v6addr = udp->udp_bound_v6src; 7369 } 7370 } 7371 port = udp->udp_port; 7372 break; 7373 case TI_GETPEERNAME: 7374 if (udp->udp_state != TS_DATA_XFER) { 7375 mi_copy_done(q, mp, ENOTCONN); 7376 return; 7377 } 7378 if (udp->udp_family == AF_INET) { 7379 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7380 v4addr = V4_PART_OF_V6(udp->udp_v6dst); 7381 } else { 7382 /* udp->udp_family == AF_INET6) */ 7383 v6addr = udp->udp_v6dst; 7384 flowinfo = udp->udp_flowinfo; 7385 } 7386 port = udp->udp_dstport; 7387 break; 7388 default: 7389 mi_copy_done(q, mp, EPROTO); 7390 return; 7391 } 7392 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7393 if (!mp1) 7394 return; 7395 7396 if (udp->udp_family == AF_INET) { 7397 sin_t *sin; 7398 7399 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 7400 sin = (sin_t *)mp1->b_rptr; 7401 mp1->b_wptr = (uchar_t *)&sin[1]; 7402 *sin = sin_null; 7403 sin->sin_family = AF_INET; 7404 sin->sin_addr.s_addr = v4addr; 7405 sin->sin_port = port; 7406 } else { 7407 /* udp->udp_family == AF_INET6 */ 7408 sin6_t *sin6; 7409 7410 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 7411 sin6 = (sin6_t *)mp1->b_rptr; 7412 mp1->b_wptr = (uchar_t *)&sin6[1]; 7413 *sin6 = sin6_null; 7414 sin6->sin6_family = AF_INET6; 7415 sin6->sin6_flowinfo = flowinfo; 7416 sin6->sin6_addr = v6addr; 7417 sin6->sin6_port = port; 7418 } 7419 /* Copy out the address */ 7420 mi_copyout(q, mp); 7421 } 7422 7423 7424 static int 7425 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7426 void *thisdg_attrs) 7427 { 7428 struct T_unitdata_req *udreqp; 7429 int is_absreq_failure; 7430 cred_t *cr; 7431 conn_t *connp = Q_TO_CONN(q); 7432 7433 ASSERT(((t_primp_t)mp->b_rptr)->type); 7434 7435 cr = DB_CREDDEF(mp, connp->conn_cred); 7436 7437 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7438 *errorp = 0; 7439 7440 /* 7441 * Use upper queue for option processing since the callback 7442 * routines expect to be called in UDP instance instead of IP. 7443 */ 7444 *errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length, 7445 udreqp->OPT_offset, cr, &udp_opt_obj, 7446 thisdg_attrs, &is_absreq_failure); 7447 7448 if (*errorp != 0) { 7449 /* 7450 * Note: No special action needed in this 7451 * module for "is_absreq_failure" 7452 */ 7453 return (-1); /* failure */ 7454 } 7455 ASSERT(is_absreq_failure == 0); 7456 return (0); /* success */ 7457 } 7458 7459 void 7460 udp_ddi_init(void) 7461 { 7462 int i; 7463 7464 UDP6_MAJ = ddi_name_to_major(UDP6); 7465 7466 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7467 udp_opt_obj.odb_opt_arr_cnt); 7468 7469 if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) { 7470 /* Not a power of two. 
Round up to nearest power of two */ 7471 for (i = 0; i < 31; i++) { 7472 if (udp_bind_fanout_size < (1 << i)) 7473 break; 7474 } 7475 udp_bind_fanout_size = 1 << i; 7476 } 7477 udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size * 7478 sizeof (udp_fanout_t), KM_SLEEP); 7479 for (i = 0; i < udp_bind_fanout_size; i++) { 7480 mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7481 NULL); 7482 } 7483 (void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr)); 7484 7485 udp_kstat_init(); 7486 7487 udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), 7488 CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); 7489 } 7490 7491 void 7492 udp_ddi_destroy(void) 7493 { 7494 int i; 7495 7496 nd_free(&udp_g_nd); 7497 7498 for (i = 0; i < udp_bind_fanout_size; i++) { 7499 mutex_destroy(&udp_bind_fanout[i].uf_lock); 7500 } 7501 7502 kmem_free(udp_bind_fanout, udp_bind_fanout_size * 7503 sizeof (udp_fanout_t)); 7504 7505 udp_kstat_fini(); 7506 7507 kmem_cache_destroy(udp_cache); 7508 } 7509 7510 static void 7511 udp_kstat_init(void) 7512 { 7513 udp_named_kstat_t template = { 7514 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 7515 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7516 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 7517 { "entrySize", KSTAT_DATA_INT32, 0 }, 7518 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7519 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7520 }; 7521 7522 udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME, 7523 "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0); 7524 7525 if (udp_mibkp == NULL) 7526 return; 7527 7528 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7529 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7530 7531 bcopy(&template, udp_mibkp->ks_data, sizeof (template)); 7532 7533 udp_mibkp->ks_update = udp_kstat_update; 7534 7535 kstat_install(udp_mibkp); 7536 7537 if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat", 7538 "net", KSTAT_TYPE_NAMED, 7539 sizeof (udp_statistics) / sizeof (kstat_named_t), 7540 KSTAT_FLAG_VIRTUAL)) != NULL) { 7541 udp_ksp->ks_data = &udp_statistics; 7542 kstat_install(udp_ksp); 7543 } 7544 } 7545 7546 static void 7547 udp_kstat_fini(void) 7548 { 7549 if (udp_ksp != NULL) { 7550 kstat_delete(udp_ksp); 7551 udp_ksp = NULL; 7552 } 7553 if (udp_mibkp != NULL) { 7554 kstat_delete(udp_mibkp); 7555 udp_mibkp = NULL; 7556 } 7557 } 7558 7559 static int 7560 udp_kstat_update(kstat_t *kp, int rw) 7561 { 7562 udp_named_kstat_t *udpkp; 7563 7564 if ((kp == NULL) || (kp->ks_data == NULL)) 7565 return (EIO); 7566 7567 if (rw == KSTAT_WRITE) 7568 return (EACCES); 7569 7570 udpkp = (udp_named_kstat_t *)kp->ks_data; 7571 7572 udpkp->inDatagrams.value.ui32 = udp_mib.udpInDatagrams; 7573 udpkp->inErrors.value.ui32 = udp_mib.udpInErrors; 7574 udpkp->outDatagrams.value.ui32 = udp_mib.udpOutDatagrams; 7575 udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors; 7576 7577 return (0); 7578 } 7579 7580 /* ARGSUSED */ 7581 static void 7582 udp_rput(queue_t *q, mblk_t *mp) 7583 { 7584 /* 7585 * We get here whenever we do qreply() from IP, 7586 * i.e as part of handlings ioctls, etc. 7587 */ 7588 putnext(q, mp); 7589 } 7590 7591 /* 7592 * Read-side synchronous stream info entry point, called as a 7593 * result of handling certain STREAMS ioctl operations. 
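 * Depending on the INFOD_* flags requested, it reports the number of
 * queued datagrams, their total byte count, and the size of the first
 * datagram, and it may copy out the first datagram's contents.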
7594 */ 7595 static int 7596 udp_rinfop(queue_t *q, infod_t *dp) 7597 { 7598 mblk_t *mp; 7599 uint_t cmd = dp->d_cmd; 7600 int res = 0; 7601 int error = 0; 7602 udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); 7603 struct stdata *stp = STREAM(q); 7604 7605 mutex_enter(&udp->udp_drain_lock); 7606 /* If shutdown on read has happened, return nothing */ 7607 mutex_enter(&stp->sd_lock); 7608 if (stp->sd_flag & STREOF) { 7609 mutex_exit(&stp->sd_lock); 7610 goto done; 7611 } 7612 mutex_exit(&stp->sd_lock); 7613 7614 if ((mp = udp->udp_rcv_list_head) == NULL) 7615 goto done; 7616 7617 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7618 7619 if (cmd & INFOD_COUNT) { 7620 /* 7621 * Return the number of messages. 7622 */ 7623 dp->d_count += udp->udp_rcv_msgcnt; 7624 res |= INFOD_COUNT; 7625 } 7626 if (cmd & INFOD_BYTES) { 7627 /* 7628 * Return size of all data messages. 7629 */ 7630 dp->d_bytes += udp->udp_rcv_cnt; 7631 res |= INFOD_BYTES; 7632 } 7633 if (cmd & INFOD_FIRSTBYTES) { 7634 /* 7635 * Return size of first data message. 7636 */ 7637 dp->d_bytes = msgdsize(mp); 7638 res |= INFOD_FIRSTBYTES; 7639 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7640 } 7641 if (cmd & INFOD_COPYOUT) { 7642 mblk_t *mp1 = mp->b_cont; 7643 int n; 7644 /* 7645 * Return data contents of first message. 7646 */ 7647 ASSERT(DB_TYPE(mp1) == M_DATA); 7648 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7649 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7650 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7651 UIO_READ, dp->d_uiop)) != 0) { 7652 goto done; 7653 } 7654 mp1 = mp1->b_cont; 7655 } 7656 res |= INFOD_COPYOUT; 7657 dp->d_cmd &= ~INFOD_COPYOUT; 7658 } 7659 done: 7660 mutex_exit(&udp->udp_drain_lock); 7661 7662 dp->d_res |= res; 7663 7664 return (error); 7665 } 7666 7667 /* 7668 * Read-side synchronous stream entry point. This is called as a result 7669 * of recv/read operation done at sockfs, and is guaranteed to execute 7670 * outside of the interrupt thread context. It returns a single datagram 7671 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7672 */ 7673 static int 7674 udp_rrw(queue_t *q, struiod_t *dp) 7675 { 7676 mblk_t *mp; 7677 udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); 7678 7679 /* We should never get here when we're in SNMP mode */ 7680 ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); 7681 7682 /* 7683 * Dequeue datagram from the head of the list and return 7684 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7685 * set/cleared depending on whether or not there's data 7686 * remaining in the list. 7687 */ 7688 mutex_enter(&udp->udp_drain_lock); 7689 if (!udp->udp_direct_sockfs) { 7690 mutex_exit(&udp->udp_drain_lock); 7691 UDP_STAT(udp_rrw_busy); 7692 return (EBUSY); 7693 } 7694 if ((mp = udp->udp_rcv_list_head) != NULL) { 7695 uint_t size = msgdsize(mp); 7696 7697 /* Last datagram in the list? */ 7698 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7699 udp->udp_rcv_list_tail = NULL; 7700 mp->b_next = NULL; 7701 7702 udp->udp_rcv_cnt -= size; 7703 udp->udp_rcv_msgcnt--; 7704 UDP_STAT(udp_rrw_msgcnt); 7705 7706 /* No longer flow-controlling? */ 7707 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7708 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7709 udp->udp_drain_qfull = B_FALSE; 7710 } 7711 if (udp->udp_rcv_list_head == NULL) { 7712 /* 7713 * Either we just dequeued the last datagram or 7714 * we get here from sockfs and have nothing to 7715 * return; in this case clear RSLEEP. 
7716 */ 7717 ASSERT(udp->udp_rcv_cnt == 0); 7718 ASSERT(udp->udp_rcv_msgcnt == 0); 7719 ASSERT(udp->udp_rcv_list_tail == NULL); 7720 STR_WAKEUP_CLEAR(STREAM(q)); 7721 } else { 7722 /* 7723 * More data follows; we need udp_rrw() to be 7724 * called in future to pick up the rest. 7725 */ 7726 STR_WAKEUP_SET(STREAM(q)); 7727 } 7728 mutex_exit(&udp->udp_drain_lock); 7729 dp->d_mp = mp; 7730 return (0); 7731 } 7732 7733 /* 7734 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7735 * list; this is typically executed within the interrupt thread context 7736 * and so we do things as quickly as possible. 7737 */ 7738 static void 7739 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7740 { 7741 ASSERT(q == RD(q)); 7742 ASSERT(pkt_len == msgdsize(mp)); 7743 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7744 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7745 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7746 7747 mutex_enter(&udp->udp_drain_lock); 7748 /* 7749 * Wake up and signal the receiving app; it is okay to do this 7750 * before enqueueing the mp because we are holding the drain lock. 7751 * One of the advantages of synchronous stream is the ability for 7752 * us to find out when the application performs a read on the 7753 * socket by way of udp_rrw() entry point being called. We need 7754 * to generate SIGPOLL/SIGIO for each received data in the case 7755 * of asynchronous socket just as in the strrput() case. However, 7756 * we only wake the application up when necessary, i.e. during the 7757 * first enqueue. When udp_rrw() is called, we send up a single 7758 * datagram upstream and call STR_WAKEUP_SET() again when there 7759 * are still data remaining in our receive queue. 7760 */ 7761 if (udp->udp_rcv_list_head == NULL) { 7762 STR_WAKEUP_SET(STREAM(q)); 7763 udp->udp_rcv_list_head = mp; 7764 } else { 7765 udp->udp_rcv_list_tail->b_next = mp; 7766 } 7767 udp->udp_rcv_list_tail = mp; 7768 udp->udp_rcv_cnt += pkt_len; 7769 udp->udp_rcv_msgcnt++; 7770 7771 /* Need to flow-control? */ 7772 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7773 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7774 udp->udp_drain_qfull = B_TRUE; 7775 7776 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 7777 STR_SENDSIG(STREAM(q)); 7778 mutex_exit(&udp->udp_drain_lock); 7779 } 7780 7781 /* 7782 * Drain the contents of receive list to the module upstream; we do 7783 * this during close or when we fallback to the slow mode due to 7784 * sockmod being popped or a module being pushed on top of us. 7785 */ 7786 static void 7787 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7788 { 7789 mblk_t *mp; 7790 7791 ASSERT(q == RD(q)); 7792 7793 mutex_enter(&udp->udp_drain_lock); 7794 /* 7795 * There is no race with a concurrent udp_input() sending 7796 * up packets using putnext() after we have cleared the 7797 * udp_direct_sockfs flag but before we have completed 7798 * sending up the packets in udp_rcv_list, since we are 7799 * either a writer or we have quiesced the conn. 7800 */ 7801 udp->udp_direct_sockfs = B_FALSE; 7802 mutex_exit(&udp->udp_drain_lock); 7803 7804 if (udp->udp_rcv_list_head != NULL) 7805 UDP_STAT(udp_drain); 7806 7807 /* 7808 * Send up everything via putnext(); note here that we 7809 * don't need the udp_drain_lock to protect us since 7810 * nothing can enter udp_rrw() and that we currently 7811 * have exclusive access to this udp. 
7812 */ 7813 while ((mp = udp->udp_rcv_list_head) != NULL) { 7814 udp->udp_rcv_list_head = mp->b_next; 7815 mp->b_next = NULL; 7816 udp->udp_rcv_cnt -= msgdsize(mp); 7817 udp->udp_rcv_msgcnt--; 7818 if (closing) { 7819 freemsg(mp); 7820 } else { 7821 putnext(q, mp); 7822 } 7823 } 7824 ASSERT(udp->udp_rcv_cnt == 0); 7825 ASSERT(udp->udp_rcv_msgcnt == 0); 7826 ASSERT(udp->udp_rcv_list_head == NULL); 7827 udp->udp_rcv_list_tail = NULL; 7828 udp->udp_drain_qfull = B_FALSE; 7829 } 7830 7831 static size_t 7832 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7833 { 7834 /* We add a bit of extra buffering */ 7835 size += size >> 1; 7836 if (size > udp_max_buf) 7837 size = udp_max_buf; 7838 7839 udp->udp_rcv_hiwat = size; 7840 return (size); 7841 } 7842 7843 /* 7844 * Little helper for IPsec's NAT-T processing. 7845 */ 7846 boolean_t 7847 udp_compute_checksum(void) 7848 { 7849 return (udp_do_checksum); 7850 } 7851
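/*
 * Illustrative sketch only (not compiled): the checksum arithmetic in
 * udp_output_v4() and udp_output_v6() above folds ones-complement sums
 * with repeated "(sum & 0xFFFF) + (sum >> 16)" steps so that any carry
 * out of the low 16 bits is added back in.  The hypothetical helper
 * below is not part of this module; it only restates that folding in
 * stand-alone form for readers unfamiliar with the idiom.
 */
#if 0
static uint16_t
udp_cksum_fold_example(uint32_t sum)
{
	/* First fold: add the carries above bit 15 back into the sum. */
	sum = (sum & 0xFFFF) + (sum >> 16);
	/* The first fold can itself produce a carry, so fold once more. */
	sum = (sum & 0xFFFF) + (sum >> 16);
	return ((uint16_t)sum);
}
#endif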