1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 31 32 #include <sys/types.h> 33 #include <sys/stream.h> 34 #include <sys/dlpi.h> 35 #include <sys/pattr.h> 36 #include <sys/stropts.h> 37 #include <sys/strlog.h> 38 #include <sys/strsun.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/tiuser.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/strsubr.h> 46 #include <sys/suntpi.h> 47 #include <sys/xti_inet.h> 48 #include <sys/cmn_err.h> 49 #include <sys/kmem.h> 50 #include <sys/policy.h> 51 #include <sys/ucred.h> 52 #include <sys/zone.h> 53 54 #include <sys/socket.h> 55 #include <sys/sockio.h> 56 #include <sys/vtrace.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 67 #include <inet/common.h> 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_if.h> 73 #include <inet/ip_multi.h> 74 #include <inet/mi.h> 75 #include <inet/mib2.h> 76 #include <inet/nd.h> 77 #include <inet/optcom.h> 78 #include <inet/snmpcom.h> 79 #include <inet/kstatcom.h> 80 #include <inet/udp_impl.h> 81 #include <inet/ipclassifier.h> 82 #include <inet/ipsec_impl.h> 83 #include <inet/ipp_common.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latters presence. 89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 /* 94 * Synchronization notes: 95 * 96 * UDP uses a combination of its internal perimeter, a global lock and 97 * a set of bind hash locks to protect its data structures. Please see 98 * the note above udp_mode_assertions for details about the internal 99 * perimeter. 100 * 101 * When a UDP endpoint is bound to a local port, it is inserted into 102 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 103 * The size of the array is controlled by the udp_bind_fanout_size variable. 104 * This variable can be changed in /etc/system if the default value is 105 * not large enough. Each bind hash bucket is protected by a per bucket 106 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 107 * structure. An UDP endpoint is removed from the bind hash list only 108 * when it is being unbound or being closed. The per bucket lock also 109 * protects a UDP endpoint's state changes. 110 * 111 * Plumbing notes: 112 * 113 * Both udp and ip are merged, but the streams plumbing is kept unchanged 114 * in that udp is always pushed atop /dev/ip. This is done to preserve 115 * backwards compatibility for certain applications which rely on such 116 * plumbing geometry to do things such as issuing I_POP on the stream 117 * in order to obtain direct access to /dev/ip, etc. 118 * 119 * All UDP processings happen in the /dev/ip instance; the udp module 120 * instance does not possess any state about the endpoint, and merely 121 * acts as a dummy module whose presence is to keep the streams plumbing 122 * appearance unchanged. At open time /dev/ip allocates a conn_t that 123 * happens to embed a udp_t. This stays dormant until the time udp is 124 * pushed, which indicates to /dev/ip that it must convert itself from 125 * an IP to a UDP endpoint. 126 * 127 * We only allow for the following plumbing cases: 128 * 129 * Normal: 130 * /dev/ip is first opened and later udp is pushed directly on top. 131 * This is the default action that happens when a udp socket or 132 * /dev/udp is opened. The conn_t created by /dev/ip instance is 133 * now shared and is marked with IPCL_UDP. 134 * 135 * SNMP-only: 136 * udp is pushed on top of a module other than /dev/ip. When this 137 * happens it will support only SNMP semantics. A new conn_t is 138 * allocated and marked with IPCL_UDPMOD. 139 * 140 * The above cases imply that we don't support any intermediate module to 141 * reside in between /dev/ip and udp -- in fact, we never supported such 142 * scenario in the past as the inter-layer communication semantics have 143 * always been private. Also note that the normal case allows for SNMP 144 * requests to be processed in addition to the rest of UDP operations. 145 * 146 * The normal case plumbing is depicted by the following diagram: 147 * 148 * +---------------+---------------+ 149 * | | | udp 150 * | udp_wq | udp_rq | 151 * | | UDP_RD | 152 * | | | 153 * +---------------+---------------+ 154 * | ^ 155 * v | 156 * +---------------+---------------+ 157 * | | | /dev/ip 158 * | ip_wq | ip_rq | conn_t 159 * | UDP_WR | | 160 * | | | 161 * +---------------+---------------+ 162 * 163 * Messages arriving at udp_wq from above will end up in ip_wq before 164 * it gets processed, i.e. udp write entry points will advance udp_wq 165 * and use its q_next value as ip_wq in order to use the conn_t that 166 * is stored in its q_ptr. Likewise, messages generated by ip to the 167 * module above udp will appear as if they are originated from udp_rq, 168 * i.e. putnext() calls to the module above udp is done using the 169 * udp_rq instead of ip_rq in order to avoid udp_rput() which does 170 * nothing more than calling putnext(). 171 * 172 * The above implies the following rule of thumb: 173 * 174 * 1. udp_t is obtained from conn_t, which is created by the /dev/ip 175 * instance and is stored in q_ptr of both ip_wq and ip_rq. There 176 * is no direct reference to conn_t from either udp_wq or udp_rq. 177 * 178 * 2. Write-side entry points of udp can obtain the conn_t via the 179 * Q_TO_CONN() macro, using the queue value obtain from UDP_WR(). 180 * 181 * 3. While in /dev/ip context, putnext() to the module above udp can 182 * be done by supplying the queue value obtained from UDP_RD(). 183 * 184 */ 185 186 static queue_t *UDP_WR(queue_t *); 187 static queue_t *UDP_RD(queue_t *); 188 189 udp_stat_t udp_statistics = { 190 { "udp_ip_send", KSTAT_DATA_UINT64 }, 191 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 192 { "udp_ire_null", KSTAT_DATA_UINT64 }, 193 { "udp_drain", KSTAT_DATA_UINT64 }, 194 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 195 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 196 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 197 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 198 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 199 { "udp_out_opt", KSTAT_DATA_UINT64 }, 200 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 201 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 202 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 203 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 204 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 205 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 206 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 207 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 208 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 209 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 210 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 211 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 212 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 213 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 214 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 215 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 216 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 217 #ifdef DEBUG 218 { "udp_data_conn", KSTAT_DATA_UINT64 }, 219 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 220 #endif 221 }; 222 223 static kstat_t *udp_ksp; 224 struct kmem_cache *udp_cache; 225 226 /* 227 * Bind hash list size and hash function. It has to be a power of 2 for 228 * hashing. 229 */ 230 #define UDP_BIND_FANOUT_SIZE 512 231 #define UDP_BIND_HASH(lport) \ 232 ((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1)) 233 234 /* UDP bind fanout hash structure. */ 235 typedef struct udp_fanout_s { 236 udp_t *uf_udp; 237 kmutex_t uf_lock; 238 #if defined(_LP64) || defined(_I32LPx) 239 char uf_pad[48]; 240 #else 241 char uf_pad[56]; 242 #endif 243 } udp_fanout_t; 244 245 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 246 /* udp_fanout_t *udp_bind_fanout. */ 247 static udp_fanout_t *udp_bind_fanout; 248 249 /* 250 * This controls the rate some ndd info report functions can be used 251 * by non-priviledged users. It stores the last time such info is 252 * requested. When those report functions are called again, this 253 * is checked with the current time and compare with the ndd param 254 * udp_ndd_get_info_interval. 255 */ 256 static clock_t udp_last_ndd_get_info_time; 257 #define NDD_TOO_QUICK_MSG \ 258 "ndd get info rate too high for non-priviledged users, try again " \ 259 "later.\n" 260 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 261 262 static void udp_addr_req(queue_t *q, mblk_t *mp); 263 static void udp_bind(queue_t *q, mblk_t *mp); 264 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 265 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 266 static int udp_build_hdrs(queue_t *q, udp_t *udp); 267 static void udp_capability_req(queue_t *q, mblk_t *mp); 268 static int udp_close(queue_t *q); 269 static void udp_connect(queue_t *q, mblk_t *mp); 270 static void udp_disconnect(queue_t *q, mblk_t *mp); 271 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 272 int sys_error); 273 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 274 t_scalar_t tlierr, int unixerr); 275 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 276 cred_t *cr); 277 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 278 char *value, caddr_t cp, cred_t *cr); 279 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 280 char *value, caddr_t cp, cred_t *cr); 281 static void udp_icmp_error(queue_t *q, mblk_t *mp); 282 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 283 static void udp_info_req(queue_t *q, mblk_t *mp); 284 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 285 t_scalar_t addr_length); 286 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 287 cred_t *credp); 288 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 289 int *errorp, void *thisdg_attrs); 290 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 291 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 292 static boolean_t udp_param_register(udpparam_t *udppa, int cnt); 293 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 294 cred_t *cr); 295 static int udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 296 uchar_t **optbufp, uint_t *optlenp); 297 static void udp_report_item(mblk_t *mp, udp_t *udp); 298 static void udp_rput(queue_t *q, mblk_t *mp); 299 static void udp_rput_other(queue_t *, mblk_t *); 300 static int udp_rinfop(queue_t *q, infod_t *dp); 301 static int udp_rrw(queue_t *q, struiod_t *dp); 302 static void udp_rput_bind_ack(queue_t *q, mblk_t *mp); 303 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 304 cred_t *cr); 305 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha); 306 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 307 t_scalar_t destlen, t_scalar_t err); 308 static void udp_unbind(queue_t *q, mblk_t *mp); 309 static in_port_t udp_update_next_port(in_port_t port, boolean_t random); 310 static void udp_wput(queue_t *q, mblk_t *mp); 311 static mblk_t *udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst, 312 uint16_t port, uint_t srcid, int *error); 313 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 314 t_scalar_t tudr_optlen, int *error); 315 static void udp_wput_other(queue_t *q, mblk_t *mp); 316 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 317 static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 318 socklen_t addrlen); 319 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 320 321 static void udp_kstat_init(void); 322 static void udp_kstat_fini(void); 323 static int udp_kstat_update(kstat_t *kp, int rw); 324 static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2); 325 static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 326 static void udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 327 static void udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2); 328 329 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 330 uint_t pkt_len); 331 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 332 static void udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t); 333 static void udp_exit(conn_t *); 334 static void udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t); 335 #ifdef DEBUG 336 static void udp_mode_assertions(udp_t *, int); 337 #endif /* DEBUG */ 338 339 major_t UDP6_MAJ; 340 #define UDP6 "udp6" 341 342 #define UDP_RECV_HIWATER (56 * 1024) 343 #define UDP_RECV_LOWATER 128 344 #define UDP_XMIT_HIWATER (56 * 1024) 345 #define UDP_XMIT_LOWATER 1024 346 347 static struct module_info udp_info = { 348 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 349 }; 350 351 static struct qinit udp_rinit = { 352 (pfi_t)udp_rput, NULL, udp_open, udp_close, NULL, 353 &udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 354 }; 355 356 static struct qinit udp_winit = { 357 (pfi_t)udp_wput, NULL, NULL, NULL, NULL, 358 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 359 }; 360 361 /* Support for just SNMP if UDP is not pushed directly over device IP */ 362 struct qinit udp_snmp_rinit = { 363 (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL, 364 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 365 }; 366 367 struct qinit udp_snmp_winit = { 368 (pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL, 369 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 370 }; 371 372 struct streamtab udpinfo = { 373 &udp_rinit, &udp_winit 374 }; 375 376 static sin_t sin_null; /* Zero address for quick clears */ 377 static sin6_t sin6_null; /* Zero address for quick clears */ 378 379 /* Hint not protected by any lock */ 380 static in_port_t udp_g_next_port_to_try; 381 382 /* 383 * Extra privileged ports. In host byte order. 384 */ 385 #define UDP_NUM_EPRIV_PORTS 64 386 static int udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 387 static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 }; 388 389 /* Only modified during _init and _fini thus no locking is needed. */ 390 static IDP udp_g_nd; /* Points to table of UDP ND variables. */ 391 392 /* MIB-2 stuff for SNMP */ 393 static mib2_udp_t udp_mib; /* SNMP fixed size info */ 394 static kstat_t *udp_mibkp; /* kstat exporting udp_mib data */ 395 396 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 397 398 /* Default structure copied into T_INFO_ACK messages */ 399 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 400 T_INFO_ACK, 401 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 402 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 403 T_INVALID, /* CDATA_size. udp does not support connect data. */ 404 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 405 sizeof (sin_t), /* ADDR_size. */ 406 0, /* OPT_size - not initialized here */ 407 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 408 T_CLTS, /* SERV_type. udp supports connection-less. */ 409 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 410 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 411 }; 412 413 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 414 415 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 416 T_INFO_ACK, 417 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 418 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 419 T_INVALID, /* CDATA_size. udp does not support connect data. */ 420 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 421 sizeof (sin6_t), /* ADDR_size. */ 422 0, /* OPT_size - not initialized here */ 423 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 424 T_CLTS, /* SERV_type. udp supports connection-less. */ 425 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 426 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 427 }; 428 429 /* largest UDP port number */ 430 #define UDP_MAX_PORT 65535 431 432 /* 433 * Table of ND variables supported by udp. These are loaded into udp_g_nd 434 * in udp_open. 435 * All of these are alterable, within the min/max values given, at run time. 436 */ 437 /* BEGIN CSTYLED */ 438 udpparam_t udp_param_arr[] = { 439 /*min max value name */ 440 { 0L, 256, 32, "udp_wroff_extra" }, 441 { 1L, 255, 255, "udp_ipv4_ttl" }, 442 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 443 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 444 { 0, 1, 1, "udp_do_checksum" }, 445 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 446 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 447 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 448 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 449 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 450 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 451 { 100, 60000, 1000, "udp_ndd_get_info_interval"}, 452 }; 453 /* END CSTYLED */ 454 455 /* 456 * The smallest anonymous port in the priviledged port range which UDP 457 * looks for free port. Use in the option UDP_ANONPRIVBIND. 458 */ 459 static in_port_t udp_min_anonpriv_port = 512; 460 461 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 462 uint32_t udp_random_anon_port = 1; 463 464 /* 465 * Hook functions to enable cluster networking. 466 * On non-clustered systems these vectors must always be NULL 467 */ 468 469 void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family, 470 uint8_t *laddrp, in_port_t lport) = NULL; 471 void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, 472 uint8_t *laddrp, in_port_t lport) = NULL; 473 474 typedef union T_primitives *t_primp_t; 475 476 #define UDP_ENQUEUE_MP(udp, mp, proc, tag) { \ 477 ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL); \ 478 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 479 (mp)->b_queue = (queue_t *)((uintptr_t)tag); \ 480 (mp)->b_prev = (mblk_t *)proc; \ 481 if ((udp)->udp_mphead == NULL) \ 482 (udp)->udp_mphead = (mp); \ 483 else \ 484 (udp)->udp_mptail->b_next = (mp); \ 485 (udp)->udp_mptail = (mp); \ 486 (udp)->udp_mpcount++; \ 487 } 488 489 #define UDP_READERS_INCREF(udp) { \ 490 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 491 (udp)->udp_reader_count++; \ 492 } 493 494 #define UDP_READERS_DECREF(udp) { \ 495 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 496 (udp)->udp_reader_count--; \ 497 if ((udp)->udp_reader_count == 0) \ 498 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 499 } 500 501 #define UDP_SQUEUE_DECREF(udp) { \ 502 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 503 (udp)->udp_squeue_count--; \ 504 if ((udp)->udp_squeue_count == 0) \ 505 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 506 } 507 508 /* 509 * Notes on UDP endpoint synchronization: 510 * 511 * UDP needs exclusive operation on a per endpoint basis, when executing 512 * functions that modify the endpoint state. udp_rput_other() deals with 513 * packets with IP options, and processing these packets end up having 514 * to update the endpoint's option related state. udp_wput_other() deals 515 * with control operations from the top, e.g. connect() that needs to 516 * update the endpoint state. These could be synchronized using locks, 517 * but the current version uses squeues for this purpose. squeues may 518 * give performance improvement for certain cases such as connected UDP 519 * sockets; thus the framework allows for using squeues. 520 * 521 * The perimeter routines are described as follows: 522 * 523 * udp_enter(): 524 * Enter the UDP endpoint perimeter. 525 * 526 * udp_become_writer(): 527 * Become exclusive on the UDP endpoint. Specifies a function 528 * that will be called exclusively either immediately or later 529 * when the perimeter is available exclusively. 530 * 531 * udp_exit(): 532 * Exit the UDP perimeter. 533 * 534 * Entering UDP from the top or from the bottom must be done using 535 * udp_enter(). No lock must be held while attempting to enter the UDP 536 * perimeter. When finished, udp_exit() must be called to get out of 537 * the perimeter. 538 * 539 * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode, 540 * multiple threads may enter a UDP endpoint concurrently. This is used 541 * for sending and/or receiving normal data. Control operations and other 542 * special cases call udp_become_writer() to become exclusive on a per 543 * endpoint basis and this results in transitioning to SQUEUE mode. squeue 544 * by definition serializes access to the conn_t. When there are no more 545 * pending messages on the squeue for the UDP connection, the endpoint 546 * reverts to MT_HOT mode. During the interregnum when not all MT threads 547 * of an endpoint have finished, messages are queued in the UDP endpoint 548 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode. 549 * 550 * These modes have the following analogs: 551 * 552 * UDP_MT_HOT/udp_reader_count==0 none 553 * UDP_MT_HOT/udp_reader_count>0 RW_READ_LOCK 554 * UDP_MT_QUEUED RW_WRITE_WANTED 555 * UDP_SQUEUE or UDP_QUEUED_SQUEUE RW_WRITE_LOCKED 556 * 557 * Stable modes: UDP_MT_HOT, UDP_SQUEUE 558 * Transient modes: UDP_MT_QUEUED, UDP_QUEUED_SQUEUE 559 * 560 * While in stable modes, UDP keeps track of the number of threads 561 * operating on the endpoint. The udp_reader_count variable represents 562 * the number of threads entering the endpoint as readers while it is 563 * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there 564 * is only a single reader, i.e. when this counter drops to 1. Likewise, 565 * udp_squeue_count represents the number of threads operating on the 566 * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition 567 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e. 568 * when this counter drops to 0. 569 * 570 * The default mode is set to UDP_MT_HOT and UDP alternates between 571 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below. 572 * 573 * Mode transition: 574 * ---------------------------------------------------------------- 575 * old mode Event New mode 576 * ---------------------------------------------------------------- 577 * UDP_MT_HOT Call to udp_become_writer() UDP_SQUEUE 578 * and udp_reader_count == 1 579 * 580 * UDP_MT_HOT Call to udp_become_writer() UDP_MT_QUEUED 581 * and udp_reader_count > 1 582 * 583 * UDP_MT_QUEUED udp_reader_count drops to zero UDP_QUEUED_SQUEUE 584 * 585 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_SQUEUE 586 * internal UDP queue successfully 587 * moved to squeue AND udp_squeue_count != 0 588 * 589 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_MT_HOT 590 * internal UDP queue successfully 591 * moved to squeue AND udp_squeue_count 592 * drops to zero 593 * 594 * UDP_SQUEUE udp_squeue_count drops to zero UDP_MT_HOT 595 * ---------------------------------------------------------------- 596 */ 597 598 static queue_t * 599 UDP_WR(queue_t *q) 600 { 601 ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL); 602 ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL); 603 ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next))); 604 605 return (_WR(q)->q_next); 606 } 607 608 static queue_t * 609 UDP_RD(queue_t *q) 610 { 611 ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL); 612 ASSERT(IPCL_IS_UDP(Q_TO_CONN(q))); 613 ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL); 614 615 return (_RD(q)->q_next); 616 } 617 618 #ifdef DEBUG 619 #define UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller) 620 #else 621 #define UDP_MODE_ASSERTIONS(udp, caller) 622 #endif 623 624 /* Invariants */ 625 #ifdef DEBUG 626 627 uint32_t udp_count[4]; 628 629 /* Context of udp_mode_assertions */ 630 #define UDP_ENTER 1 631 #define UDP_BECOME_WRITER 2 632 #define UDP_EXIT 3 633 634 static void 635 udp_mode_assertions(udp_t *udp, int caller) 636 { 637 ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock)); 638 639 switch (udp->udp_mode) { 640 case UDP_MT_HOT: 641 /* 642 * Messages have not yet been enqueued on the internal queue, 643 * otherwise we would have switched to UDP_MT_QUEUED. Likewise 644 * by definition, there can't be any messages enqueued on the 645 * squeue. The UDP could be quiescent, so udp_reader_count 646 * could be zero at entry. 647 */ 648 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 && 649 udp->udp_squeue_count == 0); 650 ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0); 651 udp_count[0]++; 652 break; 653 654 case UDP_MT_QUEUED: 655 /* 656 * The last MT thread to exit the udp perimeter empties the 657 * internal queue and then switches the UDP to 658 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED 659 * mode, it means there must be at least 1 MT thread still in 660 * the perimeter and at least 1 message on the internal queue. 661 */ 662 ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL && 663 udp->udp_mpcount != 0 && udp->udp_squeue_count == 0); 664 udp_count[1]++; 665 break; 666 667 case UDP_QUEUED_SQUEUE: 668 /* 669 * The switch has happened from MT to SQUEUE. So there can't 670 * any MT threads. Messages could still pile up on the internal 671 * queue until the transition is complete and we move to 672 * UDP_SQUEUE mode. We can't assert on nonzero udp_squeue_count 673 * since the squeue could drain any time. 674 */ 675 ASSERT(udp->udp_reader_count == 0); 676 udp_count[2]++; 677 break; 678 679 case UDP_SQUEUE: 680 /* 681 * The transition is complete. Thre can't be any messages on 682 * the internal queue. The udp could be quiescent or the squeue 683 * could drain any time, so we can't assert on nonzero 684 * udp_squeue_count during entry. Nor can we assert that 685 * udp_reader_count is zero, since, a reader thread could have 686 * directly become writer in line by calling udp_become_writer 687 * without going through the queued states. 688 */ 689 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0); 690 ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0); 691 udp_count[3]++; 692 break; 693 } 694 } 695 #endif 696 697 #define _UDP_ENTER(connp, mp, proc, tag) { \ 698 udp_t *_udp = (connp)->conn_udp; \ 699 \ 700 mutex_enter(&(connp)->conn_lock); \ 701 if ((connp)->conn_state_flags & CONN_CLOSING) { \ 702 mutex_exit(&(connp)->conn_lock); \ 703 freemsg(mp); \ 704 } else { \ 705 UDP_MODE_ASSERTIONS(_udp, UDP_ENTER); \ 706 \ 707 switch (_udp->udp_mode) { \ 708 case UDP_MT_HOT: \ 709 /* We can execute as reader right away. */ \ 710 UDP_READERS_INCREF(_udp); \ 711 mutex_exit(&(connp)->conn_lock); \ 712 (*(proc))(connp, mp, (connp)->conn_sqp); \ 713 break; \ 714 \ 715 case UDP_SQUEUE: \ 716 /* \ 717 * We are in squeue mode, send the \ 718 * packet to the squeue \ 719 */ \ 720 _udp->udp_squeue_count++; \ 721 CONN_INC_REF_LOCKED(connp); \ 722 mutex_exit(&(connp)->conn_lock); \ 723 squeue_enter((connp)->conn_sqp, mp, proc, \ 724 connp, tag); \ 725 break; \ 726 \ 727 case UDP_MT_QUEUED: \ 728 case UDP_QUEUED_SQUEUE: \ 729 /* \ 730 * Some messages may have been enqueued \ 731 * ahead of us. Enqueue the new message \ 732 * at the tail of the internal queue to \ 733 * preserve message ordering. \ 734 */ \ 735 UDP_ENQUEUE_MP(_udp, mp, proc, tag); \ 736 mutex_exit(&(connp)->conn_lock); \ 737 break; \ 738 } \ 739 } \ 740 } 741 742 static void 743 udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 744 { 745 _UDP_ENTER(connp, mp, proc, tag); 746 } 747 748 static void 749 udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 750 { 751 udp_t *udp; 752 753 udp = connp->conn_udp; 754 755 mutex_enter(&connp->conn_lock); 756 757 UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER); 758 759 switch (udp->udp_mode) { 760 case UDP_MT_HOT: 761 if (udp->udp_reader_count == 1) { 762 /* 763 * We are the only MT thread. Switch to squeue mode 764 * immediately. 765 */ 766 udp->udp_mode = UDP_SQUEUE; 767 udp->udp_squeue_count = 1; 768 CONN_INC_REF_LOCKED(connp); 769 mutex_exit(&connp->conn_lock); 770 squeue_enter(connp->conn_sqp, mp, proc, connp, tag); 771 return; 772 } 773 /* FALLTHRU */ 774 775 case UDP_MT_QUEUED: 776 /* Enqueue the packet internally in UDP */ 777 udp->udp_mode = UDP_MT_QUEUED; 778 UDP_ENQUEUE_MP(udp, mp, proc, tag); 779 mutex_exit(&connp->conn_lock); 780 return; 781 782 case UDP_SQUEUE: 783 case UDP_QUEUED_SQUEUE: 784 /* 785 * We are already exclusive. i.e. we are already 786 * writer. Simply call the desired function. 787 */ 788 udp->udp_squeue_count++; 789 mutex_exit(&connp->conn_lock); 790 (*proc)(connp, mp, connp->conn_sqp); 791 return; 792 } 793 } 794 795 /* 796 * Transition from MT mode to SQUEUE mode, when the last MT thread 797 * is exiting the UDP perimeter. Move all messages from the internal 798 * udp queue to the squeue. A better way would be to move all the 799 * messages in one shot, this needs more support from the squeue framework 800 */ 801 static void 802 udp_switch_to_squeue(udp_t *udp) 803 { 804 mblk_t *mp; 805 mblk_t *mp_next; 806 sqproc_t proc; 807 uint8_t tag; 808 conn_t *connp = udp->udp_connp; 809 810 ASSERT(MUTEX_HELD(&connp->conn_lock)); 811 ASSERT(udp->udp_mode == UDP_MT_QUEUED); 812 while (udp->udp_mphead != NULL) { 813 mp = udp->udp_mphead; 814 udp->udp_mphead = NULL; 815 udp->udp_mptail = NULL; 816 udp->udp_mpcount = 0; 817 udp->udp_mode = UDP_QUEUED_SQUEUE; 818 mutex_exit(&connp->conn_lock); 819 /* 820 * It is best not to hold any locks across the calls 821 * to squeue functions. Since we drop the lock we 822 * need to go back and check the udp_mphead once again 823 * after the squeue_fill and hence the while loop at 824 * the top of this function 825 */ 826 for (; mp != NULL; mp = mp_next) { 827 mp_next = mp->b_next; 828 proc = (sqproc_t)mp->b_prev; 829 tag = (uint8_t)((uintptr_t)mp->b_queue); 830 mp->b_next = NULL; 831 mp->b_prev = NULL; 832 mp->b_queue = NULL; 833 CONN_INC_REF(connp); 834 udp->udp_squeue_count++; 835 squeue_fill(connp->conn_sqp, mp, proc, connp, 836 tag); 837 } 838 mutex_enter(&connp->conn_lock); 839 } 840 /* 841 * udp_squeue_count of zero implies that the squeue has drained 842 * even before we arrived here (i.e. after the squeue_fill above) 843 */ 844 udp->udp_mode = (udp->udp_squeue_count != 0) ? 845 UDP_SQUEUE : UDP_MT_HOT; 846 } 847 848 #define _UDP_EXIT(connp) { \ 849 udp_t *_udp = (connp)->conn_udp; \ 850 \ 851 mutex_enter(&(connp)->conn_lock); \ 852 UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \ 853 \ 854 switch (_udp->udp_mode) { \ 855 case UDP_MT_HOT: \ 856 UDP_READERS_DECREF(_udp); \ 857 mutex_exit(&(connp)->conn_lock); \ 858 break; \ 859 \ 860 case UDP_SQUEUE: \ 861 UDP_SQUEUE_DECREF(_udp); \ 862 if (_udp->udp_squeue_count == 0) \ 863 _udp->udp_mode = UDP_MT_HOT; \ 864 mutex_exit(&(connp)->conn_lock); \ 865 break; \ 866 \ 867 case UDP_MT_QUEUED: \ 868 /* \ 869 * If this is the last MT thread, we need to \ 870 * switch to squeue mode \ 871 */ \ 872 UDP_READERS_DECREF(_udp); \ 873 if (_udp->udp_reader_count == 0) \ 874 udp_switch_to_squeue(_udp); \ 875 mutex_exit(&(connp)->conn_lock); \ 876 break; \ 877 \ 878 case UDP_QUEUED_SQUEUE: \ 879 UDP_SQUEUE_DECREF(_udp); \ 880 /* \ 881 * Even if the udp_squeue_count drops to zero, we \ 882 * don't want to change udp_mode to UDP_MT_HOT here. \ 883 * The thread in udp_switch_to_squeue will take care \ 884 * of the transition to UDP_MT_HOT, after emptying \ 885 * any more new messages that have been enqueued in \ 886 * udp_mphead. \ 887 */ \ 888 mutex_exit(&(connp)->conn_lock); \ 889 break; \ 890 } \ 891 } 892 893 static void 894 udp_exit(conn_t *connp) 895 { 896 _UDP_EXIT(connp); 897 } 898 899 /* 900 * Return the next anonymous port in the priviledged port range for 901 * bind checking. 902 */ 903 static in_port_t 904 udp_get_next_priv_port(void) 905 { 906 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 907 908 if (next_priv_port < udp_min_anonpriv_port) { 909 next_priv_port = IPPORT_RESERVED - 1; 910 } 911 return (next_priv_port--); 912 } 913 914 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 915 /* ARGSUSED */ 916 static int 917 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 918 { 919 udp_fanout_t *udpf; 920 int i; 921 zoneid_t zoneid; 922 conn_t *connp; 923 udp_t *udp; 924 925 connp = Q_TO_CONN(q); 926 udp = connp->conn_udp; 927 928 /* Refer to comments in udp_status_report(). */ 929 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 930 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 931 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 932 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 933 return (0); 934 } 935 } 936 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 937 /* The following may work even if we cannot get a large buf. */ 938 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 939 return (0); 940 } 941 942 (void) mi_mpprintf(mp, 943 "UDP " MI_COL_HDRPAD_STR 944 /* 12345678[89ABCDEF] */ 945 " zone lport src addr dest addr port state"); 946 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 947 948 zoneid = connp->conn_zoneid; 949 950 for (i = 0; i < udp_bind_fanout_size; i++) { 951 udpf = &udp_bind_fanout[i]; 952 mutex_enter(&udpf->uf_lock); 953 954 /* Print the hash index. */ 955 udp = udpf->uf_udp; 956 if (zoneid != GLOBAL_ZONEID) { 957 /* skip to first entry in this zone; might be none */ 958 while (udp != NULL && 959 udp->udp_connp->conn_zoneid != zoneid) 960 udp = udp->udp_bind_hash; 961 } 962 if (udp != NULL) { 963 uint_t print_len, buf_len; 964 965 buf_len = mp->b_cont->b_datap->db_lim - 966 mp->b_cont->b_wptr; 967 print_len = snprintf((char *)mp->b_cont->b_wptr, 968 buf_len, "%d\n", i); 969 if (print_len < buf_len) { 970 mp->b_cont->b_wptr += print_len; 971 } else { 972 mp->b_cont->b_wptr += buf_len; 973 } 974 for (; udp != NULL; udp = udp->udp_bind_hash) { 975 if (zoneid == GLOBAL_ZONEID || 976 zoneid == udp->udp_connp->conn_zoneid) 977 udp_report_item(mp->b_cont, udp); 978 } 979 } 980 mutex_exit(&udpf->uf_lock); 981 } 982 udp_last_ndd_get_info_time = ddi_get_lbolt(); 983 return (0); 984 } 985 986 /* 987 * Hash list removal routine for udp_t structures. 988 */ 989 static void 990 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 991 { 992 udp_t *udpnext; 993 kmutex_t *lockp; 994 995 if (udp->udp_ptpbhn == NULL) 996 return; 997 998 /* 999 * Extract the lock pointer in case there are concurrent 1000 * hash_remove's for this instance. 1001 */ 1002 ASSERT(udp->udp_port != 0); 1003 if (!caller_holds_lock) { 1004 lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock; 1005 ASSERT(lockp != NULL); 1006 mutex_enter(lockp); 1007 } 1008 if (udp->udp_ptpbhn != NULL) { 1009 udpnext = udp->udp_bind_hash; 1010 if (udpnext != NULL) { 1011 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 1012 udp->udp_bind_hash = NULL; 1013 } 1014 *udp->udp_ptpbhn = udpnext; 1015 udp->udp_ptpbhn = NULL; 1016 } 1017 if (!caller_holds_lock) { 1018 mutex_exit(lockp); 1019 } 1020 } 1021 1022 static void 1023 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 1024 { 1025 udp_t **udpp; 1026 udp_t *udpnext; 1027 1028 ASSERT(MUTEX_HELD(&uf->uf_lock)); 1029 if (udp->udp_ptpbhn != NULL) { 1030 udp_bind_hash_remove(udp, B_TRUE); 1031 } 1032 udpp = &uf->uf_udp; 1033 udpnext = udpp[0]; 1034 if (udpnext != NULL) { 1035 /* 1036 * If the new udp bound to the INADDR_ANY address 1037 * and the first one in the list is not bound to 1038 * INADDR_ANY we skip all entries until we find the 1039 * first one bound to INADDR_ANY. 1040 * This makes sure that applications binding to a 1041 * specific address get preference over those binding to 1042 * INADDR_ANY. 1043 */ 1044 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 1045 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 1046 while ((udpnext = udpp[0]) != NULL && 1047 !V6_OR_V4_INADDR_ANY( 1048 udpnext->udp_bound_v6src)) { 1049 udpp = &(udpnext->udp_bind_hash); 1050 } 1051 if (udpnext != NULL) 1052 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1053 } else { 1054 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1055 } 1056 } 1057 udp->udp_bind_hash = udpnext; 1058 udp->udp_ptpbhn = udpp; 1059 udpp[0] = udp; 1060 } 1061 1062 /* 1063 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 1064 * passed to udp_wput. 1065 * It associates a port number and local address with the stream. 1066 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 1067 * protocol type (IPPROTO_UDP) placed in the message following the address. 1068 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 1069 * (Called as writer.) 1070 * 1071 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 1072 * without setting SO_REUSEADDR. This is needed so that they 1073 * can be viewed as two independent transport protocols. 1074 * However, anonymouns ports are allocated from the same range to avoid 1075 * duplicating the udp_g_next_port_to_try. 1076 */ 1077 static void 1078 udp_bind(queue_t *q, mblk_t *mp) 1079 { 1080 sin_t *sin; 1081 sin6_t *sin6; 1082 mblk_t *mp1; 1083 in_port_t port; /* Host byte order */ 1084 in_port_t requested_port; /* Host byte order */ 1085 struct T_bind_req *tbr; 1086 int count; 1087 in6_addr_t v6src; 1088 boolean_t bind_to_req_port_only; 1089 int loopmax; 1090 udp_fanout_t *udpf; 1091 in_port_t lport; /* Network byte order */ 1092 zoneid_t zoneid; 1093 conn_t *connp; 1094 udp_t *udp; 1095 1096 connp = Q_TO_CONN(q); 1097 udp = connp->conn_udp; 1098 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 1099 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1100 "udp_bind: bad req, len %u", 1101 (uint_t)(mp->b_wptr - mp->b_rptr)); 1102 udp_err_ack(q, mp, TPROTO, 0); 1103 return; 1104 } 1105 1106 if (udp->udp_state != TS_UNBND) { 1107 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1108 "udp_bind: bad state, %u", udp->udp_state); 1109 udp_err_ack(q, mp, TOUTSTATE, 0); 1110 return; 1111 } 1112 /* 1113 * Reallocate the message to make sure we have enough room for an 1114 * address and the protocol type. 1115 */ 1116 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 1117 if (!mp1) { 1118 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1119 return; 1120 } 1121 1122 mp = mp1; 1123 tbr = (struct T_bind_req *)mp->b_rptr; 1124 switch (tbr->ADDR_length) { 1125 case 0: /* Request for a generic port */ 1126 tbr->ADDR_offset = sizeof (struct T_bind_req); 1127 if (udp->udp_family == AF_INET) { 1128 tbr->ADDR_length = sizeof (sin_t); 1129 sin = (sin_t *)&tbr[1]; 1130 *sin = sin_null; 1131 sin->sin_family = AF_INET; 1132 mp->b_wptr = (uchar_t *)&sin[1]; 1133 } else { 1134 ASSERT(udp->udp_family == AF_INET6); 1135 tbr->ADDR_length = sizeof (sin6_t); 1136 sin6 = (sin6_t *)&tbr[1]; 1137 *sin6 = sin6_null; 1138 sin6->sin6_family = AF_INET6; 1139 mp->b_wptr = (uchar_t *)&sin6[1]; 1140 } 1141 port = 0; 1142 break; 1143 1144 case sizeof (sin_t): /* Complete IPv4 address */ 1145 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 1146 sizeof (sin_t)); 1147 if (sin == NULL || !OK_32PTR((char *)sin)) { 1148 udp_err_ack(q, mp, TSYSERR, EINVAL); 1149 return; 1150 } 1151 if (udp->udp_family != AF_INET || 1152 sin->sin_family != AF_INET) { 1153 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1154 return; 1155 } 1156 port = ntohs(sin->sin_port); 1157 break; 1158 1159 case sizeof (sin6_t): /* complete IPv6 address */ 1160 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 1161 sizeof (sin6_t)); 1162 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1163 udp_err_ack(q, mp, TSYSERR, EINVAL); 1164 return; 1165 } 1166 if (udp->udp_family != AF_INET6 || 1167 sin6->sin6_family != AF_INET6) { 1168 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1169 return; 1170 } 1171 port = ntohs(sin6->sin6_port); 1172 break; 1173 1174 default: /* Invalid request */ 1175 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1176 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 1177 udp_err_ack(q, mp, TBADADDR, 0); 1178 return; 1179 } 1180 1181 requested_port = port; 1182 1183 if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ) 1184 bind_to_req_port_only = B_FALSE; 1185 else /* T_BIND_REQ and requested_port != 0 */ 1186 bind_to_req_port_only = B_TRUE; 1187 1188 if (requested_port == 0) { 1189 /* 1190 * If the application passed in zero for the port number, it 1191 * doesn't care which port number we bind to. Get one in the 1192 * valid range. 1193 */ 1194 if (udp->udp_anon_priv_bind) { 1195 port = udp_get_next_priv_port(); 1196 } else { 1197 port = udp_update_next_port(udp_g_next_port_to_try, 1198 B_TRUE); 1199 } 1200 } else { 1201 /* 1202 * If the port is in the well-known privileged range, 1203 * make sure the caller was privileged. 1204 */ 1205 int i; 1206 boolean_t priv = B_FALSE; 1207 1208 if (port < udp_smallest_nonpriv_port) { 1209 priv = B_TRUE; 1210 } else { 1211 for (i = 0; i < udp_g_num_epriv_ports; i++) { 1212 if (port == udp_g_epriv_ports[i]) { 1213 priv = B_TRUE; 1214 break; 1215 } 1216 } 1217 } 1218 1219 if (priv) { 1220 cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); 1221 1222 if (secpolicy_net_privaddr(cr, port) != 0) { 1223 udp_err_ack(q, mp, TACCES, 0); 1224 return; 1225 } 1226 } 1227 } 1228 1229 /* 1230 * Copy the source address into our udp structure. This address 1231 * may still be zero; if so, IP will fill in the correct address 1232 * each time an outbound packet is passed to it. 1233 */ 1234 if (udp->udp_family == AF_INET) { 1235 ASSERT(sin != NULL); 1236 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1237 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1238 udp->udp_ip_snd_options_len; 1239 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 1240 } else { 1241 ASSERT(sin6 != NULL); 1242 v6src = sin6->sin6_addr; 1243 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 1244 udp->udp_ipversion = IPV4_VERSION; 1245 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1246 UDPH_SIZE + udp->udp_ip_snd_options_len; 1247 } else { 1248 udp->udp_ipversion = IPV6_VERSION; 1249 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1250 } 1251 } 1252 1253 /* 1254 * If udp_reuseaddr is not set, then we have to make sure that 1255 * the IP address and port number the application requested 1256 * (or we selected for the application) is not being used by 1257 * another stream. If another stream is already using the 1258 * requested IP address and port, the behavior depends on 1259 * "bind_to_req_port_only". If set the bind fails; otherwise we 1260 * search for any an unused port to bind to the the stream. 1261 * 1262 * As per the BSD semantics, as modified by the Deering multicast 1263 * changes, if udp_reuseaddr is set, then we allow multiple binds 1264 * to the same port independent of the local IP address. 1265 * 1266 * This is slightly different than in SunOS 4.X which did not 1267 * support IP multicast. Note that the change implemented by the 1268 * Deering multicast code effects all binds - not only binding 1269 * to IP multicast addresses. 1270 * 1271 * Note that when binding to port zero we ignore SO_REUSEADDR in 1272 * order to guarantee a unique port. 1273 */ 1274 1275 count = 0; 1276 if (udp->udp_anon_priv_bind) { 1277 /* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */ 1278 loopmax = IPPORT_RESERVED - udp_min_anonpriv_port; 1279 } else { 1280 loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1; 1281 } 1282 1283 zoneid = connp->conn_zoneid; 1284 for (;;) { 1285 udp_t *udp1; 1286 boolean_t is_inaddr_any; 1287 boolean_t found_exclbind = B_FALSE; 1288 1289 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 1290 /* 1291 * Walk through the list of udp streams bound to 1292 * requested port with the same IP address. 1293 */ 1294 lport = htons(port); 1295 udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)]; 1296 mutex_enter(&udpf->uf_lock); 1297 for (udp1 = udpf->uf_udp; udp1 != NULL; 1298 udp1 = udp1->udp_bind_hash) { 1299 if (lport != udp1->udp_port || 1300 zoneid != udp1->udp_connp->conn_zoneid) 1301 continue; 1302 1303 /* 1304 * If UDP_EXCLBIND is set for either the bound or 1305 * binding endpoint, the semantics of bind 1306 * is changed according to the following chart. 1307 * 1308 * spec = specified address (v4 or v6) 1309 * unspec = unspecified address (v4 or v6) 1310 * A = specified addresses are different for endpoints 1311 * 1312 * bound bind to allowed? 1313 * ------------------------------------- 1314 * unspec unspec no 1315 * unspec spec no 1316 * spec unspec no 1317 * spec spec yes if A 1318 */ 1319 if (udp1->udp_exclbind || udp->udp_exclbind) { 1320 if (V6_OR_V4_INADDR_ANY( 1321 udp1->udp_bound_v6src) || 1322 is_inaddr_any || 1323 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1324 &v6src)) { 1325 found_exclbind = B_TRUE; 1326 break; 1327 } 1328 continue; 1329 } 1330 1331 /* 1332 * Check ipversion to allow IPv4 and IPv6 sockets to 1333 * have disjoint port number spaces. 1334 */ 1335 if (udp->udp_ipversion != udp1->udp_ipversion) 1336 continue; 1337 1338 /* 1339 * No difference depending on SO_REUSEADDR. 1340 * 1341 * If existing port is bound to a 1342 * non-wildcard IP address and 1343 * the requesting stream is bound to 1344 * a distinct different IP addresses 1345 * (non-wildcard, also), keep going. 1346 */ 1347 if (!is_inaddr_any && 1348 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 1349 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1350 &v6src)) { 1351 continue; 1352 } 1353 break; 1354 } 1355 1356 if (!found_exclbind && 1357 (udp->udp_reuseaddr && requested_port != 0)) { 1358 break; 1359 } 1360 1361 if (udp1 == NULL) { 1362 /* 1363 * No other stream has this IP address 1364 * and port number. We can use it. 1365 */ 1366 break; 1367 } 1368 mutex_exit(&udpf->uf_lock); 1369 if (bind_to_req_port_only) { 1370 /* 1371 * We get here only when requested port 1372 * is bound (and only first of the for() 1373 * loop iteration). 1374 * 1375 * The semantics of this bind request 1376 * require it to fail so we return from 1377 * the routine (and exit the loop). 1378 * 1379 */ 1380 udp_err_ack(q, mp, TADDRBUSY, 0); 1381 return; 1382 } 1383 1384 if (udp->udp_anon_priv_bind) { 1385 port = udp_get_next_priv_port(); 1386 } else { 1387 if ((count == 0) && (requested_port != 0)) { 1388 /* 1389 * If the application wants us to find 1390 * a port, get one to start with. Set 1391 * requested_port to 0, so that we will 1392 * update udp_g_next_port_to_try below. 1393 */ 1394 port = udp_update_next_port( 1395 udp_g_next_port_to_try, B_TRUE); 1396 requested_port = 0; 1397 } else { 1398 port = udp_update_next_port(port + 1, B_FALSE); 1399 } 1400 } 1401 1402 if (++count >= loopmax) { 1403 /* 1404 * We've tried every possible port number and 1405 * there are none available, so send an error 1406 * to the user. 1407 */ 1408 udp_err_ack(q, mp, TNOADDR, 0); 1409 return; 1410 } 1411 } 1412 1413 /* 1414 * Copy the source address into our udp structure. This address 1415 * may still be zero; if so, ip will fill in the correct address 1416 * each time an outbound packet is passed to it. 1417 * If we are binding to a broadcast or multicast address udp_rput 1418 * will clear the source address when it receives the T_BIND_ACK. 1419 */ 1420 udp->udp_v6src = udp->udp_bound_v6src = v6src; 1421 udp->udp_port = lport; 1422 /* 1423 * Now reset the the next anonymous port if the application requested 1424 * an anonymous port, or we handed out the next anonymous port. 1425 */ 1426 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 1427 udp_g_next_port_to_try = port + 1; 1428 } 1429 1430 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 1431 if (udp->udp_family == AF_INET) { 1432 sin->sin_port = udp->udp_port; 1433 } else { 1434 int error; 1435 1436 sin6->sin6_port = udp->udp_port; 1437 /* Rebuild the header template */ 1438 error = udp_build_hdrs(q, udp); 1439 if (error != 0) { 1440 mutex_exit(&udpf->uf_lock); 1441 udp_err_ack(q, mp, TSYSERR, error); 1442 return; 1443 } 1444 } 1445 udp->udp_state = TS_IDLE; 1446 udp_bind_hash_insert(udpf, udp); 1447 mutex_exit(&udpf->uf_lock); 1448 1449 if (cl_inet_bind) { 1450 /* 1451 * Running in cluster mode - register bind information 1452 */ 1453 if (udp->udp_ipversion == IPV4_VERSION) { 1454 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1455 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1456 (in_port_t)udp->udp_port); 1457 } else { 1458 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1459 (uint8_t *)&(udp->udp_v6src), 1460 (in_port_t)udp->udp_port); 1461 } 1462 1463 } 1464 1465 /* Pass the protocol number in the message following the address. */ 1466 *mp->b_wptr++ = IPPROTO_UDP; 1467 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1468 /* 1469 * Append a request for an IRE if udp_v6src not 1470 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1471 */ 1472 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1473 if (!mp->b_cont) { 1474 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1475 return; 1476 } 1477 mp->b_cont->b_wptr += sizeof (ire_t); 1478 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1479 } 1480 if (udp->udp_family == AF_INET6) 1481 mp = ip_bind_v6(q, mp, connp, NULL); 1482 else 1483 mp = ip_bind_v4(q, mp, connp); 1484 1485 if (mp != NULL) 1486 udp_rput_other(_RD(q), mp); 1487 else 1488 CONN_INC_REF(connp); 1489 } 1490 1491 1492 void 1493 udp_resume_bind(conn_t *connp, mblk_t *mp) 1494 { 1495 udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY); 1496 } 1497 1498 /* 1499 * This is called from ip_wput_nondata to resume a deferred UDP bind. 1500 */ 1501 /* ARGSUSED */ 1502 static void 1503 udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2) 1504 { 1505 conn_t *connp = arg; 1506 1507 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1508 1509 udp_rput_other(connp->conn_rq, mp); 1510 1511 CONN_OPER_PENDING_DONE(connp); 1512 udp_exit(connp); 1513 } 1514 1515 /* 1516 * This routine handles each T_CONN_REQ message passed to udp. It 1517 * associates a default destination address with the stream. 1518 * 1519 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1520 * T_BIND_REQ - specifying local and remote address/port 1521 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 1522 * T_OK_ACK - for the T_CONN_REQ 1523 * T_CONN_CON - to keep the TPI user happy 1524 * 1525 * The connect completes in udp_rput. 1526 * When a T_BIND_ACK is received information is extracted from the IRE 1527 * and the two appended messages are sent to the TPI user. 1528 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1529 * it to an error ack for the appropriate primitive. 1530 */ 1531 static void 1532 udp_connect(queue_t *q, mblk_t *mp) 1533 { 1534 sin6_t *sin6; 1535 sin_t *sin; 1536 struct T_conn_req *tcr; 1537 in6_addr_t v6dst; 1538 ipaddr_t v4dst; 1539 uint16_t dstport; 1540 uint32_t flowinfo; 1541 mblk_t *mp1, *mp2; 1542 udp_fanout_t *udpf; 1543 udp_t *udp, *udp1; 1544 1545 udp = Q_TO_UDP(q); 1546 1547 tcr = (struct T_conn_req *)mp->b_rptr; 1548 1549 /* A bit of sanity checking */ 1550 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 1551 udp_err_ack(q, mp, TPROTO, 0); 1552 return; 1553 } 1554 /* 1555 * This UDP must have bound to a port already before doing 1556 * a connect. 1557 */ 1558 if (udp->udp_state == TS_UNBND) { 1559 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1560 "udp_connect: bad state, %u", udp->udp_state); 1561 udp_err_ack(q, mp, TOUTSTATE, 0); 1562 return; 1563 } 1564 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 1565 1566 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1567 1568 if (udp->udp_state == TS_DATA_XFER) { 1569 /* Already connected - clear out state */ 1570 mutex_enter(&udpf->uf_lock); 1571 udp->udp_v6src = udp->udp_bound_v6src; 1572 udp->udp_state = TS_IDLE; 1573 mutex_exit(&udpf->uf_lock); 1574 } 1575 1576 if (tcr->OPT_length != 0) { 1577 udp_err_ack(q, mp, TBADOPT, 0); 1578 return; 1579 } 1580 1581 /* 1582 * Determine packet type based on type of address passed in 1583 * the request should contain an IPv4 or IPv6 address. 1584 * Make sure that address family matches the type of 1585 * family of the the address passed down 1586 */ 1587 switch (tcr->DEST_length) { 1588 default: 1589 udp_err_ack(q, mp, TBADADDR, 0); 1590 return; 1591 1592 case sizeof (sin_t): 1593 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 1594 sizeof (sin_t)); 1595 if (sin == NULL || !OK_32PTR((char *)sin)) { 1596 udp_err_ack(q, mp, TSYSERR, EINVAL); 1597 return; 1598 } 1599 if (udp->udp_family != AF_INET || 1600 sin->sin_family != AF_INET) { 1601 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1602 return; 1603 } 1604 v4dst = sin->sin_addr.s_addr; 1605 dstport = sin->sin_port; 1606 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1607 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1608 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1609 udp->udp_ip_snd_options_len; 1610 break; 1611 1612 case sizeof (sin6_t): 1613 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 1614 sizeof (sin6_t)); 1615 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1616 udp_err_ack(q, mp, TSYSERR, EINVAL); 1617 return; 1618 } 1619 if (udp->udp_family != AF_INET6 || 1620 sin6->sin6_family != AF_INET6) { 1621 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1622 return; 1623 } 1624 v6dst = sin6->sin6_addr; 1625 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 1626 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 1627 udp->udp_ipversion = IPV4_VERSION; 1628 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1629 UDPH_SIZE + udp->udp_ip_snd_options_len; 1630 flowinfo = 0; 1631 } else { 1632 udp->udp_ipversion = IPV6_VERSION; 1633 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1634 flowinfo = sin6->sin6_flowinfo; 1635 } 1636 dstport = sin6->sin6_port; 1637 break; 1638 } 1639 if (dstport == 0) { 1640 udp_err_ack(q, mp, TBADADDR, 0); 1641 return; 1642 } 1643 1644 /* 1645 * Create a default IP header with no IP options. 1646 */ 1647 udp->udp_dstport = dstport; 1648 if (udp->udp_ipversion == IPV4_VERSION) { 1649 /* 1650 * Interpret a zero destination to mean loopback. 1651 * Update the T_CONN_REQ (sin/sin6) since it is used to 1652 * generate the T_CONN_CON. 1653 */ 1654 if (v4dst == INADDR_ANY) { 1655 v4dst = htonl(INADDR_LOOPBACK); 1656 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1657 if (udp->udp_family == AF_INET) { 1658 sin->sin_addr.s_addr = v4dst; 1659 } else { 1660 sin6->sin6_addr = v6dst; 1661 } 1662 } 1663 udp->udp_v6dst = v6dst; 1664 udp->udp_flowinfo = 0; 1665 1666 /* 1667 * If the destination address is multicast and 1668 * an outgoing multicast interface has been set, 1669 * use the address of that interface as our 1670 * source address if no source address has been set. 1671 */ 1672 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 1673 CLASSD(v4dst) && 1674 udp->udp_multicast_if_addr != INADDR_ANY) { 1675 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 1676 &udp->udp_v6src); 1677 } 1678 } else { 1679 ASSERT(udp->udp_ipversion == IPV6_VERSION); 1680 /* 1681 * Interpret a zero destination to mean loopback. 1682 * Update the T_CONN_REQ (sin/sin6) since it is used to 1683 * generate the T_CONN_CON. 1684 */ 1685 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 1686 v6dst = ipv6_loopback; 1687 sin6->sin6_addr = v6dst; 1688 } 1689 udp->udp_v6dst = v6dst; 1690 udp->udp_flowinfo = flowinfo; 1691 /* 1692 * If the destination address is multicast and 1693 * an outgoing multicast interface has been set, 1694 * then the ip bind logic will pick the correct source 1695 * address (i.e. matching the outgoing multicast interface). 1696 */ 1697 } 1698 1699 /* 1700 * Verify that the src/port/dst/port is unique for all 1701 * connections in TS_DATA_XFER 1702 */ 1703 mutex_enter(&udpf->uf_lock); 1704 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 1705 if (udp1->udp_state != TS_DATA_XFER) 1706 continue; 1707 if (udp->udp_port != udp1->udp_port || 1708 udp->udp_ipversion != udp1->udp_ipversion || 1709 dstport != udp1->udp_dstport || 1710 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 1711 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst)) 1712 continue; 1713 mutex_exit(&udpf->uf_lock); 1714 udp_err_ack(q, mp, TBADADDR, 0); 1715 return; 1716 } 1717 udp->udp_state = TS_DATA_XFER; 1718 mutex_exit(&udpf->uf_lock); 1719 1720 /* 1721 * Send down bind to IP to verify that there is a route 1722 * and to determine the source address. 1723 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 1724 */ 1725 if (udp->udp_family == AF_INET) 1726 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t)); 1727 else 1728 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); 1729 if (mp1 == NULL) { 1730 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1731 bind_failed: 1732 mutex_enter(&udpf->uf_lock); 1733 udp->udp_state = TS_IDLE; 1734 mutex_exit(&udpf->uf_lock); 1735 return; 1736 } 1737 1738 /* 1739 * We also have to send a connection confirmation to 1740 * keep TLI happy. Prepare it for udp_rput. 1741 */ 1742 if (udp->udp_family == AF_INET) 1743 mp2 = mi_tpi_conn_con(NULL, (char *)sin, 1744 sizeof (*sin), NULL, 0); 1745 else 1746 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, 1747 sizeof (*sin6), NULL, 0); 1748 if (mp2 == NULL) { 1749 freemsg(mp1); 1750 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1751 goto bind_failed; 1752 } 1753 1754 mp = mi_tpi_ok_ack_alloc(mp); 1755 if (mp == NULL) { 1756 /* Unable to reuse the T_CONN_REQ for the ack. */ 1757 freemsg(mp2); 1758 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 1759 goto bind_failed; 1760 } 1761 1762 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 1763 linkb(mp1, mp); 1764 linkb(mp1, mp2); 1765 1766 if (udp->udp_family == AF_INET) 1767 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1768 else 1769 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1770 1771 if (mp1 != NULL) 1772 udp_rput_other(_RD(q), mp1); 1773 else 1774 CONN_INC_REF(udp->udp_connp); 1775 } 1776 1777 static int 1778 udp_close(queue_t *q) 1779 { 1780 conn_t *connp = Q_TO_CONN(UDP_WR(q)); 1781 udp_t *udp; 1782 queue_t *ip_rq = RD(UDP_WR(q)); 1783 1784 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1785 udp = connp->conn_udp; 1786 1787 ip_quiesce_conn(connp); 1788 /* 1789 * Disable read-side synchronous stream 1790 * interface and drain any queued data. 1791 */ 1792 udp_rcv_drain(q, udp, B_TRUE); 1793 ASSERT(!udp->udp_direct_sockfs); 1794 1795 qprocsoff(q); 1796 1797 /* restore IP module's high and low water marks to default values */ 1798 ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat; 1799 WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat; 1800 WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat; 1801 1802 ASSERT(udp->udp_rcv_cnt == 0); 1803 ASSERT(udp->udp_rcv_msgcnt == 0); 1804 ASSERT(udp->udp_rcv_list_head == NULL); 1805 ASSERT(udp->udp_rcv_list_tail == NULL); 1806 1807 /* connp is now single threaded. */ 1808 udp_close_free(connp); 1809 /* 1810 * Restore connp as an IP endpoint. We don't need 1811 * any locks since we are now single threaded 1812 */ 1813 connp->conn_flags &= ~IPCL_UDP; 1814 connp->conn_state_flags &= 1815 ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED); 1816 return (0); 1817 } 1818 1819 /* 1820 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn 1821 */ 1822 void 1823 udp_quiesce_conn(conn_t *connp) 1824 { 1825 udp_t *udp = connp->conn_udp; 1826 1827 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 1828 /* 1829 * Running in cluster mode - register unbind information 1830 */ 1831 if (udp->udp_ipversion == IPV4_VERSION) { 1832 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 1833 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 1834 (in_port_t)udp->udp_port); 1835 } else { 1836 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 1837 (uint8_t *)(&(udp->udp_v6src)), 1838 (in_port_t)udp->udp_port); 1839 } 1840 } 1841 1842 udp_bind_hash_remove(udp, B_FALSE); 1843 1844 mutex_enter(&connp->conn_lock); 1845 while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 || 1846 udp->udp_mode != UDP_MT_HOT) { 1847 cv_wait(&connp->conn_cv, &connp->conn_lock); 1848 } 1849 mutex_exit(&connp->conn_lock); 1850 } 1851 1852 void 1853 udp_close_free(conn_t *connp) 1854 { 1855 udp_t *udp = connp->conn_udp; 1856 1857 /* If there are any options associated with the stream, free them. */ 1858 if (udp->udp_ip_snd_options) { 1859 mi_free((char *)udp->udp_ip_snd_options); 1860 udp->udp_ip_snd_options = NULL; 1861 } 1862 1863 if (udp->udp_ip_rcv_options) { 1864 mi_free((char *)udp->udp_ip_rcv_options); 1865 udp->udp_ip_rcv_options = NULL; 1866 } 1867 1868 /* Free memory associated with sticky options */ 1869 if (udp->udp_sticky_hdrs_len != 0) { 1870 kmem_free(udp->udp_sticky_hdrs, 1871 udp->udp_sticky_hdrs_len); 1872 udp->udp_sticky_hdrs = NULL; 1873 udp->udp_sticky_hdrs_len = 0; 1874 } 1875 1876 if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 1877 kmem_free(udp->udp_sticky_ipp.ipp_hopopts, 1878 udp->udp_sticky_ipp.ipp_hopoptslen); 1879 udp->udp_sticky_ipp.ipp_hopopts = NULL; 1880 } 1881 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 1882 kmem_free(udp->udp_sticky_ipp.ipp_rtdstopts, 1883 udp->udp_sticky_ipp.ipp_rtdstoptslen); 1884 udp->udp_sticky_ipp.ipp_rtdstopts = NULL; 1885 } 1886 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 1887 kmem_free(udp->udp_sticky_ipp.ipp_rthdr, 1888 udp->udp_sticky_ipp.ipp_rthdrlen); 1889 udp->udp_sticky_ipp.ipp_rthdr = NULL; 1890 } 1891 if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 1892 kmem_free(udp->udp_sticky_ipp.ipp_dstopts, 1893 udp->udp_sticky_ipp.ipp_dstoptslen); 1894 udp->udp_sticky_ipp.ipp_dstopts = NULL; 1895 } 1896 udp->udp_sticky_ipp.ipp_fields &= 1897 ~(IPPF_HOPOPTS|IPPF_RTDSTOPTS|IPPF_RTHDR|IPPF_DSTOPTS); 1898 1899 udp->udp_connp = NULL; 1900 connp->conn_udp = NULL; 1901 kmem_cache_free(udp_cache, udp); 1902 } 1903 1904 /* 1905 * This routine handles each T_DISCON_REQ message passed to udp 1906 * as an indicating that UDP is no longer connected. This results 1907 * in sending a T_BIND_REQ to IP to restore the binding to just 1908 * the local address/port. 1909 * 1910 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1911 * T_BIND_REQ - specifying just the local address/port 1912 * T_OK_ACK - for the T_DISCON_REQ 1913 * 1914 * The disconnect completes in udp_rput. 1915 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 1916 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1917 * it to an error ack for the appropriate primitive. 1918 */ 1919 static void 1920 udp_disconnect(queue_t *q, mblk_t *mp) 1921 { 1922 udp_t *udp = Q_TO_UDP(q); 1923 mblk_t *mp1; 1924 udp_fanout_t *udpf; 1925 1926 if (udp->udp_state != TS_DATA_XFER) { 1927 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1928 "udp_disconnect: bad state, %u", udp->udp_state); 1929 udp_err_ack(q, mp, TOUTSTATE, 0); 1930 return; 1931 } 1932 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1933 mutex_enter(&udpf->uf_lock); 1934 udp->udp_v6src = udp->udp_bound_v6src; 1935 udp->udp_state = TS_IDLE; 1936 mutex_exit(&udpf->uf_lock); 1937 1938 /* 1939 * Send down bind to IP to remove the full binding and revert 1940 * to the local address binding. 1941 */ 1942 if (udp->udp_family == AF_INET) 1943 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 1944 else 1945 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 1946 if (mp1 == NULL) { 1947 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1948 return; 1949 } 1950 mp = mi_tpi_ok_ack_alloc(mp); 1951 if (mp == NULL) { 1952 /* Unable to reuse the T_DISCON_REQ for the ack. */ 1953 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 1954 return; 1955 } 1956 1957 if (udp->udp_family == AF_INET6) { 1958 int error; 1959 1960 /* Rebuild the header template */ 1961 error = udp_build_hdrs(q, udp); 1962 if (error != 0) { 1963 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 1964 freemsg(mp1); 1965 return; 1966 } 1967 } 1968 mutex_enter(&udpf->uf_lock); 1969 udp->udp_discon_pending = 1; 1970 mutex_exit(&udpf->uf_lock); 1971 1972 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 1973 linkb(mp1, mp); 1974 1975 if (udp->udp_family == AF_INET6) 1976 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1977 else 1978 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1979 1980 if (mp1 != NULL) 1981 udp_rput_other(_RD(q), mp1); 1982 else 1983 CONN_INC_REF(udp->udp_connp); 1984 } 1985 1986 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1987 static void 1988 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1989 { 1990 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1991 putnext(UDP_RD(q), mp); 1992 } 1993 1994 /* Shorthand to generate and send TPI error acks to our client */ 1995 static void 1996 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1997 int sys_error) 1998 { 1999 struct T_error_ack *teackp; 2000 2001 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2002 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2003 teackp = (struct T_error_ack *)mp->b_rptr; 2004 teackp->ERROR_prim = primitive; 2005 teackp->TLI_error = t_error; 2006 teackp->UNIX_error = sys_error; 2007 putnext(UDP_RD(q), mp); 2008 } 2009 } 2010 2011 /*ARGSUSED*/ 2012 static int 2013 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2014 { 2015 int i; 2016 2017 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2018 if (udp_g_epriv_ports[i] != 0) 2019 (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); 2020 } 2021 return (0); 2022 } 2023 2024 /* ARGSUSED */ 2025 static int 2026 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2027 cred_t *cr) 2028 { 2029 long new_value; 2030 int i; 2031 2032 /* 2033 * Fail the request if the new value does not lie within the 2034 * port number limits. 2035 */ 2036 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2037 new_value <= 0 || new_value >= 65536) { 2038 return (EINVAL); 2039 } 2040 2041 /* Check if the value is already in the list */ 2042 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2043 if (new_value == udp_g_epriv_ports[i]) { 2044 return (EEXIST); 2045 } 2046 } 2047 /* Find an empty slot */ 2048 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2049 if (udp_g_epriv_ports[i] == 0) 2050 break; 2051 } 2052 if (i == udp_g_num_epriv_ports) { 2053 return (EOVERFLOW); 2054 } 2055 2056 /* Set the new value */ 2057 udp_g_epriv_ports[i] = (in_port_t)new_value; 2058 return (0); 2059 } 2060 2061 /* ARGSUSED */ 2062 static int 2063 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2064 cred_t *cr) 2065 { 2066 long new_value; 2067 int i; 2068 2069 /* 2070 * Fail the request if the new value does not lie within the 2071 * port number limits. 2072 */ 2073 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2074 new_value <= 0 || new_value >= 65536) { 2075 return (EINVAL); 2076 } 2077 2078 /* Check that the value is already in the list */ 2079 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2080 if (udp_g_epriv_ports[i] == new_value) 2081 break; 2082 } 2083 if (i == udp_g_num_epriv_ports) { 2084 return (ESRCH); 2085 } 2086 2087 /* Clear the value */ 2088 udp_g_epriv_ports[i] = 0; 2089 return (0); 2090 } 2091 2092 /* At minimum we need 4 bytes of UDP header */ 2093 #define ICMP_MIN_UDP_HDR 4 2094 2095 /* 2096 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2097 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2098 * Assumes that IP has pulled up everything up to and including the ICMP header. 2099 * An M_CTL could potentially come here from some other module (i.e. if UDP 2100 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2101 * does not have enough ICMP information , following STREAMS conventions, 2102 * we send it upstream assuming it is an M_CTL we don't understand. 2103 */ 2104 static void 2105 udp_icmp_error(queue_t *q, mblk_t *mp) 2106 { 2107 icmph_t *icmph; 2108 ipha_t *ipha; 2109 int iph_hdr_length; 2110 udpha_t *udpha; 2111 sin_t sin; 2112 sin6_t sin6; 2113 mblk_t *mp1; 2114 int error = 0; 2115 size_t mp_size = MBLKL(mp); 2116 udp_t *udp = Q_TO_UDP(q); 2117 2118 /* 2119 * Assume IP provides aligned packets - otherwise toss 2120 */ 2121 if (!OK_32PTR(mp->b_rptr)) { 2122 freemsg(mp); 2123 return; 2124 } 2125 2126 /* 2127 * Verify that we have a complete IP header and the application has 2128 * asked for errors. If not, send it upstream. 2129 */ 2130 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2131 noticmpv4: 2132 putnext(UDP_RD(q), mp); 2133 return; 2134 } 2135 2136 ipha = (ipha_t *)mp->b_rptr; 2137 /* 2138 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2139 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6. 2140 */ 2141 switch (IPH_HDR_VERSION(ipha)) { 2142 case IPV6_VERSION: 2143 udp_icmp_error_ipv6(q, mp); 2144 return; 2145 case IPV4_VERSION: 2146 break; 2147 default: 2148 goto noticmpv4; 2149 } 2150 2151 /* Skip past the outer IP and ICMP headers */ 2152 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2153 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2154 /* 2155 * If we don't have the correct outer IP header length or if the ULP 2156 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2157 * send the packet upstream. 2158 */ 2159 if (iph_hdr_length < sizeof (ipha_t) || 2160 ipha->ipha_protocol != IPPROTO_ICMP || 2161 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2162 goto noticmpv4; 2163 } 2164 ipha = (ipha_t *)&icmph[1]; 2165 2166 /* Skip past the inner IP and find the ULP header */ 2167 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2168 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2169 /* 2170 * If we don't have the correct inner IP header length or if the ULP 2171 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2172 * bytes of UDP header, send it upstream. 2173 */ 2174 if (iph_hdr_length < sizeof (ipha_t) || 2175 ipha->ipha_protocol != IPPROTO_UDP || 2176 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2177 goto noticmpv4; 2178 } 2179 2180 switch (icmph->icmph_type) { 2181 case ICMP_DEST_UNREACHABLE: 2182 switch (icmph->icmph_code) { 2183 case ICMP_FRAGMENTATION_NEEDED: 2184 /* 2185 * IP has already adjusted the path MTU. 2186 * XXX Somehow pass MTU indication to application? 2187 */ 2188 break; 2189 case ICMP_PORT_UNREACHABLE: 2190 case ICMP_PROTOCOL_UNREACHABLE: 2191 error = ECONNREFUSED; 2192 break; 2193 default: 2194 /* Transient errors */ 2195 break; 2196 } 2197 break; 2198 default: 2199 /* Transient errors */ 2200 break; 2201 } 2202 if (error == 0) { 2203 freemsg(mp); 2204 return; 2205 } 2206 2207 switch (udp->udp_family) { 2208 case AF_INET: 2209 sin = sin_null; 2210 sin.sin_family = AF_INET; 2211 sin.sin_addr.s_addr = ipha->ipha_dst; 2212 sin.sin_port = udpha->uha_dst_port; 2213 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2214 error); 2215 break; 2216 case AF_INET6: 2217 sin6 = sin6_null; 2218 sin6.sin6_family = AF_INET6; 2219 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2220 sin6.sin6_port = udpha->uha_dst_port; 2221 2222 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2223 NULL, 0, error); 2224 break; 2225 } 2226 if (mp1) 2227 putnext(UDP_RD(q), mp1); 2228 freemsg(mp); 2229 } 2230 2231 /* 2232 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2233 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2234 * Assumes that IP has pulled up all the extension headers as well as the 2235 * ICMPv6 header. 2236 * An M_CTL could potentially come here from some other module (i.e. if UDP 2237 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2238 * does not have enough ICMP information , following STREAMS conventions, 2239 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2240 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version 2241 * field (when cast to ipha_t in udp_icmp_error). 2242 */ 2243 static void 2244 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2245 { 2246 icmp6_t *icmp6; 2247 ip6_t *ip6h, *outer_ip6h; 2248 uint16_t hdr_length; 2249 uint8_t *nexthdrp; 2250 udpha_t *udpha; 2251 sin6_t sin6; 2252 mblk_t *mp1; 2253 int error = 0; 2254 size_t mp_size = MBLKL(mp); 2255 udp_t *udp = Q_TO_UDP(q); 2256 2257 /* 2258 * Verify that we have a complete IP header. If not, send it upstream. 2259 */ 2260 if (mp_size < sizeof (ip6_t)) { 2261 noticmpv6: 2262 putnext(UDP_RD(q), mp); 2263 return; 2264 } 2265 2266 outer_ip6h = (ip6_t *)mp->b_rptr; 2267 /* 2268 * Verify this is an ICMPV6 packet, else send it upstream 2269 */ 2270 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2271 hdr_length = IPV6_HDR_LEN; 2272 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2273 &nexthdrp) || 2274 *nexthdrp != IPPROTO_ICMPV6) { 2275 goto noticmpv6; 2276 } 2277 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2278 ip6h = (ip6_t *)&icmp6[1]; 2279 /* 2280 * Verify we have a complete ICMP and inner IP header. 2281 */ 2282 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2283 goto noticmpv6; 2284 2285 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2286 goto noticmpv6; 2287 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2288 /* 2289 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2290 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2291 * packet upstream. 2292 */ 2293 if ((*nexthdrp != IPPROTO_UDP) || 2294 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2295 goto noticmpv6; 2296 } 2297 2298 switch (icmp6->icmp6_type) { 2299 case ICMP6_DST_UNREACH: 2300 switch (icmp6->icmp6_code) { 2301 case ICMP6_DST_UNREACH_NOPORT: 2302 error = ECONNREFUSED; 2303 break; 2304 case ICMP6_DST_UNREACH_ADMIN: 2305 case ICMP6_DST_UNREACH_NOROUTE: 2306 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2307 case ICMP6_DST_UNREACH_ADDR: 2308 /* Transient errors */ 2309 break; 2310 default: 2311 break; 2312 } 2313 break; 2314 case ICMP6_PACKET_TOO_BIG: { 2315 struct T_unitdata_ind *tudi; 2316 struct T_opthdr *toh; 2317 size_t udi_size; 2318 mblk_t *newmp; 2319 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2320 sizeof (struct ip6_mtuinfo); 2321 sin6_t *sin6; 2322 struct ip6_mtuinfo *mtuinfo; 2323 2324 /* 2325 * If the application has requested to receive path mtu 2326 * information, send up an empty message containing an 2327 * IPV6_PATHMTU ancillary data item. 2328 */ 2329 if (!udp->udp_ipv6_recvpathmtu) 2330 break; 2331 2332 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2333 opt_length; 2334 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2335 BUMP_MIB(&udp_mib, udpInErrors); 2336 break; 2337 } 2338 2339 /* 2340 * newmp->b_cont is left to NULL on purpose. This is an 2341 * empty message containing only ancillary data. 2342 */ 2343 newmp->b_datap->db_type = M_PROTO; 2344 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2345 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2346 tudi->PRIM_type = T_UNITDATA_IND; 2347 tudi->SRC_length = sizeof (sin6_t); 2348 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2349 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2350 tudi->OPT_length = opt_length; 2351 2352 sin6 = (sin6_t *)&tudi[1]; 2353 bzero(sin6, sizeof (sin6_t)); 2354 sin6->sin6_family = AF_INET6; 2355 sin6->sin6_addr = udp->udp_v6dst; 2356 2357 toh = (struct T_opthdr *)&sin6[1]; 2358 toh->level = IPPROTO_IPV6; 2359 toh->name = IPV6_PATHMTU; 2360 toh->len = opt_length; 2361 toh->status = 0; 2362 2363 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2364 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2365 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2366 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2367 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2368 /* 2369 * We've consumed everything we need from the original 2370 * message. Free it, then send our empty message. 2371 */ 2372 freemsg(mp); 2373 putnext(UDP_RD(q), newmp); 2374 return; 2375 } 2376 case ICMP6_TIME_EXCEEDED: 2377 /* Transient errors */ 2378 break; 2379 case ICMP6_PARAM_PROB: 2380 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2381 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2382 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2383 (uchar_t *)nexthdrp) { 2384 error = ECONNREFUSED; 2385 break; 2386 } 2387 break; 2388 } 2389 if (error == 0) { 2390 freemsg(mp); 2391 return; 2392 } 2393 2394 sin6 = sin6_null; 2395 sin6.sin6_family = AF_INET6; 2396 sin6.sin6_addr = ip6h->ip6_dst; 2397 sin6.sin6_port = udpha->uha_dst_port; 2398 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2399 2400 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2401 error); 2402 if (mp1) 2403 putnext(UDP_RD(q), mp1); 2404 freemsg(mp); 2405 } 2406 2407 /* 2408 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 2409 * The local address is filled in if endpoint is bound. The remote address 2410 * is filled in if remote address has been precified ("connected endpoint") 2411 * (The concept of connected CLTS sockets is alien to published TPI 2412 * but we support it anyway). 2413 */ 2414 static void 2415 udp_addr_req(queue_t *q, mblk_t *mp) 2416 { 2417 sin_t *sin; 2418 sin6_t *sin6; 2419 mblk_t *ackmp; 2420 struct T_addr_ack *taa; 2421 udp_t *udp = Q_TO_UDP(q); 2422 2423 /* Make it large enough for worst case */ 2424 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2425 2 * sizeof (sin6_t), 1); 2426 if (ackmp == NULL) { 2427 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2428 return; 2429 } 2430 taa = (struct T_addr_ack *)ackmp->b_rptr; 2431 2432 bzero(taa, sizeof (struct T_addr_ack)); 2433 ackmp->b_wptr = (uchar_t *)&taa[1]; 2434 2435 taa->PRIM_type = T_ADDR_ACK; 2436 ackmp->b_datap->db_type = M_PCPROTO; 2437 /* 2438 * Note: Following code assumes 32 bit alignment of basic 2439 * data structures like sin_t and struct T_addr_ack. 2440 */ 2441 if (udp->udp_state != TS_UNBND) { 2442 /* 2443 * Fill in local address first 2444 */ 2445 taa->LOCADDR_offset = sizeof (*taa); 2446 if (udp->udp_family == AF_INET) { 2447 taa->LOCADDR_length = sizeof (sin_t); 2448 sin = (sin_t *)&taa[1]; 2449 /* Fill zeroes and then initialize non-zero fields */ 2450 *sin = sin_null; 2451 sin->sin_family = AF_INET; 2452 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2453 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2454 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2455 sin->sin_addr.s_addr); 2456 } else { 2457 /* 2458 * INADDR_ANY 2459 * udp_v6src is not set, we might be bound to 2460 * broadcast/multicast. Use udp_bound_v6src as 2461 * local address instead (that could 2462 * also still be INADDR_ANY) 2463 */ 2464 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2465 sin->sin_addr.s_addr); 2466 } 2467 sin->sin_port = udp->udp_port; 2468 ackmp->b_wptr = (uchar_t *)&sin[1]; 2469 if (udp->udp_state == TS_DATA_XFER) { 2470 /* 2471 * connected, fill remote address too 2472 */ 2473 taa->REMADDR_length = sizeof (sin_t); 2474 /* assumed 32-bit alignment */ 2475 taa->REMADDR_offset = taa->LOCADDR_offset + 2476 taa->LOCADDR_length; 2477 2478 sin = (sin_t *)(ackmp->b_rptr + 2479 taa->REMADDR_offset); 2480 /* initialize */ 2481 *sin = sin_null; 2482 sin->sin_family = AF_INET; 2483 sin->sin_addr.s_addr = 2484 V4_PART_OF_V6(udp->udp_v6dst); 2485 sin->sin_port = udp->udp_dstport; 2486 ackmp->b_wptr = (uchar_t *)&sin[1]; 2487 } 2488 } else { 2489 taa->LOCADDR_length = sizeof (sin6_t); 2490 sin6 = (sin6_t *)&taa[1]; 2491 /* Fill zeroes and then initialize non-zero fields */ 2492 *sin6 = sin6_null; 2493 sin6->sin6_family = AF_INET6; 2494 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2495 sin6->sin6_addr = udp->udp_v6src; 2496 } else { 2497 /* 2498 * UNSPECIFIED 2499 * udp_v6src is not set, we might be bound to 2500 * broadcast/multicast. Use udp_bound_v6src as 2501 * local address instead (that could 2502 * also still be UNSPECIFIED) 2503 */ 2504 sin6->sin6_addr = 2505 udp->udp_bound_v6src; 2506 } 2507 sin6->sin6_port = udp->udp_port; 2508 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2509 if (udp->udp_state == TS_DATA_XFER) { 2510 /* 2511 * connected, fill remote address too 2512 */ 2513 taa->REMADDR_length = sizeof (sin6_t); 2514 /* assumed 32-bit alignment */ 2515 taa->REMADDR_offset = taa->LOCADDR_offset + 2516 taa->LOCADDR_length; 2517 2518 sin6 = (sin6_t *)(ackmp->b_rptr + 2519 taa->REMADDR_offset); 2520 /* initialize */ 2521 *sin6 = sin6_null; 2522 sin6->sin6_family = AF_INET6; 2523 sin6->sin6_addr = udp->udp_v6dst; 2524 sin6->sin6_port = udp->udp_dstport; 2525 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2526 } 2527 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2528 } 2529 } 2530 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2531 putnext(UDP_RD(q), ackmp); 2532 } 2533 2534 static void 2535 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2536 { 2537 if (udp->udp_family == AF_INET) { 2538 *tap = udp_g_t_info_ack_ipv4; 2539 } else { 2540 *tap = udp_g_t_info_ack_ipv6; 2541 } 2542 tap->CURRENT_state = udp->udp_state; 2543 tap->OPT_size = udp_max_optsize; 2544 } 2545 2546 /* 2547 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2548 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2549 * udp_g_t_info_ack. The current state of the stream is copied from 2550 * udp_state. 2551 */ 2552 static void 2553 udp_capability_req(queue_t *q, mblk_t *mp) 2554 { 2555 t_uscalar_t cap_bits1; 2556 struct T_capability_ack *tcap; 2557 udp_t *udp = Q_TO_UDP(q); 2558 2559 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2560 2561 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2562 mp->b_datap->db_type, T_CAPABILITY_ACK); 2563 if (!mp) 2564 return; 2565 2566 tcap = (struct T_capability_ack *)mp->b_rptr; 2567 tcap->CAP_bits1 = 0; 2568 2569 if (cap_bits1 & TC1_INFO) { 2570 udp_copy_info(&tcap->INFO_ack, udp); 2571 tcap->CAP_bits1 |= TC1_INFO; 2572 } 2573 2574 putnext(UDP_RD(q), mp); 2575 } 2576 2577 /* 2578 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2579 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2580 * The current state of the stream is copied from udp_state. 2581 */ 2582 static void 2583 udp_info_req(queue_t *q, mblk_t *mp) 2584 { 2585 udp_t *udp = Q_TO_UDP(q); 2586 2587 /* Create a T_INFO_ACK message. */ 2588 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2589 T_INFO_ACK); 2590 if (!mp) 2591 return; 2592 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2593 putnext(UDP_RD(q), mp); 2594 } 2595 2596 /* 2597 * IP recognizes seven kinds of bind requests: 2598 * 2599 * - A zero-length address binds only to the protocol number. 2600 * 2601 * - A 4-byte address is treated as a request to 2602 * validate that the address is a valid local IPv4 2603 * address, appropriate for an application to bind to. 2604 * IP does the verification, but does not make any note 2605 * of the address at this time. 2606 * 2607 * - A 16-byte address contains is treated as a request 2608 * to validate a local IPv6 address, as the 4-byte 2609 * address case above. 2610 * 2611 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2612 * use it for the inbound fanout of packets. 2613 * 2614 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2615 * use it for the inbound fanout of packets. 2616 * 2617 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2618 * information consisting of local and remote addresses 2619 * and ports. In this case, the addresses are both 2620 * validated as appropriate for this operation, and, if 2621 * so, the information is retained for use in the 2622 * inbound fanout. 2623 * 2624 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2625 * fanout information, like the 12-byte case above. 2626 * 2627 * IP will also fill in the IRE request mblk with information 2628 * regarding our peer. In all cases, we notify IP of our protocol 2629 * type by appending a single protocol byte to the bind request. 2630 */ 2631 static mblk_t * 2632 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2633 { 2634 char *cp; 2635 mblk_t *mp; 2636 struct T_bind_req *tbr; 2637 ipa_conn_t *ac; 2638 ipa6_conn_t *ac6; 2639 sin_t *sin; 2640 sin6_t *sin6; 2641 2642 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2643 2644 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2645 if (!mp) 2646 return (mp); 2647 mp->b_datap->db_type = M_PROTO; 2648 tbr = (struct T_bind_req *)mp->b_rptr; 2649 tbr->PRIM_type = bind_prim; 2650 tbr->ADDR_offset = sizeof (*tbr); 2651 tbr->CONIND_number = 0; 2652 tbr->ADDR_length = addr_length; 2653 cp = (char *)&tbr[1]; 2654 switch (addr_length) { 2655 case sizeof (ipa_conn_t): 2656 ASSERT(udp->udp_family == AF_INET); 2657 /* Append a request for an IRE */ 2658 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2659 if (!mp->b_cont) { 2660 freemsg(mp); 2661 return (NULL); 2662 } 2663 mp->b_cont->b_wptr += sizeof (ire_t); 2664 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2665 2666 /* cp known to be 32 bit aligned */ 2667 ac = (ipa_conn_t *)cp; 2668 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2669 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 2670 ac->ac_fport = udp->udp_dstport; 2671 ac->ac_lport = udp->udp_port; 2672 break; 2673 2674 case sizeof (ipa6_conn_t): 2675 ASSERT(udp->udp_family == AF_INET6); 2676 /* Append a request for an IRE */ 2677 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2678 if (!mp->b_cont) { 2679 freemsg(mp); 2680 return (NULL); 2681 } 2682 mp->b_cont->b_wptr += sizeof (ire_t); 2683 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2684 2685 /* cp known to be 32 bit aligned */ 2686 ac6 = (ipa6_conn_t *)cp; 2687 ac6->ac6_laddr = udp->udp_v6src; 2688 ac6->ac6_faddr = udp->udp_v6dst; 2689 ac6->ac6_fport = udp->udp_dstport; 2690 ac6->ac6_lport = udp->udp_port; 2691 break; 2692 2693 case sizeof (sin_t): 2694 ASSERT(udp->udp_family == AF_INET); 2695 /* Append a request for an IRE */ 2696 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2697 if (!mp->b_cont) { 2698 freemsg(mp); 2699 return (NULL); 2700 } 2701 mp->b_cont->b_wptr += sizeof (ire_t); 2702 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2703 2704 sin = (sin_t *)cp; 2705 *sin = sin_null; 2706 sin->sin_family = AF_INET; 2707 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2708 sin->sin_port = udp->udp_port; 2709 break; 2710 2711 case sizeof (sin6_t): 2712 ASSERT(udp->udp_family == AF_INET6); 2713 /* Append a request for an IRE */ 2714 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2715 if (!mp->b_cont) { 2716 freemsg(mp); 2717 return (NULL); 2718 } 2719 mp->b_cont->b_wptr += sizeof (ire_t); 2720 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2721 2722 sin6 = (sin6_t *)cp; 2723 *sin6 = sin6_null; 2724 sin6->sin6_family = AF_INET6; 2725 sin6->sin6_addr = udp->udp_bound_v6src; 2726 sin6->sin6_port = udp->udp_port; 2727 break; 2728 } 2729 /* Add protocol number to end */ 2730 cp[addr_length] = (char)IPPROTO_UDP; 2731 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2732 return (mp); 2733 } 2734 2735 /* 2736 * This is the open routine for udp. It allocates a udp_t structure for 2737 * the stream and, on the first open of the module, creates an ND table. 2738 */ 2739 /* ARGSUSED */ 2740 static int 2741 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2742 { 2743 int err; 2744 udp_t *udp; 2745 conn_t *connp; 2746 zoneid_t zoneid = getzoneid(); 2747 queue_t *ip_wq; 2748 char *name; 2749 2750 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2751 2752 /* If the stream is already open, return immediately. */ 2753 if (q->q_ptr != NULL) 2754 return (0); 2755 2756 /* If this is not a push of udp as a module, fail. */ 2757 if (sflag != MODOPEN) 2758 return (EINVAL); 2759 2760 q->q_hiwat = udp_recv_hiwat; 2761 WR(q)->q_hiwat = udp_xmit_hiwat; 2762 WR(q)->q_lowat = udp_xmit_lowat; 2763 2764 /* Insert ourselves in the stream since we're about to walk q_next */ 2765 qprocson(q); 2766 2767 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2768 bzero(udp, sizeof (*udp)); 2769 2770 /* 2771 * UDP is supported only as a module and it has to be pushed directly 2772 * above the device instance of IP. If UDP is pushed anywhere else 2773 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2774 * sake of MIB browsers and fail everything else. 2775 */ 2776 ip_wq = WR(q)->q_next; 2777 if (ip_wq->q_next != NULL || 2778 (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL || 2779 strcmp(name, IP_MOD_NAME) != 0 || 2780 ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) { 2781 /* Support just SNMP for MIB browsers */ 2782 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); 2783 connp->conn_rq = q; 2784 connp->conn_wq = WR(q); 2785 connp->conn_flags |= IPCL_UDPMOD; 2786 connp->conn_cred = credp; 2787 connp->conn_zoneid = zoneid; 2788 connp->conn_udp = udp; 2789 udp->udp_connp = connp; 2790 q->q_ptr = WR(q)->q_ptr = connp; 2791 crhold(credp); 2792 q->q_qinfo = &udp_snmp_rinit; 2793 WR(q)->q_qinfo = &udp_snmp_winit; 2794 return (0); 2795 } 2796 2797 /* 2798 * Initialize the udp_t structure for this stream. 2799 */ 2800 q = RD(ip_wq); 2801 connp = Q_TO_CONN(q); 2802 mutex_enter(&connp->conn_lock); 2803 connp->conn_proto = IPPROTO_UDP; 2804 connp->conn_flags |= IPCL_UDP; 2805 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2806 connp->conn_udp = udp; 2807 2808 /* Set the initial state of the stream and the privilege status. */ 2809 udp->udp_connp = connp; 2810 udp->udp_state = TS_UNBND; 2811 udp->udp_mode = UDP_MT_HOT; 2812 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2813 udp->udp_family = AF_INET6; 2814 udp->udp_ipversion = IPV6_VERSION; 2815 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2816 udp->udp_ttl = udp_ipv6_hoplimit; 2817 connp->conn_af_isv6 = B_TRUE; 2818 connp->conn_flags |= IPCL_ISV6; 2819 } else { 2820 udp->udp_family = AF_INET; 2821 udp->udp_ipversion = IPV4_VERSION; 2822 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2823 udp->udp_ttl = udp_ipv4_ttl; 2824 connp->conn_af_isv6 = B_FALSE; 2825 connp->conn_flags &= ~IPCL_ISV6; 2826 } 2827 2828 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2829 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2830 connp->conn_zoneid = zoneid; 2831 2832 if (connp->conn_flags & IPCL_SOCKET) { 2833 udp->udp_issocket = B_TRUE; 2834 udp->udp_direct_sockfs = B_TRUE; 2835 } 2836 mutex_exit(&connp->conn_lock); 2837 2838 /* 2839 * The transmit hiwat/lowat is only looked at on IP's queue. 2840 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2841 * getsockopts. 2842 */ 2843 q->q_hiwat = udp_recv_hiwat; 2844 WR(q)->q_hiwat = udp_xmit_hiwat; 2845 WR(q)->q_lowat = udp_xmit_lowat; 2846 2847 if (udp->udp_family == AF_INET6) { 2848 /* Build initial header template for transmit */ 2849 if ((err = udp_build_hdrs(q, udp)) != 0) { 2850 qprocsoff(UDP_RD(q)); 2851 udp->udp_connp = NULL; 2852 connp->conn_udp = NULL; 2853 kmem_cache_free(udp_cache, udp); 2854 return (err); 2855 } 2856 } 2857 2858 /* Set the Stream head write offset and high watermark. */ 2859 (void) mi_set_sth_wroff(UDP_RD(q), 2860 udp->udp_max_hdr_len + udp_wroff_extra); 2861 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 2862 2863 return (0); 2864 } 2865 2866 /* 2867 * Which UDP options OK to set through T_UNITDATA_REQ... 2868 */ 2869 /* ARGSUSED */ 2870 static boolean_t 2871 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2872 { 2873 return (B_TRUE); 2874 } 2875 2876 /* 2877 * This routine gets default values of certain options whose default 2878 * values are maintained by protcol specific code 2879 */ 2880 /* ARGSUSED */ 2881 int 2882 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2883 { 2884 int *i1 = (int *)ptr; 2885 2886 switch (level) { 2887 case IPPROTO_IP: 2888 switch (name) { 2889 case IP_MULTICAST_TTL: 2890 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2891 return (sizeof (uchar_t)); 2892 case IP_MULTICAST_LOOP: 2893 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2894 return (sizeof (uchar_t)); 2895 } 2896 break; 2897 case IPPROTO_IPV6: 2898 switch (name) { 2899 case IPV6_MULTICAST_HOPS: 2900 *i1 = IP_DEFAULT_MULTICAST_TTL; 2901 return (sizeof (int)); 2902 case IPV6_MULTICAST_LOOP: 2903 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2904 return (sizeof (int)); 2905 case IPV6_UNICAST_HOPS: 2906 *i1 = udp_ipv6_hoplimit; 2907 return (sizeof (int)); 2908 } 2909 break; 2910 } 2911 return (-1); 2912 } 2913 2914 /* 2915 * This routine retrieves the current status of socket options 2916 * and expects the caller to pass in the queue pointer of the 2917 * upper instance. It returns the size of the option retrieved. 2918 */ 2919 int 2920 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2921 { 2922 int *i1 = (int *)ptr; 2923 conn_t *connp; 2924 udp_t *udp; 2925 ip6_pkt_t *ipp; 2926 2927 q = UDP_WR(q); 2928 connp = Q_TO_CONN(q); 2929 udp = connp->conn_udp; 2930 ipp = &udp->udp_sticky_ipp; 2931 2932 switch (level) { 2933 case SOL_SOCKET: 2934 switch (name) { 2935 case SO_DEBUG: 2936 *i1 = udp->udp_debug; 2937 break; /* goto sizeof (int) option return */ 2938 case SO_REUSEADDR: 2939 *i1 = udp->udp_reuseaddr; 2940 break; /* goto sizeof (int) option return */ 2941 case SO_TYPE: 2942 *i1 = SOCK_DGRAM; 2943 break; /* goto sizeof (int) option return */ 2944 2945 /* 2946 * The following three items are available here, 2947 * but are only meaningful to IP. 2948 */ 2949 case SO_DONTROUTE: 2950 *i1 = udp->udp_dontroute; 2951 break; /* goto sizeof (int) option return */ 2952 case SO_USELOOPBACK: 2953 *i1 = udp->udp_useloopback; 2954 break; /* goto sizeof (int) option return */ 2955 case SO_BROADCAST: 2956 *i1 = udp->udp_broadcast; 2957 break; /* goto sizeof (int) option return */ 2958 2959 case SO_SNDBUF: 2960 *i1 = q->q_hiwat; 2961 break; /* goto sizeof (int) option return */ 2962 case SO_RCVBUF: 2963 *i1 = RD(q)->q_hiwat; 2964 break; /* goto sizeof (int) option return */ 2965 case SO_DGRAM_ERRIND: 2966 *i1 = udp->udp_dgram_errind; 2967 break; /* goto sizeof (int) option return */ 2968 case SO_RECVUCRED: 2969 *i1 = udp->udp_recvucred; 2970 break; /* goto sizeof (int) option return */ 2971 default: 2972 return (-1); 2973 } 2974 break; 2975 case IPPROTO_IP: 2976 if (udp->udp_family != AF_INET) 2977 return (-1); 2978 switch (name) { 2979 case IP_OPTIONS: 2980 case T_IP_OPTIONS: 2981 if (udp->udp_ip_rcv_options_len) 2982 bcopy(udp->udp_ip_rcv_options, ptr, 2983 udp->udp_ip_rcv_options_len); 2984 return (udp->udp_ip_rcv_options_len); 2985 case IP_TOS: 2986 case T_IP_TOS: 2987 *i1 = (int)udp->udp_type_of_service; 2988 break; /* goto sizeof (int) option return */ 2989 case IP_TTL: 2990 *i1 = (int)udp->udp_ttl; 2991 break; /* goto sizeof (int) option return */ 2992 case IP_MULTICAST_IF: 2993 /* 0 address if not set */ 2994 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2995 return (sizeof (ipaddr_t)); 2996 case IP_MULTICAST_TTL: 2997 *(uchar_t *)ptr = udp->udp_multicast_ttl; 2998 return (sizeof (uchar_t)); 2999 case IP_MULTICAST_LOOP: 3000 *ptr = connp->conn_multicast_loop; 3001 return (sizeof (uint8_t)); 3002 case IP_RECVOPTS: 3003 *i1 = udp->udp_recvopts; 3004 break; /* goto sizeof (int) option return */ 3005 case IP_RECVDSTADDR: 3006 *i1 = udp->udp_recvdstaddr; 3007 break; /* goto sizeof (int) option return */ 3008 case IP_RECVIF: 3009 *i1 = udp->udp_recvif; 3010 break; /* goto sizeof (int) option return */ 3011 case IP_RECVSLLA: 3012 *i1 = udp->udp_recvslla; 3013 break; /* goto sizeof (int) option return */ 3014 case IP_RECVTTL: 3015 *i1 = udp->udp_recvttl; 3016 break; /* goto sizeof (int) option return */ 3017 case IP_ADD_MEMBERSHIP: 3018 case IP_DROP_MEMBERSHIP: 3019 case IP_BLOCK_SOURCE: 3020 case IP_UNBLOCK_SOURCE: 3021 case IP_ADD_SOURCE_MEMBERSHIP: 3022 case IP_DROP_SOURCE_MEMBERSHIP: 3023 case MCAST_JOIN_GROUP: 3024 case MCAST_LEAVE_GROUP: 3025 case MCAST_BLOCK_SOURCE: 3026 case MCAST_UNBLOCK_SOURCE: 3027 case MCAST_JOIN_SOURCE_GROUP: 3028 case MCAST_LEAVE_SOURCE_GROUP: 3029 case IP_DONTFAILOVER_IF: 3030 /* cannot "get" the value for these */ 3031 return (-1); 3032 case IP_BOUND_IF: 3033 /* Zero if not set */ 3034 *i1 = udp->udp_bound_if; 3035 break; /* goto sizeof (int) option return */ 3036 case IP_UNSPEC_SRC: 3037 *i1 = udp->udp_unspec_source; 3038 break; /* goto sizeof (int) option return */ 3039 case IP_XMIT_IF: 3040 *i1 = udp->udp_xmit_if; 3041 break; /* goto sizeof (int) option return */ 3042 default: 3043 return (-1); 3044 } 3045 break; 3046 case IPPROTO_IPV6: 3047 if (udp->udp_family != AF_INET6) 3048 return (-1); 3049 switch (name) { 3050 case IPV6_UNICAST_HOPS: 3051 *i1 = (unsigned int)udp->udp_ttl; 3052 break; /* goto sizeof (int) option return */ 3053 case IPV6_MULTICAST_IF: 3054 /* 0 index if not set */ 3055 *i1 = udp->udp_multicast_if_index; 3056 break; /* goto sizeof (int) option return */ 3057 case IPV6_MULTICAST_HOPS: 3058 *i1 = udp->udp_multicast_ttl; 3059 break; /* goto sizeof (int) option return */ 3060 case IPV6_MULTICAST_LOOP: 3061 *i1 = connp->conn_multicast_loop; 3062 break; /* goto sizeof (int) option return */ 3063 case IPV6_JOIN_GROUP: 3064 case IPV6_LEAVE_GROUP: 3065 case MCAST_JOIN_GROUP: 3066 case MCAST_LEAVE_GROUP: 3067 case MCAST_BLOCK_SOURCE: 3068 case MCAST_UNBLOCK_SOURCE: 3069 case MCAST_JOIN_SOURCE_GROUP: 3070 case MCAST_LEAVE_SOURCE_GROUP: 3071 /* cannot "get" the value for these */ 3072 return (-1); 3073 case IPV6_BOUND_IF: 3074 /* Zero if not set */ 3075 *i1 = udp->udp_bound_if; 3076 break; /* goto sizeof (int) option return */ 3077 case IPV6_UNSPEC_SRC: 3078 *i1 = udp->udp_unspec_source; 3079 break; /* goto sizeof (int) option return */ 3080 case IPV6_RECVPKTINFO: 3081 *i1 = udp->udp_ipv6_recvpktinfo; 3082 break; /* goto sizeof (int) option return */ 3083 case IPV6_RECVTCLASS: 3084 *i1 = udp->udp_ipv6_recvtclass; 3085 break; /* goto sizeof (int) option return */ 3086 case IPV6_RECVPATHMTU: 3087 *i1 = udp->udp_ipv6_recvpathmtu; 3088 break; /* goto sizeof (int) option return */ 3089 case IPV6_RECVHOPLIMIT: 3090 *i1 = udp->udp_ipv6_recvhoplimit; 3091 break; /* goto sizeof (int) option return */ 3092 case IPV6_RECVHOPOPTS: 3093 *i1 = udp->udp_ipv6_recvhopopts; 3094 break; /* goto sizeof (int) option return */ 3095 case IPV6_RECVDSTOPTS: 3096 *i1 = udp->udp_ipv6_recvdstopts; 3097 break; /* goto sizeof (int) option return */ 3098 case _OLD_IPV6_RECVDSTOPTS: 3099 *i1 = udp->udp_old_ipv6_recvdstopts; 3100 break; /* goto sizeof (int) option return */ 3101 case IPV6_RECVRTHDRDSTOPTS: 3102 *i1 = udp->udp_ipv6_recvrthdrdstopts; 3103 break; /* goto sizeof (int) option return */ 3104 case IPV6_RECVRTHDR: 3105 *i1 = udp->udp_ipv6_recvrthdr; 3106 break; /* goto sizeof (int) option return */ 3107 case IPV6_PKTINFO: { 3108 /* XXX assumes that caller has room for max size! */ 3109 struct in6_pktinfo *pkti; 3110 3111 pkti = (struct in6_pktinfo *)ptr; 3112 if (ipp->ipp_fields & IPPF_IFINDEX) 3113 pkti->ipi6_ifindex = ipp->ipp_ifindex; 3114 else 3115 pkti->ipi6_ifindex = 0; 3116 if (ipp->ipp_fields & IPPF_ADDR) 3117 pkti->ipi6_addr = ipp->ipp_addr; 3118 else 3119 pkti->ipi6_addr = ipv6_all_zeros; 3120 return (sizeof (struct in6_pktinfo)); 3121 } 3122 case IPV6_TCLASS: 3123 if (ipp->ipp_fields & IPPF_TCLASS) 3124 *i1 = ipp->ipp_tclass; 3125 else 3126 *i1 = IPV6_FLOW_TCLASS( 3127 IPV6_DEFAULT_VERS_AND_FLOW); 3128 break; /* goto sizeof (int) option return */ 3129 case IPV6_NEXTHOP: { 3130 sin6_t *sin6 = (sin6_t *)ptr; 3131 3132 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 3133 return (0); 3134 *sin6 = sin6_null; 3135 sin6->sin6_family = AF_INET6; 3136 sin6->sin6_addr = ipp->ipp_nexthop; 3137 return (sizeof (sin6_t)); 3138 } 3139 case IPV6_HOPOPTS: 3140 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 3141 return (0); 3142 bcopy(ipp->ipp_hopopts, ptr, ipp->ipp_hopoptslen); 3143 return (ipp->ipp_hopoptslen); 3144 case IPV6_RTHDRDSTOPTS: 3145 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 3146 return (0); 3147 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 3148 return (ipp->ipp_rtdstoptslen); 3149 case IPV6_RTHDR: 3150 if (!(ipp->ipp_fields & IPPF_RTHDR)) 3151 return (0); 3152 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 3153 return (ipp->ipp_rthdrlen); 3154 case IPV6_DSTOPTS: 3155 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 3156 return (0); 3157 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 3158 return (ipp->ipp_dstoptslen); 3159 case IPV6_PATHMTU: 3160 return (ip_fill_mtuinfo(&udp->udp_v6dst, 3161 udp->udp_dstport, (struct ip6_mtuinfo *)ptr)); 3162 default: 3163 return (-1); 3164 } 3165 break; 3166 case IPPROTO_UDP: 3167 switch (name) { 3168 case UDP_ANONPRIVBIND: 3169 *i1 = udp->udp_anon_priv_bind; 3170 break; 3171 case UDP_EXCLBIND: 3172 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 3173 break; 3174 case UDP_RCVHDR: 3175 *i1 = udp->udp_rcvhdr ? 1 : 0; 3176 break; 3177 default: 3178 return (-1); 3179 } 3180 break; 3181 default: 3182 return (-1); 3183 } 3184 return (sizeof (int)); 3185 } 3186 3187 /* 3188 * This routine sets socket options; it expects the caller 3189 * to pass in the queue pointer of the upper instance. 3190 */ 3191 /* ARGSUSED */ 3192 int 3193 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3194 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3195 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3196 { 3197 int *i1 = (int *)invalp; 3198 boolean_t onoff = (*i1 == 0) ? 0 : 1; 3199 boolean_t checkonly; 3200 int error; 3201 conn_t *connp; 3202 udp_t *udp; 3203 3204 q = UDP_WR(q); 3205 connp = Q_TO_CONN(q); 3206 udp = connp->conn_udp; 3207 3208 switch (optset_context) { 3209 case SETFN_OPTCOM_CHECKONLY: 3210 checkonly = B_TRUE; 3211 /* 3212 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3213 * inlen != 0 implies value supplied and 3214 * we have to "pretend" to set it. 3215 * inlen == 0 implies that there is no 3216 * value part in T_CHECK request and just validation 3217 * done elsewhere should be enough, we just return here. 3218 */ 3219 if (inlen == 0) { 3220 *outlenp = 0; 3221 return (0); 3222 } 3223 break; 3224 case SETFN_OPTCOM_NEGOTIATE: 3225 checkonly = B_FALSE; 3226 break; 3227 case SETFN_UD_NEGOTIATE: 3228 case SETFN_CONN_NEGOTIATE: 3229 checkonly = B_FALSE; 3230 /* 3231 * Negotiating local and "association-related" options 3232 * through T_UNITDATA_REQ. 3233 * 3234 * Following routine can filter out ones we do not 3235 * want to be "set" this way. 3236 */ 3237 if (!udp_opt_allow_udr_set(level, name)) { 3238 *outlenp = 0; 3239 return (EINVAL); 3240 } 3241 break; 3242 default: 3243 /* 3244 * We should never get here 3245 */ 3246 *outlenp = 0; 3247 return (EINVAL); 3248 } 3249 3250 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3251 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3252 3253 /* 3254 * For fixed length options, no sanity check 3255 * of passed in length is done. It is assumed *_optcom_req() 3256 * routines do the right thing. 3257 */ 3258 3259 switch (level) { 3260 case SOL_SOCKET: 3261 switch (name) { 3262 case SO_REUSEADDR: 3263 if (!checkonly) 3264 udp->udp_reuseaddr = onoff; 3265 break; 3266 case SO_DEBUG: 3267 if (!checkonly) 3268 udp->udp_debug = onoff; 3269 break; 3270 /* 3271 * The following three items are available here, 3272 * but are only meaningful to IP. 3273 */ 3274 case SO_DONTROUTE: 3275 if (!checkonly) 3276 udp->udp_dontroute = onoff; 3277 break; 3278 case SO_USELOOPBACK: 3279 if (!checkonly) 3280 udp->udp_useloopback = onoff; 3281 break; 3282 case SO_BROADCAST: 3283 if (!checkonly) 3284 udp->udp_broadcast = onoff; 3285 break; 3286 3287 case SO_SNDBUF: 3288 if (*i1 > udp_max_buf) { 3289 *outlenp = 0; 3290 return (ENOBUFS); 3291 } 3292 if (!checkonly) { 3293 q->q_hiwat = *i1; 3294 WR(UDP_RD(q))->q_hiwat = *i1; 3295 } 3296 break; 3297 case SO_RCVBUF: 3298 if (*i1 > udp_max_buf) { 3299 *outlenp = 0; 3300 return (ENOBUFS); 3301 } 3302 if (!checkonly) { 3303 RD(q)->q_hiwat = *i1; 3304 UDP_RD(q)->q_hiwat = *i1; 3305 (void) mi_set_sth_hiwat(UDP_RD(q), 3306 udp_set_rcv_hiwat(udp, *i1)); 3307 } 3308 break; 3309 case SO_DGRAM_ERRIND: 3310 if (!checkonly) 3311 udp->udp_dgram_errind = onoff; 3312 break; 3313 case SO_RECVUCRED: 3314 if (!checkonly) 3315 udp->udp_recvucred = onoff; 3316 break; 3317 default: 3318 *outlenp = 0; 3319 return (EINVAL); 3320 } 3321 break; 3322 case IPPROTO_IP: 3323 if (udp->udp_family != AF_INET) { 3324 *outlenp = 0; 3325 return (ENOPROTOOPT); 3326 } 3327 switch (name) { 3328 case IP_OPTIONS: 3329 case T_IP_OPTIONS: 3330 /* Save options for use by IP. */ 3331 if (inlen & 0x3) { 3332 *outlenp = 0; 3333 return (EINVAL); 3334 } 3335 if (checkonly) 3336 break; 3337 3338 if (udp->udp_ip_snd_options) { 3339 mi_free((char *)udp->udp_ip_snd_options); 3340 udp->udp_ip_snd_options_len = 0; 3341 udp->udp_ip_snd_options = NULL; 3342 } 3343 if (inlen) { 3344 udp->udp_ip_snd_options = 3345 (uchar_t *)mi_alloc(inlen, BPRI_HI); 3346 if (udp->udp_ip_snd_options) { 3347 bcopy(invalp, udp->udp_ip_snd_options, 3348 inlen); 3349 udp->udp_ip_snd_options_len = inlen; 3350 } 3351 } 3352 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3353 UDPH_SIZE + udp->udp_ip_snd_options_len; 3354 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3355 udp_wroff_extra); 3356 break; 3357 case IP_TTL: 3358 if (!checkonly) { 3359 udp->udp_ttl = (uchar_t)*i1; 3360 } 3361 break; 3362 case IP_TOS: 3363 case T_IP_TOS: 3364 if (!checkonly) { 3365 udp->udp_type_of_service = (uchar_t)*i1; 3366 } 3367 break; 3368 case IP_MULTICAST_IF: { 3369 /* 3370 * TODO should check OPTMGMT reply and undo this if 3371 * there is an error. 3372 */ 3373 struct in_addr *inap = (struct in_addr *)invalp; 3374 if (!checkonly) { 3375 udp->udp_multicast_if_addr = 3376 inap->s_addr; 3377 } 3378 break; 3379 } 3380 case IP_MULTICAST_TTL: 3381 if (!checkonly) 3382 udp->udp_multicast_ttl = *invalp; 3383 break; 3384 case IP_MULTICAST_LOOP: 3385 if (!checkonly) 3386 connp->conn_multicast_loop = *invalp; 3387 break; 3388 case IP_RECVOPTS: 3389 if (!checkonly) 3390 udp->udp_recvopts = onoff; 3391 break; 3392 case IP_RECVDSTADDR: 3393 if (!checkonly) 3394 udp->udp_recvdstaddr = onoff; 3395 break; 3396 case IP_RECVIF: 3397 if (!checkonly) 3398 udp->udp_recvif = onoff; 3399 break; 3400 case IP_RECVSLLA: 3401 if (!checkonly) 3402 udp->udp_recvslla = onoff; 3403 break; 3404 case IP_RECVTTL: 3405 if (!checkonly) 3406 udp->udp_recvttl = onoff; 3407 break; 3408 case IP_ADD_MEMBERSHIP: 3409 case IP_DROP_MEMBERSHIP: 3410 case IP_BLOCK_SOURCE: 3411 case IP_UNBLOCK_SOURCE: 3412 case IP_ADD_SOURCE_MEMBERSHIP: 3413 case IP_DROP_SOURCE_MEMBERSHIP: 3414 case MCAST_JOIN_GROUP: 3415 case MCAST_LEAVE_GROUP: 3416 case MCAST_BLOCK_SOURCE: 3417 case MCAST_UNBLOCK_SOURCE: 3418 case MCAST_JOIN_SOURCE_GROUP: 3419 case MCAST_LEAVE_SOURCE_GROUP: 3420 case IP_SEC_OPT: 3421 /* 3422 * "soft" error (negative) 3423 * option not handled at this level 3424 * Do not modify *outlenp. 3425 */ 3426 return (-EINVAL); 3427 case IP_BOUND_IF: 3428 if (!checkonly) 3429 udp->udp_bound_if = *i1; 3430 break; 3431 case IP_UNSPEC_SRC: 3432 if (!checkonly) 3433 udp->udp_unspec_source = onoff; 3434 break; 3435 case IP_XMIT_IF: 3436 if (!checkonly) 3437 udp->udp_xmit_if = *i1; 3438 break; 3439 default: 3440 *outlenp = 0; 3441 return (EINVAL); 3442 } 3443 break; 3444 case IPPROTO_IPV6: { 3445 ip6_pkt_t *ipp; 3446 boolean_t sticky; 3447 3448 if (udp->udp_family != AF_INET6) { 3449 *outlenp = 0; 3450 return (ENOPROTOOPT); 3451 } 3452 /* 3453 * Deal with both sticky options and ancillary data 3454 */ 3455 if (thisdg_attrs == NULL) { 3456 /* sticky options, or none */ 3457 ipp = &udp->udp_sticky_ipp; 3458 sticky = B_TRUE; 3459 } else { 3460 /* ancillary data */ 3461 ipp = (ip6_pkt_t *)thisdg_attrs; 3462 sticky = B_FALSE; 3463 } 3464 3465 switch (name) { 3466 case IPV6_MULTICAST_IF: 3467 if (!checkonly) 3468 udp->udp_multicast_if_index = *i1; 3469 break; 3470 case IPV6_UNICAST_HOPS: 3471 /* -1 means use default */ 3472 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3473 *outlenp = 0; 3474 return (EINVAL); 3475 } 3476 if (!checkonly) { 3477 if (*i1 == -1) { 3478 udp->udp_ttl = ipp->ipp_unicast_hops = 3479 udp_ipv6_hoplimit; 3480 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3481 /* Pass modified value to IP. */ 3482 *i1 = udp->udp_ttl; 3483 } else { 3484 udp->udp_ttl = ipp->ipp_unicast_hops = 3485 (uint8_t)*i1; 3486 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3487 } 3488 /* Rebuild the header template */ 3489 error = udp_build_hdrs(q, udp); 3490 if (error != 0) { 3491 *outlenp = 0; 3492 return (error); 3493 } 3494 } 3495 break; 3496 case IPV6_MULTICAST_HOPS: 3497 /* -1 means use default */ 3498 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3499 *outlenp = 0; 3500 return (EINVAL); 3501 } 3502 if (!checkonly) { 3503 if (*i1 == -1) { 3504 udp->udp_multicast_ttl = 3505 ipp->ipp_multicast_hops = 3506 IP_DEFAULT_MULTICAST_TTL; 3507 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3508 /* Pass modified value to IP. */ 3509 *i1 = udp->udp_multicast_ttl; 3510 } else { 3511 udp->udp_multicast_ttl = 3512 ipp->ipp_multicast_hops = 3513 (uint8_t)*i1; 3514 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3515 } 3516 } 3517 break; 3518 case IPV6_MULTICAST_LOOP: 3519 if (*i1 != 0 && *i1 != 1) { 3520 *outlenp = 0; 3521 return (EINVAL); 3522 } 3523 if (!checkonly) 3524 connp->conn_multicast_loop = *i1; 3525 break; 3526 case IPV6_JOIN_GROUP: 3527 case IPV6_LEAVE_GROUP: 3528 case MCAST_JOIN_GROUP: 3529 case MCAST_LEAVE_GROUP: 3530 case MCAST_BLOCK_SOURCE: 3531 case MCAST_UNBLOCK_SOURCE: 3532 case MCAST_JOIN_SOURCE_GROUP: 3533 case MCAST_LEAVE_SOURCE_GROUP: 3534 /* 3535 * "soft" error (negative) 3536 * option not handled at this level 3537 * Note: Do not modify *outlenp 3538 */ 3539 return (-EINVAL); 3540 case IPV6_BOUND_IF: 3541 if (!checkonly) 3542 udp->udp_bound_if = *i1; 3543 break; 3544 case IPV6_UNSPEC_SRC: 3545 if (!checkonly) 3546 udp->udp_unspec_source = onoff; 3547 break; 3548 /* 3549 * Set boolean switches for ancillary data delivery 3550 */ 3551 case IPV6_RECVPKTINFO: 3552 if (!checkonly) 3553 udp->udp_ipv6_recvpktinfo = onoff; 3554 break; 3555 case IPV6_RECVTCLASS: 3556 if (!checkonly) { 3557 udp->udp_ipv6_recvtclass = onoff; 3558 } 3559 break; 3560 case IPV6_RECVPATHMTU: 3561 if (!checkonly) { 3562 udp->udp_ipv6_recvpathmtu = onoff; 3563 } 3564 break; 3565 case IPV6_RECVHOPLIMIT: 3566 if (!checkonly) 3567 udp->udp_ipv6_recvhoplimit = onoff; 3568 break; 3569 case IPV6_RECVHOPOPTS: 3570 if (!checkonly) 3571 udp->udp_ipv6_recvhopopts = onoff; 3572 break; 3573 case IPV6_RECVDSTOPTS: 3574 if (!checkonly) 3575 udp->udp_ipv6_recvdstopts = onoff; 3576 break; 3577 case _OLD_IPV6_RECVDSTOPTS: 3578 if (!checkonly) 3579 udp->udp_old_ipv6_recvdstopts = onoff; 3580 break; 3581 case IPV6_RECVRTHDRDSTOPTS: 3582 if (!checkonly) 3583 udp->udp_ipv6_recvrthdrdstopts = onoff; 3584 break; 3585 case IPV6_RECVRTHDR: 3586 if (!checkonly) 3587 udp->udp_ipv6_recvrthdr = onoff; 3588 break; 3589 /* 3590 * Set sticky options or ancillary data. 3591 * If sticky options, (re)build any extension headers 3592 * that might be needed as a result. 3593 */ 3594 case IPV6_PKTINFO: 3595 /* 3596 * The source address and ifindex are verified 3597 * in ip_opt_set(). For ancillary data the 3598 * source address is checked in ip_wput_v6. 3599 */ 3600 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3601 return (EINVAL); 3602 if (checkonly) 3603 break; 3604 3605 if (inlen == 0) { 3606 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3607 ipp->ipp_sticky_ignored |= 3608 (IPPF_IFINDEX|IPPF_ADDR); 3609 } else { 3610 struct in6_pktinfo *pkti; 3611 3612 pkti = (struct in6_pktinfo *)invalp; 3613 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3614 ipp->ipp_addr = pkti->ipi6_addr; 3615 if (ipp->ipp_ifindex != 0) 3616 ipp->ipp_fields |= IPPF_IFINDEX; 3617 else 3618 ipp->ipp_fields &= ~IPPF_IFINDEX; 3619 if (!IN6_IS_ADDR_UNSPECIFIED( 3620 &ipp->ipp_addr)) 3621 ipp->ipp_fields |= IPPF_ADDR; 3622 else 3623 ipp->ipp_fields &= ~IPPF_ADDR; 3624 } 3625 if (sticky) { 3626 error = udp_build_hdrs(q, udp); 3627 if (error != 0) 3628 return (error); 3629 } 3630 break; 3631 case IPV6_HOPLIMIT: 3632 if (sticky) 3633 return (EINVAL); 3634 if (inlen != 0 && inlen != sizeof (int)) 3635 return (EINVAL); 3636 if (checkonly) 3637 break; 3638 3639 if (inlen == 0) { 3640 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3641 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3642 } else { 3643 if (*i1 > 255 || *i1 < -1) 3644 return (EINVAL); 3645 if (*i1 == -1) 3646 ipp->ipp_hoplimit = udp_ipv6_hoplimit; 3647 else 3648 ipp->ipp_hoplimit = *i1; 3649 ipp->ipp_fields |= IPPF_HOPLIMIT; 3650 } 3651 break; 3652 case IPV6_TCLASS: 3653 if (inlen != 0 && inlen != sizeof (int)) 3654 return (EINVAL); 3655 if (checkonly) 3656 break; 3657 3658 if (inlen == 0) { 3659 ipp->ipp_fields &= ~IPPF_TCLASS; 3660 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3661 } else { 3662 if (*i1 > 255 || *i1 < -1) 3663 return (EINVAL); 3664 if (*i1 == -1) 3665 ipp->ipp_tclass = 0; 3666 else 3667 ipp->ipp_tclass = *i1; 3668 ipp->ipp_fields |= IPPF_TCLASS; 3669 } 3670 if (sticky) { 3671 error = udp_build_hdrs(q, udp); 3672 if (error != 0) 3673 return (error); 3674 } 3675 break; 3676 case IPV6_NEXTHOP: 3677 /* 3678 * IP will verify that the nexthop is reachable 3679 * and fail for sticky options. 3680 */ 3681 if (inlen != 0 && inlen != sizeof (sin6_t)) 3682 return (EINVAL); 3683 if (checkonly) 3684 break; 3685 3686 if (inlen == 0) { 3687 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3688 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3689 } else { 3690 sin6_t *sin6 = (sin6_t *)invalp; 3691 3692 if (sin6->sin6_family != AF_INET6) 3693 return (EAFNOSUPPORT); 3694 if (IN6_IS_ADDR_V4MAPPED( 3695 &sin6->sin6_addr)) 3696 return (EADDRNOTAVAIL); 3697 ipp->ipp_nexthop = sin6->sin6_addr; 3698 if (!IN6_IS_ADDR_UNSPECIFIED( 3699 &ipp->ipp_nexthop)) 3700 ipp->ipp_fields |= IPPF_NEXTHOP; 3701 else 3702 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3703 } 3704 if (sticky) { 3705 error = udp_build_hdrs(q, udp); 3706 if (error != 0) 3707 return (error); 3708 } 3709 break; 3710 case IPV6_HOPOPTS: { 3711 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3712 /* 3713 * Sanity checks - minimum size, size a multiple of 3714 * eight bytes, and matching size passed in. 3715 */ 3716 if (inlen != 0 && 3717 inlen != (8 * (hopts->ip6h_len + 1))) 3718 return (EINVAL); 3719 3720 if (checkonly) 3721 break; 3722 3723 if (inlen == 0) { 3724 if (sticky && 3725 (ipp->ipp_fields & IPPF_HOPOPTS) != 0) { 3726 kmem_free(ipp->ipp_hopopts, 3727 ipp->ipp_hopoptslen); 3728 ipp->ipp_hopopts = NULL; 3729 ipp->ipp_hopoptslen = 0; 3730 } 3731 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3732 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3733 } else { 3734 error = udp_pkt_set(invalp, inlen, sticky, 3735 (uchar_t **)&ipp->ipp_hopopts, 3736 &ipp->ipp_hopoptslen); 3737 if (error != 0) 3738 return (error); 3739 ipp->ipp_fields |= IPPF_HOPOPTS; 3740 } 3741 if (sticky) { 3742 error = udp_build_hdrs(q, udp); 3743 if (error != 0) 3744 return (error); 3745 } 3746 break; 3747 } 3748 case IPV6_RTHDRDSTOPTS: { 3749 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3750 3751 /* 3752 * Sanity checks - minimum size, size a multiple of 3753 * eight bytes, and matching size passed in. 3754 */ 3755 if (inlen != 0 && 3756 inlen != (8 * (dopts->ip6d_len + 1))) 3757 return (EINVAL); 3758 3759 if (checkonly) 3760 break; 3761 3762 if (inlen == 0) { 3763 if (sticky && 3764 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3765 kmem_free(ipp->ipp_rtdstopts, 3766 ipp->ipp_rtdstoptslen); 3767 ipp->ipp_rtdstopts = NULL; 3768 ipp->ipp_rtdstoptslen = 0; 3769 } 3770 3771 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3772 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3773 } else { 3774 error = udp_pkt_set(invalp, inlen, sticky, 3775 (uchar_t **)&ipp->ipp_rtdstopts, 3776 &ipp->ipp_rtdstoptslen); 3777 if (error != 0) 3778 return (error); 3779 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3780 } 3781 if (sticky) { 3782 error = udp_build_hdrs(q, udp); 3783 if (error != 0) 3784 return (error); 3785 } 3786 break; 3787 } 3788 case IPV6_DSTOPTS: { 3789 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3790 3791 /* 3792 * Sanity checks - minimum size, size a multiple of 3793 * eight bytes, and matching size passed in. 3794 */ 3795 if (inlen != 0 && 3796 inlen != (8 * (dopts->ip6d_len + 1))) 3797 return (EINVAL); 3798 3799 if (checkonly) 3800 break; 3801 3802 if (inlen == 0) { 3803 if (sticky && 3804 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3805 kmem_free(ipp->ipp_dstopts, 3806 ipp->ipp_dstoptslen); 3807 ipp->ipp_dstopts = NULL; 3808 ipp->ipp_dstoptslen = 0; 3809 } 3810 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3811 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3812 } else { 3813 error = udp_pkt_set(invalp, inlen, sticky, 3814 (uchar_t **)&ipp->ipp_dstopts, 3815 &ipp->ipp_dstoptslen); 3816 if (error != 0) 3817 return (error); 3818 ipp->ipp_fields |= IPPF_DSTOPTS; 3819 } 3820 if (sticky) { 3821 error = udp_build_hdrs(q, udp); 3822 if (error != 0) 3823 return (error); 3824 } 3825 break; 3826 } 3827 case IPV6_RTHDR: { 3828 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3829 3830 /* 3831 * Sanity checks - minimum size, size a multiple of 3832 * eight bytes, and matching size passed in. 3833 */ 3834 if (inlen != 0 && 3835 inlen != (8 * (rt->ip6r_len + 1))) 3836 return (EINVAL); 3837 3838 if (checkonly) 3839 break; 3840 3841 if (inlen == 0) { 3842 if (sticky && 3843 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3844 kmem_free(ipp->ipp_rthdr, 3845 ipp->ipp_rthdrlen); 3846 ipp->ipp_rthdr = NULL; 3847 ipp->ipp_rthdrlen = 0; 3848 } 3849 ipp->ipp_fields &= ~IPPF_RTHDR; 3850 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3851 } else { 3852 error = udp_pkt_set(invalp, inlen, sticky, 3853 (uchar_t **)&ipp->ipp_rthdr, 3854 &ipp->ipp_rthdrlen); 3855 if (error != 0) 3856 return (error); 3857 ipp->ipp_fields |= IPPF_RTHDR; 3858 } 3859 if (sticky) { 3860 error = udp_build_hdrs(q, udp); 3861 if (error != 0) 3862 return (error); 3863 } 3864 break; 3865 } 3866 3867 case IPV6_DONTFRAG: 3868 if (checkonly) 3869 break; 3870 3871 if (onoff) { 3872 ipp->ipp_fields |= IPPF_DONTFRAG; 3873 } else { 3874 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3875 } 3876 break; 3877 3878 case IPV6_USE_MIN_MTU: 3879 if (inlen != sizeof (int)) 3880 return (EINVAL); 3881 3882 if (*i1 < -1 || *i1 > 1) 3883 return (EINVAL); 3884 3885 if (checkonly) 3886 break; 3887 3888 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3889 ipp->ipp_use_min_mtu = *i1; 3890 break; 3891 3892 case IPV6_BOUND_PIF: 3893 case IPV6_SEC_OPT: 3894 case IPV6_DONTFAILOVER_IF: 3895 case IPV6_SRC_PREFERENCES: 3896 case IPV6_V6ONLY: 3897 /* Handled at the IP level */ 3898 return (-EINVAL); 3899 default: 3900 *outlenp = 0; 3901 return (EINVAL); 3902 } 3903 break; 3904 } /* end IPPROTO_IPV6 */ 3905 case IPPROTO_UDP: 3906 switch (name) { 3907 case UDP_ANONPRIVBIND: 3908 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 3909 *outlenp = 0; 3910 return (error); 3911 } 3912 if (!checkonly) { 3913 udp->udp_anon_priv_bind = onoff; 3914 } 3915 break; 3916 case UDP_EXCLBIND: 3917 if (!checkonly) 3918 udp->udp_exclbind = onoff; 3919 break; 3920 case UDP_RCVHDR: 3921 if (!checkonly) 3922 udp->udp_rcvhdr = onoff; 3923 break; 3924 default: 3925 *outlenp = 0; 3926 return (EINVAL); 3927 } 3928 break; 3929 default: 3930 *outlenp = 0; 3931 return (EINVAL); 3932 } 3933 /* 3934 * Common case of OK return with outval same as inval. 3935 */ 3936 if (invalp != outvalp) { 3937 /* don't trust bcopy for identical src/dst */ 3938 (void) bcopy(invalp, outvalp, inlen); 3939 } 3940 *outlenp = inlen; 3941 return (0); 3942 } 3943 3944 /* 3945 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3946 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3947 * headers, and the udp header. 3948 * Returns failure if can't allocate memory. 3949 */ 3950 static int 3951 udp_build_hdrs(queue_t *q, udp_t *udp) 3952 { 3953 uchar_t *hdrs; 3954 uint_t hdrs_len; 3955 ip6_t *ip6h; 3956 ip6i_t *ip6i; 3957 udpha_t *udpha; 3958 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3959 3960 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3961 ASSERT(hdrs_len != 0); 3962 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3963 /* Need to reallocate */ 3964 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3965 if (hdrs == NULL) 3966 return (ENOMEM); 3967 3968 if (udp->udp_sticky_hdrs_len != 0) { 3969 kmem_free(udp->udp_sticky_hdrs, 3970 udp->udp_sticky_hdrs_len); 3971 } 3972 udp->udp_sticky_hdrs = hdrs; 3973 udp->udp_sticky_hdrs_len = hdrs_len; 3974 } 3975 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3976 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3977 3978 /* Set header fields not in ipp */ 3979 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3980 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3981 ip6h = (ip6_t *)&ip6i[1]; 3982 } else { 3983 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3984 } 3985 3986 if (!(ipp->ipp_fields & IPPF_ADDR)) 3987 ip6h->ip6_src = udp->udp_v6src; 3988 3989 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3990 udpha->uha_src_port = udp->udp_port; 3991 3992 /* Try to get everything in a single mblk */ 3993 if (hdrs_len > udp->udp_max_hdr_len) { 3994 udp->udp_max_hdr_len = hdrs_len; 3995 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3996 udp_wroff_extra); 3997 } 3998 return (0); 3999 } 4000 4001 /* 4002 * Set optbuf and optlen for the option. 4003 * If sticky is set allocate memory (if not already present). 4004 * Otherwise just point optbuf and optlen at invalp and inlen. 4005 * Returns failure if memory can not be allocated. 4006 */ 4007 static int 4008 udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 4009 uchar_t **optbufp, uint_t *optlenp) 4010 { 4011 uchar_t *optbuf; 4012 4013 if (!sticky) { 4014 *optbufp = invalp; 4015 *optlenp = inlen; 4016 return (0); 4017 } 4018 if (inlen == *optlenp) { 4019 /* Unchanged length - no need to realocate */ 4020 bcopy(invalp, *optbufp, inlen); 4021 return (0); 4022 } 4023 if (inlen != 0) { 4024 /* Allocate new buffer before free */ 4025 optbuf = kmem_alloc(inlen, KM_NOSLEEP); 4026 if (optbuf == NULL) 4027 return (ENOMEM); 4028 } else { 4029 optbuf = NULL; 4030 } 4031 /* Free old buffer */ 4032 if (*optlenp != 0) 4033 kmem_free(*optbufp, *optlenp); 4034 4035 bcopy(invalp, optbuf, inlen); 4036 *optbufp = optbuf; 4037 *optlenp = inlen; 4038 return (0); 4039 } 4040 4041 /* 4042 * This routine retrieves the value of an ND variable in a udpparam_t 4043 * structure. It is called through nd_getset when a user reads the 4044 * variable. 4045 */ 4046 /* ARGSUSED */ 4047 static int 4048 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4049 { 4050 udpparam_t *udppa = (udpparam_t *)cp; 4051 4052 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4053 return (0); 4054 } 4055 4056 /* 4057 * Walk through the param array specified registering each element with the 4058 * named dispatch (ND) handler. 4059 */ 4060 static boolean_t 4061 udp_param_register(udpparam_t *udppa, int cnt) 4062 { 4063 for (; cnt-- > 0; udppa++) { 4064 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4065 if (!nd_load(&udp_g_nd, udppa->udp_param_name, 4066 udp_param_get, udp_param_set, 4067 (caddr_t)udppa)) { 4068 nd_free(&udp_g_nd); 4069 return (B_FALSE); 4070 } 4071 } 4072 } 4073 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports", 4074 udp_extra_priv_ports_get, NULL, NULL)) { 4075 nd_free(&udp_g_nd); 4076 return (B_FALSE); 4077 } 4078 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add", 4079 NULL, udp_extra_priv_ports_add, NULL)) { 4080 nd_free(&udp_g_nd); 4081 return (B_FALSE); 4082 } 4083 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del", 4084 NULL, udp_extra_priv_ports_del, NULL)) { 4085 nd_free(&udp_g_nd); 4086 return (B_FALSE); 4087 } 4088 if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL, 4089 NULL)) { 4090 nd_free(&udp_g_nd); 4091 return (B_FALSE); 4092 } 4093 if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL, 4094 NULL)) { 4095 nd_free(&udp_g_nd); 4096 return (B_FALSE); 4097 } 4098 return (B_TRUE); 4099 } 4100 4101 /* This routine sets an ND variable in a udpparam_t structure. */ 4102 /* ARGSUSED */ 4103 static int 4104 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 4105 { 4106 long new_value; 4107 udpparam_t *udppa = (udpparam_t *)cp; 4108 4109 /* 4110 * Fail the request if the new value does not lie within the 4111 * required bounds. 4112 */ 4113 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 4114 new_value < udppa->udp_param_min || 4115 new_value > udppa->udp_param_max) { 4116 return (EINVAL); 4117 } 4118 4119 /* Set the new value */ 4120 udppa->udp_param_value = new_value; 4121 return (0); 4122 } 4123 4124 static void 4125 udp_input(conn_t *connp, mblk_t *mp) 4126 { 4127 struct T_unitdata_ind *tudi; 4128 uchar_t *rptr; /* Pointer to IP header */ 4129 int hdr_length; /* Length of IP+UDP headers */ 4130 int udi_size; /* Size of T_unitdata_ind */ 4131 int mp_len; 4132 udp_t *udp; 4133 udpha_t *udpha; 4134 int ipversion; 4135 ip6_pkt_t ipp; 4136 ip6_t *ip6h; 4137 ip6i_t *ip6i; 4138 mblk_t *mp1; 4139 mblk_t *options_mp = NULL; 4140 in_pktinfo_t *pinfo = NULL; 4141 cred_t *cr = NULL; 4142 queue_t *q = connp->conn_rq; 4143 pid_t cpid; 4144 4145 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4146 "udp_rput_start: q %p mp %p", q, mp); 4147 4148 udp = connp->conn_udp; 4149 rptr = mp->b_rptr; 4150 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4151 ASSERT(OK_32PTR(rptr)); 4152 4153 /* 4154 * IP should have prepended the options data in an M_CTL 4155 * Check M_CTL "type" to make sure are not here bcos of 4156 * a valid ICMP message 4157 */ 4158 if (DB_TYPE(mp) == M_CTL) { 4159 if (MBLKL(mp) == sizeof (in_pktinfo_t) && 4160 ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type == 4161 IN_PKTINFO) { 4162 /* 4163 * IP_RECVIF or IP_RECVSLLA information has been 4164 * appended to the packet by IP. We need to 4165 * extract the mblk and adjust the rptr 4166 */ 4167 pinfo = (in_pktinfo_t *)mp->b_rptr; 4168 options_mp = mp; 4169 mp = mp->b_cont; 4170 rptr = mp->b_rptr; 4171 UDP_STAT(udp_in_pktinfo); 4172 } else { 4173 /* 4174 * ICMP messages. 4175 */ 4176 udp_icmp_error(q, mp); 4177 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4178 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4179 return; 4180 } 4181 } 4182 4183 mp_len = msgdsize(mp); 4184 /* 4185 * This is the inbound data path. 4186 * First, we check to make sure the IP version number is correct, 4187 * and then pull the IP and UDP headers into the first mblk. 4188 * Assume IP provides aligned packets - otherwise toss. 4189 * Also, check if we have a complete IP header. 4190 */ 4191 4192 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4193 ipp.ipp_fields = 0; 4194 4195 ipversion = IPH_HDR_VERSION(rptr); 4196 switch (ipversion) { 4197 case IPV4_VERSION: 4198 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4199 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4200 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4201 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4202 (udp->udp_ip_rcv_options_len)) { 4203 /* 4204 * Handle IPv4 packets with options outside of the 4205 * main data path. Not needed for AF_INET6 sockets 4206 * since they don't support a getsockopt of IP_OPTIONS. 4207 */ 4208 if (udp->udp_family == AF_INET6) 4209 break; 4210 /* 4211 * UDP length check performed for IPv4 packets with 4212 * options to check whether UDP length specified in 4213 * the header is the same as the physical length of 4214 * the packet. 4215 */ 4216 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4217 if (mp_len != (ntohs(udpha->uha_length) + 4218 hdr_length - UDPH_SIZE)) { 4219 goto tossit; 4220 } 4221 /* 4222 * Handle the case where the packet has IP options 4223 * and the IP_RECVSLLA & IP_RECVIF are set 4224 */ 4225 if (pinfo != NULL) 4226 mp = options_mp; 4227 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4228 SQTAG_UDP_INPUT); 4229 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4230 "udp_rput_end: q %p (%S)", q, "end"); 4231 return; 4232 } 4233 4234 /* Handle IPV6_RECVHOPLIMIT. */ 4235 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4236 udp->udp_ipv6_recvpktinfo) { 4237 if (pinfo->in_pkt_flags & IPF_RECVIF) { 4238 ipp.ipp_fields |= IPPF_IFINDEX; 4239 ipp.ipp_ifindex = pinfo->in_pkt_ifindex; 4240 } 4241 } 4242 break; 4243 case IPV6_VERSION: 4244 /* 4245 * IPv6 packets can only be received by applications 4246 * that are prepared to receive IPv6 addresses. 4247 * The IP fanout must ensure this. 4248 */ 4249 ASSERT(udp->udp_family == AF_INET6); 4250 4251 ip6h = (ip6_t *)rptr; 4252 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4253 4254 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4255 uint8_t nexthdrp; 4256 /* Look for ifindex information */ 4257 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4258 ip6i = (ip6i_t *)ip6h; 4259 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4260 goto tossit; 4261 4262 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4263 ASSERT(ip6i->ip6i_ifindex != 0); 4264 ipp.ipp_fields |= IPPF_IFINDEX; 4265 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4266 } 4267 rptr = (uchar_t *)&ip6i[1]; 4268 mp->b_rptr = rptr; 4269 if (rptr == mp->b_wptr) { 4270 mp1 = mp->b_cont; 4271 freeb(mp); 4272 mp = mp1; 4273 rptr = mp->b_rptr; 4274 } 4275 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4276 goto tossit; 4277 ip6h = (ip6_t *)rptr; 4278 mp_len = msgdsize(mp); 4279 } 4280 /* 4281 * Find any potentially interesting extension headers 4282 * as well as the length of the IPv6 + extension 4283 * headers. 4284 */ 4285 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4286 UDPH_SIZE; 4287 ASSERT(nexthdrp == IPPROTO_UDP); 4288 } else { 4289 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4290 ip6i = NULL; 4291 } 4292 break; 4293 default: 4294 ASSERT(0); 4295 } 4296 4297 /* 4298 * IP inspected the UDP header thus all of it must be in the mblk. 4299 * UDP length check is performed for IPv6 packets and IPv4 packets 4300 * without options to check if the size of the packet as specified 4301 * by the header is the same as the physical size of the packet. 4302 */ 4303 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4304 if ((MBLKL(mp) < hdr_length) || 4305 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4306 goto tossit; 4307 } 4308 4309 /* Walk past the headers. */ 4310 if (!udp->udp_rcvhdr) { 4311 mp->b_rptr = rptr + hdr_length; 4312 mp_len -= hdr_length; 4313 } 4314 4315 /* 4316 * This is the inbound data path. Packets are passed upstream as 4317 * T_UNITDATA_IND messages with full IP headers still attached. 4318 */ 4319 if (udp->udp_family == AF_INET) { 4320 sin_t *sin; 4321 4322 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4323 4324 /* 4325 * Normally only send up the address. 4326 * If IP_RECVDSTADDR is set we include the destination IP 4327 * address as an option. With IP_RECVOPTS we include all 4328 * the IP options. Only ip_rput_other() handles packets 4329 * that contain IP options. 4330 */ 4331 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4332 if (udp->udp_recvdstaddr) { 4333 udi_size += sizeof (struct T_opthdr) + 4334 sizeof (struct in_addr); 4335 UDP_STAT(udp_in_recvdstaddr); 4336 } 4337 4338 /* 4339 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4340 * space accordingly 4341 */ 4342 if (udp->udp_recvif && (pinfo != NULL) && 4343 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4344 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4345 UDP_STAT(udp_in_recvif); 4346 } 4347 4348 if (udp->udp_recvslla && (pinfo != NULL) && 4349 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4350 udi_size += sizeof (struct T_opthdr) + 4351 sizeof (struct sockaddr_dl); 4352 UDP_STAT(udp_in_recvslla); 4353 } 4354 4355 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4356 udi_size += sizeof (struct T_opthdr) + ucredsize; 4357 cpid = DB_CPID(mp); 4358 UDP_STAT(udp_in_recvucred); 4359 } 4360 /* 4361 * If IP_RECVTTL is set allocate the appropriate sized buffer 4362 */ 4363 if (udp->udp_recvttl) { 4364 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4365 UDP_STAT(udp_in_recvttl); 4366 } 4367 4368 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4369 4370 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4371 mp1 = allocb(udi_size, BPRI_MED); 4372 if (mp1 == NULL) { 4373 freemsg(mp); 4374 if (options_mp != NULL) 4375 freeb(options_mp); 4376 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4377 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4378 BUMP_MIB(&udp_mib, udpInErrors); 4379 return; 4380 } 4381 mp1->b_cont = mp; 4382 mp = mp1; 4383 mp->b_datap->db_type = M_PROTO; 4384 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4385 mp->b_wptr = (uchar_t *)tudi + udi_size; 4386 tudi->PRIM_type = T_UNITDATA_IND; 4387 tudi->SRC_length = sizeof (sin_t); 4388 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4389 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4390 sizeof (sin_t); 4391 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4392 tudi->OPT_length = udi_size; 4393 sin = (sin_t *)&tudi[1]; 4394 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4395 sin->sin_port = udpha->uha_src_port; 4396 sin->sin_family = udp->udp_family; 4397 *(uint32_t *)&sin->sin_zero[0] = 0; 4398 *(uint32_t *)&sin->sin_zero[4] = 0; 4399 4400 /* 4401 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4402 * IP_RECVTTL has been set. 4403 */ 4404 if (udi_size != 0) { 4405 /* 4406 * Copy in destination address before options to avoid 4407 * any padding issues. 4408 */ 4409 char *dstopt; 4410 4411 dstopt = (char *)&sin[1]; 4412 if (udp->udp_recvdstaddr) { 4413 struct T_opthdr *toh; 4414 ipaddr_t *dstptr; 4415 4416 toh = (struct T_opthdr *)dstopt; 4417 toh->level = IPPROTO_IP; 4418 toh->name = IP_RECVDSTADDR; 4419 toh->len = sizeof (struct T_opthdr) + 4420 sizeof (ipaddr_t); 4421 toh->status = 0; 4422 dstopt += sizeof (struct T_opthdr); 4423 dstptr = (ipaddr_t *)dstopt; 4424 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4425 dstopt += sizeof (ipaddr_t); 4426 udi_size -= toh->len; 4427 } 4428 4429 if (udp->udp_recvslla && (pinfo != NULL) && 4430 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4431 4432 struct T_opthdr *toh; 4433 struct sockaddr_dl *dstptr; 4434 4435 toh = (struct T_opthdr *)dstopt; 4436 toh->level = IPPROTO_IP; 4437 toh->name = IP_RECVSLLA; 4438 toh->len = sizeof (struct T_opthdr) + 4439 sizeof (struct sockaddr_dl); 4440 toh->status = 0; 4441 dstopt += sizeof (struct T_opthdr); 4442 dstptr = (struct sockaddr_dl *)dstopt; 4443 bcopy(&pinfo->in_pkt_slla, dstptr, 4444 sizeof (struct sockaddr_dl)); 4445 dstopt += sizeof (struct sockaddr_dl); 4446 udi_size -= toh->len; 4447 } 4448 4449 if (udp->udp_recvif && (pinfo != NULL) && 4450 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4451 4452 struct T_opthdr *toh; 4453 uint_t *dstptr; 4454 4455 toh = (struct T_opthdr *)dstopt; 4456 toh->level = IPPROTO_IP; 4457 toh->name = IP_RECVIF; 4458 toh->len = sizeof (struct T_opthdr) + 4459 sizeof (uint_t); 4460 toh->status = 0; 4461 dstopt += sizeof (struct T_opthdr); 4462 dstptr = (uint_t *)dstopt; 4463 *dstptr = pinfo->in_pkt_ifindex; 4464 dstopt += sizeof (uint_t); 4465 udi_size -= toh->len; 4466 } 4467 4468 if (cr != NULL) { 4469 struct T_opthdr *toh; 4470 4471 toh = (struct T_opthdr *)dstopt; 4472 toh->level = SOL_SOCKET; 4473 toh->name = SCM_UCRED; 4474 toh->len = sizeof (struct T_opthdr) + ucredsize; 4475 toh->status = 0; 4476 (void) cred2ucred(cr, cpid, &toh[1]); 4477 dstopt += toh->len; 4478 udi_size -= toh->len; 4479 } 4480 4481 if (udp->udp_recvttl) { 4482 struct T_opthdr *toh; 4483 uint8_t *dstptr; 4484 4485 toh = (struct T_opthdr *)dstopt; 4486 toh->level = IPPROTO_IP; 4487 toh->name = IP_RECVTTL; 4488 toh->len = sizeof (struct T_opthdr) + 4489 sizeof (uint8_t); 4490 toh->status = 0; 4491 dstopt += sizeof (struct T_opthdr); 4492 dstptr = (uint8_t *)dstopt; 4493 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4494 dstopt += sizeof (uint8_t); 4495 udi_size -= toh->len; 4496 } 4497 4498 /* Consumed all of allocated space */ 4499 ASSERT(udi_size == 0); 4500 } 4501 } else { 4502 sin6_t *sin6; 4503 4504 /* 4505 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4506 * 4507 * Normally we only send up the address. If receiving of any 4508 * optional receive side information is enabled, we also send 4509 * that up as options. 4510 * [ Only udp_rput_other() handles packets that contain IP 4511 * options so code to account for does not appear immediately 4512 * below but elsewhere ] 4513 */ 4514 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4515 4516 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4517 IPPF_RTHDR|IPPF_IFINDEX)) { 4518 if (udp->udp_ipv6_recvhopopts && 4519 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4520 udi_size += sizeof (struct T_opthdr) + 4521 ipp.ipp_hopoptslen; 4522 UDP_STAT(udp_in_recvhopopts); 4523 } 4524 if ((udp->udp_ipv6_recvdstopts || 4525 udp->udp_old_ipv6_recvdstopts) && 4526 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4527 udi_size += sizeof (struct T_opthdr) + 4528 ipp.ipp_dstoptslen; 4529 UDP_STAT(udp_in_recvdstopts); 4530 } 4531 if (((udp->udp_ipv6_recvdstopts && 4532 udp->udp_ipv6_recvrthdr && 4533 (ipp.ipp_fields & IPPF_RTHDR)) || 4534 udp->udp_ipv6_recvrthdrdstopts) && 4535 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4536 udi_size += sizeof (struct T_opthdr) + 4537 ipp.ipp_rtdstoptslen; 4538 UDP_STAT(udp_in_recvrtdstopts); 4539 } 4540 if (udp->udp_ipv6_recvrthdr && 4541 (ipp.ipp_fields & IPPF_RTHDR)) { 4542 udi_size += sizeof (struct T_opthdr) + 4543 ipp.ipp_rthdrlen; 4544 UDP_STAT(udp_in_recvrthdr); 4545 } 4546 if (udp->udp_ipv6_recvpktinfo && 4547 (ipp.ipp_fields & IPPF_IFINDEX)) { 4548 udi_size += sizeof (struct T_opthdr) + 4549 sizeof (struct in6_pktinfo); 4550 UDP_STAT(udp_in_recvpktinfo); 4551 } 4552 4553 } 4554 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4555 udi_size += sizeof (struct T_opthdr) + ucredsize; 4556 cpid = DB_CPID(mp); 4557 UDP_STAT(udp_in_recvucred); 4558 } 4559 4560 if (udp->udp_ipv6_recvhoplimit) { 4561 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4562 UDP_STAT(udp_in_recvhoplimit); 4563 } 4564 4565 if (udp->udp_ipv6_recvtclass) { 4566 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4567 UDP_STAT(udp_in_recvtclass); 4568 } 4569 4570 mp1 = allocb(udi_size, BPRI_MED); 4571 if (mp1 == NULL) { 4572 freemsg(mp); 4573 if (options_mp != NULL) 4574 freeb(options_mp); 4575 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4576 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4577 BUMP_MIB(&udp_mib, udpInErrors); 4578 return; 4579 } 4580 mp1->b_cont = mp; 4581 mp = mp1; 4582 mp->b_datap->db_type = M_PROTO; 4583 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4584 mp->b_wptr = (uchar_t *)tudi + udi_size; 4585 tudi->PRIM_type = T_UNITDATA_IND; 4586 tudi->SRC_length = sizeof (sin6_t); 4587 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4588 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4589 sizeof (sin6_t); 4590 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4591 tudi->OPT_length = udi_size; 4592 sin6 = (sin6_t *)&tudi[1]; 4593 if (ipversion == IPV4_VERSION) { 4594 in6_addr_t v6dst; 4595 4596 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4597 &sin6->sin6_addr); 4598 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4599 &v6dst); 4600 sin6->sin6_flowinfo = 0; 4601 sin6->sin6_scope_id = 0; 4602 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4603 connp->conn_zoneid); 4604 } else { 4605 sin6->sin6_addr = ip6h->ip6_src; 4606 /* No sin6_flowinfo per API */ 4607 sin6->sin6_flowinfo = 0; 4608 /* For link-scope source pass up scope id */ 4609 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4610 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4611 sin6->sin6_scope_id = ipp.ipp_ifindex; 4612 else 4613 sin6->sin6_scope_id = 0; 4614 sin6->__sin6_src_id = ip_srcid_find_addr( 4615 &ip6h->ip6_dst, connp->conn_zoneid); 4616 } 4617 sin6->sin6_port = udpha->uha_src_port; 4618 sin6->sin6_family = udp->udp_family; 4619 4620 if (udi_size != 0) { 4621 uchar_t *dstopt; 4622 4623 dstopt = (uchar_t *)&sin6[1]; 4624 if (udp->udp_ipv6_recvpktinfo && 4625 (ipp.ipp_fields & IPPF_IFINDEX)) { 4626 struct T_opthdr *toh; 4627 struct in6_pktinfo *pkti; 4628 4629 toh = (struct T_opthdr *)dstopt; 4630 toh->level = IPPROTO_IPV6; 4631 toh->name = IPV6_PKTINFO; 4632 toh->len = sizeof (struct T_opthdr) + 4633 sizeof (*pkti); 4634 toh->status = 0; 4635 dstopt += sizeof (struct T_opthdr); 4636 pkti = (struct in6_pktinfo *)dstopt; 4637 if (ipversion == IPV6_VERSION) 4638 pkti->ipi6_addr = ip6h->ip6_dst; 4639 else 4640 IN6_IPADDR_TO_V4MAPPED( 4641 ((ipha_t *)rptr)->ipha_dst, 4642 &pkti->ipi6_addr); 4643 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4644 dstopt += sizeof (*pkti); 4645 udi_size -= toh->len; 4646 } 4647 if (udp->udp_ipv6_recvhoplimit) { 4648 struct T_opthdr *toh; 4649 4650 toh = (struct T_opthdr *)dstopt; 4651 toh->level = IPPROTO_IPV6; 4652 toh->name = IPV6_HOPLIMIT; 4653 toh->len = sizeof (struct T_opthdr) + 4654 sizeof (uint_t); 4655 toh->status = 0; 4656 dstopt += sizeof (struct T_opthdr); 4657 if (ipversion == IPV6_VERSION) 4658 *(uint_t *)dstopt = ip6h->ip6_hops; 4659 else 4660 *(uint_t *)dstopt = 4661 ((ipha_t *)rptr)->ipha_ttl; 4662 dstopt += sizeof (uint_t); 4663 udi_size -= toh->len; 4664 } 4665 if (udp->udp_ipv6_recvtclass) { 4666 struct T_opthdr *toh; 4667 4668 toh = (struct T_opthdr *)dstopt; 4669 toh->level = IPPROTO_IPV6; 4670 toh->name = IPV6_TCLASS; 4671 toh->len = sizeof (struct T_opthdr) + 4672 sizeof (uint_t); 4673 toh->status = 0; 4674 dstopt += sizeof (struct T_opthdr); 4675 if (ipversion == IPV6_VERSION) { 4676 *(uint_t *)dstopt = 4677 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4678 } else { 4679 ipha_t *ipha = (ipha_t *)rptr; 4680 *(uint_t *)dstopt = 4681 ipha->ipha_type_of_service; 4682 } 4683 dstopt += sizeof (uint_t); 4684 udi_size -= toh->len; 4685 } 4686 if (udp->udp_ipv6_recvhopopts && 4687 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4688 struct T_opthdr *toh; 4689 4690 toh = (struct T_opthdr *)dstopt; 4691 toh->level = IPPROTO_IPV6; 4692 toh->name = IPV6_HOPOPTS; 4693 toh->len = sizeof (struct T_opthdr) + 4694 ipp.ipp_hopoptslen; 4695 toh->status = 0; 4696 dstopt += sizeof (struct T_opthdr); 4697 bcopy(ipp.ipp_hopopts, dstopt, 4698 ipp.ipp_hopoptslen); 4699 dstopt += ipp.ipp_hopoptslen; 4700 udi_size -= toh->len; 4701 } 4702 if (udp->udp_ipv6_recvdstopts && 4703 udp->udp_ipv6_recvrthdr && 4704 (ipp.ipp_fields & IPPF_RTHDR) && 4705 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4706 struct T_opthdr *toh; 4707 4708 toh = (struct T_opthdr *)dstopt; 4709 toh->level = IPPROTO_IPV6; 4710 toh->name = IPV6_DSTOPTS; 4711 toh->len = sizeof (struct T_opthdr) + 4712 ipp.ipp_rtdstoptslen; 4713 toh->status = 0; 4714 dstopt += sizeof (struct T_opthdr); 4715 bcopy(ipp.ipp_rtdstopts, dstopt, 4716 ipp.ipp_rtdstoptslen); 4717 dstopt += ipp.ipp_rtdstoptslen; 4718 udi_size -= toh->len; 4719 } 4720 if (udp->udp_ipv6_recvrthdr && 4721 (ipp.ipp_fields & IPPF_RTHDR)) { 4722 struct T_opthdr *toh; 4723 4724 toh = (struct T_opthdr *)dstopt; 4725 toh->level = IPPROTO_IPV6; 4726 toh->name = IPV6_RTHDR; 4727 toh->len = sizeof (struct T_opthdr) + 4728 ipp.ipp_rthdrlen; 4729 toh->status = 0; 4730 dstopt += sizeof (struct T_opthdr); 4731 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4732 dstopt += ipp.ipp_rthdrlen; 4733 udi_size -= toh->len; 4734 } 4735 if (udp->udp_ipv6_recvdstopts && 4736 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4737 struct T_opthdr *toh; 4738 4739 toh = (struct T_opthdr *)dstopt; 4740 toh->level = IPPROTO_IPV6; 4741 toh->name = IPV6_DSTOPTS; 4742 toh->len = sizeof (struct T_opthdr) + 4743 ipp.ipp_dstoptslen; 4744 toh->status = 0; 4745 dstopt += sizeof (struct T_opthdr); 4746 bcopy(ipp.ipp_dstopts, dstopt, 4747 ipp.ipp_dstoptslen); 4748 dstopt += ipp.ipp_dstoptslen; 4749 udi_size -= toh->len; 4750 } 4751 4752 if (cr != NULL) { 4753 struct T_opthdr *toh; 4754 4755 toh = (struct T_opthdr *)dstopt; 4756 toh->level = SOL_SOCKET; 4757 toh->name = SCM_UCRED; 4758 toh->len = sizeof (struct T_opthdr) + ucredsize; 4759 toh->status = 0; 4760 (void) cred2ucred(cr, cpid, &toh[1]); 4761 dstopt += toh->len; 4762 udi_size -= toh->len; 4763 } 4764 /* Consumed all of allocated space */ 4765 ASSERT(udi_size == 0); 4766 } 4767 #undef sin6 4768 /* No IP_RECVDSTADDR for IPv6. */ 4769 } 4770 4771 BUMP_MIB(&udp_mib, udpInDatagrams); 4772 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4773 "udp_rput_end: q %p (%S)", q, "end"); 4774 if (options_mp != NULL) 4775 freeb(options_mp); 4776 4777 if (udp->udp_direct_sockfs) { 4778 /* 4779 * There is nothing above us except for the stream head; 4780 * use the read-side synchronous stream interface in 4781 * order to reduce the time spent in interrupt thread. 4782 */ 4783 ASSERT(udp->udp_issocket); 4784 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 4785 } else { 4786 /* 4787 * Use regular STREAMS interface to pass data upstream 4788 * if this is not a socket endpoint, or if we have 4789 * switched over to the slow mode due to sockmod being 4790 * popped or a module being pushed on top of us. 4791 */ 4792 putnext(UDP_RD(q), mp); 4793 } 4794 return; 4795 4796 tossit: 4797 freemsg(mp); 4798 if (options_mp != NULL) 4799 freeb(options_mp); 4800 BUMP_MIB(&udp_mib, udpInErrors); 4801 } 4802 4803 void 4804 udp_conn_recv(conn_t *connp, mblk_t *mp) 4805 { 4806 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 4807 } 4808 4809 /* ARGSUSED */ 4810 static void 4811 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 4812 { 4813 udp_input((conn_t *)arg, mp); 4814 _UDP_EXIT((conn_t *)arg); 4815 } 4816 4817 /* 4818 * Process non-M_DATA messages as well as M_DATA messages that requires 4819 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 4820 */ 4821 static void 4822 udp_rput_other(queue_t *q, mblk_t *mp) 4823 { 4824 struct T_unitdata_ind *tudi; 4825 mblk_t *mp1; 4826 uchar_t *rptr; 4827 uchar_t *new_rptr; 4828 int hdr_length; 4829 int udi_size; /* Size of T_unitdata_ind */ 4830 int opt_len; /* Length of IP options */ 4831 sin_t *sin; 4832 struct T_error_ack *tea; 4833 mblk_t *options_mp = NULL; 4834 in_pktinfo_t *pinfo; 4835 boolean_t recv_on = B_FALSE; 4836 cred_t *cr = NULL; 4837 udp_t *udp = Q_TO_UDP(q); 4838 pid_t cpid; 4839 4840 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4841 "udp_rput_other: q %p mp %p", q, mp); 4842 4843 ASSERT(OK_32PTR(mp->b_rptr)); 4844 rptr = mp->b_rptr; 4845 4846 switch (mp->b_datap->db_type) { 4847 case M_CTL: 4848 /* 4849 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 4850 */ 4851 recv_on = B_TRUE; 4852 options_mp = mp; 4853 pinfo = (in_pktinfo_t *)options_mp->b_rptr; 4854 4855 /* 4856 * The actual data is in mp->b_cont 4857 */ 4858 mp = mp->b_cont; 4859 ASSERT(OK_32PTR(mp->b_rptr)); 4860 rptr = mp->b_rptr; 4861 break; 4862 case M_DATA: 4863 /* 4864 * M_DATA messages contain IPv4 datagrams. They are handled 4865 * after this switch. 4866 */ 4867 break; 4868 case M_PROTO: 4869 case M_PCPROTO: 4870 /* M_PROTO messages contain some type of TPI message. */ 4871 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 4872 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4873 freemsg(mp); 4874 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4875 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 4876 return; 4877 } 4878 tea = (struct T_error_ack *)rptr; 4879 4880 switch (tea->PRIM_type) { 4881 case T_ERROR_ACK: 4882 switch (tea->ERROR_prim) { 4883 case O_T_BIND_REQ: 4884 case T_BIND_REQ: { 4885 /* 4886 * If our O_T_BIND_REQ/T_BIND_REQ fails, 4887 * clear out the associated port and source 4888 * address before passing the message 4889 * upstream. If this was caused by a T_CONN_REQ 4890 * revert back to bound state. 4891 */ 4892 udp_fanout_t *udpf; 4893 4894 udpf = &udp_bind_fanout[ 4895 UDP_BIND_HASH(udp->udp_port)]; 4896 mutex_enter(&udpf->uf_lock); 4897 if (udp->udp_state == TS_DATA_XFER) { 4898 /* Connect failed */ 4899 tea->ERROR_prim = T_CONN_REQ; 4900 /* Revert back to the bound source */ 4901 udp->udp_v6src = udp->udp_bound_v6src; 4902 udp->udp_state = TS_IDLE; 4903 mutex_exit(&udpf->uf_lock); 4904 if (udp->udp_family == AF_INET6) 4905 (void) udp_build_hdrs(q, udp); 4906 break; 4907 } 4908 4909 if (udp->udp_discon_pending) { 4910 tea->ERROR_prim = T_DISCON_REQ; 4911 udp->udp_discon_pending = 0; 4912 } 4913 V6_SET_ZERO(udp->udp_v6src); 4914 V6_SET_ZERO(udp->udp_bound_v6src); 4915 udp->udp_state = TS_UNBND; 4916 udp_bind_hash_remove(udp, B_TRUE); 4917 udp->udp_port = 0; 4918 mutex_exit(&udpf->uf_lock); 4919 if (udp->udp_family == AF_INET6) 4920 (void) udp_build_hdrs(q, udp); 4921 break; 4922 } 4923 default: 4924 break; 4925 } 4926 break; 4927 case T_BIND_ACK: 4928 udp_rput_bind_ack(q, mp); 4929 return; 4930 4931 case T_OPTMGMT_ACK: 4932 case T_OK_ACK: 4933 break; 4934 default: 4935 freemsg(mp); 4936 return; 4937 } 4938 putnext(UDP_RD(q), mp); 4939 return; 4940 } 4941 4942 /* 4943 * This is the inbound data path. 4944 * First, we make sure the data contains both IP and UDP headers. 4945 * 4946 * This handle IPv4 packets for only AF_INET sockets. 4947 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 4948 * is no need saving the options. 4949 */ 4950 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4951 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4952 if (mp->b_wptr - rptr < hdr_length) { 4953 if (!pullupmsg(mp, hdr_length)) { 4954 freemsg(mp); 4955 if (options_mp != NULL) 4956 freeb(options_mp); 4957 BUMP_MIB(&udp_mib, udpInErrors); 4958 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4959 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 4960 BUMP_MIB(&udp_mib, udpInErrors); 4961 return; 4962 } 4963 rptr = mp->b_rptr; 4964 } 4965 /* Walk past the headers. */ 4966 new_rptr = rptr + hdr_length; 4967 if (!udp->udp_rcvhdr) 4968 mp->b_rptr = new_rptr; 4969 4970 /* Save the options if any */ 4971 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 4972 if (opt_len > 0) { 4973 if (opt_len > udp->udp_ip_rcv_options_len) { 4974 if (udp->udp_ip_rcv_options_len) 4975 mi_free((char *)udp->udp_ip_rcv_options); 4976 udp->udp_ip_rcv_options_len = 0; 4977 udp->udp_ip_rcv_options = 4978 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 4979 if (udp->udp_ip_rcv_options) 4980 udp->udp_ip_rcv_options_len = opt_len; 4981 } 4982 if (udp->udp_ip_rcv_options_len) { 4983 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 4984 udp->udp_ip_rcv_options, opt_len); 4985 /* Adjust length if we are resusing the space */ 4986 udp->udp_ip_rcv_options_len = opt_len; 4987 } 4988 } else if (udp->udp_ip_rcv_options_len) { 4989 mi_free((char *)udp->udp_ip_rcv_options); 4990 udp->udp_ip_rcv_options = NULL; 4991 udp->udp_ip_rcv_options_len = 0; 4992 } 4993 4994 /* 4995 * Normally only send up the address. 4996 * If IP_RECVDSTADDR is set we include the destination IP 4997 * address as an option. With IP_RECVOPTS we include all 4998 * the IP options. 4999 */ 5000 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5001 if (udp->udp_recvdstaddr) { 5002 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5003 UDP_STAT(udp_in_recvdstaddr); 5004 } 5005 if (udp->udp_recvopts && opt_len > 0) { 5006 udi_size += sizeof (struct T_opthdr) + opt_len; 5007 UDP_STAT(udp_in_recvopts); 5008 } 5009 5010 /* 5011 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5012 * space accordingly 5013 */ 5014 if (udp->udp_recvif && recv_on && 5015 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5016 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5017 UDP_STAT(udp_in_recvif); 5018 } 5019 5020 if (udp->udp_recvslla && recv_on && 5021 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5022 udi_size += sizeof (struct T_opthdr) + 5023 sizeof (struct sockaddr_dl); 5024 UDP_STAT(udp_in_recvslla); 5025 } 5026 5027 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5028 udi_size += sizeof (struct T_opthdr) + ucredsize; 5029 cpid = DB_CPID(mp); 5030 UDP_STAT(udp_in_recvucred); 5031 } 5032 /* 5033 * If IP_RECVTTL is set allocate the appropriate sized buffer 5034 */ 5035 if (udp->udp_recvttl) { 5036 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5037 UDP_STAT(udp_in_recvttl); 5038 } 5039 5040 /* Allocate a message block for the T_UNITDATA_IND structure. */ 5041 mp1 = allocb(udi_size, BPRI_MED); 5042 if (mp1 == NULL) { 5043 freemsg(mp); 5044 if (options_mp != NULL) 5045 freeb(options_mp); 5046 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5047 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5048 BUMP_MIB(&udp_mib, udpInErrors); 5049 return; 5050 } 5051 mp1->b_cont = mp; 5052 mp = mp1; 5053 mp->b_datap->db_type = M_PROTO; 5054 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5055 mp->b_wptr = (uchar_t *)tudi + udi_size; 5056 tudi->PRIM_type = T_UNITDATA_IND; 5057 tudi->SRC_length = sizeof (sin_t); 5058 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5059 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5060 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5061 tudi->OPT_length = udi_size; 5062 5063 sin = (sin_t *)&tudi[1]; 5064 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5065 sin->sin_port = ((in_port_t *) 5066 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5067 sin->sin_family = AF_INET; 5068 *(uint32_t *)&sin->sin_zero[0] = 0; 5069 *(uint32_t *)&sin->sin_zero[4] = 0; 5070 5071 /* 5072 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5073 * IP_RECVTTL has been set. 5074 */ 5075 if (udi_size != 0) { 5076 /* 5077 * Copy in destination address before options to avoid any 5078 * padding issues. 5079 */ 5080 char *dstopt; 5081 5082 dstopt = (char *)&sin[1]; 5083 if (udp->udp_recvdstaddr) { 5084 struct T_opthdr *toh; 5085 ipaddr_t *dstptr; 5086 5087 toh = (struct T_opthdr *)dstopt; 5088 toh->level = IPPROTO_IP; 5089 toh->name = IP_RECVDSTADDR; 5090 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5091 toh->status = 0; 5092 dstopt += sizeof (struct T_opthdr); 5093 dstptr = (ipaddr_t *)dstopt; 5094 *dstptr = (((ipaddr_t *)rptr)[4]); 5095 dstopt += sizeof (ipaddr_t); 5096 udi_size -= toh->len; 5097 } 5098 if (udp->udp_recvopts && udi_size != 0) { 5099 struct T_opthdr *toh; 5100 5101 toh = (struct T_opthdr *)dstopt; 5102 toh->level = IPPROTO_IP; 5103 toh->name = IP_RECVOPTS; 5104 toh->len = sizeof (struct T_opthdr) + opt_len; 5105 toh->status = 0; 5106 dstopt += sizeof (struct T_opthdr); 5107 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5108 dstopt += opt_len; 5109 udi_size -= toh->len; 5110 } 5111 5112 if (udp->udp_recvslla && recv_on && 5113 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5114 5115 struct T_opthdr *toh; 5116 struct sockaddr_dl *dstptr; 5117 5118 toh = (struct T_opthdr *)dstopt; 5119 toh->level = IPPROTO_IP; 5120 toh->name = IP_RECVSLLA; 5121 toh->len = sizeof (struct T_opthdr) + 5122 sizeof (struct sockaddr_dl); 5123 toh->status = 0; 5124 dstopt += sizeof (struct T_opthdr); 5125 dstptr = (struct sockaddr_dl *)dstopt; 5126 bcopy(&pinfo->in_pkt_slla, dstptr, 5127 sizeof (struct sockaddr_dl)); 5128 dstopt += sizeof (struct sockaddr_dl); 5129 udi_size -= toh->len; 5130 } 5131 5132 if (udp->udp_recvif && recv_on && 5133 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5134 5135 struct T_opthdr *toh; 5136 uint_t *dstptr; 5137 5138 toh = (struct T_opthdr *)dstopt; 5139 toh->level = IPPROTO_IP; 5140 toh->name = IP_RECVIF; 5141 toh->len = sizeof (struct T_opthdr) + 5142 sizeof (uint_t); 5143 toh->status = 0; 5144 dstopt += sizeof (struct T_opthdr); 5145 dstptr = (uint_t *)dstopt; 5146 *dstptr = pinfo->in_pkt_ifindex; 5147 dstopt += sizeof (uint_t); 5148 udi_size -= toh->len; 5149 } 5150 5151 if (cr != NULL) { 5152 struct T_opthdr *toh; 5153 5154 toh = (struct T_opthdr *)dstopt; 5155 toh->level = SOL_SOCKET; 5156 toh->name = SCM_UCRED; 5157 toh->len = sizeof (struct T_opthdr) + ucredsize; 5158 toh->status = 0; 5159 (void) cred2ucred(cr, cpid, &toh[1]); 5160 dstopt += toh->len; 5161 udi_size -= toh->len; 5162 } 5163 5164 if (udp->udp_recvttl) { 5165 struct T_opthdr *toh; 5166 uint8_t *dstptr; 5167 5168 toh = (struct T_opthdr *)dstopt; 5169 toh->level = IPPROTO_IP; 5170 toh->name = IP_RECVTTL; 5171 toh->len = sizeof (struct T_opthdr) + 5172 sizeof (uint8_t); 5173 toh->status = 0; 5174 dstopt += sizeof (struct T_opthdr); 5175 dstptr = (uint8_t *)dstopt; 5176 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5177 dstopt += sizeof (uint8_t); 5178 udi_size -= toh->len; 5179 } 5180 5181 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5182 } 5183 BUMP_MIB(&udp_mib, udpInDatagrams); 5184 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5185 "udp_rput_other_end: q %p (%S)", q, "end"); 5186 if (options_mp != NULL) 5187 freeb(options_mp); 5188 5189 if (udp->udp_direct_sockfs) { 5190 /* 5191 * There is nothing above us except for the stream head; 5192 * use the read-side synchronous stream interface in 5193 * order to reduce the time spent in interrupt thread. 5194 */ 5195 ASSERT(udp->udp_issocket); 5196 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5197 } else { 5198 /* 5199 * Use regular STREAMS interface to pass data upstream 5200 * if this is not a socket endpoint, or if we have 5201 * switched over to the slow mode due to sockmod being 5202 * popped or a module being pushed on top of us. 5203 */ 5204 putnext(UDP_RD(q), mp); 5205 } 5206 } 5207 5208 /* ARGSUSED */ 5209 static void 5210 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5211 { 5212 conn_t *connp = arg; 5213 5214 udp_rput_other(connp->conn_rq, mp); 5215 udp_exit(connp); 5216 } 5217 5218 /* 5219 * Process a T_BIND_ACK 5220 */ 5221 static void 5222 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5223 { 5224 udp_t *udp = Q_TO_UDP(q); 5225 mblk_t *mp1; 5226 ire_t *ire; 5227 struct T_bind_ack *tba; 5228 uchar_t *addrp; 5229 ipa_conn_t *ac; 5230 ipa6_conn_t *ac6; 5231 5232 if (udp->udp_discon_pending) 5233 udp->udp_discon_pending = 0; 5234 5235 /* 5236 * If a broadcast/multicast address was bound set 5237 * the source address to 0. 5238 * This ensures no datagrams with broadcast address 5239 * as source address are emitted (which would violate 5240 * RFC1122 - Hosts requirements) 5241 * 5242 * Note that when connecting the returned IRE is 5243 * for the destination address and we only perform 5244 * the broadcast check for the source address (it 5245 * is OK to connect to a broadcast/multicast address.) 5246 */ 5247 mp1 = mp->b_cont; 5248 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5249 ire = (ire_t *)mp1->b_rptr; 5250 5251 /* 5252 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5253 * local address. 5254 */ 5255 if (ire->ire_type == IRE_BROADCAST && 5256 udp->udp_state != TS_DATA_XFER) { 5257 /* This was just a local bind to a broadcast addr */ 5258 V6_SET_ZERO(udp->udp_v6src); 5259 if (udp->udp_family == AF_INET6) 5260 (void) udp_build_hdrs(q, udp); 5261 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5262 /* 5263 * Local address not yet set - pick it from the 5264 * T_bind_ack 5265 */ 5266 tba = (struct T_bind_ack *)mp->b_rptr; 5267 addrp = &mp->b_rptr[tba->ADDR_offset]; 5268 switch (udp->udp_family) { 5269 case AF_INET: 5270 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5271 ac = (ipa_conn_t *)addrp; 5272 } else { 5273 ASSERT(tba->ADDR_length == 5274 sizeof (ipa_conn_x_t)); 5275 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5276 } 5277 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5278 &udp->udp_v6src); 5279 break; 5280 case AF_INET6: 5281 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5282 ac6 = (ipa6_conn_t *)addrp; 5283 } else { 5284 ASSERT(tba->ADDR_length == 5285 sizeof (ipa6_conn_x_t)); 5286 ac6 = &((ipa6_conn_x_t *) 5287 addrp)->ac6x_conn; 5288 } 5289 udp->udp_v6src = ac6->ac6_laddr; 5290 (void) udp_build_hdrs(q, udp); 5291 break; 5292 } 5293 } 5294 mp1 = mp1->b_cont; 5295 } 5296 /* 5297 * Look for one or more appended ACK message added by 5298 * udp_connect or udp_disconnect. 5299 * If none found just send up the T_BIND_ACK. 5300 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5301 * udp_disconnect has appended a T_OK_ACK. 5302 */ 5303 if (mp1 != NULL) { 5304 if (mp->b_cont == mp1) 5305 mp->b_cont = NULL; 5306 else { 5307 ASSERT(mp->b_cont->b_cont == mp1); 5308 mp->b_cont->b_cont = NULL; 5309 } 5310 freemsg(mp); 5311 mp = mp1; 5312 while (mp != NULL) { 5313 mp1 = mp->b_cont; 5314 mp->b_cont = NULL; 5315 putnext(UDP_RD(q), mp); 5316 mp = mp1; 5317 } 5318 return; 5319 } 5320 freemsg(mp->b_cont); 5321 mp->b_cont = NULL; 5322 putnext(UDP_RD(q), mp); 5323 } 5324 5325 /* 5326 * return SNMP stuff in buffer in mpdata 5327 */ 5328 int 5329 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5330 { 5331 mblk_t *mpdata; 5332 mblk_t *mp_conn_ctl; 5333 mblk_t *mp6_conn_ctl; 5334 mblk_t *mp_conn_data; 5335 mblk_t *mp6_conn_data; 5336 mblk_t *mp_conn_tail = NULL; 5337 mblk_t *mp6_conn_tail = NULL; 5338 struct opthdr *optp; 5339 mib2_udpEntry_t ude; 5340 mib2_udp6Entry_t ude6; 5341 int state; 5342 zoneid_t zoneid; 5343 int i; 5344 connf_t *connfp; 5345 conn_t *connp = Q_TO_CONN(q); 5346 udp_t *udp = connp->conn_udp; 5347 5348 if (mpctl == NULL || 5349 (mpdata = mpctl->b_cont) == NULL || 5350 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5351 (mp6_conn_ctl = copymsg(mpctl)) == NULL) { 5352 freemsg(mp_conn_ctl); 5353 return (0); 5354 } 5355 5356 mp_conn_data = mp_conn_ctl->b_cont; 5357 mp6_conn_data = mp6_conn_ctl->b_cont; 5358 5359 zoneid = connp->conn_zoneid; 5360 5361 /* fixed length structure for IPv4 and IPv6 counters */ 5362 SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5363 SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5364 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5365 optp->level = MIB2_UDP; 5366 optp->name = 0; 5367 (void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib)); 5368 optp->len = msgdsize(mpdata); 5369 qreply(q, mpctl); 5370 5371 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5372 connfp = &ipcl_globalhash_fanout[i]; 5373 connp = NULL; 5374 5375 while ((connp = ipcl_get_next_conn(connfp, connp, 5376 IPCL_UDP))) { 5377 udp = connp->conn_udp; 5378 if (zoneid != connp->conn_zoneid) 5379 continue; 5380 5381 /* 5382 * Note that the port numbers are sent in 5383 * host byte order 5384 */ 5385 5386 if (udp->udp_state == TS_UNBND) 5387 state = MIB2_UDP_unbound; 5388 else if (udp->udp_state == TS_IDLE) 5389 state = MIB2_UDP_idle; 5390 else if (udp->udp_state == TS_DATA_XFER) 5391 state = MIB2_UDP_connected; 5392 else 5393 state = MIB2_UDP_unknown; 5394 5395 /* 5396 * Create an IPv4 table entry for IPv4 entries and also 5397 * any IPv6 entries which are bound to in6addr_any 5398 * (i.e. anything a IPv4 peer could connect/send to). 5399 */ 5400 if (udp->udp_ipversion == IPV4_VERSION || 5401 (udp->udp_state <= TS_IDLE && 5402 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5403 ude.udpEntryInfo.ue_state = state; 5404 /* 5405 * If in6addr_any this will set it to 5406 * INADDR_ANY 5407 */ 5408 ude.udpLocalAddress = 5409 V4_PART_OF_V6(udp->udp_v6src); 5410 ude.udpLocalPort = ntohs(udp->udp_port); 5411 if (udp->udp_state == TS_DATA_XFER) { 5412 /* 5413 * Can potentially get here for 5414 * v6 socket if another process 5415 * (say, ping) has just done a 5416 * sendto(), changing the state 5417 * from the TS_IDLE above to 5418 * TS_DATA_XFER by the time we hit 5419 * this part of the code. 5420 */ 5421 ude.udpEntryInfo.ue_RemoteAddress = 5422 V4_PART_OF_V6(udp->udp_v6dst); 5423 ude.udpEntryInfo.ue_RemotePort = 5424 ntohs(udp->udp_dstport); 5425 } else { 5426 ude.udpEntryInfo.ue_RemoteAddress = 0; 5427 ude.udpEntryInfo.ue_RemotePort = 0; 5428 } 5429 (void) snmp_append_data2(mp_conn_data, 5430 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5431 } 5432 if (udp->udp_ipversion == IPV6_VERSION) { 5433 ude6.udp6EntryInfo.ue_state = state; 5434 ude6.udp6LocalAddress = udp->udp_v6src; 5435 ude6.udp6LocalPort = ntohs(udp->udp_port); 5436 ude6.udp6IfIndex = udp->udp_bound_if; 5437 if (udp->udp_state == TS_DATA_XFER) { 5438 ude6.udp6EntryInfo.ue_RemoteAddress = 5439 udp->udp_v6dst; 5440 ude6.udp6EntryInfo.ue_RemotePort = 5441 ntohs(udp->udp_dstport); 5442 } else { 5443 ude6.udp6EntryInfo.ue_RemoteAddress = 5444 sin6_null.sin6_addr; 5445 ude6.udp6EntryInfo.ue_RemotePort = 0; 5446 } 5447 (void) snmp_append_data2(mp6_conn_data, 5448 &mp6_conn_tail, (char *)&ude6, 5449 sizeof (ude6)); 5450 } 5451 } 5452 } 5453 5454 /* IPv4 UDP endpoints */ 5455 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5456 sizeof (struct T_optmgmt_ack)]; 5457 optp->level = MIB2_UDP; 5458 optp->name = MIB2_UDP_ENTRY; 5459 optp->len = msgdsize(mp_conn_data); 5460 qreply(q, mp_conn_ctl); 5461 5462 /* IPv6 UDP endpoints */ 5463 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5464 sizeof (struct T_optmgmt_ack)]; 5465 optp->level = MIB2_UDP6; 5466 optp->name = MIB2_UDP6_ENTRY; 5467 optp->len = msgdsize(mp6_conn_data); 5468 qreply(q, mp6_conn_ctl); 5469 5470 return (1); 5471 } 5472 5473 /* 5474 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 5475 * NOTE: Per MIB-II, UDP has no writable data. 5476 * TODO: If this ever actually tries to set anything, it needs to be 5477 * to do the appropriate locking. 5478 */ 5479 /* ARGSUSED */ 5480 int 5481 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5482 uchar_t *ptr, int len) 5483 { 5484 switch (level) { 5485 case MIB2_UDP: 5486 return (0); 5487 default: 5488 return (1); 5489 } 5490 } 5491 5492 static void 5493 udp_report_item(mblk_t *mp, udp_t *udp) 5494 { 5495 char *state; 5496 char addrbuf1[INET6_ADDRSTRLEN]; 5497 char addrbuf2[INET6_ADDRSTRLEN]; 5498 uint_t print_len, buf_len; 5499 5500 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5501 ASSERT(buf_len >= 0); 5502 if (buf_len == 0) 5503 return; 5504 5505 if (udp->udp_state == TS_UNBND) 5506 state = "UNBOUND"; 5507 else if (udp->udp_state == TS_IDLE) 5508 state = "IDLE"; 5509 else if (udp->udp_state == TS_DATA_XFER) 5510 state = "CONNECTED"; 5511 else 5512 state = "UnkState"; 5513 print_len = snprintf((char *)mp->b_wptr, buf_len, 5514 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5515 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5516 inet_ntop(AF_INET6, &udp->udp_v6src, 5517 addrbuf1, sizeof (addrbuf1)), 5518 inet_ntop(AF_INET6, &udp->udp_v6dst, 5519 addrbuf2, sizeof (addrbuf2)), 5520 ntohs(udp->udp_dstport), state); 5521 if (print_len < buf_len) { 5522 mp->b_wptr += print_len; 5523 } else { 5524 mp->b_wptr += buf_len; 5525 } 5526 } 5527 5528 /* Report for ndd "udp_status" */ 5529 /* ARGSUSED */ 5530 static int 5531 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5532 { 5533 zoneid_t zoneid; 5534 connf_t *connfp; 5535 conn_t *connp = Q_TO_CONN(q); 5536 udp_t *udp = connp->conn_udp; 5537 int i; 5538 5539 /* 5540 * Because of the ndd constraint, at most we can have 64K buffer 5541 * to put in all UDP info. So to be more efficient, just 5542 * allocate a 64K buffer here, assuming we need that large buffer. 5543 * This may be a problem as any user can read udp_status. Therefore 5544 * we limit the rate of doing this using udp_ndd_get_info_interval. 5545 * This should be OK as normal users should not do this too often. 5546 */ 5547 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 5548 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 5549 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 5550 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5551 return (0); 5552 } 5553 } 5554 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5555 /* The following may work even if we cannot get a large buf. */ 5556 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5557 return (0); 5558 } 5559 (void) mi_mpprintf(mp, 5560 "UDP " MI_COL_HDRPAD_STR 5561 /* 12345678[89ABCDEF] */ 5562 " zone lport src addr dest addr port state"); 5563 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5564 5565 zoneid = connp->conn_zoneid; 5566 5567 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5568 connfp = &ipcl_globalhash_fanout[i]; 5569 connp = NULL; 5570 5571 while ((connp = ipcl_get_next_conn(connfp, connp, 5572 IPCL_UDP))) { 5573 udp = connp->conn_udp; 5574 if (zoneid != GLOBAL_ZONEID && 5575 zoneid != connp->conn_zoneid) 5576 continue; 5577 5578 udp_report_item(mp->b_cont, udp); 5579 } 5580 } 5581 udp_last_ndd_get_info_time = ddi_get_lbolt(); 5582 return (0); 5583 } 5584 5585 /* 5586 * This routine creates a T_UDERROR_IND message and passes it upstream. 5587 * The address and options are copied from the T_UNITDATA_REQ message 5588 * passed in mp. This message is freed. 5589 */ 5590 static void 5591 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5592 t_scalar_t err) 5593 { 5594 struct T_unitdata_req *tudr; 5595 mblk_t *mp1; 5596 uchar_t *optaddr; 5597 t_scalar_t optlen; 5598 5599 if (DB_TYPE(mp) == M_DATA) { 5600 ASSERT(destaddr != NULL && destlen != 0); 5601 optaddr = NULL; 5602 optlen = 0; 5603 } else { 5604 if ((mp->b_wptr < mp->b_rptr) || 5605 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5606 goto done; 5607 } 5608 tudr = (struct T_unitdata_req *)mp->b_rptr; 5609 destaddr = mp->b_rptr + tudr->DEST_offset; 5610 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5611 destaddr + tudr->DEST_length < mp->b_rptr || 5612 destaddr + tudr->DEST_length > mp->b_wptr) { 5613 goto done; 5614 } 5615 optaddr = mp->b_rptr + tudr->OPT_offset; 5616 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 5617 optaddr + tudr->OPT_length < mp->b_rptr || 5618 optaddr + tudr->OPT_length > mp->b_wptr) { 5619 goto done; 5620 } 5621 destlen = tudr->DEST_length; 5622 optlen = tudr->OPT_length; 5623 } 5624 5625 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5626 (char *)optaddr, optlen, err); 5627 if (mp1 != NULL) 5628 putnext(UDP_RD(q), mp1); 5629 5630 done: 5631 freemsg(mp); 5632 } 5633 5634 /* 5635 * This routine removes a port number association from a stream. It 5636 * is called by udp_wput to handle T_UNBIND_REQ messages. 5637 */ 5638 static void 5639 udp_unbind(queue_t *q, mblk_t *mp) 5640 { 5641 udp_t *udp = Q_TO_UDP(q); 5642 5643 /* If a bind has not been done, we can't unbind. */ 5644 if (udp->udp_state == TS_UNBND) { 5645 udp_err_ack(q, mp, TOUTSTATE, 0); 5646 return; 5647 } 5648 if (cl_inet_unbind != NULL) { 5649 /* 5650 * Running in cluster mode - register unbind information 5651 */ 5652 if (udp->udp_ipversion == IPV4_VERSION) { 5653 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 5654 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5655 (in_port_t)udp->udp_port); 5656 } else { 5657 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5658 (uint8_t *)&(udp->udp_v6src), 5659 (in_port_t)udp->udp_port); 5660 } 5661 } 5662 5663 udp_bind_hash_remove(udp, B_FALSE); 5664 V6_SET_ZERO(udp->udp_v6src); 5665 V6_SET_ZERO(udp->udp_bound_v6src); 5666 udp->udp_port = 0; 5667 udp->udp_state = TS_UNBND; 5668 5669 if (udp->udp_family == AF_INET6) { 5670 int error; 5671 5672 /* Rebuild the header template */ 5673 error = udp_build_hdrs(q, udp); 5674 if (error != 0) { 5675 udp_err_ack(q, mp, TSYSERR, error); 5676 return; 5677 } 5678 } 5679 /* 5680 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5681 * and therefore ip_unbind must never return NULL. 5682 */ 5683 mp = ip_unbind(q, mp); 5684 ASSERT(mp != NULL); 5685 putnext(UDP_RD(q), mp); 5686 } 5687 5688 /* 5689 * Don't let port fall into the privileged range. 5690 * Since the extra priviledged ports can be arbitrary we also 5691 * ensure that we exclude those from consideration. 5692 * udp_g_epriv_ports is not sorted thus we loop over it until 5693 * there are no changes. 5694 */ 5695 static in_port_t 5696 udp_update_next_port(in_port_t port, boolean_t random) 5697 { 5698 int i; 5699 5700 if (random && udp_random_anon_port != 0) { 5701 (void) random_get_pseudo_bytes((uint8_t *)&port, 5702 sizeof (in_port_t)); 5703 /* 5704 * Unless changed by a sys admin, the smallest anon port 5705 * is 32768 and the largest anon port is 65535. It is 5706 * very likely (50%) for the random port to be smaller 5707 * than the smallest anon port. When that happens, 5708 * add port % (anon port range) to the smallest anon 5709 * port to get the random port. It should fall into the 5710 * valid anon port range. 5711 */ 5712 if (port < udp_smallest_anon_port) { 5713 port = udp_smallest_anon_port + 5714 port % (udp_largest_anon_port - 5715 udp_smallest_anon_port); 5716 } 5717 } 5718 5719 retry: 5720 if (port < udp_smallest_anon_port || port > udp_largest_anon_port) 5721 port = udp_smallest_anon_port; 5722 5723 if (port < udp_smallest_nonpriv_port) 5724 port = udp_smallest_nonpriv_port; 5725 5726 for (i = 0; i < udp_g_num_epriv_ports; i++) { 5727 if (port == udp_g_epriv_ports[i]) { 5728 port++; 5729 /* 5730 * Make sure that the port is in the 5731 * valid range. 5732 */ 5733 goto retry; 5734 } 5735 } 5736 return (port); 5737 } 5738 5739 static mblk_t * 5740 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5741 uint_t srcid, int *error) 5742 { 5743 udp_t *udp = connp->conn_udp; 5744 queue_t *q = connp->conn_wq; 5745 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 5746 mblk_t *mp2; 5747 ipha_t *ipha; 5748 int ip_hdr_length; 5749 uint32_t ip_len; 5750 udpha_t *udpha; 5751 5752 *error = 0; 5753 5754 /* mp1 points to the M_DATA mblk carrying the packet */ 5755 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5756 5757 /* Add an IP header */ 5758 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 5759 udp->udp_ip_snd_options_len; 5760 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5761 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5762 !OK_32PTR(ipha)) { 5763 mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO); 5764 if (mp2 == NULL) { 5765 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5766 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5767 *error = ENOMEM; 5768 goto done; 5769 } 5770 mp2->b_wptr = DB_LIM(mp2); 5771 mp2->b_cont = mp1; 5772 mp1 = mp2; 5773 if (DB_TYPE(mp) != M_DATA) 5774 mp->b_cont = mp1; 5775 else 5776 mp = mp1; 5777 5778 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5779 } 5780 ip_hdr_length -= UDPH_SIZE; 5781 #ifdef _BIG_ENDIAN 5782 /* Set version, header length, and tos */ 5783 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5784 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5785 udp->udp_type_of_service); 5786 /* Set ttl and protocol */ 5787 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5788 #else 5789 /* Set version, header length, and tos */ 5790 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5791 ((udp->udp_type_of_service << 8) | 5792 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5793 /* Set ttl and protocol */ 5794 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5795 #endif 5796 /* 5797 * Copy our address into the packet. If this is zero, 5798 * first look at __sin6_src_id for a hint. If we leave the source 5799 * as INADDR_ANY then ip will fill in the real source address. 5800 */ 5801 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5802 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5803 in6_addr_t v6src; 5804 5805 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid); 5806 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5807 } 5808 5809 ipha->ipha_fragment_offset_and_flags = 0; 5810 ipha->ipha_ident = 0; 5811 5812 mp1->b_rptr = (uchar_t *)ipha; 5813 5814 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5815 (uintptr_t)UINT_MAX); 5816 5817 /* Determine length of packet */ 5818 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5819 if ((mp2 = mp1->b_cont) != NULL) { 5820 do { 5821 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5822 ip_len += (uint32_t)MBLKL(mp2); 5823 } while ((mp2 = mp2->b_cont) != NULL); 5824 } 5825 /* 5826 * If the size of the packet is greater than the maximum allowed by 5827 * ip, return an error. Passing this down could cause panics because 5828 * the size will have wrapped and be inconsistent with the msg size. 5829 */ 5830 if (ip_len > IP_MAXPACKET) { 5831 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5832 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5833 *error = EMSGSIZE; 5834 goto done; 5835 } 5836 ipha->ipha_length = htons((uint16_t)ip_len); 5837 ip_len -= ip_hdr_length; 5838 ip_len = htons((uint16_t)ip_len); 5839 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5840 5841 /* 5842 * Copy in the destination address 5843 */ 5844 if (v4dst == INADDR_ANY) 5845 ipha->ipha_dst = htonl(INADDR_LOOPBACK); 5846 else 5847 ipha->ipha_dst = v4dst; 5848 5849 /* 5850 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5851 */ 5852 if (CLASSD(v4dst)) 5853 ipha->ipha_ttl = udp->udp_multicast_ttl; 5854 5855 udpha->uha_dst_port = port; 5856 udpha->uha_src_port = udp->udp_port; 5857 5858 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 5859 uint32_t cksum; 5860 5861 bcopy(udp->udp_ip_snd_options, &ipha[1], 5862 udp->udp_ip_snd_options_len); 5863 /* 5864 * Massage source route putting first source route in ipha_dst. 5865 * Ignore the destination in T_unitdata_req. 5866 * Create a checksum adjustment for a source route, if any. 5867 */ 5868 cksum = ip_massage_options(ipha); 5869 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5870 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5871 (ipha->ipha_dst & 0xFFFF); 5872 if ((int)cksum < 0) 5873 cksum--; 5874 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5875 /* 5876 * IP does the checksum if uha_checksum is non-zero, 5877 * We make it easy for IP to include our pseudo header 5878 * by putting our length in uha_checksum. 5879 */ 5880 cksum += ip_len; 5881 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5882 /* There might be a carry. */ 5883 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5884 #ifdef _LITTLE_ENDIAN 5885 if (udp_do_checksum) 5886 ip_len = (cksum << 16) | ip_len; 5887 #else 5888 if (udp_do_checksum) 5889 ip_len = (ip_len << 16) | cksum; 5890 else 5891 ip_len <<= 16; 5892 #endif 5893 } else { 5894 /* 5895 * IP does the checksum if uha_checksum is non-zero, 5896 * We make it easy for IP to include our pseudo header 5897 * by putting our length in uha_checksum. 5898 */ 5899 if (udp_do_checksum) 5900 ip_len |= (ip_len << 16); 5901 #ifndef _LITTLE_ENDIAN 5902 else 5903 ip_len <<= 16; 5904 #endif 5905 } 5906 /* Set UDP length and checksum */ 5907 *((uint32_t *)&udpha->uha_length) = ip_len; 5908 5909 if (DB_TYPE(mp) != M_DATA) { 5910 ASSERT(mp != mp1); 5911 freeb(mp); 5912 } 5913 5914 /* mp has been consumed and we'll return success */ 5915 ASSERT(*error == 0); 5916 mp = NULL; 5917 5918 /* We're done. Pass the packet to ip. */ 5919 BUMP_MIB(&udp_mib, udpOutDatagrams); 5920 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5921 "udp_wput_end: q %p (%S)", q, "end"); 5922 5923 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5924 CONN_OUTBOUND_POLICY_PRESENT(connp) || 5925 connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || 5926 connp->conn_nofailover_ill != NULL || 5927 connp->conn_outgoing_ill != NULL || 5928 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5929 IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) { 5930 UDP_STAT(udp_ip_send); 5931 ip_output(connp, mp1, connp->conn_wq, IP_WPUT); 5932 } else { 5933 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5934 } 5935 5936 done: 5937 if (*error != 0) { 5938 ASSERT(mp != NULL); 5939 BUMP_MIB(&udp_mib, udpOutErrors); 5940 } 5941 return (mp); 5942 } 5943 5944 static void 5945 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5946 { 5947 conn_t *connp = udp->udp_connp; 5948 ipaddr_t src, dst; 5949 ill_t *ill; 5950 ire_t *ire; 5951 ipif_t *ipif = NULL; 5952 mblk_t *ire_fp_mp; 5953 uint_t ire_fp_mp_len; 5954 uint16_t *up; 5955 uint32_t cksum, hcksum_txflags; 5956 queue_t *dev_q; 5957 boolean_t retry_caching; 5958 5959 dst = ipha->ipha_dst; 5960 src = ipha->ipha_src; 5961 ASSERT(ipha->ipha_ident == 0); 5962 5963 if (CLASSD(dst)) { 5964 int err; 5965 5966 ipif = conn_get_held_ipif(connp, 5967 &connp->conn_multicast_ipif, &err); 5968 5969 if (ipif == NULL || ipif->ipif_isv6 || 5970 (ipif->ipif_ill->ill_phyint->phyint_flags & 5971 PHYI_LOOPBACK)) { 5972 if (ipif != NULL) 5973 ipif_refrele(ipif); 5974 UDP_STAT(udp_ip_send); 5975 ip_output(connp, mp, q, IP_WPUT); 5976 return; 5977 } 5978 } 5979 5980 retry_caching = B_FALSE; 5981 mutex_enter(&connp->conn_lock); 5982 ire = connp->conn_ire_cache; 5983 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5984 5985 if (ire == NULL || ire->ire_addr != dst || 5986 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5987 retry_caching = B_TRUE; 5988 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5989 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5990 5991 ASSERT(ipif != NULL); 5992 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 5993 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 5994 retry_caching = B_TRUE; 5995 } 5996 5997 if (!retry_caching) { 5998 ASSERT(ire != NULL); 5999 IRE_REFHOLD(ire); 6000 mutex_exit(&connp->conn_lock); 6001 } else { 6002 boolean_t cached = B_FALSE; 6003 6004 connp->conn_ire_cache = NULL; 6005 mutex_exit(&connp->conn_lock); 6006 6007 /* Release the old ire */ 6008 if (ire != NULL) { 6009 IRE_REFRELE_NOTR(ire); 6010 ire = NULL; 6011 } 6012 6013 if (CLASSD(dst)) { 6014 ASSERT(ipif != NULL); 6015 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6016 connp->conn_zoneid, MATCH_IRE_ILL_GROUP); 6017 } else { 6018 ASSERT(ipif == NULL); 6019 ire = ire_cache_lookup(dst, connp->conn_zoneid); 6020 } 6021 6022 if (ire == NULL) { 6023 if (ipif != NULL) 6024 ipif_refrele(ipif); 6025 UDP_STAT(udp_ire_null); 6026 ip_output(connp, mp, q, IP_WPUT); 6027 return; 6028 } 6029 IRE_REFHOLD_NOTR(ire); 6030 6031 mutex_enter(&connp->conn_lock); 6032 if (!(connp->conn_state_flags & CONN_CLOSING) && 6033 connp->conn_ire_cache == NULL) { 6034 rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6035 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6036 connp->conn_ire_cache = ire; 6037 cached = B_TRUE; 6038 } 6039 rw_exit(&ire->ire_bucket->irb_lock); 6040 } 6041 mutex_exit(&connp->conn_lock); 6042 6043 /* 6044 * We can continue to use the ire but since it was not 6045 * cached, we should drop the extra reference. 6046 */ 6047 if (!cached) 6048 IRE_REFRELE_NOTR(ire); 6049 } 6050 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6051 ASSERT(!CLASSD(dst) || ipif != NULL); 6052 6053 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6054 (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL || 6055 ire->ire_max_frag < ntohs(ipha->ipha_length) || 6056 (ire_fp_mp = ire->ire_fp_mp) == NULL || 6057 (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) { 6058 if (ipif != NULL) 6059 ipif_refrele(ipif); 6060 UDP_STAT(udp_ip_ire_send); 6061 IRE_REFRELE(ire); 6062 ip_output(connp, mp, q, IP_WPUT); 6063 return; 6064 } 6065 6066 BUMP_MIB(&ip_mib, ipOutRequests); 6067 6068 ill = ire_to_ill(ire); 6069 ASSERT(ill != NULL); 6070 6071 dev_q = ire->ire_stq->q_next; 6072 ASSERT(dev_q != NULL); 6073 /* 6074 * If the service thread is already running, or if the driver 6075 * queue is currently flow-controlled, queue this packet. 6076 */ 6077 if ((q->q_first != NULL || connp->conn_draining) || 6078 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 6079 if (ip_output_queue) { 6080 (void) putq(q, mp); 6081 } else { 6082 BUMP_MIB(&ip_mib, ipOutDiscards); 6083 freemsg(mp); 6084 } 6085 if (ipif != NULL) 6086 ipif_refrele(ipif); 6087 IRE_REFRELE(ire); 6088 return; 6089 } 6090 6091 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6092 #ifndef _BIG_ENDIAN 6093 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6094 #endif 6095 6096 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6097 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6098 src = ipha->ipha_src = ipif->ipif_src_addr; 6099 else 6100 src = ipha->ipha_src = ire->ire_src_addr; 6101 } 6102 6103 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6104 ASSERT(ill->ill_hcksum_capab != NULL); 6105 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6106 } else { 6107 hcksum_txflags = 0; 6108 } 6109 6110 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6111 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6112 6113 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6114 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6115 if (*up != 0) { 6116 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6117 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6118 ntohs(ipha->ipha_length), cksum); 6119 6120 /* Software checksum? */ 6121 if (DB_CKSUMFLAGS(mp) == 0) { 6122 UDP_STAT(udp_out_sw_cksum); 6123 UDP_STAT_UPDATE(udp_out_sw_cksum_bytes, 6124 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6125 } 6126 } 6127 6128 ipha->ipha_fragment_offset_and_flags |= 6129 (uint32_t)htons(ire->ire_frag_flag); 6130 6131 /* Calculate IP header checksum if hardware isn't capable */ 6132 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6133 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6134 ((uint16_t *)ipha)[4]); 6135 } 6136 6137 if (CLASSD(dst)) { 6138 ilm_t *ilm; 6139 6140 ILM_WALKER_HOLD(ill); 6141 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6142 ILM_WALKER_RELE(ill); 6143 if (ilm != NULL) { 6144 ip_multicast_loopback(q, ill, mp, 6145 connp->conn_multicast_loop ? 0 : 6146 IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); 6147 } 6148 6149 /* If multicast TTL is 0 then we are done */ 6150 if (ipha->ipha_ttl == 0) { 6151 if (ipif != NULL) 6152 ipif_refrele(ipif); 6153 freemsg(mp); 6154 IRE_REFRELE(ire); 6155 return; 6156 } 6157 } 6158 6159 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6160 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6161 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6162 6163 UPDATE_OB_PKT_COUNT(ire); 6164 ire->ire_last_used_time = lbolt; 6165 6166 if (ILL_POLL_CAPABLE(ill)) { 6167 /* 6168 * Send the packet directly to DLD, where it may be queued 6169 * depending on the availability of transmit resources at 6170 * the media layer. 6171 */ 6172 IP_POLL_ILL_TX(ill, mp); 6173 } else { 6174 putnext(ire->ire_stq, mp); 6175 } 6176 6177 if (ipif != NULL) 6178 ipif_refrele(ipif); 6179 IRE_REFRELE(ire); 6180 } 6181 6182 /* 6183 * This routine handles all messages passed downstream. It either 6184 * consumes the message or passes it downstream; it never queues a 6185 * a message. 6186 */ 6187 static void 6188 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6189 { 6190 sin6_t *sin6; 6191 sin_t *sin; 6192 ipaddr_t v4dst; 6193 uint16_t port; 6194 uint_t srcid; 6195 queue_t *q = connp->conn_wq; 6196 udp_t *udp = connp->conn_udp; 6197 t_scalar_t optlen; 6198 int error = 0; 6199 struct sockaddr_storage ss; 6200 6201 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6202 "udp_wput_start: connp %p mp %p", connp, mp); 6203 6204 /* 6205 * We directly handle several cases here: T_UNITDATA_REQ message 6206 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 6207 * connected and non-connected socket. The latter carries the 6208 * address structure along when this routine gets called. 6209 */ 6210 switch (DB_TYPE(mp)) { 6211 case M_DATA: 6212 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6213 if (!udp->udp_direct_sockfs || 6214 addr == NULL || addrlen == 0) { 6215 /* Not connected; address is required */ 6216 BUMP_MIB(&udp_mib, udpOutErrors); 6217 UDP_STAT(udp_out_err_notconn); 6218 freemsg(mp); 6219 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6220 "udp_wput_end: connp %p (%S)", connp, 6221 "not-connected; address required"); 6222 return; 6223 } 6224 ASSERT(udp->udp_issocket); 6225 UDP_DBGSTAT(udp_data_notconn); 6226 /* Not connected; do some more checks below */ 6227 optlen = 0; 6228 break; 6229 } 6230 /* M_DATA for connected socket */ 6231 UDP_DBGSTAT(udp_data_conn); 6232 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6233 6234 /* Initialize addr and addrlen as if they're passed in */ 6235 if (udp->udp_family == AF_INET) { 6236 sin = (sin_t *)&ss; 6237 sin->sin_family = AF_INET; 6238 sin->sin_port = udp->udp_dstport; 6239 sin->sin_addr.s_addr = v4dst; 6240 addr = (struct sockaddr *)sin; 6241 addrlen = sizeof (*sin); 6242 } else { 6243 sin6 = (sin6_t *)&ss; 6244 sin6->sin6_family = AF_INET6; 6245 sin6->sin6_port = udp->udp_dstport; 6246 sin6->sin6_flowinfo = udp->udp_flowinfo; 6247 sin6->sin6_addr = udp->udp_v6dst; 6248 sin6->sin6_scope_id = 0; 6249 sin6->__sin6_src_id = 0; 6250 addr = (struct sockaddr *)sin6; 6251 addrlen = sizeof (*sin6); 6252 } 6253 6254 if (udp->udp_family == AF_INET || 6255 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 6256 /* 6257 * Handle both AF_INET and AF_INET6; the latter 6258 * for IPV4 mapped destination addresses. Note 6259 * here that both addr and addrlen point to the 6260 * corresponding struct depending on the address 6261 * family of the socket. 6262 */ 6263 mp = udp_output_v4(connp, mp, v4dst, 6264 udp->udp_dstport, 0, &error); 6265 } else { 6266 mp = udp_output_v6(connp, mp, sin6, 0, &error); 6267 } 6268 if (error != 0) { 6269 ASSERT(addr != NULL && addrlen != 0); 6270 goto ud_error; 6271 } 6272 return; 6273 case M_PROTO: 6274 case M_PCPROTO: { 6275 struct T_unitdata_req *tudr; 6276 6277 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6278 tudr = (struct T_unitdata_req *)mp->b_rptr; 6279 6280 /* Handle valid T_UNITDATA_REQ here */ 6281 if (MBLKL(mp) >= sizeof (*tudr) && 6282 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6283 if (mp->b_cont == NULL) { 6284 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6285 "udp_wput_end: q %p (%S)", q, "badaddr"); 6286 error = EPROTO; 6287 goto ud_error; 6288 } 6289 6290 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6291 tudr->DEST_length)) { 6292 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6293 "udp_wput_end: q %p (%S)", q, "badaddr"); 6294 error = EADDRNOTAVAIL; 6295 goto ud_error; 6296 } 6297 /* 6298 * If a port has not been bound to the stream, fail. 6299 * This is not a problem when sockfs is directly 6300 * above us, because it will ensure that the socket 6301 * is first bound before allowing data to be sent. 6302 */ 6303 if (udp->udp_state == TS_UNBND) { 6304 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6305 "udp_wput_end: q %p (%S)", q, "outstate"); 6306 error = EPROTO; 6307 goto ud_error; 6308 } 6309 addr = (struct sockaddr *) 6310 &mp->b_rptr[tudr->DEST_offset]; 6311 addrlen = tudr->DEST_length; 6312 optlen = tudr->OPT_length; 6313 if (optlen != 0) 6314 UDP_STAT(udp_out_opt); 6315 break; 6316 } 6317 /* FALLTHRU */ 6318 } 6319 default: 6320 udp_become_writer(connp, mp, udp_wput_other_wrapper, 6321 SQTAG_UDP_OUTPUT); 6322 return; 6323 } 6324 ASSERT(addr != NULL); 6325 6326 switch (udp->udp_family) { 6327 case AF_INET6: 6328 sin6 = (sin6_t *)addr; 6329 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 6330 sin6->sin6_family != AF_INET6) { 6331 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6332 "udp_wput_end: q %p (%S)", q, "badaddr"); 6333 error = EADDRNOTAVAIL; 6334 goto ud_error; 6335 } 6336 6337 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6338 /* 6339 * Destination is a non-IPv4-compatible IPv6 address. 6340 * Send out an IPv6 format packet. 6341 */ 6342 mp = udp_output_v6(connp, mp, sin6, optlen, &error); 6343 if (error != 0) 6344 goto ud_error; 6345 6346 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6347 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6348 return; 6349 } 6350 /* 6351 * If the local address is not zero or a mapped address 6352 * return an error. It would be possible to send an IPv4 6353 * packet but the response would never make it back to the 6354 * application since it is bound to a non-mapped address. 6355 */ 6356 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6357 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6358 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6359 "udp_wput_end: q %p (%S)", q, "badaddr"); 6360 error = EADDRNOTAVAIL; 6361 goto ud_error; 6362 } 6363 /* Send IPv4 packet without modifying udp_ipversion */ 6364 /* Extract port and ipaddr */ 6365 port = sin6->sin6_port; 6366 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6367 srcid = sin6->__sin6_src_id; 6368 break; 6369 6370 case AF_INET: 6371 sin = (sin_t *)addr; 6372 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 6373 sin->sin_family != AF_INET) { 6374 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6375 "udp_wput_end: q %p (%S)", q, "badaddr"); 6376 error = EADDRNOTAVAIL; 6377 goto ud_error; 6378 } 6379 /* Extract port and ipaddr */ 6380 port = sin->sin_port; 6381 v4dst = sin->sin_addr.s_addr; 6382 srcid = 0; 6383 break; 6384 } 6385 6386 /* 6387 * If options passed in, feed it for verification and handling 6388 */ 6389 if (optlen != 0) { 6390 ASSERT(DB_TYPE(mp) != M_DATA); 6391 if (udp_unitdata_opt_process(q, mp, &error, NULL) < 0) { 6392 /* failure */ 6393 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6394 "udp_wput_end: q %p (%S)", q, 6395 "udp_unitdata_opt_process"); 6396 goto ud_error; 6397 } 6398 /* 6399 * Note: success in processing options. 6400 * mp option buffer represented by 6401 * OPT_length/offset now potentially modified 6402 * and contain option setting results 6403 */ 6404 } 6405 ASSERT(error == 0); 6406 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); 6407 if (error != 0) { 6408 ud_error: 6409 UDP_STAT(udp_out_err_output); 6410 ASSERT(mp != NULL); 6411 /* mp is freed by the following routine */ 6412 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6413 (t_scalar_t)error); 6414 } 6415 } 6416 6417 /* ARGSUSED */ 6418 static void 6419 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 6420 { 6421 udp_output((conn_t *)arg, mp, NULL, 0); 6422 _UDP_EXIT((conn_t *)arg); 6423 } 6424 6425 static void 6426 udp_wput(queue_t *q, mblk_t *mp) 6427 { 6428 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 6429 SQTAG_UDP_WPUT); 6430 } 6431 6432 /* 6433 * Allocate and prepare a T_UNITDATA_REQ message. 6434 */ 6435 static mblk_t * 6436 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 6437 { 6438 struct T_unitdata_req *tudr; 6439 mblk_t *mp; 6440 6441 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 6442 if (mp != NULL) { 6443 mp->b_wptr += sizeof (*tudr) + addrlen; 6444 DB_TYPE(mp) = M_PROTO; 6445 6446 tudr = (struct T_unitdata_req *)mp->b_rptr; 6447 tudr->PRIM_type = T_UNITDATA_REQ; 6448 tudr->DEST_length = addrlen; 6449 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 6450 tudr->OPT_length = 0; 6451 tudr->OPT_offset = 0; 6452 bcopy(addr, tudr+1, addrlen); 6453 } 6454 return (mp); 6455 } 6456 6457 /* 6458 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 6459 * is valid when we are directly beneath the stream head, and thus sockfs 6460 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6461 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 6462 * this is done for both connected and non-connected endpoint. 6463 */ 6464 void 6465 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6466 { 6467 conn_t *connp; 6468 udp_t *udp; 6469 6470 q = UDP_WR(q); 6471 connp = Q_TO_CONN(q); 6472 udp = connp->conn_udp; 6473 6474 /* udpsockfs should only send down M_DATA for this entry point */ 6475 ASSERT(DB_TYPE(mp) == M_DATA); 6476 6477 mutex_enter(&connp->conn_lock); 6478 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 6479 6480 if (udp->udp_mode != UDP_MT_HOT) { 6481 /* 6482 * We can't enter this conn right away because another 6483 * thread is currently executing as writer; therefore we 6484 * need to deposit the message into the squeue to be 6485 * drained later. If a socket address is present, we 6486 * need to create a T_UNITDATA_REQ message as placeholder. 6487 */ 6488 if (addr != NULL && addrlen != 0) { 6489 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 6490 6491 if (tudr_mp == NULL) { 6492 mutex_exit(&connp->conn_lock); 6493 BUMP_MIB(&udp_mib, udpOutErrors); 6494 UDP_STAT(udp_out_err_tudr); 6495 freemsg(mp); 6496 return; 6497 } 6498 /* Tag the packet with T_UNITDATA_REQ */ 6499 tudr_mp->b_cont = mp; 6500 mp = tudr_mp; 6501 } 6502 mutex_exit(&connp->conn_lock); 6503 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 6504 return; 6505 } 6506 6507 /* We can execute as reader right away. */ 6508 UDP_READERS_INCREF(udp); 6509 mutex_exit(&connp->conn_lock); 6510 6511 udp_output(connp, mp, addr, addrlen); 6512 6513 mutex_enter(&connp->conn_lock); 6514 UDP_MODE_ASSERTIONS(udp, UDP_EXIT); 6515 UDP_READERS_DECREF(udp); 6516 mutex_exit(&connp->conn_lock); 6517 } 6518 6519 /* 6520 * udp_output_v6(): 6521 * Assumes that udp_wput did some sanity checking on the destination 6522 * address. 6523 */ 6524 static mblk_t * 6525 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen, 6526 int *error) 6527 { 6528 ip6_t *ip6h; 6529 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6530 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 6531 mblk_t *mp2; 6532 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6533 size_t ip_len; 6534 udpha_t *udph; 6535 udp_t *udp = connp->conn_udp; 6536 queue_t *q = connp->conn_wq; 6537 ip6_pkt_t ipp_s; /* For ancillary data options */ 6538 ip6_pkt_t *ipp = &ipp_s; 6539 ip6_pkt_t *tipp; /* temporary ipp */ 6540 uint32_t csum = 0; 6541 uint_t ignore = 0; 6542 uint_t option_exists = 0, is_sticky = 0; 6543 uint8_t *cp; 6544 uint8_t *nxthdr_ptr; 6545 6546 *error = 0; 6547 6548 /* mp1 points to the M_DATA mblk carrying the packet */ 6549 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6550 ASSERT(tudr_optlen == 0 || DB_TYPE(mp) != M_DATA); 6551 6552 /* 6553 * If the local address is a mapped address return 6554 * an error. 6555 * It would be possible to send an IPv6 packet but the 6556 * response would never make it back to the application 6557 * since it is bound to a mapped address. 6558 */ 6559 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6560 *error = EADDRNOTAVAIL; 6561 goto done; 6562 } 6563 6564 ipp->ipp_fields = 0; 6565 ipp->ipp_sticky_ignored = 0; 6566 6567 /* 6568 * If TPI options passed in, feed it for verification and handling 6569 */ 6570 if (tudr_optlen != 0) { 6571 if (udp_unitdata_opt_process(q, mp, error, (void *)ipp) < 0) { 6572 /* failure */ 6573 goto done; 6574 } 6575 ignore = ipp->ipp_sticky_ignored; 6576 ASSERT(*error == 0); 6577 } 6578 6579 if (sin6->sin6_scope_id != 0 && 6580 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6581 /* 6582 * IPPF_SCOPE_ID is special. It's neither a sticky 6583 * option nor ancillary data. It needs to be 6584 * explicitly set in options_exists. 6585 */ 6586 option_exists |= IPPF_SCOPE_ID; 6587 } 6588 6589 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6590 /* No sticky options nor ancillary data. */ 6591 goto no_options; 6592 } 6593 6594 /* 6595 * Go through the options figuring out where each is going to 6596 * come from and build two masks. The first mask indicates if 6597 * the option exists at all. The second mask indicates if the 6598 * option is sticky or ancillary. 6599 */ 6600 if (!(ignore & IPPF_HOPOPTS)) { 6601 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6602 option_exists |= IPPF_HOPOPTS; 6603 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6604 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6605 option_exists |= IPPF_HOPOPTS; 6606 is_sticky |= IPPF_HOPOPTS; 6607 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_hopoptslen; 6608 } 6609 } 6610 6611 if (!(ignore & IPPF_RTHDR)) { 6612 if (ipp->ipp_fields & IPPF_RTHDR) { 6613 option_exists |= IPPF_RTHDR; 6614 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6615 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6616 option_exists |= IPPF_RTHDR; 6617 is_sticky |= IPPF_RTHDR; 6618 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6619 } 6620 } 6621 6622 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6623 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6624 option_exists |= IPPF_RTDSTOPTS; 6625 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6626 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6627 option_exists |= IPPF_RTDSTOPTS; 6628 is_sticky |= IPPF_RTDSTOPTS; 6629 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6630 } 6631 } 6632 6633 if (!(ignore & IPPF_DSTOPTS)) { 6634 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6635 option_exists |= IPPF_DSTOPTS; 6636 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6637 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6638 option_exists |= IPPF_DSTOPTS; 6639 is_sticky |= IPPF_DSTOPTS; 6640 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6641 } 6642 } 6643 6644 if (!(ignore & IPPF_IFINDEX)) { 6645 if (ipp->ipp_fields & IPPF_IFINDEX) { 6646 option_exists |= IPPF_IFINDEX; 6647 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6648 option_exists |= IPPF_IFINDEX; 6649 is_sticky |= IPPF_IFINDEX; 6650 } 6651 } 6652 6653 if (!(ignore & IPPF_ADDR)) { 6654 if (ipp->ipp_fields & IPPF_ADDR) { 6655 option_exists |= IPPF_ADDR; 6656 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6657 option_exists |= IPPF_ADDR; 6658 is_sticky |= IPPF_ADDR; 6659 } 6660 } 6661 6662 if (!(ignore & IPPF_DONTFRAG)) { 6663 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6664 option_exists |= IPPF_DONTFRAG; 6665 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6666 option_exists |= IPPF_DONTFRAG; 6667 is_sticky |= IPPF_DONTFRAG; 6668 } 6669 } 6670 6671 if (!(ignore & IPPF_USE_MIN_MTU)) { 6672 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6673 option_exists |= IPPF_USE_MIN_MTU; 6674 } else if (udp->udp_sticky_ipp.ipp_fields & 6675 IPPF_USE_MIN_MTU) { 6676 option_exists |= IPPF_USE_MIN_MTU; 6677 is_sticky |= IPPF_USE_MIN_MTU; 6678 } 6679 } 6680 6681 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6682 option_exists |= IPPF_HOPLIMIT; 6683 /* IPV6_HOPLIMIT can never be sticky */ 6684 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6685 6686 if (!(ignore & IPPF_UNICAST_HOPS) && 6687 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6688 option_exists |= IPPF_UNICAST_HOPS; 6689 is_sticky |= IPPF_UNICAST_HOPS; 6690 } 6691 6692 if (!(ignore & IPPF_MULTICAST_HOPS) && 6693 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6694 option_exists |= IPPF_MULTICAST_HOPS; 6695 is_sticky |= IPPF_MULTICAST_HOPS; 6696 } 6697 6698 if (!(ignore & IPPF_TCLASS)) { 6699 if (ipp->ipp_fields & IPPF_TCLASS) { 6700 option_exists |= IPPF_TCLASS; 6701 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6702 option_exists |= IPPF_TCLASS; 6703 is_sticky |= IPPF_TCLASS; 6704 } 6705 } 6706 6707 no_options: 6708 6709 /* 6710 * If any options carried in the ip6i_t were specified, we 6711 * need to account for the ip6i_t in the data we'll be sending 6712 * down. 6713 */ 6714 if (option_exists & IPPF_HAS_IP6I) 6715 udp_ip_hdr_len += sizeof (ip6i_t); 6716 6717 /* check/fix buffer config, setup pointers into it */ 6718 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6719 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6720 !OK_32PTR(ip6h)) { 6721 /* Try to get everything in a single mblk next time */ 6722 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6723 udp->udp_max_hdr_len = udp_ip_hdr_len; 6724 (void) mi_set_sth_wroff(UDP_RD(q), 6725 udp->udp_max_hdr_len + udp_wroff_extra); 6726 } 6727 mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); 6728 if (mp2 == NULL) { 6729 *error = ENOMEM; 6730 goto done; 6731 } 6732 mp2->b_wptr = DB_LIM(mp2); 6733 mp2->b_cont = mp1; 6734 mp1 = mp2; 6735 if (DB_TYPE(mp) != M_DATA) 6736 mp->b_cont = mp1; 6737 else 6738 mp = mp1; 6739 6740 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6741 } 6742 mp1->b_rptr = (unsigned char *)ip6h; 6743 ip6i = (ip6i_t *)ip6h; 6744 6745 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6746 if (option_exists & IPPF_HAS_IP6I) { 6747 ip6h = (ip6_t *)&ip6i[1]; 6748 ip6i->ip6i_flags = 0; 6749 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6750 6751 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6752 if (option_exists & IPPF_SCOPE_ID) { 6753 ip6i->ip6i_flags |= IP6I_IFINDEX; 6754 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6755 } else if (option_exists & IPPF_IFINDEX) { 6756 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6757 ASSERT(tipp->ipp_ifindex != 0); 6758 ip6i->ip6i_flags |= IP6I_IFINDEX; 6759 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6760 } 6761 6762 if (option_exists & IPPF_ADDR) { 6763 /* 6764 * Enable per-packet source address verification if 6765 * IPV6_PKTINFO specified the source address. 6766 * ip6_src is set in the transport's _wput function. 6767 */ 6768 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6769 } 6770 6771 if (option_exists & IPPF_DONTFRAG) { 6772 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6773 } 6774 6775 if (option_exists & IPPF_USE_MIN_MTU) { 6776 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6777 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6778 } 6779 6780 if (option_exists & IPPF_NEXTHOP) { 6781 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6782 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6783 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6784 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6785 } 6786 6787 /* 6788 * tell IP this is an ip6i_t private header 6789 */ 6790 ip6i->ip6i_nxt = IPPROTO_RAW; 6791 } 6792 6793 /* Initialize IPv6 header */ 6794 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6795 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6796 6797 /* Set the hoplimit of the outgoing packet. */ 6798 if (option_exists & IPPF_HOPLIMIT) { 6799 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 6800 ip6h->ip6_hops = ipp->ipp_hoplimit; 6801 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6802 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6803 ip6h->ip6_hops = udp->udp_multicast_ttl; 6804 if (option_exists & IPPF_MULTICAST_HOPS) 6805 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6806 } else { 6807 ip6h->ip6_hops = udp->udp_ttl; 6808 if (option_exists & IPPF_UNICAST_HOPS) 6809 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6810 } 6811 6812 if (option_exists & IPPF_ADDR) { 6813 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6814 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6815 ip6h->ip6_src = tipp->ipp_addr; 6816 } else { 6817 /* 6818 * The source address was not set using IPV6_PKTINFO. 6819 * First look at the bound source. 6820 * If unspecified fallback to __sin6_src_id. 6821 */ 6822 ip6h->ip6_src = udp->udp_v6src; 6823 if (sin6->__sin6_src_id != 0 && 6824 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6825 ip_srcid_find_id(sin6->__sin6_src_id, 6826 &ip6h->ip6_src, connp->conn_zoneid); 6827 } 6828 } 6829 6830 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6831 cp = (uint8_t *)&ip6h[1]; 6832 6833 /* 6834 * Here's where we have to start stringing together 6835 * any extension headers in the right order: 6836 * Hop-by-hop, destination, routing, and final destination opts. 6837 */ 6838 if (option_exists & IPPF_HOPOPTS) { 6839 /* Hop-by-hop options */ 6840 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6841 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6842 6843 *nxthdr_ptr = IPPROTO_HOPOPTS; 6844 nxthdr_ptr = &hbh->ip6h_nxt; 6845 6846 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 6847 cp += tipp->ipp_hopoptslen; 6848 } 6849 /* 6850 * En-route destination options 6851 * Only do them if there's a routing header as well 6852 */ 6853 if (option_exists & IPPF_RTDSTOPTS) { 6854 ip6_dest_t *dst = (ip6_dest_t *)cp; 6855 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6856 6857 *nxthdr_ptr = IPPROTO_DSTOPTS; 6858 nxthdr_ptr = &dst->ip6d_nxt; 6859 6860 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6861 cp += tipp->ipp_rtdstoptslen; 6862 } 6863 /* 6864 * Routing header next 6865 */ 6866 if (option_exists & IPPF_RTHDR) { 6867 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6868 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6869 6870 *nxthdr_ptr = IPPROTO_ROUTING; 6871 nxthdr_ptr = &rt->ip6r_nxt; 6872 6873 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6874 cp += tipp->ipp_rthdrlen; 6875 } 6876 /* 6877 * Do ultimate destination options 6878 */ 6879 if (option_exists & IPPF_DSTOPTS) { 6880 ip6_dest_t *dest = (ip6_dest_t *)cp; 6881 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6882 6883 *nxthdr_ptr = IPPROTO_DSTOPTS; 6884 nxthdr_ptr = &dest->ip6d_nxt; 6885 6886 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6887 cp += tipp->ipp_dstoptslen; 6888 } 6889 /* 6890 * Now set the last header pointer to the proto passed in 6891 */ 6892 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6893 *nxthdr_ptr = IPPROTO_UDP; 6894 6895 /* Update UDP header */ 6896 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6897 udph->uha_dst_port = sin6->sin6_port; 6898 udph->uha_src_port = udp->udp_port; 6899 6900 /* 6901 * Copy in the destination address 6902 */ 6903 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6904 ip6h->ip6_dst = ipv6_loopback; 6905 else 6906 ip6h->ip6_dst = sin6->sin6_addr; 6907 6908 ip6h->ip6_vcf = 6909 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6910 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6911 6912 if (option_exists & IPPF_TCLASS) { 6913 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6914 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6915 tipp->ipp_tclass); 6916 } 6917 6918 if (option_exists & IPPF_RTHDR) { 6919 ip6_rthdr_t *rth; 6920 6921 /* 6922 * Perform any processing needed for source routing. 6923 * We know that all extension headers will be in the same mblk 6924 * as the IPv6 header. 6925 */ 6926 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6927 if (rth != NULL && rth->ip6r_segleft != 0) { 6928 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6929 /* 6930 * Drop packet - only support Type 0 routing. 6931 * Notify the application as well. 6932 */ 6933 *error = EPROTO; 6934 goto done; 6935 } 6936 6937 /* 6938 * rth->ip6r_len is twice the number of 6939 * addresses in the header. Thus it must be even. 6940 */ 6941 if (rth->ip6r_len & 0x1) { 6942 *error = EPROTO; 6943 goto done; 6944 } 6945 /* 6946 * Shuffle the routing header and ip6_dst 6947 * addresses, and get the checksum difference 6948 * between the first hop (in ip6_dst) and 6949 * the destination (in the last routing hdr entry). 6950 */ 6951 csum = ip_massage_options_v6(ip6h, rth); 6952 /* 6953 * Verify that the first hop isn't a mapped address. 6954 * Routers along the path need to do this verification 6955 * for subsequent hops. 6956 */ 6957 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6958 *error = EADDRNOTAVAIL; 6959 goto done; 6960 } 6961 6962 cp += (rth->ip6r_len + 1)*8; 6963 } 6964 } 6965 6966 /* count up length of UDP packet */ 6967 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6968 if ((mp2 = mp1->b_cont) != NULL) { 6969 do { 6970 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6971 ip_len += (uint32_t)MBLKL(mp2); 6972 } while ((mp2 = mp2->b_cont) != NULL); 6973 } 6974 6975 /* 6976 * If the size of the packet is greater than the maximum allowed by 6977 * ip, return an error. Passing this down could cause panics because 6978 * the size will have wrapped and be inconsistent with the msg size. 6979 */ 6980 if (ip_len > IP_MAXPACKET) { 6981 *error = EMSGSIZE; 6982 goto done; 6983 } 6984 6985 /* Store the UDP length. Subtract length of extension hdrs */ 6986 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6987 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6988 6989 /* 6990 * We make it easy for IP to include our pseudo header 6991 * by putting our length in uh_checksum, modified (if 6992 * we have a routing header) by the checksum difference 6993 * between the ultimate destination and first hop addresses. 6994 * Note: UDP over IPv6 must always checksum the packet. 6995 */ 6996 csum += udph->uha_length; 6997 csum = (csum & 0xFFFF) + (csum >> 16); 6998 udph->uha_checksum = (uint16_t)csum; 6999 7000 #ifdef _LITTLE_ENDIAN 7001 ip_len = htons(ip_len); 7002 #endif 7003 ip6h->ip6_plen = ip_len; 7004 7005 if (DB_TYPE(mp) != M_DATA) { 7006 ASSERT(mp != mp1); 7007 freeb(mp); 7008 } 7009 7010 /* mp has been consumed and we'll return success */ 7011 ASSERT(*error == 0); 7012 mp = NULL; 7013 7014 /* We're done. Pass the packet to IP */ 7015 BUMP_MIB(&udp_mib, udpOutDatagrams); 7016 ip_output_v6(connp, mp1, q, IP_WPUT); 7017 7018 done: 7019 if (*error != 0) { 7020 ASSERT(mp != NULL); 7021 BUMP_MIB(&udp_mib, udpOutErrors); 7022 } 7023 return (mp); 7024 } 7025 7026 static void 7027 udp_wput_other(queue_t *q, mblk_t *mp) 7028 { 7029 uchar_t *rptr = mp->b_rptr; 7030 struct datab *db; 7031 struct iocblk *iocp; 7032 cred_t *cr; 7033 conn_t *connp = Q_TO_CONN(q); 7034 udp_t *udp = connp->conn_udp; 7035 7036 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7037 "udp_wput_other_start: q %p", q); 7038 7039 db = mp->b_datap; 7040 7041 cr = DB_CREDDEF(mp, connp->conn_cred); 7042 7043 switch (db->db_type) { 7044 case M_PROTO: 7045 case M_PCPROTO: 7046 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7047 freemsg(mp); 7048 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7049 "udp_wput_other_end: q %p (%S)", 7050 q, "protoshort"); 7051 return; 7052 } 7053 switch (((t_primp_t)rptr)->type) { 7054 case T_ADDR_REQ: 7055 udp_addr_req(q, mp); 7056 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7057 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7058 return; 7059 case O_T_BIND_REQ: 7060 case T_BIND_REQ: 7061 udp_bind(q, mp); 7062 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7063 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7064 return; 7065 case T_CONN_REQ: 7066 udp_connect(q, mp); 7067 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7068 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7069 return; 7070 case T_CAPABILITY_REQ: 7071 udp_capability_req(q, mp); 7072 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7073 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7074 return; 7075 case T_INFO_REQ: 7076 udp_info_req(q, mp); 7077 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7078 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7079 return; 7080 case T_UNITDATA_REQ: 7081 /* 7082 * If a T_UNITDATA_REQ gets here, the address must 7083 * be bad. Valid T_UNITDATA_REQs are handled 7084 * in udp_wput. 7085 */ 7086 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7087 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7088 "udp_wput_other_end: q %p (%S)", 7089 q, "unitdatareq"); 7090 return; 7091 case T_UNBIND_REQ: 7092 udp_unbind(q, mp); 7093 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7094 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7095 return; 7096 case T_SVR4_OPTMGMT_REQ: 7097 if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr)) 7098 /* 7099 * Use upper queue for option processing in 7100 * case the request is not handled at this 7101 * level and needs to be passed down to IP. 7102 */ 7103 (void) svr4_optcom_req(_WR(UDP_RD(q)), 7104 mp, cr, &udp_opt_obj); 7105 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7106 "udp_wput_other_end: q %p (%S)", 7107 q, "optmgmtreq"); 7108 return; 7109 7110 case T_OPTMGMT_REQ: 7111 /* 7112 * Use upper queue for option processing in 7113 * case the request is not handled at this 7114 * level and needs to be passed down to IP. 7115 */ 7116 (void) tpi_optcom_req(_WR(UDP_RD(q)), 7117 mp, cr, &udp_opt_obj); 7118 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7119 "udp_wput_other_end: q %p (%S)", 7120 q, "optmgmtreq"); 7121 return; 7122 7123 case T_DISCON_REQ: 7124 udp_disconnect(q, mp); 7125 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7126 "udp_wput_other_end: q %p (%S)", 7127 q, "disconreq"); 7128 return; 7129 7130 /* The following TPI message is not supported by udp. */ 7131 case O_T_CONN_RES: 7132 case T_CONN_RES: 7133 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7134 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7135 "udp_wput_other_end: q %p (%S)", 7136 q, "connres/disconreq"); 7137 return; 7138 7139 /* The following 3 TPI messages are illegal for udp. */ 7140 case T_DATA_REQ: 7141 case T_EXDATA_REQ: 7142 case T_ORDREL_REQ: 7143 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7144 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7145 "udp_wput_other_end: q %p (%S)", 7146 q, "data/exdata/ordrel"); 7147 return; 7148 default: 7149 break; 7150 } 7151 break; 7152 case M_FLUSH: 7153 if (*rptr & FLUSHW) 7154 flushq(q, FLUSHDATA); 7155 break; 7156 case M_IOCTL: 7157 iocp = (struct iocblk *)mp->b_rptr; 7158 switch (iocp->ioc_cmd) { 7159 case TI_GETPEERNAME: 7160 if (udp->udp_state != TS_DATA_XFER) { 7161 /* 7162 * If a default destination address has not 7163 * been associated with the stream, then we 7164 * don't know the peer's name. 7165 */ 7166 iocp->ioc_error = ENOTCONN; 7167 iocp->ioc_count = 0; 7168 mp->b_datap->db_type = M_IOCACK; 7169 putnext(UDP_RD(q), mp); 7170 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7171 "udp_wput_other_end: q %p (%S)", 7172 q, "getpeername"); 7173 return; 7174 } 7175 /* FALLTHRU */ 7176 case TI_GETMYNAME: { 7177 /* 7178 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7179 * need to copyin the user's strbuf structure. 7180 * Processing will continue in the M_IOCDATA case 7181 * below. 7182 */ 7183 mi_copyin(q, mp, NULL, 7184 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7185 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7186 "udp_wput_other_end: q %p (%S)", 7187 q, "getmyname"); 7188 return; 7189 } 7190 case ND_SET: 7191 /* nd_getset performs the necessary checking */ 7192 case ND_GET: 7193 if (nd_getset(q, udp_g_nd, mp)) { 7194 putnext(UDP_RD(q), mp); 7195 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7196 "udp_wput_other_end: q %p (%S)", 7197 q, "get"); 7198 return; 7199 } 7200 break; 7201 case _SIOCSOCKFALLBACK: 7202 /* 7203 * Either sockmod is about to be popped and the 7204 * socket would now be treated as a plain stream, 7205 * or a module is about to be pushed so we could 7206 * no longer use read-side synchronous stream. 7207 * Drain any queued data and disable direct sockfs 7208 * interface from now on. 7209 */ 7210 if (!udp->udp_issocket) { 7211 DB_TYPE(mp) = M_IOCNAK; 7212 iocp->ioc_error = EINVAL; 7213 } else { 7214 udp->udp_issocket = B_FALSE; 7215 if (udp->udp_direct_sockfs) { 7216 /* 7217 * Disable read-side synchronous 7218 * stream interface and drain any 7219 * queued data. 7220 */ 7221 udp_rcv_drain(UDP_RD(q), udp, 7222 B_FALSE); 7223 ASSERT(!udp->udp_direct_sockfs); 7224 UDP_STAT(udp_sock_fallback); 7225 } 7226 DB_TYPE(mp) = M_IOCACK; 7227 iocp->ioc_error = 0; 7228 } 7229 iocp->ioc_count = 0; 7230 iocp->ioc_rval = 0; 7231 putnext(UDP_RD(q), mp); 7232 return; 7233 default: 7234 break; 7235 } 7236 break; 7237 case M_IOCDATA: 7238 udp_wput_iocdata(q, mp); 7239 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7240 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7241 return; 7242 default: 7243 /* Unrecognized messages are passed through without change. */ 7244 break; 7245 } 7246 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7247 "udp_wput_other_end: q %p (%S)", q, "end"); 7248 ip_output(connp, mp, q, IP_WPUT); 7249 } 7250 7251 /* ARGSUSED */ 7252 static void 7253 udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 7254 { 7255 udp_wput_other(((conn_t *)arg)->conn_wq, mp); 7256 udp_exit((conn_t *)arg); 7257 } 7258 7259 /* 7260 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7261 * messages. 7262 */ 7263 static void 7264 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7265 { 7266 mblk_t *mp1; 7267 STRUCT_HANDLE(strbuf, sb); 7268 uint16_t port; 7269 in6_addr_t v6addr; 7270 ipaddr_t v4addr; 7271 uint32_t flowinfo = 0; 7272 int addrlen; 7273 udp_t *udp = Q_TO_UDP(q); 7274 7275 /* Make sure it is one of ours. */ 7276 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7277 case TI_GETMYNAME: 7278 case TI_GETPEERNAME: 7279 break; 7280 default: 7281 ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); 7282 return; 7283 } 7284 7285 q = WR(UDP_RD(q)); 7286 switch (mi_copy_state(q, mp, &mp1)) { 7287 case -1: 7288 return; 7289 case MI_COPY_CASE(MI_COPY_IN, 1): 7290 break; 7291 case MI_COPY_CASE(MI_COPY_OUT, 1): 7292 /* 7293 * The address has been copied out, so now 7294 * copyout the strbuf. 7295 */ 7296 mi_copyout(q, mp); 7297 return; 7298 case MI_COPY_CASE(MI_COPY_OUT, 2): 7299 /* 7300 * The address and strbuf have been copied out. 7301 * We're done, so just acknowledge the original 7302 * M_IOCTL. 7303 */ 7304 mi_copy_done(q, mp, 0); 7305 return; 7306 default: 7307 /* 7308 * Something strange has happened, so acknowledge 7309 * the original M_IOCTL with an EPROTO error. 7310 */ 7311 mi_copy_done(q, mp, EPROTO); 7312 return; 7313 } 7314 7315 /* 7316 * Now we have the strbuf structure for TI_GETMYNAME 7317 * and TI_GETPEERNAME. Next we copyout the requested 7318 * address and then we'll copyout the strbuf. 7319 */ 7320 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 7321 (void *)mp1->b_rptr); 7322 if (udp->udp_family == AF_INET) 7323 addrlen = sizeof (sin_t); 7324 else 7325 addrlen = sizeof (sin6_t); 7326 7327 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7328 mi_copy_done(q, mp, EINVAL); 7329 return; 7330 } 7331 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7332 case TI_GETMYNAME: 7333 if (udp->udp_family == AF_INET) { 7334 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7335 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7336 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7337 v4addr = V4_PART_OF_V6(udp->udp_v6src); 7338 } else { 7339 /* 7340 * INADDR_ANY 7341 * udp_v6src is not set, we might be bound to 7342 * broadcast/multicast. Use udp_bound_v6src as 7343 * local address instead (that could 7344 * also still be INADDR_ANY) 7345 */ 7346 v4addr = V4_PART_OF_V6(udp->udp_bound_v6src); 7347 } 7348 } else { 7349 /* udp->udp_family == AF_INET6 */ 7350 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7351 v6addr = udp->udp_v6src; 7352 } else { 7353 /* 7354 * UNSPECIFIED 7355 * udp_v6src is not set, we might be bound to 7356 * broadcast/multicast. Use udp_bound_v6src as 7357 * local address instead (that could 7358 * also still be UNSPECIFIED) 7359 */ 7360 v6addr = udp->udp_bound_v6src; 7361 } 7362 } 7363 port = udp->udp_port; 7364 break; 7365 case TI_GETPEERNAME: 7366 if (udp->udp_state != TS_DATA_XFER) { 7367 mi_copy_done(q, mp, ENOTCONN); 7368 return; 7369 } 7370 if (udp->udp_family == AF_INET) { 7371 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7372 v4addr = V4_PART_OF_V6(udp->udp_v6dst); 7373 } else { 7374 /* udp->udp_family == AF_INET6) */ 7375 v6addr = udp->udp_v6dst; 7376 flowinfo = udp->udp_flowinfo; 7377 } 7378 port = udp->udp_dstport; 7379 break; 7380 default: 7381 mi_copy_done(q, mp, EPROTO); 7382 return; 7383 } 7384 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7385 if (!mp1) 7386 return; 7387 7388 if (udp->udp_family == AF_INET) { 7389 sin_t *sin; 7390 7391 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 7392 sin = (sin_t *)mp1->b_rptr; 7393 mp1->b_wptr = (uchar_t *)&sin[1]; 7394 *sin = sin_null; 7395 sin->sin_family = AF_INET; 7396 sin->sin_addr.s_addr = v4addr; 7397 sin->sin_port = port; 7398 } else { 7399 /* udp->udp_family == AF_INET6 */ 7400 sin6_t *sin6; 7401 7402 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 7403 sin6 = (sin6_t *)mp1->b_rptr; 7404 mp1->b_wptr = (uchar_t *)&sin6[1]; 7405 *sin6 = sin6_null; 7406 sin6->sin6_family = AF_INET6; 7407 sin6->sin6_flowinfo = flowinfo; 7408 sin6->sin6_addr = v6addr; 7409 sin6->sin6_port = port; 7410 } 7411 /* Copy out the address */ 7412 mi_copyout(q, mp); 7413 } 7414 7415 7416 static int 7417 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7418 void *thisdg_attrs) 7419 { 7420 struct T_unitdata_req *udreqp; 7421 int is_absreq_failure; 7422 cred_t *cr; 7423 conn_t *connp = Q_TO_CONN(q); 7424 7425 ASSERT(((t_primp_t)mp->b_rptr)->type); 7426 7427 cr = DB_CREDDEF(mp, connp->conn_cred); 7428 7429 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7430 *errorp = 0; 7431 7432 /* 7433 * Use upper queue for option processing since the callback 7434 * routines expect to be called in UDP instance instead of IP. 7435 */ 7436 *errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length, 7437 udreqp->OPT_offset, cr, &udp_opt_obj, 7438 thisdg_attrs, &is_absreq_failure); 7439 7440 if (*errorp != 0) { 7441 /* 7442 * Note: No special action needed in this 7443 * module for "is_absreq_failure" 7444 */ 7445 return (-1); /* failure */ 7446 } 7447 ASSERT(is_absreq_failure == 0); 7448 return (0); /* success */ 7449 } 7450 7451 void 7452 udp_ddi_init(void) 7453 { 7454 int i; 7455 7456 UDP6_MAJ = ddi_name_to_major(UDP6); 7457 7458 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7459 udp_opt_obj.odb_opt_arr_cnt); 7460 7461 if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) { 7462 /* Not a power of two. Round up to nearest power of two */ 7463 for (i = 0; i < 31; i++) { 7464 if (udp_bind_fanout_size < (1 << i)) 7465 break; 7466 } 7467 udp_bind_fanout_size = 1 << i; 7468 } 7469 udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size * 7470 sizeof (udp_fanout_t), KM_SLEEP); 7471 for (i = 0; i < udp_bind_fanout_size; i++) { 7472 mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7473 NULL); 7474 } 7475 (void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr)); 7476 7477 udp_kstat_init(); 7478 7479 udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), 7480 CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); 7481 } 7482 7483 void 7484 udp_ddi_destroy(void) 7485 { 7486 int i; 7487 7488 nd_free(&udp_g_nd); 7489 7490 for (i = 0; i < udp_bind_fanout_size; i++) { 7491 mutex_destroy(&udp_bind_fanout[i].uf_lock); 7492 } 7493 7494 kmem_free(udp_bind_fanout, udp_bind_fanout_size * 7495 sizeof (udp_fanout_t)); 7496 7497 udp_kstat_fini(); 7498 7499 kmem_cache_destroy(udp_cache); 7500 } 7501 7502 static void 7503 udp_kstat_init(void) 7504 { 7505 udp_named_kstat_t template = { 7506 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 7507 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7508 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 7509 { "entrySize", KSTAT_DATA_INT32, 0 }, 7510 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7511 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7512 }; 7513 7514 udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME, 7515 "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0); 7516 7517 if (udp_mibkp == NULL) 7518 return; 7519 7520 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7521 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7522 7523 bcopy(&template, udp_mibkp->ks_data, sizeof (template)); 7524 7525 udp_mibkp->ks_update = udp_kstat_update; 7526 7527 kstat_install(udp_mibkp); 7528 7529 if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat", 7530 "net", KSTAT_TYPE_NAMED, 7531 sizeof (udp_statistics) / sizeof (kstat_named_t), 7532 KSTAT_FLAG_VIRTUAL)) != NULL) { 7533 udp_ksp->ks_data = &udp_statistics; 7534 kstat_install(udp_ksp); 7535 } 7536 } 7537 7538 static void 7539 udp_kstat_fini(void) 7540 { 7541 if (udp_ksp != NULL) { 7542 kstat_delete(udp_ksp); 7543 udp_ksp = NULL; 7544 } 7545 if (udp_mibkp != NULL) { 7546 kstat_delete(udp_mibkp); 7547 udp_mibkp = NULL; 7548 } 7549 } 7550 7551 static int 7552 udp_kstat_update(kstat_t *kp, int rw) 7553 { 7554 udp_named_kstat_t *udpkp; 7555 7556 if ((kp == NULL) || (kp->ks_data == NULL)) 7557 return (EIO); 7558 7559 if (rw == KSTAT_WRITE) 7560 return (EACCES); 7561 7562 udpkp = (udp_named_kstat_t *)kp->ks_data; 7563 7564 udpkp->inDatagrams.value.ui32 = udp_mib.udpInDatagrams; 7565 udpkp->inErrors.value.ui32 = udp_mib.udpInErrors; 7566 udpkp->outDatagrams.value.ui32 = udp_mib.udpOutDatagrams; 7567 udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors; 7568 7569 return (0); 7570 } 7571 7572 /* ARGSUSED */ 7573 static void 7574 udp_rput(queue_t *q, mblk_t *mp) 7575 { 7576 /* 7577 * We get here whenever we do qreply() from IP, 7578 * i.e as part of handlings ioctls, etc. 7579 */ 7580 putnext(q, mp); 7581 } 7582 7583 /* 7584 * Read-side synchronous stream info entry point, called as a 7585 * result of handling certain STREAMS ioctl operations. 7586 */ 7587 static int 7588 udp_rinfop(queue_t *q, infod_t *dp) 7589 { 7590 mblk_t *mp; 7591 uint_t cmd = dp->d_cmd; 7592 int res = 0; 7593 int error = 0; 7594 udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); 7595 struct stdata *stp = STREAM(q); 7596 7597 mutex_enter(&udp->udp_drain_lock); 7598 /* If shutdown on read has happened, return nothing */ 7599 mutex_enter(&stp->sd_lock); 7600 if (stp->sd_flag & STREOF) { 7601 mutex_exit(&stp->sd_lock); 7602 goto done; 7603 } 7604 mutex_exit(&stp->sd_lock); 7605 7606 if ((mp = udp->udp_rcv_list_head) == NULL) 7607 goto done; 7608 7609 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7610 7611 if (cmd & INFOD_COUNT) { 7612 /* 7613 * Return the number of messages. 7614 */ 7615 dp->d_count += udp->udp_rcv_msgcnt; 7616 res |= INFOD_COUNT; 7617 } 7618 if (cmd & INFOD_BYTES) { 7619 /* 7620 * Return size of all data messages. 7621 */ 7622 dp->d_bytes += udp->udp_rcv_cnt; 7623 res |= INFOD_BYTES; 7624 } 7625 if (cmd & INFOD_FIRSTBYTES) { 7626 /* 7627 * Return size of first data message. 7628 */ 7629 dp->d_bytes = msgdsize(mp); 7630 res |= INFOD_FIRSTBYTES; 7631 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7632 } 7633 if (cmd & INFOD_COPYOUT) { 7634 mblk_t *mp1 = mp->b_cont; 7635 int n; 7636 /* 7637 * Return data contents of first message. 7638 */ 7639 ASSERT(DB_TYPE(mp1) == M_DATA); 7640 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7641 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7642 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7643 UIO_READ, dp->d_uiop)) != 0) { 7644 goto done; 7645 } 7646 mp1 = mp1->b_cont; 7647 } 7648 res |= INFOD_COPYOUT; 7649 dp->d_cmd &= ~INFOD_COPYOUT; 7650 } 7651 done: 7652 mutex_exit(&udp->udp_drain_lock); 7653 7654 dp->d_res |= res; 7655 7656 return (error); 7657 } 7658 7659 /* 7660 * Read-side synchronous stream entry point. This is called as a result 7661 * of recv/read operation done at sockfs, and is guaranteed to execute 7662 * outside of the interrupt thread context. It returns a single datagram 7663 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7664 */ 7665 static int 7666 udp_rrw(queue_t *q, struiod_t *dp) 7667 { 7668 mblk_t *mp; 7669 udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); 7670 7671 /* We should never get here when we're in SNMP mode */ 7672 ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); 7673 7674 /* 7675 * Dequeue datagram from the head of the list and return 7676 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7677 * set/cleared depending on whether or not there's data 7678 * remaining in the list. 7679 */ 7680 mutex_enter(&udp->udp_drain_lock); 7681 if (!udp->udp_direct_sockfs) { 7682 mutex_exit(&udp->udp_drain_lock); 7683 UDP_STAT(udp_rrw_busy); 7684 return (EBUSY); 7685 } 7686 if ((mp = udp->udp_rcv_list_head) != NULL) { 7687 uint_t size = msgdsize(mp); 7688 7689 /* Last datagram in the list? */ 7690 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7691 udp->udp_rcv_list_tail = NULL; 7692 mp->b_next = NULL; 7693 7694 udp->udp_rcv_cnt -= size; 7695 udp->udp_rcv_msgcnt--; 7696 UDP_STAT(udp_rrw_msgcnt); 7697 7698 /* No longer flow-controlling? */ 7699 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7700 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7701 udp->udp_drain_qfull = B_FALSE; 7702 } 7703 if (udp->udp_rcv_list_head == NULL) { 7704 /* 7705 * Either we just dequeued the last datagram or 7706 * we get here from sockfs and have nothing to 7707 * return; in this case clear RSLEEP. 7708 */ 7709 ASSERT(udp->udp_rcv_cnt == 0); 7710 ASSERT(udp->udp_rcv_msgcnt == 0); 7711 ASSERT(udp->udp_rcv_list_tail == NULL); 7712 STR_WAKEUP_CLEAR(STREAM(q)); 7713 } else { 7714 /* 7715 * More data follows; we need udp_rrw() to be 7716 * called in future to pick up the rest. 7717 */ 7718 STR_WAKEUP_SET(STREAM(q)); 7719 } 7720 mutex_exit(&udp->udp_drain_lock); 7721 dp->d_mp = mp; 7722 return (0); 7723 } 7724 7725 /* 7726 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7727 * list; this is typically executed within the interrupt thread context 7728 * and so we do things as quickly as possible. 7729 */ 7730 static void 7731 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7732 { 7733 ASSERT(q == RD(q)); 7734 ASSERT(pkt_len == msgdsize(mp)); 7735 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7736 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7737 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7738 7739 mutex_enter(&udp->udp_drain_lock); 7740 /* 7741 * Wake up and signal the receiving app; it is okay to do this 7742 * before enqueueing the mp because we are holding the drain lock. 7743 * One of the advantages of synchronous stream is the ability for 7744 * us to find out when the application performs a read on the 7745 * socket by way of udp_rrw() entry point being called. We need 7746 * to generate SIGPOLL/SIGIO for each received data in the case 7747 * of asynchronous socket just as in the strrput() case. However, 7748 * we only wake the application up when necessary, i.e. during the 7749 * first enqueue. When udp_rrw() is called, we send up a single 7750 * datagram upstream and call STR_WAKEUP_SET() again when there 7751 * are still data remaining in our receive queue. 7752 */ 7753 if (udp->udp_rcv_list_head == NULL) { 7754 STR_WAKEUP_SET(STREAM(q)); 7755 udp->udp_rcv_list_head = mp; 7756 } else { 7757 udp->udp_rcv_list_tail->b_next = mp; 7758 } 7759 udp->udp_rcv_list_tail = mp; 7760 udp->udp_rcv_cnt += pkt_len; 7761 udp->udp_rcv_msgcnt++; 7762 7763 /* Need to flow-control? */ 7764 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7765 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7766 udp->udp_drain_qfull = B_TRUE; 7767 7768 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 7769 STR_SENDSIG(STREAM(q)); 7770 mutex_exit(&udp->udp_drain_lock); 7771 } 7772 7773 /* 7774 * Drain the contents of receive list to the module upstream; we do 7775 * this during close or when we fallback to the slow mode due to 7776 * sockmod being popped or a module being pushed on top of us. 7777 */ 7778 static void 7779 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7780 { 7781 mblk_t *mp; 7782 7783 ASSERT(q == RD(q)); 7784 7785 mutex_enter(&udp->udp_drain_lock); 7786 /* 7787 * There is no race with a concurrent udp_input() sending 7788 * up packets using putnext() after we have cleared the 7789 * udp_direct_sockfs flag but before we have completed 7790 * sending up the packets in udp_rcv_list, since we are 7791 * either a writer or we have quiesced the conn. 7792 */ 7793 udp->udp_direct_sockfs = B_FALSE; 7794 mutex_exit(&udp->udp_drain_lock); 7795 7796 if (udp->udp_rcv_list_head != NULL) 7797 UDP_STAT(udp_drain); 7798 7799 /* 7800 * Send up everything via putnext(); note here that we 7801 * don't need the udp_drain_lock to protect us since 7802 * nothing can enter udp_rrw() and that we currently 7803 * have exclusive access to this udp. 7804 */ 7805 while ((mp = udp->udp_rcv_list_head) != NULL) { 7806 udp->udp_rcv_list_head = mp->b_next; 7807 mp->b_next = NULL; 7808 udp->udp_rcv_cnt -= msgdsize(mp); 7809 udp->udp_rcv_msgcnt--; 7810 if (closing) { 7811 freemsg(mp); 7812 } else { 7813 putnext(q, mp); 7814 } 7815 } 7816 ASSERT(udp->udp_rcv_cnt == 0); 7817 ASSERT(udp->udp_rcv_msgcnt == 0); 7818 ASSERT(udp->udp_rcv_list_head == NULL); 7819 udp->udp_rcv_list_tail = NULL; 7820 udp->udp_drain_qfull = B_FALSE; 7821 } 7822 7823 static size_t 7824 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7825 { 7826 /* We add a bit of extra buffering */ 7827 size += size >> 1; 7828 if (size > udp_max_buf) 7829 size = udp_max_buf; 7830 7831 udp->udp_rcv_hiwat = size; 7832 return (size); 7833 } 7834 7835 /* 7836 * Little helper for IPsec's NAT-T processing. 7837 */ 7838 boolean_t 7839 udp_compute_checksum(void) 7840 { 7841 return (udp_do_checksum); 7842 } 7843