1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 31 32 #include <sys/types.h> 33 #include <sys/stream.h> 34 #include <sys/dlpi.h> 35 #include <sys/pattr.h> 36 #include <sys/stropts.h> 37 #include <sys/strlog.h> 38 #include <sys/strsun.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/tiuser.h> 43 #include <sys/ddi.h> 44 #include <sys/sunddi.h> 45 #include <sys/strsubr.h> 46 #include <sys/suntpi.h> 47 #include <sys/xti_inet.h> 48 #include <sys/cmn_err.h> 49 #include <sys/kmem.h> 50 #include <sys/policy.h> 51 #include <sys/ucred.h> 52 #include <sys/zone.h> 53 54 #include <sys/socket.h> 55 #include <sys/sockio.h> 56 #include <sys/vtrace.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 67 #include <inet/common.h> 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_if.h> 73 #include <inet/ip_multi.h> 74 #include <inet/mi.h> 75 #include <inet/mib2.h> 76 #include <inet/nd.h> 77 #include <inet/optcom.h> 78 #include <inet/snmpcom.h> 79 #include <inet/kstatcom.h> 80 #include <inet/udp_impl.h> 81 #include <inet/ipclassifier.h> 82 #include <inet/ipsec_impl.h> 83 #include <inet/ipp_common.h> 84 85 /* 86 * The ipsec_info.h header file is here since it has the definition for the 87 * M_CTL message types used by IP to convey information to the ULP. The 88 * ipsec_info.h needs the pfkeyv2.h, hence the latters presence. 89 */ 90 #include <net/pfkeyv2.h> 91 #include <inet/ipsec_info.h> 92 93 /* 94 * Synchronization notes: 95 * 96 * UDP uses a combination of its internal perimeter, a global lock and 97 * a set of bind hash locks to protect its data structures. Please see 98 * the note above udp_mode_assertions for details about the internal 99 * perimeter. 100 * 101 * When a UDP endpoint is bound to a local port, it is inserted into 102 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 103 * The size of the array is controlled by the udp_bind_fanout_size variable. 104 * This variable can be changed in /etc/system if the default value is 105 * not large enough. Each bind hash bucket is protected by a per bucket 106 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 107 * structure. An UDP endpoint is removed from the bind hash list only 108 * when it is being unbound or being closed. The per bucket lock also 109 * protects a UDP endpoint's state changes. 110 * 111 * Plumbing notes: 112 * 113 * Both udp and ip are merged, but the streams plumbing is kept unchanged 114 * in that udp is always pushed atop /dev/ip. This is done to preserve 115 * backwards compatibility for certain applications which rely on such 116 * plumbing geometry to do things such as issuing I_POP on the stream 117 * in order to obtain direct access to /dev/ip, etc. 118 * 119 * All UDP processings happen in the /dev/ip instance; the udp module 120 * instance does not possess any state about the endpoint, and merely 121 * acts as a dummy module whose presence is to keep the streams plumbing 122 * appearance unchanged. At open time /dev/ip allocates a conn_t that 123 * happens to embed a udp_t. This stays dormant until the time udp is 124 * pushed, which indicates to /dev/ip that it must convert itself from 125 * an IP to a UDP endpoint. 126 * 127 * We only allow for the following plumbing cases: 128 * 129 * Normal: 130 * /dev/ip is first opened and later udp is pushed directly on top. 131 * This is the default action that happens when a udp socket or 132 * /dev/udp is opened. The conn_t created by /dev/ip instance is 133 * now shared and is marked with IPCL_UDP. 134 * 135 * SNMP-only: 136 * udp is pushed on top of a module other than /dev/ip. When this 137 * happens it will support only SNMP semantics. A new conn_t is 138 * allocated and marked with IPCL_UDPMOD. 139 * 140 * The above cases imply that we don't support any intermediate module to 141 * reside in between /dev/ip and udp -- in fact, we never supported such 142 * scenario in the past as the inter-layer communication semantics have 143 * always been private. Also note that the normal case allows for SNMP 144 * requests to be processed in addition to the rest of UDP operations. 145 * 146 * The normal case plumbing is depicted by the following diagram: 147 * 148 * +---------------+---------------+ 149 * | | | udp 150 * | udp_wq | udp_rq | 151 * | | UDP_RD | 152 * | | | 153 * +---------------+---------------+ 154 * | ^ 155 * v | 156 * +---------------+---------------+ 157 * | | | /dev/ip 158 * | ip_wq | ip_rq | conn_t 159 * | UDP_WR | | 160 * | | | 161 * +---------------+---------------+ 162 * 163 * Messages arriving at udp_wq from above will end up in ip_wq before 164 * it gets processed, i.e. udp write entry points will advance udp_wq 165 * and use its q_next value as ip_wq in order to use the conn_t that 166 * is stored in its q_ptr. Likewise, messages generated by ip to the 167 * module above udp will appear as if they are originated from udp_rq, 168 * i.e. putnext() calls to the module above udp is done using the 169 * udp_rq instead of ip_rq in order to avoid udp_rput() which does 170 * nothing more than calling putnext(). 171 * 172 * The above implies the following rule of thumb: 173 * 174 * 1. udp_t is obtained from conn_t, which is created by the /dev/ip 175 * instance and is stored in q_ptr of both ip_wq and ip_rq. There 176 * is no direct reference to conn_t from either udp_wq or udp_rq. 177 * 178 * 2. Write-side entry points of udp can obtain the conn_t via the 179 * Q_TO_CONN() macro, using the queue value obtain from UDP_WR(). 180 * 181 * 3. While in /dev/ip context, putnext() to the module above udp can 182 * be done by supplying the queue value obtained from UDP_RD(). 183 * 184 */ 185 186 static queue_t *UDP_WR(queue_t *); 187 static queue_t *UDP_RD(queue_t *); 188 189 udp_stat_t udp_statistics = { 190 { "udp_ip_send", KSTAT_DATA_UINT64 }, 191 { "udp_ip_ire_send", KSTAT_DATA_UINT64 }, 192 { "udp_ire_null", KSTAT_DATA_UINT64 }, 193 { "udp_drain", KSTAT_DATA_UINT64 }, 194 { "udp_sock_fallback", KSTAT_DATA_UINT64 }, 195 { "udp_rrw_busy", KSTAT_DATA_UINT64 }, 196 { "udp_rrw_msgcnt", KSTAT_DATA_UINT64 }, 197 { "udp_out_sw_cksum", KSTAT_DATA_UINT64 }, 198 { "udp_out_sw_cksum_bytes", KSTAT_DATA_UINT64 }, 199 { "udp_out_opt", KSTAT_DATA_UINT64 }, 200 { "udp_out_err_notconn", KSTAT_DATA_UINT64 }, 201 { "udp_out_err_output", KSTAT_DATA_UINT64 }, 202 { "udp_out_err_tudr", KSTAT_DATA_UINT64 }, 203 { "udp_in_pktinfo", KSTAT_DATA_UINT64 }, 204 { "udp_in_recvdstaddr", KSTAT_DATA_UINT64 }, 205 { "udp_in_recvopts", KSTAT_DATA_UINT64 }, 206 { "udp_in_recvif", KSTAT_DATA_UINT64 }, 207 { "udp_in_recvslla", KSTAT_DATA_UINT64 }, 208 { "udp_in_recvucred", KSTAT_DATA_UINT64 }, 209 { "udp_in_recvttl", KSTAT_DATA_UINT64 }, 210 { "udp_in_recvhopopts", KSTAT_DATA_UINT64 }, 211 { "udp_in_recvhoplimit", KSTAT_DATA_UINT64 }, 212 { "udp_in_recvdstopts", KSTAT_DATA_UINT64 }, 213 { "udp_in_recvrtdstopts", KSTAT_DATA_UINT64 }, 214 { "udp_in_recvrthdr", KSTAT_DATA_UINT64 }, 215 { "udp_in_recvpktinfo", KSTAT_DATA_UINT64 }, 216 { "udp_in_recvtclass", KSTAT_DATA_UINT64 }, 217 #ifdef DEBUG 218 { "udp_data_conn", KSTAT_DATA_UINT64 }, 219 { "udp_data_notconn", KSTAT_DATA_UINT64 }, 220 #endif 221 }; 222 223 static kstat_t *udp_ksp; 224 struct kmem_cache *udp_cache; 225 226 /* 227 * Bind hash list size and hash function. It has to be a power of 2 for 228 * hashing. 229 */ 230 #define UDP_BIND_FANOUT_SIZE 512 231 #define UDP_BIND_HASH(lport) \ 232 ((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1)) 233 234 /* UDP bind fanout hash structure. */ 235 typedef struct udp_fanout_s { 236 udp_t *uf_udp; 237 kmutex_t uf_lock; 238 #if defined(_LP64) || defined(_I32LPx) 239 char uf_pad[48]; 240 #else 241 char uf_pad[56]; 242 #endif 243 } udp_fanout_t; 244 245 uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE; 246 /* udp_fanout_t *udp_bind_fanout. */ 247 static udp_fanout_t *udp_bind_fanout; 248 249 /* 250 * This controls the rate some ndd info report functions can be used 251 * by non-priviledged users. It stores the last time such info is 252 * requested. When those report functions are called again, this 253 * is checked with the current time and compare with the ndd param 254 * udp_ndd_get_info_interval. 255 */ 256 static clock_t udp_last_ndd_get_info_time; 257 #define NDD_TOO_QUICK_MSG \ 258 "ndd get info rate too high for non-priviledged users, try again " \ 259 "later.\n" 260 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 261 262 static void udp_addr_req(queue_t *q, mblk_t *mp); 263 static void udp_bind(queue_t *q, mblk_t *mp); 264 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 265 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 266 static int udp_build_hdrs(queue_t *q, udp_t *udp); 267 static void udp_capability_req(queue_t *q, mblk_t *mp); 268 static int udp_close(queue_t *q); 269 static void udp_connect(queue_t *q, mblk_t *mp); 270 static void udp_disconnect(queue_t *q, mblk_t *mp); 271 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 272 int sys_error); 273 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 274 t_scalar_t tlierr, int unixerr); 275 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 276 cred_t *cr); 277 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 278 char *value, caddr_t cp, cred_t *cr); 279 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 280 char *value, caddr_t cp, cred_t *cr); 281 static void udp_icmp_error(queue_t *q, mblk_t *mp); 282 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 283 static void udp_info_req(queue_t *q, mblk_t *mp); 284 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 285 t_scalar_t addr_length); 286 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 287 cred_t *credp); 288 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 289 int *errorp, void *thisdg_attrs); 290 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 291 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 292 static boolean_t udp_param_register(udpparam_t *udppa, int cnt); 293 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 294 cred_t *cr); 295 static int udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 296 uchar_t **optbufp, uint_t *optlenp); 297 static void udp_report_item(mblk_t *mp, udp_t *udp); 298 static void udp_rput(queue_t *q, mblk_t *mp); 299 static void udp_rput_other(queue_t *, mblk_t *); 300 static int udp_rinfop(queue_t *q, infod_t *dp); 301 static int udp_rrw(queue_t *q, struiod_t *dp); 302 static void udp_rput_bind_ack(queue_t *q, mblk_t *mp); 303 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 304 cred_t *cr); 305 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha); 306 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 307 t_scalar_t destlen, t_scalar_t err); 308 static void udp_unbind(queue_t *q, mblk_t *mp); 309 static in_port_t udp_update_next_port(in_port_t port, boolean_t random); 310 static void udp_wput(queue_t *q, mblk_t *mp); 311 static mblk_t *udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst, 312 uint16_t port, uint_t srcid, int *error); 313 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 314 t_scalar_t tudr_optlen, int *error); 315 static void udp_wput_other(queue_t *q, mblk_t *mp); 316 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 317 static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 318 socklen_t addrlen); 319 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 320 321 static void udp_kstat_init(void); 322 static void udp_kstat_fini(void); 323 static int udp_kstat_update(kstat_t *kp, int rw); 324 static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2); 325 static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 326 static void udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 327 static void udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2); 328 329 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 330 uint_t pkt_len); 331 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 332 static void udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t); 333 static void udp_exit(conn_t *); 334 static void udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t); 335 #ifdef DEBUG 336 static void udp_mode_assertions(udp_t *, int); 337 #endif /* DEBUG */ 338 339 major_t UDP6_MAJ; 340 #define UDP6 "udp6" 341 342 #define UDP_RECV_HIWATER (56 * 1024) 343 #define UDP_RECV_LOWATER 128 344 #define UDP_XMIT_HIWATER (56 * 1024) 345 #define UDP_XMIT_LOWATER 1024 346 347 static struct module_info udp_info = { 348 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 349 }; 350 351 static struct qinit udp_rinit = { 352 (pfi_t)udp_rput, NULL, udp_open, udp_close, NULL, 353 &udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 354 }; 355 356 static struct qinit udp_winit = { 357 (pfi_t)udp_wput, NULL, NULL, NULL, NULL, 358 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 359 }; 360 361 static struct qinit winit = { 362 (pfi_t)putnext, NULL, NULL, NULL, NULL, 363 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 364 }; 365 366 /* Support for just SNMP if UDP is not pushed directly over device IP */ 367 struct qinit udp_snmp_rinit = { 368 (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL, 369 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 370 }; 371 372 struct qinit udp_snmp_winit = { 373 (pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL, 374 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 375 }; 376 377 struct streamtab udpinfo = { 378 &udp_rinit, &winit 379 }; 380 381 static sin_t sin_null; /* Zero address for quick clears */ 382 static sin6_t sin6_null; /* Zero address for quick clears */ 383 384 /* Hint not protected by any lock */ 385 static in_port_t udp_g_next_port_to_try; 386 387 /* 388 * Extra privileged ports. In host byte order. 389 */ 390 #define UDP_NUM_EPRIV_PORTS 64 391 static int udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 392 static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 }; 393 394 /* Only modified during _init and _fini thus no locking is needed. */ 395 static IDP udp_g_nd; /* Points to table of UDP ND variables. */ 396 397 /* MIB-2 stuff for SNMP */ 398 static mib2_udp_t udp_mib; /* SNMP fixed size info */ 399 static kstat_t *udp_mibkp; /* kstat exporting udp_mib data */ 400 401 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 402 403 /* Default structure copied into T_INFO_ACK messages */ 404 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 405 T_INFO_ACK, 406 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 407 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 408 T_INVALID, /* CDATA_size. udp does not support connect data. */ 409 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 410 sizeof (sin_t), /* ADDR_size. */ 411 0, /* OPT_size - not initialized here */ 412 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 413 T_CLTS, /* SERV_type. udp supports connection-less. */ 414 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 415 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 416 }; 417 418 #define UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN) 419 420 static struct T_info_ack udp_g_t_info_ack_ipv6 = { 421 T_INFO_ACK, 422 UDP_MAXPACKET_IPV6, /* TSDU_size. Excl. headers */ 423 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 424 T_INVALID, /* CDATA_size. udp does not support connect data. */ 425 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 426 sizeof (sin6_t), /* ADDR_size. */ 427 0, /* OPT_size - not initialized here */ 428 UDP_MAXPACKET_IPV6, /* TIDU_size. Excl. headers */ 429 T_CLTS, /* SERV_type. udp supports connection-less. */ 430 TS_UNBND, /* CURRENT_state. This is set from udp_state. */ 431 (XPG4_1|SENDZERO) /* PROVIDER_flag */ 432 }; 433 434 /* largest UDP port number */ 435 #define UDP_MAX_PORT 65535 436 437 /* 438 * Table of ND variables supported by udp. These are loaded into udp_g_nd 439 * in udp_open. 440 * All of these are alterable, within the min/max values given, at run time. 441 */ 442 /* BEGIN CSTYLED */ 443 udpparam_t udp_param_arr[] = { 444 /*min max value name */ 445 { 0L, 256, 32, "udp_wroff_extra" }, 446 { 1L, 255, 255, "udp_ipv4_ttl" }, 447 { 0, IPV6_MAX_HOPS, IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"}, 448 { 1024, (32 * 1024), 1024, "udp_smallest_nonpriv_port" }, 449 { 0, 1, 1, "udp_do_checksum" }, 450 { 1024, UDP_MAX_PORT, (32 * 1024), "udp_smallest_anon_port" }, 451 { 1024, UDP_MAX_PORT, UDP_MAX_PORT, "udp_largest_anon_port" }, 452 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER, "udp_xmit_hiwat"}, 453 { 0, (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"}, 454 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER, "udp_recv_hiwat"}, 455 { 65536, (1<<30), 2*1024*1024, "udp_max_buf"}, 456 { 100, 60000, 1000, "udp_ndd_get_info_interval"}, 457 }; 458 /* END CSTYLED */ 459 460 /* 461 * The smallest anonymous port in the priviledged port range which UDP 462 * looks for free port. Use in the option UDP_ANONPRIVBIND. 463 */ 464 static in_port_t udp_min_anonpriv_port = 512; 465 466 /* If set to 0, pick ephemeral port sequentially; otherwise randomly. */ 467 uint32_t udp_random_anon_port = 1; 468 469 /* 470 * Hook functions to enable cluster networking. 471 * On non-clustered systems these vectors must always be NULL 472 */ 473 474 void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family, 475 uint8_t *laddrp, in_port_t lport) = NULL; 476 void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family, 477 uint8_t *laddrp, in_port_t lport) = NULL; 478 479 typedef union T_primitives *t_primp_t; 480 481 #define UDP_ENQUEUE_MP(udp, mp, proc, tag) { \ 482 ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL); \ 483 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 484 (mp)->b_queue = (queue_t *)((uintptr_t)tag); \ 485 (mp)->b_prev = (mblk_t *)proc; \ 486 if ((udp)->udp_mphead == NULL) \ 487 (udp)->udp_mphead = (mp); \ 488 else \ 489 (udp)->udp_mptail->b_next = (mp); \ 490 (udp)->udp_mptail = (mp); \ 491 (udp)->udp_mpcount++; \ 492 } 493 494 #define UDP_READERS_INCREF(udp) { \ 495 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 496 (udp)->udp_reader_count++; \ 497 } 498 499 #define UDP_READERS_DECREF(udp) { \ 500 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 501 (udp)->udp_reader_count--; \ 502 if ((udp)->udp_reader_count == 0) \ 503 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 504 } 505 506 #define UDP_SQUEUE_DECREF(udp) { \ 507 ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock)); \ 508 (udp)->udp_squeue_count--; \ 509 if ((udp)->udp_squeue_count == 0) \ 510 cv_broadcast(&(udp)->udp_connp->conn_cv); \ 511 } 512 513 /* 514 * Notes on UDP endpoint synchronization: 515 * 516 * UDP needs exclusive operation on a per endpoint basis, when executing 517 * functions that modify the endpoint state. udp_rput_other() deals with 518 * packets with IP options, and processing these packets end up having 519 * to update the endpoint's option related state. udp_wput_other() deals 520 * with control operations from the top, e.g. connect() that needs to 521 * update the endpoint state. These could be synchronized using locks, 522 * but the current version uses squeues for this purpose. squeues may 523 * give performance improvement for certain cases such as connected UDP 524 * sockets; thus the framework allows for using squeues. 525 * 526 * The perimeter routines are described as follows: 527 * 528 * udp_enter(): 529 * Enter the UDP endpoint perimeter. 530 * 531 * udp_become_writer(): 532 * Become exclusive on the UDP endpoint. Specifies a function 533 * that will be called exclusively either immediately or later 534 * when the perimeter is available exclusively. 535 * 536 * udp_exit(): 537 * Exit the UDP perimeter. 538 * 539 * Entering UDP from the top or from the bottom must be done using 540 * udp_enter(). No lock must be held while attempting to enter the UDP 541 * perimeter. When finished, udp_exit() must be called to get out of 542 * the perimeter. 543 * 544 * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode, 545 * multiple threads may enter a UDP endpoint concurrently. This is used 546 * for sending and/or receiving normal data. Control operations and other 547 * special cases call udp_become_writer() to become exclusive on a per 548 * endpoint basis and this results in transitioning to SQUEUE mode. squeue 549 * by definition serializes access to the conn_t. When there are no more 550 * pending messages on the squeue for the UDP connection, the endpoint 551 * reverts to MT_HOT mode. During the interregnum when not all MT threads 552 * of an endpoint have finished, messages are queued in the UDP endpoint 553 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode. 554 * 555 * These modes have the following analogs: 556 * 557 * UDP_MT_HOT/udp_reader_count==0 none 558 * UDP_MT_HOT/udp_reader_count>0 RW_READ_LOCK 559 * UDP_MT_QUEUED RW_WRITE_WANTED 560 * UDP_SQUEUE or UDP_QUEUED_SQUEUE RW_WRITE_LOCKED 561 * 562 * Stable modes: UDP_MT_HOT, UDP_SQUEUE 563 * Transient modes: UDP_MT_QUEUED, UDP_QUEUED_SQUEUE 564 * 565 * While in stable modes, UDP keeps track of the number of threads 566 * operating on the endpoint. The udp_reader_count variable represents 567 * the number of threads entering the endpoint as readers while it is 568 * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there 569 * is only a single reader, i.e. when this counter drops to 1. Likewise, 570 * udp_squeue_count represents the number of threads operating on the 571 * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition 572 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e. 573 * when this counter drops to 0. 574 * 575 * The default mode is set to UDP_MT_HOT and UDP alternates between 576 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below. 577 * 578 * Mode transition: 579 * ---------------------------------------------------------------- 580 * old mode Event New mode 581 * ---------------------------------------------------------------- 582 * UDP_MT_HOT Call to udp_become_writer() UDP_SQUEUE 583 * and udp_reader_count == 1 584 * 585 * UDP_MT_HOT Call to udp_become_writer() UDP_MT_QUEUED 586 * and udp_reader_count > 1 587 * 588 * UDP_MT_QUEUED udp_reader_count drops to zero UDP_QUEUED_SQUEUE 589 * 590 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_SQUEUE 591 * internal UDP queue successfully 592 * moved to squeue AND udp_squeue_count != 0 593 * 594 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_MT_HOT 595 * internal UDP queue successfully 596 * moved to squeue AND udp_squeue_count 597 * drops to zero 598 * 599 * UDP_SQUEUE udp_squeue_count drops to zero UDP_MT_HOT 600 * ---------------------------------------------------------------- 601 */ 602 603 static queue_t * 604 UDP_WR(queue_t *q) 605 { 606 ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL); 607 ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL); 608 ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next))); 609 610 return (_WR(q)->q_next); 611 } 612 613 static queue_t * 614 UDP_RD(queue_t *q) 615 { 616 ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL); 617 ASSERT(IPCL_IS_UDP(Q_TO_CONN(q))); 618 ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL); 619 620 return (_RD(q)->q_next); 621 } 622 623 #ifdef DEBUG 624 #define UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller) 625 #else 626 #define UDP_MODE_ASSERTIONS(udp, caller) 627 #endif 628 629 /* Invariants */ 630 #ifdef DEBUG 631 632 uint32_t udp_count[4]; 633 634 /* Context of udp_mode_assertions */ 635 #define UDP_ENTER 1 636 #define UDP_BECOME_WRITER 2 637 #define UDP_EXIT 3 638 639 static void 640 udp_mode_assertions(udp_t *udp, int caller) 641 { 642 ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock)); 643 644 switch (udp->udp_mode) { 645 case UDP_MT_HOT: 646 /* 647 * Messages have not yet been enqueued on the internal queue, 648 * otherwise we would have switched to UDP_MT_QUEUED. Likewise 649 * by definition, there can't be any messages enqueued on the 650 * squeue. The UDP could be quiescent, so udp_reader_count 651 * could be zero at entry. 652 */ 653 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 && 654 udp->udp_squeue_count == 0); 655 ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0); 656 udp_count[0]++; 657 break; 658 659 case UDP_MT_QUEUED: 660 /* 661 * The last MT thread to exit the udp perimeter empties the 662 * internal queue and then switches the UDP to 663 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED 664 * mode, it means there must be at least 1 MT thread still in 665 * the perimeter and at least 1 message on the internal queue. 666 */ 667 ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL && 668 udp->udp_mpcount != 0 && udp->udp_squeue_count == 0); 669 udp_count[1]++; 670 break; 671 672 case UDP_QUEUED_SQUEUE: 673 /* 674 * The switch has happened from MT to SQUEUE. So there can't 675 * any MT threads. Messages could still pile up on the internal 676 * queue until the transition is complete and we move to 677 * UDP_SQUEUE mode. We can't assert on nonzero udp_squeue_count 678 * since the squeue could drain any time. 679 */ 680 ASSERT(udp->udp_reader_count == 0); 681 udp_count[2]++; 682 break; 683 684 case UDP_SQUEUE: 685 /* 686 * The transition is complete. Thre can't be any messages on 687 * the internal queue. The udp could be quiescent or the squeue 688 * could drain any time, so we can't assert on nonzero 689 * udp_squeue_count during entry. Nor can we assert that 690 * udp_reader_count is zero, since, a reader thread could have 691 * directly become writer in line by calling udp_become_writer 692 * without going through the queued states. 693 */ 694 ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0); 695 ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0); 696 udp_count[3]++; 697 break; 698 } 699 } 700 #endif 701 702 #define _UDP_ENTER(connp, mp, proc, tag) { \ 703 udp_t *_udp = (connp)->conn_udp; \ 704 \ 705 mutex_enter(&(connp)->conn_lock); \ 706 if ((connp)->conn_state_flags & CONN_CLOSING) { \ 707 mutex_exit(&(connp)->conn_lock); \ 708 freemsg(mp); \ 709 } else { \ 710 UDP_MODE_ASSERTIONS(_udp, UDP_ENTER); \ 711 \ 712 switch (_udp->udp_mode) { \ 713 case UDP_MT_HOT: \ 714 /* We can execute as reader right away. */ \ 715 UDP_READERS_INCREF(_udp); \ 716 mutex_exit(&(connp)->conn_lock); \ 717 (*(proc))(connp, mp, (connp)->conn_sqp); \ 718 break; \ 719 \ 720 case UDP_SQUEUE: \ 721 /* \ 722 * We are in squeue mode, send the \ 723 * packet to the squeue \ 724 */ \ 725 _udp->udp_squeue_count++; \ 726 CONN_INC_REF_LOCKED(connp); \ 727 mutex_exit(&(connp)->conn_lock); \ 728 squeue_enter((connp)->conn_sqp, mp, proc, \ 729 connp, tag); \ 730 break; \ 731 \ 732 case UDP_MT_QUEUED: \ 733 case UDP_QUEUED_SQUEUE: \ 734 /* \ 735 * Some messages may have been enqueued \ 736 * ahead of us. Enqueue the new message \ 737 * at the tail of the internal queue to \ 738 * preserve message ordering. \ 739 */ \ 740 UDP_ENQUEUE_MP(_udp, mp, proc, tag); \ 741 mutex_exit(&(connp)->conn_lock); \ 742 break; \ 743 } \ 744 } \ 745 } 746 747 static void 748 udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 749 { 750 _UDP_ENTER(connp, mp, proc, tag); 751 } 752 753 static void 754 udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag) 755 { 756 udp_t *udp; 757 758 udp = connp->conn_udp; 759 760 mutex_enter(&connp->conn_lock); 761 762 UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER); 763 764 switch (udp->udp_mode) { 765 case UDP_MT_HOT: 766 if (udp->udp_reader_count == 1) { 767 /* 768 * We are the only MT thread. Switch to squeue mode 769 * immediately. 770 */ 771 udp->udp_mode = UDP_SQUEUE; 772 udp->udp_squeue_count = 1; 773 CONN_INC_REF_LOCKED(connp); 774 mutex_exit(&connp->conn_lock); 775 squeue_enter(connp->conn_sqp, mp, proc, connp, tag); 776 return; 777 } 778 /* FALLTHRU */ 779 780 case UDP_MT_QUEUED: 781 /* Enqueue the packet internally in UDP */ 782 udp->udp_mode = UDP_MT_QUEUED; 783 UDP_ENQUEUE_MP(udp, mp, proc, tag); 784 mutex_exit(&connp->conn_lock); 785 return; 786 787 case UDP_SQUEUE: 788 case UDP_QUEUED_SQUEUE: 789 /* 790 * We are already exclusive. i.e. we are already 791 * writer. Simply call the desired function. 792 */ 793 udp->udp_squeue_count++; 794 mutex_exit(&connp->conn_lock); 795 (*proc)(connp, mp, connp->conn_sqp); 796 return; 797 } 798 } 799 800 /* 801 * Transition from MT mode to SQUEUE mode, when the last MT thread 802 * is exiting the UDP perimeter. Move all messages from the internal 803 * udp queue to the squeue. A better way would be to move all the 804 * messages in one shot, this needs more support from the squeue framework 805 */ 806 static void 807 udp_switch_to_squeue(udp_t *udp) 808 { 809 mblk_t *mp; 810 mblk_t *mp_next; 811 sqproc_t proc; 812 uint8_t tag; 813 conn_t *connp = udp->udp_connp; 814 815 ASSERT(MUTEX_HELD(&connp->conn_lock)); 816 ASSERT(udp->udp_mode == UDP_MT_QUEUED); 817 while (udp->udp_mphead != NULL) { 818 mp = udp->udp_mphead; 819 udp->udp_mphead = NULL; 820 udp->udp_mptail = NULL; 821 udp->udp_mpcount = 0; 822 udp->udp_mode = UDP_QUEUED_SQUEUE; 823 mutex_exit(&connp->conn_lock); 824 /* 825 * It is best not to hold any locks across the calls 826 * to squeue functions. Since we drop the lock we 827 * need to go back and check the udp_mphead once again 828 * after the squeue_fill and hence the while loop at 829 * the top of this function 830 */ 831 for (; mp != NULL; mp = mp_next) { 832 mp_next = mp->b_next; 833 proc = (sqproc_t)mp->b_prev; 834 tag = (uint8_t)((uintptr_t)mp->b_queue); 835 mp->b_next = NULL; 836 mp->b_prev = NULL; 837 mp->b_queue = NULL; 838 CONN_INC_REF(connp); 839 udp->udp_squeue_count++; 840 squeue_fill(connp->conn_sqp, mp, proc, connp, 841 tag); 842 } 843 mutex_enter(&connp->conn_lock); 844 } 845 /* 846 * udp_squeue_count of zero implies that the squeue has drained 847 * even before we arrived here (i.e. after the squeue_fill above) 848 */ 849 udp->udp_mode = (udp->udp_squeue_count != 0) ? 850 UDP_SQUEUE : UDP_MT_HOT; 851 } 852 853 #define _UDP_EXIT(connp) { \ 854 udp_t *_udp = (connp)->conn_udp; \ 855 \ 856 mutex_enter(&(connp)->conn_lock); \ 857 UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \ 858 \ 859 switch (_udp->udp_mode) { \ 860 case UDP_MT_HOT: \ 861 UDP_READERS_DECREF(_udp); \ 862 mutex_exit(&(connp)->conn_lock); \ 863 break; \ 864 \ 865 case UDP_SQUEUE: \ 866 UDP_SQUEUE_DECREF(_udp); \ 867 if (_udp->udp_squeue_count == 0) \ 868 _udp->udp_mode = UDP_MT_HOT; \ 869 mutex_exit(&(connp)->conn_lock); \ 870 break; \ 871 \ 872 case UDP_MT_QUEUED: \ 873 /* \ 874 * If this is the last MT thread, we need to \ 875 * switch to squeue mode \ 876 */ \ 877 UDP_READERS_DECREF(_udp); \ 878 if (_udp->udp_reader_count == 0) \ 879 udp_switch_to_squeue(_udp); \ 880 mutex_exit(&(connp)->conn_lock); \ 881 break; \ 882 \ 883 case UDP_QUEUED_SQUEUE: \ 884 UDP_SQUEUE_DECREF(_udp); \ 885 /* \ 886 * Even if the udp_squeue_count drops to zero, we \ 887 * don't want to change udp_mode to UDP_MT_HOT here. \ 888 * The thread in udp_switch_to_squeue will take care \ 889 * of the transition to UDP_MT_HOT, after emptying \ 890 * any more new messages that have been enqueued in \ 891 * udp_mphead. \ 892 */ \ 893 mutex_exit(&(connp)->conn_lock); \ 894 break; \ 895 } \ 896 } 897 898 static void 899 udp_exit(conn_t *connp) 900 { 901 _UDP_EXIT(connp); 902 } 903 904 /* 905 * Return the next anonymous port in the priviledged port range for 906 * bind checking. 907 */ 908 static in_port_t 909 udp_get_next_priv_port(void) 910 { 911 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 912 913 if (next_priv_port < udp_min_anonpriv_port) { 914 next_priv_port = IPPORT_RESERVED - 1; 915 } 916 return (next_priv_port--); 917 } 918 919 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 920 /* ARGSUSED */ 921 static int 922 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 923 { 924 udp_fanout_t *udpf; 925 int i; 926 zoneid_t zoneid; 927 conn_t *connp; 928 udp_t *udp; 929 930 connp = Q_TO_CONN(q); 931 udp = connp->conn_udp; 932 933 /* Refer to comments in udp_status_report(). */ 934 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 935 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 936 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 937 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 938 return (0); 939 } 940 } 941 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 942 /* The following may work even if we cannot get a large buf. */ 943 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 944 return (0); 945 } 946 947 (void) mi_mpprintf(mp, 948 "UDP " MI_COL_HDRPAD_STR 949 /* 12345678[89ABCDEF] */ 950 " zone lport src addr dest addr port state"); 951 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 952 953 zoneid = connp->conn_zoneid; 954 955 for (i = 0; i < udp_bind_fanout_size; i++) { 956 udpf = &udp_bind_fanout[i]; 957 mutex_enter(&udpf->uf_lock); 958 959 /* Print the hash index. */ 960 udp = udpf->uf_udp; 961 if (zoneid != GLOBAL_ZONEID) { 962 /* skip to first entry in this zone; might be none */ 963 while (udp != NULL && 964 udp->udp_connp->conn_zoneid != zoneid) 965 udp = udp->udp_bind_hash; 966 } 967 if (udp != NULL) { 968 uint_t print_len, buf_len; 969 970 buf_len = mp->b_cont->b_datap->db_lim - 971 mp->b_cont->b_wptr; 972 print_len = snprintf((char *)mp->b_cont->b_wptr, 973 buf_len, "%d\n", i); 974 if (print_len < buf_len) { 975 mp->b_cont->b_wptr += print_len; 976 } else { 977 mp->b_cont->b_wptr += buf_len; 978 } 979 for (; udp != NULL; udp = udp->udp_bind_hash) { 980 if (zoneid == GLOBAL_ZONEID || 981 zoneid == udp->udp_connp->conn_zoneid) 982 udp_report_item(mp->b_cont, udp); 983 } 984 } 985 mutex_exit(&udpf->uf_lock); 986 } 987 udp_last_ndd_get_info_time = ddi_get_lbolt(); 988 return (0); 989 } 990 991 /* 992 * Hash list removal routine for udp_t structures. 993 */ 994 static void 995 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 996 { 997 udp_t *udpnext; 998 kmutex_t *lockp; 999 1000 if (udp->udp_ptpbhn == NULL) 1001 return; 1002 1003 /* 1004 * Extract the lock pointer in case there are concurrent 1005 * hash_remove's for this instance. 1006 */ 1007 ASSERT(udp->udp_port != 0); 1008 if (!caller_holds_lock) { 1009 lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock; 1010 ASSERT(lockp != NULL); 1011 mutex_enter(lockp); 1012 } 1013 if (udp->udp_ptpbhn != NULL) { 1014 udpnext = udp->udp_bind_hash; 1015 if (udpnext != NULL) { 1016 udpnext->udp_ptpbhn = udp->udp_ptpbhn; 1017 udp->udp_bind_hash = NULL; 1018 } 1019 *udp->udp_ptpbhn = udpnext; 1020 udp->udp_ptpbhn = NULL; 1021 } 1022 if (!caller_holds_lock) { 1023 mutex_exit(lockp); 1024 } 1025 } 1026 1027 static void 1028 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp) 1029 { 1030 udp_t **udpp; 1031 udp_t *udpnext; 1032 1033 ASSERT(MUTEX_HELD(&uf->uf_lock)); 1034 if (udp->udp_ptpbhn != NULL) { 1035 udp_bind_hash_remove(udp, B_TRUE); 1036 } 1037 udpp = &uf->uf_udp; 1038 udpnext = udpp[0]; 1039 if (udpnext != NULL) { 1040 /* 1041 * If the new udp bound to the INADDR_ANY address 1042 * and the first one in the list is not bound to 1043 * INADDR_ANY we skip all entries until we find the 1044 * first one bound to INADDR_ANY. 1045 * This makes sure that applications binding to a 1046 * specific address get preference over those binding to 1047 * INADDR_ANY. 1048 */ 1049 if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) && 1050 !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) { 1051 while ((udpnext = udpp[0]) != NULL && 1052 !V6_OR_V4_INADDR_ANY( 1053 udpnext->udp_bound_v6src)) { 1054 udpp = &(udpnext->udp_bind_hash); 1055 } 1056 if (udpnext != NULL) 1057 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1058 } else { 1059 udpnext->udp_ptpbhn = &udp->udp_bind_hash; 1060 } 1061 } 1062 udp->udp_bind_hash = udpnext; 1063 udp->udp_ptpbhn = udpp; 1064 udpp[0] = udp; 1065 } 1066 1067 /* 1068 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message 1069 * passed to udp_wput. 1070 * It associates a port number and local address with the stream. 1071 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP 1072 * protocol type (IPPROTO_UDP) placed in the message following the address. 1073 * A T_BIND_ACK message is passed upstream when ip acknowledges the request. 1074 * (Called as writer.) 1075 * 1076 * Note that UDP over IPv4 and IPv6 sockets can use the same port number 1077 * without setting SO_REUSEADDR. This is needed so that they 1078 * can be viewed as two independent transport protocols. 1079 * However, anonymouns ports are allocated from the same range to avoid 1080 * duplicating the udp_g_next_port_to_try. 1081 */ 1082 static void 1083 udp_bind(queue_t *q, mblk_t *mp) 1084 { 1085 sin_t *sin; 1086 sin6_t *sin6; 1087 mblk_t *mp1; 1088 in_port_t port; /* Host byte order */ 1089 in_port_t requested_port; /* Host byte order */ 1090 struct T_bind_req *tbr; 1091 int count; 1092 in6_addr_t v6src; 1093 boolean_t bind_to_req_port_only; 1094 int loopmax; 1095 udp_fanout_t *udpf; 1096 in_port_t lport; /* Network byte order */ 1097 zoneid_t zoneid; 1098 conn_t *connp; 1099 udp_t *udp; 1100 1101 connp = Q_TO_CONN(q); 1102 udp = connp->conn_udp; 1103 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 1104 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1105 "udp_bind: bad req, len %u", 1106 (uint_t)(mp->b_wptr - mp->b_rptr)); 1107 udp_err_ack(q, mp, TPROTO, 0); 1108 return; 1109 } 1110 1111 if (udp->udp_state != TS_UNBND) { 1112 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1113 "udp_bind: bad state, %u", udp->udp_state); 1114 udp_err_ack(q, mp, TOUTSTATE, 0); 1115 return; 1116 } 1117 /* 1118 * Reallocate the message to make sure we have enough room for an 1119 * address and the protocol type. 1120 */ 1121 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 1122 if (!mp1) { 1123 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1124 return; 1125 } 1126 1127 mp = mp1; 1128 tbr = (struct T_bind_req *)mp->b_rptr; 1129 switch (tbr->ADDR_length) { 1130 case 0: /* Request for a generic port */ 1131 tbr->ADDR_offset = sizeof (struct T_bind_req); 1132 if (udp->udp_family == AF_INET) { 1133 tbr->ADDR_length = sizeof (sin_t); 1134 sin = (sin_t *)&tbr[1]; 1135 *sin = sin_null; 1136 sin->sin_family = AF_INET; 1137 mp->b_wptr = (uchar_t *)&sin[1]; 1138 } else { 1139 ASSERT(udp->udp_family == AF_INET6); 1140 tbr->ADDR_length = sizeof (sin6_t); 1141 sin6 = (sin6_t *)&tbr[1]; 1142 *sin6 = sin6_null; 1143 sin6->sin6_family = AF_INET6; 1144 mp->b_wptr = (uchar_t *)&sin6[1]; 1145 } 1146 port = 0; 1147 break; 1148 1149 case sizeof (sin_t): /* Complete IPv4 address */ 1150 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 1151 sizeof (sin_t)); 1152 if (sin == NULL || !OK_32PTR((char *)sin)) { 1153 udp_err_ack(q, mp, TSYSERR, EINVAL); 1154 return; 1155 } 1156 if (udp->udp_family != AF_INET || 1157 sin->sin_family != AF_INET) { 1158 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1159 return; 1160 } 1161 port = ntohs(sin->sin_port); 1162 break; 1163 1164 case sizeof (sin6_t): /* complete IPv6 address */ 1165 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 1166 sizeof (sin6_t)); 1167 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1168 udp_err_ack(q, mp, TSYSERR, EINVAL); 1169 return; 1170 } 1171 if (udp->udp_family != AF_INET6 || 1172 sin6->sin6_family != AF_INET6) { 1173 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1174 return; 1175 } 1176 port = ntohs(sin6->sin6_port); 1177 break; 1178 1179 default: /* Invalid request */ 1180 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1181 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 1182 udp_err_ack(q, mp, TBADADDR, 0); 1183 return; 1184 } 1185 1186 requested_port = port; 1187 1188 if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ) 1189 bind_to_req_port_only = B_FALSE; 1190 else /* T_BIND_REQ and requested_port != 0 */ 1191 bind_to_req_port_only = B_TRUE; 1192 1193 if (requested_port == 0) { 1194 /* 1195 * If the application passed in zero for the port number, it 1196 * doesn't care which port number we bind to. Get one in the 1197 * valid range. 1198 */ 1199 if (udp->udp_anon_priv_bind) { 1200 port = udp_get_next_priv_port(); 1201 } else { 1202 port = udp_update_next_port(udp_g_next_port_to_try, 1203 B_TRUE); 1204 } 1205 } else { 1206 /* 1207 * If the port is in the well-known privileged range, 1208 * make sure the caller was privileged. 1209 */ 1210 int i; 1211 boolean_t priv = B_FALSE; 1212 1213 if (port < udp_smallest_nonpriv_port) { 1214 priv = B_TRUE; 1215 } else { 1216 for (i = 0; i < udp_g_num_epriv_ports; i++) { 1217 if (port == udp_g_epriv_ports[i]) { 1218 priv = B_TRUE; 1219 break; 1220 } 1221 } 1222 } 1223 1224 if (priv) { 1225 cred_t *cr = DB_CREDDEF(mp, connp->conn_cred); 1226 1227 if (secpolicy_net_privaddr(cr, port) != 0) { 1228 udp_err_ack(q, mp, TACCES, 0); 1229 return; 1230 } 1231 } 1232 } 1233 1234 /* 1235 * Copy the source address into our udp structure. This address 1236 * may still be zero; if so, IP will fill in the correct address 1237 * each time an outbound packet is passed to it. 1238 */ 1239 if (udp->udp_family == AF_INET) { 1240 ASSERT(sin != NULL); 1241 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1242 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1243 udp->udp_ip_snd_options_len; 1244 IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src); 1245 } else { 1246 ASSERT(sin6 != NULL); 1247 v6src = sin6->sin6_addr; 1248 if (IN6_IS_ADDR_V4MAPPED(&v6src)) { 1249 udp->udp_ipversion = IPV4_VERSION; 1250 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1251 UDPH_SIZE + udp->udp_ip_snd_options_len; 1252 } else { 1253 udp->udp_ipversion = IPV6_VERSION; 1254 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1255 } 1256 } 1257 1258 /* 1259 * If udp_reuseaddr is not set, then we have to make sure that 1260 * the IP address and port number the application requested 1261 * (or we selected for the application) is not being used by 1262 * another stream. If another stream is already using the 1263 * requested IP address and port, the behavior depends on 1264 * "bind_to_req_port_only". If set the bind fails; otherwise we 1265 * search for any an unused port to bind to the the stream. 1266 * 1267 * As per the BSD semantics, as modified by the Deering multicast 1268 * changes, if udp_reuseaddr is set, then we allow multiple binds 1269 * to the same port independent of the local IP address. 1270 * 1271 * This is slightly different than in SunOS 4.X which did not 1272 * support IP multicast. Note that the change implemented by the 1273 * Deering multicast code effects all binds - not only binding 1274 * to IP multicast addresses. 1275 * 1276 * Note that when binding to port zero we ignore SO_REUSEADDR in 1277 * order to guarantee a unique port. 1278 */ 1279 1280 count = 0; 1281 if (udp->udp_anon_priv_bind) { 1282 /* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */ 1283 loopmax = IPPORT_RESERVED - udp_min_anonpriv_port; 1284 } else { 1285 loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1; 1286 } 1287 1288 zoneid = connp->conn_zoneid; 1289 for (;;) { 1290 udp_t *udp1; 1291 boolean_t is_inaddr_any; 1292 boolean_t found_exclbind = B_FALSE; 1293 1294 is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src); 1295 /* 1296 * Walk through the list of udp streams bound to 1297 * requested port with the same IP address. 1298 */ 1299 lport = htons(port); 1300 udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)]; 1301 mutex_enter(&udpf->uf_lock); 1302 for (udp1 = udpf->uf_udp; udp1 != NULL; 1303 udp1 = udp1->udp_bind_hash) { 1304 if (lport != udp1->udp_port || 1305 zoneid != udp1->udp_connp->conn_zoneid) 1306 continue; 1307 1308 /* 1309 * If UDP_EXCLBIND is set for either the bound or 1310 * binding endpoint, the semantics of bind 1311 * is changed according to the following chart. 1312 * 1313 * spec = specified address (v4 or v6) 1314 * unspec = unspecified address (v4 or v6) 1315 * A = specified addresses are different for endpoints 1316 * 1317 * bound bind to allowed? 1318 * ------------------------------------- 1319 * unspec unspec no 1320 * unspec spec no 1321 * spec unspec no 1322 * spec spec yes if A 1323 */ 1324 if (udp1->udp_exclbind || udp->udp_exclbind) { 1325 if (V6_OR_V4_INADDR_ANY( 1326 udp1->udp_bound_v6src) || 1327 is_inaddr_any || 1328 IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1329 &v6src)) { 1330 found_exclbind = B_TRUE; 1331 break; 1332 } 1333 continue; 1334 } 1335 1336 /* 1337 * Check ipversion to allow IPv4 and IPv6 sockets to 1338 * have disjoint port number spaces. 1339 */ 1340 if (udp->udp_ipversion != udp1->udp_ipversion) 1341 continue; 1342 1343 /* 1344 * No difference depending on SO_REUSEADDR. 1345 * 1346 * If existing port is bound to a 1347 * non-wildcard IP address and 1348 * the requesting stream is bound to 1349 * a distinct different IP addresses 1350 * (non-wildcard, also), keep going. 1351 */ 1352 if (!is_inaddr_any && 1353 !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) && 1354 !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src, 1355 &v6src)) { 1356 continue; 1357 } 1358 break; 1359 } 1360 1361 if (!found_exclbind && 1362 (udp->udp_reuseaddr && requested_port != 0)) { 1363 break; 1364 } 1365 1366 if (udp1 == NULL) { 1367 /* 1368 * No other stream has this IP address 1369 * and port number. We can use it. 1370 */ 1371 break; 1372 } 1373 mutex_exit(&udpf->uf_lock); 1374 if (bind_to_req_port_only) { 1375 /* 1376 * We get here only when requested port 1377 * is bound (and only first of the for() 1378 * loop iteration). 1379 * 1380 * The semantics of this bind request 1381 * require it to fail so we return from 1382 * the routine (and exit the loop). 1383 * 1384 */ 1385 udp_err_ack(q, mp, TADDRBUSY, 0); 1386 return; 1387 } 1388 1389 if (udp->udp_anon_priv_bind) { 1390 port = udp_get_next_priv_port(); 1391 } else { 1392 if ((count == 0) && (requested_port != 0)) { 1393 /* 1394 * If the application wants us to find 1395 * a port, get one to start with. Set 1396 * requested_port to 0, so that we will 1397 * update udp_g_next_port_to_try below. 1398 */ 1399 port = udp_update_next_port( 1400 udp_g_next_port_to_try, B_TRUE); 1401 requested_port = 0; 1402 } else { 1403 port = udp_update_next_port(port + 1, B_FALSE); 1404 } 1405 } 1406 1407 if (++count >= loopmax) { 1408 /* 1409 * We've tried every possible port number and 1410 * there are none available, so send an error 1411 * to the user. 1412 */ 1413 udp_err_ack(q, mp, TNOADDR, 0); 1414 return; 1415 } 1416 } 1417 1418 /* 1419 * Copy the source address into our udp structure. This address 1420 * may still be zero; if so, ip will fill in the correct address 1421 * each time an outbound packet is passed to it. 1422 * If we are binding to a broadcast or multicast address udp_rput 1423 * will clear the source address when it receives the T_BIND_ACK. 1424 */ 1425 udp->udp_v6src = udp->udp_bound_v6src = v6src; 1426 udp->udp_port = lport; 1427 /* 1428 * Now reset the the next anonymous port if the application requested 1429 * an anonymous port, or we handed out the next anonymous port. 1430 */ 1431 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 1432 udp_g_next_port_to_try = port + 1; 1433 } 1434 1435 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 1436 if (udp->udp_family == AF_INET) { 1437 sin->sin_port = udp->udp_port; 1438 } else { 1439 int error; 1440 1441 sin6->sin6_port = udp->udp_port; 1442 /* Rebuild the header template */ 1443 error = udp_build_hdrs(q, udp); 1444 if (error != 0) { 1445 mutex_exit(&udpf->uf_lock); 1446 udp_err_ack(q, mp, TSYSERR, error); 1447 return; 1448 } 1449 } 1450 udp->udp_state = TS_IDLE; 1451 udp_bind_hash_insert(udpf, udp); 1452 mutex_exit(&udpf->uf_lock); 1453 1454 if (cl_inet_bind) { 1455 /* 1456 * Running in cluster mode - register bind information 1457 */ 1458 if (udp->udp_ipversion == IPV4_VERSION) { 1459 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1460 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1461 (in_port_t)udp->udp_port); 1462 } else { 1463 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1464 (uint8_t *)&(udp->udp_v6src), 1465 (in_port_t)udp->udp_port); 1466 } 1467 1468 } 1469 1470 /* Pass the protocol number in the message following the address. */ 1471 *mp->b_wptr++ = IPPROTO_UDP; 1472 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1473 /* 1474 * Append a request for an IRE if udp_v6src not 1475 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1476 */ 1477 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1478 if (!mp->b_cont) { 1479 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1480 return; 1481 } 1482 mp->b_cont->b_wptr += sizeof (ire_t); 1483 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1484 } 1485 if (udp->udp_family == AF_INET6) 1486 mp = ip_bind_v6(q, mp, connp, NULL); 1487 else 1488 mp = ip_bind_v4(q, mp, connp); 1489 1490 if (mp != NULL) 1491 udp_rput_other(_RD(q), mp); 1492 else 1493 CONN_INC_REF(connp); 1494 } 1495 1496 1497 void 1498 udp_resume_bind(conn_t *connp, mblk_t *mp) 1499 { 1500 udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY); 1501 } 1502 1503 /* 1504 * This is called from ip_wput_nondata to resume a deferred UDP bind. 1505 */ 1506 /* ARGSUSED */ 1507 static void 1508 udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2) 1509 { 1510 conn_t *connp = arg; 1511 1512 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1513 1514 udp_rput_other(connp->conn_rq, mp); 1515 1516 CONN_OPER_PENDING_DONE(connp); 1517 udp_exit(connp); 1518 } 1519 1520 /* 1521 * This routine handles each T_CONN_REQ message passed to udp. It 1522 * associates a default destination address with the stream. 1523 * 1524 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1525 * T_BIND_REQ - specifying local and remote address/port 1526 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 1527 * T_OK_ACK - for the T_CONN_REQ 1528 * T_CONN_CON - to keep the TPI user happy 1529 * 1530 * The connect completes in udp_rput. 1531 * When a T_BIND_ACK is received information is extracted from the IRE 1532 * and the two appended messages are sent to the TPI user. 1533 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1534 * it to an error ack for the appropriate primitive. 1535 */ 1536 static void 1537 udp_connect(queue_t *q, mblk_t *mp) 1538 { 1539 sin6_t *sin6; 1540 sin_t *sin; 1541 struct T_conn_req *tcr; 1542 in6_addr_t v6dst; 1543 ipaddr_t v4dst; 1544 uint16_t dstport; 1545 uint32_t flowinfo; 1546 mblk_t *mp1, *mp2; 1547 udp_fanout_t *udpf; 1548 udp_t *udp, *udp1; 1549 1550 udp = Q_TO_UDP(q); 1551 1552 tcr = (struct T_conn_req *)mp->b_rptr; 1553 1554 /* A bit of sanity checking */ 1555 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 1556 udp_err_ack(q, mp, TPROTO, 0); 1557 return; 1558 } 1559 /* 1560 * This UDP must have bound to a port already before doing 1561 * a connect. 1562 */ 1563 if (udp->udp_state == TS_UNBND) { 1564 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1565 "udp_connect: bad state, %u", udp->udp_state); 1566 udp_err_ack(q, mp, TOUTSTATE, 0); 1567 return; 1568 } 1569 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 1570 1571 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1572 1573 if (udp->udp_state == TS_DATA_XFER) { 1574 /* Already connected - clear out state */ 1575 mutex_enter(&udpf->uf_lock); 1576 udp->udp_v6src = udp->udp_bound_v6src; 1577 udp->udp_state = TS_IDLE; 1578 mutex_exit(&udpf->uf_lock); 1579 } 1580 1581 if (tcr->OPT_length != 0) { 1582 udp_err_ack(q, mp, TBADOPT, 0); 1583 return; 1584 } 1585 1586 /* 1587 * Determine packet type based on type of address passed in 1588 * the request should contain an IPv4 or IPv6 address. 1589 * Make sure that address family matches the type of 1590 * family of the the address passed down 1591 */ 1592 switch (tcr->DEST_length) { 1593 default: 1594 udp_err_ack(q, mp, TBADADDR, 0); 1595 return; 1596 1597 case sizeof (sin_t): 1598 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 1599 sizeof (sin_t)); 1600 if (sin == NULL || !OK_32PTR((char *)sin)) { 1601 udp_err_ack(q, mp, TSYSERR, EINVAL); 1602 return; 1603 } 1604 if (udp->udp_family != AF_INET || 1605 sin->sin_family != AF_INET) { 1606 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1607 return; 1608 } 1609 v4dst = sin->sin_addr.s_addr; 1610 dstport = sin->sin_port; 1611 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1612 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1613 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1614 udp->udp_ip_snd_options_len; 1615 break; 1616 1617 case sizeof (sin6_t): 1618 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 1619 sizeof (sin6_t)); 1620 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1621 udp_err_ack(q, mp, TSYSERR, EINVAL); 1622 return; 1623 } 1624 if (udp->udp_family != AF_INET6 || 1625 sin6->sin6_family != AF_INET6) { 1626 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1627 return; 1628 } 1629 v6dst = sin6->sin6_addr; 1630 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 1631 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 1632 udp->udp_ipversion = IPV4_VERSION; 1633 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1634 UDPH_SIZE + udp->udp_ip_snd_options_len; 1635 flowinfo = 0; 1636 } else { 1637 udp->udp_ipversion = IPV6_VERSION; 1638 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1639 flowinfo = sin6->sin6_flowinfo; 1640 } 1641 dstport = sin6->sin6_port; 1642 break; 1643 } 1644 if (dstport == 0) { 1645 udp_err_ack(q, mp, TBADADDR, 0); 1646 return; 1647 } 1648 1649 /* 1650 * Create a default IP header with no IP options. 1651 */ 1652 udp->udp_dstport = dstport; 1653 if (udp->udp_ipversion == IPV4_VERSION) { 1654 /* 1655 * Interpret a zero destination to mean loopback. 1656 * Update the T_CONN_REQ (sin/sin6) since it is used to 1657 * generate the T_CONN_CON. 1658 */ 1659 if (v4dst == INADDR_ANY) { 1660 v4dst = htonl(INADDR_LOOPBACK); 1661 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1662 if (udp->udp_family == AF_INET) { 1663 sin->sin_addr.s_addr = v4dst; 1664 } else { 1665 sin6->sin6_addr = v6dst; 1666 } 1667 } 1668 udp->udp_v6dst = v6dst; 1669 udp->udp_flowinfo = 0; 1670 1671 /* 1672 * If the destination address is multicast and 1673 * an outgoing multicast interface has been set, 1674 * use the address of that interface as our 1675 * source address if no source address has been set. 1676 */ 1677 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 1678 CLASSD(v4dst) && 1679 udp->udp_multicast_if_addr != INADDR_ANY) { 1680 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 1681 &udp->udp_v6src); 1682 } 1683 } else { 1684 ASSERT(udp->udp_ipversion == IPV6_VERSION); 1685 /* 1686 * Interpret a zero destination to mean loopback. 1687 * Update the T_CONN_REQ (sin/sin6) since it is used to 1688 * generate the T_CONN_CON. 1689 */ 1690 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 1691 v6dst = ipv6_loopback; 1692 sin6->sin6_addr = v6dst; 1693 } 1694 udp->udp_v6dst = v6dst; 1695 udp->udp_flowinfo = flowinfo; 1696 /* 1697 * If the destination address is multicast and 1698 * an outgoing multicast interface has been set, 1699 * then the ip bind logic will pick the correct source 1700 * address (i.e. matching the outgoing multicast interface). 1701 */ 1702 } 1703 1704 /* 1705 * Verify that the src/port/dst/port is unique for all 1706 * connections in TS_DATA_XFER 1707 */ 1708 mutex_enter(&udpf->uf_lock); 1709 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 1710 if (udp1->udp_state != TS_DATA_XFER) 1711 continue; 1712 if (udp->udp_port != udp1->udp_port || 1713 udp->udp_ipversion != udp1->udp_ipversion || 1714 dstport != udp1->udp_dstport || 1715 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 1716 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst)) 1717 continue; 1718 mutex_exit(&udpf->uf_lock); 1719 udp_err_ack(q, mp, TBADADDR, 0); 1720 return; 1721 } 1722 udp->udp_state = TS_DATA_XFER; 1723 mutex_exit(&udpf->uf_lock); 1724 1725 /* 1726 * Send down bind to IP to verify that there is a route 1727 * and to determine the source address. 1728 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 1729 */ 1730 if (udp->udp_family == AF_INET) 1731 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t)); 1732 else 1733 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); 1734 if (mp1 == NULL) { 1735 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1736 bind_failed: 1737 mutex_enter(&udpf->uf_lock); 1738 udp->udp_state = TS_IDLE; 1739 mutex_exit(&udpf->uf_lock); 1740 return; 1741 } 1742 1743 /* 1744 * We also have to send a connection confirmation to 1745 * keep TLI happy. Prepare it for udp_rput. 1746 */ 1747 if (udp->udp_family == AF_INET) 1748 mp2 = mi_tpi_conn_con(NULL, (char *)sin, 1749 sizeof (*sin), NULL, 0); 1750 else 1751 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, 1752 sizeof (*sin6), NULL, 0); 1753 if (mp2 == NULL) { 1754 freemsg(mp1); 1755 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1756 goto bind_failed; 1757 } 1758 1759 mp = mi_tpi_ok_ack_alloc(mp); 1760 if (mp == NULL) { 1761 /* Unable to reuse the T_CONN_REQ for the ack. */ 1762 freemsg(mp2); 1763 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 1764 goto bind_failed; 1765 } 1766 1767 /* Hang onto the T_OK_ACK and T_CONN_CON for later. */ 1768 linkb(mp1, mp); 1769 linkb(mp1, mp2); 1770 1771 if (udp->udp_family == AF_INET) 1772 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1773 else 1774 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1775 1776 if (mp1 != NULL) 1777 udp_rput_other(_RD(q), mp1); 1778 else 1779 CONN_INC_REF(udp->udp_connp); 1780 } 1781 1782 static int 1783 udp_close(queue_t *q) 1784 { 1785 conn_t *connp = Q_TO_CONN(UDP_WR(q)); 1786 udp_t *udp; 1787 queue_t *ip_rq = RD(UDP_WR(q)); 1788 1789 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1790 udp = connp->conn_udp; 1791 1792 ip_quiesce_conn(connp); 1793 /* 1794 * Disable read-side synchronous stream 1795 * interface and drain any queued data. 1796 */ 1797 udp_rcv_drain(q, udp, B_TRUE); 1798 ASSERT(!udp->udp_direct_sockfs); 1799 1800 qprocsoff(q); 1801 1802 /* restore IP module's high and low water marks to default values */ 1803 ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat; 1804 WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat; 1805 WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat; 1806 1807 ASSERT(udp->udp_rcv_cnt == 0); 1808 ASSERT(udp->udp_rcv_msgcnt == 0); 1809 ASSERT(udp->udp_rcv_list_head == NULL); 1810 ASSERT(udp->udp_rcv_list_tail == NULL); 1811 1812 /* connp is now single threaded. */ 1813 udp_close_free(connp); 1814 /* 1815 * Restore connp as an IP endpoint. We don't need 1816 * any locks since we are now single threaded 1817 */ 1818 connp->conn_flags &= ~IPCL_UDP; 1819 connp->conn_state_flags &= 1820 ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED); 1821 return (0); 1822 } 1823 1824 /* 1825 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn 1826 */ 1827 void 1828 udp_quiesce_conn(conn_t *connp) 1829 { 1830 udp_t *udp = connp->conn_udp; 1831 1832 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 1833 /* 1834 * Running in cluster mode - register unbind information 1835 */ 1836 if (udp->udp_ipversion == IPV4_VERSION) { 1837 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 1838 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 1839 (in_port_t)udp->udp_port); 1840 } else { 1841 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 1842 (uint8_t *)(&(udp->udp_v6src)), 1843 (in_port_t)udp->udp_port); 1844 } 1845 } 1846 1847 udp_bind_hash_remove(udp, B_FALSE); 1848 1849 mutex_enter(&connp->conn_lock); 1850 while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 || 1851 udp->udp_mode != UDP_MT_HOT) { 1852 cv_wait(&connp->conn_cv, &connp->conn_lock); 1853 } 1854 mutex_exit(&connp->conn_lock); 1855 } 1856 1857 void 1858 udp_close_free(conn_t *connp) 1859 { 1860 udp_t *udp = connp->conn_udp; 1861 1862 /* If there are any options associated with the stream, free them. */ 1863 if (udp->udp_ip_snd_options) { 1864 mi_free((char *)udp->udp_ip_snd_options); 1865 udp->udp_ip_snd_options = NULL; 1866 } 1867 1868 if (udp->udp_ip_rcv_options) { 1869 mi_free((char *)udp->udp_ip_rcv_options); 1870 udp->udp_ip_rcv_options = NULL; 1871 } 1872 1873 /* Free memory associated with sticky options */ 1874 if (udp->udp_sticky_hdrs_len != 0) { 1875 kmem_free(udp->udp_sticky_hdrs, 1876 udp->udp_sticky_hdrs_len); 1877 udp->udp_sticky_hdrs = NULL; 1878 udp->udp_sticky_hdrs_len = 0; 1879 } 1880 1881 if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 1882 kmem_free(udp->udp_sticky_ipp.ipp_hopopts, 1883 udp->udp_sticky_ipp.ipp_hopoptslen); 1884 udp->udp_sticky_ipp.ipp_hopopts = NULL; 1885 } 1886 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 1887 kmem_free(udp->udp_sticky_ipp.ipp_rtdstopts, 1888 udp->udp_sticky_ipp.ipp_rtdstoptslen); 1889 udp->udp_sticky_ipp.ipp_rtdstopts = NULL; 1890 } 1891 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 1892 kmem_free(udp->udp_sticky_ipp.ipp_rthdr, 1893 udp->udp_sticky_ipp.ipp_rthdrlen); 1894 udp->udp_sticky_ipp.ipp_rthdr = NULL; 1895 } 1896 if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 1897 kmem_free(udp->udp_sticky_ipp.ipp_dstopts, 1898 udp->udp_sticky_ipp.ipp_dstoptslen); 1899 udp->udp_sticky_ipp.ipp_dstopts = NULL; 1900 } 1901 udp->udp_sticky_ipp.ipp_fields &= 1902 ~(IPPF_HOPOPTS|IPPF_RTDSTOPTS|IPPF_RTHDR|IPPF_DSTOPTS); 1903 1904 udp->udp_connp = NULL; 1905 connp->conn_udp = NULL; 1906 kmem_cache_free(udp_cache, udp); 1907 } 1908 1909 /* 1910 * This routine handles each T_DISCON_REQ message passed to udp 1911 * as an indicating that UDP is no longer connected. This results 1912 * in sending a T_BIND_REQ to IP to restore the binding to just 1913 * the local address/port. 1914 * 1915 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1916 * T_BIND_REQ - specifying just the local address/port 1917 * T_OK_ACK - for the T_DISCON_REQ 1918 * 1919 * The disconnect completes in udp_rput. 1920 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 1921 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1922 * it to an error ack for the appropriate primitive. 1923 */ 1924 static void 1925 udp_disconnect(queue_t *q, mblk_t *mp) 1926 { 1927 udp_t *udp = Q_TO_UDP(q); 1928 mblk_t *mp1; 1929 udp_fanout_t *udpf; 1930 1931 if (udp->udp_state != TS_DATA_XFER) { 1932 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1933 "udp_disconnect: bad state, %u", udp->udp_state); 1934 udp_err_ack(q, mp, TOUTSTATE, 0); 1935 return; 1936 } 1937 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1938 mutex_enter(&udpf->uf_lock); 1939 udp->udp_v6src = udp->udp_bound_v6src; 1940 udp->udp_state = TS_IDLE; 1941 mutex_exit(&udpf->uf_lock); 1942 1943 /* 1944 * Send down bind to IP to remove the full binding and revert 1945 * to the local address binding. 1946 */ 1947 if (udp->udp_family == AF_INET) 1948 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 1949 else 1950 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 1951 if (mp1 == NULL) { 1952 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1953 return; 1954 } 1955 mp = mi_tpi_ok_ack_alloc(mp); 1956 if (mp == NULL) { 1957 /* Unable to reuse the T_DISCON_REQ for the ack. */ 1958 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 1959 return; 1960 } 1961 1962 if (udp->udp_family == AF_INET6) { 1963 int error; 1964 1965 /* Rebuild the header template */ 1966 error = udp_build_hdrs(q, udp); 1967 if (error != 0) { 1968 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 1969 freemsg(mp1); 1970 return; 1971 } 1972 } 1973 mutex_enter(&udpf->uf_lock); 1974 udp->udp_discon_pending = 1; 1975 mutex_exit(&udpf->uf_lock); 1976 1977 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 1978 linkb(mp1, mp); 1979 1980 if (udp->udp_family == AF_INET6) 1981 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1982 else 1983 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1984 1985 if (mp1 != NULL) 1986 udp_rput_other(_RD(q), mp1); 1987 else 1988 CONN_INC_REF(udp->udp_connp); 1989 } 1990 1991 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1992 static void 1993 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1994 { 1995 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1996 putnext(UDP_RD(q), mp); 1997 } 1998 1999 /* Shorthand to generate and send TPI error acks to our client */ 2000 static void 2001 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2002 int sys_error) 2003 { 2004 struct T_error_ack *teackp; 2005 2006 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2007 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2008 teackp = (struct T_error_ack *)mp->b_rptr; 2009 teackp->ERROR_prim = primitive; 2010 teackp->TLI_error = t_error; 2011 teackp->UNIX_error = sys_error; 2012 putnext(UDP_RD(q), mp); 2013 } 2014 } 2015 2016 /*ARGSUSED*/ 2017 static int 2018 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2019 { 2020 int i; 2021 2022 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2023 if (udp_g_epriv_ports[i] != 0) 2024 (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); 2025 } 2026 return (0); 2027 } 2028 2029 /* ARGSUSED */ 2030 static int 2031 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2032 cred_t *cr) 2033 { 2034 long new_value; 2035 int i; 2036 2037 /* 2038 * Fail the request if the new value does not lie within the 2039 * port number limits. 2040 */ 2041 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2042 new_value <= 0 || new_value >= 65536) { 2043 return (EINVAL); 2044 } 2045 2046 /* Check if the value is already in the list */ 2047 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2048 if (new_value == udp_g_epriv_ports[i]) { 2049 return (EEXIST); 2050 } 2051 } 2052 /* Find an empty slot */ 2053 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2054 if (udp_g_epriv_ports[i] == 0) 2055 break; 2056 } 2057 if (i == udp_g_num_epriv_ports) { 2058 return (EOVERFLOW); 2059 } 2060 2061 /* Set the new value */ 2062 udp_g_epriv_ports[i] = (in_port_t)new_value; 2063 return (0); 2064 } 2065 2066 /* ARGSUSED */ 2067 static int 2068 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2069 cred_t *cr) 2070 { 2071 long new_value; 2072 int i; 2073 2074 /* 2075 * Fail the request if the new value does not lie within the 2076 * port number limits. 2077 */ 2078 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2079 new_value <= 0 || new_value >= 65536) { 2080 return (EINVAL); 2081 } 2082 2083 /* Check that the value is already in the list */ 2084 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2085 if (udp_g_epriv_ports[i] == new_value) 2086 break; 2087 } 2088 if (i == udp_g_num_epriv_ports) { 2089 return (ESRCH); 2090 } 2091 2092 /* Clear the value */ 2093 udp_g_epriv_ports[i] = 0; 2094 return (0); 2095 } 2096 2097 /* At minimum we need 4 bytes of UDP header */ 2098 #define ICMP_MIN_UDP_HDR 4 2099 2100 /* 2101 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2102 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2103 * Assumes that IP has pulled up everything up to and including the ICMP header. 2104 * An M_CTL could potentially come here from some other module (i.e. if UDP 2105 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2106 * does not have enough ICMP information , following STREAMS conventions, 2107 * we send it upstream assuming it is an M_CTL we don't understand. 2108 */ 2109 static void 2110 udp_icmp_error(queue_t *q, mblk_t *mp) 2111 { 2112 icmph_t *icmph; 2113 ipha_t *ipha; 2114 int iph_hdr_length; 2115 udpha_t *udpha; 2116 sin_t sin; 2117 sin6_t sin6; 2118 mblk_t *mp1; 2119 int error = 0; 2120 size_t mp_size = MBLKL(mp); 2121 udp_t *udp = Q_TO_UDP(q); 2122 2123 /* 2124 * Assume IP provides aligned packets - otherwise toss 2125 */ 2126 if (!OK_32PTR(mp->b_rptr)) { 2127 freemsg(mp); 2128 return; 2129 } 2130 2131 /* 2132 * Verify that we have a complete IP header and the application has 2133 * asked for errors. If not, send it upstream. 2134 */ 2135 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2136 noticmpv4: 2137 putnext(UDP_RD(q), mp); 2138 return; 2139 } 2140 2141 ipha = (ipha_t *)mp->b_rptr; 2142 /* 2143 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2144 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6. 2145 */ 2146 switch (IPH_HDR_VERSION(ipha)) { 2147 case IPV6_VERSION: 2148 udp_icmp_error_ipv6(q, mp); 2149 return; 2150 case IPV4_VERSION: 2151 break; 2152 default: 2153 goto noticmpv4; 2154 } 2155 2156 /* Skip past the outer IP and ICMP headers */ 2157 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2158 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2159 /* 2160 * If we don't have the correct outer IP header length or if the ULP 2161 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2162 * send the packet upstream. 2163 */ 2164 if (iph_hdr_length < sizeof (ipha_t) || 2165 ipha->ipha_protocol != IPPROTO_ICMP || 2166 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2167 goto noticmpv4; 2168 } 2169 ipha = (ipha_t *)&icmph[1]; 2170 2171 /* Skip past the inner IP and find the ULP header */ 2172 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2173 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2174 /* 2175 * If we don't have the correct inner IP header length or if the ULP 2176 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2177 * bytes of UDP header, send it upstream. 2178 */ 2179 if (iph_hdr_length < sizeof (ipha_t) || 2180 ipha->ipha_protocol != IPPROTO_UDP || 2181 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2182 goto noticmpv4; 2183 } 2184 2185 switch (icmph->icmph_type) { 2186 case ICMP_DEST_UNREACHABLE: 2187 switch (icmph->icmph_code) { 2188 case ICMP_FRAGMENTATION_NEEDED: 2189 /* 2190 * IP has already adjusted the path MTU. 2191 * XXX Somehow pass MTU indication to application? 2192 */ 2193 break; 2194 case ICMP_PORT_UNREACHABLE: 2195 case ICMP_PROTOCOL_UNREACHABLE: 2196 error = ECONNREFUSED; 2197 break; 2198 default: 2199 /* Transient errors */ 2200 break; 2201 } 2202 break; 2203 default: 2204 /* Transient errors */ 2205 break; 2206 } 2207 if (error == 0) { 2208 freemsg(mp); 2209 return; 2210 } 2211 2212 switch (udp->udp_family) { 2213 case AF_INET: 2214 sin = sin_null; 2215 sin.sin_family = AF_INET; 2216 sin.sin_addr.s_addr = ipha->ipha_dst; 2217 sin.sin_port = udpha->uha_dst_port; 2218 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2219 error); 2220 break; 2221 case AF_INET6: 2222 sin6 = sin6_null; 2223 sin6.sin6_family = AF_INET6; 2224 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2225 sin6.sin6_port = udpha->uha_dst_port; 2226 2227 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2228 NULL, 0, error); 2229 break; 2230 } 2231 if (mp1) 2232 putnext(UDP_RD(q), mp1); 2233 freemsg(mp); 2234 } 2235 2236 /* 2237 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2238 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2239 * Assumes that IP has pulled up all the extension headers as well as the 2240 * ICMPv6 header. 2241 * An M_CTL could potentially come here from some other module (i.e. if UDP 2242 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2243 * does not have enough ICMP information , following STREAMS conventions, 2244 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2245 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version 2246 * field (when cast to ipha_t in udp_icmp_error). 2247 */ 2248 static void 2249 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2250 { 2251 icmp6_t *icmp6; 2252 ip6_t *ip6h, *outer_ip6h; 2253 uint16_t hdr_length; 2254 uint8_t *nexthdrp; 2255 udpha_t *udpha; 2256 sin6_t sin6; 2257 mblk_t *mp1; 2258 int error = 0; 2259 size_t mp_size = MBLKL(mp); 2260 udp_t *udp = Q_TO_UDP(q); 2261 2262 /* 2263 * Verify that we have a complete IP header. If not, send it upstream. 2264 */ 2265 if (mp_size < sizeof (ip6_t)) { 2266 noticmpv6: 2267 putnext(UDP_RD(q), mp); 2268 return; 2269 } 2270 2271 outer_ip6h = (ip6_t *)mp->b_rptr; 2272 /* 2273 * Verify this is an ICMPV6 packet, else send it upstream 2274 */ 2275 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2276 hdr_length = IPV6_HDR_LEN; 2277 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2278 &nexthdrp) || 2279 *nexthdrp != IPPROTO_ICMPV6) { 2280 goto noticmpv6; 2281 } 2282 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2283 ip6h = (ip6_t *)&icmp6[1]; 2284 /* 2285 * Verify we have a complete ICMP and inner IP header. 2286 */ 2287 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2288 goto noticmpv6; 2289 2290 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2291 goto noticmpv6; 2292 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2293 /* 2294 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2295 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2296 * packet upstream. 2297 */ 2298 if ((*nexthdrp != IPPROTO_UDP) || 2299 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2300 goto noticmpv6; 2301 } 2302 2303 switch (icmp6->icmp6_type) { 2304 case ICMP6_DST_UNREACH: 2305 switch (icmp6->icmp6_code) { 2306 case ICMP6_DST_UNREACH_NOPORT: 2307 error = ECONNREFUSED; 2308 break; 2309 case ICMP6_DST_UNREACH_ADMIN: 2310 case ICMP6_DST_UNREACH_NOROUTE: 2311 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2312 case ICMP6_DST_UNREACH_ADDR: 2313 /* Transient errors */ 2314 break; 2315 default: 2316 break; 2317 } 2318 break; 2319 case ICMP6_PACKET_TOO_BIG: { 2320 struct T_unitdata_ind *tudi; 2321 struct T_opthdr *toh; 2322 size_t udi_size; 2323 mblk_t *newmp; 2324 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2325 sizeof (struct ip6_mtuinfo); 2326 sin6_t *sin6; 2327 struct ip6_mtuinfo *mtuinfo; 2328 2329 /* 2330 * If the application has requested to receive path mtu 2331 * information, send up an empty message containing an 2332 * IPV6_PATHMTU ancillary data item. 2333 */ 2334 if (!udp->udp_ipv6_recvpathmtu) 2335 break; 2336 2337 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2338 opt_length; 2339 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2340 BUMP_MIB(&udp_mib, udpInErrors); 2341 break; 2342 } 2343 2344 /* 2345 * newmp->b_cont is left to NULL on purpose. This is an 2346 * empty message containing only ancillary data. 2347 */ 2348 newmp->b_datap->db_type = M_PROTO; 2349 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2350 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2351 tudi->PRIM_type = T_UNITDATA_IND; 2352 tudi->SRC_length = sizeof (sin6_t); 2353 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2354 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2355 tudi->OPT_length = opt_length; 2356 2357 sin6 = (sin6_t *)&tudi[1]; 2358 bzero(sin6, sizeof (sin6_t)); 2359 sin6->sin6_family = AF_INET6; 2360 sin6->sin6_addr = udp->udp_v6dst; 2361 2362 toh = (struct T_opthdr *)&sin6[1]; 2363 toh->level = IPPROTO_IPV6; 2364 toh->name = IPV6_PATHMTU; 2365 toh->len = opt_length; 2366 toh->status = 0; 2367 2368 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2369 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2370 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2371 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2372 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2373 /* 2374 * We've consumed everything we need from the original 2375 * message. Free it, then send our empty message. 2376 */ 2377 freemsg(mp); 2378 putnext(UDP_RD(q), newmp); 2379 return; 2380 } 2381 case ICMP6_TIME_EXCEEDED: 2382 /* Transient errors */ 2383 break; 2384 case ICMP6_PARAM_PROB: 2385 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2386 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2387 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2388 (uchar_t *)nexthdrp) { 2389 error = ECONNREFUSED; 2390 break; 2391 } 2392 break; 2393 } 2394 if (error == 0) { 2395 freemsg(mp); 2396 return; 2397 } 2398 2399 sin6 = sin6_null; 2400 sin6.sin6_family = AF_INET6; 2401 sin6.sin6_addr = ip6h->ip6_dst; 2402 sin6.sin6_port = udpha->uha_dst_port; 2403 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2404 2405 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2406 error); 2407 if (mp1) 2408 putnext(UDP_RD(q), mp1); 2409 freemsg(mp); 2410 } 2411 2412 /* 2413 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 2414 * The local address is filled in if endpoint is bound. The remote address 2415 * is filled in if remote address has been precified ("connected endpoint") 2416 * (The concept of connected CLTS sockets is alien to published TPI 2417 * but we support it anyway). 2418 */ 2419 static void 2420 udp_addr_req(queue_t *q, mblk_t *mp) 2421 { 2422 sin_t *sin; 2423 sin6_t *sin6; 2424 mblk_t *ackmp; 2425 struct T_addr_ack *taa; 2426 udp_t *udp = Q_TO_UDP(q); 2427 2428 /* Make it large enough for worst case */ 2429 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2430 2 * sizeof (sin6_t), 1); 2431 if (ackmp == NULL) { 2432 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2433 return; 2434 } 2435 taa = (struct T_addr_ack *)ackmp->b_rptr; 2436 2437 bzero(taa, sizeof (struct T_addr_ack)); 2438 ackmp->b_wptr = (uchar_t *)&taa[1]; 2439 2440 taa->PRIM_type = T_ADDR_ACK; 2441 ackmp->b_datap->db_type = M_PCPROTO; 2442 /* 2443 * Note: Following code assumes 32 bit alignment of basic 2444 * data structures like sin_t and struct T_addr_ack. 2445 */ 2446 if (udp->udp_state != TS_UNBND) { 2447 /* 2448 * Fill in local address first 2449 */ 2450 taa->LOCADDR_offset = sizeof (*taa); 2451 if (udp->udp_family == AF_INET) { 2452 taa->LOCADDR_length = sizeof (sin_t); 2453 sin = (sin_t *)&taa[1]; 2454 /* Fill zeroes and then initialize non-zero fields */ 2455 *sin = sin_null; 2456 sin->sin_family = AF_INET; 2457 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2458 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2459 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2460 sin->sin_addr.s_addr); 2461 } else { 2462 /* 2463 * INADDR_ANY 2464 * udp_v6src is not set, we might be bound to 2465 * broadcast/multicast. Use udp_bound_v6src as 2466 * local address instead (that could 2467 * also still be INADDR_ANY) 2468 */ 2469 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2470 sin->sin_addr.s_addr); 2471 } 2472 sin->sin_port = udp->udp_port; 2473 ackmp->b_wptr = (uchar_t *)&sin[1]; 2474 if (udp->udp_state == TS_DATA_XFER) { 2475 /* 2476 * connected, fill remote address too 2477 */ 2478 taa->REMADDR_length = sizeof (sin_t); 2479 /* assumed 32-bit alignment */ 2480 taa->REMADDR_offset = taa->LOCADDR_offset + 2481 taa->LOCADDR_length; 2482 2483 sin = (sin_t *)(ackmp->b_rptr + 2484 taa->REMADDR_offset); 2485 /* initialize */ 2486 *sin = sin_null; 2487 sin->sin_family = AF_INET; 2488 sin->sin_addr.s_addr = 2489 V4_PART_OF_V6(udp->udp_v6dst); 2490 sin->sin_port = udp->udp_dstport; 2491 ackmp->b_wptr = (uchar_t *)&sin[1]; 2492 } 2493 } else { 2494 taa->LOCADDR_length = sizeof (sin6_t); 2495 sin6 = (sin6_t *)&taa[1]; 2496 /* Fill zeroes and then initialize non-zero fields */ 2497 *sin6 = sin6_null; 2498 sin6->sin6_family = AF_INET6; 2499 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2500 sin6->sin6_addr = udp->udp_v6src; 2501 } else { 2502 /* 2503 * UNSPECIFIED 2504 * udp_v6src is not set, we might be bound to 2505 * broadcast/multicast. Use udp_bound_v6src as 2506 * local address instead (that could 2507 * also still be UNSPECIFIED) 2508 */ 2509 sin6->sin6_addr = 2510 udp->udp_bound_v6src; 2511 } 2512 sin6->sin6_port = udp->udp_port; 2513 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2514 if (udp->udp_state == TS_DATA_XFER) { 2515 /* 2516 * connected, fill remote address too 2517 */ 2518 taa->REMADDR_length = sizeof (sin6_t); 2519 /* assumed 32-bit alignment */ 2520 taa->REMADDR_offset = taa->LOCADDR_offset + 2521 taa->LOCADDR_length; 2522 2523 sin6 = (sin6_t *)(ackmp->b_rptr + 2524 taa->REMADDR_offset); 2525 /* initialize */ 2526 *sin6 = sin6_null; 2527 sin6->sin6_family = AF_INET6; 2528 sin6->sin6_addr = udp->udp_v6dst; 2529 sin6->sin6_port = udp->udp_dstport; 2530 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2531 } 2532 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2533 } 2534 } 2535 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2536 putnext(UDP_RD(q), ackmp); 2537 } 2538 2539 static void 2540 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2541 { 2542 if (udp->udp_family == AF_INET) { 2543 *tap = udp_g_t_info_ack_ipv4; 2544 } else { 2545 *tap = udp_g_t_info_ack_ipv6; 2546 } 2547 tap->CURRENT_state = udp->udp_state; 2548 tap->OPT_size = udp_max_optsize; 2549 } 2550 2551 /* 2552 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2553 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2554 * udp_g_t_info_ack. The current state of the stream is copied from 2555 * udp_state. 2556 */ 2557 static void 2558 udp_capability_req(queue_t *q, mblk_t *mp) 2559 { 2560 t_uscalar_t cap_bits1; 2561 struct T_capability_ack *tcap; 2562 udp_t *udp = Q_TO_UDP(q); 2563 2564 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2565 2566 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2567 mp->b_datap->db_type, T_CAPABILITY_ACK); 2568 if (!mp) 2569 return; 2570 2571 tcap = (struct T_capability_ack *)mp->b_rptr; 2572 tcap->CAP_bits1 = 0; 2573 2574 if (cap_bits1 & TC1_INFO) { 2575 udp_copy_info(&tcap->INFO_ack, udp); 2576 tcap->CAP_bits1 |= TC1_INFO; 2577 } 2578 2579 putnext(UDP_RD(q), mp); 2580 } 2581 2582 /* 2583 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2584 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2585 * The current state of the stream is copied from udp_state. 2586 */ 2587 static void 2588 udp_info_req(queue_t *q, mblk_t *mp) 2589 { 2590 udp_t *udp = Q_TO_UDP(q); 2591 2592 /* Create a T_INFO_ACK message. */ 2593 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2594 T_INFO_ACK); 2595 if (!mp) 2596 return; 2597 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2598 putnext(UDP_RD(q), mp); 2599 } 2600 2601 /* 2602 * IP recognizes seven kinds of bind requests: 2603 * 2604 * - A zero-length address binds only to the protocol number. 2605 * 2606 * - A 4-byte address is treated as a request to 2607 * validate that the address is a valid local IPv4 2608 * address, appropriate for an application to bind to. 2609 * IP does the verification, but does not make any note 2610 * of the address at this time. 2611 * 2612 * - A 16-byte address contains is treated as a request 2613 * to validate a local IPv6 address, as the 4-byte 2614 * address case above. 2615 * 2616 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2617 * use it for the inbound fanout of packets. 2618 * 2619 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2620 * use it for the inbound fanout of packets. 2621 * 2622 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2623 * information consisting of local and remote addresses 2624 * and ports. In this case, the addresses are both 2625 * validated as appropriate for this operation, and, if 2626 * so, the information is retained for use in the 2627 * inbound fanout. 2628 * 2629 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2630 * fanout information, like the 12-byte case above. 2631 * 2632 * IP will also fill in the IRE request mblk with information 2633 * regarding our peer. In all cases, we notify IP of our protocol 2634 * type by appending a single protocol byte to the bind request. 2635 */ 2636 static mblk_t * 2637 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2638 { 2639 char *cp; 2640 mblk_t *mp; 2641 struct T_bind_req *tbr; 2642 ipa_conn_t *ac; 2643 ipa6_conn_t *ac6; 2644 sin_t *sin; 2645 sin6_t *sin6; 2646 2647 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2648 2649 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2650 if (!mp) 2651 return (mp); 2652 mp->b_datap->db_type = M_PROTO; 2653 tbr = (struct T_bind_req *)mp->b_rptr; 2654 tbr->PRIM_type = bind_prim; 2655 tbr->ADDR_offset = sizeof (*tbr); 2656 tbr->CONIND_number = 0; 2657 tbr->ADDR_length = addr_length; 2658 cp = (char *)&tbr[1]; 2659 switch (addr_length) { 2660 case sizeof (ipa_conn_t): 2661 ASSERT(udp->udp_family == AF_INET); 2662 /* Append a request for an IRE */ 2663 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2664 if (!mp->b_cont) { 2665 freemsg(mp); 2666 return (NULL); 2667 } 2668 mp->b_cont->b_wptr += sizeof (ire_t); 2669 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2670 2671 /* cp known to be 32 bit aligned */ 2672 ac = (ipa_conn_t *)cp; 2673 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2674 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 2675 ac->ac_fport = udp->udp_dstport; 2676 ac->ac_lport = udp->udp_port; 2677 break; 2678 2679 case sizeof (ipa6_conn_t): 2680 ASSERT(udp->udp_family == AF_INET6); 2681 /* Append a request for an IRE */ 2682 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2683 if (!mp->b_cont) { 2684 freemsg(mp); 2685 return (NULL); 2686 } 2687 mp->b_cont->b_wptr += sizeof (ire_t); 2688 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2689 2690 /* cp known to be 32 bit aligned */ 2691 ac6 = (ipa6_conn_t *)cp; 2692 ac6->ac6_laddr = udp->udp_v6src; 2693 ac6->ac6_faddr = udp->udp_v6dst; 2694 ac6->ac6_fport = udp->udp_dstport; 2695 ac6->ac6_lport = udp->udp_port; 2696 break; 2697 2698 case sizeof (sin_t): 2699 ASSERT(udp->udp_family == AF_INET); 2700 /* Append a request for an IRE */ 2701 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2702 if (!mp->b_cont) { 2703 freemsg(mp); 2704 return (NULL); 2705 } 2706 mp->b_cont->b_wptr += sizeof (ire_t); 2707 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2708 2709 sin = (sin_t *)cp; 2710 *sin = sin_null; 2711 sin->sin_family = AF_INET; 2712 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2713 sin->sin_port = udp->udp_port; 2714 break; 2715 2716 case sizeof (sin6_t): 2717 ASSERT(udp->udp_family == AF_INET6); 2718 /* Append a request for an IRE */ 2719 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2720 if (!mp->b_cont) { 2721 freemsg(mp); 2722 return (NULL); 2723 } 2724 mp->b_cont->b_wptr += sizeof (ire_t); 2725 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2726 2727 sin6 = (sin6_t *)cp; 2728 *sin6 = sin6_null; 2729 sin6->sin6_family = AF_INET6; 2730 sin6->sin6_addr = udp->udp_bound_v6src; 2731 sin6->sin6_port = udp->udp_port; 2732 break; 2733 } 2734 /* Add protocol number to end */ 2735 cp[addr_length] = (char)IPPROTO_UDP; 2736 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2737 return (mp); 2738 } 2739 2740 /* 2741 * This is the open routine for udp. It allocates a udp_t structure for 2742 * the stream and, on the first open of the module, creates an ND table. 2743 */ 2744 /* ARGSUSED */ 2745 static int 2746 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2747 { 2748 int err; 2749 udp_t *udp; 2750 conn_t *connp; 2751 zoneid_t zoneid = getzoneid(); 2752 queue_t *ip_wq; 2753 char *name; 2754 2755 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2756 2757 /* If the stream is already open, return immediately. */ 2758 if (q->q_ptr != NULL) 2759 return (0); 2760 2761 /* If this is not a push of udp as a module, fail. */ 2762 if (sflag != MODOPEN) 2763 return (EINVAL); 2764 2765 q->q_hiwat = udp_recv_hiwat; 2766 WR(q)->q_hiwat = udp_xmit_hiwat; 2767 WR(q)->q_lowat = udp_xmit_lowat; 2768 2769 /* Insert ourselves in the stream since we're about to walk q_next */ 2770 qprocson(q); 2771 2772 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2773 bzero(udp, sizeof (*udp)); 2774 2775 /* 2776 * UDP is supported only as a module and it has to be pushed directly 2777 * above the device instance of IP. If UDP is pushed anywhere else 2778 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2779 * sake of MIB browsers and fail everything else. 2780 */ 2781 ip_wq = WR(q)->q_next; 2782 if (ip_wq->q_next != NULL || 2783 (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL || 2784 strcmp(name, IP_MOD_NAME) != 0 || 2785 ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) { 2786 /* Support just SNMP for MIB browsers */ 2787 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); 2788 connp->conn_rq = q; 2789 connp->conn_wq = WR(q); 2790 connp->conn_flags |= IPCL_UDPMOD; 2791 connp->conn_cred = credp; 2792 connp->conn_zoneid = zoneid; 2793 connp->conn_udp = udp; 2794 udp->udp_connp = connp; 2795 q->q_ptr = WR(q)->q_ptr = connp; 2796 crhold(credp); 2797 q->q_qinfo = &udp_snmp_rinit; 2798 WR(q)->q_qinfo = &udp_snmp_winit; 2799 return (0); 2800 } 2801 2802 /* 2803 * Initialize the udp_t structure for this stream. 2804 */ 2805 q = RD(ip_wq); 2806 connp = Q_TO_CONN(q); 2807 mutex_enter(&connp->conn_lock); 2808 connp->conn_proto = IPPROTO_UDP; 2809 connp->conn_flags |= IPCL_UDP; 2810 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2811 connp->conn_udp = udp; 2812 2813 /* Set the initial state of the stream and the privilege status. */ 2814 udp->udp_connp = connp; 2815 udp->udp_state = TS_UNBND; 2816 udp->udp_mode = UDP_MT_HOT; 2817 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2818 udp->udp_family = AF_INET6; 2819 udp->udp_ipversion = IPV6_VERSION; 2820 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2821 udp->udp_ttl = udp_ipv6_hoplimit; 2822 connp->conn_af_isv6 = B_TRUE; 2823 connp->conn_flags |= IPCL_ISV6; 2824 } else { 2825 udp->udp_family = AF_INET; 2826 udp->udp_ipversion = IPV4_VERSION; 2827 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2828 udp->udp_ttl = udp_ipv4_ttl; 2829 connp->conn_af_isv6 = B_FALSE; 2830 connp->conn_flags &= ~IPCL_ISV6; 2831 } 2832 2833 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2834 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2835 connp->conn_zoneid = zoneid; 2836 2837 if (connp->conn_flags & IPCL_SOCKET) { 2838 udp->udp_issocket = B_TRUE; 2839 udp->udp_direct_sockfs = B_TRUE; 2840 } 2841 mutex_exit(&connp->conn_lock); 2842 2843 /* 2844 * The transmit hiwat/lowat is only looked at on IP's queue. 2845 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2846 * getsockopts. 2847 */ 2848 q->q_hiwat = udp_recv_hiwat; 2849 WR(q)->q_hiwat = udp_xmit_hiwat; 2850 WR(q)->q_lowat = udp_xmit_lowat; 2851 2852 if (udp->udp_family == AF_INET6) { 2853 /* Build initial header template for transmit */ 2854 if ((err = udp_build_hdrs(q, udp)) != 0) { 2855 qprocsoff(UDP_RD(q)); 2856 udp->udp_connp = NULL; 2857 connp->conn_udp = NULL; 2858 kmem_cache_free(udp_cache, udp); 2859 return (err); 2860 } 2861 } 2862 2863 /* Set the Stream head write offset and high watermark. */ 2864 (void) mi_set_sth_wroff(UDP_RD(q), 2865 udp->udp_max_hdr_len + udp_wroff_extra); 2866 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 2867 2868 WR(UDP_RD(q))->q_qinfo = &udp_winit; 2869 2870 return (0); 2871 } 2872 2873 /* 2874 * Which UDP options OK to set through T_UNITDATA_REQ... 2875 */ 2876 /* ARGSUSED */ 2877 static boolean_t 2878 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2879 { 2880 return (B_TRUE); 2881 } 2882 2883 /* 2884 * This routine gets default values of certain options whose default 2885 * values are maintained by protcol specific code 2886 */ 2887 /* ARGSUSED */ 2888 int 2889 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2890 { 2891 int *i1 = (int *)ptr; 2892 2893 switch (level) { 2894 case IPPROTO_IP: 2895 switch (name) { 2896 case IP_MULTICAST_TTL: 2897 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2898 return (sizeof (uchar_t)); 2899 case IP_MULTICAST_LOOP: 2900 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2901 return (sizeof (uchar_t)); 2902 } 2903 break; 2904 case IPPROTO_IPV6: 2905 switch (name) { 2906 case IPV6_MULTICAST_HOPS: 2907 *i1 = IP_DEFAULT_MULTICAST_TTL; 2908 return (sizeof (int)); 2909 case IPV6_MULTICAST_LOOP: 2910 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2911 return (sizeof (int)); 2912 case IPV6_UNICAST_HOPS: 2913 *i1 = udp_ipv6_hoplimit; 2914 return (sizeof (int)); 2915 } 2916 break; 2917 } 2918 return (-1); 2919 } 2920 2921 /* 2922 * This routine retrieves the current status of socket options 2923 * and expects the caller to pass in the queue pointer of the 2924 * upper instance. It returns the size of the option retrieved. 2925 */ 2926 int 2927 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2928 { 2929 int *i1 = (int *)ptr; 2930 conn_t *connp; 2931 udp_t *udp; 2932 ip6_pkt_t *ipp; 2933 2934 q = UDP_WR(q); 2935 connp = Q_TO_CONN(q); 2936 udp = connp->conn_udp; 2937 ipp = &udp->udp_sticky_ipp; 2938 2939 switch (level) { 2940 case SOL_SOCKET: 2941 switch (name) { 2942 case SO_DEBUG: 2943 *i1 = udp->udp_debug; 2944 break; /* goto sizeof (int) option return */ 2945 case SO_REUSEADDR: 2946 *i1 = udp->udp_reuseaddr; 2947 break; /* goto sizeof (int) option return */ 2948 case SO_TYPE: 2949 *i1 = SOCK_DGRAM; 2950 break; /* goto sizeof (int) option return */ 2951 2952 /* 2953 * The following three items are available here, 2954 * but are only meaningful to IP. 2955 */ 2956 case SO_DONTROUTE: 2957 *i1 = udp->udp_dontroute; 2958 break; /* goto sizeof (int) option return */ 2959 case SO_USELOOPBACK: 2960 *i1 = udp->udp_useloopback; 2961 break; /* goto sizeof (int) option return */ 2962 case SO_BROADCAST: 2963 *i1 = udp->udp_broadcast; 2964 break; /* goto sizeof (int) option return */ 2965 2966 case SO_SNDBUF: 2967 *i1 = q->q_hiwat; 2968 break; /* goto sizeof (int) option return */ 2969 case SO_RCVBUF: 2970 *i1 = RD(q)->q_hiwat; 2971 break; /* goto sizeof (int) option return */ 2972 case SO_DGRAM_ERRIND: 2973 *i1 = udp->udp_dgram_errind; 2974 break; /* goto sizeof (int) option return */ 2975 case SO_RECVUCRED: 2976 *i1 = udp->udp_recvucred; 2977 break; /* goto sizeof (int) option return */ 2978 default: 2979 return (-1); 2980 } 2981 break; 2982 case IPPROTO_IP: 2983 if (udp->udp_family != AF_INET) 2984 return (-1); 2985 switch (name) { 2986 case IP_OPTIONS: 2987 case T_IP_OPTIONS: 2988 if (udp->udp_ip_rcv_options_len) 2989 bcopy(udp->udp_ip_rcv_options, ptr, 2990 udp->udp_ip_rcv_options_len); 2991 return (udp->udp_ip_rcv_options_len); 2992 case IP_TOS: 2993 case T_IP_TOS: 2994 *i1 = (int)udp->udp_type_of_service; 2995 break; /* goto sizeof (int) option return */ 2996 case IP_TTL: 2997 *i1 = (int)udp->udp_ttl; 2998 break; /* goto sizeof (int) option return */ 2999 case IP_NEXTHOP: 3000 /* Handled at IP level */ 3001 return (-EINVAL); 3002 case IP_MULTICAST_IF: 3003 /* 0 address if not set */ 3004 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 3005 return (sizeof (ipaddr_t)); 3006 case IP_MULTICAST_TTL: 3007 *(uchar_t *)ptr = udp->udp_multicast_ttl; 3008 return (sizeof (uchar_t)); 3009 case IP_MULTICAST_LOOP: 3010 *ptr = connp->conn_multicast_loop; 3011 return (sizeof (uint8_t)); 3012 case IP_RECVOPTS: 3013 *i1 = udp->udp_recvopts; 3014 break; /* goto sizeof (int) option return */ 3015 case IP_RECVDSTADDR: 3016 *i1 = udp->udp_recvdstaddr; 3017 break; /* goto sizeof (int) option return */ 3018 case IP_RECVIF: 3019 *i1 = udp->udp_recvif; 3020 break; /* goto sizeof (int) option return */ 3021 case IP_RECVSLLA: 3022 *i1 = udp->udp_recvslla; 3023 break; /* goto sizeof (int) option return */ 3024 case IP_RECVTTL: 3025 *i1 = udp->udp_recvttl; 3026 break; /* goto sizeof (int) option return */ 3027 case IP_ADD_MEMBERSHIP: 3028 case IP_DROP_MEMBERSHIP: 3029 case IP_BLOCK_SOURCE: 3030 case IP_UNBLOCK_SOURCE: 3031 case IP_ADD_SOURCE_MEMBERSHIP: 3032 case IP_DROP_SOURCE_MEMBERSHIP: 3033 case MCAST_JOIN_GROUP: 3034 case MCAST_LEAVE_GROUP: 3035 case MCAST_BLOCK_SOURCE: 3036 case MCAST_UNBLOCK_SOURCE: 3037 case MCAST_JOIN_SOURCE_GROUP: 3038 case MCAST_LEAVE_SOURCE_GROUP: 3039 case IP_DONTFAILOVER_IF: 3040 /* cannot "get" the value for these */ 3041 return (-1); 3042 case IP_BOUND_IF: 3043 /* Zero if not set */ 3044 *i1 = udp->udp_bound_if; 3045 break; /* goto sizeof (int) option return */ 3046 case IP_UNSPEC_SRC: 3047 *i1 = udp->udp_unspec_source; 3048 break; /* goto sizeof (int) option return */ 3049 case IP_XMIT_IF: 3050 *i1 = udp->udp_xmit_if; 3051 break; /* goto sizeof (int) option return */ 3052 default: 3053 return (-1); 3054 } 3055 break; 3056 case IPPROTO_IPV6: 3057 if (udp->udp_family != AF_INET6) 3058 return (-1); 3059 switch (name) { 3060 case IPV6_UNICAST_HOPS: 3061 *i1 = (unsigned int)udp->udp_ttl; 3062 break; /* goto sizeof (int) option return */ 3063 case IPV6_MULTICAST_IF: 3064 /* 0 index if not set */ 3065 *i1 = udp->udp_multicast_if_index; 3066 break; /* goto sizeof (int) option return */ 3067 case IPV6_MULTICAST_HOPS: 3068 *i1 = udp->udp_multicast_ttl; 3069 break; /* goto sizeof (int) option return */ 3070 case IPV6_MULTICAST_LOOP: 3071 *i1 = connp->conn_multicast_loop; 3072 break; /* goto sizeof (int) option return */ 3073 case IPV6_JOIN_GROUP: 3074 case IPV6_LEAVE_GROUP: 3075 case MCAST_JOIN_GROUP: 3076 case MCAST_LEAVE_GROUP: 3077 case MCAST_BLOCK_SOURCE: 3078 case MCAST_UNBLOCK_SOURCE: 3079 case MCAST_JOIN_SOURCE_GROUP: 3080 case MCAST_LEAVE_SOURCE_GROUP: 3081 /* cannot "get" the value for these */ 3082 return (-1); 3083 case IPV6_BOUND_IF: 3084 /* Zero if not set */ 3085 *i1 = udp->udp_bound_if; 3086 break; /* goto sizeof (int) option return */ 3087 case IPV6_UNSPEC_SRC: 3088 *i1 = udp->udp_unspec_source; 3089 break; /* goto sizeof (int) option return */ 3090 case IPV6_RECVPKTINFO: 3091 *i1 = udp->udp_ipv6_recvpktinfo; 3092 break; /* goto sizeof (int) option return */ 3093 case IPV6_RECVTCLASS: 3094 *i1 = udp->udp_ipv6_recvtclass; 3095 break; /* goto sizeof (int) option return */ 3096 case IPV6_RECVPATHMTU: 3097 *i1 = udp->udp_ipv6_recvpathmtu; 3098 break; /* goto sizeof (int) option return */ 3099 case IPV6_RECVHOPLIMIT: 3100 *i1 = udp->udp_ipv6_recvhoplimit; 3101 break; /* goto sizeof (int) option return */ 3102 case IPV6_RECVHOPOPTS: 3103 *i1 = udp->udp_ipv6_recvhopopts; 3104 break; /* goto sizeof (int) option return */ 3105 case IPV6_RECVDSTOPTS: 3106 *i1 = udp->udp_ipv6_recvdstopts; 3107 break; /* goto sizeof (int) option return */ 3108 case _OLD_IPV6_RECVDSTOPTS: 3109 *i1 = udp->udp_old_ipv6_recvdstopts; 3110 break; /* goto sizeof (int) option return */ 3111 case IPV6_RECVRTHDRDSTOPTS: 3112 *i1 = udp->udp_ipv6_recvrthdrdstopts; 3113 break; /* goto sizeof (int) option return */ 3114 case IPV6_RECVRTHDR: 3115 *i1 = udp->udp_ipv6_recvrthdr; 3116 break; /* goto sizeof (int) option return */ 3117 case IPV6_PKTINFO: { 3118 /* XXX assumes that caller has room for max size! */ 3119 struct in6_pktinfo *pkti; 3120 3121 pkti = (struct in6_pktinfo *)ptr; 3122 if (ipp->ipp_fields & IPPF_IFINDEX) 3123 pkti->ipi6_ifindex = ipp->ipp_ifindex; 3124 else 3125 pkti->ipi6_ifindex = 0; 3126 if (ipp->ipp_fields & IPPF_ADDR) 3127 pkti->ipi6_addr = ipp->ipp_addr; 3128 else 3129 pkti->ipi6_addr = ipv6_all_zeros; 3130 return (sizeof (struct in6_pktinfo)); 3131 } 3132 case IPV6_TCLASS: 3133 if (ipp->ipp_fields & IPPF_TCLASS) 3134 *i1 = ipp->ipp_tclass; 3135 else 3136 *i1 = IPV6_FLOW_TCLASS( 3137 IPV6_DEFAULT_VERS_AND_FLOW); 3138 break; /* goto sizeof (int) option return */ 3139 case IPV6_NEXTHOP: { 3140 sin6_t *sin6 = (sin6_t *)ptr; 3141 3142 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 3143 return (0); 3144 *sin6 = sin6_null; 3145 sin6->sin6_family = AF_INET6; 3146 sin6->sin6_addr = ipp->ipp_nexthop; 3147 return (sizeof (sin6_t)); 3148 } 3149 case IPV6_HOPOPTS: 3150 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 3151 return (0); 3152 bcopy(ipp->ipp_hopopts, ptr, ipp->ipp_hopoptslen); 3153 return (ipp->ipp_hopoptslen); 3154 case IPV6_RTHDRDSTOPTS: 3155 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 3156 return (0); 3157 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 3158 return (ipp->ipp_rtdstoptslen); 3159 case IPV6_RTHDR: 3160 if (!(ipp->ipp_fields & IPPF_RTHDR)) 3161 return (0); 3162 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 3163 return (ipp->ipp_rthdrlen); 3164 case IPV6_DSTOPTS: 3165 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 3166 return (0); 3167 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 3168 return (ipp->ipp_dstoptslen); 3169 case IPV6_PATHMTU: 3170 return (ip_fill_mtuinfo(&udp->udp_v6dst, 3171 udp->udp_dstport, (struct ip6_mtuinfo *)ptr)); 3172 default: 3173 return (-1); 3174 } 3175 break; 3176 case IPPROTO_UDP: 3177 switch (name) { 3178 case UDP_ANONPRIVBIND: 3179 *i1 = udp->udp_anon_priv_bind; 3180 break; 3181 case UDP_EXCLBIND: 3182 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 3183 break; 3184 case UDP_RCVHDR: 3185 *i1 = udp->udp_rcvhdr ? 1 : 0; 3186 break; 3187 default: 3188 return (-1); 3189 } 3190 break; 3191 default: 3192 return (-1); 3193 } 3194 return (sizeof (int)); 3195 } 3196 3197 /* 3198 * This routine sets socket options; it expects the caller 3199 * to pass in the queue pointer of the upper instance. 3200 */ 3201 /* ARGSUSED */ 3202 int 3203 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3204 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3205 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3206 { 3207 int *i1 = (int *)invalp; 3208 boolean_t onoff = (*i1 == 0) ? 0 : 1; 3209 boolean_t checkonly; 3210 int error; 3211 conn_t *connp; 3212 udp_t *udp; 3213 3214 q = UDP_WR(q); 3215 connp = Q_TO_CONN(q); 3216 udp = connp->conn_udp; 3217 3218 switch (optset_context) { 3219 case SETFN_OPTCOM_CHECKONLY: 3220 checkonly = B_TRUE; 3221 /* 3222 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3223 * inlen != 0 implies value supplied and 3224 * we have to "pretend" to set it. 3225 * inlen == 0 implies that there is no 3226 * value part in T_CHECK request and just validation 3227 * done elsewhere should be enough, we just return here. 3228 */ 3229 if (inlen == 0) { 3230 *outlenp = 0; 3231 return (0); 3232 } 3233 break; 3234 case SETFN_OPTCOM_NEGOTIATE: 3235 checkonly = B_FALSE; 3236 break; 3237 case SETFN_UD_NEGOTIATE: 3238 case SETFN_CONN_NEGOTIATE: 3239 checkonly = B_FALSE; 3240 /* 3241 * Negotiating local and "association-related" options 3242 * through T_UNITDATA_REQ. 3243 * 3244 * Following routine can filter out ones we do not 3245 * want to be "set" this way. 3246 */ 3247 if (!udp_opt_allow_udr_set(level, name)) { 3248 *outlenp = 0; 3249 return (EINVAL); 3250 } 3251 break; 3252 default: 3253 /* 3254 * We should never get here 3255 */ 3256 *outlenp = 0; 3257 return (EINVAL); 3258 } 3259 3260 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3261 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3262 3263 /* 3264 * For fixed length options, no sanity check 3265 * of passed in length is done. It is assumed *_optcom_req() 3266 * routines do the right thing. 3267 */ 3268 3269 switch (level) { 3270 case SOL_SOCKET: 3271 switch (name) { 3272 case SO_REUSEADDR: 3273 if (!checkonly) 3274 udp->udp_reuseaddr = onoff; 3275 break; 3276 case SO_DEBUG: 3277 if (!checkonly) 3278 udp->udp_debug = onoff; 3279 break; 3280 /* 3281 * The following three items are available here, 3282 * but are only meaningful to IP. 3283 */ 3284 case SO_DONTROUTE: 3285 if (!checkonly) 3286 udp->udp_dontroute = onoff; 3287 break; 3288 case SO_USELOOPBACK: 3289 if (!checkonly) 3290 udp->udp_useloopback = onoff; 3291 break; 3292 case SO_BROADCAST: 3293 if (!checkonly) 3294 udp->udp_broadcast = onoff; 3295 break; 3296 3297 case SO_SNDBUF: 3298 if (*i1 > udp_max_buf) { 3299 *outlenp = 0; 3300 return (ENOBUFS); 3301 } 3302 if (!checkonly) { 3303 q->q_hiwat = *i1; 3304 WR(UDP_RD(q))->q_hiwat = *i1; 3305 } 3306 break; 3307 case SO_RCVBUF: 3308 if (*i1 > udp_max_buf) { 3309 *outlenp = 0; 3310 return (ENOBUFS); 3311 } 3312 if (!checkonly) { 3313 RD(q)->q_hiwat = *i1; 3314 UDP_RD(q)->q_hiwat = *i1; 3315 (void) mi_set_sth_hiwat(UDP_RD(q), 3316 udp_set_rcv_hiwat(udp, *i1)); 3317 } 3318 break; 3319 case SO_DGRAM_ERRIND: 3320 if (!checkonly) 3321 udp->udp_dgram_errind = onoff; 3322 break; 3323 case SO_RECVUCRED: 3324 if (!checkonly) 3325 udp->udp_recvucred = onoff; 3326 break; 3327 default: 3328 *outlenp = 0; 3329 return (EINVAL); 3330 } 3331 break; 3332 case IPPROTO_IP: 3333 if (udp->udp_family != AF_INET) { 3334 *outlenp = 0; 3335 return (ENOPROTOOPT); 3336 } 3337 switch (name) { 3338 case IP_OPTIONS: 3339 case T_IP_OPTIONS: 3340 /* Save options for use by IP. */ 3341 if (inlen & 0x3) { 3342 *outlenp = 0; 3343 return (EINVAL); 3344 } 3345 if (checkonly) 3346 break; 3347 3348 if (udp->udp_ip_snd_options) { 3349 mi_free((char *)udp->udp_ip_snd_options); 3350 udp->udp_ip_snd_options_len = 0; 3351 udp->udp_ip_snd_options = NULL; 3352 } 3353 if (inlen) { 3354 udp->udp_ip_snd_options = 3355 (uchar_t *)mi_alloc(inlen, BPRI_HI); 3356 if (udp->udp_ip_snd_options) { 3357 bcopy(invalp, udp->udp_ip_snd_options, 3358 inlen); 3359 udp->udp_ip_snd_options_len = inlen; 3360 } 3361 } 3362 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3363 UDPH_SIZE + udp->udp_ip_snd_options_len; 3364 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3365 udp_wroff_extra); 3366 break; 3367 case IP_TTL: 3368 if (!checkonly) { 3369 udp->udp_ttl = (uchar_t)*i1; 3370 } 3371 break; 3372 case IP_TOS: 3373 case T_IP_TOS: 3374 if (!checkonly) { 3375 udp->udp_type_of_service = (uchar_t)*i1; 3376 } 3377 break; 3378 case IP_MULTICAST_IF: { 3379 /* 3380 * TODO should check OPTMGMT reply and undo this if 3381 * there is an error. 3382 */ 3383 struct in_addr *inap = (struct in_addr *)invalp; 3384 if (!checkonly) { 3385 udp->udp_multicast_if_addr = 3386 inap->s_addr; 3387 } 3388 break; 3389 } 3390 case IP_MULTICAST_TTL: 3391 if (!checkonly) 3392 udp->udp_multicast_ttl = *invalp; 3393 break; 3394 case IP_MULTICAST_LOOP: 3395 if (!checkonly) 3396 connp->conn_multicast_loop = *invalp; 3397 break; 3398 case IP_RECVOPTS: 3399 if (!checkonly) 3400 udp->udp_recvopts = onoff; 3401 break; 3402 case IP_RECVDSTADDR: 3403 if (!checkonly) 3404 udp->udp_recvdstaddr = onoff; 3405 break; 3406 case IP_RECVIF: 3407 if (!checkonly) 3408 udp->udp_recvif = onoff; 3409 break; 3410 case IP_RECVSLLA: 3411 if (!checkonly) 3412 udp->udp_recvslla = onoff; 3413 break; 3414 case IP_RECVTTL: 3415 if (!checkonly) 3416 udp->udp_recvttl = onoff; 3417 break; 3418 case IP_ADD_MEMBERSHIP: 3419 case IP_DROP_MEMBERSHIP: 3420 case IP_BLOCK_SOURCE: 3421 case IP_UNBLOCK_SOURCE: 3422 case IP_ADD_SOURCE_MEMBERSHIP: 3423 case IP_DROP_SOURCE_MEMBERSHIP: 3424 case MCAST_JOIN_GROUP: 3425 case MCAST_LEAVE_GROUP: 3426 case MCAST_BLOCK_SOURCE: 3427 case MCAST_UNBLOCK_SOURCE: 3428 case MCAST_JOIN_SOURCE_GROUP: 3429 case MCAST_LEAVE_SOURCE_GROUP: 3430 case IP_SEC_OPT: 3431 case IP_NEXTHOP: 3432 /* 3433 * "soft" error (negative) 3434 * option not handled at this level 3435 * Do not modify *outlenp. 3436 */ 3437 return (-EINVAL); 3438 case IP_BOUND_IF: 3439 if (!checkonly) 3440 udp->udp_bound_if = *i1; 3441 break; 3442 case IP_UNSPEC_SRC: 3443 if (!checkonly) 3444 udp->udp_unspec_source = onoff; 3445 break; 3446 case IP_XMIT_IF: 3447 if (!checkonly) 3448 udp->udp_xmit_if = *i1; 3449 break; 3450 default: 3451 *outlenp = 0; 3452 return (EINVAL); 3453 } 3454 break; 3455 case IPPROTO_IPV6: { 3456 ip6_pkt_t *ipp; 3457 boolean_t sticky; 3458 3459 if (udp->udp_family != AF_INET6) { 3460 *outlenp = 0; 3461 return (ENOPROTOOPT); 3462 } 3463 /* 3464 * Deal with both sticky options and ancillary data 3465 */ 3466 if (thisdg_attrs == NULL) { 3467 /* sticky options, or none */ 3468 ipp = &udp->udp_sticky_ipp; 3469 sticky = B_TRUE; 3470 } else { 3471 /* ancillary data */ 3472 ipp = (ip6_pkt_t *)thisdg_attrs; 3473 sticky = B_FALSE; 3474 } 3475 3476 switch (name) { 3477 case IPV6_MULTICAST_IF: 3478 if (!checkonly) 3479 udp->udp_multicast_if_index = *i1; 3480 break; 3481 case IPV6_UNICAST_HOPS: 3482 /* -1 means use default */ 3483 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3484 *outlenp = 0; 3485 return (EINVAL); 3486 } 3487 if (!checkonly) { 3488 if (*i1 == -1) { 3489 udp->udp_ttl = ipp->ipp_unicast_hops = 3490 udp_ipv6_hoplimit; 3491 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3492 /* Pass modified value to IP. */ 3493 *i1 = udp->udp_ttl; 3494 } else { 3495 udp->udp_ttl = ipp->ipp_unicast_hops = 3496 (uint8_t)*i1; 3497 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3498 } 3499 /* Rebuild the header template */ 3500 error = udp_build_hdrs(q, udp); 3501 if (error != 0) { 3502 *outlenp = 0; 3503 return (error); 3504 } 3505 } 3506 break; 3507 case IPV6_MULTICAST_HOPS: 3508 /* -1 means use default */ 3509 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3510 *outlenp = 0; 3511 return (EINVAL); 3512 } 3513 if (!checkonly) { 3514 if (*i1 == -1) { 3515 udp->udp_multicast_ttl = 3516 ipp->ipp_multicast_hops = 3517 IP_DEFAULT_MULTICAST_TTL; 3518 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3519 /* Pass modified value to IP. */ 3520 *i1 = udp->udp_multicast_ttl; 3521 } else { 3522 udp->udp_multicast_ttl = 3523 ipp->ipp_multicast_hops = 3524 (uint8_t)*i1; 3525 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3526 } 3527 } 3528 break; 3529 case IPV6_MULTICAST_LOOP: 3530 if (*i1 != 0 && *i1 != 1) { 3531 *outlenp = 0; 3532 return (EINVAL); 3533 } 3534 if (!checkonly) 3535 connp->conn_multicast_loop = *i1; 3536 break; 3537 case IPV6_JOIN_GROUP: 3538 case IPV6_LEAVE_GROUP: 3539 case MCAST_JOIN_GROUP: 3540 case MCAST_LEAVE_GROUP: 3541 case MCAST_BLOCK_SOURCE: 3542 case MCAST_UNBLOCK_SOURCE: 3543 case MCAST_JOIN_SOURCE_GROUP: 3544 case MCAST_LEAVE_SOURCE_GROUP: 3545 /* 3546 * "soft" error (negative) 3547 * option not handled at this level 3548 * Note: Do not modify *outlenp 3549 */ 3550 return (-EINVAL); 3551 case IPV6_BOUND_IF: 3552 if (!checkonly) 3553 udp->udp_bound_if = *i1; 3554 break; 3555 case IPV6_UNSPEC_SRC: 3556 if (!checkonly) 3557 udp->udp_unspec_source = onoff; 3558 break; 3559 /* 3560 * Set boolean switches for ancillary data delivery 3561 */ 3562 case IPV6_RECVPKTINFO: 3563 if (!checkonly) 3564 udp->udp_ipv6_recvpktinfo = onoff; 3565 break; 3566 case IPV6_RECVTCLASS: 3567 if (!checkonly) { 3568 udp->udp_ipv6_recvtclass = onoff; 3569 } 3570 break; 3571 case IPV6_RECVPATHMTU: 3572 if (!checkonly) { 3573 udp->udp_ipv6_recvpathmtu = onoff; 3574 } 3575 break; 3576 case IPV6_RECVHOPLIMIT: 3577 if (!checkonly) 3578 udp->udp_ipv6_recvhoplimit = onoff; 3579 break; 3580 case IPV6_RECVHOPOPTS: 3581 if (!checkonly) 3582 udp->udp_ipv6_recvhopopts = onoff; 3583 break; 3584 case IPV6_RECVDSTOPTS: 3585 if (!checkonly) 3586 udp->udp_ipv6_recvdstopts = onoff; 3587 break; 3588 case _OLD_IPV6_RECVDSTOPTS: 3589 if (!checkonly) 3590 udp->udp_old_ipv6_recvdstopts = onoff; 3591 break; 3592 case IPV6_RECVRTHDRDSTOPTS: 3593 if (!checkonly) 3594 udp->udp_ipv6_recvrthdrdstopts = onoff; 3595 break; 3596 case IPV6_RECVRTHDR: 3597 if (!checkonly) 3598 udp->udp_ipv6_recvrthdr = onoff; 3599 break; 3600 /* 3601 * Set sticky options or ancillary data. 3602 * If sticky options, (re)build any extension headers 3603 * that might be needed as a result. 3604 */ 3605 case IPV6_PKTINFO: 3606 /* 3607 * The source address and ifindex are verified 3608 * in ip_opt_set(). For ancillary data the 3609 * source address is checked in ip_wput_v6. 3610 */ 3611 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3612 return (EINVAL); 3613 if (checkonly) 3614 break; 3615 3616 if (inlen == 0) { 3617 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3618 ipp->ipp_sticky_ignored |= 3619 (IPPF_IFINDEX|IPPF_ADDR); 3620 } else { 3621 struct in6_pktinfo *pkti; 3622 3623 pkti = (struct in6_pktinfo *)invalp; 3624 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3625 ipp->ipp_addr = pkti->ipi6_addr; 3626 if (ipp->ipp_ifindex != 0) 3627 ipp->ipp_fields |= IPPF_IFINDEX; 3628 else 3629 ipp->ipp_fields &= ~IPPF_IFINDEX; 3630 if (!IN6_IS_ADDR_UNSPECIFIED( 3631 &ipp->ipp_addr)) 3632 ipp->ipp_fields |= IPPF_ADDR; 3633 else 3634 ipp->ipp_fields &= ~IPPF_ADDR; 3635 } 3636 if (sticky) { 3637 error = udp_build_hdrs(q, udp); 3638 if (error != 0) 3639 return (error); 3640 } 3641 break; 3642 case IPV6_HOPLIMIT: 3643 if (sticky) 3644 return (EINVAL); 3645 if (inlen != 0 && inlen != sizeof (int)) 3646 return (EINVAL); 3647 if (checkonly) 3648 break; 3649 3650 if (inlen == 0) { 3651 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3652 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3653 } else { 3654 if (*i1 > 255 || *i1 < -1) 3655 return (EINVAL); 3656 if (*i1 == -1) 3657 ipp->ipp_hoplimit = udp_ipv6_hoplimit; 3658 else 3659 ipp->ipp_hoplimit = *i1; 3660 ipp->ipp_fields |= IPPF_HOPLIMIT; 3661 } 3662 break; 3663 case IPV6_TCLASS: 3664 if (inlen != 0 && inlen != sizeof (int)) 3665 return (EINVAL); 3666 if (checkonly) 3667 break; 3668 3669 if (inlen == 0) { 3670 ipp->ipp_fields &= ~IPPF_TCLASS; 3671 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3672 } else { 3673 if (*i1 > 255 || *i1 < -1) 3674 return (EINVAL); 3675 if (*i1 == -1) 3676 ipp->ipp_tclass = 0; 3677 else 3678 ipp->ipp_tclass = *i1; 3679 ipp->ipp_fields |= IPPF_TCLASS; 3680 } 3681 if (sticky) { 3682 error = udp_build_hdrs(q, udp); 3683 if (error != 0) 3684 return (error); 3685 } 3686 break; 3687 case IPV6_NEXTHOP: 3688 /* 3689 * IP will verify that the nexthop is reachable 3690 * and fail for sticky options. 3691 */ 3692 if (inlen != 0 && inlen != sizeof (sin6_t)) 3693 return (EINVAL); 3694 if (checkonly) 3695 break; 3696 3697 if (inlen == 0) { 3698 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3699 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3700 } else { 3701 sin6_t *sin6 = (sin6_t *)invalp; 3702 3703 if (sin6->sin6_family != AF_INET6) 3704 return (EAFNOSUPPORT); 3705 if (IN6_IS_ADDR_V4MAPPED( 3706 &sin6->sin6_addr)) 3707 return (EADDRNOTAVAIL); 3708 ipp->ipp_nexthop = sin6->sin6_addr; 3709 if (!IN6_IS_ADDR_UNSPECIFIED( 3710 &ipp->ipp_nexthop)) 3711 ipp->ipp_fields |= IPPF_NEXTHOP; 3712 else 3713 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3714 } 3715 if (sticky) { 3716 error = udp_build_hdrs(q, udp); 3717 if (error != 0) 3718 return (error); 3719 } 3720 break; 3721 case IPV6_HOPOPTS: { 3722 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3723 /* 3724 * Sanity checks - minimum size, size a multiple of 3725 * eight bytes, and matching size passed in. 3726 */ 3727 if (inlen != 0 && 3728 inlen != (8 * (hopts->ip6h_len + 1))) 3729 return (EINVAL); 3730 3731 if (checkonly) 3732 break; 3733 3734 if (inlen == 0) { 3735 if (sticky && 3736 (ipp->ipp_fields & IPPF_HOPOPTS) != 0) { 3737 kmem_free(ipp->ipp_hopopts, 3738 ipp->ipp_hopoptslen); 3739 ipp->ipp_hopopts = NULL; 3740 ipp->ipp_hopoptslen = 0; 3741 } 3742 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3743 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3744 } else { 3745 error = udp_pkt_set(invalp, inlen, sticky, 3746 (uchar_t **)&ipp->ipp_hopopts, 3747 &ipp->ipp_hopoptslen); 3748 if (error != 0) 3749 return (error); 3750 ipp->ipp_fields |= IPPF_HOPOPTS; 3751 } 3752 if (sticky) { 3753 error = udp_build_hdrs(q, udp); 3754 if (error != 0) 3755 return (error); 3756 } 3757 break; 3758 } 3759 case IPV6_RTHDRDSTOPTS: { 3760 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3761 3762 /* 3763 * Sanity checks - minimum size, size a multiple of 3764 * eight bytes, and matching size passed in. 3765 */ 3766 if (inlen != 0 && 3767 inlen != (8 * (dopts->ip6d_len + 1))) 3768 return (EINVAL); 3769 3770 if (checkonly) 3771 break; 3772 3773 if (inlen == 0) { 3774 if (sticky && 3775 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3776 kmem_free(ipp->ipp_rtdstopts, 3777 ipp->ipp_rtdstoptslen); 3778 ipp->ipp_rtdstopts = NULL; 3779 ipp->ipp_rtdstoptslen = 0; 3780 } 3781 3782 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3783 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3784 } else { 3785 error = udp_pkt_set(invalp, inlen, sticky, 3786 (uchar_t **)&ipp->ipp_rtdstopts, 3787 &ipp->ipp_rtdstoptslen); 3788 if (error != 0) 3789 return (error); 3790 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3791 } 3792 if (sticky) { 3793 error = udp_build_hdrs(q, udp); 3794 if (error != 0) 3795 return (error); 3796 } 3797 break; 3798 } 3799 case IPV6_DSTOPTS: { 3800 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3801 3802 /* 3803 * Sanity checks - minimum size, size a multiple of 3804 * eight bytes, and matching size passed in. 3805 */ 3806 if (inlen != 0 && 3807 inlen != (8 * (dopts->ip6d_len + 1))) 3808 return (EINVAL); 3809 3810 if (checkonly) 3811 break; 3812 3813 if (inlen == 0) { 3814 if (sticky && 3815 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3816 kmem_free(ipp->ipp_dstopts, 3817 ipp->ipp_dstoptslen); 3818 ipp->ipp_dstopts = NULL; 3819 ipp->ipp_dstoptslen = 0; 3820 } 3821 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3822 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3823 } else { 3824 error = udp_pkt_set(invalp, inlen, sticky, 3825 (uchar_t **)&ipp->ipp_dstopts, 3826 &ipp->ipp_dstoptslen); 3827 if (error != 0) 3828 return (error); 3829 ipp->ipp_fields |= IPPF_DSTOPTS; 3830 } 3831 if (sticky) { 3832 error = udp_build_hdrs(q, udp); 3833 if (error != 0) 3834 return (error); 3835 } 3836 break; 3837 } 3838 case IPV6_RTHDR: { 3839 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3840 3841 /* 3842 * Sanity checks - minimum size, size a multiple of 3843 * eight bytes, and matching size passed in. 3844 */ 3845 if (inlen != 0 && 3846 inlen != (8 * (rt->ip6r_len + 1))) 3847 return (EINVAL); 3848 3849 if (checkonly) 3850 break; 3851 3852 if (inlen == 0) { 3853 if (sticky && 3854 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3855 kmem_free(ipp->ipp_rthdr, 3856 ipp->ipp_rthdrlen); 3857 ipp->ipp_rthdr = NULL; 3858 ipp->ipp_rthdrlen = 0; 3859 } 3860 ipp->ipp_fields &= ~IPPF_RTHDR; 3861 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3862 } else { 3863 error = udp_pkt_set(invalp, inlen, sticky, 3864 (uchar_t **)&ipp->ipp_rthdr, 3865 &ipp->ipp_rthdrlen); 3866 if (error != 0) 3867 return (error); 3868 ipp->ipp_fields |= IPPF_RTHDR; 3869 } 3870 if (sticky) { 3871 error = udp_build_hdrs(q, udp); 3872 if (error != 0) 3873 return (error); 3874 } 3875 break; 3876 } 3877 3878 case IPV6_DONTFRAG: 3879 if (checkonly) 3880 break; 3881 3882 if (onoff) { 3883 ipp->ipp_fields |= IPPF_DONTFRAG; 3884 } else { 3885 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3886 } 3887 break; 3888 3889 case IPV6_USE_MIN_MTU: 3890 if (inlen != sizeof (int)) 3891 return (EINVAL); 3892 3893 if (*i1 < -1 || *i1 > 1) 3894 return (EINVAL); 3895 3896 if (checkonly) 3897 break; 3898 3899 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3900 ipp->ipp_use_min_mtu = *i1; 3901 break; 3902 3903 case IPV6_BOUND_PIF: 3904 case IPV6_SEC_OPT: 3905 case IPV6_DONTFAILOVER_IF: 3906 case IPV6_SRC_PREFERENCES: 3907 case IPV6_V6ONLY: 3908 /* Handled at the IP level */ 3909 return (-EINVAL); 3910 default: 3911 *outlenp = 0; 3912 return (EINVAL); 3913 } 3914 break; 3915 } /* end IPPROTO_IPV6 */ 3916 case IPPROTO_UDP: 3917 switch (name) { 3918 case UDP_ANONPRIVBIND: 3919 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 3920 *outlenp = 0; 3921 return (error); 3922 } 3923 if (!checkonly) { 3924 udp->udp_anon_priv_bind = onoff; 3925 } 3926 break; 3927 case UDP_EXCLBIND: 3928 if (!checkonly) 3929 udp->udp_exclbind = onoff; 3930 break; 3931 case UDP_RCVHDR: 3932 if (!checkonly) 3933 udp->udp_rcvhdr = onoff; 3934 break; 3935 default: 3936 *outlenp = 0; 3937 return (EINVAL); 3938 } 3939 break; 3940 default: 3941 *outlenp = 0; 3942 return (EINVAL); 3943 } 3944 /* 3945 * Common case of OK return with outval same as inval. 3946 */ 3947 if (invalp != outvalp) { 3948 /* don't trust bcopy for identical src/dst */ 3949 (void) bcopy(invalp, outvalp, inlen); 3950 } 3951 *outlenp = inlen; 3952 return (0); 3953 } 3954 3955 /* 3956 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3957 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3958 * headers, and the udp header. 3959 * Returns failure if can't allocate memory. 3960 */ 3961 static int 3962 udp_build_hdrs(queue_t *q, udp_t *udp) 3963 { 3964 uchar_t *hdrs; 3965 uint_t hdrs_len; 3966 ip6_t *ip6h; 3967 ip6i_t *ip6i; 3968 udpha_t *udpha; 3969 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3970 3971 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3972 ASSERT(hdrs_len != 0); 3973 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3974 /* Need to reallocate */ 3975 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3976 if (hdrs == NULL) 3977 return (ENOMEM); 3978 3979 if (udp->udp_sticky_hdrs_len != 0) { 3980 kmem_free(udp->udp_sticky_hdrs, 3981 udp->udp_sticky_hdrs_len); 3982 } 3983 udp->udp_sticky_hdrs = hdrs; 3984 udp->udp_sticky_hdrs_len = hdrs_len; 3985 } 3986 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3987 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3988 3989 /* Set header fields not in ipp */ 3990 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3991 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3992 ip6h = (ip6_t *)&ip6i[1]; 3993 } else { 3994 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3995 } 3996 3997 if (!(ipp->ipp_fields & IPPF_ADDR)) 3998 ip6h->ip6_src = udp->udp_v6src; 3999 4000 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 4001 udpha->uha_src_port = udp->udp_port; 4002 4003 /* Try to get everything in a single mblk */ 4004 if (hdrs_len > udp->udp_max_hdr_len) { 4005 udp->udp_max_hdr_len = hdrs_len; 4006 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 4007 udp_wroff_extra); 4008 } 4009 return (0); 4010 } 4011 4012 /* 4013 * Set optbuf and optlen for the option. 4014 * If sticky is set allocate memory (if not already present). 4015 * Otherwise just point optbuf and optlen at invalp and inlen. 4016 * Returns failure if memory can not be allocated. 4017 */ 4018 static int 4019 udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 4020 uchar_t **optbufp, uint_t *optlenp) 4021 { 4022 uchar_t *optbuf; 4023 4024 if (!sticky) { 4025 *optbufp = invalp; 4026 *optlenp = inlen; 4027 return (0); 4028 } 4029 if (inlen == *optlenp) { 4030 /* Unchanged length - no need to realocate */ 4031 bcopy(invalp, *optbufp, inlen); 4032 return (0); 4033 } 4034 if (inlen != 0) { 4035 /* Allocate new buffer before free */ 4036 optbuf = kmem_alloc(inlen, KM_NOSLEEP); 4037 if (optbuf == NULL) 4038 return (ENOMEM); 4039 } else { 4040 optbuf = NULL; 4041 } 4042 /* Free old buffer */ 4043 if (*optlenp != 0) 4044 kmem_free(*optbufp, *optlenp); 4045 4046 bcopy(invalp, optbuf, inlen); 4047 *optbufp = optbuf; 4048 *optlenp = inlen; 4049 return (0); 4050 } 4051 4052 /* 4053 * This routine retrieves the value of an ND variable in a udpparam_t 4054 * structure. It is called through nd_getset when a user reads the 4055 * variable. 4056 */ 4057 /* ARGSUSED */ 4058 static int 4059 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4060 { 4061 udpparam_t *udppa = (udpparam_t *)cp; 4062 4063 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4064 return (0); 4065 } 4066 4067 /* 4068 * Walk through the param array specified registering each element with the 4069 * named dispatch (ND) handler. 4070 */ 4071 static boolean_t 4072 udp_param_register(udpparam_t *udppa, int cnt) 4073 { 4074 for (; cnt-- > 0; udppa++) { 4075 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4076 if (!nd_load(&udp_g_nd, udppa->udp_param_name, 4077 udp_param_get, udp_param_set, 4078 (caddr_t)udppa)) { 4079 nd_free(&udp_g_nd); 4080 return (B_FALSE); 4081 } 4082 } 4083 } 4084 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports", 4085 udp_extra_priv_ports_get, NULL, NULL)) { 4086 nd_free(&udp_g_nd); 4087 return (B_FALSE); 4088 } 4089 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add", 4090 NULL, udp_extra_priv_ports_add, NULL)) { 4091 nd_free(&udp_g_nd); 4092 return (B_FALSE); 4093 } 4094 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del", 4095 NULL, udp_extra_priv_ports_del, NULL)) { 4096 nd_free(&udp_g_nd); 4097 return (B_FALSE); 4098 } 4099 if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL, 4100 NULL)) { 4101 nd_free(&udp_g_nd); 4102 return (B_FALSE); 4103 } 4104 if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL, 4105 NULL)) { 4106 nd_free(&udp_g_nd); 4107 return (B_FALSE); 4108 } 4109 return (B_TRUE); 4110 } 4111 4112 /* This routine sets an ND variable in a udpparam_t structure. */ 4113 /* ARGSUSED */ 4114 static int 4115 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 4116 { 4117 long new_value; 4118 udpparam_t *udppa = (udpparam_t *)cp; 4119 4120 /* 4121 * Fail the request if the new value does not lie within the 4122 * required bounds. 4123 */ 4124 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 4125 new_value < udppa->udp_param_min || 4126 new_value > udppa->udp_param_max) { 4127 return (EINVAL); 4128 } 4129 4130 /* Set the new value */ 4131 udppa->udp_param_value = new_value; 4132 return (0); 4133 } 4134 4135 static void 4136 udp_input(conn_t *connp, mblk_t *mp) 4137 { 4138 struct T_unitdata_ind *tudi; 4139 uchar_t *rptr; /* Pointer to IP header */ 4140 int hdr_length; /* Length of IP+UDP headers */ 4141 int udi_size; /* Size of T_unitdata_ind */ 4142 int mp_len; 4143 udp_t *udp; 4144 udpha_t *udpha; 4145 int ipversion; 4146 ip6_pkt_t ipp; 4147 ip6_t *ip6h; 4148 ip6i_t *ip6i; 4149 mblk_t *mp1; 4150 mblk_t *options_mp = NULL; 4151 in_pktinfo_t *pinfo = NULL; 4152 cred_t *cr = NULL; 4153 queue_t *q = connp->conn_rq; 4154 pid_t cpid; 4155 4156 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4157 "udp_rput_start: q %p mp %p", q, mp); 4158 4159 udp = connp->conn_udp; 4160 rptr = mp->b_rptr; 4161 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4162 ASSERT(OK_32PTR(rptr)); 4163 4164 /* 4165 * IP should have prepended the options data in an M_CTL 4166 * Check M_CTL "type" to make sure are not here bcos of 4167 * a valid ICMP message 4168 */ 4169 if (DB_TYPE(mp) == M_CTL) { 4170 if (MBLKL(mp) == sizeof (in_pktinfo_t) && 4171 ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type == 4172 IN_PKTINFO) { 4173 /* 4174 * IP_RECVIF or IP_RECVSLLA information has been 4175 * appended to the packet by IP. We need to 4176 * extract the mblk and adjust the rptr 4177 */ 4178 pinfo = (in_pktinfo_t *)mp->b_rptr; 4179 options_mp = mp; 4180 mp = mp->b_cont; 4181 rptr = mp->b_rptr; 4182 UDP_STAT(udp_in_pktinfo); 4183 } else { 4184 /* 4185 * ICMP messages. 4186 */ 4187 udp_icmp_error(q, mp); 4188 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4189 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4190 return; 4191 } 4192 } 4193 4194 mp_len = msgdsize(mp); 4195 /* 4196 * This is the inbound data path. 4197 * First, we check to make sure the IP version number is correct, 4198 * and then pull the IP and UDP headers into the first mblk. 4199 * Assume IP provides aligned packets - otherwise toss. 4200 * Also, check if we have a complete IP header. 4201 */ 4202 4203 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4204 ipp.ipp_fields = 0; 4205 4206 ipversion = IPH_HDR_VERSION(rptr); 4207 switch (ipversion) { 4208 case IPV4_VERSION: 4209 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4210 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4211 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4212 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4213 (udp->udp_ip_rcv_options_len)) { 4214 /* 4215 * Handle IPv4 packets with options outside of the 4216 * main data path. Not needed for AF_INET6 sockets 4217 * since they don't support a getsockopt of IP_OPTIONS. 4218 */ 4219 if (udp->udp_family == AF_INET6) 4220 break; 4221 /* 4222 * UDP length check performed for IPv4 packets with 4223 * options to check whether UDP length specified in 4224 * the header is the same as the physical length of 4225 * the packet. 4226 */ 4227 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4228 if (mp_len != (ntohs(udpha->uha_length) + 4229 hdr_length - UDPH_SIZE)) { 4230 goto tossit; 4231 } 4232 /* 4233 * Handle the case where the packet has IP options 4234 * and the IP_RECVSLLA & IP_RECVIF are set 4235 */ 4236 if (pinfo != NULL) 4237 mp = options_mp; 4238 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4239 SQTAG_UDP_INPUT); 4240 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4241 "udp_rput_end: q %p (%S)", q, "end"); 4242 return; 4243 } 4244 4245 /* Handle IPV6_RECVHOPLIMIT. */ 4246 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4247 udp->udp_ipv6_recvpktinfo) { 4248 if (pinfo->in_pkt_flags & IPF_RECVIF) { 4249 ipp.ipp_fields |= IPPF_IFINDEX; 4250 ipp.ipp_ifindex = pinfo->in_pkt_ifindex; 4251 } 4252 } 4253 break; 4254 case IPV6_VERSION: 4255 /* 4256 * IPv6 packets can only be received by applications 4257 * that are prepared to receive IPv6 addresses. 4258 * The IP fanout must ensure this. 4259 */ 4260 ASSERT(udp->udp_family == AF_INET6); 4261 4262 ip6h = (ip6_t *)rptr; 4263 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4264 4265 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4266 uint8_t nexthdrp; 4267 /* Look for ifindex information */ 4268 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4269 ip6i = (ip6i_t *)ip6h; 4270 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4271 goto tossit; 4272 4273 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4274 ASSERT(ip6i->ip6i_ifindex != 0); 4275 ipp.ipp_fields |= IPPF_IFINDEX; 4276 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4277 } 4278 rptr = (uchar_t *)&ip6i[1]; 4279 mp->b_rptr = rptr; 4280 if (rptr == mp->b_wptr) { 4281 mp1 = mp->b_cont; 4282 freeb(mp); 4283 mp = mp1; 4284 rptr = mp->b_rptr; 4285 } 4286 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4287 goto tossit; 4288 ip6h = (ip6_t *)rptr; 4289 mp_len = msgdsize(mp); 4290 } 4291 /* 4292 * Find any potentially interesting extension headers 4293 * as well as the length of the IPv6 + extension 4294 * headers. 4295 */ 4296 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4297 UDPH_SIZE; 4298 ASSERT(nexthdrp == IPPROTO_UDP); 4299 } else { 4300 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4301 ip6i = NULL; 4302 } 4303 break; 4304 default: 4305 ASSERT(0); 4306 } 4307 4308 /* 4309 * IP inspected the UDP header thus all of it must be in the mblk. 4310 * UDP length check is performed for IPv6 packets and IPv4 packets 4311 * without options to check if the size of the packet as specified 4312 * by the header is the same as the physical size of the packet. 4313 */ 4314 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4315 if ((MBLKL(mp) < hdr_length) || 4316 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4317 goto tossit; 4318 } 4319 4320 /* Walk past the headers. */ 4321 if (!udp->udp_rcvhdr) { 4322 mp->b_rptr = rptr + hdr_length; 4323 mp_len -= hdr_length; 4324 } 4325 4326 /* 4327 * This is the inbound data path. Packets are passed upstream as 4328 * T_UNITDATA_IND messages with full IP headers still attached. 4329 */ 4330 if (udp->udp_family == AF_INET) { 4331 sin_t *sin; 4332 4333 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4334 4335 /* 4336 * Normally only send up the address. 4337 * If IP_RECVDSTADDR is set we include the destination IP 4338 * address as an option. With IP_RECVOPTS we include all 4339 * the IP options. Only ip_rput_other() handles packets 4340 * that contain IP options. 4341 */ 4342 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4343 if (udp->udp_recvdstaddr) { 4344 udi_size += sizeof (struct T_opthdr) + 4345 sizeof (struct in_addr); 4346 UDP_STAT(udp_in_recvdstaddr); 4347 } 4348 4349 /* 4350 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4351 * space accordingly 4352 */ 4353 if (udp->udp_recvif && (pinfo != NULL) && 4354 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4355 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4356 UDP_STAT(udp_in_recvif); 4357 } 4358 4359 if (udp->udp_recvslla && (pinfo != NULL) && 4360 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4361 udi_size += sizeof (struct T_opthdr) + 4362 sizeof (struct sockaddr_dl); 4363 UDP_STAT(udp_in_recvslla); 4364 } 4365 4366 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4367 udi_size += sizeof (struct T_opthdr) + ucredsize; 4368 cpid = DB_CPID(mp); 4369 UDP_STAT(udp_in_recvucred); 4370 } 4371 /* 4372 * If IP_RECVTTL is set allocate the appropriate sized buffer 4373 */ 4374 if (udp->udp_recvttl) { 4375 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4376 UDP_STAT(udp_in_recvttl); 4377 } 4378 4379 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4380 4381 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4382 mp1 = allocb(udi_size, BPRI_MED); 4383 if (mp1 == NULL) { 4384 freemsg(mp); 4385 if (options_mp != NULL) 4386 freeb(options_mp); 4387 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4388 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4389 BUMP_MIB(&udp_mib, udpInErrors); 4390 return; 4391 } 4392 mp1->b_cont = mp; 4393 mp = mp1; 4394 mp->b_datap->db_type = M_PROTO; 4395 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4396 mp->b_wptr = (uchar_t *)tudi + udi_size; 4397 tudi->PRIM_type = T_UNITDATA_IND; 4398 tudi->SRC_length = sizeof (sin_t); 4399 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4400 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4401 sizeof (sin_t); 4402 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4403 tudi->OPT_length = udi_size; 4404 sin = (sin_t *)&tudi[1]; 4405 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4406 sin->sin_port = udpha->uha_src_port; 4407 sin->sin_family = udp->udp_family; 4408 *(uint32_t *)&sin->sin_zero[0] = 0; 4409 *(uint32_t *)&sin->sin_zero[4] = 0; 4410 4411 /* 4412 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4413 * IP_RECVTTL has been set. 4414 */ 4415 if (udi_size != 0) { 4416 /* 4417 * Copy in destination address before options to avoid 4418 * any padding issues. 4419 */ 4420 char *dstopt; 4421 4422 dstopt = (char *)&sin[1]; 4423 if (udp->udp_recvdstaddr) { 4424 struct T_opthdr *toh; 4425 ipaddr_t *dstptr; 4426 4427 toh = (struct T_opthdr *)dstopt; 4428 toh->level = IPPROTO_IP; 4429 toh->name = IP_RECVDSTADDR; 4430 toh->len = sizeof (struct T_opthdr) + 4431 sizeof (ipaddr_t); 4432 toh->status = 0; 4433 dstopt += sizeof (struct T_opthdr); 4434 dstptr = (ipaddr_t *)dstopt; 4435 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4436 dstopt += sizeof (ipaddr_t); 4437 udi_size -= toh->len; 4438 } 4439 4440 if (udp->udp_recvslla && (pinfo != NULL) && 4441 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4442 4443 struct T_opthdr *toh; 4444 struct sockaddr_dl *dstptr; 4445 4446 toh = (struct T_opthdr *)dstopt; 4447 toh->level = IPPROTO_IP; 4448 toh->name = IP_RECVSLLA; 4449 toh->len = sizeof (struct T_opthdr) + 4450 sizeof (struct sockaddr_dl); 4451 toh->status = 0; 4452 dstopt += sizeof (struct T_opthdr); 4453 dstptr = (struct sockaddr_dl *)dstopt; 4454 bcopy(&pinfo->in_pkt_slla, dstptr, 4455 sizeof (struct sockaddr_dl)); 4456 dstopt += sizeof (struct sockaddr_dl); 4457 udi_size -= toh->len; 4458 } 4459 4460 if (udp->udp_recvif && (pinfo != NULL) && 4461 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4462 4463 struct T_opthdr *toh; 4464 uint_t *dstptr; 4465 4466 toh = (struct T_opthdr *)dstopt; 4467 toh->level = IPPROTO_IP; 4468 toh->name = IP_RECVIF; 4469 toh->len = sizeof (struct T_opthdr) + 4470 sizeof (uint_t); 4471 toh->status = 0; 4472 dstopt += sizeof (struct T_opthdr); 4473 dstptr = (uint_t *)dstopt; 4474 *dstptr = pinfo->in_pkt_ifindex; 4475 dstopt += sizeof (uint_t); 4476 udi_size -= toh->len; 4477 } 4478 4479 if (cr != NULL) { 4480 struct T_opthdr *toh; 4481 4482 toh = (struct T_opthdr *)dstopt; 4483 toh->level = SOL_SOCKET; 4484 toh->name = SCM_UCRED; 4485 toh->len = sizeof (struct T_opthdr) + ucredsize; 4486 toh->status = 0; 4487 (void) cred2ucred(cr, cpid, &toh[1]); 4488 dstopt += toh->len; 4489 udi_size -= toh->len; 4490 } 4491 4492 if (udp->udp_recvttl) { 4493 struct T_opthdr *toh; 4494 uint8_t *dstptr; 4495 4496 toh = (struct T_opthdr *)dstopt; 4497 toh->level = IPPROTO_IP; 4498 toh->name = IP_RECVTTL; 4499 toh->len = sizeof (struct T_opthdr) + 4500 sizeof (uint8_t); 4501 toh->status = 0; 4502 dstopt += sizeof (struct T_opthdr); 4503 dstptr = (uint8_t *)dstopt; 4504 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4505 dstopt += sizeof (uint8_t); 4506 udi_size -= toh->len; 4507 } 4508 4509 /* Consumed all of allocated space */ 4510 ASSERT(udi_size == 0); 4511 } 4512 } else { 4513 sin6_t *sin6; 4514 4515 /* 4516 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4517 * 4518 * Normally we only send up the address. If receiving of any 4519 * optional receive side information is enabled, we also send 4520 * that up as options. 4521 * [ Only udp_rput_other() handles packets that contain IP 4522 * options so code to account for does not appear immediately 4523 * below but elsewhere ] 4524 */ 4525 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4526 4527 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4528 IPPF_RTHDR|IPPF_IFINDEX)) { 4529 if (udp->udp_ipv6_recvhopopts && 4530 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4531 udi_size += sizeof (struct T_opthdr) + 4532 ipp.ipp_hopoptslen; 4533 UDP_STAT(udp_in_recvhopopts); 4534 } 4535 if ((udp->udp_ipv6_recvdstopts || 4536 udp->udp_old_ipv6_recvdstopts) && 4537 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4538 udi_size += sizeof (struct T_opthdr) + 4539 ipp.ipp_dstoptslen; 4540 UDP_STAT(udp_in_recvdstopts); 4541 } 4542 if (((udp->udp_ipv6_recvdstopts && 4543 udp->udp_ipv6_recvrthdr && 4544 (ipp.ipp_fields & IPPF_RTHDR)) || 4545 udp->udp_ipv6_recvrthdrdstopts) && 4546 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4547 udi_size += sizeof (struct T_opthdr) + 4548 ipp.ipp_rtdstoptslen; 4549 UDP_STAT(udp_in_recvrtdstopts); 4550 } 4551 if (udp->udp_ipv6_recvrthdr && 4552 (ipp.ipp_fields & IPPF_RTHDR)) { 4553 udi_size += sizeof (struct T_opthdr) + 4554 ipp.ipp_rthdrlen; 4555 UDP_STAT(udp_in_recvrthdr); 4556 } 4557 if (udp->udp_ipv6_recvpktinfo && 4558 (ipp.ipp_fields & IPPF_IFINDEX)) { 4559 udi_size += sizeof (struct T_opthdr) + 4560 sizeof (struct in6_pktinfo); 4561 UDP_STAT(udp_in_recvpktinfo); 4562 } 4563 4564 } 4565 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4566 udi_size += sizeof (struct T_opthdr) + ucredsize; 4567 cpid = DB_CPID(mp); 4568 UDP_STAT(udp_in_recvucred); 4569 } 4570 4571 if (udp->udp_ipv6_recvhoplimit) { 4572 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4573 UDP_STAT(udp_in_recvhoplimit); 4574 } 4575 4576 if (udp->udp_ipv6_recvtclass) { 4577 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4578 UDP_STAT(udp_in_recvtclass); 4579 } 4580 4581 mp1 = allocb(udi_size, BPRI_MED); 4582 if (mp1 == NULL) { 4583 freemsg(mp); 4584 if (options_mp != NULL) 4585 freeb(options_mp); 4586 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4587 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4588 BUMP_MIB(&udp_mib, udpInErrors); 4589 return; 4590 } 4591 mp1->b_cont = mp; 4592 mp = mp1; 4593 mp->b_datap->db_type = M_PROTO; 4594 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4595 mp->b_wptr = (uchar_t *)tudi + udi_size; 4596 tudi->PRIM_type = T_UNITDATA_IND; 4597 tudi->SRC_length = sizeof (sin6_t); 4598 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4599 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4600 sizeof (sin6_t); 4601 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4602 tudi->OPT_length = udi_size; 4603 sin6 = (sin6_t *)&tudi[1]; 4604 if (ipversion == IPV4_VERSION) { 4605 in6_addr_t v6dst; 4606 4607 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4608 &sin6->sin6_addr); 4609 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4610 &v6dst); 4611 sin6->sin6_flowinfo = 0; 4612 sin6->sin6_scope_id = 0; 4613 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4614 connp->conn_zoneid); 4615 } else { 4616 sin6->sin6_addr = ip6h->ip6_src; 4617 /* No sin6_flowinfo per API */ 4618 sin6->sin6_flowinfo = 0; 4619 /* For link-scope source pass up scope id */ 4620 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4621 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4622 sin6->sin6_scope_id = ipp.ipp_ifindex; 4623 else 4624 sin6->sin6_scope_id = 0; 4625 sin6->__sin6_src_id = ip_srcid_find_addr( 4626 &ip6h->ip6_dst, connp->conn_zoneid); 4627 } 4628 sin6->sin6_port = udpha->uha_src_port; 4629 sin6->sin6_family = udp->udp_family; 4630 4631 if (udi_size != 0) { 4632 uchar_t *dstopt; 4633 4634 dstopt = (uchar_t *)&sin6[1]; 4635 if (udp->udp_ipv6_recvpktinfo && 4636 (ipp.ipp_fields & IPPF_IFINDEX)) { 4637 struct T_opthdr *toh; 4638 struct in6_pktinfo *pkti; 4639 4640 toh = (struct T_opthdr *)dstopt; 4641 toh->level = IPPROTO_IPV6; 4642 toh->name = IPV6_PKTINFO; 4643 toh->len = sizeof (struct T_opthdr) + 4644 sizeof (*pkti); 4645 toh->status = 0; 4646 dstopt += sizeof (struct T_opthdr); 4647 pkti = (struct in6_pktinfo *)dstopt; 4648 if (ipversion == IPV6_VERSION) 4649 pkti->ipi6_addr = ip6h->ip6_dst; 4650 else 4651 IN6_IPADDR_TO_V4MAPPED( 4652 ((ipha_t *)rptr)->ipha_dst, 4653 &pkti->ipi6_addr); 4654 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4655 dstopt += sizeof (*pkti); 4656 udi_size -= toh->len; 4657 } 4658 if (udp->udp_ipv6_recvhoplimit) { 4659 struct T_opthdr *toh; 4660 4661 toh = (struct T_opthdr *)dstopt; 4662 toh->level = IPPROTO_IPV6; 4663 toh->name = IPV6_HOPLIMIT; 4664 toh->len = sizeof (struct T_opthdr) + 4665 sizeof (uint_t); 4666 toh->status = 0; 4667 dstopt += sizeof (struct T_opthdr); 4668 if (ipversion == IPV6_VERSION) 4669 *(uint_t *)dstopt = ip6h->ip6_hops; 4670 else 4671 *(uint_t *)dstopt = 4672 ((ipha_t *)rptr)->ipha_ttl; 4673 dstopt += sizeof (uint_t); 4674 udi_size -= toh->len; 4675 } 4676 if (udp->udp_ipv6_recvtclass) { 4677 struct T_opthdr *toh; 4678 4679 toh = (struct T_opthdr *)dstopt; 4680 toh->level = IPPROTO_IPV6; 4681 toh->name = IPV6_TCLASS; 4682 toh->len = sizeof (struct T_opthdr) + 4683 sizeof (uint_t); 4684 toh->status = 0; 4685 dstopt += sizeof (struct T_opthdr); 4686 if (ipversion == IPV6_VERSION) { 4687 *(uint_t *)dstopt = 4688 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4689 } else { 4690 ipha_t *ipha = (ipha_t *)rptr; 4691 *(uint_t *)dstopt = 4692 ipha->ipha_type_of_service; 4693 } 4694 dstopt += sizeof (uint_t); 4695 udi_size -= toh->len; 4696 } 4697 if (udp->udp_ipv6_recvhopopts && 4698 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4699 struct T_opthdr *toh; 4700 4701 toh = (struct T_opthdr *)dstopt; 4702 toh->level = IPPROTO_IPV6; 4703 toh->name = IPV6_HOPOPTS; 4704 toh->len = sizeof (struct T_opthdr) + 4705 ipp.ipp_hopoptslen; 4706 toh->status = 0; 4707 dstopt += sizeof (struct T_opthdr); 4708 bcopy(ipp.ipp_hopopts, dstopt, 4709 ipp.ipp_hopoptslen); 4710 dstopt += ipp.ipp_hopoptslen; 4711 udi_size -= toh->len; 4712 } 4713 if (udp->udp_ipv6_recvdstopts && 4714 udp->udp_ipv6_recvrthdr && 4715 (ipp.ipp_fields & IPPF_RTHDR) && 4716 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4717 struct T_opthdr *toh; 4718 4719 toh = (struct T_opthdr *)dstopt; 4720 toh->level = IPPROTO_IPV6; 4721 toh->name = IPV6_DSTOPTS; 4722 toh->len = sizeof (struct T_opthdr) + 4723 ipp.ipp_rtdstoptslen; 4724 toh->status = 0; 4725 dstopt += sizeof (struct T_opthdr); 4726 bcopy(ipp.ipp_rtdstopts, dstopt, 4727 ipp.ipp_rtdstoptslen); 4728 dstopt += ipp.ipp_rtdstoptslen; 4729 udi_size -= toh->len; 4730 } 4731 if (udp->udp_ipv6_recvrthdr && 4732 (ipp.ipp_fields & IPPF_RTHDR)) { 4733 struct T_opthdr *toh; 4734 4735 toh = (struct T_opthdr *)dstopt; 4736 toh->level = IPPROTO_IPV6; 4737 toh->name = IPV6_RTHDR; 4738 toh->len = sizeof (struct T_opthdr) + 4739 ipp.ipp_rthdrlen; 4740 toh->status = 0; 4741 dstopt += sizeof (struct T_opthdr); 4742 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4743 dstopt += ipp.ipp_rthdrlen; 4744 udi_size -= toh->len; 4745 } 4746 if (udp->udp_ipv6_recvdstopts && 4747 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4748 struct T_opthdr *toh; 4749 4750 toh = (struct T_opthdr *)dstopt; 4751 toh->level = IPPROTO_IPV6; 4752 toh->name = IPV6_DSTOPTS; 4753 toh->len = sizeof (struct T_opthdr) + 4754 ipp.ipp_dstoptslen; 4755 toh->status = 0; 4756 dstopt += sizeof (struct T_opthdr); 4757 bcopy(ipp.ipp_dstopts, dstopt, 4758 ipp.ipp_dstoptslen); 4759 dstopt += ipp.ipp_dstoptslen; 4760 udi_size -= toh->len; 4761 } 4762 4763 if (cr != NULL) { 4764 struct T_opthdr *toh; 4765 4766 toh = (struct T_opthdr *)dstopt; 4767 toh->level = SOL_SOCKET; 4768 toh->name = SCM_UCRED; 4769 toh->len = sizeof (struct T_opthdr) + ucredsize; 4770 toh->status = 0; 4771 (void) cred2ucred(cr, cpid, &toh[1]); 4772 dstopt += toh->len; 4773 udi_size -= toh->len; 4774 } 4775 /* Consumed all of allocated space */ 4776 ASSERT(udi_size == 0); 4777 } 4778 #undef sin6 4779 /* No IP_RECVDSTADDR for IPv6. */ 4780 } 4781 4782 BUMP_MIB(&udp_mib, udpInDatagrams); 4783 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4784 "udp_rput_end: q %p (%S)", q, "end"); 4785 if (options_mp != NULL) 4786 freeb(options_mp); 4787 4788 if (udp->udp_direct_sockfs) { 4789 /* 4790 * There is nothing above us except for the stream head; 4791 * use the read-side synchronous stream interface in 4792 * order to reduce the time spent in interrupt thread. 4793 */ 4794 ASSERT(udp->udp_issocket); 4795 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 4796 } else { 4797 /* 4798 * Use regular STREAMS interface to pass data upstream 4799 * if this is not a socket endpoint, or if we have 4800 * switched over to the slow mode due to sockmod being 4801 * popped or a module being pushed on top of us. 4802 */ 4803 putnext(UDP_RD(q), mp); 4804 } 4805 return; 4806 4807 tossit: 4808 freemsg(mp); 4809 if (options_mp != NULL) 4810 freeb(options_mp); 4811 BUMP_MIB(&udp_mib, udpInErrors); 4812 } 4813 4814 void 4815 udp_conn_recv(conn_t *connp, mblk_t *mp) 4816 { 4817 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 4818 } 4819 4820 /* ARGSUSED */ 4821 static void 4822 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 4823 { 4824 udp_input((conn_t *)arg, mp); 4825 _UDP_EXIT((conn_t *)arg); 4826 } 4827 4828 /* 4829 * Process non-M_DATA messages as well as M_DATA messages that requires 4830 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 4831 */ 4832 static void 4833 udp_rput_other(queue_t *q, mblk_t *mp) 4834 { 4835 struct T_unitdata_ind *tudi; 4836 mblk_t *mp1; 4837 uchar_t *rptr; 4838 uchar_t *new_rptr; 4839 int hdr_length; 4840 int udi_size; /* Size of T_unitdata_ind */ 4841 int opt_len; /* Length of IP options */ 4842 sin_t *sin; 4843 struct T_error_ack *tea; 4844 mblk_t *options_mp = NULL; 4845 in_pktinfo_t *pinfo; 4846 boolean_t recv_on = B_FALSE; 4847 cred_t *cr = NULL; 4848 udp_t *udp = Q_TO_UDP(q); 4849 pid_t cpid; 4850 4851 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4852 "udp_rput_other: q %p mp %p", q, mp); 4853 4854 ASSERT(OK_32PTR(mp->b_rptr)); 4855 rptr = mp->b_rptr; 4856 4857 switch (mp->b_datap->db_type) { 4858 case M_CTL: 4859 /* 4860 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 4861 */ 4862 recv_on = B_TRUE; 4863 options_mp = mp; 4864 pinfo = (in_pktinfo_t *)options_mp->b_rptr; 4865 4866 /* 4867 * The actual data is in mp->b_cont 4868 */ 4869 mp = mp->b_cont; 4870 ASSERT(OK_32PTR(mp->b_rptr)); 4871 rptr = mp->b_rptr; 4872 break; 4873 case M_DATA: 4874 /* 4875 * M_DATA messages contain IPv4 datagrams. They are handled 4876 * after this switch. 4877 */ 4878 break; 4879 case M_PROTO: 4880 case M_PCPROTO: 4881 /* M_PROTO messages contain some type of TPI message. */ 4882 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 4883 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4884 freemsg(mp); 4885 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4886 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 4887 return; 4888 } 4889 tea = (struct T_error_ack *)rptr; 4890 4891 switch (tea->PRIM_type) { 4892 case T_ERROR_ACK: 4893 switch (tea->ERROR_prim) { 4894 case O_T_BIND_REQ: 4895 case T_BIND_REQ: { 4896 /* 4897 * If our O_T_BIND_REQ/T_BIND_REQ fails, 4898 * clear out the associated port and source 4899 * address before passing the message 4900 * upstream. If this was caused by a T_CONN_REQ 4901 * revert back to bound state. 4902 */ 4903 udp_fanout_t *udpf; 4904 4905 udpf = &udp_bind_fanout[ 4906 UDP_BIND_HASH(udp->udp_port)]; 4907 mutex_enter(&udpf->uf_lock); 4908 if (udp->udp_state == TS_DATA_XFER) { 4909 /* Connect failed */ 4910 tea->ERROR_prim = T_CONN_REQ; 4911 /* Revert back to the bound source */ 4912 udp->udp_v6src = udp->udp_bound_v6src; 4913 udp->udp_state = TS_IDLE; 4914 mutex_exit(&udpf->uf_lock); 4915 if (udp->udp_family == AF_INET6) 4916 (void) udp_build_hdrs(q, udp); 4917 break; 4918 } 4919 4920 if (udp->udp_discon_pending) { 4921 tea->ERROR_prim = T_DISCON_REQ; 4922 udp->udp_discon_pending = 0; 4923 } 4924 V6_SET_ZERO(udp->udp_v6src); 4925 V6_SET_ZERO(udp->udp_bound_v6src); 4926 udp->udp_state = TS_UNBND; 4927 udp_bind_hash_remove(udp, B_TRUE); 4928 udp->udp_port = 0; 4929 mutex_exit(&udpf->uf_lock); 4930 if (udp->udp_family == AF_INET6) 4931 (void) udp_build_hdrs(q, udp); 4932 break; 4933 } 4934 default: 4935 break; 4936 } 4937 break; 4938 case T_BIND_ACK: 4939 udp_rput_bind_ack(q, mp); 4940 return; 4941 4942 case T_OPTMGMT_ACK: 4943 case T_OK_ACK: 4944 break; 4945 default: 4946 freemsg(mp); 4947 return; 4948 } 4949 putnext(UDP_RD(q), mp); 4950 return; 4951 } 4952 4953 /* 4954 * This is the inbound data path. 4955 * First, we make sure the data contains both IP and UDP headers. 4956 * 4957 * This handle IPv4 packets for only AF_INET sockets. 4958 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 4959 * is no need saving the options. 4960 */ 4961 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4962 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4963 if (mp->b_wptr - rptr < hdr_length) { 4964 if (!pullupmsg(mp, hdr_length)) { 4965 freemsg(mp); 4966 if (options_mp != NULL) 4967 freeb(options_mp); 4968 BUMP_MIB(&udp_mib, udpInErrors); 4969 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4970 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 4971 BUMP_MIB(&udp_mib, udpInErrors); 4972 return; 4973 } 4974 rptr = mp->b_rptr; 4975 } 4976 /* Walk past the headers. */ 4977 new_rptr = rptr + hdr_length; 4978 if (!udp->udp_rcvhdr) 4979 mp->b_rptr = new_rptr; 4980 4981 /* Save the options if any */ 4982 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 4983 if (opt_len > 0) { 4984 if (opt_len > udp->udp_ip_rcv_options_len) { 4985 if (udp->udp_ip_rcv_options_len) 4986 mi_free((char *)udp->udp_ip_rcv_options); 4987 udp->udp_ip_rcv_options_len = 0; 4988 udp->udp_ip_rcv_options = 4989 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 4990 if (udp->udp_ip_rcv_options) 4991 udp->udp_ip_rcv_options_len = opt_len; 4992 } 4993 if (udp->udp_ip_rcv_options_len) { 4994 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 4995 udp->udp_ip_rcv_options, opt_len); 4996 /* Adjust length if we are resusing the space */ 4997 udp->udp_ip_rcv_options_len = opt_len; 4998 } 4999 } else if (udp->udp_ip_rcv_options_len) { 5000 mi_free((char *)udp->udp_ip_rcv_options); 5001 udp->udp_ip_rcv_options = NULL; 5002 udp->udp_ip_rcv_options_len = 0; 5003 } 5004 5005 /* 5006 * Normally only send up the address. 5007 * If IP_RECVDSTADDR is set we include the destination IP 5008 * address as an option. With IP_RECVOPTS we include all 5009 * the IP options. 5010 */ 5011 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5012 if (udp->udp_recvdstaddr) { 5013 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5014 UDP_STAT(udp_in_recvdstaddr); 5015 } 5016 if (udp->udp_recvopts && opt_len > 0) { 5017 udi_size += sizeof (struct T_opthdr) + opt_len; 5018 UDP_STAT(udp_in_recvopts); 5019 } 5020 5021 /* 5022 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5023 * space accordingly 5024 */ 5025 if (udp->udp_recvif && recv_on && 5026 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5027 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5028 UDP_STAT(udp_in_recvif); 5029 } 5030 5031 if (udp->udp_recvslla && recv_on && 5032 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5033 udi_size += sizeof (struct T_opthdr) + 5034 sizeof (struct sockaddr_dl); 5035 UDP_STAT(udp_in_recvslla); 5036 } 5037 5038 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5039 udi_size += sizeof (struct T_opthdr) + ucredsize; 5040 cpid = DB_CPID(mp); 5041 UDP_STAT(udp_in_recvucred); 5042 } 5043 /* 5044 * If IP_RECVTTL is set allocate the appropriate sized buffer 5045 */ 5046 if (udp->udp_recvttl) { 5047 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5048 UDP_STAT(udp_in_recvttl); 5049 } 5050 5051 /* Allocate a message block for the T_UNITDATA_IND structure. */ 5052 mp1 = allocb(udi_size, BPRI_MED); 5053 if (mp1 == NULL) { 5054 freemsg(mp); 5055 if (options_mp != NULL) 5056 freeb(options_mp); 5057 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5058 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5059 BUMP_MIB(&udp_mib, udpInErrors); 5060 return; 5061 } 5062 mp1->b_cont = mp; 5063 mp = mp1; 5064 mp->b_datap->db_type = M_PROTO; 5065 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5066 mp->b_wptr = (uchar_t *)tudi + udi_size; 5067 tudi->PRIM_type = T_UNITDATA_IND; 5068 tudi->SRC_length = sizeof (sin_t); 5069 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5070 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5071 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5072 tudi->OPT_length = udi_size; 5073 5074 sin = (sin_t *)&tudi[1]; 5075 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5076 sin->sin_port = ((in_port_t *) 5077 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5078 sin->sin_family = AF_INET; 5079 *(uint32_t *)&sin->sin_zero[0] = 0; 5080 *(uint32_t *)&sin->sin_zero[4] = 0; 5081 5082 /* 5083 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5084 * IP_RECVTTL has been set. 5085 */ 5086 if (udi_size != 0) { 5087 /* 5088 * Copy in destination address before options to avoid any 5089 * padding issues. 5090 */ 5091 char *dstopt; 5092 5093 dstopt = (char *)&sin[1]; 5094 if (udp->udp_recvdstaddr) { 5095 struct T_opthdr *toh; 5096 ipaddr_t *dstptr; 5097 5098 toh = (struct T_opthdr *)dstopt; 5099 toh->level = IPPROTO_IP; 5100 toh->name = IP_RECVDSTADDR; 5101 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5102 toh->status = 0; 5103 dstopt += sizeof (struct T_opthdr); 5104 dstptr = (ipaddr_t *)dstopt; 5105 *dstptr = (((ipaddr_t *)rptr)[4]); 5106 dstopt += sizeof (ipaddr_t); 5107 udi_size -= toh->len; 5108 } 5109 if (udp->udp_recvopts && udi_size != 0) { 5110 struct T_opthdr *toh; 5111 5112 toh = (struct T_opthdr *)dstopt; 5113 toh->level = IPPROTO_IP; 5114 toh->name = IP_RECVOPTS; 5115 toh->len = sizeof (struct T_opthdr) + opt_len; 5116 toh->status = 0; 5117 dstopt += sizeof (struct T_opthdr); 5118 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5119 dstopt += opt_len; 5120 udi_size -= toh->len; 5121 } 5122 5123 if (udp->udp_recvslla && recv_on && 5124 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5125 5126 struct T_opthdr *toh; 5127 struct sockaddr_dl *dstptr; 5128 5129 toh = (struct T_opthdr *)dstopt; 5130 toh->level = IPPROTO_IP; 5131 toh->name = IP_RECVSLLA; 5132 toh->len = sizeof (struct T_opthdr) + 5133 sizeof (struct sockaddr_dl); 5134 toh->status = 0; 5135 dstopt += sizeof (struct T_opthdr); 5136 dstptr = (struct sockaddr_dl *)dstopt; 5137 bcopy(&pinfo->in_pkt_slla, dstptr, 5138 sizeof (struct sockaddr_dl)); 5139 dstopt += sizeof (struct sockaddr_dl); 5140 udi_size -= toh->len; 5141 } 5142 5143 if (udp->udp_recvif && recv_on && 5144 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5145 5146 struct T_opthdr *toh; 5147 uint_t *dstptr; 5148 5149 toh = (struct T_opthdr *)dstopt; 5150 toh->level = IPPROTO_IP; 5151 toh->name = IP_RECVIF; 5152 toh->len = sizeof (struct T_opthdr) + 5153 sizeof (uint_t); 5154 toh->status = 0; 5155 dstopt += sizeof (struct T_opthdr); 5156 dstptr = (uint_t *)dstopt; 5157 *dstptr = pinfo->in_pkt_ifindex; 5158 dstopt += sizeof (uint_t); 5159 udi_size -= toh->len; 5160 } 5161 5162 if (cr != NULL) { 5163 struct T_opthdr *toh; 5164 5165 toh = (struct T_opthdr *)dstopt; 5166 toh->level = SOL_SOCKET; 5167 toh->name = SCM_UCRED; 5168 toh->len = sizeof (struct T_opthdr) + ucredsize; 5169 toh->status = 0; 5170 (void) cred2ucred(cr, cpid, &toh[1]); 5171 dstopt += toh->len; 5172 udi_size -= toh->len; 5173 } 5174 5175 if (udp->udp_recvttl) { 5176 struct T_opthdr *toh; 5177 uint8_t *dstptr; 5178 5179 toh = (struct T_opthdr *)dstopt; 5180 toh->level = IPPROTO_IP; 5181 toh->name = IP_RECVTTL; 5182 toh->len = sizeof (struct T_opthdr) + 5183 sizeof (uint8_t); 5184 toh->status = 0; 5185 dstopt += sizeof (struct T_opthdr); 5186 dstptr = (uint8_t *)dstopt; 5187 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5188 dstopt += sizeof (uint8_t); 5189 udi_size -= toh->len; 5190 } 5191 5192 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5193 } 5194 BUMP_MIB(&udp_mib, udpInDatagrams); 5195 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5196 "udp_rput_other_end: q %p (%S)", q, "end"); 5197 if (options_mp != NULL) 5198 freeb(options_mp); 5199 5200 if (udp->udp_direct_sockfs) { 5201 /* 5202 * There is nothing above us except for the stream head; 5203 * use the read-side synchronous stream interface in 5204 * order to reduce the time spent in interrupt thread. 5205 */ 5206 ASSERT(udp->udp_issocket); 5207 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5208 } else { 5209 /* 5210 * Use regular STREAMS interface to pass data upstream 5211 * if this is not a socket endpoint, or if we have 5212 * switched over to the slow mode due to sockmod being 5213 * popped or a module being pushed on top of us. 5214 */ 5215 putnext(UDP_RD(q), mp); 5216 } 5217 } 5218 5219 /* ARGSUSED */ 5220 static void 5221 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5222 { 5223 conn_t *connp = arg; 5224 5225 udp_rput_other(connp->conn_rq, mp); 5226 udp_exit(connp); 5227 } 5228 5229 /* 5230 * Process a T_BIND_ACK 5231 */ 5232 static void 5233 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5234 { 5235 udp_t *udp = Q_TO_UDP(q); 5236 mblk_t *mp1; 5237 ire_t *ire; 5238 struct T_bind_ack *tba; 5239 uchar_t *addrp; 5240 ipa_conn_t *ac; 5241 ipa6_conn_t *ac6; 5242 5243 if (udp->udp_discon_pending) 5244 udp->udp_discon_pending = 0; 5245 5246 /* 5247 * If a broadcast/multicast address was bound set 5248 * the source address to 0. 5249 * This ensures no datagrams with broadcast address 5250 * as source address are emitted (which would violate 5251 * RFC1122 - Hosts requirements) 5252 * 5253 * Note that when connecting the returned IRE is 5254 * for the destination address and we only perform 5255 * the broadcast check for the source address (it 5256 * is OK to connect to a broadcast/multicast address.) 5257 */ 5258 mp1 = mp->b_cont; 5259 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5260 ire = (ire_t *)mp1->b_rptr; 5261 5262 /* 5263 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5264 * local address. 5265 */ 5266 if (ire->ire_type == IRE_BROADCAST && 5267 udp->udp_state != TS_DATA_XFER) { 5268 /* This was just a local bind to a broadcast addr */ 5269 V6_SET_ZERO(udp->udp_v6src); 5270 if (udp->udp_family == AF_INET6) 5271 (void) udp_build_hdrs(q, udp); 5272 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5273 /* 5274 * Local address not yet set - pick it from the 5275 * T_bind_ack 5276 */ 5277 tba = (struct T_bind_ack *)mp->b_rptr; 5278 addrp = &mp->b_rptr[tba->ADDR_offset]; 5279 switch (udp->udp_family) { 5280 case AF_INET: 5281 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5282 ac = (ipa_conn_t *)addrp; 5283 } else { 5284 ASSERT(tba->ADDR_length == 5285 sizeof (ipa_conn_x_t)); 5286 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5287 } 5288 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5289 &udp->udp_v6src); 5290 break; 5291 case AF_INET6: 5292 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5293 ac6 = (ipa6_conn_t *)addrp; 5294 } else { 5295 ASSERT(tba->ADDR_length == 5296 sizeof (ipa6_conn_x_t)); 5297 ac6 = &((ipa6_conn_x_t *) 5298 addrp)->ac6x_conn; 5299 } 5300 udp->udp_v6src = ac6->ac6_laddr; 5301 (void) udp_build_hdrs(q, udp); 5302 break; 5303 } 5304 } 5305 mp1 = mp1->b_cont; 5306 } 5307 /* 5308 * Look for one or more appended ACK message added by 5309 * udp_connect or udp_disconnect. 5310 * If none found just send up the T_BIND_ACK. 5311 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5312 * udp_disconnect has appended a T_OK_ACK. 5313 */ 5314 if (mp1 != NULL) { 5315 if (mp->b_cont == mp1) 5316 mp->b_cont = NULL; 5317 else { 5318 ASSERT(mp->b_cont->b_cont == mp1); 5319 mp->b_cont->b_cont = NULL; 5320 } 5321 freemsg(mp); 5322 mp = mp1; 5323 while (mp != NULL) { 5324 mp1 = mp->b_cont; 5325 mp->b_cont = NULL; 5326 putnext(UDP_RD(q), mp); 5327 mp = mp1; 5328 } 5329 return; 5330 } 5331 freemsg(mp->b_cont); 5332 mp->b_cont = NULL; 5333 putnext(UDP_RD(q), mp); 5334 } 5335 5336 /* 5337 * return SNMP stuff in buffer in mpdata 5338 */ 5339 int 5340 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5341 { 5342 mblk_t *mpdata; 5343 mblk_t *mp_conn_ctl; 5344 mblk_t *mp6_conn_ctl; 5345 mblk_t *mp_conn_data; 5346 mblk_t *mp6_conn_data; 5347 mblk_t *mp_conn_tail = NULL; 5348 mblk_t *mp6_conn_tail = NULL; 5349 struct opthdr *optp; 5350 mib2_udpEntry_t ude; 5351 mib2_udp6Entry_t ude6; 5352 int state; 5353 zoneid_t zoneid; 5354 int i; 5355 connf_t *connfp; 5356 conn_t *connp = Q_TO_CONN(q); 5357 udp_t *udp = connp->conn_udp; 5358 5359 if (mpctl == NULL || 5360 (mpdata = mpctl->b_cont) == NULL || 5361 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5362 (mp6_conn_ctl = copymsg(mpctl)) == NULL) { 5363 freemsg(mp_conn_ctl); 5364 return (0); 5365 } 5366 5367 mp_conn_data = mp_conn_ctl->b_cont; 5368 mp6_conn_data = mp6_conn_ctl->b_cont; 5369 5370 zoneid = connp->conn_zoneid; 5371 5372 /* fixed length structure for IPv4 and IPv6 counters */ 5373 SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5374 SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5375 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5376 optp->level = MIB2_UDP; 5377 optp->name = 0; 5378 (void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib)); 5379 optp->len = msgdsize(mpdata); 5380 qreply(q, mpctl); 5381 5382 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5383 connfp = &ipcl_globalhash_fanout[i]; 5384 connp = NULL; 5385 5386 while ((connp = ipcl_get_next_conn(connfp, connp, 5387 IPCL_UDP))) { 5388 udp = connp->conn_udp; 5389 if (zoneid != connp->conn_zoneid) 5390 continue; 5391 5392 /* 5393 * Note that the port numbers are sent in 5394 * host byte order 5395 */ 5396 5397 if (udp->udp_state == TS_UNBND) 5398 state = MIB2_UDP_unbound; 5399 else if (udp->udp_state == TS_IDLE) 5400 state = MIB2_UDP_idle; 5401 else if (udp->udp_state == TS_DATA_XFER) 5402 state = MIB2_UDP_connected; 5403 else 5404 state = MIB2_UDP_unknown; 5405 5406 /* 5407 * Create an IPv4 table entry for IPv4 entries and also 5408 * any IPv6 entries which are bound to in6addr_any 5409 * (i.e. anything a IPv4 peer could connect/send to). 5410 */ 5411 if (udp->udp_ipversion == IPV4_VERSION || 5412 (udp->udp_state <= TS_IDLE && 5413 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5414 ude.udpEntryInfo.ue_state = state; 5415 /* 5416 * If in6addr_any this will set it to 5417 * INADDR_ANY 5418 */ 5419 ude.udpLocalAddress = 5420 V4_PART_OF_V6(udp->udp_v6src); 5421 ude.udpLocalPort = ntohs(udp->udp_port); 5422 if (udp->udp_state == TS_DATA_XFER) { 5423 /* 5424 * Can potentially get here for 5425 * v6 socket if another process 5426 * (say, ping) has just done a 5427 * sendto(), changing the state 5428 * from the TS_IDLE above to 5429 * TS_DATA_XFER by the time we hit 5430 * this part of the code. 5431 */ 5432 ude.udpEntryInfo.ue_RemoteAddress = 5433 V4_PART_OF_V6(udp->udp_v6dst); 5434 ude.udpEntryInfo.ue_RemotePort = 5435 ntohs(udp->udp_dstport); 5436 } else { 5437 ude.udpEntryInfo.ue_RemoteAddress = 0; 5438 ude.udpEntryInfo.ue_RemotePort = 0; 5439 } 5440 (void) snmp_append_data2(mp_conn_data, 5441 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5442 } 5443 if (udp->udp_ipversion == IPV6_VERSION) { 5444 ude6.udp6EntryInfo.ue_state = state; 5445 ude6.udp6LocalAddress = udp->udp_v6src; 5446 ude6.udp6LocalPort = ntohs(udp->udp_port); 5447 ude6.udp6IfIndex = udp->udp_bound_if; 5448 if (udp->udp_state == TS_DATA_XFER) { 5449 ude6.udp6EntryInfo.ue_RemoteAddress = 5450 udp->udp_v6dst; 5451 ude6.udp6EntryInfo.ue_RemotePort = 5452 ntohs(udp->udp_dstport); 5453 } else { 5454 ude6.udp6EntryInfo.ue_RemoteAddress = 5455 sin6_null.sin6_addr; 5456 ude6.udp6EntryInfo.ue_RemotePort = 0; 5457 } 5458 (void) snmp_append_data2(mp6_conn_data, 5459 &mp6_conn_tail, (char *)&ude6, 5460 sizeof (ude6)); 5461 } 5462 } 5463 } 5464 5465 /* IPv4 UDP endpoints */ 5466 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5467 sizeof (struct T_optmgmt_ack)]; 5468 optp->level = MIB2_UDP; 5469 optp->name = MIB2_UDP_ENTRY; 5470 optp->len = msgdsize(mp_conn_data); 5471 qreply(q, mp_conn_ctl); 5472 5473 /* IPv6 UDP endpoints */ 5474 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5475 sizeof (struct T_optmgmt_ack)]; 5476 optp->level = MIB2_UDP6; 5477 optp->name = MIB2_UDP6_ENTRY; 5478 optp->len = msgdsize(mp6_conn_data); 5479 qreply(q, mp6_conn_ctl); 5480 5481 return (1); 5482 } 5483 5484 /* 5485 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 5486 * NOTE: Per MIB-II, UDP has no writable data. 5487 * TODO: If this ever actually tries to set anything, it needs to be 5488 * to do the appropriate locking. 5489 */ 5490 /* ARGSUSED */ 5491 int 5492 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5493 uchar_t *ptr, int len) 5494 { 5495 switch (level) { 5496 case MIB2_UDP: 5497 return (0); 5498 default: 5499 return (1); 5500 } 5501 } 5502 5503 static void 5504 udp_report_item(mblk_t *mp, udp_t *udp) 5505 { 5506 char *state; 5507 char addrbuf1[INET6_ADDRSTRLEN]; 5508 char addrbuf2[INET6_ADDRSTRLEN]; 5509 uint_t print_len, buf_len; 5510 5511 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5512 ASSERT(buf_len >= 0); 5513 if (buf_len == 0) 5514 return; 5515 5516 if (udp->udp_state == TS_UNBND) 5517 state = "UNBOUND"; 5518 else if (udp->udp_state == TS_IDLE) 5519 state = "IDLE"; 5520 else if (udp->udp_state == TS_DATA_XFER) 5521 state = "CONNECTED"; 5522 else 5523 state = "UnkState"; 5524 print_len = snprintf((char *)mp->b_wptr, buf_len, 5525 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5526 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5527 inet_ntop(AF_INET6, &udp->udp_v6src, 5528 addrbuf1, sizeof (addrbuf1)), 5529 inet_ntop(AF_INET6, &udp->udp_v6dst, 5530 addrbuf2, sizeof (addrbuf2)), 5531 ntohs(udp->udp_dstport), state); 5532 if (print_len < buf_len) { 5533 mp->b_wptr += print_len; 5534 } else { 5535 mp->b_wptr += buf_len; 5536 } 5537 } 5538 5539 /* Report for ndd "udp_status" */ 5540 /* ARGSUSED */ 5541 static int 5542 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5543 { 5544 zoneid_t zoneid; 5545 connf_t *connfp; 5546 conn_t *connp = Q_TO_CONN(q); 5547 udp_t *udp = connp->conn_udp; 5548 int i; 5549 5550 /* 5551 * Because of the ndd constraint, at most we can have 64K buffer 5552 * to put in all UDP info. So to be more efficient, just 5553 * allocate a 64K buffer here, assuming we need that large buffer. 5554 * This may be a problem as any user can read udp_status. Therefore 5555 * we limit the rate of doing this using udp_ndd_get_info_interval. 5556 * This should be OK as normal users should not do this too often. 5557 */ 5558 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 5559 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 5560 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 5561 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5562 return (0); 5563 } 5564 } 5565 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5566 /* The following may work even if we cannot get a large buf. */ 5567 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5568 return (0); 5569 } 5570 (void) mi_mpprintf(mp, 5571 "UDP " MI_COL_HDRPAD_STR 5572 /* 12345678[89ABCDEF] */ 5573 " zone lport src addr dest addr port state"); 5574 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5575 5576 zoneid = connp->conn_zoneid; 5577 5578 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5579 connfp = &ipcl_globalhash_fanout[i]; 5580 connp = NULL; 5581 5582 while ((connp = ipcl_get_next_conn(connfp, connp, 5583 IPCL_UDP))) { 5584 udp = connp->conn_udp; 5585 if (zoneid != GLOBAL_ZONEID && 5586 zoneid != connp->conn_zoneid) 5587 continue; 5588 5589 udp_report_item(mp->b_cont, udp); 5590 } 5591 } 5592 udp_last_ndd_get_info_time = ddi_get_lbolt(); 5593 return (0); 5594 } 5595 5596 /* 5597 * This routine creates a T_UDERROR_IND message and passes it upstream. 5598 * The address and options are copied from the T_UNITDATA_REQ message 5599 * passed in mp. This message is freed. 5600 */ 5601 static void 5602 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5603 t_scalar_t err) 5604 { 5605 struct T_unitdata_req *tudr; 5606 mblk_t *mp1; 5607 uchar_t *optaddr; 5608 t_scalar_t optlen; 5609 5610 if (DB_TYPE(mp) == M_DATA) { 5611 ASSERT(destaddr != NULL && destlen != 0); 5612 optaddr = NULL; 5613 optlen = 0; 5614 } else { 5615 if ((mp->b_wptr < mp->b_rptr) || 5616 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5617 goto done; 5618 } 5619 tudr = (struct T_unitdata_req *)mp->b_rptr; 5620 destaddr = mp->b_rptr + tudr->DEST_offset; 5621 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5622 destaddr + tudr->DEST_length < mp->b_rptr || 5623 destaddr + tudr->DEST_length > mp->b_wptr) { 5624 goto done; 5625 } 5626 optaddr = mp->b_rptr + tudr->OPT_offset; 5627 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 5628 optaddr + tudr->OPT_length < mp->b_rptr || 5629 optaddr + tudr->OPT_length > mp->b_wptr) { 5630 goto done; 5631 } 5632 destlen = tudr->DEST_length; 5633 optlen = tudr->OPT_length; 5634 } 5635 5636 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5637 (char *)optaddr, optlen, err); 5638 if (mp1 != NULL) 5639 putnext(UDP_RD(q), mp1); 5640 5641 done: 5642 freemsg(mp); 5643 } 5644 5645 /* 5646 * This routine removes a port number association from a stream. It 5647 * is called by udp_wput to handle T_UNBIND_REQ messages. 5648 */ 5649 static void 5650 udp_unbind(queue_t *q, mblk_t *mp) 5651 { 5652 udp_t *udp = Q_TO_UDP(q); 5653 5654 /* If a bind has not been done, we can't unbind. */ 5655 if (udp->udp_state == TS_UNBND) { 5656 udp_err_ack(q, mp, TOUTSTATE, 0); 5657 return; 5658 } 5659 if (cl_inet_unbind != NULL) { 5660 /* 5661 * Running in cluster mode - register unbind information 5662 */ 5663 if (udp->udp_ipversion == IPV4_VERSION) { 5664 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 5665 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5666 (in_port_t)udp->udp_port); 5667 } else { 5668 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5669 (uint8_t *)&(udp->udp_v6src), 5670 (in_port_t)udp->udp_port); 5671 } 5672 } 5673 5674 udp_bind_hash_remove(udp, B_FALSE); 5675 V6_SET_ZERO(udp->udp_v6src); 5676 V6_SET_ZERO(udp->udp_bound_v6src); 5677 udp->udp_port = 0; 5678 udp->udp_state = TS_UNBND; 5679 5680 if (udp->udp_family == AF_INET6) { 5681 int error; 5682 5683 /* Rebuild the header template */ 5684 error = udp_build_hdrs(q, udp); 5685 if (error != 0) { 5686 udp_err_ack(q, mp, TSYSERR, error); 5687 return; 5688 } 5689 } 5690 /* 5691 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5692 * and therefore ip_unbind must never return NULL. 5693 */ 5694 mp = ip_unbind(q, mp); 5695 ASSERT(mp != NULL); 5696 putnext(UDP_RD(q), mp); 5697 } 5698 5699 /* 5700 * Don't let port fall into the privileged range. 5701 * Since the extra priviledged ports can be arbitrary we also 5702 * ensure that we exclude those from consideration. 5703 * udp_g_epriv_ports is not sorted thus we loop over it until 5704 * there are no changes. 5705 */ 5706 static in_port_t 5707 udp_update_next_port(in_port_t port, boolean_t random) 5708 { 5709 int i; 5710 5711 if (random && udp_random_anon_port != 0) { 5712 (void) random_get_pseudo_bytes((uint8_t *)&port, 5713 sizeof (in_port_t)); 5714 /* 5715 * Unless changed by a sys admin, the smallest anon port 5716 * is 32768 and the largest anon port is 65535. It is 5717 * very likely (50%) for the random port to be smaller 5718 * than the smallest anon port. When that happens, 5719 * add port % (anon port range) to the smallest anon 5720 * port to get the random port. It should fall into the 5721 * valid anon port range. 5722 */ 5723 if (port < udp_smallest_anon_port) { 5724 port = udp_smallest_anon_port + 5725 port % (udp_largest_anon_port - 5726 udp_smallest_anon_port); 5727 } 5728 } 5729 5730 retry: 5731 if (port < udp_smallest_anon_port || port > udp_largest_anon_port) 5732 port = udp_smallest_anon_port; 5733 5734 if (port < udp_smallest_nonpriv_port) 5735 port = udp_smallest_nonpriv_port; 5736 5737 for (i = 0; i < udp_g_num_epriv_ports; i++) { 5738 if (port == udp_g_epriv_ports[i]) { 5739 port++; 5740 /* 5741 * Make sure that the port is in the 5742 * valid range. 5743 */ 5744 goto retry; 5745 } 5746 } 5747 return (port); 5748 } 5749 5750 static mblk_t * 5751 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5752 uint_t srcid, int *error) 5753 { 5754 udp_t *udp = connp->conn_udp; 5755 queue_t *q = connp->conn_wq; 5756 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 5757 mblk_t *mp2; 5758 ipha_t *ipha; 5759 int ip_hdr_length; 5760 uint32_t ip_len; 5761 udpha_t *udpha; 5762 5763 *error = 0; 5764 5765 /* mp1 points to the M_DATA mblk carrying the packet */ 5766 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5767 5768 /* Add an IP header */ 5769 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 5770 udp->udp_ip_snd_options_len; 5771 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5772 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5773 !OK_32PTR(ipha)) { 5774 mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO); 5775 if (mp2 == NULL) { 5776 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5777 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5778 *error = ENOMEM; 5779 goto done; 5780 } 5781 mp2->b_wptr = DB_LIM(mp2); 5782 mp2->b_cont = mp1; 5783 mp1 = mp2; 5784 if (DB_TYPE(mp) != M_DATA) 5785 mp->b_cont = mp1; 5786 else 5787 mp = mp1; 5788 5789 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5790 } 5791 ip_hdr_length -= UDPH_SIZE; 5792 #ifdef _BIG_ENDIAN 5793 /* Set version, header length, and tos */ 5794 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5795 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5796 udp->udp_type_of_service); 5797 /* Set ttl and protocol */ 5798 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5799 #else 5800 /* Set version, header length, and tos */ 5801 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5802 ((udp->udp_type_of_service << 8) | 5803 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5804 /* Set ttl and protocol */ 5805 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5806 #endif 5807 /* 5808 * Copy our address into the packet. If this is zero, 5809 * first look at __sin6_src_id for a hint. If we leave the source 5810 * as INADDR_ANY then ip will fill in the real source address. 5811 */ 5812 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5813 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5814 in6_addr_t v6src; 5815 5816 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid); 5817 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5818 } 5819 5820 ipha->ipha_fragment_offset_and_flags = 0; 5821 ipha->ipha_ident = 0; 5822 5823 mp1->b_rptr = (uchar_t *)ipha; 5824 5825 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5826 (uintptr_t)UINT_MAX); 5827 5828 /* Determine length of packet */ 5829 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5830 if ((mp2 = mp1->b_cont) != NULL) { 5831 do { 5832 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5833 ip_len += (uint32_t)MBLKL(mp2); 5834 } while ((mp2 = mp2->b_cont) != NULL); 5835 } 5836 /* 5837 * If the size of the packet is greater than the maximum allowed by 5838 * ip, return an error. Passing this down could cause panics because 5839 * the size will have wrapped and be inconsistent with the msg size. 5840 */ 5841 if (ip_len > IP_MAXPACKET) { 5842 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5843 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5844 *error = EMSGSIZE; 5845 goto done; 5846 } 5847 ipha->ipha_length = htons((uint16_t)ip_len); 5848 ip_len -= ip_hdr_length; 5849 ip_len = htons((uint16_t)ip_len); 5850 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5851 5852 /* 5853 * Copy in the destination address 5854 */ 5855 if (v4dst == INADDR_ANY) 5856 ipha->ipha_dst = htonl(INADDR_LOOPBACK); 5857 else 5858 ipha->ipha_dst = v4dst; 5859 5860 /* 5861 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5862 */ 5863 if (CLASSD(v4dst)) 5864 ipha->ipha_ttl = udp->udp_multicast_ttl; 5865 5866 udpha->uha_dst_port = port; 5867 udpha->uha_src_port = udp->udp_port; 5868 5869 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 5870 uint32_t cksum; 5871 5872 bcopy(udp->udp_ip_snd_options, &ipha[1], 5873 udp->udp_ip_snd_options_len); 5874 /* 5875 * Massage source route putting first source route in ipha_dst. 5876 * Ignore the destination in T_unitdata_req. 5877 * Create a checksum adjustment for a source route, if any. 5878 */ 5879 cksum = ip_massage_options(ipha); 5880 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5881 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5882 (ipha->ipha_dst & 0xFFFF); 5883 if ((int)cksum < 0) 5884 cksum--; 5885 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5886 /* 5887 * IP does the checksum if uha_checksum is non-zero, 5888 * We make it easy for IP to include our pseudo header 5889 * by putting our length in uha_checksum. 5890 */ 5891 cksum += ip_len; 5892 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5893 /* There might be a carry. */ 5894 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5895 #ifdef _LITTLE_ENDIAN 5896 if (udp_do_checksum) 5897 ip_len = (cksum << 16) | ip_len; 5898 #else 5899 if (udp_do_checksum) 5900 ip_len = (ip_len << 16) | cksum; 5901 else 5902 ip_len <<= 16; 5903 #endif 5904 } else { 5905 /* 5906 * IP does the checksum if uha_checksum is non-zero, 5907 * We make it easy for IP to include our pseudo header 5908 * by putting our length in uha_checksum. 5909 */ 5910 if (udp_do_checksum) 5911 ip_len |= (ip_len << 16); 5912 #ifndef _LITTLE_ENDIAN 5913 else 5914 ip_len <<= 16; 5915 #endif 5916 } 5917 /* Set UDP length and checksum */ 5918 *((uint32_t *)&udpha->uha_length) = ip_len; 5919 5920 if (DB_TYPE(mp) != M_DATA) { 5921 ASSERT(mp != mp1); 5922 freeb(mp); 5923 } 5924 5925 /* mp has been consumed and we'll return success */ 5926 ASSERT(*error == 0); 5927 mp = NULL; 5928 5929 /* We're done. Pass the packet to ip. */ 5930 BUMP_MIB(&udp_mib, udpOutDatagrams); 5931 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5932 "udp_wput_end: q %p (%S)", q, "end"); 5933 5934 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5935 CONN_OUTBOUND_POLICY_PRESENT(connp) || 5936 connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || 5937 connp->conn_nofailover_ill != NULL || 5938 connp->conn_outgoing_ill != NULL || 5939 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5940 IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) { 5941 UDP_STAT(udp_ip_send); 5942 ip_output(connp, mp1, connp->conn_wq, IP_WPUT); 5943 } else { 5944 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5945 } 5946 5947 done: 5948 if (*error != 0) { 5949 ASSERT(mp != NULL); 5950 BUMP_MIB(&udp_mib, udpOutErrors); 5951 } 5952 return (mp); 5953 } 5954 5955 static void 5956 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5957 { 5958 conn_t *connp = udp->udp_connp; 5959 ipaddr_t src, dst; 5960 ill_t *ill; 5961 ire_t *ire; 5962 ipif_t *ipif = NULL; 5963 mblk_t *ire_fp_mp; 5964 uint_t ire_fp_mp_len; 5965 uint16_t *up; 5966 uint32_t cksum, hcksum_txflags; 5967 queue_t *dev_q; 5968 boolean_t retry_caching; 5969 5970 dst = ipha->ipha_dst; 5971 src = ipha->ipha_src; 5972 ASSERT(ipha->ipha_ident == 0); 5973 5974 if (CLASSD(dst)) { 5975 int err; 5976 5977 ipif = conn_get_held_ipif(connp, 5978 &connp->conn_multicast_ipif, &err); 5979 5980 if (ipif == NULL || ipif->ipif_isv6 || 5981 (ipif->ipif_ill->ill_phyint->phyint_flags & 5982 PHYI_LOOPBACK)) { 5983 if (ipif != NULL) 5984 ipif_refrele(ipif); 5985 UDP_STAT(udp_ip_send); 5986 ip_output(connp, mp, q, IP_WPUT); 5987 return; 5988 } 5989 } 5990 5991 retry_caching = B_FALSE; 5992 mutex_enter(&connp->conn_lock); 5993 ire = connp->conn_ire_cache; 5994 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5995 5996 if (ire == NULL || ire->ire_addr != dst || 5997 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5998 retry_caching = B_TRUE; 5999 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 6000 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6001 6002 ASSERT(ipif != NULL); 6003 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6004 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6005 retry_caching = B_TRUE; 6006 } 6007 6008 if (!retry_caching) { 6009 ASSERT(ire != NULL); 6010 IRE_REFHOLD(ire); 6011 mutex_exit(&connp->conn_lock); 6012 } else { 6013 boolean_t cached = B_FALSE; 6014 6015 connp->conn_ire_cache = NULL; 6016 mutex_exit(&connp->conn_lock); 6017 6018 /* Release the old ire */ 6019 if (ire != NULL) { 6020 IRE_REFRELE_NOTR(ire); 6021 ire = NULL; 6022 } 6023 6024 if (CLASSD(dst)) { 6025 ASSERT(ipif != NULL); 6026 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6027 connp->conn_zoneid, MATCH_IRE_ILL_GROUP); 6028 } else { 6029 ASSERT(ipif == NULL); 6030 ire = ire_cache_lookup(dst, connp->conn_zoneid); 6031 } 6032 6033 if (ire == NULL) { 6034 if (ipif != NULL) 6035 ipif_refrele(ipif); 6036 UDP_STAT(udp_ire_null); 6037 ip_output(connp, mp, q, IP_WPUT); 6038 return; 6039 } 6040 IRE_REFHOLD_NOTR(ire); 6041 6042 mutex_enter(&connp->conn_lock); 6043 if (!(connp->conn_state_flags & CONN_CLOSING) && 6044 connp->conn_ire_cache == NULL) { 6045 rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6046 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6047 connp->conn_ire_cache = ire; 6048 cached = B_TRUE; 6049 } 6050 rw_exit(&ire->ire_bucket->irb_lock); 6051 } 6052 mutex_exit(&connp->conn_lock); 6053 6054 /* 6055 * We can continue to use the ire but since it was not 6056 * cached, we should drop the extra reference. 6057 */ 6058 if (!cached) 6059 IRE_REFRELE_NOTR(ire); 6060 } 6061 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6062 ASSERT(!CLASSD(dst) || ipif != NULL); 6063 6064 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6065 (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL || 6066 ire->ire_max_frag < ntohs(ipha->ipha_length) || 6067 (ire_fp_mp = ire->ire_fp_mp) == NULL || 6068 (connp->conn_nexthop_set) || 6069 (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) { 6070 if (ipif != NULL) 6071 ipif_refrele(ipif); 6072 UDP_STAT(udp_ip_ire_send); 6073 IRE_REFRELE(ire); 6074 ip_output(connp, mp, q, IP_WPUT); 6075 return; 6076 } 6077 6078 BUMP_MIB(&ip_mib, ipOutRequests); 6079 6080 ill = ire_to_ill(ire); 6081 ASSERT(ill != NULL); 6082 6083 dev_q = ire->ire_stq->q_next; 6084 ASSERT(dev_q != NULL); 6085 /* 6086 * If the service thread is already running, or if the driver 6087 * queue is currently flow-controlled, queue this packet. 6088 */ 6089 if ((q->q_first != NULL || connp->conn_draining) || 6090 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 6091 if (ip_output_queue) { 6092 (void) putq(q, mp); 6093 } else { 6094 BUMP_MIB(&ip_mib, ipOutDiscards); 6095 freemsg(mp); 6096 } 6097 if (ipif != NULL) 6098 ipif_refrele(ipif); 6099 IRE_REFRELE(ire); 6100 return; 6101 } 6102 6103 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6104 #ifndef _BIG_ENDIAN 6105 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6106 #endif 6107 6108 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6109 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6110 src = ipha->ipha_src = ipif->ipif_src_addr; 6111 else 6112 src = ipha->ipha_src = ire->ire_src_addr; 6113 } 6114 6115 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6116 ASSERT(ill->ill_hcksum_capab != NULL); 6117 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6118 } else { 6119 hcksum_txflags = 0; 6120 } 6121 6122 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6123 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6124 6125 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6126 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6127 if (*up != 0) { 6128 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6129 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6130 ntohs(ipha->ipha_length), cksum); 6131 6132 /* Software checksum? */ 6133 if (DB_CKSUMFLAGS(mp) == 0) { 6134 UDP_STAT(udp_out_sw_cksum); 6135 UDP_STAT_UPDATE(udp_out_sw_cksum_bytes, 6136 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6137 } 6138 } 6139 6140 ipha->ipha_fragment_offset_and_flags |= 6141 (uint32_t)htons(ire->ire_frag_flag); 6142 6143 /* Calculate IP header checksum if hardware isn't capable */ 6144 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6145 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6146 ((uint16_t *)ipha)[4]); 6147 } 6148 6149 if (CLASSD(dst)) { 6150 ilm_t *ilm; 6151 6152 ILM_WALKER_HOLD(ill); 6153 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6154 ILM_WALKER_RELE(ill); 6155 if (ilm != NULL) { 6156 ip_multicast_loopback(q, ill, mp, 6157 connp->conn_multicast_loop ? 0 : 6158 IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); 6159 } 6160 6161 /* If multicast TTL is 0 then we are done */ 6162 if (ipha->ipha_ttl == 0) { 6163 if (ipif != NULL) 6164 ipif_refrele(ipif); 6165 freemsg(mp); 6166 IRE_REFRELE(ire); 6167 return; 6168 } 6169 } 6170 6171 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6172 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6173 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6174 6175 UPDATE_OB_PKT_COUNT(ire); 6176 ire->ire_last_used_time = lbolt; 6177 6178 if (ILL_DLS_CAPABLE(ill)) { 6179 /* 6180 * Send the packet directly to DLD, where it may be queued 6181 * depending on the availability of transmit resources at 6182 * the media layer. 6183 */ 6184 IP_DLS_ILL_TX(ill, mp); 6185 } else { 6186 putnext(ire->ire_stq, mp); 6187 } 6188 6189 if (ipif != NULL) 6190 ipif_refrele(ipif); 6191 IRE_REFRELE(ire); 6192 } 6193 6194 /* 6195 * This routine handles all messages passed downstream. It either 6196 * consumes the message or passes it downstream; it never queues a 6197 * a message. 6198 */ 6199 static void 6200 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6201 { 6202 sin6_t *sin6; 6203 sin_t *sin; 6204 ipaddr_t v4dst; 6205 uint16_t port; 6206 uint_t srcid; 6207 queue_t *q = connp->conn_wq; 6208 udp_t *udp = connp->conn_udp; 6209 t_scalar_t optlen; 6210 int error = 0; 6211 struct sockaddr_storage ss; 6212 6213 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6214 "udp_wput_start: connp %p mp %p", connp, mp); 6215 6216 /* 6217 * We directly handle several cases here: T_UNITDATA_REQ message 6218 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 6219 * connected and non-connected socket. The latter carries the 6220 * address structure along when this routine gets called. 6221 */ 6222 switch (DB_TYPE(mp)) { 6223 case M_DATA: 6224 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6225 if (!udp->udp_direct_sockfs || 6226 addr == NULL || addrlen == 0) { 6227 /* Not connected; address is required */ 6228 BUMP_MIB(&udp_mib, udpOutErrors); 6229 UDP_STAT(udp_out_err_notconn); 6230 freemsg(mp); 6231 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6232 "udp_wput_end: connp %p (%S)", connp, 6233 "not-connected; address required"); 6234 return; 6235 } 6236 ASSERT(udp->udp_issocket); 6237 UDP_DBGSTAT(udp_data_notconn); 6238 /* Not connected; do some more checks below */ 6239 optlen = 0; 6240 break; 6241 } 6242 /* M_DATA for connected socket */ 6243 UDP_DBGSTAT(udp_data_conn); 6244 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6245 6246 /* Initialize addr and addrlen as if they're passed in */ 6247 if (udp->udp_family == AF_INET) { 6248 sin = (sin_t *)&ss; 6249 sin->sin_family = AF_INET; 6250 sin->sin_port = udp->udp_dstport; 6251 sin->sin_addr.s_addr = v4dst; 6252 addr = (struct sockaddr *)sin; 6253 addrlen = sizeof (*sin); 6254 } else { 6255 sin6 = (sin6_t *)&ss; 6256 sin6->sin6_family = AF_INET6; 6257 sin6->sin6_port = udp->udp_dstport; 6258 sin6->sin6_flowinfo = udp->udp_flowinfo; 6259 sin6->sin6_addr = udp->udp_v6dst; 6260 sin6->sin6_scope_id = 0; 6261 sin6->__sin6_src_id = 0; 6262 addr = (struct sockaddr *)sin6; 6263 addrlen = sizeof (*sin6); 6264 } 6265 6266 if (udp->udp_family == AF_INET || 6267 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 6268 /* 6269 * Handle both AF_INET and AF_INET6; the latter 6270 * for IPV4 mapped destination addresses. Note 6271 * here that both addr and addrlen point to the 6272 * corresponding struct depending on the address 6273 * family of the socket. 6274 */ 6275 mp = udp_output_v4(connp, mp, v4dst, 6276 udp->udp_dstport, 0, &error); 6277 } else { 6278 mp = udp_output_v6(connp, mp, sin6, 0, &error); 6279 } 6280 if (error != 0) { 6281 ASSERT(addr != NULL && addrlen != 0); 6282 goto ud_error; 6283 } 6284 return; 6285 case M_PROTO: 6286 case M_PCPROTO: { 6287 struct T_unitdata_req *tudr; 6288 6289 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6290 tudr = (struct T_unitdata_req *)mp->b_rptr; 6291 6292 /* Handle valid T_UNITDATA_REQ here */ 6293 if (MBLKL(mp) >= sizeof (*tudr) && 6294 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6295 if (mp->b_cont == NULL) { 6296 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6297 "udp_wput_end: q %p (%S)", q, "badaddr"); 6298 error = EPROTO; 6299 goto ud_error; 6300 } 6301 6302 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6303 tudr->DEST_length)) { 6304 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6305 "udp_wput_end: q %p (%S)", q, "badaddr"); 6306 error = EADDRNOTAVAIL; 6307 goto ud_error; 6308 } 6309 /* 6310 * If a port has not been bound to the stream, fail. 6311 * This is not a problem when sockfs is directly 6312 * above us, because it will ensure that the socket 6313 * is first bound before allowing data to be sent. 6314 */ 6315 if (udp->udp_state == TS_UNBND) { 6316 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6317 "udp_wput_end: q %p (%S)", q, "outstate"); 6318 error = EPROTO; 6319 goto ud_error; 6320 } 6321 addr = (struct sockaddr *) 6322 &mp->b_rptr[tudr->DEST_offset]; 6323 addrlen = tudr->DEST_length; 6324 optlen = tudr->OPT_length; 6325 if (optlen != 0) 6326 UDP_STAT(udp_out_opt); 6327 break; 6328 } 6329 /* FALLTHRU */ 6330 } 6331 default: 6332 udp_become_writer(connp, mp, udp_wput_other_wrapper, 6333 SQTAG_UDP_OUTPUT); 6334 return; 6335 } 6336 ASSERT(addr != NULL); 6337 6338 switch (udp->udp_family) { 6339 case AF_INET6: 6340 sin6 = (sin6_t *)addr; 6341 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 6342 sin6->sin6_family != AF_INET6) { 6343 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6344 "udp_wput_end: q %p (%S)", q, "badaddr"); 6345 error = EADDRNOTAVAIL; 6346 goto ud_error; 6347 } 6348 6349 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6350 /* 6351 * Destination is a non-IPv4-compatible IPv6 address. 6352 * Send out an IPv6 format packet. 6353 */ 6354 mp = udp_output_v6(connp, mp, sin6, optlen, &error); 6355 if (error != 0) 6356 goto ud_error; 6357 6358 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6359 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6360 return; 6361 } 6362 /* 6363 * If the local address is not zero or a mapped address 6364 * return an error. It would be possible to send an IPv4 6365 * packet but the response would never make it back to the 6366 * application since it is bound to a non-mapped address. 6367 */ 6368 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6369 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6370 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6371 "udp_wput_end: q %p (%S)", q, "badaddr"); 6372 error = EADDRNOTAVAIL; 6373 goto ud_error; 6374 } 6375 /* Send IPv4 packet without modifying udp_ipversion */ 6376 /* Extract port and ipaddr */ 6377 port = sin6->sin6_port; 6378 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6379 srcid = sin6->__sin6_src_id; 6380 break; 6381 6382 case AF_INET: 6383 sin = (sin_t *)addr; 6384 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 6385 sin->sin_family != AF_INET) { 6386 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6387 "udp_wput_end: q %p (%S)", q, "badaddr"); 6388 error = EADDRNOTAVAIL; 6389 goto ud_error; 6390 } 6391 /* Extract port and ipaddr */ 6392 port = sin->sin_port; 6393 v4dst = sin->sin_addr.s_addr; 6394 srcid = 0; 6395 break; 6396 } 6397 6398 /* 6399 * If options passed in, feed it for verification and handling 6400 */ 6401 if (optlen != 0) { 6402 ASSERT(DB_TYPE(mp) != M_DATA); 6403 if (udp_unitdata_opt_process(q, mp, &error, NULL) < 0) { 6404 /* failure */ 6405 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6406 "udp_wput_end: q %p (%S)", q, 6407 "udp_unitdata_opt_process"); 6408 goto ud_error; 6409 } 6410 /* 6411 * Note: success in processing options. 6412 * mp option buffer represented by 6413 * OPT_length/offset now potentially modified 6414 * and contain option setting results 6415 */ 6416 } 6417 ASSERT(error == 0); 6418 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); 6419 if (error != 0) { 6420 ud_error: 6421 UDP_STAT(udp_out_err_output); 6422 ASSERT(mp != NULL); 6423 /* mp is freed by the following routine */ 6424 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6425 (t_scalar_t)error); 6426 } 6427 } 6428 6429 /* ARGSUSED */ 6430 static void 6431 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 6432 { 6433 udp_output((conn_t *)arg, mp, NULL, 0); 6434 _UDP_EXIT((conn_t *)arg); 6435 } 6436 6437 static void 6438 udp_wput(queue_t *q, mblk_t *mp) 6439 { 6440 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 6441 SQTAG_UDP_WPUT); 6442 } 6443 6444 /* 6445 * Allocate and prepare a T_UNITDATA_REQ message. 6446 */ 6447 static mblk_t * 6448 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 6449 { 6450 struct T_unitdata_req *tudr; 6451 mblk_t *mp; 6452 6453 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 6454 if (mp != NULL) { 6455 mp->b_wptr += sizeof (*tudr) + addrlen; 6456 DB_TYPE(mp) = M_PROTO; 6457 6458 tudr = (struct T_unitdata_req *)mp->b_rptr; 6459 tudr->PRIM_type = T_UNITDATA_REQ; 6460 tudr->DEST_length = addrlen; 6461 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 6462 tudr->OPT_length = 0; 6463 tudr->OPT_offset = 0; 6464 bcopy(addr, tudr+1, addrlen); 6465 } 6466 return (mp); 6467 } 6468 6469 /* 6470 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 6471 * is valid when we are directly beneath the stream head, and thus sockfs 6472 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6473 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 6474 * this is done for both connected and non-connected endpoint. 6475 */ 6476 void 6477 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6478 { 6479 conn_t *connp; 6480 udp_t *udp; 6481 6482 q = UDP_WR(q); 6483 connp = Q_TO_CONN(q); 6484 udp = connp->conn_udp; 6485 6486 /* udpsockfs should only send down M_DATA for this entry point */ 6487 ASSERT(DB_TYPE(mp) == M_DATA); 6488 6489 mutex_enter(&connp->conn_lock); 6490 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 6491 6492 if (udp->udp_mode != UDP_MT_HOT) { 6493 /* 6494 * We can't enter this conn right away because another 6495 * thread is currently executing as writer; therefore we 6496 * need to deposit the message into the squeue to be 6497 * drained later. If a socket address is present, we 6498 * need to create a T_UNITDATA_REQ message as placeholder. 6499 */ 6500 if (addr != NULL && addrlen != 0) { 6501 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 6502 6503 if (tudr_mp == NULL) { 6504 mutex_exit(&connp->conn_lock); 6505 BUMP_MIB(&udp_mib, udpOutErrors); 6506 UDP_STAT(udp_out_err_tudr); 6507 freemsg(mp); 6508 return; 6509 } 6510 /* Tag the packet with T_UNITDATA_REQ */ 6511 tudr_mp->b_cont = mp; 6512 mp = tudr_mp; 6513 } 6514 mutex_exit(&connp->conn_lock); 6515 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 6516 return; 6517 } 6518 6519 /* We can execute as reader right away. */ 6520 UDP_READERS_INCREF(udp); 6521 mutex_exit(&connp->conn_lock); 6522 6523 udp_output(connp, mp, addr, addrlen); 6524 6525 mutex_enter(&connp->conn_lock); 6526 UDP_MODE_ASSERTIONS(udp, UDP_EXIT); 6527 UDP_READERS_DECREF(udp); 6528 mutex_exit(&connp->conn_lock); 6529 } 6530 6531 /* 6532 * udp_output_v6(): 6533 * Assumes that udp_wput did some sanity checking on the destination 6534 * address. 6535 */ 6536 static mblk_t * 6537 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen, 6538 int *error) 6539 { 6540 ip6_t *ip6h; 6541 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6542 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 6543 mblk_t *mp2; 6544 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6545 size_t ip_len; 6546 udpha_t *udph; 6547 udp_t *udp = connp->conn_udp; 6548 queue_t *q = connp->conn_wq; 6549 ip6_pkt_t ipp_s; /* For ancillary data options */ 6550 ip6_pkt_t *ipp = &ipp_s; 6551 ip6_pkt_t *tipp; /* temporary ipp */ 6552 uint32_t csum = 0; 6553 uint_t ignore = 0; 6554 uint_t option_exists = 0, is_sticky = 0; 6555 uint8_t *cp; 6556 uint8_t *nxthdr_ptr; 6557 6558 *error = 0; 6559 6560 /* mp1 points to the M_DATA mblk carrying the packet */ 6561 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6562 ASSERT(tudr_optlen == 0 || DB_TYPE(mp) != M_DATA); 6563 6564 /* 6565 * If the local address is a mapped address return 6566 * an error. 6567 * It would be possible to send an IPv6 packet but the 6568 * response would never make it back to the application 6569 * since it is bound to a mapped address. 6570 */ 6571 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6572 *error = EADDRNOTAVAIL; 6573 goto done; 6574 } 6575 6576 ipp->ipp_fields = 0; 6577 ipp->ipp_sticky_ignored = 0; 6578 6579 /* 6580 * If TPI options passed in, feed it for verification and handling 6581 */ 6582 if (tudr_optlen != 0) { 6583 if (udp_unitdata_opt_process(q, mp, error, (void *)ipp) < 0) { 6584 /* failure */ 6585 goto done; 6586 } 6587 ignore = ipp->ipp_sticky_ignored; 6588 ASSERT(*error == 0); 6589 } 6590 6591 if (sin6->sin6_scope_id != 0 && 6592 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6593 /* 6594 * IPPF_SCOPE_ID is special. It's neither a sticky 6595 * option nor ancillary data. It needs to be 6596 * explicitly set in options_exists. 6597 */ 6598 option_exists |= IPPF_SCOPE_ID; 6599 } 6600 6601 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6602 /* No sticky options nor ancillary data. */ 6603 goto no_options; 6604 } 6605 6606 /* 6607 * Go through the options figuring out where each is going to 6608 * come from and build two masks. The first mask indicates if 6609 * the option exists at all. The second mask indicates if the 6610 * option is sticky or ancillary. 6611 */ 6612 if (!(ignore & IPPF_HOPOPTS)) { 6613 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6614 option_exists |= IPPF_HOPOPTS; 6615 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6616 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6617 option_exists |= IPPF_HOPOPTS; 6618 is_sticky |= IPPF_HOPOPTS; 6619 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_hopoptslen; 6620 } 6621 } 6622 6623 if (!(ignore & IPPF_RTHDR)) { 6624 if (ipp->ipp_fields & IPPF_RTHDR) { 6625 option_exists |= IPPF_RTHDR; 6626 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6627 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6628 option_exists |= IPPF_RTHDR; 6629 is_sticky |= IPPF_RTHDR; 6630 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6631 } 6632 } 6633 6634 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6635 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6636 option_exists |= IPPF_RTDSTOPTS; 6637 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6638 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6639 option_exists |= IPPF_RTDSTOPTS; 6640 is_sticky |= IPPF_RTDSTOPTS; 6641 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6642 } 6643 } 6644 6645 if (!(ignore & IPPF_DSTOPTS)) { 6646 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6647 option_exists |= IPPF_DSTOPTS; 6648 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6649 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6650 option_exists |= IPPF_DSTOPTS; 6651 is_sticky |= IPPF_DSTOPTS; 6652 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6653 } 6654 } 6655 6656 if (!(ignore & IPPF_IFINDEX)) { 6657 if (ipp->ipp_fields & IPPF_IFINDEX) { 6658 option_exists |= IPPF_IFINDEX; 6659 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6660 option_exists |= IPPF_IFINDEX; 6661 is_sticky |= IPPF_IFINDEX; 6662 } 6663 } 6664 6665 if (!(ignore & IPPF_ADDR)) { 6666 if (ipp->ipp_fields & IPPF_ADDR) { 6667 option_exists |= IPPF_ADDR; 6668 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6669 option_exists |= IPPF_ADDR; 6670 is_sticky |= IPPF_ADDR; 6671 } 6672 } 6673 6674 if (!(ignore & IPPF_DONTFRAG)) { 6675 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6676 option_exists |= IPPF_DONTFRAG; 6677 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6678 option_exists |= IPPF_DONTFRAG; 6679 is_sticky |= IPPF_DONTFRAG; 6680 } 6681 } 6682 6683 if (!(ignore & IPPF_USE_MIN_MTU)) { 6684 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6685 option_exists |= IPPF_USE_MIN_MTU; 6686 } else if (udp->udp_sticky_ipp.ipp_fields & 6687 IPPF_USE_MIN_MTU) { 6688 option_exists |= IPPF_USE_MIN_MTU; 6689 is_sticky |= IPPF_USE_MIN_MTU; 6690 } 6691 } 6692 6693 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6694 option_exists |= IPPF_HOPLIMIT; 6695 /* IPV6_HOPLIMIT can never be sticky */ 6696 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6697 6698 if (!(ignore & IPPF_UNICAST_HOPS) && 6699 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6700 option_exists |= IPPF_UNICAST_HOPS; 6701 is_sticky |= IPPF_UNICAST_HOPS; 6702 } 6703 6704 if (!(ignore & IPPF_MULTICAST_HOPS) && 6705 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6706 option_exists |= IPPF_MULTICAST_HOPS; 6707 is_sticky |= IPPF_MULTICAST_HOPS; 6708 } 6709 6710 if (!(ignore & IPPF_TCLASS)) { 6711 if (ipp->ipp_fields & IPPF_TCLASS) { 6712 option_exists |= IPPF_TCLASS; 6713 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6714 option_exists |= IPPF_TCLASS; 6715 is_sticky |= IPPF_TCLASS; 6716 } 6717 } 6718 6719 no_options: 6720 6721 /* 6722 * If any options carried in the ip6i_t were specified, we 6723 * need to account for the ip6i_t in the data we'll be sending 6724 * down. 6725 */ 6726 if (option_exists & IPPF_HAS_IP6I) 6727 udp_ip_hdr_len += sizeof (ip6i_t); 6728 6729 /* check/fix buffer config, setup pointers into it */ 6730 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6731 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6732 !OK_32PTR(ip6h)) { 6733 /* Try to get everything in a single mblk next time */ 6734 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6735 udp->udp_max_hdr_len = udp_ip_hdr_len; 6736 (void) mi_set_sth_wroff(UDP_RD(q), 6737 udp->udp_max_hdr_len + udp_wroff_extra); 6738 } 6739 mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); 6740 if (mp2 == NULL) { 6741 *error = ENOMEM; 6742 goto done; 6743 } 6744 mp2->b_wptr = DB_LIM(mp2); 6745 mp2->b_cont = mp1; 6746 mp1 = mp2; 6747 if (DB_TYPE(mp) != M_DATA) 6748 mp->b_cont = mp1; 6749 else 6750 mp = mp1; 6751 6752 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6753 } 6754 mp1->b_rptr = (unsigned char *)ip6h; 6755 ip6i = (ip6i_t *)ip6h; 6756 6757 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6758 if (option_exists & IPPF_HAS_IP6I) { 6759 ip6h = (ip6_t *)&ip6i[1]; 6760 ip6i->ip6i_flags = 0; 6761 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6762 6763 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6764 if (option_exists & IPPF_SCOPE_ID) { 6765 ip6i->ip6i_flags |= IP6I_IFINDEX; 6766 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6767 } else if (option_exists & IPPF_IFINDEX) { 6768 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6769 ASSERT(tipp->ipp_ifindex != 0); 6770 ip6i->ip6i_flags |= IP6I_IFINDEX; 6771 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6772 } 6773 6774 if (option_exists & IPPF_ADDR) { 6775 /* 6776 * Enable per-packet source address verification if 6777 * IPV6_PKTINFO specified the source address. 6778 * ip6_src is set in the transport's _wput function. 6779 */ 6780 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6781 } 6782 6783 if (option_exists & IPPF_DONTFRAG) { 6784 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6785 } 6786 6787 if (option_exists & IPPF_USE_MIN_MTU) { 6788 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6789 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6790 } 6791 6792 if (option_exists & IPPF_NEXTHOP) { 6793 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6794 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6795 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6796 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6797 } 6798 6799 /* 6800 * tell IP this is an ip6i_t private header 6801 */ 6802 ip6i->ip6i_nxt = IPPROTO_RAW; 6803 } 6804 6805 /* Initialize IPv6 header */ 6806 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6807 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6808 6809 /* Set the hoplimit of the outgoing packet. */ 6810 if (option_exists & IPPF_HOPLIMIT) { 6811 /* IPV6_HOPLIMIT ancillary data overrides all other settings. */ 6812 ip6h->ip6_hops = ipp->ipp_hoplimit; 6813 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6814 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6815 ip6h->ip6_hops = udp->udp_multicast_ttl; 6816 if (option_exists & IPPF_MULTICAST_HOPS) 6817 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6818 } else { 6819 ip6h->ip6_hops = udp->udp_ttl; 6820 if (option_exists & IPPF_UNICAST_HOPS) 6821 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6822 } 6823 6824 if (option_exists & IPPF_ADDR) { 6825 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6826 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6827 ip6h->ip6_src = tipp->ipp_addr; 6828 } else { 6829 /* 6830 * The source address was not set using IPV6_PKTINFO. 6831 * First look at the bound source. 6832 * If unspecified fallback to __sin6_src_id. 6833 */ 6834 ip6h->ip6_src = udp->udp_v6src; 6835 if (sin6->__sin6_src_id != 0 && 6836 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6837 ip_srcid_find_id(sin6->__sin6_src_id, 6838 &ip6h->ip6_src, connp->conn_zoneid); 6839 } 6840 } 6841 6842 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6843 cp = (uint8_t *)&ip6h[1]; 6844 6845 /* 6846 * Here's where we have to start stringing together 6847 * any extension headers in the right order: 6848 * Hop-by-hop, destination, routing, and final destination opts. 6849 */ 6850 if (option_exists & IPPF_HOPOPTS) { 6851 /* Hop-by-hop options */ 6852 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6853 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6854 6855 *nxthdr_ptr = IPPROTO_HOPOPTS; 6856 nxthdr_ptr = &hbh->ip6h_nxt; 6857 6858 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 6859 cp += tipp->ipp_hopoptslen; 6860 } 6861 /* 6862 * En-route destination options 6863 * Only do them if there's a routing header as well 6864 */ 6865 if (option_exists & IPPF_RTDSTOPTS) { 6866 ip6_dest_t *dst = (ip6_dest_t *)cp; 6867 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6868 6869 *nxthdr_ptr = IPPROTO_DSTOPTS; 6870 nxthdr_ptr = &dst->ip6d_nxt; 6871 6872 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6873 cp += tipp->ipp_rtdstoptslen; 6874 } 6875 /* 6876 * Routing header next 6877 */ 6878 if (option_exists & IPPF_RTHDR) { 6879 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6880 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6881 6882 *nxthdr_ptr = IPPROTO_ROUTING; 6883 nxthdr_ptr = &rt->ip6r_nxt; 6884 6885 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6886 cp += tipp->ipp_rthdrlen; 6887 } 6888 /* 6889 * Do ultimate destination options 6890 */ 6891 if (option_exists & IPPF_DSTOPTS) { 6892 ip6_dest_t *dest = (ip6_dest_t *)cp; 6893 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6894 6895 *nxthdr_ptr = IPPROTO_DSTOPTS; 6896 nxthdr_ptr = &dest->ip6d_nxt; 6897 6898 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6899 cp += tipp->ipp_dstoptslen; 6900 } 6901 /* 6902 * Now set the last header pointer to the proto passed in 6903 */ 6904 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6905 *nxthdr_ptr = IPPROTO_UDP; 6906 6907 /* Update UDP header */ 6908 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6909 udph->uha_dst_port = sin6->sin6_port; 6910 udph->uha_src_port = udp->udp_port; 6911 6912 /* 6913 * Copy in the destination address 6914 */ 6915 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6916 ip6h->ip6_dst = ipv6_loopback; 6917 else 6918 ip6h->ip6_dst = sin6->sin6_addr; 6919 6920 ip6h->ip6_vcf = 6921 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6922 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6923 6924 if (option_exists & IPPF_TCLASS) { 6925 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6926 ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6927 tipp->ipp_tclass); 6928 } 6929 6930 if (option_exists & IPPF_RTHDR) { 6931 ip6_rthdr_t *rth; 6932 6933 /* 6934 * Perform any processing needed for source routing. 6935 * We know that all extension headers will be in the same mblk 6936 * as the IPv6 header. 6937 */ 6938 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6939 if (rth != NULL && rth->ip6r_segleft != 0) { 6940 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6941 /* 6942 * Drop packet - only support Type 0 routing. 6943 * Notify the application as well. 6944 */ 6945 *error = EPROTO; 6946 goto done; 6947 } 6948 6949 /* 6950 * rth->ip6r_len is twice the number of 6951 * addresses in the header. Thus it must be even. 6952 */ 6953 if (rth->ip6r_len & 0x1) { 6954 *error = EPROTO; 6955 goto done; 6956 } 6957 /* 6958 * Shuffle the routing header and ip6_dst 6959 * addresses, and get the checksum difference 6960 * between the first hop (in ip6_dst) and 6961 * the destination (in the last routing hdr entry). 6962 */ 6963 csum = ip_massage_options_v6(ip6h, rth); 6964 /* 6965 * Verify that the first hop isn't a mapped address. 6966 * Routers along the path need to do this verification 6967 * for subsequent hops. 6968 */ 6969 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6970 *error = EADDRNOTAVAIL; 6971 goto done; 6972 } 6973 6974 cp += (rth->ip6r_len + 1)*8; 6975 } 6976 } 6977 6978 /* count up length of UDP packet */ 6979 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6980 if ((mp2 = mp1->b_cont) != NULL) { 6981 do { 6982 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6983 ip_len += (uint32_t)MBLKL(mp2); 6984 } while ((mp2 = mp2->b_cont) != NULL); 6985 } 6986 6987 /* 6988 * If the size of the packet is greater than the maximum allowed by 6989 * ip, return an error. Passing this down could cause panics because 6990 * the size will have wrapped and be inconsistent with the msg size. 6991 */ 6992 if (ip_len > IP_MAXPACKET) { 6993 *error = EMSGSIZE; 6994 goto done; 6995 } 6996 6997 /* Store the UDP length. Subtract length of extension hdrs */ 6998 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6999 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7000 7001 /* 7002 * We make it easy for IP to include our pseudo header 7003 * by putting our length in uh_checksum, modified (if 7004 * we have a routing header) by the checksum difference 7005 * between the ultimate destination and first hop addresses. 7006 * Note: UDP over IPv6 must always checksum the packet. 7007 */ 7008 csum += udph->uha_length; 7009 csum = (csum & 0xFFFF) + (csum >> 16); 7010 udph->uha_checksum = (uint16_t)csum; 7011 7012 #ifdef _LITTLE_ENDIAN 7013 ip_len = htons(ip_len); 7014 #endif 7015 ip6h->ip6_plen = ip_len; 7016 7017 if (DB_TYPE(mp) != M_DATA) { 7018 ASSERT(mp != mp1); 7019 freeb(mp); 7020 } 7021 7022 /* mp has been consumed and we'll return success */ 7023 ASSERT(*error == 0); 7024 mp = NULL; 7025 7026 /* We're done. Pass the packet to IP */ 7027 BUMP_MIB(&udp_mib, udpOutDatagrams); 7028 ip_output_v6(connp, mp1, q, IP_WPUT); 7029 7030 done: 7031 if (*error != 0) { 7032 ASSERT(mp != NULL); 7033 BUMP_MIB(&udp_mib, udpOutErrors); 7034 } 7035 return (mp); 7036 } 7037 7038 static void 7039 udp_wput_other(queue_t *q, mblk_t *mp) 7040 { 7041 uchar_t *rptr = mp->b_rptr; 7042 struct datab *db; 7043 struct iocblk *iocp; 7044 cred_t *cr; 7045 conn_t *connp = Q_TO_CONN(q); 7046 udp_t *udp = connp->conn_udp; 7047 7048 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7049 "udp_wput_other_start: q %p", q); 7050 7051 db = mp->b_datap; 7052 7053 cr = DB_CREDDEF(mp, connp->conn_cred); 7054 7055 switch (db->db_type) { 7056 case M_PROTO: 7057 case M_PCPROTO: 7058 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7059 freemsg(mp); 7060 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7061 "udp_wput_other_end: q %p (%S)", 7062 q, "protoshort"); 7063 return; 7064 } 7065 switch (((t_primp_t)rptr)->type) { 7066 case T_ADDR_REQ: 7067 udp_addr_req(q, mp); 7068 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7069 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7070 return; 7071 case O_T_BIND_REQ: 7072 case T_BIND_REQ: 7073 udp_bind(q, mp); 7074 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7075 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7076 return; 7077 case T_CONN_REQ: 7078 udp_connect(q, mp); 7079 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7080 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7081 return; 7082 case T_CAPABILITY_REQ: 7083 udp_capability_req(q, mp); 7084 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7085 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7086 return; 7087 case T_INFO_REQ: 7088 udp_info_req(q, mp); 7089 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7090 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7091 return; 7092 case T_UNITDATA_REQ: 7093 /* 7094 * If a T_UNITDATA_REQ gets here, the address must 7095 * be bad. Valid T_UNITDATA_REQs are handled 7096 * in udp_wput. 7097 */ 7098 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7099 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7100 "udp_wput_other_end: q %p (%S)", 7101 q, "unitdatareq"); 7102 return; 7103 case T_UNBIND_REQ: 7104 udp_unbind(q, mp); 7105 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7106 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7107 return; 7108 case T_SVR4_OPTMGMT_REQ: 7109 if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr)) 7110 /* 7111 * Use upper queue for option processing in 7112 * case the request is not handled at this 7113 * level and needs to be passed down to IP. 7114 */ 7115 (void) svr4_optcom_req(_WR(UDP_RD(q)), 7116 mp, cr, &udp_opt_obj); 7117 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7118 "udp_wput_other_end: q %p (%S)", 7119 q, "optmgmtreq"); 7120 return; 7121 7122 case T_OPTMGMT_REQ: 7123 /* 7124 * Use upper queue for option processing in 7125 * case the request is not handled at this 7126 * level and needs to be passed down to IP. 7127 */ 7128 (void) tpi_optcom_req(_WR(UDP_RD(q)), 7129 mp, cr, &udp_opt_obj); 7130 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7131 "udp_wput_other_end: q %p (%S)", 7132 q, "optmgmtreq"); 7133 return; 7134 7135 case T_DISCON_REQ: 7136 udp_disconnect(q, mp); 7137 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7138 "udp_wput_other_end: q %p (%S)", 7139 q, "disconreq"); 7140 return; 7141 7142 /* The following TPI message is not supported by udp. */ 7143 case O_T_CONN_RES: 7144 case T_CONN_RES: 7145 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7146 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7147 "udp_wput_other_end: q %p (%S)", 7148 q, "connres/disconreq"); 7149 return; 7150 7151 /* The following 3 TPI messages are illegal for udp. */ 7152 case T_DATA_REQ: 7153 case T_EXDATA_REQ: 7154 case T_ORDREL_REQ: 7155 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7156 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7157 "udp_wput_other_end: q %p (%S)", 7158 q, "data/exdata/ordrel"); 7159 return; 7160 default: 7161 break; 7162 } 7163 break; 7164 case M_FLUSH: 7165 if (*rptr & FLUSHW) 7166 flushq(q, FLUSHDATA); 7167 break; 7168 case M_IOCTL: 7169 iocp = (struct iocblk *)mp->b_rptr; 7170 switch (iocp->ioc_cmd) { 7171 case TI_GETPEERNAME: 7172 if (udp->udp_state != TS_DATA_XFER) { 7173 /* 7174 * If a default destination address has not 7175 * been associated with the stream, then we 7176 * don't know the peer's name. 7177 */ 7178 iocp->ioc_error = ENOTCONN; 7179 iocp->ioc_count = 0; 7180 mp->b_datap->db_type = M_IOCACK; 7181 putnext(UDP_RD(q), mp); 7182 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7183 "udp_wput_other_end: q %p (%S)", 7184 q, "getpeername"); 7185 return; 7186 } 7187 /* FALLTHRU */ 7188 case TI_GETMYNAME: { 7189 /* 7190 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7191 * need to copyin the user's strbuf structure. 7192 * Processing will continue in the M_IOCDATA case 7193 * below. 7194 */ 7195 mi_copyin(q, mp, NULL, 7196 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7197 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7198 "udp_wput_other_end: q %p (%S)", 7199 q, "getmyname"); 7200 return; 7201 } 7202 case ND_SET: 7203 /* nd_getset performs the necessary checking */ 7204 case ND_GET: 7205 if (nd_getset(q, udp_g_nd, mp)) { 7206 putnext(UDP_RD(q), mp); 7207 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7208 "udp_wput_other_end: q %p (%S)", 7209 q, "get"); 7210 return; 7211 } 7212 break; 7213 case _SIOCSOCKFALLBACK: 7214 /* 7215 * Either sockmod is about to be popped and the 7216 * socket would now be treated as a plain stream, 7217 * or a module is about to be pushed so we could 7218 * no longer use read-side synchronous stream. 7219 * Drain any queued data and disable direct sockfs 7220 * interface from now on. 7221 */ 7222 if (!udp->udp_issocket) { 7223 DB_TYPE(mp) = M_IOCNAK; 7224 iocp->ioc_error = EINVAL; 7225 } else { 7226 udp->udp_issocket = B_FALSE; 7227 if (udp->udp_direct_sockfs) { 7228 /* 7229 * Disable read-side synchronous 7230 * stream interface and drain any 7231 * queued data. 7232 */ 7233 udp_rcv_drain(UDP_RD(q), udp, 7234 B_FALSE); 7235 ASSERT(!udp->udp_direct_sockfs); 7236 UDP_STAT(udp_sock_fallback); 7237 } 7238 DB_TYPE(mp) = M_IOCACK; 7239 iocp->ioc_error = 0; 7240 } 7241 iocp->ioc_count = 0; 7242 iocp->ioc_rval = 0; 7243 putnext(UDP_RD(q), mp); 7244 return; 7245 default: 7246 break; 7247 } 7248 break; 7249 case M_IOCDATA: 7250 udp_wput_iocdata(q, mp); 7251 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7252 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7253 return; 7254 default: 7255 /* Unrecognized messages are passed through without change. */ 7256 break; 7257 } 7258 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7259 "udp_wput_other_end: q %p (%S)", q, "end"); 7260 ip_output(connp, mp, q, IP_WPUT); 7261 } 7262 7263 /* ARGSUSED */ 7264 static void 7265 udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 7266 { 7267 udp_wput_other(((conn_t *)arg)->conn_wq, mp); 7268 udp_exit((conn_t *)arg); 7269 } 7270 7271 /* 7272 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7273 * messages. 7274 */ 7275 static void 7276 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7277 { 7278 mblk_t *mp1; 7279 STRUCT_HANDLE(strbuf, sb); 7280 uint16_t port; 7281 in6_addr_t v6addr; 7282 ipaddr_t v4addr; 7283 uint32_t flowinfo = 0; 7284 int addrlen; 7285 udp_t *udp = Q_TO_UDP(q); 7286 7287 /* Make sure it is one of ours. */ 7288 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7289 case TI_GETMYNAME: 7290 case TI_GETPEERNAME: 7291 break; 7292 default: 7293 ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); 7294 return; 7295 } 7296 7297 q = WR(UDP_RD(q)); 7298 switch (mi_copy_state(q, mp, &mp1)) { 7299 case -1: 7300 return; 7301 case MI_COPY_CASE(MI_COPY_IN, 1): 7302 break; 7303 case MI_COPY_CASE(MI_COPY_OUT, 1): 7304 /* 7305 * The address has been copied out, so now 7306 * copyout the strbuf. 7307 */ 7308 mi_copyout(q, mp); 7309 return; 7310 case MI_COPY_CASE(MI_COPY_OUT, 2): 7311 /* 7312 * The address and strbuf have been copied out. 7313 * We're done, so just acknowledge the original 7314 * M_IOCTL. 7315 */ 7316 mi_copy_done(q, mp, 0); 7317 return; 7318 default: 7319 /* 7320 * Something strange has happened, so acknowledge 7321 * the original M_IOCTL with an EPROTO error. 7322 */ 7323 mi_copy_done(q, mp, EPROTO); 7324 return; 7325 } 7326 7327 /* 7328 * Now we have the strbuf structure for TI_GETMYNAME 7329 * and TI_GETPEERNAME. Next we copyout the requested 7330 * address and then we'll copyout the strbuf. 7331 */ 7332 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 7333 (void *)mp1->b_rptr); 7334 if (udp->udp_family == AF_INET) 7335 addrlen = sizeof (sin_t); 7336 else 7337 addrlen = sizeof (sin6_t); 7338 7339 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7340 mi_copy_done(q, mp, EINVAL); 7341 return; 7342 } 7343 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7344 case TI_GETMYNAME: 7345 if (udp->udp_family == AF_INET) { 7346 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7347 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7348 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7349 v4addr = V4_PART_OF_V6(udp->udp_v6src); 7350 } else { 7351 /* 7352 * INADDR_ANY 7353 * udp_v6src is not set, we might be bound to 7354 * broadcast/multicast. Use udp_bound_v6src as 7355 * local address instead (that could 7356 * also still be INADDR_ANY) 7357 */ 7358 v4addr = V4_PART_OF_V6(udp->udp_bound_v6src); 7359 } 7360 } else { 7361 /* udp->udp_family == AF_INET6 */ 7362 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7363 v6addr = udp->udp_v6src; 7364 } else { 7365 /* 7366 * UNSPECIFIED 7367 * udp_v6src is not set, we might be bound to 7368 * broadcast/multicast. Use udp_bound_v6src as 7369 * local address instead (that could 7370 * also still be UNSPECIFIED) 7371 */ 7372 v6addr = udp->udp_bound_v6src; 7373 } 7374 } 7375 port = udp->udp_port; 7376 break; 7377 case TI_GETPEERNAME: 7378 if (udp->udp_state != TS_DATA_XFER) { 7379 mi_copy_done(q, mp, ENOTCONN); 7380 return; 7381 } 7382 if (udp->udp_family == AF_INET) { 7383 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7384 v4addr = V4_PART_OF_V6(udp->udp_v6dst); 7385 } else { 7386 /* udp->udp_family == AF_INET6) */ 7387 v6addr = udp->udp_v6dst; 7388 flowinfo = udp->udp_flowinfo; 7389 } 7390 port = udp->udp_dstport; 7391 break; 7392 default: 7393 mi_copy_done(q, mp, EPROTO); 7394 return; 7395 } 7396 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7397 if (!mp1) 7398 return; 7399 7400 if (udp->udp_family == AF_INET) { 7401 sin_t *sin; 7402 7403 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 7404 sin = (sin_t *)mp1->b_rptr; 7405 mp1->b_wptr = (uchar_t *)&sin[1]; 7406 *sin = sin_null; 7407 sin->sin_family = AF_INET; 7408 sin->sin_addr.s_addr = v4addr; 7409 sin->sin_port = port; 7410 } else { 7411 /* udp->udp_family == AF_INET6 */ 7412 sin6_t *sin6; 7413 7414 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 7415 sin6 = (sin6_t *)mp1->b_rptr; 7416 mp1->b_wptr = (uchar_t *)&sin6[1]; 7417 *sin6 = sin6_null; 7418 sin6->sin6_family = AF_INET6; 7419 sin6->sin6_flowinfo = flowinfo; 7420 sin6->sin6_addr = v6addr; 7421 sin6->sin6_port = port; 7422 } 7423 /* Copy out the address */ 7424 mi_copyout(q, mp); 7425 } 7426 7427 7428 static int 7429 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7430 void *thisdg_attrs) 7431 { 7432 struct T_unitdata_req *udreqp; 7433 int is_absreq_failure; 7434 cred_t *cr; 7435 conn_t *connp = Q_TO_CONN(q); 7436 7437 ASSERT(((t_primp_t)mp->b_rptr)->type); 7438 7439 cr = DB_CREDDEF(mp, connp->conn_cred); 7440 7441 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7442 *errorp = 0; 7443 7444 /* 7445 * Use upper queue for option processing since the callback 7446 * routines expect to be called in UDP instance instead of IP. 7447 */ 7448 *errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length, 7449 udreqp->OPT_offset, cr, &udp_opt_obj, 7450 thisdg_attrs, &is_absreq_failure); 7451 7452 if (*errorp != 0) { 7453 /* 7454 * Note: No special action needed in this 7455 * module for "is_absreq_failure" 7456 */ 7457 return (-1); /* failure */ 7458 } 7459 ASSERT(is_absreq_failure == 0); 7460 return (0); /* success */ 7461 } 7462 7463 void 7464 udp_ddi_init(void) 7465 { 7466 int i; 7467 7468 UDP6_MAJ = ddi_name_to_major(UDP6); 7469 7470 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7471 udp_opt_obj.odb_opt_arr_cnt); 7472 7473 if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) { 7474 /* Not a power of two. Round up to nearest power of two */ 7475 for (i = 0; i < 31; i++) { 7476 if (udp_bind_fanout_size < (1 << i)) 7477 break; 7478 } 7479 udp_bind_fanout_size = 1 << i; 7480 } 7481 udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size * 7482 sizeof (udp_fanout_t), KM_SLEEP); 7483 for (i = 0; i < udp_bind_fanout_size; i++) { 7484 mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7485 NULL); 7486 } 7487 (void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr)); 7488 7489 udp_kstat_init(); 7490 7491 udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), 7492 CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); 7493 } 7494 7495 void 7496 udp_ddi_destroy(void) 7497 { 7498 int i; 7499 7500 nd_free(&udp_g_nd); 7501 7502 for (i = 0; i < udp_bind_fanout_size; i++) { 7503 mutex_destroy(&udp_bind_fanout[i].uf_lock); 7504 } 7505 7506 kmem_free(udp_bind_fanout, udp_bind_fanout_size * 7507 sizeof (udp_fanout_t)); 7508 7509 udp_kstat_fini(); 7510 7511 kmem_cache_destroy(udp_cache); 7512 } 7513 7514 static void 7515 udp_kstat_init(void) 7516 { 7517 udp_named_kstat_t template = { 7518 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 7519 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7520 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 7521 { "entrySize", KSTAT_DATA_INT32, 0 }, 7522 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7523 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7524 }; 7525 7526 udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME, 7527 "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0); 7528 7529 if (udp_mibkp == NULL) 7530 return; 7531 7532 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7533 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7534 7535 bcopy(&template, udp_mibkp->ks_data, sizeof (template)); 7536 7537 udp_mibkp->ks_update = udp_kstat_update; 7538 7539 kstat_install(udp_mibkp); 7540 7541 if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat", 7542 "net", KSTAT_TYPE_NAMED, 7543 sizeof (udp_statistics) / sizeof (kstat_named_t), 7544 KSTAT_FLAG_VIRTUAL)) != NULL) { 7545 udp_ksp->ks_data = &udp_statistics; 7546 kstat_install(udp_ksp); 7547 } 7548 } 7549 7550 static void 7551 udp_kstat_fini(void) 7552 { 7553 if (udp_ksp != NULL) { 7554 kstat_delete(udp_ksp); 7555 udp_ksp = NULL; 7556 } 7557 if (udp_mibkp != NULL) { 7558 kstat_delete(udp_mibkp); 7559 udp_mibkp = NULL; 7560 } 7561 } 7562 7563 static int 7564 udp_kstat_update(kstat_t *kp, int rw) 7565 { 7566 udp_named_kstat_t *udpkp; 7567 7568 if ((kp == NULL) || (kp->ks_data == NULL)) 7569 return (EIO); 7570 7571 if (rw == KSTAT_WRITE) 7572 return (EACCES); 7573 7574 udpkp = (udp_named_kstat_t *)kp->ks_data; 7575 7576 udpkp->inDatagrams.value.ui32 = udp_mib.udpInDatagrams; 7577 udpkp->inErrors.value.ui32 = udp_mib.udpInErrors; 7578 udpkp->outDatagrams.value.ui32 = udp_mib.udpOutDatagrams; 7579 udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors; 7580 7581 return (0); 7582 } 7583 7584 /* ARGSUSED */ 7585 static void 7586 udp_rput(queue_t *q, mblk_t *mp) 7587 { 7588 /* 7589 * We get here whenever we do qreply() from IP, 7590 * i.e as part of handlings ioctls, etc. 7591 */ 7592 putnext(q, mp); 7593 } 7594 7595 /* 7596 * Read-side synchronous stream info entry point, called as a 7597 * result of handling certain STREAMS ioctl operations. 7598 */ 7599 static int 7600 udp_rinfop(queue_t *q, infod_t *dp) 7601 { 7602 mblk_t *mp; 7603 uint_t cmd = dp->d_cmd; 7604 int res = 0; 7605 int error = 0; 7606 udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); 7607 struct stdata *stp = STREAM(q); 7608 7609 mutex_enter(&udp->udp_drain_lock); 7610 /* If shutdown on read has happened, return nothing */ 7611 mutex_enter(&stp->sd_lock); 7612 if (stp->sd_flag & STREOF) { 7613 mutex_exit(&stp->sd_lock); 7614 goto done; 7615 } 7616 mutex_exit(&stp->sd_lock); 7617 7618 if ((mp = udp->udp_rcv_list_head) == NULL) 7619 goto done; 7620 7621 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7622 7623 if (cmd & INFOD_COUNT) { 7624 /* 7625 * Return the number of messages. 7626 */ 7627 dp->d_count += udp->udp_rcv_msgcnt; 7628 res |= INFOD_COUNT; 7629 } 7630 if (cmd & INFOD_BYTES) { 7631 /* 7632 * Return size of all data messages. 7633 */ 7634 dp->d_bytes += udp->udp_rcv_cnt; 7635 res |= INFOD_BYTES; 7636 } 7637 if (cmd & INFOD_FIRSTBYTES) { 7638 /* 7639 * Return size of first data message. 7640 */ 7641 dp->d_bytes = msgdsize(mp); 7642 res |= INFOD_FIRSTBYTES; 7643 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7644 } 7645 if (cmd & INFOD_COPYOUT) { 7646 mblk_t *mp1 = mp->b_cont; 7647 int n; 7648 /* 7649 * Return data contents of first message. 7650 */ 7651 ASSERT(DB_TYPE(mp1) == M_DATA); 7652 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7653 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7654 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7655 UIO_READ, dp->d_uiop)) != 0) { 7656 goto done; 7657 } 7658 mp1 = mp1->b_cont; 7659 } 7660 res |= INFOD_COPYOUT; 7661 dp->d_cmd &= ~INFOD_COPYOUT; 7662 } 7663 done: 7664 mutex_exit(&udp->udp_drain_lock); 7665 7666 dp->d_res |= res; 7667 7668 return (error); 7669 } 7670 7671 /* 7672 * Read-side synchronous stream entry point. This is called as a result 7673 * of recv/read operation done at sockfs, and is guaranteed to execute 7674 * outside of the interrupt thread context. It returns a single datagram 7675 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7676 */ 7677 static int 7678 udp_rrw(queue_t *q, struiod_t *dp) 7679 { 7680 mblk_t *mp; 7681 udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); 7682 7683 /* We should never get here when we're in SNMP mode */ 7684 ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); 7685 7686 /* 7687 * Dequeue datagram from the head of the list and return 7688 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7689 * set/cleared depending on whether or not there's data 7690 * remaining in the list. 7691 */ 7692 mutex_enter(&udp->udp_drain_lock); 7693 if (!udp->udp_direct_sockfs) { 7694 mutex_exit(&udp->udp_drain_lock); 7695 UDP_STAT(udp_rrw_busy); 7696 return (EBUSY); 7697 } 7698 if ((mp = udp->udp_rcv_list_head) != NULL) { 7699 uint_t size = msgdsize(mp); 7700 7701 /* Last datagram in the list? */ 7702 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7703 udp->udp_rcv_list_tail = NULL; 7704 mp->b_next = NULL; 7705 7706 udp->udp_rcv_cnt -= size; 7707 udp->udp_rcv_msgcnt--; 7708 UDP_STAT(udp_rrw_msgcnt); 7709 7710 /* No longer flow-controlling? */ 7711 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7712 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7713 udp->udp_drain_qfull = B_FALSE; 7714 } 7715 if (udp->udp_rcv_list_head == NULL) { 7716 /* 7717 * Either we just dequeued the last datagram or 7718 * we get here from sockfs and have nothing to 7719 * return; in this case clear RSLEEP. 7720 */ 7721 ASSERT(udp->udp_rcv_cnt == 0); 7722 ASSERT(udp->udp_rcv_msgcnt == 0); 7723 ASSERT(udp->udp_rcv_list_tail == NULL); 7724 STR_WAKEUP_CLEAR(STREAM(q)); 7725 } else { 7726 /* 7727 * More data follows; we need udp_rrw() to be 7728 * called in future to pick up the rest. 7729 */ 7730 STR_WAKEUP_SET(STREAM(q)); 7731 } 7732 mutex_exit(&udp->udp_drain_lock); 7733 dp->d_mp = mp; 7734 return (0); 7735 } 7736 7737 /* 7738 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7739 * list; this is typically executed within the interrupt thread context 7740 * and so we do things as quickly as possible. 7741 */ 7742 static void 7743 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7744 { 7745 ASSERT(q == RD(q)); 7746 ASSERT(pkt_len == msgdsize(mp)); 7747 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7748 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7749 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7750 7751 mutex_enter(&udp->udp_drain_lock); 7752 /* 7753 * Wake up and signal the receiving app; it is okay to do this 7754 * before enqueueing the mp because we are holding the drain lock. 7755 * One of the advantages of synchronous stream is the ability for 7756 * us to find out when the application performs a read on the 7757 * socket by way of udp_rrw() entry point being called. We need 7758 * to generate SIGPOLL/SIGIO for each received data in the case 7759 * of asynchronous socket just as in the strrput() case. However, 7760 * we only wake the application up when necessary, i.e. during the 7761 * first enqueue. When udp_rrw() is called, we send up a single 7762 * datagram upstream and call STR_WAKEUP_SET() again when there 7763 * are still data remaining in our receive queue. 7764 */ 7765 if (udp->udp_rcv_list_head == NULL) { 7766 STR_WAKEUP_SET(STREAM(q)); 7767 udp->udp_rcv_list_head = mp; 7768 } else { 7769 udp->udp_rcv_list_tail->b_next = mp; 7770 } 7771 udp->udp_rcv_list_tail = mp; 7772 udp->udp_rcv_cnt += pkt_len; 7773 udp->udp_rcv_msgcnt++; 7774 7775 /* Need to flow-control? */ 7776 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7777 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7778 udp->udp_drain_qfull = B_TRUE; 7779 7780 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 7781 STR_SENDSIG(STREAM(q)); 7782 mutex_exit(&udp->udp_drain_lock); 7783 } 7784 7785 /* 7786 * Drain the contents of receive list to the module upstream; we do 7787 * this during close or when we fallback to the slow mode due to 7788 * sockmod being popped or a module being pushed on top of us. 7789 */ 7790 static void 7791 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7792 { 7793 mblk_t *mp; 7794 7795 ASSERT(q == RD(q)); 7796 7797 mutex_enter(&udp->udp_drain_lock); 7798 /* 7799 * There is no race with a concurrent udp_input() sending 7800 * up packets using putnext() after we have cleared the 7801 * udp_direct_sockfs flag but before we have completed 7802 * sending up the packets in udp_rcv_list, since we are 7803 * either a writer or we have quiesced the conn. 7804 */ 7805 udp->udp_direct_sockfs = B_FALSE; 7806 mutex_exit(&udp->udp_drain_lock); 7807 7808 if (udp->udp_rcv_list_head != NULL) 7809 UDP_STAT(udp_drain); 7810 7811 /* 7812 * Send up everything via putnext(); note here that we 7813 * don't need the udp_drain_lock to protect us since 7814 * nothing can enter udp_rrw() and that we currently 7815 * have exclusive access to this udp. 7816 */ 7817 while ((mp = udp->udp_rcv_list_head) != NULL) { 7818 udp->udp_rcv_list_head = mp->b_next; 7819 mp->b_next = NULL; 7820 udp->udp_rcv_cnt -= msgdsize(mp); 7821 udp->udp_rcv_msgcnt--; 7822 if (closing) { 7823 freemsg(mp); 7824 } else { 7825 putnext(q, mp); 7826 } 7827 } 7828 ASSERT(udp->udp_rcv_cnt == 0); 7829 ASSERT(udp->udp_rcv_msgcnt == 0); 7830 ASSERT(udp->udp_rcv_list_head == NULL); 7831 udp->udp_rcv_list_tail = NULL; 7832 udp->udp_drain_qfull = B_FALSE; 7833 } 7834 7835 static size_t 7836 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7837 { 7838 /* We add a bit of extra buffering */ 7839 size += size >> 1; 7840 if (size > udp_max_buf) 7841 size = udp_max_buf; 7842 7843 udp->udp_rcv_hiwat = size; 7844 return (size); 7845 } 7846 7847 /* 7848 * Little helper for IPsec's NAT-T processing. 7849 */ 7850 boolean_t 7851 udp_compute_checksum(void) 7852 { 7853 return (udp_do_checksum); 7854 } 7855