/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

const char udp_version[] = "%Z%%M%	%I%	%E% SMI";

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/tiuser.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsubr.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/ucred.h>
#include <sys/zone.h>

#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/isa_defs.h>
#include <sys/random.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>
#include <net/if.h>
#include <net/route.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <inet/ip_multi.h>
#include <inet/mi.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/udp_impl.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>
#include <inet/ipp_common.h>

/*
 * The ipsec_info.h header file is here since it has the definition for the
 * M_CTL message types used by IP to convey information to the ULP.  The
 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence.
 */
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>

/*
 * Synchronization notes:
 *
 * UDP uses a combination of its internal perimeter, a global lock and
 * a set of bind hash locks to protect its data structures.  Please see
 * the note above udp_mode_assertions for details about the internal
 * perimeter.
 *
 * When a UDP endpoint is bound to a local port, it is inserted into
 * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
 * The size of the array is controlled by the udp_bind_fanout_size variable.
 * This variable can be changed in /etc/system if the default value is
 * not large enough.  Each bind hash bucket is protected by a per bucket
 * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
 * structure.
 * A UDP endpoint is removed from the bind hash list only when it is
 * being unbound or being closed.  The per bucket lock also protects a
 * UDP endpoint's state changes.
 *
 * Plumbing notes:
 *
 * Both udp and ip are merged, but the streams plumbing is kept unchanged
 * in that udp is always pushed atop /dev/ip.  This is done to preserve
 * backwards compatibility for certain applications which rely on such
 * plumbing geometry to do things such as issuing I_POP on the stream
 * in order to obtain direct access to /dev/ip, etc.
 *
 * All UDP processing happens in the /dev/ip instance; the udp module
 * instance does not possess any state about the endpoint, and merely
 * acts as a dummy module whose presence is to keep the streams plumbing
 * appearance unchanged.  At open time /dev/ip allocates a conn_t that
 * happens to embed a udp_t.  This stays dormant until the time udp is
 * pushed, which indicates to /dev/ip that it must convert itself from
 * an IP to a UDP endpoint.
 *
 * We only allow for the following plumbing cases:
 *
 * Normal:
 *	/dev/ip is first opened and later udp is pushed directly on top.
 *	This is the default action that happens when a udp socket or
 *	/dev/udp is opened.  The conn_t created by the /dev/ip instance is
 *	now shared and is marked with IPCL_UDP.
 *
 * SNMP-only:
 *	udp is pushed on top of a module other than /dev/ip.  When this
 *	happens it will support only SNMP semantics.  A new conn_t is
 *	allocated and marked with IPCL_UDPMOD.
 *
 * The above cases imply that we don't support any intermediate module
 * residing in between /dev/ip and udp -- in fact, we never supported such
 * a scenario in the past as the inter-layer communication semantics have
 * always been private.  Also note that the normal case allows for SNMP
 * requests to be processed in addition to the rest of UDP operations.
 *
 * The normal case plumbing is depicted by the following diagram:
 *
 *	+---------------+---------------+
 *	|		|		| udp
 *	|    udp_wq	|    udp_rq	|
 *	|		|    UDP_RD	|
 *	|		|		|
 *	+---------------+---------------+
 *		|		^
 *		v		|
 *	+---------------+---------------+
 *	|		|		| /dev/ip
 *	|    ip_wq	|    ip_rq	| conn_t
 *	|    UDP_WR	|		|
 *	|		|		|
 *	+---------------+---------------+
 *
 * Messages arriving at udp_wq from above will end up in ip_wq before
 * they get processed, i.e. udp write entry points will advance udp_wq
 * and use its q_next value as ip_wq in order to use the conn_t that
 * is stored in its q_ptr.  Likewise, messages generated by ip to the
 * module above udp will appear as if they originated from udp_rq,
 * i.e. putnext() calls to the module above udp are done using the
 * udp_rq instead of ip_rq in order to avoid udp_rput(), which does
 * nothing more than calling putnext().
 *
 * The above implies the following rules of thumb:
 *
 * 1. udp_t is obtained from conn_t, which is created by the /dev/ip
 *    instance and is stored in q_ptr of both ip_wq and ip_rq.  There
 *    is no direct reference to conn_t from either udp_wq or udp_rq.
 *
 * 2. Write-side entry points of udp can obtain the conn_t via the
 *    Q_TO_CONN() macro, using the queue value obtained from UDP_WR().
 *
 * 3. While in /dev/ip context, putnext() to the module above udp can
 *    be done by supplying the queue value obtained from UDP_RD().
 */
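/*
 * Example (for illustration only; the function below is hypothetical and
 * merely restates the rules of thumb above, it is not an additional code
 * path in this module):
 *
 *	static void
 *	example_wput(queue_t *q, mblk_t *mp)		q is udp_wq
 *	{
 *		conn_t	*connp = Q_TO_CONN(UDP_WR(q));	rule 2
 *		udp_t	*udp = connp->conn_udp;		rule 1
 *
 *		... process mp using the shared conn_t/udp_t state ...
 *
 *		A reply generated while in /dev/ip context goes straight
 *		to the module above udp, bypassing udp_rput():
 *
 *		putnext(UDP_RD(connp->conn_rq), mp);	rule 3
 *	}
 */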
static queue_t *UDP_WR(queue_t *);
static queue_t *UDP_RD(queue_t *);

udp_stat_t udp_statistics = {
	{ "udp_ip_send",		KSTAT_DATA_UINT64 },
	{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
	{ "udp_ire_null",		KSTAT_DATA_UINT64 },
	{ "udp_drain",			KSTAT_DATA_UINT64 },
	{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_busy",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_msgcnt",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum_bytes",	KSTAT_DATA_UINT64 },
	{ "udp_out_opt",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
	{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
	{ "udp_in_pktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstaddr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvif",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvslla",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvucred",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvttl",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhopopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhoplimit",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvrtdstopts",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvrthdr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvpktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvtclass",		KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "udp_data_conn",		KSTAT_DATA_UINT64 },
	{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
#endif
};

static kstat_t *udp_ksp;
struct kmem_cache *udp_cache;

/*
 * Bind hash list size and hash function.  It has to be a power of 2 for
 * hashing.
 */
#define	UDP_BIND_FANOUT_SIZE	512
#define	UDP_BIND_HASH(lport) \
	((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1))

/* UDP bind fanout hash structure. */
typedef struct udp_fanout_s {
	udp_t *uf_udp;
	kmutex_t uf_lock;
#if defined(_LP64) || defined(_I32LPx)
	char	uf_pad[48];
#else
	char	uf_pad[56];
#endif
} udp_fanout_t;

uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
/* udp_fanout_t *udp_bind_fanout. */
static udp_fanout_t *udp_bind_fanout;

/*
 * This controls the rate at which some ndd info report functions can be
 * used by non-privileged users.  It stores the last time such info was
 * requested.  When those report functions are called again, this is
 * checked against the current time and compared with the ndd param
 * udp_ndd_get_info_interval.
255 */ 256 static clock_t udp_last_ndd_get_info_time; 257 #define NDD_TOO_QUICK_MSG \ 258 "ndd get info rate too high for non-priviledged users, try again " \ 259 "later.\n" 260 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 261 262 static void udp_addr_req(queue_t *q, mblk_t *mp); 263 static void udp_bind(queue_t *q, mblk_t *mp); 264 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 265 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 266 static int udp_build_hdrs(queue_t *q, udp_t *udp); 267 static void udp_capability_req(queue_t *q, mblk_t *mp); 268 static int udp_close(queue_t *q); 269 static void udp_connect(queue_t *q, mblk_t *mp); 270 static void udp_disconnect(queue_t *q, mblk_t *mp); 271 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 272 int sys_error); 273 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 274 t_scalar_t tlierr, int unixerr); 275 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 276 cred_t *cr); 277 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 278 char *value, caddr_t cp, cred_t *cr); 279 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 280 char *value, caddr_t cp, cred_t *cr); 281 static void udp_icmp_error(queue_t *q, mblk_t *mp); 282 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 283 static void udp_info_req(queue_t *q, mblk_t *mp); 284 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 285 t_scalar_t addr_length); 286 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 287 cred_t *credp); 288 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 289 int *errorp, void *thisdg_attrs); 290 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 291 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 292 static boolean_t udp_param_register(udpparam_t *udppa, int cnt); 293 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 294 cred_t *cr); 295 static int udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 296 uchar_t **optbufp, uint_t *optlenp); 297 static void udp_report_item(mblk_t *mp, udp_t *udp); 298 static void udp_rput(queue_t *q, mblk_t *mp); 299 static void udp_rput_other(queue_t *, mblk_t *); 300 static int udp_rinfop(queue_t *q, infod_t *dp); 301 static int udp_rrw(queue_t *q, struiod_t *dp); 302 static void udp_rput_bind_ack(queue_t *q, mblk_t *mp); 303 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 304 cred_t *cr); 305 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha); 306 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 307 t_scalar_t destlen, t_scalar_t err); 308 static void udp_unbind(queue_t *q, mblk_t *mp); 309 static in_port_t udp_update_next_port(in_port_t port, boolean_t random); 310 static void udp_wput(queue_t *q, mblk_t *mp); 311 static mblk_t *udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst, 312 uint16_t port, uint_t srcid, int *error); 313 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 314 t_scalar_t tudr_optlen, int *error); 315 static void udp_wput_other(queue_t *q, mblk_t *mp); 316 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 317 static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 318 socklen_t addrlen); 319 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 320 321 static void udp_kstat_init(void); 322 static void udp_kstat_fini(void); 323 static int 
udp_kstat_update(kstat_t *kp, int rw); 324 static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2); 325 static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 326 static void udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 327 static void udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2); 328 329 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 330 uint_t pkt_len); 331 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 332 static void udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t); 333 static void udp_exit(conn_t *); 334 static void udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t); 335 #ifdef DEBUG 336 static void udp_mode_assertions(udp_t *, int); 337 #endif /* DEBUG */ 338 339 major_t UDP6_MAJ; 340 #define UDP6 "udp6" 341 342 #define UDP_RECV_HIWATER (56 * 1024) 343 #define UDP_RECV_LOWATER 128 344 #define UDP_XMIT_HIWATER (56 * 1024) 345 #define UDP_XMIT_LOWATER 1024 346 347 static struct module_info udp_info = { 348 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 349 }; 350 351 static struct qinit udp_rinit = { 352 (pfi_t)udp_rput, NULL, udp_open, udp_close, NULL, 353 &udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 354 }; 355 356 static struct qinit udp_winit = { 357 (pfi_t)udp_wput, NULL, NULL, NULL, NULL, 358 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 359 }; 360 361 /* Support for just SNMP if UDP is not pushed directly over device IP */ 362 struct qinit udp_snmp_rinit = { 363 (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL, 364 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 365 }; 366 367 struct qinit udp_snmp_winit = { 368 (pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL, 369 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 370 }; 371 372 struct streamtab udpinfo = { 373 &udp_rinit, &udp_winit 374 }; 375 376 static sin_t sin_null; /* Zero address for quick clears */ 377 static sin6_t sin6_null; /* Zero address for quick clears */ 378 379 /* Hint not protected by any lock */ 380 static in_port_t udp_g_next_port_to_try; 381 382 /* 383 * Extra privileged ports. In host byte order. 384 */ 385 #define UDP_NUM_EPRIV_PORTS 64 386 static int udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 387 static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 }; 388 389 /* Only modified during _init and _fini thus no locking is needed. */ 390 static IDP udp_g_nd; /* Points to table of UDP ND variables. */ 391 392 /* MIB-2 stuff for SNMP */ 393 static mib2_udp_t udp_mib; /* SNMP fixed size info */ 394 static kstat_t *udp_mibkp; /* kstat exporting udp_mib data */ 395 396 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 397 398 /* Default structure copied into T_INFO_ACK messages */ 399 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 400 T_INFO_ACK, 401 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 402 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 403 T_INVALID, /* CDATA_size. udp does not support connect data. */ 404 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 405 sizeof (sin_t), /* ADDR_size. */ 406 0, /* OPT_size - not initialized here */ 407 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 408 T_CLTS, /* SERV_type. udp supports connection-less. */ 409 TS_UNBND, /* CURRENT_state. This is set from udp_state. 
 */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size.  udp does not support connect data. */
	T_INVALID,	/* DDATA_size.  udp does not support disconnect data. */
	sizeof (sin6_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into udp_g_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
	/*min	max		value		name */
	{ 0L,	256,		32,		"udp_wroff_extra" },
	{ 1L,	255,		255,		"udp_ipv4_ttl" },
	{ 0,	IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
	{ 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
	{ 0,	1,		1,		"udp_do_checksum" },
	{ 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
	{ 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
	{ UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
	{ 0,	(1<<30),	UDP_XMIT_LOWATER,	"udp_xmit_lowat"},
	{ UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
	{ 65536, (1<<30),	2*1024*1024,	"udp_max_buf"},
	{ 100,	60000,		1000,		"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/*
 * The smallest anonymous port in the privileged port range in which UDP
 * looks for a free port.  Used with the option UDP_ANONPRIVBIND.
 */
static in_port_t udp_min_anonpriv_port = 512;

/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;
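/*
 * Illustrative sketch of how the anonymous port tunables above are meant
 * to interact.  udp_update_next_port() is declared earlier but not shown
 * in this portion of the file, so the helper below is hypothetical and
 * only outlines the selection policy under that assumption.
 *
 *	in_port_t
 *	example_pick_anon_port(void)
 *	{
 *		in_port_t port;
 *
 *		if (udp_random_anon_port != 0) {
 *			(void) random_get_pseudo_bytes((uint8_t *)&port,
 *			    sizeof (in_port_t));
 *			port = udp_smallest_anon_port + port %
 *			    (udp_largest_anon_port - udp_smallest_anon_port);
 *		} else {
 *			port = udp_g_next_port_to_try;
 *		}
 *		return (udp_update_next_port(port, B_FALSE));
 *	}
 */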
/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 */

void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

typedef union T_primitives *t_primp_t;

#define	UDP_ENQUEUE_MP(udp, mp, proc, tag) {			\
	ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL);	\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(mp)->b_queue = (queue_t *)((uintptr_t)tag);		\
	(mp)->b_prev = (mblk_t *)proc;				\
	if ((udp)->udp_mphead == NULL)				\
		(udp)->udp_mphead = (mp);			\
	else							\
		(udp)->udp_mptail->b_next = (mp);		\
	(udp)->udp_mptail = (mp);				\
	(udp)->udp_mpcount++;					\
}

#define	UDP_READERS_INCREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count++;				\
}

#define	UDP_READERS_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count--;				\
	if ((udp)->udp_reader_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

#define	UDP_SQUEUE_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_squeue_count--;				\
	if ((udp)->udp_squeue_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

/*
 * Notes on UDP endpoint synchronization:
 *
 * UDP needs exclusive operation on a per endpoint basis when executing
 * functions that modify the endpoint state.  udp_rput_other() deals with
 * packets with IP options, and processing these packets ends up having
 * to update the endpoint's option related state.  udp_wput_other() deals
 * with control operations from the top, e.g. connect(), that need to
 * update the endpoint state.  These could be synchronized using locks,
 * but the current version uses squeues for this purpose.  squeues may
 * give a performance improvement for certain cases such as connected UDP
 * sockets; thus the framework allows for using squeues.
 *
 * The perimeter routines are described as follows:
 *
 * udp_enter():
 *	Enter the UDP endpoint perimeter.
 *
 * udp_become_writer():
 *	Become exclusive on the UDP endpoint.  Specifies a function
 *	that will be called exclusively either immediately or later
 *	when the perimeter is available exclusively.
 *
 * udp_exit():
 *	Exit the UDP perimeter.
 *
 * Entering UDP from the top or from the bottom must be done using
 * udp_enter().  No lock must be held while attempting to enter the UDP
 * perimeter.  When finished, udp_exit() must be called to get out of
 * the perimeter.
 *
 * UDP operates in either MT_HOT mode or in SQUEUE mode.  In MT_HOT mode,
 * multiple threads may enter a UDP endpoint concurrently.  This is used
 * for sending and/or receiving normal data.  Control operations and other
 * special cases call udp_become_writer() to become exclusive on a per
 * endpoint basis and this results in transitioning to SQUEUE mode.  squeue
 * by definition serializes access to the conn_t.  When there are no more
 * pending messages on the squeue for the UDP connection, the endpoint
 * reverts to MT_HOT mode.  During the interregnum when not all MT threads
 * of an endpoint have finished, messages are queued in the UDP endpoint
 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode.
549 * 550 * These modes have the following analogs: 551 * 552 * UDP_MT_HOT/udp_reader_count==0 none 553 * UDP_MT_HOT/udp_reader_count>0 RW_READ_LOCK 554 * UDP_MT_QUEUED RW_WRITE_WANTED 555 * UDP_SQUEUE or UDP_QUEUED_SQUEUE RW_WRITE_LOCKED 556 * 557 * Stable modes: UDP_MT_HOT, UDP_SQUEUE 558 * Transient modes: UDP_MT_QUEUED, UDP_QUEUED_SQUEUE 559 * 560 * While in stable modes, UDP keeps track of the number of threads 561 * operating on the endpoint. The udp_reader_count variable represents 562 * the number of threads entering the endpoint as readers while it is 563 * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there 564 * is only a single reader, i.e. when this counter drops to 1. Likewise, 565 * udp_squeue_count represents the number of threads operating on the 566 * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition 567 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e. 568 * when this counter drops to 0. 569 * 570 * The default mode is set to UDP_MT_HOT and UDP alternates between 571 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below. 572 * 573 * Mode transition: 574 * ---------------------------------------------------------------- 575 * old mode Event New mode 576 * ---------------------------------------------------------------- 577 * UDP_MT_HOT Call to udp_become_writer() UDP_SQUEUE 578 * and udp_reader_count == 1 579 * 580 * UDP_MT_HOT Call to udp_become_writer() UDP_MT_QUEUED 581 * and udp_reader_count > 1 582 * 583 * UDP_MT_QUEUED udp_reader_count drops to zero UDP_QUEUED_SQUEUE 584 * 585 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_SQUEUE 586 * internal UDP queue successfully 587 * moved to squeue AND udp_squeue_count != 0 588 * 589 * UDP_QUEUED_SQUEUE All messages enqueued on the UDP_MT_HOT 590 * internal UDP queue successfully 591 * moved to squeue AND udp_squeue_count 592 * drops to zero 593 * 594 * UDP_SQUEUE udp_squeue_count drops to zero UDP_MT_HOT 595 * ---------------------------------------------------------------- 596 */ 597 598 static queue_t * 599 UDP_WR(queue_t *q) 600 { 601 ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL); 602 ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL); 603 ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next))); 604 605 return (_WR(q)->q_next); 606 } 607 608 static queue_t * 609 UDP_RD(queue_t *q) 610 { 611 ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL); 612 ASSERT(IPCL_IS_UDP(Q_TO_CONN(q))); 613 ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL); 614 615 return (_RD(q)->q_next); 616 } 617 618 #ifdef DEBUG 619 #define UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller) 620 #else 621 #define UDP_MODE_ASSERTIONS(udp, caller) 622 #endif 623 624 /* Invariants */ 625 #ifdef DEBUG 626 627 uint32_t udp_count[4]; 628 629 /* Context of udp_mode_assertions */ 630 #define UDP_ENTER 1 631 #define UDP_BECOME_WRITER 2 632 #define UDP_EXIT 3 633 634 static void 635 udp_mode_assertions(udp_t *udp, int caller) 636 { 637 ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock)); 638 639 switch (udp->udp_mode) { 640 case UDP_MT_HOT: 641 /* 642 * Messages have not yet been enqueued on the internal queue, 643 * otherwise we would have switched to UDP_MT_QUEUED. Likewise 644 * by definition, there can't be any messages enqueued on the 645 * squeue. The UDP could be quiescent, so udp_reader_count 646 * could be zero at entry. 
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 &&
		    udp->udp_squeue_count == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0);
		udp_count[0]++;
		break;

	case UDP_MT_QUEUED:
		/*
		 * The last MT thread to exit the udp perimeter empties the
		 * internal queue and then switches the UDP to
		 * UDP_QUEUED_SQUEUE mode.  Since we are still in UDP_MT_QUEUED
		 * mode, it means there must be at least 1 MT thread still in
		 * the perimeter and at least 1 message on the internal queue.
		 */
		ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL &&
		    udp->udp_mpcount != 0 && udp->udp_squeue_count == 0);
		udp_count[1]++;
		break;

	case UDP_QUEUED_SQUEUE:
		/*
		 * The switch has happened from MT to SQUEUE.  So there can't
		 * be any MT threads.  Messages could still pile up on the
		 * internal queue until the transition is complete and we move
		 * to UDP_SQUEUE mode.  We can't assert on nonzero
		 * udp_squeue_count since the squeue could drain any time.
		 */
		ASSERT(udp->udp_reader_count == 0);
		udp_count[2]++;
		break;

	case UDP_SQUEUE:
		/*
		 * The transition is complete.  There can't be any messages on
		 * the internal queue.  The udp could be quiescent or the
		 * squeue could drain any time, so we can't assert on nonzero
		 * udp_squeue_count during entry.  Nor can we assert that
		 * udp_reader_count is zero, since a reader thread could have
		 * directly become writer in line by calling udp_become_writer
		 * without going through the queued states.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0);
		udp_count[3]++;
		break;
	}
}
#endif

#define	_UDP_ENTER(connp, mp, proc, tag) {				\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	if ((connp)->conn_state_flags & CONN_CLOSING) {			\
		mutex_exit(&(connp)->conn_lock);			\
		freemsg(mp);						\
	} else {							\
		UDP_MODE_ASSERTIONS(_udp, UDP_ENTER);			\
									\
		switch (_udp->udp_mode) {				\
		case UDP_MT_HOT:					\
			/* We can execute as reader right away. */	\
			UDP_READERS_INCREF(_udp);			\
			mutex_exit(&(connp)->conn_lock);		\
			(*(proc))(connp, mp, (connp)->conn_sqp);	\
			break;						\
									\
		case UDP_SQUEUE:					\
			/*						\
			 * We are in squeue mode, send the		\
			 * packet to the squeue				\
			 */						\
			_udp->udp_squeue_count++;			\
			CONN_INC_REF_LOCKED(connp);			\
			mutex_exit(&(connp)->conn_lock);		\
			squeue_enter((connp)->conn_sqp, mp, proc,	\
			    connp, tag);				\
			break;						\
									\
		case UDP_MT_QUEUED:					\
		case UDP_QUEUED_SQUEUE:					\
			/*						\
			 * Some messages may have been enqueued		\
			 * ahead of us.  Enqueue the new message	\
			 * at the tail of the internal queue to		\
			 * preserve message ordering.			\
			 */						\
			UDP_ENQUEUE_MP(_udp, mp, proc, tag);		\
			mutex_exit(&(connp)->conn_lock);		\
			break;						\
		}							\
	}								\
}

static void
udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	_UDP_ENTER(connp, mp, proc, tag);
}
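/*
 * Illustrative calling pattern for the perimeter routines (the wrapper
 * name and tag below are placeholders, not real identifiers in this file):
 *
 *	Entry from above or below, with no locks held:
 *
 *	udp_enter(connp, mp, example_proc, example_tag);
 *
 *	static void
 *	example_proc(void *arg, mblk_t *mp, void *arg2)
 *	{
 *		conn_t *connp = arg;
 *
 *		... fast-path work runs here, possibly concurrently with
 *		... other readers (UDP_MT_HOT) or serialized on the
 *		... squeue (UDP_SQUEUE) ...
 *
 *		udp_exit(connp);	leave the perimeter when done
 *	}
 *
 * Operations that must modify endpoint state (see udp_rput_other() and
 * udp_wput_other()) instead go through udp_become_writer(), which either
 * runs the handler exclusively right away or parks the mblk until the
 * remaining readers have called udp_exit().
 */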
static void
udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	udp_t	*udp;

	udp = connp->conn_udp;

	mutex_enter(&connp->conn_lock);

	UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER);

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		if (udp->udp_reader_count == 1) {
			/*
			 * We are the only MT thread.  Switch to squeue mode
			 * immediately.
			 */
			udp->udp_mode = UDP_SQUEUE;
			udp->udp_squeue_count = 1;
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			squeue_enter(connp->conn_sqp, mp, proc, connp, tag);
			return;
		}
		/* FALLTHRU */

	case UDP_MT_QUEUED:
		/* Enqueue the packet internally in UDP */
		udp->udp_mode = UDP_MT_QUEUED;
		UDP_ENQUEUE_MP(udp, mp, proc, tag);
		mutex_exit(&connp->conn_lock);
		return;

	case UDP_SQUEUE:
	case UDP_QUEUED_SQUEUE:
		/*
		 * We are already exclusive, i.e. we are already the
		 * writer.  Simply call the desired function.
		 */
		udp->udp_squeue_count++;
		mutex_exit(&connp->conn_lock);
		(*proc)(connp, mp, connp->conn_sqp);
		return;
	}
}

/*
 * Transition from MT mode to SQUEUE mode, when the last MT thread
 * is exiting the UDP perimeter.  Move all messages from the internal
 * udp queue to the squeue.  A better way would be to move all the
 * messages in one shot; that needs more support from the squeue
 * framework.
 */
static void
udp_switch_to_squeue(udp_t *udp)
{
	mblk_t *mp;
	mblk_t	*mp_next;
	sqproc_t proc;
	uint8_t	tag;
	conn_t	*connp = udp->udp_connp;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(udp->udp_mode == UDP_MT_QUEUED);
	while (udp->udp_mphead != NULL) {
		mp = udp->udp_mphead;
		udp->udp_mphead = NULL;
		udp->udp_mptail = NULL;
		udp->udp_mpcount = 0;
		udp->udp_mode = UDP_QUEUED_SQUEUE;
		mutex_exit(&connp->conn_lock);
		/*
		 * It is best not to hold any locks across the calls
		 * to squeue functions.  Since we drop the lock we
		 * need to go back and check the udp_mphead once again
		 * after the squeue_fill, hence the while loop at
		 * the top of this function.
		 */
		for (; mp != NULL; mp = mp_next) {
			mp_next = mp->b_next;
			proc = (sqproc_t)mp->b_prev;
			tag = (uint8_t)((uintptr_t)mp->b_queue);
			mp->b_next = NULL;
			mp->b_prev = NULL;
			mp->b_queue = NULL;
			CONN_INC_REF(connp);
			udp->udp_squeue_count++;
			squeue_fill(connp->conn_sqp, mp, proc, connp,
			    tag);
		}
		mutex_enter(&connp->conn_lock);
	}
	/*
	 * udp_squeue_count of zero implies that the squeue has drained
	 * even before we arrived here (i.e. after the squeue_fill above).
	 */
	udp->udp_mode = (udp->udp_squeue_count != 0) ?
	    UDP_SQUEUE : UDP_MT_HOT;
}
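/*
 * Worked example of the mode transitions above: two threads are in the
 * perimeter as readers when a state-changing operation arrives.
 *
 *	state				event
 *	UDP_MT_HOT, reader_count == 2	udp_become_writer() is called; the
 *					mblk is parked via UDP_ENQUEUE_MP
 *					and the mode becomes UDP_MT_QUEUED
 *	UDP_MT_QUEUED			both readers call udp_exit(); the
 *					last one calls udp_switch_to_squeue()
 *	UDP_QUEUED_SQUEUE		parked mblks are handed to
 *					squeue_fill()
 *	UDP_SQUEUE			the handler runs exclusively on the
 *					squeue
 *	UDP_MT_HOT			udp_exit() drops udp_squeue_count
 *					to zero
 */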
#define	_UDP_EXIT(connp) {						\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	UDP_MODE_ASSERTIONS(_udp, UDP_EXIT);				\
									\
	switch (_udp->udp_mode) {					\
	case UDP_MT_HOT:						\
		UDP_READERS_DECREF(_udp);				\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_SQUEUE:						\
		UDP_SQUEUE_DECREF(_udp);				\
		if (_udp->udp_squeue_count == 0)			\
			_udp->udp_mode = UDP_MT_HOT;			\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_MT_QUEUED:						\
		/*							\
		 * If this is the last MT thread, we need to		\
		 * switch to squeue mode.				\
		 */							\
		UDP_READERS_DECREF(_udp);				\
		if (_udp->udp_reader_count == 0)			\
			udp_switch_to_squeue(_udp);			\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_QUEUED_SQUEUE:						\
		UDP_SQUEUE_DECREF(_udp);				\
		/*							\
		 * Even if the udp_squeue_count drops to zero, we	\
		 * don't want to change udp_mode to UDP_MT_HOT here.	\
		 * The thread in udp_switch_to_squeue will take care	\
		 * of the transition to UDP_MT_HOT, after emptying	\
		 * any more new messages that have been enqueued in	\
		 * udp_mphead.						\
		 */							\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
	}								\
}

static void
udp_exit(conn_t *connp)
{
	_UDP_EXIT(connp);
}

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 */
static in_port_t
udp_get_next_priv_port(void)
{
	static in_port_t next_priv_port = IPPORT_RESERVED - 1;

	if (next_priv_port < udp_min_anonpriv_port) {
		next_priv_port = IPPORT_RESERVED - 1;
	}
	return (next_priv_port--);
}

/* UDP bind hash report triggered via the Named Dispatch mechanism. */
/* ARGSUSED */
static int
udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udp_fanout_t	*udpf;
	int		i;
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;

	/* Refer to comments in udp_status_report(). */
	if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) {
		if (ddi_get_lbolt() - udp_last_ndd_get_info_time <
		    drv_usectohz(udp_ndd_get_info_interval * 1000)) {
			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
			return (0);
		}
	}
	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
		/* The following may work even if we cannot get a large buf. */
		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
		return (0);
	}

	(void) mi_mpprintf(mp,
	    "UDP     " MI_COL_HDRPAD_STR
	/*   12345678[89ABCDEF] */
	    " zone lport src addr        dest addr       port  state");
	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */

	zoneid = connp->conn_zoneid;

	for (i = 0; i < udp_bind_fanout_size; i++) {
		udpf = &udp_bind_fanout[i];
		mutex_enter(&udpf->uf_lock);

		/* Print the hash index. */
		udp = udpf->uf_udp;
		if (zoneid != GLOBAL_ZONEID) {
			/* skip to first entry in this zone; might be none */
			while (udp != NULL &&
			    udp->udp_connp->conn_zoneid != zoneid)
				udp = udp->udp_bind_hash;
		}
		if (udp != NULL) {
			uint_t print_len, buf_len;

			buf_len = mp->b_cont->b_datap->db_lim -
			    mp->b_cont->b_wptr;
			print_len = snprintf((char *)mp->b_cont->b_wptr,
			    buf_len, "%d\n", i);
			if (print_len < buf_len) {
				mp->b_cont->b_wptr += print_len;
			} else {
				mp->b_cont->b_wptr += buf_len;
			}
			for (; udp != NULL; udp = udp->udp_bind_hash) {
				if (zoneid == GLOBAL_ZONEID ||
				    zoneid == udp->udp_connp->conn_zoneid)
					udp_report_item(mp->b_cont, udp);
			}
		}
		mutex_exit(&udpf->uf_lock);
	}
	udp_last_ndd_get_info_time = ddi_get_lbolt();
	return (0);
}
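/*
 * Illustrative lookup pattern the bind fanout is designed for.  Given a
 * local port in network byte order, hash into udp_bind_fanout[], take the
 * per-bucket lock, and walk the udp_bind_hash chain; this mirrors what
 * udp_bind() and udp_connect() do below.  The function name is
 * hypothetical.
 *
 *	static udp_t *
 *	example_lookup_bound(in_port_t lport, zoneid_t zoneid)
 *	{
 *		udp_fanout_t	*udpf;
 *		udp_t		*udp1;
 *
 *		udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)];
 *		mutex_enter(&udpf->uf_lock);
 *		for (udp1 = udpf->uf_udp; udp1 != NULL;
 *		    udp1 = udp1->udp_bind_hash) {
 *			if (udp1->udp_port == lport &&
 *			    udp1->udp_connp->conn_zoneid == zoneid)
 *				break;
 *		}
 *		mutex_exit(&udpf->uf_lock);
 *		return (udp1);	unsafe to use outside uf_lock; for
 *				illustration only
 *	}
 */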
/*
 * Hash list removal routine for udp_t structures.
 */
static void
udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
{
	udp_t	*udpnext;
	kmutex_t *lockp;

	if (udp->udp_ptpbhn == NULL)
		return;

	/*
	 * Extract the lock pointer in case there are concurrent
	 * hash_remove's for this instance.
	 */
	ASSERT(udp->udp_port != 0);
	if (!caller_holds_lock) {
		lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock;
		ASSERT(lockp != NULL);
		mutex_enter(lockp);
	}
	if (udp->udp_ptpbhn != NULL) {
		udpnext = udp->udp_bind_hash;
		if (udpnext != NULL) {
			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
			udp->udp_bind_hash = NULL;
		}
		*udp->udp_ptpbhn = udpnext;
		udp->udp_ptpbhn = NULL;
	}
	if (!caller_holds_lock) {
		mutex_exit(lockp);
	}
}

static void
udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
{
	udp_t	**udpp;
	udp_t	*udpnext;

	ASSERT(MUTEX_HELD(&uf->uf_lock));
	if (udp->udp_ptpbhn != NULL) {
		udp_bind_hash_remove(udp, B_TRUE);
	}
	udpp = &uf->uf_udp;
	udpnext = udpp[0];
	if (udpnext != NULL) {
		/*
		 * If the new udp is bound to the INADDR_ANY address
		 * and the first one in the list is not bound to
		 * INADDR_ANY, we skip all entries until we find the
		 * first one bound to INADDR_ANY.
		 * This makes sure that applications binding to a
		 * specific address get preference over those binding to
		 * INADDR_ANY.
		 */
		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
			while ((udpnext = udpp[0]) != NULL &&
			    !V6_OR_V4_INADDR_ANY(
			    udpnext->udp_bound_v6src)) {
				udpp = &(udpnext->udp_bind_hash);
			}
			if (udpnext != NULL)
				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		} else {
			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		}
	}
	udp->udp_bind_hash = udpnext;
	udp->udp_ptpbhn = udpp;
	udpp[0] = udp;
}

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR.  This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating udp_g_next_port_to_try.
1076 */ 1077 static void 1078 udp_bind(queue_t *q, mblk_t *mp) 1079 { 1080 sin_t *sin; 1081 sin6_t *sin6; 1082 mblk_t *mp1; 1083 in_port_t port; /* Host byte order */ 1084 in_port_t requested_port; /* Host byte order */ 1085 struct T_bind_req *tbr; 1086 int count; 1087 in6_addr_t v6src; 1088 boolean_t bind_to_req_port_only; 1089 int loopmax; 1090 udp_fanout_t *udpf; 1091 in_port_t lport; /* Network byte order */ 1092 zoneid_t zoneid; 1093 conn_t *connp; 1094 udp_t *udp; 1095 1096 connp = Q_TO_CONN(q); 1097 udp = connp->conn_udp; 1098 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 1099 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1100 "udp_bind: bad req, len %u", 1101 (uint_t)(mp->b_wptr - mp->b_rptr)); 1102 udp_err_ack(q, mp, TPROTO, 0); 1103 return; 1104 } 1105 1106 if (udp->udp_state != TS_UNBND) { 1107 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1108 "udp_bind: bad state, %u", udp->udp_state); 1109 udp_err_ack(q, mp, TOUTSTATE, 0); 1110 return; 1111 } 1112 /* 1113 * Reallocate the message to make sure we have enough room for an 1114 * address and the protocol type. 1115 */ 1116 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 1117 if (!mp1) { 1118 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1119 return; 1120 } 1121 1122 mp = mp1; 1123 tbr = (struct T_bind_req *)mp->b_rptr; 1124 switch (tbr->ADDR_length) { 1125 case 0: /* Request for a generic port */ 1126 tbr->ADDR_offset = sizeof (struct T_bind_req); 1127 if (udp->udp_family == AF_INET) { 1128 tbr->ADDR_length = sizeof (sin_t); 1129 sin = (sin_t *)&tbr[1]; 1130 *sin = sin_null; 1131 sin->sin_family = AF_INET; 1132 mp->b_wptr = (uchar_t *)&sin[1]; 1133 } else { 1134 ASSERT(udp->udp_family == AF_INET6); 1135 tbr->ADDR_length = sizeof (sin6_t); 1136 sin6 = (sin6_t *)&tbr[1]; 1137 *sin6 = sin6_null; 1138 sin6->sin6_family = AF_INET6; 1139 mp->b_wptr = (uchar_t *)&sin6[1]; 1140 } 1141 port = 0; 1142 break; 1143 1144 case sizeof (sin_t): /* Complete IPv4 address */ 1145 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 1146 sizeof (sin_t)); 1147 if (sin == NULL || !OK_32PTR((char *)sin)) { 1148 udp_err_ack(q, mp, TSYSERR, EINVAL); 1149 return; 1150 } 1151 if (udp->udp_family != AF_INET || 1152 sin->sin_family != AF_INET) { 1153 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1154 return; 1155 } 1156 port = ntohs(sin->sin_port); 1157 break; 1158 1159 case sizeof (sin6_t): /* complete IPv6 address */ 1160 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 1161 sizeof (sin6_t)); 1162 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1163 udp_err_ack(q, mp, TSYSERR, EINVAL); 1164 return; 1165 } 1166 if (udp->udp_family != AF_INET6 || 1167 sin6->sin6_family != AF_INET6) { 1168 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1169 return; 1170 } 1171 port = ntohs(sin6->sin6_port); 1172 break; 1173 1174 default: /* Invalid request */ 1175 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1176 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 1177 udp_err_ack(q, mp, TBADADDR, 0); 1178 return; 1179 } 1180 1181 requested_port = port; 1182 1183 if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ) 1184 bind_to_req_port_only = B_FALSE; 1185 else /* T_BIND_REQ and requested_port != 0 */ 1186 bind_to_req_port_only = B_TRUE; 1187 1188 if (requested_port == 0) { 1189 /* 1190 * If the application passed in zero for the port number, it 1191 * doesn't care which port number we bind to. Get one in the 1192 * valid range. 
		 */
		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port();
		} else {
			port = udp_update_next_port(udp_g_next_port_to_try,
			    B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < udp_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			for (i = 0; i < udp_g_num_epriv_ports; i++) {
				if (port == udp_g_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);

			if (secpolicy_net_privaddr(cr, port) != 0) {
				udp_err_ack(q, mp, TACCES, 0);
				return;
			}
		}
	}

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it.
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream.  If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only".  If set the bind fails; otherwise we
	 * search for an unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different from SunOS 4.X, which did not
	 * support IP multicast.  Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */
		loopmax = IPPORT_RESERVED - udp_min_anonpriv_port;
	} else {
		loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1;
	}

	zoneid = connp->conn_zoneid;
	for (;;) {
		udp_t		*udp1;
		boolean_t	is_inaddr_any;
		boolean_t	found_exclbind = B_FALSE;

		is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
		/*
		 * Walk through the list of udp streams bound to the
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port ||
			    zoneid != udp1->udp_connp->conn_zoneid)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * are changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion)
				continue;

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If the existing endpoint is bound to a
			 * non-wildcard IP address and the requesting
			 * stream is bound to a distinct, different IP
			 * address (also non-wildcard), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			break;
		}

		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number.  We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when the requested port
			 * is bound (and only on the first iteration
			 * of the for() loop).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 */
			udp_err_ack(q, mp, TADDRBUSY, 0);
			return;
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port();
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with.  Set
				 * requested_port to 0, so that we will
				 * update udp_g_next_port_to_try below.
				 */
				port = udp_update_next_port(
				    udp_g_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(port + 1, B_FALSE);
			}
		}

		if (++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp_err_ack(q, mp, TNOADDR, 0);
			return;
		}
	}

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address then udp_rput
	 * will clear the source address when it receives the T_BIND_ACK.
	 */
	udp->udp_v6src = udp->udp_bound_v6src = v6src;
	udp->udp_port = lport;
	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
		udp_g_next_port_to_try = port + 1;
	}

	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
	if (udp->udp_family == AF_INET) {
		sin->sin_port = udp->udp_port;
	} else {
		int error;

		sin6->sin6_port = udp->udp_port;
		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			mutex_exit(&udpf->uf_lock);
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	udp->udp_state = TS_IDLE;
	udp_bind_hash_insert(udpf, udp);
	mutex_exit(&udpf->uf_lock);

	if (cl_inet_bind) {
		/*
		 * Running in cluster mode - register bind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}

	}

	/* Pass the protocol number in the message following the address. */
	*mp->b_wptr++ = IPPROTO_UDP;
	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
		/*
		 * Append a request for an IRE if udp_v6src is not
		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
		 */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			udp_err_ack(q, mp, TSYSERR, ENOMEM);
			return;
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
	}
	if (udp->udp_family == AF_INET6)
		mp = ip_bind_v6(q, mp, connp, NULL);
	else
		mp = ip_bind_v4(q, mp, connp);

	if (mp != NULL)
		udp_rput_other(_RD(q), mp);
	else
		CONN_INC_REF(connp);
}

void
udp_resume_bind(conn_t *connp, mblk_t *mp)
{
	udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY);
}

/*
 * This is called from ip_wput_nondata to resume a deferred UDP bind.
 */
/* ARGSUSED */
static void
udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2)
{
	conn_t *connp = arg;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));

	udp_rput_other(connp->conn_rq, mp);

	CONN_OPER_PENDING_DONE(connp);
	udp_exit(connp);
}
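/*
 * Flow sketch (illustration only): how a bind that IP cannot complete
 * synchronously is resumed.  This strings together the tail of udp_bind()
 * above with udp_resume_bind() and udp_resume_bind_cb(); no new code path
 * is implied.
 *
 *	udp_bind()/udp_connect()
 *	    mp = ip_bind_v4/v6(q, mp, connp)
 *	    mp == NULL	-> IP kept the message; CONN_INC_REF(connp)
 *	    ...
 *	ip_wput_nondata()	-> answer ready, calls udp_resume_bind()
 *	udp_resume_bind()	-> udp_enter(connp, mp, udp_resume_bind_cb,
 *				   SQTAG_BIND_RETRY)
 *	udp_resume_bind_cb()	-> udp_rput_other(connp->conn_rq, mp);
 *				   CONN_OPER_PENDING_DONE(connp); udp_exit()
 */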
/*
 * This routine handles each T_CONN_REQ message passed to udp.  It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_rput.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
 */
static void
udp_connect(queue_t *q, mblk_t *mp)
{
	sin6_t	*sin6;
	sin_t	*sin;
	struct T_conn_req	*tcr;
	in6_addr_t v6dst;
	ipaddr_t v4dst;
	uint16_t dstport;
	uint32_t flowinfo;
	mblk_t	*mp1, *mp2;
	udp_fanout_t	*udpf;
	udp_t	*udp, *udp1;

	udp = Q_TO_UDP(q);

	tcr = (struct T_conn_req *)mp->b_rptr;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	/*
	 * This UDP must have bound to a port already before doing
	 * a connect.
	 */
	if (udp->udp_state == TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);

	udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)];

	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		mutex_enter(&udpf->uf_lock);
		udp->udp_v6src = udp->udp_bound_v6src;
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
	}

	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine the packet type based on the type of address passed in;
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that the address family matches the family of
	 * the address passed down.
	 */
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v6dst = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			flowinfo = 0;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
			flowinfo = sin6->sin6_flowinfo;
		}
		dstport = sin6->sin6_port;
		break;
	}
	if (dstport == 0) {
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	/*
	 * Create a default IP header with no IP options.
	 */
	udp->udp_dstport = dstport;
	if (udp->udp_ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
1653 */ 1654 if (v4dst == INADDR_ANY) { 1655 v4dst = htonl(INADDR_LOOPBACK); 1656 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1657 if (udp->udp_family == AF_INET) { 1658 sin->sin_addr.s_addr = v4dst; 1659 } else { 1660 sin6->sin6_addr = v6dst; 1661 } 1662 } 1663 udp->udp_v6dst = v6dst; 1664 udp->udp_flowinfo = 0; 1665 1666 /* 1667 * If the destination address is multicast and 1668 * an outgoing multicast interface has been set, 1669 * use the address of that interface as our 1670 * source address if no source address has been set. 1671 */ 1672 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 1673 CLASSD(v4dst) && 1674 udp->udp_multicast_if_addr != INADDR_ANY) { 1675 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 1676 &udp->udp_v6src); 1677 } 1678 } else { 1679 ASSERT(udp->udp_ipversion == IPV6_VERSION); 1680 /* 1681 * Interpret a zero destination to mean loopback. 1682 * Update the T_CONN_REQ (sin/sin6) since it is used to 1683 * generate the T_CONN_CON. 1684 */ 1685 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 1686 v6dst = ipv6_loopback; 1687 sin6->sin6_addr = v6dst; 1688 } 1689 udp->udp_v6dst = v6dst; 1690 udp->udp_flowinfo = flowinfo; 1691 /* 1692 * If the destination address is multicast and 1693 * an outgoing multicast interface has been set, 1694 * then the ip bind logic will pick the correct source 1695 * address (i.e. matching the outgoing multicast interface). 1696 */ 1697 } 1698 1699 /* 1700 * Verify that the src/port/dst/port is unique for all 1701 * connections in TS_DATA_XFER 1702 */ 1703 mutex_enter(&udpf->uf_lock); 1704 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 1705 if (udp1->udp_state != TS_DATA_XFER) 1706 continue; 1707 if (udp->udp_port != udp1->udp_port || 1708 udp->udp_ipversion != udp1->udp_ipversion || 1709 dstport != udp1->udp_dstport || 1710 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 1711 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst)) 1712 continue; 1713 mutex_exit(&udpf->uf_lock); 1714 udp_err_ack(q, mp, TBADADDR, 0); 1715 return; 1716 } 1717 udp->udp_state = TS_DATA_XFER; 1718 mutex_exit(&udpf->uf_lock); 1719 1720 /* 1721 * Send down bind to IP to verify that there is a route 1722 * and to determine the source address. 1723 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 1724 */ 1725 if (udp->udp_family == AF_INET) 1726 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t)); 1727 else 1728 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); 1729 if (mp1 == NULL) { 1730 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1731 bind_failed: 1732 mutex_enter(&udpf->uf_lock); 1733 udp->udp_state = TS_IDLE; 1734 mutex_exit(&udpf->uf_lock); 1735 return; 1736 } 1737 1738 /* 1739 * We also have to send a connection confirmation to 1740 * keep TLI happy. Prepare it for udp_rput. 1741 */ 1742 if (udp->udp_family == AF_INET) 1743 mp2 = mi_tpi_conn_con(NULL, (char *)sin, 1744 sizeof (*sin), NULL, 0); 1745 else 1746 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, 1747 sizeof (*sin6), NULL, 0); 1748 if (mp2 == NULL) { 1749 freemsg(mp1); 1750 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1751 goto bind_failed; 1752 } 1753 1754 mp = mi_tpi_ok_ack_alloc(mp); 1755 if (mp == NULL) { 1756 /* Unable to reuse the T_CONN_REQ for the ack. */ 1757 freemsg(mp2); 1758 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 1759 goto bind_failed; 1760 } 1761 1762 /* Hang onto the T_OK_ACK and T_CONN_CON for later. 
*/ 1763 linkb(mp1, mp); 1764 linkb(mp1, mp2); 1765 1766 if (udp->udp_family == AF_INET) 1767 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1768 else 1769 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1770 1771 if (mp1 != NULL) 1772 udp_rput_other(_RD(q), mp1); 1773 else 1774 CONN_INC_REF(udp->udp_connp); 1775 } 1776 1777 static int 1778 udp_close(queue_t *q) 1779 { 1780 conn_t *connp = Q_TO_CONN(UDP_WR(q)); 1781 udp_t *udp; 1782 queue_t *ip_rq = RD(UDP_WR(q)); 1783 1784 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1785 udp = connp->conn_udp; 1786 1787 ip_quiesce_conn(connp); 1788 /* 1789 * Disable read-side synchronous stream 1790 * interface and drain any queued data. 1791 */ 1792 udp_rcv_drain(q, udp, B_TRUE); 1793 ASSERT(!udp->udp_direct_sockfs); 1794 1795 qprocsoff(q); 1796 1797 /* restore IP module's high and low water marks to default values */ 1798 ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat; 1799 WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat; 1800 WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat; 1801 1802 ASSERT(udp->udp_rcv_cnt == 0); 1803 ASSERT(udp->udp_rcv_msgcnt == 0); 1804 ASSERT(udp->udp_rcv_list_head == NULL); 1805 ASSERT(udp->udp_rcv_list_tail == NULL); 1806 1807 /* connp is now single threaded. */ 1808 udp_close_free(connp); 1809 /* 1810 * Restore connp as an IP endpoint. We don't need 1811 * any locks since we are now single threaded 1812 */ 1813 connp->conn_flags &= ~IPCL_UDP; 1814 connp->conn_state_flags &= 1815 ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED); 1816 return (0); 1817 } 1818 1819 /* 1820 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn 1821 */ 1822 void 1823 udp_quiesce_conn(conn_t *connp) 1824 { 1825 udp_t *udp = connp->conn_udp; 1826 1827 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 1828 /* 1829 * Running in cluster mode - register unbind information 1830 */ 1831 if (udp->udp_ipversion == IPV4_VERSION) { 1832 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 1833 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 1834 (in_port_t)udp->udp_port); 1835 } else { 1836 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 1837 (uint8_t *)(&(udp->udp_v6src)), 1838 (in_port_t)udp->udp_port); 1839 } 1840 } 1841 1842 udp_bind_hash_remove(udp, B_FALSE); 1843 1844 mutex_enter(&connp->conn_lock); 1845 while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 || 1846 udp->udp_mode != UDP_MT_HOT) { 1847 cv_wait(&connp->conn_cv, &connp->conn_lock); 1848 } 1849 mutex_exit(&connp->conn_lock); 1850 } 1851 1852 void 1853 udp_close_free(conn_t *connp) 1854 { 1855 udp_t *udp = connp->conn_udp; 1856 1857 /* If there are any options associated with the stream, free them. 
*/ 1858 if (udp->udp_ip_snd_options) { 1859 mi_free((char *)udp->udp_ip_snd_options); 1860 udp->udp_ip_snd_options = NULL; 1861 } 1862 1863 if (udp->udp_ip_rcv_options) { 1864 mi_free((char *)udp->udp_ip_rcv_options); 1865 udp->udp_ip_rcv_options = NULL; 1866 } 1867 1868 /* Free memory associated with sticky options */ 1869 if (udp->udp_sticky_hdrs_len != 0) { 1870 kmem_free(udp->udp_sticky_hdrs, 1871 udp->udp_sticky_hdrs_len); 1872 udp->udp_sticky_hdrs = NULL; 1873 udp->udp_sticky_hdrs_len = 0; 1874 } 1875 1876 if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 1877 kmem_free(udp->udp_sticky_ipp.ipp_hopopts, 1878 udp->udp_sticky_ipp.ipp_hopoptslen); 1879 udp->udp_sticky_ipp.ipp_hopopts = NULL; 1880 } 1881 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 1882 kmem_free(udp->udp_sticky_ipp.ipp_rtdstopts, 1883 udp->udp_sticky_ipp.ipp_rtdstoptslen); 1884 udp->udp_sticky_ipp.ipp_rtdstopts = NULL; 1885 } 1886 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 1887 kmem_free(udp->udp_sticky_ipp.ipp_rthdr, 1888 udp->udp_sticky_ipp.ipp_rthdrlen); 1889 udp->udp_sticky_ipp.ipp_rthdr = NULL; 1890 } 1891 if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 1892 kmem_free(udp->udp_sticky_ipp.ipp_dstopts, 1893 udp->udp_sticky_ipp.ipp_dstoptslen); 1894 udp->udp_sticky_ipp.ipp_dstopts = NULL; 1895 } 1896 udp->udp_sticky_ipp.ipp_fields &= 1897 ~(IPPF_HOPOPTS|IPPF_RTDSTOPTS|IPPF_RTHDR|IPPF_DSTOPTS); 1898 1899 udp->udp_connp = NULL; 1900 connp->conn_udp = NULL; 1901 kmem_cache_free(udp_cache, udp); 1902 } 1903 1904 /* 1905 * This routine handles each T_DISCON_REQ message passed to udp 1906 * as an indicating that UDP is no longer connected. This results 1907 * in sending a T_BIND_REQ to IP to restore the binding to just 1908 * the local address/port. 1909 * 1910 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1911 * T_BIND_REQ - specifying just the local address/port 1912 * T_OK_ACK - for the T_DISCON_REQ 1913 * 1914 * The disconnect completes in udp_rput. 1915 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user. 1916 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1917 * it to an error ack for the appropriate primitive. 1918 */ 1919 static void 1920 udp_disconnect(queue_t *q, mblk_t *mp) 1921 { 1922 udp_t *udp = Q_TO_UDP(q); 1923 mblk_t *mp1; 1924 udp_fanout_t *udpf; 1925 1926 if (udp->udp_state != TS_DATA_XFER) { 1927 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1928 "udp_disconnect: bad state, %u", udp->udp_state); 1929 udp_err_ack(q, mp, TOUTSTATE, 0); 1930 return; 1931 } 1932 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1933 mutex_enter(&udpf->uf_lock); 1934 udp->udp_v6src = udp->udp_bound_v6src; 1935 udp->udp_state = TS_IDLE; 1936 mutex_exit(&udpf->uf_lock); 1937 1938 /* 1939 * Send down bind to IP to remove the full binding and revert 1940 * to the local address binding. 1941 */ 1942 if (udp->udp_family == AF_INET) 1943 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 1944 else 1945 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 1946 if (mp1 == NULL) { 1947 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1948 return; 1949 } 1950 mp = mi_tpi_ok_ack_alloc(mp); 1951 if (mp == NULL) { 1952 /* Unable to reuse the T_DISCON_REQ for the ack. 
*/ 1953 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 1954 return; 1955 } 1956 1957 if (udp->udp_family == AF_INET6) { 1958 int error; 1959 1960 /* Rebuild the header template */ 1961 error = udp_build_hdrs(q, udp); 1962 if (error != 0) { 1963 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 1964 freemsg(mp1); 1965 return; 1966 } 1967 } 1968 mutex_enter(&udpf->uf_lock); 1969 udp->udp_discon_pending = 1; 1970 mutex_exit(&udpf->uf_lock); 1971 1972 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 1973 linkb(mp1, mp); 1974 1975 if (udp->udp_family == AF_INET6) 1976 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1977 else 1978 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1979 1980 if (mp1 != NULL) 1981 udp_rput_other(_RD(q), mp1); 1982 else 1983 CONN_INC_REF(udp->udp_connp); 1984 } 1985 1986 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1987 static void 1988 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1989 { 1990 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 1991 putnext(UDP_RD(q), mp); 1992 } 1993 1994 /* Shorthand to generate and send TPI error acks to our client */ 1995 static void 1996 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 1997 int sys_error) 1998 { 1999 struct T_error_ack *teackp; 2000 2001 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2002 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2003 teackp = (struct T_error_ack *)mp->b_rptr; 2004 teackp->ERROR_prim = primitive; 2005 teackp->TLI_error = t_error; 2006 teackp->UNIX_error = sys_error; 2007 putnext(UDP_RD(q), mp); 2008 } 2009 } 2010 2011 /*ARGSUSED*/ 2012 static int 2013 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2014 { 2015 int i; 2016 2017 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2018 if (udp_g_epriv_ports[i] != 0) 2019 (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); 2020 } 2021 return (0); 2022 } 2023 2024 /* ARGSUSED */ 2025 static int 2026 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2027 cred_t *cr) 2028 { 2029 long new_value; 2030 int i; 2031 2032 /* 2033 * Fail the request if the new value does not lie within the 2034 * port number limits. 2035 */ 2036 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2037 new_value <= 0 || new_value >= 65536) { 2038 return (EINVAL); 2039 } 2040 2041 /* Check if the value is already in the list */ 2042 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2043 if (new_value == udp_g_epriv_ports[i]) { 2044 return (EEXIST); 2045 } 2046 } 2047 /* Find an empty slot */ 2048 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2049 if (udp_g_epriv_ports[i] == 0) 2050 break; 2051 } 2052 if (i == udp_g_num_epriv_ports) { 2053 return (EOVERFLOW); 2054 } 2055 2056 /* Set the new value */ 2057 udp_g_epriv_ports[i] = (in_port_t)new_value; 2058 return (0); 2059 } 2060 2061 /* ARGSUSED */ 2062 static int 2063 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2064 cred_t *cr) 2065 { 2066 long new_value; 2067 int i; 2068 2069 /* 2070 * Fail the request if the new value does not lie within the 2071 * port number limits. 
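 *
 * For illustration, assuming the conventional ndd(1M) names under
 * which these handlers are registered elsewhere in this file:
 *
 *	ndd -set /dev/udp udp_extra_priv_ports_del 2049
 *	ndd -set /dev/udp udp_extra_priv_ports_add 2049
 *
 * Only values in the range 1-65535 are accepted; anything else is
 * rejected with EINVAL below.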
2072 */ 2073 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2074 new_value <= 0 || new_value >= 65536) { 2075 return (EINVAL); 2076 } 2077 2078 /* Check that the value is already in the list */ 2079 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2080 if (udp_g_epriv_ports[i] == new_value) 2081 break; 2082 } 2083 if (i == udp_g_num_epriv_ports) { 2084 return (ESRCH); 2085 } 2086 2087 /* Clear the value */ 2088 udp_g_epriv_ports[i] = 0; 2089 return (0); 2090 } 2091 2092 /* At minimum we need 4 bytes of UDP header */ 2093 #define ICMP_MIN_UDP_HDR 4 2094 2095 /* 2096 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2097 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2098 * Assumes that IP has pulled up everything up to and including the ICMP header. 2099 * An M_CTL could potentially come here from some other module (i.e. if UDP 2100 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2101 * does not have enough ICMP information , following STREAMS conventions, 2102 * we send it upstream assuming it is an M_CTL we don't understand. 2103 */ 2104 static void 2105 udp_icmp_error(queue_t *q, mblk_t *mp) 2106 { 2107 icmph_t *icmph; 2108 ipha_t *ipha; 2109 int iph_hdr_length; 2110 udpha_t *udpha; 2111 sin_t sin; 2112 sin6_t sin6; 2113 mblk_t *mp1; 2114 int error = 0; 2115 size_t mp_size = MBLKL(mp); 2116 udp_t *udp = Q_TO_UDP(q); 2117 2118 /* 2119 * Assume IP provides aligned packets - otherwise toss 2120 */ 2121 if (!OK_32PTR(mp->b_rptr)) { 2122 freemsg(mp); 2123 return; 2124 } 2125 2126 /* 2127 * Verify that we have a complete IP header and the application has 2128 * asked for errors. If not, send it upstream. 2129 */ 2130 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2131 noticmpv4: 2132 putnext(UDP_RD(q), mp); 2133 return; 2134 } 2135 2136 ipha = (ipha_t *)mp->b_rptr; 2137 /* 2138 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2139 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6. 2140 */ 2141 switch (IPH_HDR_VERSION(ipha)) { 2142 case IPV6_VERSION: 2143 udp_icmp_error_ipv6(q, mp); 2144 return; 2145 case IPV4_VERSION: 2146 break; 2147 default: 2148 goto noticmpv4; 2149 } 2150 2151 /* Skip past the outer IP and ICMP headers */ 2152 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2153 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2154 /* 2155 * If we don't have the correct outer IP header length or if the ULP 2156 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2157 * send the packet upstream. 2158 */ 2159 if (iph_hdr_length < sizeof (ipha_t) || 2160 ipha->ipha_protocol != IPPROTO_ICMP || 2161 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2162 goto noticmpv4; 2163 } 2164 ipha = (ipha_t *)&icmph[1]; 2165 2166 /* Skip past the inner IP and find the ULP header */ 2167 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2168 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2169 /* 2170 * If we don't have the correct inner IP header length or if the ULP 2171 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2172 * bytes of UDP header, send it upstream. 2173 */ 2174 if (iph_hdr_length < sizeof (ipha_t) || 2175 ipha->ipha_protocol != IPPROTO_UDP || 2176 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2177 goto noticmpv4; 2178 } 2179 2180 switch (icmph->icmph_type) { 2181 case ICMP_DEST_UNREACHABLE: 2182 switch (icmph->icmph_code) { 2183 case ICMP_FRAGMENTATION_NEEDED: 2184 /* 2185 * IP has already adjusted the path MTU. 
2186 * XXX Somehow pass MTU indication to application? 2187 */ 2188 break; 2189 case ICMP_PORT_UNREACHABLE: 2190 case ICMP_PROTOCOL_UNREACHABLE: 2191 error = ECONNREFUSED; 2192 break; 2193 default: 2194 /* Transient errors */ 2195 break; 2196 } 2197 break; 2198 default: 2199 /* Transient errors */ 2200 break; 2201 } 2202 if (error == 0) { 2203 freemsg(mp); 2204 return; 2205 } 2206 2207 switch (udp->udp_family) { 2208 case AF_INET: 2209 sin = sin_null; 2210 sin.sin_family = AF_INET; 2211 sin.sin_addr.s_addr = ipha->ipha_dst; 2212 sin.sin_port = udpha->uha_dst_port; 2213 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2214 error); 2215 break; 2216 case AF_INET6: 2217 sin6 = sin6_null; 2218 sin6.sin6_family = AF_INET6; 2219 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2220 sin6.sin6_port = udpha->uha_dst_port; 2221 2222 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2223 NULL, 0, error); 2224 break; 2225 } 2226 if (mp1) 2227 putnext(UDP_RD(q), mp1); 2228 freemsg(mp); 2229 } 2230 2231 /* 2232 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2233 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2234 * Assumes that IP has pulled up all the extension headers as well as the 2235 * ICMPv6 header. 2236 * An M_CTL could potentially come here from some other module (i.e. if UDP 2237 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2238 * does not have enough ICMP information , following STREAMS conventions, 2239 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2240 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version 2241 * field (when cast to ipha_t in udp_icmp_error). 2242 */ 2243 static void 2244 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2245 { 2246 icmp6_t *icmp6; 2247 ip6_t *ip6h, *outer_ip6h; 2248 uint16_t hdr_length; 2249 uint8_t *nexthdrp; 2250 udpha_t *udpha; 2251 sin6_t sin6; 2252 mblk_t *mp1; 2253 int error = 0; 2254 size_t mp_size = MBLKL(mp); 2255 udp_t *udp = Q_TO_UDP(q); 2256 2257 /* 2258 * Verify that we have a complete IP header. If not, send it upstream. 2259 */ 2260 if (mp_size < sizeof (ip6_t)) { 2261 noticmpv6: 2262 putnext(UDP_RD(q), mp); 2263 return; 2264 } 2265 2266 outer_ip6h = (ip6_t *)mp->b_rptr; 2267 /* 2268 * Verify this is an ICMPV6 packet, else send it upstream 2269 */ 2270 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2271 hdr_length = IPV6_HDR_LEN; 2272 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2273 &nexthdrp) || 2274 *nexthdrp != IPPROTO_ICMPV6) { 2275 goto noticmpv6; 2276 } 2277 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2278 ip6h = (ip6_t *)&icmp6[1]; 2279 /* 2280 * Verify we have a complete ICMP and inner IP header. 2281 */ 2282 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2283 goto noticmpv6; 2284 2285 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2286 goto noticmpv6; 2287 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2288 /* 2289 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2290 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2291 * packet upstream. 
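 *
 * At this point the message being examined looks roughly like this
 * (extension headers may or may not be present):
 *
 *	outer ip6_t [+ ext hdrs] | icmp6_t | inner ip6_t [+ ext hdrs]
 *	    | at least ICMP_MIN_UDP_HDR (4) bytes of the UDP header
 *
 * Those 4 bytes are enough to recover the source and destination
 * ports of the datagram that triggered the ICMPv6 error.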
2292 */ 2293 if ((*nexthdrp != IPPROTO_UDP) || 2294 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2295 goto noticmpv6; 2296 } 2297 2298 switch (icmp6->icmp6_type) { 2299 case ICMP6_DST_UNREACH: 2300 switch (icmp6->icmp6_code) { 2301 case ICMP6_DST_UNREACH_NOPORT: 2302 error = ECONNREFUSED; 2303 break; 2304 case ICMP6_DST_UNREACH_ADMIN: 2305 case ICMP6_DST_UNREACH_NOROUTE: 2306 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2307 case ICMP6_DST_UNREACH_ADDR: 2308 /* Transient errors */ 2309 break; 2310 default: 2311 break; 2312 } 2313 break; 2314 case ICMP6_PACKET_TOO_BIG: { 2315 struct T_unitdata_ind *tudi; 2316 struct T_opthdr *toh; 2317 size_t udi_size; 2318 mblk_t *newmp; 2319 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2320 sizeof (struct ip6_mtuinfo); 2321 sin6_t *sin6; 2322 struct ip6_mtuinfo *mtuinfo; 2323 2324 /* 2325 * If the application has requested to receive path mtu 2326 * information, send up an empty message containing an 2327 * IPV6_PATHMTU ancillary data item. 2328 */ 2329 if (!udp->udp_ipv6_recvpathmtu) 2330 break; 2331 2332 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2333 opt_length; 2334 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2335 BUMP_MIB(&udp_mib, udpInErrors); 2336 break; 2337 } 2338 2339 /* 2340 * newmp->b_cont is left to NULL on purpose. This is an 2341 * empty message containing only ancillary data. 2342 */ 2343 newmp->b_datap->db_type = M_PROTO; 2344 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2345 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2346 tudi->PRIM_type = T_UNITDATA_IND; 2347 tudi->SRC_length = sizeof (sin6_t); 2348 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2349 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2350 tudi->OPT_length = opt_length; 2351 2352 sin6 = (sin6_t *)&tudi[1]; 2353 bzero(sin6, sizeof (sin6_t)); 2354 sin6->sin6_family = AF_INET6; 2355 sin6->sin6_addr = udp->udp_v6dst; 2356 2357 toh = (struct T_opthdr *)&sin6[1]; 2358 toh->level = IPPROTO_IPV6; 2359 toh->name = IPV6_PATHMTU; 2360 toh->len = opt_length; 2361 toh->status = 0; 2362 2363 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2364 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2365 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2366 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2367 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2368 /* 2369 * We've consumed everything we need from the original 2370 * message. Free it, then send our empty message. 2371 */ 2372 freemsg(mp); 2373 putnext(UDP_RD(q), newmp); 2374 return; 2375 } 2376 case ICMP6_TIME_EXCEEDED: 2377 /* Transient errors */ 2378 break; 2379 case ICMP6_PARAM_PROB: 2380 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2381 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2382 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2383 (uchar_t *)nexthdrp) { 2384 error = ECONNREFUSED; 2385 break; 2386 } 2387 break; 2388 } 2389 if (error == 0) { 2390 freemsg(mp); 2391 return; 2392 } 2393 2394 sin6 = sin6_null; 2395 sin6.sin6_family = AF_INET6; 2396 sin6.sin6_addr = ip6h->ip6_dst; 2397 sin6.sin6_port = udpha->uha_dst_port; 2398 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2399 2400 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2401 error); 2402 if (mp1) 2403 putnext(UDP_RD(q), mp1); 2404 freemsg(mp); 2405 } 2406 2407 /* 2408 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 2409 * The local address is filled in if endpoint is bound. 
The remote address 2410 * is filled in if remote address has been precified ("connected endpoint") 2411 * (The concept of connected CLTS sockets is alien to published TPI 2412 * but we support it anyway). 2413 */ 2414 static void 2415 udp_addr_req(queue_t *q, mblk_t *mp) 2416 { 2417 sin_t *sin; 2418 sin6_t *sin6; 2419 mblk_t *ackmp; 2420 struct T_addr_ack *taa; 2421 udp_t *udp = Q_TO_UDP(q); 2422 2423 /* Make it large enough for worst case */ 2424 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2425 2 * sizeof (sin6_t), 1); 2426 if (ackmp == NULL) { 2427 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2428 return; 2429 } 2430 taa = (struct T_addr_ack *)ackmp->b_rptr; 2431 2432 bzero(taa, sizeof (struct T_addr_ack)); 2433 ackmp->b_wptr = (uchar_t *)&taa[1]; 2434 2435 taa->PRIM_type = T_ADDR_ACK; 2436 ackmp->b_datap->db_type = M_PCPROTO; 2437 /* 2438 * Note: Following code assumes 32 bit alignment of basic 2439 * data structures like sin_t and struct T_addr_ack. 2440 */ 2441 if (udp->udp_state != TS_UNBND) { 2442 /* 2443 * Fill in local address first 2444 */ 2445 taa->LOCADDR_offset = sizeof (*taa); 2446 if (udp->udp_family == AF_INET) { 2447 taa->LOCADDR_length = sizeof (sin_t); 2448 sin = (sin_t *)&taa[1]; 2449 /* Fill zeroes and then initialize non-zero fields */ 2450 *sin = sin_null; 2451 sin->sin_family = AF_INET; 2452 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2453 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2454 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2455 sin->sin_addr.s_addr); 2456 } else { 2457 /* 2458 * INADDR_ANY 2459 * udp_v6src is not set, we might be bound to 2460 * broadcast/multicast. Use udp_bound_v6src as 2461 * local address instead (that could 2462 * also still be INADDR_ANY) 2463 */ 2464 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2465 sin->sin_addr.s_addr); 2466 } 2467 sin->sin_port = udp->udp_port; 2468 ackmp->b_wptr = (uchar_t *)&sin[1]; 2469 if (udp->udp_state == TS_DATA_XFER) { 2470 /* 2471 * connected, fill remote address too 2472 */ 2473 taa->REMADDR_length = sizeof (sin_t); 2474 /* assumed 32-bit alignment */ 2475 taa->REMADDR_offset = taa->LOCADDR_offset + 2476 taa->LOCADDR_length; 2477 2478 sin = (sin_t *)(ackmp->b_rptr + 2479 taa->REMADDR_offset); 2480 /* initialize */ 2481 *sin = sin_null; 2482 sin->sin_family = AF_INET; 2483 sin->sin_addr.s_addr = 2484 V4_PART_OF_V6(udp->udp_v6dst); 2485 sin->sin_port = udp->udp_dstport; 2486 ackmp->b_wptr = (uchar_t *)&sin[1]; 2487 } 2488 } else { 2489 taa->LOCADDR_length = sizeof (sin6_t); 2490 sin6 = (sin6_t *)&taa[1]; 2491 /* Fill zeroes and then initialize non-zero fields */ 2492 *sin6 = sin6_null; 2493 sin6->sin6_family = AF_INET6; 2494 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2495 sin6->sin6_addr = udp->udp_v6src; 2496 } else { 2497 /* 2498 * UNSPECIFIED 2499 * udp_v6src is not set, we might be bound to 2500 * broadcast/multicast. 
Use udp_bound_v6src as 2501 * local address instead (that could 2502 * also still be UNSPECIFIED) 2503 */ 2504 sin6->sin6_addr = 2505 udp->udp_bound_v6src; 2506 } 2507 sin6->sin6_port = udp->udp_port; 2508 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2509 if (udp->udp_state == TS_DATA_XFER) { 2510 /* 2511 * connected, fill remote address too 2512 */ 2513 taa->REMADDR_length = sizeof (sin6_t); 2514 /* assumed 32-bit alignment */ 2515 taa->REMADDR_offset = taa->LOCADDR_offset + 2516 taa->LOCADDR_length; 2517 2518 sin6 = (sin6_t *)(ackmp->b_rptr + 2519 taa->REMADDR_offset); 2520 /* initialize */ 2521 *sin6 = sin6_null; 2522 sin6->sin6_family = AF_INET6; 2523 sin6->sin6_addr = udp->udp_v6dst; 2524 sin6->sin6_port = udp->udp_dstport; 2525 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2526 } 2527 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2528 } 2529 } 2530 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2531 putnext(UDP_RD(q), ackmp); 2532 } 2533 2534 static void 2535 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2536 { 2537 if (udp->udp_family == AF_INET) { 2538 *tap = udp_g_t_info_ack_ipv4; 2539 } else { 2540 *tap = udp_g_t_info_ack_ipv6; 2541 } 2542 tap->CURRENT_state = udp->udp_state; 2543 tap->OPT_size = udp_max_optsize; 2544 } 2545 2546 /* 2547 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2548 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2549 * udp_g_t_info_ack. The current state of the stream is copied from 2550 * udp_state. 2551 */ 2552 static void 2553 udp_capability_req(queue_t *q, mblk_t *mp) 2554 { 2555 t_uscalar_t cap_bits1; 2556 struct T_capability_ack *tcap; 2557 udp_t *udp = Q_TO_UDP(q); 2558 2559 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2560 2561 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2562 mp->b_datap->db_type, T_CAPABILITY_ACK); 2563 if (!mp) 2564 return; 2565 2566 tcap = (struct T_capability_ack *)mp->b_rptr; 2567 tcap->CAP_bits1 = 0; 2568 2569 if (cap_bits1 & TC1_INFO) { 2570 udp_copy_info(&tcap->INFO_ack, udp); 2571 tcap->CAP_bits1 |= TC1_INFO; 2572 } 2573 2574 putnext(UDP_RD(q), mp); 2575 } 2576 2577 /* 2578 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2579 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2580 * The current state of the stream is copied from udp_state. 2581 */ 2582 static void 2583 udp_info_req(queue_t *q, mblk_t *mp) 2584 { 2585 udp_t *udp = Q_TO_UDP(q); 2586 2587 /* Create a T_INFO_ACK message. */ 2588 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2589 T_INFO_ACK); 2590 if (!mp) 2591 return; 2592 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2593 putnext(UDP_RD(q), mp); 2594 } 2595 2596 /* 2597 * IP recognizes seven kinds of bind requests: 2598 * 2599 * - A zero-length address binds only to the protocol number. 2600 * 2601 * - A 4-byte address is treated as a request to 2602 * validate that the address is a valid local IPv4 2603 * address, appropriate for an application to bind to. 2604 * IP does the verification, but does not make any note 2605 * of the address at this time. 2606 * 2607 * - A 16-byte address contains is treated as a request 2608 * to validate a local IPv6 address, as the 4-byte 2609 * address case above. 2610 * 2611 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2612 * use it for the inbound fanout of packets. 2613 * 2614 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2615 * use it for the inbound fanout of packets. 
2616 * 2617 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2618 * information consisting of local and remote addresses 2619 * and ports. In this case, the addresses are both 2620 * validated as appropriate for this operation, and, if 2621 * so, the information is retained for use in the 2622 * inbound fanout. 2623 * 2624 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2625 * fanout information, like the 12-byte case above. 2626 * 2627 * IP will also fill in the IRE request mblk with information 2628 * regarding our peer. In all cases, we notify IP of our protocol 2629 * type by appending a single protocol byte to the bind request. 2630 */ 2631 static mblk_t * 2632 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2633 { 2634 char *cp; 2635 mblk_t *mp; 2636 struct T_bind_req *tbr; 2637 ipa_conn_t *ac; 2638 ipa6_conn_t *ac6; 2639 sin_t *sin; 2640 sin6_t *sin6; 2641 2642 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2643 2644 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2645 if (!mp) 2646 return (mp); 2647 mp->b_datap->db_type = M_PROTO; 2648 tbr = (struct T_bind_req *)mp->b_rptr; 2649 tbr->PRIM_type = bind_prim; 2650 tbr->ADDR_offset = sizeof (*tbr); 2651 tbr->CONIND_number = 0; 2652 tbr->ADDR_length = addr_length; 2653 cp = (char *)&tbr[1]; 2654 switch (addr_length) { 2655 case sizeof (ipa_conn_t): 2656 ASSERT(udp->udp_family == AF_INET); 2657 /* Append a request for an IRE */ 2658 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2659 if (!mp->b_cont) { 2660 freemsg(mp); 2661 return (NULL); 2662 } 2663 mp->b_cont->b_wptr += sizeof (ire_t); 2664 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2665 2666 /* cp known to be 32 bit aligned */ 2667 ac = (ipa_conn_t *)cp; 2668 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2669 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 2670 ac->ac_fport = udp->udp_dstport; 2671 ac->ac_lport = udp->udp_port; 2672 break; 2673 2674 case sizeof (ipa6_conn_t): 2675 ASSERT(udp->udp_family == AF_INET6); 2676 /* Append a request for an IRE */ 2677 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2678 if (!mp->b_cont) { 2679 freemsg(mp); 2680 return (NULL); 2681 } 2682 mp->b_cont->b_wptr += sizeof (ire_t); 2683 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2684 2685 /* cp known to be 32 bit aligned */ 2686 ac6 = (ipa6_conn_t *)cp; 2687 ac6->ac6_laddr = udp->udp_v6src; 2688 ac6->ac6_faddr = udp->udp_v6dst; 2689 ac6->ac6_fport = udp->udp_dstport; 2690 ac6->ac6_lport = udp->udp_port; 2691 break; 2692 2693 case sizeof (sin_t): 2694 ASSERT(udp->udp_family == AF_INET); 2695 /* Append a request for an IRE */ 2696 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2697 if (!mp->b_cont) { 2698 freemsg(mp); 2699 return (NULL); 2700 } 2701 mp->b_cont->b_wptr += sizeof (ire_t); 2702 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2703 2704 sin = (sin_t *)cp; 2705 *sin = sin_null; 2706 sin->sin_family = AF_INET; 2707 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2708 sin->sin_port = udp->udp_port; 2709 break; 2710 2711 case sizeof (sin6_t): 2712 ASSERT(udp->udp_family == AF_INET6); 2713 /* Append a request for an IRE */ 2714 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2715 if (!mp->b_cont) { 2716 freemsg(mp); 2717 return (NULL); 2718 } 2719 mp->b_cont->b_wptr += sizeof (ire_t); 2720 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2721 2722 sin6 = (sin6_t *)cp; 2723 *sin6 = sin6_null; 2724 sin6->sin6_family = AF_INET6; 2725 sin6->sin6_addr = udp->udp_bound_v6src; 2726 sin6->sin6_port = 
udp->udp_port; 2727 break; 2728 } 2729 /* Add protocol number to end */ 2730 cp[addr_length] = (char)IPPROTO_UDP; 2731 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2732 return (mp); 2733 } 2734 2735 /* 2736 * This is the open routine for udp. It allocates a udp_t structure for 2737 * the stream and, on the first open of the module, creates an ND table. 2738 */ 2739 /* ARGSUSED */ 2740 static int 2741 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2742 { 2743 int err; 2744 udp_t *udp; 2745 conn_t *connp; 2746 zoneid_t zoneid = getzoneid(); 2747 queue_t *ip_wq; 2748 char *name; 2749 2750 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2751 2752 /* If the stream is already open, return immediately. */ 2753 if (q->q_ptr != NULL) 2754 return (0); 2755 2756 /* If this is not a push of udp as a module, fail. */ 2757 if (sflag != MODOPEN) 2758 return (EINVAL); 2759 2760 q->q_hiwat = udp_recv_hiwat; 2761 WR(q)->q_hiwat = udp_xmit_hiwat; 2762 WR(q)->q_lowat = udp_xmit_lowat; 2763 2764 /* Insert ourselves in the stream since we're about to walk q_next */ 2765 qprocson(q); 2766 2767 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2768 bzero(udp, sizeof (*udp)); 2769 2770 /* 2771 * UDP is supported only as a module and it has to be pushed directly 2772 * above the device instance of IP. If UDP is pushed anywhere else 2773 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2774 * sake of MIB browsers and fail everything else. 2775 */ 2776 ip_wq = WR(q)->q_next; 2777 if (ip_wq->q_next != NULL || 2778 (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL || 2779 strcmp(name, IP_MOD_NAME) != 0 || 2780 ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) { 2781 /* Support just SNMP for MIB browsers */ 2782 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); 2783 connp->conn_rq = q; 2784 connp->conn_wq = WR(q); 2785 connp->conn_flags |= IPCL_UDPMOD; 2786 connp->conn_cred = credp; 2787 connp->conn_zoneid = zoneid; 2788 connp->conn_udp = udp; 2789 udp->udp_connp = connp; 2790 q->q_ptr = WR(q)->q_ptr = connp; 2791 crhold(credp); 2792 q->q_qinfo = &udp_snmp_rinit; 2793 WR(q)->q_qinfo = &udp_snmp_winit; 2794 return (0); 2795 } 2796 2797 /* 2798 * Initialize the udp_t structure for this stream. 2799 */ 2800 q = RD(ip_wq); 2801 connp = Q_TO_CONN(q); 2802 mutex_enter(&connp->conn_lock); 2803 connp->conn_proto = IPPROTO_UDP; 2804 connp->conn_flags |= IPCL_UDP; 2805 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2806 connp->conn_udp = udp; 2807 2808 /* Set the initial state of the stream and the privilege status. 
*/ 2809 udp->udp_connp = connp; 2810 udp->udp_state = TS_UNBND; 2811 udp->udp_mode = UDP_MT_HOT; 2812 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2813 udp->udp_family = AF_INET6; 2814 udp->udp_ipversion = IPV6_VERSION; 2815 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2816 udp->udp_ttl = udp_ipv6_hoplimit; 2817 connp->conn_af_isv6 = B_TRUE; 2818 connp->conn_flags |= IPCL_ISV6; 2819 } else { 2820 udp->udp_family = AF_INET; 2821 udp->udp_ipversion = IPV4_VERSION; 2822 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2823 udp->udp_ttl = udp_ipv4_ttl; 2824 connp->conn_af_isv6 = B_FALSE; 2825 connp->conn_flags &= ~IPCL_ISV6; 2826 } 2827 2828 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2829 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2830 connp->conn_zoneid = zoneid; 2831 2832 if (connp->conn_flags & IPCL_SOCKET) { 2833 udp->udp_issocket = B_TRUE; 2834 udp->udp_direct_sockfs = B_TRUE; 2835 } 2836 mutex_exit(&connp->conn_lock); 2837 2838 /* 2839 * The transmit hiwat/lowat is only looked at on IP's queue. 2840 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2841 * getsockopts. 2842 */ 2843 q->q_hiwat = udp_recv_hiwat; 2844 WR(q)->q_hiwat = udp_xmit_hiwat; 2845 WR(q)->q_lowat = udp_xmit_lowat; 2846 2847 if (udp->udp_family == AF_INET6) { 2848 /* Build initial header template for transmit */ 2849 if ((err = udp_build_hdrs(q, udp)) != 0) { 2850 qprocsoff(UDP_RD(q)); 2851 udp->udp_connp = NULL; 2852 connp->conn_udp = NULL; 2853 kmem_cache_free(udp_cache, udp); 2854 return (err); 2855 } 2856 } 2857 2858 /* Set the Stream head write offset and high watermark. */ 2859 (void) mi_set_sth_wroff(UDP_RD(q), 2860 udp->udp_max_hdr_len + udp_wroff_extra); 2861 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 2862 2863 return (0); 2864 } 2865 2866 /* 2867 * Which UDP options OK to set through T_UNITDATA_REQ... 2868 */ 2869 /* ARGSUSED */ 2870 static boolean_t 2871 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2872 { 2873 return (B_TRUE); 2874 } 2875 2876 /* 2877 * This routine gets default values of certain options whose default 2878 * values are maintained by protcol specific code 2879 */ 2880 /* ARGSUSED */ 2881 int 2882 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2883 { 2884 int *i1 = (int *)ptr; 2885 2886 switch (level) { 2887 case IPPROTO_IP: 2888 switch (name) { 2889 case IP_MULTICAST_TTL: 2890 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2891 return (sizeof (uchar_t)); 2892 case IP_MULTICAST_LOOP: 2893 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2894 return (sizeof (uchar_t)); 2895 } 2896 break; 2897 case IPPROTO_IPV6: 2898 switch (name) { 2899 case IPV6_MULTICAST_HOPS: 2900 *i1 = IP_DEFAULT_MULTICAST_TTL; 2901 return (sizeof (int)); 2902 case IPV6_MULTICAST_LOOP: 2903 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2904 return (sizeof (int)); 2905 case IPV6_UNICAST_HOPS: 2906 *i1 = udp_ipv6_hoplimit; 2907 return (sizeof (int)); 2908 } 2909 break; 2910 } 2911 return (-1); 2912 } 2913 2914 /* 2915 * This routine retrieves the current status of socket options 2916 * and expects the caller to pass in the queue pointer of the 2917 * upper instance. It returns the size of the option retrieved. 
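 *
 * For example, a getsockopt(SO_RCVBUF) on a UDP endpoint ends up
 * here with level == SOL_SOCKET and name == SO_RCVBUF; the current
 * read-side high water mark is stored through ptr and the routine
 * returns sizeof (int).  Variable-length options such as IP_OPTIONS
 * instead return however many bytes were actually copied out.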
2918 */ 2919 int 2920 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2921 { 2922 int *i1 = (int *)ptr; 2923 conn_t *connp; 2924 udp_t *udp; 2925 ip6_pkt_t *ipp; 2926 2927 q = UDP_WR(q); 2928 connp = Q_TO_CONN(q); 2929 udp = connp->conn_udp; 2930 ipp = &udp->udp_sticky_ipp; 2931 2932 switch (level) { 2933 case SOL_SOCKET: 2934 switch (name) { 2935 case SO_DEBUG: 2936 *i1 = udp->udp_debug; 2937 break; /* goto sizeof (int) option return */ 2938 case SO_REUSEADDR: 2939 *i1 = udp->udp_reuseaddr; 2940 break; /* goto sizeof (int) option return */ 2941 case SO_TYPE: 2942 *i1 = SOCK_DGRAM; 2943 break; /* goto sizeof (int) option return */ 2944 2945 /* 2946 * The following three items are available here, 2947 * but are only meaningful to IP. 2948 */ 2949 case SO_DONTROUTE: 2950 *i1 = udp->udp_dontroute; 2951 break; /* goto sizeof (int) option return */ 2952 case SO_USELOOPBACK: 2953 *i1 = udp->udp_useloopback; 2954 break; /* goto sizeof (int) option return */ 2955 case SO_BROADCAST: 2956 *i1 = udp->udp_broadcast; 2957 break; /* goto sizeof (int) option return */ 2958 2959 case SO_SNDBUF: 2960 *i1 = q->q_hiwat; 2961 break; /* goto sizeof (int) option return */ 2962 case SO_RCVBUF: 2963 *i1 = RD(q)->q_hiwat; 2964 break; /* goto sizeof (int) option return */ 2965 case SO_DGRAM_ERRIND: 2966 *i1 = udp->udp_dgram_errind; 2967 break; /* goto sizeof (int) option return */ 2968 case SO_RECVUCRED: 2969 *i1 = udp->udp_recvucred; 2970 break; /* goto sizeof (int) option return */ 2971 default: 2972 return (-1); 2973 } 2974 break; 2975 case IPPROTO_IP: 2976 if (udp->udp_family != AF_INET) 2977 return (-1); 2978 switch (name) { 2979 case IP_OPTIONS: 2980 case T_IP_OPTIONS: 2981 if (udp->udp_ip_rcv_options_len) 2982 bcopy(udp->udp_ip_rcv_options, ptr, 2983 udp->udp_ip_rcv_options_len); 2984 return (udp->udp_ip_rcv_options_len); 2985 case IP_TOS: 2986 case T_IP_TOS: 2987 *i1 = (int)udp->udp_type_of_service; 2988 break; /* goto sizeof (int) option return */ 2989 case IP_TTL: 2990 *i1 = (int)udp->udp_ttl; 2991 break; /* goto sizeof (int) option return */ 2992 case IP_NEXTHOP: 2993 /* Handled at IP level */ 2994 return (-EINVAL); 2995 case IP_MULTICAST_IF: 2996 /* 0 address if not set */ 2997 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 2998 return (sizeof (ipaddr_t)); 2999 case IP_MULTICAST_TTL: 3000 *(uchar_t *)ptr = udp->udp_multicast_ttl; 3001 return (sizeof (uchar_t)); 3002 case IP_MULTICAST_LOOP: 3003 *ptr = connp->conn_multicast_loop; 3004 return (sizeof (uint8_t)); 3005 case IP_RECVOPTS: 3006 *i1 = udp->udp_recvopts; 3007 break; /* goto sizeof (int) option return */ 3008 case IP_RECVDSTADDR: 3009 *i1 = udp->udp_recvdstaddr; 3010 break; /* goto sizeof (int) option return */ 3011 case IP_RECVIF: 3012 *i1 = udp->udp_recvif; 3013 break; /* goto sizeof (int) option return */ 3014 case IP_RECVSLLA: 3015 *i1 = udp->udp_recvslla; 3016 break; /* goto sizeof (int) option return */ 3017 case IP_RECVTTL: 3018 *i1 = udp->udp_recvttl; 3019 break; /* goto sizeof (int) option return */ 3020 case IP_ADD_MEMBERSHIP: 3021 case IP_DROP_MEMBERSHIP: 3022 case IP_BLOCK_SOURCE: 3023 case IP_UNBLOCK_SOURCE: 3024 case IP_ADD_SOURCE_MEMBERSHIP: 3025 case IP_DROP_SOURCE_MEMBERSHIP: 3026 case MCAST_JOIN_GROUP: 3027 case MCAST_LEAVE_GROUP: 3028 case MCAST_BLOCK_SOURCE: 3029 case MCAST_UNBLOCK_SOURCE: 3030 case MCAST_JOIN_SOURCE_GROUP: 3031 case MCAST_LEAVE_SOURCE_GROUP: 3032 case IP_DONTFAILOVER_IF: 3033 /* cannot "get" the value for these */ 3034 return (-1); 3035 case IP_BOUND_IF: 3036 /* Zero if not set */ 
3037 *i1 = udp->udp_bound_if; 3038 break; /* goto sizeof (int) option return */ 3039 case IP_UNSPEC_SRC: 3040 *i1 = udp->udp_unspec_source; 3041 break; /* goto sizeof (int) option return */ 3042 case IP_XMIT_IF: 3043 *i1 = udp->udp_xmit_if; 3044 break; /* goto sizeof (int) option return */ 3045 default: 3046 return (-1); 3047 } 3048 break; 3049 case IPPROTO_IPV6: 3050 if (udp->udp_family != AF_INET6) 3051 return (-1); 3052 switch (name) { 3053 case IPV6_UNICAST_HOPS: 3054 *i1 = (unsigned int)udp->udp_ttl; 3055 break; /* goto sizeof (int) option return */ 3056 case IPV6_MULTICAST_IF: 3057 /* 0 index if not set */ 3058 *i1 = udp->udp_multicast_if_index; 3059 break; /* goto sizeof (int) option return */ 3060 case IPV6_MULTICAST_HOPS: 3061 *i1 = udp->udp_multicast_ttl; 3062 break; /* goto sizeof (int) option return */ 3063 case IPV6_MULTICAST_LOOP: 3064 *i1 = connp->conn_multicast_loop; 3065 break; /* goto sizeof (int) option return */ 3066 case IPV6_JOIN_GROUP: 3067 case IPV6_LEAVE_GROUP: 3068 case MCAST_JOIN_GROUP: 3069 case MCAST_LEAVE_GROUP: 3070 case MCAST_BLOCK_SOURCE: 3071 case MCAST_UNBLOCK_SOURCE: 3072 case MCAST_JOIN_SOURCE_GROUP: 3073 case MCAST_LEAVE_SOURCE_GROUP: 3074 /* cannot "get" the value for these */ 3075 return (-1); 3076 case IPV6_BOUND_IF: 3077 /* Zero if not set */ 3078 *i1 = udp->udp_bound_if; 3079 break; /* goto sizeof (int) option return */ 3080 case IPV6_UNSPEC_SRC: 3081 *i1 = udp->udp_unspec_source; 3082 break; /* goto sizeof (int) option return */ 3083 case IPV6_RECVPKTINFO: 3084 *i1 = udp->udp_ipv6_recvpktinfo; 3085 break; /* goto sizeof (int) option return */ 3086 case IPV6_RECVTCLASS: 3087 *i1 = udp->udp_ipv6_recvtclass; 3088 break; /* goto sizeof (int) option return */ 3089 case IPV6_RECVPATHMTU: 3090 *i1 = udp->udp_ipv6_recvpathmtu; 3091 break; /* goto sizeof (int) option return */ 3092 case IPV6_RECVHOPLIMIT: 3093 *i1 = udp->udp_ipv6_recvhoplimit; 3094 break; /* goto sizeof (int) option return */ 3095 case IPV6_RECVHOPOPTS: 3096 *i1 = udp->udp_ipv6_recvhopopts; 3097 break; /* goto sizeof (int) option return */ 3098 case IPV6_RECVDSTOPTS: 3099 *i1 = udp->udp_ipv6_recvdstopts; 3100 break; /* goto sizeof (int) option return */ 3101 case _OLD_IPV6_RECVDSTOPTS: 3102 *i1 = udp->udp_old_ipv6_recvdstopts; 3103 break; /* goto sizeof (int) option return */ 3104 case IPV6_RECVRTHDRDSTOPTS: 3105 *i1 = udp->udp_ipv6_recvrthdrdstopts; 3106 break; /* goto sizeof (int) option return */ 3107 case IPV6_RECVRTHDR: 3108 *i1 = udp->udp_ipv6_recvrthdr; 3109 break; /* goto sizeof (int) option return */ 3110 case IPV6_PKTINFO: { 3111 /* XXX assumes that caller has room for max size! 
*/ 3112 struct in6_pktinfo *pkti; 3113 3114 pkti = (struct in6_pktinfo *)ptr; 3115 if (ipp->ipp_fields & IPPF_IFINDEX) 3116 pkti->ipi6_ifindex = ipp->ipp_ifindex; 3117 else 3118 pkti->ipi6_ifindex = 0; 3119 if (ipp->ipp_fields & IPPF_ADDR) 3120 pkti->ipi6_addr = ipp->ipp_addr; 3121 else 3122 pkti->ipi6_addr = ipv6_all_zeros; 3123 return (sizeof (struct in6_pktinfo)); 3124 } 3125 case IPV6_TCLASS: 3126 if (ipp->ipp_fields & IPPF_TCLASS) 3127 *i1 = ipp->ipp_tclass; 3128 else 3129 *i1 = IPV6_FLOW_TCLASS( 3130 IPV6_DEFAULT_VERS_AND_FLOW); 3131 break; /* goto sizeof (int) option return */ 3132 case IPV6_NEXTHOP: { 3133 sin6_t *sin6 = (sin6_t *)ptr; 3134 3135 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 3136 return (0); 3137 *sin6 = sin6_null; 3138 sin6->sin6_family = AF_INET6; 3139 sin6->sin6_addr = ipp->ipp_nexthop; 3140 return (sizeof (sin6_t)); 3141 } 3142 case IPV6_HOPOPTS: 3143 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 3144 return (0); 3145 bcopy(ipp->ipp_hopopts, ptr, ipp->ipp_hopoptslen); 3146 return (ipp->ipp_hopoptslen); 3147 case IPV6_RTHDRDSTOPTS: 3148 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 3149 return (0); 3150 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 3151 return (ipp->ipp_rtdstoptslen); 3152 case IPV6_RTHDR: 3153 if (!(ipp->ipp_fields & IPPF_RTHDR)) 3154 return (0); 3155 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 3156 return (ipp->ipp_rthdrlen); 3157 case IPV6_DSTOPTS: 3158 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 3159 return (0); 3160 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 3161 return (ipp->ipp_dstoptslen); 3162 case IPV6_PATHMTU: 3163 return (ip_fill_mtuinfo(&udp->udp_v6dst, 3164 udp->udp_dstport, (struct ip6_mtuinfo *)ptr)); 3165 default: 3166 return (-1); 3167 } 3168 break; 3169 case IPPROTO_UDP: 3170 switch (name) { 3171 case UDP_ANONPRIVBIND: 3172 *i1 = udp->udp_anon_priv_bind; 3173 break; 3174 case UDP_EXCLBIND: 3175 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 3176 break; 3177 case UDP_RCVHDR: 3178 *i1 = udp->udp_rcvhdr ? 1 : 0; 3179 break; 3180 default: 3181 return (-1); 3182 } 3183 break; 3184 default: 3185 return (-1); 3186 } 3187 return (sizeof (int)); 3188 } 3189 3190 /* 3191 * This routine sets socket options; it expects the caller 3192 * to pass in the queue pointer of the upper instance. 3193 */ 3194 /* ARGSUSED */ 3195 int 3196 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3197 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3198 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3199 { 3200 int *i1 = (int *)invalp; 3201 boolean_t onoff = (*i1 == 0) ? 0 : 1; 3202 boolean_t checkonly; 3203 int error; 3204 conn_t *connp; 3205 udp_t *udp; 3206 3207 q = UDP_WR(q); 3208 connp = Q_TO_CONN(q); 3209 udp = connp->conn_udp; 3210 3211 switch (optset_context) { 3212 case SETFN_OPTCOM_CHECKONLY: 3213 checkonly = B_TRUE; 3214 /* 3215 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3216 * inlen != 0 implies value supplied and 3217 * we have to "pretend" to set it. 3218 * inlen == 0 implies that there is no 3219 * value part in T_CHECK request and just validation 3220 * done elsewhere should be enough, we just return here. 3221 */ 3222 if (inlen == 0) { 3223 *outlenp = 0; 3224 return (0); 3225 } 3226 break; 3227 case SETFN_OPTCOM_NEGOTIATE: 3228 checkonly = B_FALSE; 3229 break; 3230 case SETFN_UD_NEGOTIATE: 3231 case SETFN_CONN_NEGOTIATE: 3232 checkonly = B_FALSE; 3233 /* 3234 * Negotiating local and "association-related" options 3235 * through T_UNITDATA_REQ. 
3236 * 3237 * Following routine can filter out ones we do not 3238 * want to be "set" this way. 3239 */ 3240 if (!udp_opt_allow_udr_set(level, name)) { 3241 *outlenp = 0; 3242 return (EINVAL); 3243 } 3244 break; 3245 default: 3246 /* 3247 * We should never get here 3248 */ 3249 *outlenp = 0; 3250 return (EINVAL); 3251 } 3252 3253 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3254 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3255 3256 /* 3257 * For fixed length options, no sanity check 3258 * of passed in length is done. It is assumed *_optcom_req() 3259 * routines do the right thing. 3260 */ 3261 3262 switch (level) { 3263 case SOL_SOCKET: 3264 switch (name) { 3265 case SO_REUSEADDR: 3266 if (!checkonly) 3267 udp->udp_reuseaddr = onoff; 3268 break; 3269 case SO_DEBUG: 3270 if (!checkonly) 3271 udp->udp_debug = onoff; 3272 break; 3273 /* 3274 * The following three items are available here, 3275 * but are only meaningful to IP. 3276 */ 3277 case SO_DONTROUTE: 3278 if (!checkonly) 3279 udp->udp_dontroute = onoff; 3280 break; 3281 case SO_USELOOPBACK: 3282 if (!checkonly) 3283 udp->udp_useloopback = onoff; 3284 break; 3285 case SO_BROADCAST: 3286 if (!checkonly) 3287 udp->udp_broadcast = onoff; 3288 break; 3289 3290 case SO_SNDBUF: 3291 if (*i1 > udp_max_buf) { 3292 *outlenp = 0; 3293 return (ENOBUFS); 3294 } 3295 if (!checkonly) { 3296 q->q_hiwat = *i1; 3297 WR(UDP_RD(q))->q_hiwat = *i1; 3298 } 3299 break; 3300 case SO_RCVBUF: 3301 if (*i1 > udp_max_buf) { 3302 *outlenp = 0; 3303 return (ENOBUFS); 3304 } 3305 if (!checkonly) { 3306 RD(q)->q_hiwat = *i1; 3307 UDP_RD(q)->q_hiwat = *i1; 3308 (void) mi_set_sth_hiwat(UDP_RD(q), 3309 udp_set_rcv_hiwat(udp, *i1)); 3310 } 3311 break; 3312 case SO_DGRAM_ERRIND: 3313 if (!checkonly) 3314 udp->udp_dgram_errind = onoff; 3315 break; 3316 case SO_RECVUCRED: 3317 if (!checkonly) 3318 udp->udp_recvucred = onoff; 3319 break; 3320 default: 3321 *outlenp = 0; 3322 return (EINVAL); 3323 } 3324 break; 3325 case IPPROTO_IP: 3326 if (udp->udp_family != AF_INET) { 3327 *outlenp = 0; 3328 return (ENOPROTOOPT); 3329 } 3330 switch (name) { 3331 case IP_OPTIONS: 3332 case T_IP_OPTIONS: 3333 /* Save options for use by IP. */ 3334 if (inlen & 0x3) { 3335 *outlenp = 0; 3336 return (EINVAL); 3337 } 3338 if (checkonly) 3339 break; 3340 3341 if (udp->udp_ip_snd_options) { 3342 mi_free((char *)udp->udp_ip_snd_options); 3343 udp->udp_ip_snd_options_len = 0; 3344 udp->udp_ip_snd_options = NULL; 3345 } 3346 if (inlen) { 3347 udp->udp_ip_snd_options = 3348 (uchar_t *)mi_alloc(inlen, BPRI_HI); 3349 if (udp->udp_ip_snd_options) { 3350 bcopy(invalp, udp->udp_ip_snd_options, 3351 inlen); 3352 udp->udp_ip_snd_options_len = inlen; 3353 } 3354 } 3355 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3356 UDPH_SIZE + udp->udp_ip_snd_options_len; 3357 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3358 udp_wroff_extra); 3359 break; 3360 case IP_TTL: 3361 if (!checkonly) { 3362 udp->udp_ttl = (uchar_t)*i1; 3363 } 3364 break; 3365 case IP_TOS: 3366 case T_IP_TOS: 3367 if (!checkonly) { 3368 udp->udp_type_of_service = (uchar_t)*i1; 3369 } 3370 break; 3371 case IP_MULTICAST_IF: { 3372 /* 3373 * TODO should check OPTMGMT reply and undo this if 3374 * there is an error. 
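 *
 * For reference, the value arrives as a struct in_addr filled in by
 * a caller along these lines (the address is illustrative only):
 *
 *	struct in_addr ifaddr;
 *
 *	ifaddr.s_addr = inet_addr("10.0.0.1");
 *	(void) setsockopt(fd, IPPROTO_IP, IP_MULTICAST_IF,
 *	    &ifaddr, sizeof (ifaddr));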
3375 */ 3376 struct in_addr *inap = (struct in_addr *)invalp; 3377 if (!checkonly) { 3378 udp->udp_multicast_if_addr = 3379 inap->s_addr; 3380 } 3381 break; 3382 } 3383 case IP_MULTICAST_TTL: 3384 if (!checkonly) 3385 udp->udp_multicast_ttl = *invalp; 3386 break; 3387 case IP_MULTICAST_LOOP: 3388 if (!checkonly) 3389 connp->conn_multicast_loop = *invalp; 3390 break; 3391 case IP_RECVOPTS: 3392 if (!checkonly) 3393 udp->udp_recvopts = onoff; 3394 break; 3395 case IP_RECVDSTADDR: 3396 if (!checkonly) 3397 udp->udp_recvdstaddr = onoff; 3398 break; 3399 case IP_RECVIF: 3400 if (!checkonly) 3401 udp->udp_recvif = onoff; 3402 break; 3403 case IP_RECVSLLA: 3404 if (!checkonly) 3405 udp->udp_recvslla = onoff; 3406 break; 3407 case IP_RECVTTL: 3408 if (!checkonly) 3409 udp->udp_recvttl = onoff; 3410 break; 3411 case IP_ADD_MEMBERSHIP: 3412 case IP_DROP_MEMBERSHIP: 3413 case IP_BLOCK_SOURCE: 3414 case IP_UNBLOCK_SOURCE: 3415 case IP_ADD_SOURCE_MEMBERSHIP: 3416 case IP_DROP_SOURCE_MEMBERSHIP: 3417 case MCAST_JOIN_GROUP: 3418 case MCAST_LEAVE_GROUP: 3419 case MCAST_BLOCK_SOURCE: 3420 case MCAST_UNBLOCK_SOURCE: 3421 case MCAST_JOIN_SOURCE_GROUP: 3422 case MCAST_LEAVE_SOURCE_GROUP: 3423 case IP_SEC_OPT: 3424 case IP_NEXTHOP: 3425 /* 3426 * "soft" error (negative) 3427 * option not handled at this level 3428 * Do not modify *outlenp. 3429 */ 3430 return (-EINVAL); 3431 case IP_BOUND_IF: 3432 if (!checkonly) 3433 udp->udp_bound_if = *i1; 3434 break; 3435 case IP_UNSPEC_SRC: 3436 if (!checkonly) 3437 udp->udp_unspec_source = onoff; 3438 break; 3439 case IP_XMIT_IF: 3440 if (!checkonly) 3441 udp->udp_xmit_if = *i1; 3442 break; 3443 default: 3444 *outlenp = 0; 3445 return (EINVAL); 3446 } 3447 break; 3448 case IPPROTO_IPV6: { 3449 ip6_pkt_t *ipp; 3450 boolean_t sticky; 3451 3452 if (udp->udp_family != AF_INET6) { 3453 *outlenp = 0; 3454 return (ENOPROTOOPT); 3455 } 3456 /* 3457 * Deal with both sticky options and ancillary data 3458 */ 3459 if (thisdg_attrs == NULL) { 3460 /* sticky options, or none */ 3461 ipp = &udp->udp_sticky_ipp; 3462 sticky = B_TRUE; 3463 } else { 3464 /* ancillary data */ 3465 ipp = (ip6_pkt_t *)thisdg_attrs; 3466 sticky = B_FALSE; 3467 } 3468 3469 switch (name) { 3470 case IPV6_MULTICAST_IF: 3471 if (!checkonly) 3472 udp->udp_multicast_if_index = *i1; 3473 break; 3474 case IPV6_UNICAST_HOPS: 3475 /* -1 means use default */ 3476 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3477 *outlenp = 0; 3478 return (EINVAL); 3479 } 3480 if (!checkonly) { 3481 if (*i1 == -1) { 3482 udp->udp_ttl = ipp->ipp_unicast_hops = 3483 udp_ipv6_hoplimit; 3484 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3485 /* Pass modified value to IP. */ 3486 *i1 = udp->udp_ttl; 3487 } else { 3488 udp->udp_ttl = ipp->ipp_unicast_hops = 3489 (uint8_t)*i1; 3490 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3491 } 3492 /* Rebuild the header template */ 3493 error = udp_build_hdrs(q, udp); 3494 if (error != 0) { 3495 *outlenp = 0; 3496 return (error); 3497 } 3498 } 3499 break; 3500 case IPV6_MULTICAST_HOPS: 3501 /* -1 means use default */ 3502 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3503 *outlenp = 0; 3504 return (EINVAL); 3505 } 3506 if (!checkonly) { 3507 if (*i1 == -1) { 3508 udp->udp_multicast_ttl = 3509 ipp->ipp_multicast_hops = 3510 IP_DEFAULT_MULTICAST_TTL; 3511 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3512 /* Pass modified value to IP. 
*/ 3513 *i1 = udp->udp_multicast_ttl; 3514 } else { 3515 udp->udp_multicast_ttl = 3516 ipp->ipp_multicast_hops = 3517 (uint8_t)*i1; 3518 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3519 } 3520 } 3521 break; 3522 case IPV6_MULTICAST_LOOP: 3523 if (*i1 != 0 && *i1 != 1) { 3524 *outlenp = 0; 3525 return (EINVAL); 3526 } 3527 if (!checkonly) 3528 connp->conn_multicast_loop = *i1; 3529 break; 3530 case IPV6_JOIN_GROUP: 3531 case IPV6_LEAVE_GROUP: 3532 case MCAST_JOIN_GROUP: 3533 case MCAST_LEAVE_GROUP: 3534 case MCAST_BLOCK_SOURCE: 3535 case MCAST_UNBLOCK_SOURCE: 3536 case MCAST_JOIN_SOURCE_GROUP: 3537 case MCAST_LEAVE_SOURCE_GROUP: 3538 /* 3539 * "soft" error (negative) 3540 * option not handled at this level 3541 * Note: Do not modify *outlenp 3542 */ 3543 return (-EINVAL); 3544 case IPV6_BOUND_IF: 3545 if (!checkonly) 3546 udp->udp_bound_if = *i1; 3547 break; 3548 case IPV6_UNSPEC_SRC: 3549 if (!checkonly) 3550 udp->udp_unspec_source = onoff; 3551 break; 3552 /* 3553 * Set boolean switches for ancillary data delivery 3554 */ 3555 case IPV6_RECVPKTINFO: 3556 if (!checkonly) 3557 udp->udp_ipv6_recvpktinfo = onoff; 3558 break; 3559 case IPV6_RECVTCLASS: 3560 if (!checkonly) { 3561 udp->udp_ipv6_recvtclass = onoff; 3562 } 3563 break; 3564 case IPV6_RECVPATHMTU: 3565 if (!checkonly) { 3566 udp->udp_ipv6_recvpathmtu = onoff; 3567 } 3568 break; 3569 case IPV6_RECVHOPLIMIT: 3570 if (!checkonly) 3571 udp->udp_ipv6_recvhoplimit = onoff; 3572 break; 3573 case IPV6_RECVHOPOPTS: 3574 if (!checkonly) 3575 udp->udp_ipv6_recvhopopts = onoff; 3576 break; 3577 case IPV6_RECVDSTOPTS: 3578 if (!checkonly) 3579 udp->udp_ipv6_recvdstopts = onoff; 3580 break; 3581 case _OLD_IPV6_RECVDSTOPTS: 3582 if (!checkonly) 3583 udp->udp_old_ipv6_recvdstopts = onoff; 3584 break; 3585 case IPV6_RECVRTHDRDSTOPTS: 3586 if (!checkonly) 3587 udp->udp_ipv6_recvrthdrdstopts = onoff; 3588 break; 3589 case IPV6_RECVRTHDR: 3590 if (!checkonly) 3591 udp->udp_ipv6_recvrthdr = onoff; 3592 break; 3593 /* 3594 * Set sticky options or ancillary data. 3595 * If sticky options, (re)build any extension headers 3596 * that might be needed as a result. 3597 */ 3598 case IPV6_PKTINFO: 3599 /* 3600 * The source address and ifindex are verified 3601 * in ip_opt_set(). For ancillary data the 3602 * source address is checked in ip_wput_v6. 
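 *
 * Sketch of the two ways the option reaches this code: a sticky
 * setsockopt(IPPROTO_IPV6, IPV6_PKTINFO, ...) arrives with
 * thisdg_attrs == NULL, while per-datagram ancillary data (an
 * IPV6_PKTINFO cmsg supplied with the send) arrives with
 * thisdg_attrs pointing at that datagram's attributes.  In both
 * cases a zero-length value clears the corresponding fields below.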
3603 */ 3604 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3605 return (EINVAL); 3606 if (checkonly) 3607 break; 3608 3609 if (inlen == 0) { 3610 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3611 ipp->ipp_sticky_ignored |= 3612 (IPPF_IFINDEX|IPPF_ADDR); 3613 } else { 3614 struct in6_pktinfo *pkti; 3615 3616 pkti = (struct in6_pktinfo *)invalp; 3617 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3618 ipp->ipp_addr = pkti->ipi6_addr; 3619 if (ipp->ipp_ifindex != 0) 3620 ipp->ipp_fields |= IPPF_IFINDEX; 3621 else 3622 ipp->ipp_fields &= ~IPPF_IFINDEX; 3623 if (!IN6_IS_ADDR_UNSPECIFIED( 3624 &ipp->ipp_addr)) 3625 ipp->ipp_fields |= IPPF_ADDR; 3626 else 3627 ipp->ipp_fields &= ~IPPF_ADDR; 3628 } 3629 if (sticky) { 3630 error = udp_build_hdrs(q, udp); 3631 if (error != 0) 3632 return (error); 3633 } 3634 break; 3635 case IPV6_HOPLIMIT: 3636 if (sticky) 3637 return (EINVAL); 3638 if (inlen != 0 && inlen != sizeof (int)) 3639 return (EINVAL); 3640 if (checkonly) 3641 break; 3642 3643 if (inlen == 0) { 3644 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3645 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3646 } else { 3647 if (*i1 > 255 || *i1 < -1) 3648 return (EINVAL); 3649 if (*i1 == -1) 3650 ipp->ipp_hoplimit = udp_ipv6_hoplimit; 3651 else 3652 ipp->ipp_hoplimit = *i1; 3653 ipp->ipp_fields |= IPPF_HOPLIMIT; 3654 } 3655 break; 3656 case IPV6_TCLASS: 3657 if (inlen != 0 && inlen != sizeof (int)) 3658 return (EINVAL); 3659 if (checkonly) 3660 break; 3661 3662 if (inlen == 0) { 3663 ipp->ipp_fields &= ~IPPF_TCLASS; 3664 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3665 } else { 3666 if (*i1 > 255 || *i1 < -1) 3667 return (EINVAL); 3668 if (*i1 == -1) 3669 ipp->ipp_tclass = 0; 3670 else 3671 ipp->ipp_tclass = *i1; 3672 ipp->ipp_fields |= IPPF_TCLASS; 3673 } 3674 if (sticky) { 3675 error = udp_build_hdrs(q, udp); 3676 if (error != 0) 3677 return (error); 3678 } 3679 break; 3680 case IPV6_NEXTHOP: 3681 /* 3682 * IP will verify that the nexthop is reachable 3683 * and fail for sticky options. 3684 */ 3685 if (inlen != 0 && inlen != sizeof (sin6_t)) 3686 return (EINVAL); 3687 if (checkonly) 3688 break; 3689 3690 if (inlen == 0) { 3691 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3692 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3693 } else { 3694 sin6_t *sin6 = (sin6_t *)invalp; 3695 3696 if (sin6->sin6_family != AF_INET6) 3697 return (EAFNOSUPPORT); 3698 if (IN6_IS_ADDR_V4MAPPED( 3699 &sin6->sin6_addr)) 3700 return (EADDRNOTAVAIL); 3701 ipp->ipp_nexthop = sin6->sin6_addr; 3702 if (!IN6_IS_ADDR_UNSPECIFIED( 3703 &ipp->ipp_nexthop)) 3704 ipp->ipp_fields |= IPPF_NEXTHOP; 3705 else 3706 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3707 } 3708 if (sticky) { 3709 error = udp_build_hdrs(q, udp); 3710 if (error != 0) 3711 return (error); 3712 } 3713 break; 3714 case IPV6_HOPOPTS: { 3715 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3716 /* 3717 * Sanity checks - minimum size, size a multiple of 3718 * eight bytes, and matching size passed in. 
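 *
 * Worked example: ip6h_len counts 8-octet units beyond the first
 * 8 octets, so a 16-byte hop-by-hop options header carries
 * ip6h_len == 1 and the only length accepted below is
 * 8 * (1 + 1) == 16.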
3719 */ 3720 if (inlen != 0 && 3721 inlen != (8 * (hopts->ip6h_len + 1))) 3722 return (EINVAL); 3723 3724 if (checkonly) 3725 break; 3726 3727 if (inlen == 0) { 3728 if (sticky && 3729 (ipp->ipp_fields & IPPF_HOPOPTS) != 0) { 3730 kmem_free(ipp->ipp_hopopts, 3731 ipp->ipp_hopoptslen); 3732 ipp->ipp_hopopts = NULL; 3733 ipp->ipp_hopoptslen = 0; 3734 } 3735 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3736 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3737 } else { 3738 error = udp_pkt_set(invalp, inlen, sticky, 3739 (uchar_t **)&ipp->ipp_hopopts, 3740 &ipp->ipp_hopoptslen); 3741 if (error != 0) 3742 return (error); 3743 ipp->ipp_fields |= IPPF_HOPOPTS; 3744 } 3745 if (sticky) { 3746 error = udp_build_hdrs(q, udp); 3747 if (error != 0) 3748 return (error); 3749 } 3750 break; 3751 } 3752 case IPV6_RTHDRDSTOPTS: { 3753 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3754 3755 /* 3756 * Sanity checks - minimum size, size a multiple of 3757 * eight bytes, and matching size passed in. 3758 */ 3759 if (inlen != 0 && 3760 inlen != (8 * (dopts->ip6d_len + 1))) 3761 return (EINVAL); 3762 3763 if (checkonly) 3764 break; 3765 3766 if (inlen == 0) { 3767 if (sticky && 3768 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3769 kmem_free(ipp->ipp_rtdstopts, 3770 ipp->ipp_rtdstoptslen); 3771 ipp->ipp_rtdstopts = NULL; 3772 ipp->ipp_rtdstoptslen = 0; 3773 } 3774 3775 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3776 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3777 } else { 3778 error = udp_pkt_set(invalp, inlen, sticky, 3779 (uchar_t **)&ipp->ipp_rtdstopts, 3780 &ipp->ipp_rtdstoptslen); 3781 if (error != 0) 3782 return (error); 3783 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3784 } 3785 if (sticky) { 3786 error = udp_build_hdrs(q, udp); 3787 if (error != 0) 3788 return (error); 3789 } 3790 break; 3791 } 3792 case IPV6_DSTOPTS: { 3793 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3794 3795 /* 3796 * Sanity checks - minimum size, size a multiple of 3797 * eight bytes, and matching size passed in. 3798 */ 3799 if (inlen != 0 && 3800 inlen != (8 * (dopts->ip6d_len + 1))) 3801 return (EINVAL); 3802 3803 if (checkonly) 3804 break; 3805 3806 if (inlen == 0) { 3807 if (sticky && 3808 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3809 kmem_free(ipp->ipp_dstopts, 3810 ipp->ipp_dstoptslen); 3811 ipp->ipp_dstopts = NULL; 3812 ipp->ipp_dstoptslen = 0; 3813 } 3814 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3815 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3816 } else { 3817 error = udp_pkt_set(invalp, inlen, sticky, 3818 (uchar_t **)&ipp->ipp_dstopts, 3819 &ipp->ipp_dstoptslen); 3820 if (error != 0) 3821 return (error); 3822 ipp->ipp_fields |= IPPF_DSTOPTS; 3823 } 3824 if (sticky) { 3825 error = udp_build_hdrs(q, udp); 3826 if (error != 0) 3827 return (error); 3828 } 3829 break; 3830 } 3831 case IPV6_RTHDR: { 3832 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3833 3834 /* 3835 * Sanity checks - minimum size, size a multiple of 3836 * eight bytes, and matching size passed in. 
3837 */ 3838 if (inlen != 0 && 3839 inlen != (8 * (rt->ip6r_len + 1))) 3840 return (EINVAL); 3841 3842 if (checkonly) 3843 break; 3844 3845 if (inlen == 0) { 3846 if (sticky && 3847 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3848 kmem_free(ipp->ipp_rthdr, 3849 ipp->ipp_rthdrlen); 3850 ipp->ipp_rthdr = NULL; 3851 ipp->ipp_rthdrlen = 0; 3852 } 3853 ipp->ipp_fields &= ~IPPF_RTHDR; 3854 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3855 } else { 3856 error = udp_pkt_set(invalp, inlen, sticky, 3857 (uchar_t **)&ipp->ipp_rthdr, 3858 &ipp->ipp_rthdrlen); 3859 if (error != 0) 3860 return (error); 3861 ipp->ipp_fields |= IPPF_RTHDR; 3862 } 3863 if (sticky) { 3864 error = udp_build_hdrs(q, udp); 3865 if (error != 0) 3866 return (error); 3867 } 3868 break; 3869 } 3870 3871 case IPV6_DONTFRAG: 3872 if (checkonly) 3873 break; 3874 3875 if (onoff) { 3876 ipp->ipp_fields |= IPPF_DONTFRAG; 3877 } else { 3878 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3879 } 3880 break; 3881 3882 case IPV6_USE_MIN_MTU: 3883 if (inlen != sizeof (int)) 3884 return (EINVAL); 3885 3886 if (*i1 < -1 || *i1 > 1) 3887 return (EINVAL); 3888 3889 if (checkonly) 3890 break; 3891 3892 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3893 ipp->ipp_use_min_mtu = *i1; 3894 break; 3895 3896 case IPV6_BOUND_PIF: 3897 case IPV6_SEC_OPT: 3898 case IPV6_DONTFAILOVER_IF: 3899 case IPV6_SRC_PREFERENCES: 3900 case IPV6_V6ONLY: 3901 /* Handled at the IP level */ 3902 return (-EINVAL); 3903 default: 3904 *outlenp = 0; 3905 return (EINVAL); 3906 } 3907 break; 3908 } /* end IPPROTO_IPV6 */ 3909 case IPPROTO_UDP: 3910 switch (name) { 3911 case UDP_ANONPRIVBIND: 3912 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 3913 *outlenp = 0; 3914 return (error); 3915 } 3916 if (!checkonly) { 3917 udp->udp_anon_priv_bind = onoff; 3918 } 3919 break; 3920 case UDP_EXCLBIND: 3921 if (!checkonly) 3922 udp->udp_exclbind = onoff; 3923 break; 3924 case UDP_RCVHDR: 3925 if (!checkonly) 3926 udp->udp_rcvhdr = onoff; 3927 break; 3928 default: 3929 *outlenp = 0; 3930 return (EINVAL); 3931 } 3932 break; 3933 default: 3934 *outlenp = 0; 3935 return (EINVAL); 3936 } 3937 /* 3938 * Common case of OK return with outval same as inval. 3939 */ 3940 if (invalp != outvalp) { 3941 /* don't trust bcopy for identical src/dst */ 3942 (void) bcopy(invalp, outvalp, inlen); 3943 } 3944 *outlenp = inlen; 3945 return (0); 3946 } 3947 3948 /* 3949 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3950 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3951 * headers, and the udp header. 3952 * Returns failure if can't allocate memory. 
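 *
 * The rebuilt template is laid out as
 *
 *	[ ip6i_t (only if IPPF_HAS_IP6I) ] ip6_t [ extension headers ] udpha_t
 *
 * with udp_sticky_hdrs_len covering the whole template and the final
 * UDPH_SIZE bytes holding the UDP header.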
3953 */ 3954 static int 3955 udp_build_hdrs(queue_t *q, udp_t *udp) 3956 { 3957 uchar_t *hdrs; 3958 uint_t hdrs_len; 3959 ip6_t *ip6h; 3960 ip6i_t *ip6i; 3961 udpha_t *udpha; 3962 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3963 3964 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3965 ASSERT(hdrs_len != 0); 3966 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3967 /* Need to reallocate */ 3968 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3969 if (hdrs == NULL) 3970 return (ENOMEM); 3971 3972 if (udp->udp_sticky_hdrs_len != 0) { 3973 kmem_free(udp->udp_sticky_hdrs, 3974 udp->udp_sticky_hdrs_len); 3975 } 3976 udp->udp_sticky_hdrs = hdrs; 3977 udp->udp_sticky_hdrs_len = hdrs_len; 3978 } 3979 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3980 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3981 3982 /* Set header fields not in ipp */ 3983 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3984 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3985 ip6h = (ip6_t *)&ip6i[1]; 3986 } else { 3987 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3988 } 3989 3990 if (!(ipp->ipp_fields & IPPF_ADDR)) 3991 ip6h->ip6_src = udp->udp_v6src; 3992 3993 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 3994 udpha->uha_src_port = udp->udp_port; 3995 3996 /* Try to get everything in a single mblk */ 3997 if (hdrs_len > udp->udp_max_hdr_len) { 3998 udp->udp_max_hdr_len = hdrs_len; 3999 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 4000 udp_wroff_extra); 4001 } 4002 return (0); 4003 } 4004 4005 /* 4006 * Set optbuf and optlen for the option. 4007 * If sticky is set allocate memory (if not already present). 4008 * Otherwise just point optbuf and optlen at invalp and inlen. 4009 * Returns failure if memory can not be allocated. 4010 */ 4011 static int 4012 udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 4013 uchar_t **optbufp, uint_t *optlenp) 4014 { 4015 uchar_t *optbuf; 4016 4017 if (!sticky) { 4018 *optbufp = invalp; 4019 *optlenp = inlen; 4020 return (0); 4021 } 4022 if (inlen == *optlenp) { 4023 /* Unchanged length - no need to realocate */ 4024 bcopy(invalp, *optbufp, inlen); 4025 return (0); 4026 } 4027 if (inlen != 0) { 4028 /* Allocate new buffer before free */ 4029 optbuf = kmem_alloc(inlen, KM_NOSLEEP); 4030 if (optbuf == NULL) 4031 return (ENOMEM); 4032 } else { 4033 optbuf = NULL; 4034 } 4035 /* Free old buffer */ 4036 if (*optlenp != 0) 4037 kmem_free(*optbufp, *optlenp); 4038 4039 bcopy(invalp, optbuf, inlen); 4040 *optbufp = optbuf; 4041 *optlenp = inlen; 4042 return (0); 4043 } 4044 4045 /* 4046 * This routine retrieves the value of an ND variable in a udpparam_t 4047 * structure. It is called through nd_getset when a user reads the 4048 * variable. 4049 */ 4050 /* ARGSUSED */ 4051 static int 4052 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4053 { 4054 udpparam_t *udppa = (udpparam_t *)cp; 4055 4056 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4057 return (0); 4058 } 4059 4060 /* 4061 * Walk through the param array specified registering each element with the 4062 * named dispatch (ND) handler. 
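 *
 * Illustrative use only (the table name here is hypothetical); a caller
 * typically registers its tunables once during module initialization:
 *
 *	if (!udp_param_register(udp_param_arr,
 *	    sizeof (udp_param_arr) / sizeof (udp_param_arr[0])))
 *		cmn_err(CE_WARN, "udp: ND parameter registration failed");
 *
 * On any failure the routine frees the whole udp_g_nd list itself.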
4063 */ 4064 static boolean_t 4065 udp_param_register(udpparam_t *udppa, int cnt) 4066 { 4067 for (; cnt-- > 0; udppa++) { 4068 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4069 if (!nd_load(&udp_g_nd, udppa->udp_param_name, 4070 udp_param_get, udp_param_set, 4071 (caddr_t)udppa)) { 4072 nd_free(&udp_g_nd); 4073 return (B_FALSE); 4074 } 4075 } 4076 } 4077 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports", 4078 udp_extra_priv_ports_get, NULL, NULL)) { 4079 nd_free(&udp_g_nd); 4080 return (B_FALSE); 4081 } 4082 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add", 4083 NULL, udp_extra_priv_ports_add, NULL)) { 4084 nd_free(&udp_g_nd); 4085 return (B_FALSE); 4086 } 4087 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del", 4088 NULL, udp_extra_priv_ports_del, NULL)) { 4089 nd_free(&udp_g_nd); 4090 return (B_FALSE); 4091 } 4092 if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL, 4093 NULL)) { 4094 nd_free(&udp_g_nd); 4095 return (B_FALSE); 4096 } 4097 if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL, 4098 NULL)) { 4099 nd_free(&udp_g_nd); 4100 return (B_FALSE); 4101 } 4102 return (B_TRUE); 4103 } 4104 4105 /* This routine sets an ND variable in a udpparam_t structure. */ 4106 /* ARGSUSED */ 4107 static int 4108 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 4109 { 4110 long new_value; 4111 udpparam_t *udppa = (udpparam_t *)cp; 4112 4113 /* 4114 * Fail the request if the new value does not lie within the 4115 * required bounds. 4116 */ 4117 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 4118 new_value < udppa->udp_param_min || 4119 new_value > udppa->udp_param_max) { 4120 return (EINVAL); 4121 } 4122 4123 /* Set the new value */ 4124 udppa->udp_param_value = new_value; 4125 return (0); 4126 } 4127 4128 static void 4129 udp_input(conn_t *connp, mblk_t *mp) 4130 { 4131 struct T_unitdata_ind *tudi; 4132 uchar_t *rptr; /* Pointer to IP header */ 4133 int hdr_length; /* Length of IP+UDP headers */ 4134 int udi_size; /* Size of T_unitdata_ind */ 4135 int mp_len; 4136 udp_t *udp; 4137 udpha_t *udpha; 4138 int ipversion; 4139 ip6_pkt_t ipp; 4140 ip6_t *ip6h; 4141 ip6i_t *ip6i; 4142 mblk_t *mp1; 4143 mblk_t *options_mp = NULL; 4144 in_pktinfo_t *pinfo = NULL; 4145 cred_t *cr = NULL; 4146 queue_t *q = connp->conn_rq; 4147 pid_t cpid; 4148 4149 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4150 "udp_rput_start: q %p mp %p", q, mp); 4151 4152 udp = connp->conn_udp; 4153 rptr = mp->b_rptr; 4154 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4155 ASSERT(OK_32PTR(rptr)); 4156 4157 /* 4158 * IP should have prepended the options data in an M_CTL 4159 * Check M_CTL "type" to make sure are not here bcos of 4160 * a valid ICMP message 4161 */ 4162 if (DB_TYPE(mp) == M_CTL) { 4163 if (MBLKL(mp) == sizeof (in_pktinfo_t) && 4164 ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type == 4165 IN_PKTINFO) { 4166 /* 4167 * IP_RECVIF or IP_RECVSLLA information has been 4168 * appended to the packet by IP. We need to 4169 * extract the mblk and adjust the rptr 4170 */ 4171 pinfo = (in_pktinfo_t *)mp->b_rptr; 4172 options_mp = mp; 4173 mp = mp->b_cont; 4174 rptr = mp->b_rptr; 4175 UDP_STAT(udp_in_pktinfo); 4176 } else { 4177 /* 4178 * ICMP messages. 4179 */ 4180 udp_icmp_error(q, mp); 4181 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4182 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4183 return; 4184 } 4185 } 4186 4187 mp_len = msgdsize(mp); 4188 /* 4189 * This is the inbound data path. 
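 * (Packets reach this routine from IP via udp_conn_recv(), which hands
 * the message to udp_input() through the _UDP_ENTER() macro and
 * udp_input_wrapper(); both appear later in this file.)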
4190 * First, we check to make sure the IP version number is correct, 4191 * and then pull the IP and UDP headers into the first mblk. 4192 * Assume IP provides aligned packets - otherwise toss. 4193 * Also, check if we have a complete IP header. 4194 */ 4195 4196 /* Initialize regardless if ipversion is IPv4 or IPv6 */ 4197 ipp.ipp_fields = 0; 4198 4199 ipversion = IPH_HDR_VERSION(rptr); 4200 switch (ipversion) { 4201 case IPV4_VERSION: 4202 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4203 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4204 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4205 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4206 (udp->udp_ip_rcv_options_len)) { 4207 /* 4208 * Handle IPv4 packets with options outside of the 4209 * main data path. Not needed for AF_INET6 sockets 4210 * since they don't support a getsockopt of IP_OPTIONS. 4211 */ 4212 if (udp->udp_family == AF_INET6) 4213 break; 4214 /* 4215 * UDP length check performed for IPv4 packets with 4216 * options to check whether UDP length specified in 4217 * the header is the same as the physical length of 4218 * the packet. 4219 */ 4220 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4221 if (mp_len != (ntohs(udpha->uha_length) + 4222 hdr_length - UDPH_SIZE)) { 4223 goto tossit; 4224 } 4225 /* 4226 * Handle the case where the packet has IP options 4227 * and the IP_RECVSLLA & IP_RECVIF are set 4228 */ 4229 if (pinfo != NULL) 4230 mp = options_mp; 4231 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4232 SQTAG_UDP_INPUT); 4233 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4234 "udp_rput_end: q %p (%S)", q, "end"); 4235 return; 4236 } 4237 4238 /* Handle IPV6_RECVHOPLIMIT. */ 4239 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4240 udp->udp_ipv6_recvpktinfo) { 4241 if (pinfo->in_pkt_flags & IPF_RECVIF) { 4242 ipp.ipp_fields |= IPPF_IFINDEX; 4243 ipp.ipp_ifindex = pinfo->in_pkt_ifindex; 4244 } 4245 } 4246 break; 4247 case IPV6_VERSION: 4248 /* 4249 * IPv6 packets can only be received by applications 4250 * that are prepared to receive IPv6 addresses. 4251 * The IP fanout must ensure this. 4252 */ 4253 ASSERT(udp->udp_family == AF_INET6); 4254 4255 ip6h = (ip6_t *)rptr; 4256 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4257 4258 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4259 uint8_t nexthdrp; 4260 /* Look for ifindex information */ 4261 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4262 ip6i = (ip6i_t *)ip6h; 4263 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4264 goto tossit; 4265 4266 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4267 ASSERT(ip6i->ip6i_ifindex != 0); 4268 ipp.ipp_fields |= IPPF_IFINDEX; 4269 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4270 } 4271 rptr = (uchar_t *)&ip6i[1]; 4272 mp->b_rptr = rptr; 4273 if (rptr == mp->b_wptr) { 4274 mp1 = mp->b_cont; 4275 freeb(mp); 4276 mp = mp1; 4277 rptr = mp->b_rptr; 4278 } 4279 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4280 goto tossit; 4281 ip6h = (ip6_t *)rptr; 4282 mp_len = msgdsize(mp); 4283 } 4284 /* 4285 * Find any potentially interesting extension headers 4286 * as well as the length of the IPv6 + extension 4287 * headers. 4288 */ 4289 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4290 UDPH_SIZE; 4291 ASSERT(nexthdrp == IPPROTO_UDP); 4292 } else { 4293 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4294 ip6i = NULL; 4295 } 4296 break; 4297 default: 4298 ASSERT(0); 4299 } 4300 4301 /* 4302 * IP inspected the UDP header thus all of it must be in the mblk. 
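 * (For example, an IPv4 datagram without options carrying 100 bytes of
 * payload has hdr_length == IP_SIMPLE_HDR_LENGTH + UDPH_SIZE == 28,
 * uha_length == htons(108) and mp_len == 128, which satisfies the
 * check performed below.)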
4303 * UDP length check is performed for IPv6 packets and IPv4 packets 4304 * without options to check if the size of the packet as specified 4305 * by the header is the same as the physical size of the packet. 4306 */ 4307 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4308 if ((MBLKL(mp) < hdr_length) || 4309 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4310 goto tossit; 4311 } 4312 4313 /* Walk past the headers. */ 4314 if (!udp->udp_rcvhdr) { 4315 mp->b_rptr = rptr + hdr_length; 4316 mp_len -= hdr_length; 4317 } 4318 4319 /* 4320 * This is the inbound data path. Packets are passed upstream as 4321 * T_UNITDATA_IND messages with full IP headers still attached. 4322 */ 4323 if (udp->udp_family == AF_INET) { 4324 sin_t *sin; 4325 4326 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4327 4328 /* 4329 * Normally only send up the address. 4330 * If IP_RECVDSTADDR is set we include the destination IP 4331 * address as an option. With IP_RECVOPTS we include all 4332 * the IP options. Only ip_rput_other() handles packets 4333 * that contain IP options. 4334 */ 4335 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4336 if (udp->udp_recvdstaddr) { 4337 udi_size += sizeof (struct T_opthdr) + 4338 sizeof (struct in_addr); 4339 UDP_STAT(udp_in_recvdstaddr); 4340 } 4341 4342 /* 4343 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4344 * space accordingly 4345 */ 4346 if (udp->udp_recvif && (pinfo != NULL) && 4347 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4348 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4349 UDP_STAT(udp_in_recvif); 4350 } 4351 4352 if (udp->udp_recvslla && (pinfo != NULL) && 4353 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4354 udi_size += sizeof (struct T_opthdr) + 4355 sizeof (struct sockaddr_dl); 4356 UDP_STAT(udp_in_recvslla); 4357 } 4358 4359 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4360 udi_size += sizeof (struct T_opthdr) + ucredsize; 4361 cpid = DB_CPID(mp); 4362 UDP_STAT(udp_in_recvucred); 4363 } 4364 /* 4365 * If IP_RECVTTL is set allocate the appropriate sized buffer 4366 */ 4367 if (udp->udp_recvttl) { 4368 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4369 UDP_STAT(udp_in_recvttl); 4370 } 4371 4372 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4373 4374 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4375 mp1 = allocb(udi_size, BPRI_MED); 4376 if (mp1 == NULL) { 4377 freemsg(mp); 4378 if (options_mp != NULL) 4379 freeb(options_mp); 4380 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4381 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4382 BUMP_MIB(&udp_mib, udpInErrors); 4383 return; 4384 } 4385 mp1->b_cont = mp; 4386 mp = mp1; 4387 mp->b_datap->db_type = M_PROTO; 4388 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4389 mp->b_wptr = (uchar_t *)tudi + udi_size; 4390 tudi->PRIM_type = T_UNITDATA_IND; 4391 tudi->SRC_length = sizeof (sin_t); 4392 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4393 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4394 sizeof (sin_t); 4395 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4396 tudi->OPT_length = udi_size; 4397 sin = (sin_t *)&tudi[1]; 4398 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4399 sin->sin_port = udpha->uha_src_port; 4400 sin->sin_family = udp->udp_family; 4401 *(uint32_t *)&sin->sin_zero[0] = 0; 4402 *(uint32_t *)&sin->sin_zero[4] = 0; 4403 4404 /* 4405 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4406 * IP_RECVTTL has been set. 
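 * Each option is emitted as a struct T_opthdr immediately followed by
 * its value; dstopt is advanced past both and udi_size is reduced by
 * toh->len, so udi_size must reach zero once every requested option
 * has been appended (asserted at the end of this block).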
4407 */ 4408 if (udi_size != 0) { 4409 /* 4410 * Copy in destination address before options to avoid 4411 * any padding issues. 4412 */ 4413 char *dstopt; 4414 4415 dstopt = (char *)&sin[1]; 4416 if (udp->udp_recvdstaddr) { 4417 struct T_opthdr *toh; 4418 ipaddr_t *dstptr; 4419 4420 toh = (struct T_opthdr *)dstopt; 4421 toh->level = IPPROTO_IP; 4422 toh->name = IP_RECVDSTADDR; 4423 toh->len = sizeof (struct T_opthdr) + 4424 sizeof (ipaddr_t); 4425 toh->status = 0; 4426 dstopt += sizeof (struct T_opthdr); 4427 dstptr = (ipaddr_t *)dstopt; 4428 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4429 dstopt += sizeof (ipaddr_t); 4430 udi_size -= toh->len; 4431 } 4432 4433 if (udp->udp_recvslla && (pinfo != NULL) && 4434 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4435 4436 struct T_opthdr *toh; 4437 struct sockaddr_dl *dstptr; 4438 4439 toh = (struct T_opthdr *)dstopt; 4440 toh->level = IPPROTO_IP; 4441 toh->name = IP_RECVSLLA; 4442 toh->len = sizeof (struct T_opthdr) + 4443 sizeof (struct sockaddr_dl); 4444 toh->status = 0; 4445 dstopt += sizeof (struct T_opthdr); 4446 dstptr = (struct sockaddr_dl *)dstopt; 4447 bcopy(&pinfo->in_pkt_slla, dstptr, 4448 sizeof (struct sockaddr_dl)); 4449 dstopt += sizeof (struct sockaddr_dl); 4450 udi_size -= toh->len; 4451 } 4452 4453 if (udp->udp_recvif && (pinfo != NULL) && 4454 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4455 4456 struct T_opthdr *toh; 4457 uint_t *dstptr; 4458 4459 toh = (struct T_opthdr *)dstopt; 4460 toh->level = IPPROTO_IP; 4461 toh->name = IP_RECVIF; 4462 toh->len = sizeof (struct T_opthdr) + 4463 sizeof (uint_t); 4464 toh->status = 0; 4465 dstopt += sizeof (struct T_opthdr); 4466 dstptr = (uint_t *)dstopt; 4467 *dstptr = pinfo->in_pkt_ifindex; 4468 dstopt += sizeof (uint_t); 4469 udi_size -= toh->len; 4470 } 4471 4472 if (cr != NULL) { 4473 struct T_opthdr *toh; 4474 4475 toh = (struct T_opthdr *)dstopt; 4476 toh->level = SOL_SOCKET; 4477 toh->name = SCM_UCRED; 4478 toh->len = sizeof (struct T_opthdr) + ucredsize; 4479 toh->status = 0; 4480 (void) cred2ucred(cr, cpid, &toh[1]); 4481 dstopt += toh->len; 4482 udi_size -= toh->len; 4483 } 4484 4485 if (udp->udp_recvttl) { 4486 struct T_opthdr *toh; 4487 uint8_t *dstptr; 4488 4489 toh = (struct T_opthdr *)dstopt; 4490 toh->level = IPPROTO_IP; 4491 toh->name = IP_RECVTTL; 4492 toh->len = sizeof (struct T_opthdr) + 4493 sizeof (uint8_t); 4494 toh->status = 0; 4495 dstopt += sizeof (struct T_opthdr); 4496 dstptr = (uint8_t *)dstopt; 4497 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4498 dstopt += sizeof (uint8_t); 4499 udi_size -= toh->len; 4500 } 4501 4502 /* Consumed all of allocated space */ 4503 ASSERT(udi_size == 0); 4504 } 4505 } else { 4506 sin6_t *sin6; 4507 4508 /* 4509 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4510 * 4511 * Normally we only send up the address. If receiving of any 4512 * optional receive side information is enabled, we also send 4513 * that up as options. 
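 * The options that can be generated here are IPV6_PKTINFO,
 * IPV6_HOPLIMIT, IPV6_TCLASS, IPV6_HOPOPTS, IPV6_DSTOPTS, IPV6_RTHDR
 * and SCM_UCRED, depending on what the application has enabled.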
4514 * [ Only udp_rput_other() handles packets that contain IP 4515 * options so code to account for does not appear immediately 4516 * below but elsewhere ] 4517 */ 4518 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4519 4520 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4521 IPPF_RTHDR|IPPF_IFINDEX)) { 4522 if (udp->udp_ipv6_recvhopopts && 4523 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4524 udi_size += sizeof (struct T_opthdr) + 4525 ipp.ipp_hopoptslen; 4526 UDP_STAT(udp_in_recvhopopts); 4527 } 4528 if ((udp->udp_ipv6_recvdstopts || 4529 udp->udp_old_ipv6_recvdstopts) && 4530 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4531 udi_size += sizeof (struct T_opthdr) + 4532 ipp.ipp_dstoptslen; 4533 UDP_STAT(udp_in_recvdstopts); 4534 } 4535 if (((udp->udp_ipv6_recvdstopts && 4536 udp->udp_ipv6_recvrthdr && 4537 (ipp.ipp_fields & IPPF_RTHDR)) || 4538 udp->udp_ipv6_recvrthdrdstopts) && 4539 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4540 udi_size += sizeof (struct T_opthdr) + 4541 ipp.ipp_rtdstoptslen; 4542 UDP_STAT(udp_in_recvrtdstopts); 4543 } 4544 if (udp->udp_ipv6_recvrthdr && 4545 (ipp.ipp_fields & IPPF_RTHDR)) { 4546 udi_size += sizeof (struct T_opthdr) + 4547 ipp.ipp_rthdrlen; 4548 UDP_STAT(udp_in_recvrthdr); 4549 } 4550 if (udp->udp_ipv6_recvpktinfo && 4551 (ipp.ipp_fields & IPPF_IFINDEX)) { 4552 udi_size += sizeof (struct T_opthdr) + 4553 sizeof (struct in6_pktinfo); 4554 UDP_STAT(udp_in_recvpktinfo); 4555 } 4556 4557 } 4558 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4559 udi_size += sizeof (struct T_opthdr) + ucredsize; 4560 cpid = DB_CPID(mp); 4561 UDP_STAT(udp_in_recvucred); 4562 } 4563 4564 if (udp->udp_ipv6_recvhoplimit) { 4565 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4566 UDP_STAT(udp_in_recvhoplimit); 4567 } 4568 4569 if (udp->udp_ipv6_recvtclass) { 4570 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4571 UDP_STAT(udp_in_recvtclass); 4572 } 4573 4574 mp1 = allocb(udi_size, BPRI_MED); 4575 if (mp1 == NULL) { 4576 freemsg(mp); 4577 if (options_mp != NULL) 4578 freeb(options_mp); 4579 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4580 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4581 BUMP_MIB(&udp_mib, udpInErrors); 4582 return; 4583 } 4584 mp1->b_cont = mp; 4585 mp = mp1; 4586 mp->b_datap->db_type = M_PROTO; 4587 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4588 mp->b_wptr = (uchar_t *)tudi + udi_size; 4589 tudi->PRIM_type = T_UNITDATA_IND; 4590 tudi->SRC_length = sizeof (sin6_t); 4591 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4592 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4593 sizeof (sin6_t); 4594 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4595 tudi->OPT_length = udi_size; 4596 sin6 = (sin6_t *)&tudi[1]; 4597 if (ipversion == IPV4_VERSION) { 4598 in6_addr_t v6dst; 4599 4600 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4601 &sin6->sin6_addr); 4602 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4603 &v6dst); 4604 sin6->sin6_flowinfo = 0; 4605 sin6->sin6_scope_id = 0; 4606 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4607 connp->conn_zoneid); 4608 } else { 4609 sin6->sin6_addr = ip6h->ip6_src; 4610 /* No sin6_flowinfo per API */ 4611 sin6->sin6_flowinfo = 0; 4612 /* For link-scope source pass up scope id */ 4613 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4614 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4615 sin6->sin6_scope_id = ipp.ipp_ifindex; 4616 else 4617 sin6->sin6_scope_id = 0; 4618 sin6->__sin6_src_id = ip_srcid_find_addr( 4619 &ip6h->ip6_dst, connp->conn_zoneid); 4620 } 4621 sin6->sin6_port = 
udpha->uha_src_port; 4622 sin6->sin6_family = udp->udp_family; 4623 4624 if (udi_size != 0) { 4625 uchar_t *dstopt; 4626 4627 dstopt = (uchar_t *)&sin6[1]; 4628 if (udp->udp_ipv6_recvpktinfo && 4629 (ipp.ipp_fields & IPPF_IFINDEX)) { 4630 struct T_opthdr *toh; 4631 struct in6_pktinfo *pkti; 4632 4633 toh = (struct T_opthdr *)dstopt; 4634 toh->level = IPPROTO_IPV6; 4635 toh->name = IPV6_PKTINFO; 4636 toh->len = sizeof (struct T_opthdr) + 4637 sizeof (*pkti); 4638 toh->status = 0; 4639 dstopt += sizeof (struct T_opthdr); 4640 pkti = (struct in6_pktinfo *)dstopt; 4641 if (ipversion == IPV6_VERSION) 4642 pkti->ipi6_addr = ip6h->ip6_dst; 4643 else 4644 IN6_IPADDR_TO_V4MAPPED( 4645 ((ipha_t *)rptr)->ipha_dst, 4646 &pkti->ipi6_addr); 4647 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4648 dstopt += sizeof (*pkti); 4649 udi_size -= toh->len; 4650 } 4651 if (udp->udp_ipv6_recvhoplimit) { 4652 struct T_opthdr *toh; 4653 4654 toh = (struct T_opthdr *)dstopt; 4655 toh->level = IPPROTO_IPV6; 4656 toh->name = IPV6_HOPLIMIT; 4657 toh->len = sizeof (struct T_opthdr) + 4658 sizeof (uint_t); 4659 toh->status = 0; 4660 dstopt += sizeof (struct T_opthdr); 4661 if (ipversion == IPV6_VERSION) 4662 *(uint_t *)dstopt = ip6h->ip6_hops; 4663 else 4664 *(uint_t *)dstopt = 4665 ((ipha_t *)rptr)->ipha_ttl; 4666 dstopt += sizeof (uint_t); 4667 udi_size -= toh->len; 4668 } 4669 if (udp->udp_ipv6_recvtclass) { 4670 struct T_opthdr *toh; 4671 4672 toh = (struct T_opthdr *)dstopt; 4673 toh->level = IPPROTO_IPV6; 4674 toh->name = IPV6_TCLASS; 4675 toh->len = sizeof (struct T_opthdr) + 4676 sizeof (uint_t); 4677 toh->status = 0; 4678 dstopt += sizeof (struct T_opthdr); 4679 if (ipversion == IPV6_VERSION) { 4680 *(uint_t *)dstopt = 4681 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4682 } else { 4683 ipha_t *ipha = (ipha_t *)rptr; 4684 *(uint_t *)dstopt = 4685 ipha->ipha_type_of_service; 4686 } 4687 dstopt += sizeof (uint_t); 4688 udi_size -= toh->len; 4689 } 4690 if (udp->udp_ipv6_recvhopopts && 4691 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4692 struct T_opthdr *toh; 4693 4694 toh = (struct T_opthdr *)dstopt; 4695 toh->level = IPPROTO_IPV6; 4696 toh->name = IPV6_HOPOPTS; 4697 toh->len = sizeof (struct T_opthdr) + 4698 ipp.ipp_hopoptslen; 4699 toh->status = 0; 4700 dstopt += sizeof (struct T_opthdr); 4701 bcopy(ipp.ipp_hopopts, dstopt, 4702 ipp.ipp_hopoptslen); 4703 dstopt += ipp.ipp_hopoptslen; 4704 udi_size -= toh->len; 4705 } 4706 if (udp->udp_ipv6_recvdstopts && 4707 udp->udp_ipv6_recvrthdr && 4708 (ipp.ipp_fields & IPPF_RTHDR) && 4709 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4710 struct T_opthdr *toh; 4711 4712 toh = (struct T_opthdr *)dstopt; 4713 toh->level = IPPROTO_IPV6; 4714 toh->name = IPV6_DSTOPTS; 4715 toh->len = sizeof (struct T_opthdr) + 4716 ipp.ipp_rtdstoptslen; 4717 toh->status = 0; 4718 dstopt += sizeof (struct T_opthdr); 4719 bcopy(ipp.ipp_rtdstopts, dstopt, 4720 ipp.ipp_rtdstoptslen); 4721 dstopt += ipp.ipp_rtdstoptslen; 4722 udi_size -= toh->len; 4723 } 4724 if (udp->udp_ipv6_recvrthdr && 4725 (ipp.ipp_fields & IPPF_RTHDR)) { 4726 struct T_opthdr *toh; 4727 4728 toh = (struct T_opthdr *)dstopt; 4729 toh->level = IPPROTO_IPV6; 4730 toh->name = IPV6_RTHDR; 4731 toh->len = sizeof (struct T_opthdr) + 4732 ipp.ipp_rthdrlen; 4733 toh->status = 0; 4734 dstopt += sizeof (struct T_opthdr); 4735 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4736 dstopt += ipp.ipp_rthdrlen; 4737 udi_size -= toh->len; 4738 } 4739 if (udp->udp_ipv6_recvdstopts && 4740 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4741 struct T_opthdr *toh; 4742 4743 toh = (struct T_opthdr 
*)dstopt; 4744 toh->level = IPPROTO_IPV6; 4745 toh->name = IPV6_DSTOPTS; 4746 toh->len = sizeof (struct T_opthdr) + 4747 ipp.ipp_dstoptslen; 4748 toh->status = 0; 4749 dstopt += sizeof (struct T_opthdr); 4750 bcopy(ipp.ipp_dstopts, dstopt, 4751 ipp.ipp_dstoptslen); 4752 dstopt += ipp.ipp_dstoptslen; 4753 udi_size -= toh->len; 4754 } 4755 4756 if (cr != NULL) { 4757 struct T_opthdr *toh; 4758 4759 toh = (struct T_opthdr *)dstopt; 4760 toh->level = SOL_SOCKET; 4761 toh->name = SCM_UCRED; 4762 toh->len = sizeof (struct T_opthdr) + ucredsize; 4763 toh->status = 0; 4764 (void) cred2ucred(cr, cpid, &toh[1]); 4765 dstopt += toh->len; 4766 udi_size -= toh->len; 4767 } 4768 /* Consumed all of allocated space */ 4769 ASSERT(udi_size == 0); 4770 } 4771 #undef sin6 4772 /* No IP_RECVDSTADDR for IPv6. */ 4773 } 4774 4775 BUMP_MIB(&udp_mib, udpInDatagrams); 4776 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4777 "udp_rput_end: q %p (%S)", q, "end"); 4778 if (options_mp != NULL) 4779 freeb(options_mp); 4780 4781 if (udp->udp_direct_sockfs) { 4782 /* 4783 * There is nothing above us except for the stream head; 4784 * use the read-side synchronous stream interface in 4785 * order to reduce the time spent in interrupt thread. 4786 */ 4787 ASSERT(udp->udp_issocket); 4788 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 4789 } else { 4790 /* 4791 * Use regular STREAMS interface to pass data upstream 4792 * if this is not a socket endpoint, or if we have 4793 * switched over to the slow mode due to sockmod being 4794 * popped or a module being pushed on top of us. 4795 */ 4796 putnext(UDP_RD(q), mp); 4797 } 4798 return; 4799 4800 tossit: 4801 freemsg(mp); 4802 if (options_mp != NULL) 4803 freeb(options_mp); 4804 BUMP_MIB(&udp_mib, udpInErrors); 4805 } 4806 4807 void 4808 udp_conn_recv(conn_t *connp, mblk_t *mp) 4809 { 4810 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 4811 } 4812 4813 /* ARGSUSED */ 4814 static void 4815 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 4816 { 4817 udp_input((conn_t *)arg, mp); 4818 _UDP_EXIT((conn_t *)arg); 4819 } 4820 4821 /* 4822 * Process non-M_DATA messages as well as M_DATA messages that requires 4823 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 4824 */ 4825 static void 4826 udp_rput_other(queue_t *q, mblk_t *mp) 4827 { 4828 struct T_unitdata_ind *tudi; 4829 mblk_t *mp1; 4830 uchar_t *rptr; 4831 uchar_t *new_rptr; 4832 int hdr_length; 4833 int udi_size; /* Size of T_unitdata_ind */ 4834 int opt_len; /* Length of IP options */ 4835 sin_t *sin; 4836 struct T_error_ack *tea; 4837 mblk_t *options_mp = NULL; 4838 in_pktinfo_t *pinfo; 4839 boolean_t recv_on = B_FALSE; 4840 cred_t *cr = NULL; 4841 udp_t *udp = Q_TO_UDP(q); 4842 pid_t cpid; 4843 4844 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4845 "udp_rput_other: q %p mp %p", q, mp); 4846 4847 ASSERT(OK_32PTR(mp->b_rptr)); 4848 rptr = mp->b_rptr; 4849 4850 switch (mp->b_datap->db_type) { 4851 case M_CTL: 4852 /* 4853 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 4854 */ 4855 recv_on = B_TRUE; 4856 options_mp = mp; 4857 pinfo = (in_pktinfo_t *)options_mp->b_rptr; 4858 4859 /* 4860 * The actual data is in mp->b_cont 4861 */ 4862 mp = mp->b_cont; 4863 ASSERT(OK_32PTR(mp->b_rptr)); 4864 rptr = mp->b_rptr; 4865 break; 4866 case M_DATA: 4867 /* 4868 * M_DATA messages contain IPv4 datagrams. They are handled 4869 * after this switch. 4870 */ 4871 break; 4872 case M_PROTO: 4873 case M_PCPROTO: 4874 /* M_PROTO messages contain some type of TPI message. 
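 * The primitives of interest here are T_ERROR_ACK (from a failed bind,
 * connect or disconnect), T_BIND_ACK, T_OPTMGMT_ACK and T_OK_ACK; any
 * other primitive is simply freed.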
*/ 4875 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 4876 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4877 freemsg(mp); 4878 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4879 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 4880 return; 4881 } 4882 tea = (struct T_error_ack *)rptr; 4883 4884 switch (tea->PRIM_type) { 4885 case T_ERROR_ACK: 4886 switch (tea->ERROR_prim) { 4887 case O_T_BIND_REQ: 4888 case T_BIND_REQ: { 4889 /* 4890 * If our O_T_BIND_REQ/T_BIND_REQ fails, 4891 * clear out the associated port and source 4892 * address before passing the message 4893 * upstream. If this was caused by a T_CONN_REQ 4894 * revert back to bound state. 4895 */ 4896 udp_fanout_t *udpf; 4897 4898 udpf = &udp_bind_fanout[ 4899 UDP_BIND_HASH(udp->udp_port)]; 4900 mutex_enter(&udpf->uf_lock); 4901 if (udp->udp_state == TS_DATA_XFER) { 4902 /* Connect failed */ 4903 tea->ERROR_prim = T_CONN_REQ; 4904 /* Revert back to the bound source */ 4905 udp->udp_v6src = udp->udp_bound_v6src; 4906 udp->udp_state = TS_IDLE; 4907 mutex_exit(&udpf->uf_lock); 4908 if (udp->udp_family == AF_INET6) 4909 (void) udp_build_hdrs(q, udp); 4910 break; 4911 } 4912 4913 if (udp->udp_discon_pending) { 4914 tea->ERROR_prim = T_DISCON_REQ; 4915 udp->udp_discon_pending = 0; 4916 } 4917 V6_SET_ZERO(udp->udp_v6src); 4918 V6_SET_ZERO(udp->udp_bound_v6src); 4919 udp->udp_state = TS_UNBND; 4920 udp_bind_hash_remove(udp, B_TRUE); 4921 udp->udp_port = 0; 4922 mutex_exit(&udpf->uf_lock); 4923 if (udp->udp_family == AF_INET6) 4924 (void) udp_build_hdrs(q, udp); 4925 break; 4926 } 4927 default: 4928 break; 4929 } 4930 break; 4931 case T_BIND_ACK: 4932 udp_rput_bind_ack(q, mp); 4933 return; 4934 4935 case T_OPTMGMT_ACK: 4936 case T_OK_ACK: 4937 break; 4938 default: 4939 freemsg(mp); 4940 return; 4941 } 4942 putnext(UDP_RD(q), mp); 4943 return; 4944 } 4945 4946 /* 4947 * This is the inbound data path. 4948 * First, we make sure the data contains both IP and UDP headers. 4949 * 4950 * This handle IPv4 packets for only AF_INET sockets. 4951 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 4952 * is no need saving the options. 4953 */ 4954 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4955 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4956 if (mp->b_wptr - rptr < hdr_length) { 4957 if (!pullupmsg(mp, hdr_length)) { 4958 freemsg(mp); 4959 if (options_mp != NULL) 4960 freeb(options_mp); 4961 BUMP_MIB(&udp_mib, udpInErrors); 4962 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4963 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 4964 BUMP_MIB(&udp_mib, udpInErrors); 4965 return; 4966 } 4967 rptr = mp->b_rptr; 4968 } 4969 /* Walk past the headers. 
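 * (Unless UDP_RCVHDR is enabled, in which case b_rptr is left pointing
 * at the IP header so the application receives the headers as well.)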
*/ 4970 new_rptr = rptr + hdr_length; 4971 if (!udp->udp_rcvhdr) 4972 mp->b_rptr = new_rptr; 4973 4974 /* Save the options if any */ 4975 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 4976 if (opt_len > 0) { 4977 if (opt_len > udp->udp_ip_rcv_options_len) { 4978 if (udp->udp_ip_rcv_options_len) 4979 mi_free((char *)udp->udp_ip_rcv_options); 4980 udp->udp_ip_rcv_options_len = 0; 4981 udp->udp_ip_rcv_options = 4982 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 4983 if (udp->udp_ip_rcv_options) 4984 udp->udp_ip_rcv_options_len = opt_len; 4985 } 4986 if (udp->udp_ip_rcv_options_len) { 4987 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 4988 udp->udp_ip_rcv_options, opt_len); 4989 /* Adjust length if we are resusing the space */ 4990 udp->udp_ip_rcv_options_len = opt_len; 4991 } 4992 } else if (udp->udp_ip_rcv_options_len) { 4993 mi_free((char *)udp->udp_ip_rcv_options); 4994 udp->udp_ip_rcv_options = NULL; 4995 udp->udp_ip_rcv_options_len = 0; 4996 } 4997 4998 /* 4999 * Normally only send up the address. 5000 * If IP_RECVDSTADDR is set we include the destination IP 5001 * address as an option. With IP_RECVOPTS we include all 5002 * the IP options. 5003 */ 5004 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5005 if (udp->udp_recvdstaddr) { 5006 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5007 UDP_STAT(udp_in_recvdstaddr); 5008 } 5009 if (udp->udp_recvopts && opt_len > 0) { 5010 udi_size += sizeof (struct T_opthdr) + opt_len; 5011 UDP_STAT(udp_in_recvopts); 5012 } 5013 5014 /* 5015 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5016 * space accordingly 5017 */ 5018 if (udp->udp_recvif && recv_on && 5019 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5020 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5021 UDP_STAT(udp_in_recvif); 5022 } 5023 5024 if (udp->udp_recvslla && recv_on && 5025 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5026 udi_size += sizeof (struct T_opthdr) + 5027 sizeof (struct sockaddr_dl); 5028 UDP_STAT(udp_in_recvslla); 5029 } 5030 5031 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5032 udi_size += sizeof (struct T_opthdr) + ucredsize; 5033 cpid = DB_CPID(mp); 5034 UDP_STAT(udp_in_recvucred); 5035 } 5036 /* 5037 * If IP_RECVTTL is set allocate the appropriate sized buffer 5038 */ 5039 if (udp->udp_recvttl) { 5040 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5041 UDP_STAT(udp_in_recvttl); 5042 } 5043 5044 /* Allocate a message block for the T_UNITDATA_IND structure. 
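 * udi_size counts the T_unitdata_ind itself, the source address and
 * every option sized above, so the allocation is exact (see the ASSERT
 * further down).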
*/ 5045 mp1 = allocb(udi_size, BPRI_MED); 5046 if (mp1 == NULL) { 5047 freemsg(mp); 5048 if (options_mp != NULL) 5049 freeb(options_mp); 5050 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5051 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5052 BUMP_MIB(&udp_mib, udpInErrors); 5053 return; 5054 } 5055 mp1->b_cont = mp; 5056 mp = mp1; 5057 mp->b_datap->db_type = M_PROTO; 5058 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5059 mp->b_wptr = (uchar_t *)tudi + udi_size; 5060 tudi->PRIM_type = T_UNITDATA_IND; 5061 tudi->SRC_length = sizeof (sin_t); 5062 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5063 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5064 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5065 tudi->OPT_length = udi_size; 5066 5067 sin = (sin_t *)&tudi[1]; 5068 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5069 sin->sin_port = ((in_port_t *) 5070 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5071 sin->sin_family = AF_INET; 5072 *(uint32_t *)&sin->sin_zero[0] = 0; 5073 *(uint32_t *)&sin->sin_zero[4] = 0; 5074 5075 /* 5076 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5077 * IP_RECVTTL has been set. 5078 */ 5079 if (udi_size != 0) { 5080 /* 5081 * Copy in destination address before options to avoid any 5082 * padding issues. 5083 */ 5084 char *dstopt; 5085 5086 dstopt = (char *)&sin[1]; 5087 if (udp->udp_recvdstaddr) { 5088 struct T_opthdr *toh; 5089 ipaddr_t *dstptr; 5090 5091 toh = (struct T_opthdr *)dstopt; 5092 toh->level = IPPROTO_IP; 5093 toh->name = IP_RECVDSTADDR; 5094 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5095 toh->status = 0; 5096 dstopt += sizeof (struct T_opthdr); 5097 dstptr = (ipaddr_t *)dstopt; 5098 *dstptr = (((ipaddr_t *)rptr)[4]); 5099 dstopt += sizeof (ipaddr_t); 5100 udi_size -= toh->len; 5101 } 5102 if (udp->udp_recvopts && udi_size != 0) { 5103 struct T_opthdr *toh; 5104 5105 toh = (struct T_opthdr *)dstopt; 5106 toh->level = IPPROTO_IP; 5107 toh->name = IP_RECVOPTS; 5108 toh->len = sizeof (struct T_opthdr) + opt_len; 5109 toh->status = 0; 5110 dstopt += sizeof (struct T_opthdr); 5111 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5112 dstopt += opt_len; 5113 udi_size -= toh->len; 5114 } 5115 5116 if (udp->udp_recvslla && recv_on && 5117 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5118 5119 struct T_opthdr *toh; 5120 struct sockaddr_dl *dstptr; 5121 5122 toh = (struct T_opthdr *)dstopt; 5123 toh->level = IPPROTO_IP; 5124 toh->name = IP_RECVSLLA; 5125 toh->len = sizeof (struct T_opthdr) + 5126 sizeof (struct sockaddr_dl); 5127 toh->status = 0; 5128 dstopt += sizeof (struct T_opthdr); 5129 dstptr = (struct sockaddr_dl *)dstopt; 5130 bcopy(&pinfo->in_pkt_slla, dstptr, 5131 sizeof (struct sockaddr_dl)); 5132 dstopt += sizeof (struct sockaddr_dl); 5133 udi_size -= toh->len; 5134 } 5135 5136 if (udp->udp_recvif && recv_on && 5137 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5138 5139 struct T_opthdr *toh; 5140 uint_t *dstptr; 5141 5142 toh = (struct T_opthdr *)dstopt; 5143 toh->level = IPPROTO_IP; 5144 toh->name = IP_RECVIF; 5145 toh->len = sizeof (struct T_opthdr) + 5146 sizeof (uint_t); 5147 toh->status = 0; 5148 dstopt += sizeof (struct T_opthdr); 5149 dstptr = (uint_t *)dstopt; 5150 *dstptr = pinfo->in_pkt_ifindex; 5151 dstopt += sizeof (uint_t); 5152 udi_size -= toh->len; 5153 } 5154 5155 if (cr != NULL) { 5156 struct T_opthdr *toh; 5157 5158 toh = (struct T_opthdr *)dstopt; 5159 toh->level = SOL_SOCKET; 5160 toh->name = SCM_UCRED; 5161 toh->len = sizeof (struct T_opthdr) + ucredsize; 5162 toh->status 
= 0; 5163 (void) cred2ucred(cr, cpid, &toh[1]); 5164 dstopt += toh->len; 5165 udi_size -= toh->len; 5166 } 5167 5168 if (udp->udp_recvttl) { 5169 struct T_opthdr *toh; 5170 uint8_t *dstptr; 5171 5172 toh = (struct T_opthdr *)dstopt; 5173 toh->level = IPPROTO_IP; 5174 toh->name = IP_RECVTTL; 5175 toh->len = sizeof (struct T_opthdr) + 5176 sizeof (uint8_t); 5177 toh->status = 0; 5178 dstopt += sizeof (struct T_opthdr); 5179 dstptr = (uint8_t *)dstopt; 5180 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5181 dstopt += sizeof (uint8_t); 5182 udi_size -= toh->len; 5183 } 5184 5185 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5186 } 5187 BUMP_MIB(&udp_mib, udpInDatagrams); 5188 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5189 "udp_rput_other_end: q %p (%S)", q, "end"); 5190 if (options_mp != NULL) 5191 freeb(options_mp); 5192 5193 if (udp->udp_direct_sockfs) { 5194 /* 5195 * There is nothing above us except for the stream head; 5196 * use the read-side synchronous stream interface in 5197 * order to reduce the time spent in interrupt thread. 5198 */ 5199 ASSERT(udp->udp_issocket); 5200 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5201 } else { 5202 /* 5203 * Use regular STREAMS interface to pass data upstream 5204 * if this is not a socket endpoint, or if we have 5205 * switched over to the slow mode due to sockmod being 5206 * popped or a module being pushed on top of us. 5207 */ 5208 putnext(UDP_RD(q), mp); 5209 } 5210 } 5211 5212 /* ARGSUSED */ 5213 static void 5214 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5215 { 5216 conn_t *connp = arg; 5217 5218 udp_rput_other(connp->conn_rq, mp); 5219 udp_exit(connp); 5220 } 5221 5222 /* 5223 * Process a T_BIND_ACK 5224 */ 5225 static void 5226 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5227 { 5228 udp_t *udp = Q_TO_UDP(q); 5229 mblk_t *mp1; 5230 ire_t *ire; 5231 struct T_bind_ack *tba; 5232 uchar_t *addrp; 5233 ipa_conn_t *ac; 5234 ipa6_conn_t *ac6; 5235 5236 if (udp->udp_discon_pending) 5237 udp->udp_discon_pending = 0; 5238 5239 /* 5240 * If a broadcast/multicast address was bound set 5241 * the source address to 0. 5242 * This ensures no datagrams with broadcast address 5243 * as source address are emitted (which would violate 5244 * RFC1122 - Hosts requirements) 5245 * 5246 * Note that when connecting the returned IRE is 5247 * for the destination address and we only perform 5248 * the broadcast check for the source address (it 5249 * is OK to connect to a broadcast/multicast address.) 5250 */ 5251 mp1 = mp->b_cont; 5252 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5253 ire = (ire_t *)mp1->b_rptr; 5254 5255 /* 5256 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5257 * local address. 
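 * Hence the single IRE_BROADCAST test below covers a local bind to
 * either an IPv4 broadcast address or an IPv6 multicast address.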
5258 */ 5259 if (ire->ire_type == IRE_BROADCAST && 5260 udp->udp_state != TS_DATA_XFER) { 5261 /* This was just a local bind to a broadcast addr */ 5262 V6_SET_ZERO(udp->udp_v6src); 5263 if (udp->udp_family == AF_INET6) 5264 (void) udp_build_hdrs(q, udp); 5265 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5266 /* 5267 * Local address not yet set - pick it from the 5268 * T_bind_ack 5269 */ 5270 tba = (struct T_bind_ack *)mp->b_rptr; 5271 addrp = &mp->b_rptr[tba->ADDR_offset]; 5272 switch (udp->udp_family) { 5273 case AF_INET: 5274 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5275 ac = (ipa_conn_t *)addrp; 5276 } else { 5277 ASSERT(tba->ADDR_length == 5278 sizeof (ipa_conn_x_t)); 5279 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5280 } 5281 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5282 &udp->udp_v6src); 5283 break; 5284 case AF_INET6: 5285 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5286 ac6 = (ipa6_conn_t *)addrp; 5287 } else { 5288 ASSERT(tba->ADDR_length == 5289 sizeof (ipa6_conn_x_t)); 5290 ac6 = &((ipa6_conn_x_t *) 5291 addrp)->ac6x_conn; 5292 } 5293 udp->udp_v6src = ac6->ac6_laddr; 5294 (void) udp_build_hdrs(q, udp); 5295 break; 5296 } 5297 } 5298 mp1 = mp1->b_cont; 5299 } 5300 /* 5301 * Look for one or more appended ACK message added by 5302 * udp_connect or udp_disconnect. 5303 * If none found just send up the T_BIND_ACK. 5304 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5305 * udp_disconnect has appended a T_OK_ACK. 5306 */ 5307 if (mp1 != NULL) { 5308 if (mp->b_cont == mp1) 5309 mp->b_cont = NULL; 5310 else { 5311 ASSERT(mp->b_cont->b_cont == mp1); 5312 mp->b_cont->b_cont = NULL; 5313 } 5314 freemsg(mp); 5315 mp = mp1; 5316 while (mp != NULL) { 5317 mp1 = mp->b_cont; 5318 mp->b_cont = NULL; 5319 putnext(UDP_RD(q), mp); 5320 mp = mp1; 5321 } 5322 return; 5323 } 5324 freemsg(mp->b_cont); 5325 mp->b_cont = NULL; 5326 putnext(UDP_RD(q), mp); 5327 } 5328 5329 /* 5330 * return SNMP stuff in buffer in mpdata 5331 */ 5332 int 5333 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5334 { 5335 mblk_t *mpdata; 5336 mblk_t *mp_conn_ctl; 5337 mblk_t *mp6_conn_ctl; 5338 mblk_t *mp_conn_data; 5339 mblk_t *mp6_conn_data; 5340 mblk_t *mp_conn_tail = NULL; 5341 mblk_t *mp6_conn_tail = NULL; 5342 struct opthdr *optp; 5343 mib2_udpEntry_t ude; 5344 mib2_udp6Entry_t ude6; 5345 int state; 5346 zoneid_t zoneid; 5347 int i; 5348 connf_t *connfp; 5349 conn_t *connp = Q_TO_CONN(q); 5350 udp_t *udp = connp->conn_udp; 5351 5352 if (mpctl == NULL || 5353 (mpdata = mpctl->b_cont) == NULL || 5354 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5355 (mp6_conn_ctl = copymsg(mpctl)) == NULL) { 5356 freemsg(mp_conn_ctl); 5357 return (0); 5358 } 5359 5360 mp_conn_data = mp_conn_ctl->b_cont; 5361 mp6_conn_data = mp6_conn_ctl->b_cont; 5362 5363 zoneid = connp->conn_zoneid; 5364 5365 /* fixed length structure for IPv4 and IPv6 counters */ 5366 SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5367 SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5368 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5369 optp->level = MIB2_UDP; 5370 optp->name = 0; 5371 (void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib)); 5372 optp->len = msgdsize(mpdata); 5373 qreply(q, mpctl); 5374 5375 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5376 connfp = &ipcl_globalhash_fanout[i]; 5377 connp = NULL; 5378 5379 while ((connp = ipcl_get_next_conn(connfp, connp, 5380 IPCL_UDP))) { 5381 udp = connp->conn_udp; 5382 if (zoneid != connp->conn_zoneid) 5383 continue; 5384 5385 /* 5386 * Note that the port 
numbers are sent in 5387 * host byte order 5388 */ 5389 5390 if (udp->udp_state == TS_UNBND) 5391 state = MIB2_UDP_unbound; 5392 else if (udp->udp_state == TS_IDLE) 5393 state = MIB2_UDP_idle; 5394 else if (udp->udp_state == TS_DATA_XFER) 5395 state = MIB2_UDP_connected; 5396 else 5397 state = MIB2_UDP_unknown; 5398 5399 /* 5400 * Create an IPv4 table entry for IPv4 entries and also 5401 * any IPv6 entries which are bound to in6addr_any 5402 * (i.e. anything a IPv4 peer could connect/send to). 5403 */ 5404 if (udp->udp_ipversion == IPV4_VERSION || 5405 (udp->udp_state <= TS_IDLE && 5406 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5407 ude.udpEntryInfo.ue_state = state; 5408 /* 5409 * If in6addr_any this will set it to 5410 * INADDR_ANY 5411 */ 5412 ude.udpLocalAddress = 5413 V4_PART_OF_V6(udp->udp_v6src); 5414 ude.udpLocalPort = ntohs(udp->udp_port); 5415 if (udp->udp_state == TS_DATA_XFER) { 5416 /* 5417 * Can potentially get here for 5418 * v6 socket if another process 5419 * (say, ping) has just done a 5420 * sendto(), changing the state 5421 * from the TS_IDLE above to 5422 * TS_DATA_XFER by the time we hit 5423 * this part of the code. 5424 */ 5425 ude.udpEntryInfo.ue_RemoteAddress = 5426 V4_PART_OF_V6(udp->udp_v6dst); 5427 ude.udpEntryInfo.ue_RemotePort = 5428 ntohs(udp->udp_dstport); 5429 } else { 5430 ude.udpEntryInfo.ue_RemoteAddress = 0; 5431 ude.udpEntryInfo.ue_RemotePort = 0; 5432 } 5433 (void) snmp_append_data2(mp_conn_data, 5434 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5435 } 5436 if (udp->udp_ipversion == IPV6_VERSION) { 5437 ude6.udp6EntryInfo.ue_state = state; 5438 ude6.udp6LocalAddress = udp->udp_v6src; 5439 ude6.udp6LocalPort = ntohs(udp->udp_port); 5440 ude6.udp6IfIndex = udp->udp_bound_if; 5441 if (udp->udp_state == TS_DATA_XFER) { 5442 ude6.udp6EntryInfo.ue_RemoteAddress = 5443 udp->udp_v6dst; 5444 ude6.udp6EntryInfo.ue_RemotePort = 5445 ntohs(udp->udp_dstport); 5446 } else { 5447 ude6.udp6EntryInfo.ue_RemoteAddress = 5448 sin6_null.sin6_addr; 5449 ude6.udp6EntryInfo.ue_RemotePort = 0; 5450 } 5451 (void) snmp_append_data2(mp6_conn_data, 5452 &mp6_conn_tail, (char *)&ude6, 5453 sizeof (ude6)); 5454 } 5455 } 5456 } 5457 5458 /* IPv4 UDP endpoints */ 5459 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5460 sizeof (struct T_optmgmt_ack)]; 5461 optp->level = MIB2_UDP; 5462 optp->name = MIB2_UDP_ENTRY; 5463 optp->len = msgdsize(mp_conn_data); 5464 qreply(q, mp_conn_ctl); 5465 5466 /* IPv6 UDP endpoints */ 5467 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5468 sizeof (struct T_optmgmt_ack)]; 5469 optp->level = MIB2_UDP6; 5470 optp->name = MIB2_UDP6_ENTRY; 5471 optp->len = msgdsize(mp6_conn_data); 5472 qreply(q, mp6_conn_ctl); 5473 5474 return (1); 5475 } 5476 5477 /* 5478 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 5479 * NOTE: Per MIB-II, UDP has no writable data. 5480 * TODO: If this ever actually tries to set anything, it needs to be 5481 * to do the appropriate locking. 
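 * In the meantime a MIB2_UDP set request is rejected as invalid below,
 * while a request for any other level is reported as acceptable and
 * otherwise ignored.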
5482 */ 5483 /* ARGSUSED */ 5484 int 5485 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5486 uchar_t *ptr, int len) 5487 { 5488 switch (level) { 5489 case MIB2_UDP: 5490 return (0); 5491 default: 5492 return (1); 5493 } 5494 } 5495 5496 static void 5497 udp_report_item(mblk_t *mp, udp_t *udp) 5498 { 5499 char *state; 5500 char addrbuf1[INET6_ADDRSTRLEN]; 5501 char addrbuf2[INET6_ADDRSTRLEN]; 5502 uint_t print_len, buf_len; 5503 5504 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5505 ASSERT(buf_len >= 0); 5506 if (buf_len == 0) 5507 return; 5508 5509 if (udp->udp_state == TS_UNBND) 5510 state = "UNBOUND"; 5511 else if (udp->udp_state == TS_IDLE) 5512 state = "IDLE"; 5513 else if (udp->udp_state == TS_DATA_XFER) 5514 state = "CONNECTED"; 5515 else 5516 state = "UnkState"; 5517 print_len = snprintf((char *)mp->b_wptr, buf_len, 5518 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5519 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5520 inet_ntop(AF_INET6, &udp->udp_v6src, 5521 addrbuf1, sizeof (addrbuf1)), 5522 inet_ntop(AF_INET6, &udp->udp_v6dst, 5523 addrbuf2, sizeof (addrbuf2)), 5524 ntohs(udp->udp_dstport), state); 5525 if (print_len < buf_len) { 5526 mp->b_wptr += print_len; 5527 } else { 5528 mp->b_wptr += buf_len; 5529 } 5530 } 5531 5532 /* Report for ndd "udp_status" */ 5533 /* ARGSUSED */ 5534 static int 5535 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5536 { 5537 zoneid_t zoneid; 5538 connf_t *connfp; 5539 conn_t *connp = Q_TO_CONN(q); 5540 udp_t *udp = connp->conn_udp; 5541 int i; 5542 5543 /* 5544 * Because of the ndd constraint, at most we can have 64K buffer 5545 * to put in all UDP info. So to be more efficient, just 5546 * allocate a 64K buffer here, assuming we need that large buffer. 5547 * This may be a problem as any user can read udp_status. Therefore 5548 * we limit the rate of doing this using udp_ndd_get_info_interval. 5549 * This should be OK as normal users should not do this too often. 5550 */ 5551 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 5552 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 5553 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 5554 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5555 return (0); 5556 } 5557 } 5558 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5559 /* The following may work even if we cannot get a large buf. */ 5560 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5561 return (0); 5562 } 5563 (void) mi_mpprintf(mp, 5564 "UDP " MI_COL_HDRPAD_STR 5565 /* 12345678[89ABCDEF] */ 5566 " zone lport src addr dest addr port state"); 5567 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5568 5569 zoneid = connp->conn_zoneid; 5570 5571 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5572 connfp = &ipcl_globalhash_fanout[i]; 5573 connp = NULL; 5574 5575 while ((connp = ipcl_get_next_conn(connfp, connp, 5576 IPCL_UDP))) { 5577 udp = connp->conn_udp; 5578 if (zoneid != GLOBAL_ZONEID && 5579 zoneid != connp->conn_zoneid) 5580 continue; 5581 5582 udp_report_item(mp->b_cont, udp); 5583 } 5584 } 5585 udp_last_ndd_get_info_time = ddi_get_lbolt(); 5586 return (0); 5587 } 5588 5589 /* 5590 * This routine creates a T_UDERROR_IND message and passes it upstream. 5591 * The address and options are copied from the T_UNITDATA_REQ message 5592 * passed in mp. This message is freed. 
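 *
 * For M_DATA callers the destination must be supplied explicitly via
 * destaddr/destlen (no options are reported); for T_UNITDATA_REQ
 * messages the address and any options are bounds-checked against the
 * message before being echoed back in the T_UDERROR_IND.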
5593 */ 5594 static void 5595 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5596 t_scalar_t err) 5597 { 5598 struct T_unitdata_req *tudr; 5599 mblk_t *mp1; 5600 uchar_t *optaddr; 5601 t_scalar_t optlen; 5602 5603 if (DB_TYPE(mp) == M_DATA) { 5604 ASSERT(destaddr != NULL && destlen != 0); 5605 optaddr = NULL; 5606 optlen = 0; 5607 } else { 5608 if ((mp->b_wptr < mp->b_rptr) || 5609 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5610 goto done; 5611 } 5612 tudr = (struct T_unitdata_req *)mp->b_rptr; 5613 destaddr = mp->b_rptr + tudr->DEST_offset; 5614 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5615 destaddr + tudr->DEST_length < mp->b_rptr || 5616 destaddr + tudr->DEST_length > mp->b_wptr) { 5617 goto done; 5618 } 5619 optaddr = mp->b_rptr + tudr->OPT_offset; 5620 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 5621 optaddr + tudr->OPT_length < mp->b_rptr || 5622 optaddr + tudr->OPT_length > mp->b_wptr) { 5623 goto done; 5624 } 5625 destlen = tudr->DEST_length; 5626 optlen = tudr->OPT_length; 5627 } 5628 5629 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5630 (char *)optaddr, optlen, err); 5631 if (mp1 != NULL) 5632 putnext(UDP_RD(q), mp1); 5633 5634 done: 5635 freemsg(mp); 5636 } 5637 5638 /* 5639 * This routine removes a port number association from a stream. It 5640 * is called by udp_wput to handle T_UNBIND_REQ messages. 5641 */ 5642 static void 5643 udp_unbind(queue_t *q, mblk_t *mp) 5644 { 5645 udp_t *udp = Q_TO_UDP(q); 5646 5647 /* If a bind has not been done, we can't unbind. */ 5648 if (udp->udp_state == TS_UNBND) { 5649 udp_err_ack(q, mp, TOUTSTATE, 0); 5650 return; 5651 } 5652 if (cl_inet_unbind != NULL) { 5653 /* 5654 * Running in cluster mode - register unbind information 5655 */ 5656 if (udp->udp_ipversion == IPV4_VERSION) { 5657 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 5658 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5659 (in_port_t)udp->udp_port); 5660 } else { 5661 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5662 (uint8_t *)&(udp->udp_v6src), 5663 (in_port_t)udp->udp_port); 5664 } 5665 } 5666 5667 udp_bind_hash_remove(udp, B_FALSE); 5668 V6_SET_ZERO(udp->udp_v6src); 5669 V6_SET_ZERO(udp->udp_bound_v6src); 5670 udp->udp_port = 0; 5671 udp->udp_state = TS_UNBND; 5672 5673 if (udp->udp_family == AF_INET6) { 5674 int error; 5675 5676 /* Rebuild the header template */ 5677 error = udp_build_hdrs(q, udp); 5678 if (error != 0) { 5679 udp_err_ack(q, mp, TSYSERR, error); 5680 return; 5681 } 5682 } 5683 /* 5684 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5685 * and therefore ip_unbind must never return NULL. 5686 */ 5687 mp = ip_unbind(q, mp); 5688 ASSERT(mp != NULL); 5689 putnext(UDP_RD(q), mp); 5690 } 5691 5692 /* 5693 * Don't let port fall into the privileged range. 5694 * Since the extra priviledged ports can be arbitrary we also 5695 * ensure that we exclude those from consideration. 5696 * udp_g_epriv_ports is not sorted thus we loop over it until 5697 * there are no changes. 5698 */ 5699 static in_port_t 5700 udp_update_next_port(in_port_t port, boolean_t random) 5701 { 5702 int i; 5703 5704 if (random && udp_random_anon_port != 0) { 5705 (void) random_get_pseudo_bytes((uint8_t *)&port, 5706 sizeof (in_port_t)); 5707 /* 5708 * Unless changed by a sys admin, the smallest anon port 5709 * is 32768 and the largest anon port is 65535. It is 5710 * very likely (50%) for the random port to be smaller 5711 * than the smallest anon port. 
When that happens, 5712 * add port % (anon port range) to the smallest anon 5713 * port to get the random port. It should fall into the 5714 * valid anon port range. 5715 */ 5716 if (port < udp_smallest_anon_port) { 5717 port = udp_smallest_anon_port + 5718 port % (udp_largest_anon_port - 5719 udp_smallest_anon_port); 5720 } 5721 } 5722 5723 retry: 5724 if (port < udp_smallest_anon_port || port > udp_largest_anon_port) 5725 port = udp_smallest_anon_port; 5726 5727 if (port < udp_smallest_nonpriv_port) 5728 port = udp_smallest_nonpriv_port; 5729 5730 for (i = 0; i < udp_g_num_epriv_ports; i++) { 5731 if (port == udp_g_epriv_ports[i]) { 5732 port++; 5733 /* 5734 * Make sure that the port is in the 5735 * valid range. 5736 */ 5737 goto retry; 5738 } 5739 } 5740 return (port); 5741 } 5742 5743 static mblk_t * 5744 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5745 uint_t srcid, int *error) 5746 { 5747 udp_t *udp = connp->conn_udp; 5748 queue_t *q = connp->conn_wq; 5749 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 5750 mblk_t *mp2; 5751 ipha_t *ipha; 5752 int ip_hdr_length; 5753 uint32_t ip_len; 5754 udpha_t *udpha; 5755 5756 *error = 0; 5757 5758 /* mp1 points to the M_DATA mblk carrying the packet */ 5759 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5760 5761 /* Add an IP header */ 5762 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 5763 udp->udp_ip_snd_options_len; 5764 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5765 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5766 !OK_32PTR(ipha)) { 5767 mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO); 5768 if (mp2 == NULL) { 5769 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5770 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5771 *error = ENOMEM; 5772 goto done; 5773 } 5774 mp2->b_wptr = DB_LIM(mp2); 5775 mp2->b_cont = mp1; 5776 mp1 = mp2; 5777 if (DB_TYPE(mp) != M_DATA) 5778 mp->b_cont = mp1; 5779 else 5780 mp = mp1; 5781 5782 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5783 } 5784 ip_hdr_length -= UDPH_SIZE; 5785 #ifdef _BIG_ENDIAN 5786 /* Set version, header length, and tos */ 5787 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5788 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5789 udp->udp_type_of_service); 5790 /* Set ttl and protocol */ 5791 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5792 #else 5793 /* Set version, header length, and tos */ 5794 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5795 ((udp->udp_type_of_service << 8) | 5796 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5797 /* Set ttl and protocol */ 5798 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5799 #endif 5800 /* 5801 * Copy our address into the packet. If this is zero, 5802 * first look at __sin6_src_id for a hint. If we leave the source 5803 * as INADDR_ANY then ip will fill in the real source address. 
5804 */ 5805 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5806 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5807 in6_addr_t v6src; 5808 5809 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid); 5810 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5811 } 5812 5813 ipha->ipha_fragment_offset_and_flags = 0; 5814 ipha->ipha_ident = 0; 5815 5816 mp1->b_rptr = (uchar_t *)ipha; 5817 5818 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5819 (uintptr_t)UINT_MAX); 5820 5821 /* Determine length of packet */ 5822 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5823 if ((mp2 = mp1->b_cont) != NULL) { 5824 do { 5825 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5826 ip_len += (uint32_t)MBLKL(mp2); 5827 } while ((mp2 = mp2->b_cont) != NULL); 5828 } 5829 /* 5830 * If the size of the packet is greater than the maximum allowed by 5831 * ip, return an error. Passing this down could cause panics because 5832 * the size will have wrapped and be inconsistent with the msg size. 5833 */ 5834 if (ip_len > IP_MAXPACKET) { 5835 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5836 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5837 *error = EMSGSIZE; 5838 goto done; 5839 } 5840 ipha->ipha_length = htons((uint16_t)ip_len); 5841 ip_len -= ip_hdr_length; 5842 ip_len = htons((uint16_t)ip_len); 5843 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5844 5845 /* 5846 * Copy in the destination address 5847 */ 5848 if (v4dst == INADDR_ANY) 5849 ipha->ipha_dst = htonl(INADDR_LOOPBACK); 5850 else 5851 ipha->ipha_dst = v4dst; 5852 5853 /* 5854 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5855 */ 5856 if (CLASSD(v4dst)) 5857 ipha->ipha_ttl = udp->udp_multicast_ttl; 5858 5859 udpha->uha_dst_port = port; 5860 udpha->uha_src_port = udp->udp_port; 5861 5862 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 5863 uint32_t cksum; 5864 5865 bcopy(udp->udp_ip_snd_options, &ipha[1], 5866 udp->udp_ip_snd_options_len); 5867 /* 5868 * Massage source route putting first source route in ipha_dst. 5869 * Ignore the destination in T_unitdata_req. 5870 * Create a checksum adjustment for a source route, if any. 5871 */ 5872 cksum = ip_massage_options(ipha); 5873 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5874 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5875 (ipha->ipha_dst & 0xFFFF); 5876 if ((int)cksum < 0) 5877 cksum--; 5878 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5879 /* 5880 * IP does the checksum if uha_checksum is non-zero, 5881 * We make it easy for IP to include our pseudo header 5882 * by putting our length in uha_checksum. 5883 */ 5884 cksum += ip_len; 5885 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5886 /* There might be a carry. */ 5887 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5888 #ifdef _LITTLE_ENDIAN 5889 if (udp_do_checksum) 5890 ip_len = (cksum << 16) | ip_len; 5891 #else 5892 if (udp_do_checksum) 5893 ip_len = (ip_len << 16) | cksum; 5894 else 5895 ip_len <<= 16; 5896 #endif 5897 } else { 5898 /* 5899 * IP does the checksum if uha_checksum is non-zero, 5900 * We make it easy for IP to include our pseudo header 5901 * by putting our length in uha_checksum. 
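		 * The 16-bit length is already in network byte order here;
		 * below it is either duplicated into both halves of ip_len
		 * (when udp_do_checksum is set) or left in the uha_length
		 * half only, so that the single 32-bit store into
		 * uha_length further down fills in uha_length and
		 * uha_checksum in one shot.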
5902 */ 5903 if (udp_do_checksum) 5904 ip_len |= (ip_len << 16); 5905 #ifndef _LITTLE_ENDIAN 5906 else 5907 ip_len <<= 16; 5908 #endif 5909 } 5910 /* Set UDP length and checksum */ 5911 *((uint32_t *)&udpha->uha_length) = ip_len; 5912 5913 if (DB_TYPE(mp) != M_DATA) { 5914 ASSERT(mp != mp1); 5915 freeb(mp); 5916 } 5917 5918 /* mp has been consumed and we'll return success */ 5919 ASSERT(*error == 0); 5920 mp = NULL; 5921 5922 /* We're done. Pass the packet to ip. */ 5923 BUMP_MIB(&udp_mib, udpOutDatagrams); 5924 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5925 "udp_wput_end: q %p (%S)", q, "end"); 5926 5927 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5928 CONN_OUTBOUND_POLICY_PRESENT(connp) || 5929 connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || 5930 connp->conn_nofailover_ill != NULL || 5931 connp->conn_outgoing_ill != NULL || 5932 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5933 IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) { 5934 UDP_STAT(udp_ip_send); 5935 ip_output(connp, mp1, connp->conn_wq, IP_WPUT); 5936 } else { 5937 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5938 } 5939 5940 done: 5941 if (*error != 0) { 5942 ASSERT(mp != NULL); 5943 BUMP_MIB(&udp_mib, udpOutErrors); 5944 } 5945 return (mp); 5946 } 5947 5948 static void 5949 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5950 { 5951 conn_t *connp = udp->udp_connp; 5952 ipaddr_t src, dst; 5953 ill_t *ill; 5954 ire_t *ire; 5955 ipif_t *ipif = NULL; 5956 mblk_t *ire_fp_mp; 5957 uint_t ire_fp_mp_len; 5958 uint16_t *up; 5959 uint32_t cksum, hcksum_txflags; 5960 queue_t *dev_q; 5961 boolean_t retry_caching; 5962 5963 dst = ipha->ipha_dst; 5964 src = ipha->ipha_src; 5965 ASSERT(ipha->ipha_ident == 0); 5966 5967 if (CLASSD(dst)) { 5968 int err; 5969 5970 ipif = conn_get_held_ipif(connp, 5971 &connp->conn_multicast_ipif, &err); 5972 5973 if (ipif == NULL || ipif->ipif_isv6 || 5974 (ipif->ipif_ill->ill_phyint->phyint_flags & 5975 PHYI_LOOPBACK)) { 5976 if (ipif != NULL) 5977 ipif_refrele(ipif); 5978 UDP_STAT(udp_ip_send); 5979 ip_output(connp, mp, q, IP_WPUT); 5980 return; 5981 } 5982 } 5983 5984 retry_caching = B_FALSE; 5985 mutex_enter(&connp->conn_lock); 5986 ire = connp->conn_ire_cache; 5987 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5988 5989 if (ire == NULL || ire->ire_addr != dst || 5990 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 5991 retry_caching = B_TRUE; 5992 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 5993 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 5994 5995 ASSERT(ipif != NULL); 5996 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 5997 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 5998 retry_caching = B_TRUE; 5999 } 6000 6001 if (!retry_caching) { 6002 ASSERT(ire != NULL); 6003 IRE_REFHOLD(ire); 6004 mutex_exit(&connp->conn_lock); 6005 } else { 6006 boolean_t cached = B_FALSE; 6007 6008 connp->conn_ire_cache = NULL; 6009 mutex_exit(&connp->conn_lock); 6010 6011 /* Release the old ire */ 6012 if (ire != NULL) { 6013 IRE_REFRELE_NOTR(ire); 6014 ire = NULL; 6015 } 6016 6017 if (CLASSD(dst)) { 6018 ASSERT(ipif != NULL); 6019 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6020 connp->conn_zoneid, MATCH_IRE_ILL_GROUP); 6021 } else { 6022 ASSERT(ipif == NULL); 6023 ire = ire_cache_lookup(dst, connp->conn_zoneid); 6024 } 6025 6026 if (ire == NULL) { 6027 if (ipif != NULL) 6028 ipif_refrele(ipif); 6029 UDP_STAT(udp_ire_null); 6030 ip_output(connp, mp, q, IP_WPUT); 6031 return; 6032 } 6033 IRE_REFHOLD_NOTR(ire); 6034 6035 
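		/*
		 * Try to cache the freshly looked-up ire in conn_ire_cache
		 * for subsequent sends.  Caching is skipped if the conn is
		 * closing, if another thread has already cached an ire, or
		 * if this ire has been marked condemned; in that case the
		 * extra reference taken above is dropped again right after
		 * the attempt.
		 */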
mutex_enter(&connp->conn_lock); 6036 if (!(connp->conn_state_flags & CONN_CLOSING) && 6037 connp->conn_ire_cache == NULL) { 6038 rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6039 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6040 connp->conn_ire_cache = ire; 6041 cached = B_TRUE; 6042 } 6043 rw_exit(&ire->ire_bucket->irb_lock); 6044 } 6045 mutex_exit(&connp->conn_lock); 6046 6047 /* 6048 * We can continue to use the ire but since it was not 6049 * cached, we should drop the extra reference. 6050 */ 6051 if (!cached) 6052 IRE_REFRELE_NOTR(ire); 6053 } 6054 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6055 ASSERT(!CLASSD(dst) || ipif != NULL); 6056 6057 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6058 (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL || 6059 ire->ire_max_frag < ntohs(ipha->ipha_length) || 6060 (ire_fp_mp = ire->ire_fp_mp) == NULL || 6061 (connp->conn_nexthop_set) || 6062 (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) { 6063 if (ipif != NULL) 6064 ipif_refrele(ipif); 6065 UDP_STAT(udp_ip_ire_send); 6066 IRE_REFRELE(ire); 6067 ip_output(connp, mp, q, IP_WPUT); 6068 return; 6069 } 6070 6071 BUMP_MIB(&ip_mib, ipOutRequests); 6072 6073 ill = ire_to_ill(ire); 6074 ASSERT(ill != NULL); 6075 6076 dev_q = ire->ire_stq->q_next; 6077 ASSERT(dev_q != NULL); 6078 /* 6079 * If the service thread is already running, or if the driver 6080 * queue is currently flow-controlled, queue this packet. 6081 */ 6082 if ((q->q_first != NULL || connp->conn_draining) || 6083 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 6084 if (ip_output_queue) { 6085 (void) putq(q, mp); 6086 } else { 6087 BUMP_MIB(&ip_mib, ipOutDiscards); 6088 freemsg(mp); 6089 } 6090 if (ipif != NULL) 6091 ipif_refrele(ipif); 6092 IRE_REFRELE(ire); 6093 return; 6094 } 6095 6096 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6097 #ifndef _BIG_ENDIAN 6098 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6099 #endif 6100 6101 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6102 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6103 src = ipha->ipha_src = ipif->ipif_src_addr; 6104 else 6105 src = ipha->ipha_src = ire->ire_src_addr; 6106 } 6107 6108 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6109 ASSERT(ill->ill_hcksum_capab != NULL); 6110 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6111 } else { 6112 hcksum_txflags = 0; 6113 } 6114 6115 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6116 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6117 6118 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6119 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6120 if (*up != 0) { 6121 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6122 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6123 ntohs(ipha->ipha_length), cksum); 6124 6125 /* Software checksum? 
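		 * If IP_CKSUM_XMIT_FAST left no hardware checksum offload
		 * flags on the mblk, the checksum was computed in software
		 * and is accounted for below.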
*/ 6126 if (DB_CKSUMFLAGS(mp) == 0) { 6127 UDP_STAT(udp_out_sw_cksum); 6128 UDP_STAT_UPDATE(udp_out_sw_cksum_bytes, 6129 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6130 } 6131 } 6132 6133 ipha->ipha_fragment_offset_and_flags |= 6134 (uint32_t)htons(ire->ire_frag_flag); 6135 6136 /* Calculate IP header checksum if hardware isn't capable */ 6137 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6138 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6139 ((uint16_t *)ipha)[4]); 6140 } 6141 6142 if (CLASSD(dst)) { 6143 ilm_t *ilm; 6144 6145 ILM_WALKER_HOLD(ill); 6146 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6147 ILM_WALKER_RELE(ill); 6148 if (ilm != NULL) { 6149 ip_multicast_loopback(q, ill, mp, 6150 connp->conn_multicast_loop ? 0 : 6151 IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); 6152 } 6153 6154 /* If multicast TTL is 0 then we are done */ 6155 if (ipha->ipha_ttl == 0) { 6156 if (ipif != NULL) 6157 ipif_refrele(ipif); 6158 freemsg(mp); 6159 IRE_REFRELE(ire); 6160 return; 6161 } 6162 } 6163 6164 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6165 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6166 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6167 6168 UPDATE_OB_PKT_COUNT(ire); 6169 ire->ire_last_used_time = lbolt; 6170 6171 if (ILL_POLL_CAPABLE(ill)) { 6172 /* 6173 * Send the packet directly to DLD, where it may be queued 6174 * depending on the availability of transmit resources at 6175 * the media layer. 6176 */ 6177 IP_POLL_ILL_TX(ill, mp); 6178 } else { 6179 putnext(ire->ire_stq, mp); 6180 } 6181 6182 if (ipif != NULL) 6183 ipif_refrele(ipif); 6184 IRE_REFRELE(ire); 6185 } 6186 6187 /* 6188 * This routine handles all messages passed downstream. It either 6189 * consumes the message or passes it downstream; it never queues a 6190 * a message. 6191 */ 6192 static void 6193 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6194 { 6195 sin6_t *sin6; 6196 sin_t *sin; 6197 ipaddr_t v4dst; 6198 uint16_t port; 6199 uint_t srcid; 6200 queue_t *q = connp->conn_wq; 6201 udp_t *udp = connp->conn_udp; 6202 t_scalar_t optlen; 6203 int error = 0; 6204 struct sockaddr_storage ss; 6205 6206 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6207 "udp_wput_start: connp %p mp %p", connp, mp); 6208 6209 /* 6210 * We directly handle several cases here: T_UNITDATA_REQ message 6211 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 6212 * connected and non-connected socket. The latter carries the 6213 * address structure along when this routine gets called. 
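	 *
	 * In short: M_DATA on a connected endpoint is sent straight via
	 * udp_output_v4()/udp_output_v6() using the cached destination;
	 * M_DATA on an unconnected endpoint must have been handed down by
	 * sockfs together with a destination address; a well-formed
	 * T_UNITDATA_REQ has its destination and option lengths pulled out
	 * of the request; anything else is deferred to udp_wput_other().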
6214 */ 6215 switch (DB_TYPE(mp)) { 6216 case M_DATA: 6217 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6218 if (!udp->udp_direct_sockfs || 6219 addr == NULL || addrlen == 0) { 6220 /* Not connected; address is required */ 6221 BUMP_MIB(&udp_mib, udpOutErrors); 6222 UDP_STAT(udp_out_err_notconn); 6223 freemsg(mp); 6224 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6225 "udp_wput_end: connp %p (%S)", connp, 6226 "not-connected; address required"); 6227 return; 6228 } 6229 ASSERT(udp->udp_issocket); 6230 UDP_DBGSTAT(udp_data_notconn); 6231 /* Not connected; do some more checks below */ 6232 optlen = 0; 6233 break; 6234 } 6235 /* M_DATA for connected socket */ 6236 UDP_DBGSTAT(udp_data_conn); 6237 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6238 6239 /* Initialize addr and addrlen as if they're passed in */ 6240 if (udp->udp_family == AF_INET) { 6241 sin = (sin_t *)&ss; 6242 sin->sin_family = AF_INET; 6243 sin->sin_port = udp->udp_dstport; 6244 sin->sin_addr.s_addr = v4dst; 6245 addr = (struct sockaddr *)sin; 6246 addrlen = sizeof (*sin); 6247 } else { 6248 sin6 = (sin6_t *)&ss; 6249 sin6->sin6_family = AF_INET6; 6250 sin6->sin6_port = udp->udp_dstport; 6251 sin6->sin6_flowinfo = udp->udp_flowinfo; 6252 sin6->sin6_addr = udp->udp_v6dst; 6253 sin6->sin6_scope_id = 0; 6254 sin6->__sin6_src_id = 0; 6255 addr = (struct sockaddr *)sin6; 6256 addrlen = sizeof (*sin6); 6257 } 6258 6259 if (udp->udp_family == AF_INET || 6260 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 6261 /* 6262 * Handle both AF_INET and AF_INET6; the latter 6263 * for IPV4 mapped destination addresses. Note 6264 * here that both addr and addrlen point to the 6265 * corresponding struct depending on the address 6266 * family of the socket. 6267 */ 6268 mp = udp_output_v4(connp, mp, v4dst, 6269 udp->udp_dstport, 0, &error); 6270 } else { 6271 mp = udp_output_v6(connp, mp, sin6, 0, &error); 6272 } 6273 if (error != 0) { 6274 ASSERT(addr != NULL && addrlen != 0); 6275 goto ud_error; 6276 } 6277 return; 6278 case M_PROTO: 6279 case M_PCPROTO: { 6280 struct T_unitdata_req *tudr; 6281 6282 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6283 tudr = (struct T_unitdata_req *)mp->b_rptr; 6284 6285 /* Handle valid T_UNITDATA_REQ here */ 6286 if (MBLKL(mp) >= sizeof (*tudr) && 6287 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6288 if (mp->b_cont == NULL) { 6289 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6290 "udp_wput_end: q %p (%S)", q, "badaddr"); 6291 error = EPROTO; 6292 goto ud_error; 6293 } 6294 6295 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6296 tudr->DEST_length)) { 6297 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6298 "udp_wput_end: q %p (%S)", q, "badaddr"); 6299 error = EADDRNOTAVAIL; 6300 goto ud_error; 6301 } 6302 /* 6303 * If a port has not been bound to the stream, fail. 6304 * This is not a problem when sockfs is directly 6305 * above us, because it will ensure that the socket 6306 * is first bound before allowing data to be sent. 
6307 */ 6308 if (udp->udp_state == TS_UNBND) { 6309 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6310 "udp_wput_end: q %p (%S)", q, "outstate"); 6311 error = EPROTO; 6312 goto ud_error; 6313 } 6314 addr = (struct sockaddr *) 6315 &mp->b_rptr[tudr->DEST_offset]; 6316 addrlen = tudr->DEST_length; 6317 optlen = tudr->OPT_length; 6318 if (optlen != 0) 6319 UDP_STAT(udp_out_opt); 6320 break; 6321 } 6322 /* FALLTHRU */ 6323 } 6324 default: 6325 udp_become_writer(connp, mp, udp_wput_other_wrapper, 6326 SQTAG_UDP_OUTPUT); 6327 return; 6328 } 6329 ASSERT(addr != NULL); 6330 6331 switch (udp->udp_family) { 6332 case AF_INET6: 6333 sin6 = (sin6_t *)addr; 6334 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 6335 sin6->sin6_family != AF_INET6) { 6336 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6337 "udp_wput_end: q %p (%S)", q, "badaddr"); 6338 error = EADDRNOTAVAIL; 6339 goto ud_error; 6340 } 6341 6342 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6343 /* 6344 * Destination is a non-IPv4-compatible IPv6 address. 6345 * Send out an IPv6 format packet. 6346 */ 6347 mp = udp_output_v6(connp, mp, sin6, optlen, &error); 6348 if (error != 0) 6349 goto ud_error; 6350 6351 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6352 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6353 return; 6354 } 6355 /* 6356 * If the local address is not zero or a mapped address 6357 * return an error. It would be possible to send an IPv4 6358 * packet but the response would never make it back to the 6359 * application since it is bound to a non-mapped address. 6360 */ 6361 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6362 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6363 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6364 "udp_wput_end: q %p (%S)", q, "badaddr"); 6365 error = EADDRNOTAVAIL; 6366 goto ud_error; 6367 } 6368 /* Send IPv4 packet without modifying udp_ipversion */ 6369 /* Extract port and ipaddr */ 6370 port = sin6->sin6_port; 6371 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6372 srcid = sin6->__sin6_src_id; 6373 break; 6374 6375 case AF_INET: 6376 sin = (sin_t *)addr; 6377 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 6378 sin->sin_family != AF_INET) { 6379 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6380 "udp_wput_end: q %p (%S)", q, "badaddr"); 6381 error = EADDRNOTAVAIL; 6382 goto ud_error; 6383 } 6384 /* Extract port and ipaddr */ 6385 port = sin->sin_port; 6386 v4dst = sin->sin_addr.s_addr; 6387 srcid = 0; 6388 break; 6389 } 6390 6391 /* 6392 * If options passed in, feed it for verification and handling 6393 */ 6394 if (optlen != 0) { 6395 ASSERT(DB_TYPE(mp) != M_DATA); 6396 if (udp_unitdata_opt_process(q, mp, &error, NULL) < 0) { 6397 /* failure */ 6398 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6399 "udp_wput_end: q %p (%S)", q, 6400 "udp_unitdata_opt_process"); 6401 goto ud_error; 6402 } 6403 /* 6404 * Note: success in processing options. 
6405 * mp option buffer represented by 6406 * OPT_length/offset now potentially modified 6407 * and contain option setting results 6408 */ 6409 } 6410 ASSERT(error == 0); 6411 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); 6412 if (error != 0) { 6413 ud_error: 6414 UDP_STAT(udp_out_err_output); 6415 ASSERT(mp != NULL); 6416 /* mp is freed by the following routine */ 6417 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6418 (t_scalar_t)error); 6419 } 6420 } 6421 6422 /* ARGSUSED */ 6423 static void 6424 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 6425 { 6426 udp_output((conn_t *)arg, mp, NULL, 0); 6427 _UDP_EXIT((conn_t *)arg); 6428 } 6429 6430 static void 6431 udp_wput(queue_t *q, mblk_t *mp) 6432 { 6433 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 6434 SQTAG_UDP_WPUT); 6435 } 6436 6437 /* 6438 * Allocate and prepare a T_UNITDATA_REQ message. 6439 */ 6440 static mblk_t * 6441 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 6442 { 6443 struct T_unitdata_req *tudr; 6444 mblk_t *mp; 6445 6446 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 6447 if (mp != NULL) { 6448 mp->b_wptr += sizeof (*tudr) + addrlen; 6449 DB_TYPE(mp) = M_PROTO; 6450 6451 tudr = (struct T_unitdata_req *)mp->b_rptr; 6452 tudr->PRIM_type = T_UNITDATA_REQ; 6453 tudr->DEST_length = addrlen; 6454 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 6455 tudr->OPT_length = 0; 6456 tudr->OPT_offset = 0; 6457 bcopy(addr, tudr+1, addrlen); 6458 } 6459 return (mp); 6460 } 6461 6462 /* 6463 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 6464 * is valid when we are directly beneath the stream head, and thus sockfs 6465 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6466 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 6467 * this is done for both connected and non-connected endpoint. 6468 */ 6469 void 6470 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6471 { 6472 conn_t *connp; 6473 udp_t *udp; 6474 6475 q = UDP_WR(q); 6476 connp = Q_TO_CONN(q); 6477 udp = connp->conn_udp; 6478 6479 /* udpsockfs should only send down M_DATA for this entry point */ 6480 ASSERT(DB_TYPE(mp) == M_DATA); 6481 6482 mutex_enter(&connp->conn_lock); 6483 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 6484 6485 if (udp->udp_mode != UDP_MT_HOT) { 6486 /* 6487 * We can't enter this conn right away because another 6488 * thread is currently executing as writer; therefore we 6489 * need to deposit the message into the squeue to be 6490 * drained later. If a socket address is present, we 6491 * need to create a T_UNITDATA_REQ message as placeholder. 6492 */ 6493 if (addr != NULL && addrlen != 0) { 6494 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 6495 6496 if (tudr_mp == NULL) { 6497 mutex_exit(&connp->conn_lock); 6498 BUMP_MIB(&udp_mib, udpOutErrors); 6499 UDP_STAT(udp_out_err_tudr); 6500 freemsg(mp); 6501 return; 6502 } 6503 /* Tag the packet with T_UNITDATA_REQ */ 6504 tudr_mp->b_cont = mp; 6505 mp = tudr_mp; 6506 } 6507 mutex_exit(&connp->conn_lock); 6508 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 6509 return; 6510 } 6511 6512 /* We can execute as reader right away. 
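	 * Bump the reader count while still holding conn_lock, do the send
	 * via udp_output(), and drop the count afterwards so that a thread
	 * waiting to become exclusive writer can proceed.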
*/ 6513 UDP_READERS_INCREF(udp); 6514 mutex_exit(&connp->conn_lock); 6515 6516 udp_output(connp, mp, addr, addrlen); 6517 6518 mutex_enter(&connp->conn_lock); 6519 UDP_MODE_ASSERTIONS(udp, UDP_EXIT); 6520 UDP_READERS_DECREF(udp); 6521 mutex_exit(&connp->conn_lock); 6522 } 6523 6524 /* 6525 * udp_output_v6(): 6526 * Assumes that udp_wput did some sanity checking on the destination 6527 * address. 6528 */ 6529 static mblk_t * 6530 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen, 6531 int *error) 6532 { 6533 ip6_t *ip6h; 6534 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6535 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 6536 mblk_t *mp2; 6537 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6538 size_t ip_len; 6539 udpha_t *udph; 6540 udp_t *udp = connp->conn_udp; 6541 queue_t *q = connp->conn_wq; 6542 ip6_pkt_t ipp_s; /* For ancillary data options */ 6543 ip6_pkt_t *ipp = &ipp_s; 6544 ip6_pkt_t *tipp; /* temporary ipp */ 6545 uint32_t csum = 0; 6546 uint_t ignore = 0; 6547 uint_t option_exists = 0, is_sticky = 0; 6548 uint8_t *cp; 6549 uint8_t *nxthdr_ptr; 6550 6551 *error = 0; 6552 6553 /* mp1 points to the M_DATA mblk carrying the packet */ 6554 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6555 ASSERT(tudr_optlen == 0 || DB_TYPE(mp) != M_DATA); 6556 6557 /* 6558 * If the local address is a mapped address return 6559 * an error. 6560 * It would be possible to send an IPv6 packet but the 6561 * response would never make it back to the application 6562 * since it is bound to a mapped address. 6563 */ 6564 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6565 *error = EADDRNOTAVAIL; 6566 goto done; 6567 } 6568 6569 ipp->ipp_fields = 0; 6570 ipp->ipp_sticky_ignored = 0; 6571 6572 /* 6573 * If TPI options passed in, feed it for verification and handling 6574 */ 6575 if (tudr_optlen != 0) { 6576 if (udp_unitdata_opt_process(q, mp, error, (void *)ipp) < 0) { 6577 /* failure */ 6578 goto done; 6579 } 6580 ignore = ipp->ipp_sticky_ignored; 6581 ASSERT(*error == 0); 6582 } 6583 6584 if (sin6->sin6_scope_id != 0 && 6585 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6586 /* 6587 * IPPF_SCOPE_ID is special. It's neither a sticky 6588 * option nor ancillary data. It needs to be 6589 * explicitly set in options_exists. 6590 */ 6591 option_exists |= IPPF_SCOPE_ID; 6592 } 6593 6594 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6595 /* No sticky options nor ancillary data. */ 6596 goto no_options; 6597 } 6598 6599 /* 6600 * Go through the options figuring out where each is going to 6601 * come from and build two masks. The first mask indicates if 6602 * the option exists at all. The second mask indicates if the 6603 * option is sticky or ancillary. 
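	 * For each option the ancillary data (ipp) takes precedence; the
	 * sticky copy in udp_sticky_ipp is used only when no ancillary
	 * value was supplied, and is then also recorded in is_sticky so
	 * that the header-building code below fetches it from the right
	 * place (see ANCIL_OR_STICKY_PTR()).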
6604 */ 6605 if (!(ignore & IPPF_HOPOPTS)) { 6606 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6607 option_exists |= IPPF_HOPOPTS; 6608 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6609 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6610 option_exists |= IPPF_HOPOPTS; 6611 is_sticky |= IPPF_HOPOPTS; 6612 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_hopoptslen; 6613 } 6614 } 6615 6616 if (!(ignore & IPPF_RTHDR)) { 6617 if (ipp->ipp_fields & IPPF_RTHDR) { 6618 option_exists |= IPPF_RTHDR; 6619 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6620 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6621 option_exists |= IPPF_RTHDR; 6622 is_sticky |= IPPF_RTHDR; 6623 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6624 } 6625 } 6626 6627 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6628 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6629 option_exists |= IPPF_RTDSTOPTS; 6630 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6631 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6632 option_exists |= IPPF_RTDSTOPTS; 6633 is_sticky |= IPPF_RTDSTOPTS; 6634 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6635 } 6636 } 6637 6638 if (!(ignore & IPPF_DSTOPTS)) { 6639 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6640 option_exists |= IPPF_DSTOPTS; 6641 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6642 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6643 option_exists |= IPPF_DSTOPTS; 6644 is_sticky |= IPPF_DSTOPTS; 6645 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6646 } 6647 } 6648 6649 if (!(ignore & IPPF_IFINDEX)) { 6650 if (ipp->ipp_fields & IPPF_IFINDEX) { 6651 option_exists |= IPPF_IFINDEX; 6652 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6653 option_exists |= IPPF_IFINDEX; 6654 is_sticky |= IPPF_IFINDEX; 6655 } 6656 } 6657 6658 if (!(ignore & IPPF_ADDR)) { 6659 if (ipp->ipp_fields & IPPF_ADDR) { 6660 option_exists |= IPPF_ADDR; 6661 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6662 option_exists |= IPPF_ADDR; 6663 is_sticky |= IPPF_ADDR; 6664 } 6665 } 6666 6667 if (!(ignore & IPPF_DONTFRAG)) { 6668 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6669 option_exists |= IPPF_DONTFRAG; 6670 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6671 option_exists |= IPPF_DONTFRAG; 6672 is_sticky |= IPPF_DONTFRAG; 6673 } 6674 } 6675 6676 if (!(ignore & IPPF_USE_MIN_MTU)) { 6677 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6678 option_exists |= IPPF_USE_MIN_MTU; 6679 } else if (udp->udp_sticky_ipp.ipp_fields & 6680 IPPF_USE_MIN_MTU) { 6681 option_exists |= IPPF_USE_MIN_MTU; 6682 is_sticky |= IPPF_USE_MIN_MTU; 6683 } 6684 } 6685 6686 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6687 option_exists |= IPPF_HOPLIMIT; 6688 /* IPV6_HOPLIMIT can never be sticky */ 6689 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6690 6691 if (!(ignore & IPPF_UNICAST_HOPS) && 6692 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6693 option_exists |= IPPF_UNICAST_HOPS; 6694 is_sticky |= IPPF_UNICAST_HOPS; 6695 } 6696 6697 if (!(ignore & IPPF_MULTICAST_HOPS) && 6698 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6699 option_exists |= IPPF_MULTICAST_HOPS; 6700 is_sticky |= IPPF_MULTICAST_HOPS; 6701 } 6702 6703 if (!(ignore & IPPF_TCLASS)) { 6704 if (ipp->ipp_fields & IPPF_TCLASS) { 6705 option_exists |= IPPF_TCLASS; 6706 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6707 option_exists |= IPPF_TCLASS; 6708 is_sticky |= IPPF_TCLASS; 6709 } 6710 } 6711 6712 no_options: 6713 6714 /* 6715 * If any options carried 
in the ip6i_t were specified, we 6716 * need to account for the ip6i_t in the data we'll be sending 6717 * down. 6718 */ 6719 if (option_exists & IPPF_HAS_IP6I) 6720 udp_ip_hdr_len += sizeof (ip6i_t); 6721 6722 /* check/fix buffer config, setup pointers into it */ 6723 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6724 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6725 !OK_32PTR(ip6h)) { 6726 /* Try to get everything in a single mblk next time */ 6727 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6728 udp->udp_max_hdr_len = udp_ip_hdr_len; 6729 (void) mi_set_sth_wroff(UDP_RD(q), 6730 udp->udp_max_hdr_len + udp_wroff_extra); 6731 } 6732 mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); 6733 if (mp2 == NULL) { 6734 *error = ENOMEM; 6735 goto done; 6736 } 6737 mp2->b_wptr = DB_LIM(mp2); 6738 mp2->b_cont = mp1; 6739 mp1 = mp2; 6740 if (DB_TYPE(mp) != M_DATA) 6741 mp->b_cont = mp1; 6742 else 6743 mp = mp1; 6744 6745 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6746 } 6747 mp1->b_rptr = (unsigned char *)ip6h; 6748 ip6i = (ip6i_t *)ip6h; 6749 6750 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6751 if (option_exists & IPPF_HAS_IP6I) { 6752 ip6h = (ip6_t *)&ip6i[1]; 6753 ip6i->ip6i_flags = 0; 6754 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6755 6756 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6757 if (option_exists & IPPF_SCOPE_ID) { 6758 ip6i->ip6i_flags |= IP6I_IFINDEX; 6759 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6760 } else if (option_exists & IPPF_IFINDEX) { 6761 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6762 ASSERT(tipp->ipp_ifindex != 0); 6763 ip6i->ip6i_flags |= IP6I_IFINDEX; 6764 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6765 } 6766 6767 if (option_exists & IPPF_ADDR) { 6768 /* 6769 * Enable per-packet source address verification if 6770 * IPV6_PKTINFO specified the source address. 6771 * ip6_src is set in the transport's _wput function. 6772 */ 6773 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6774 } 6775 6776 if (option_exists & IPPF_DONTFRAG) { 6777 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6778 } 6779 6780 if (option_exists & IPPF_USE_MIN_MTU) { 6781 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6782 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6783 } 6784 6785 if (option_exists & IPPF_NEXTHOP) { 6786 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6787 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6788 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6789 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6790 } 6791 6792 /* 6793 * tell IP this is an ip6i_t private header 6794 */ 6795 ip6i->ip6i_nxt = IPPROTO_RAW; 6796 } 6797 6798 /* Initialize IPv6 header */ 6799 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6800 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6801 6802 /* Set the hoplimit of the outgoing packet. */ 6803 if (option_exists & IPPF_HOPLIMIT) { 6804 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
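		 * Otherwise multicast destinations get udp_multicast_ttl
		 * and unicast destinations get udp_ttl, as handled by the
		 * else branches below.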
*/ 6805 ip6h->ip6_hops = ipp->ipp_hoplimit; 6806 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6807 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6808 ip6h->ip6_hops = udp->udp_multicast_ttl; 6809 if (option_exists & IPPF_MULTICAST_HOPS) 6810 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6811 } else { 6812 ip6h->ip6_hops = udp->udp_ttl; 6813 if (option_exists & IPPF_UNICAST_HOPS) 6814 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6815 } 6816 6817 if (option_exists & IPPF_ADDR) { 6818 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6819 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6820 ip6h->ip6_src = tipp->ipp_addr; 6821 } else { 6822 /* 6823 * The source address was not set using IPV6_PKTINFO. 6824 * First look at the bound source. 6825 * If unspecified fallback to __sin6_src_id. 6826 */ 6827 ip6h->ip6_src = udp->udp_v6src; 6828 if (sin6->__sin6_src_id != 0 && 6829 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6830 ip_srcid_find_id(sin6->__sin6_src_id, 6831 &ip6h->ip6_src, connp->conn_zoneid); 6832 } 6833 } 6834 6835 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6836 cp = (uint8_t *)&ip6h[1]; 6837 6838 /* 6839 * Here's where we have to start stringing together 6840 * any extension headers in the right order: 6841 * Hop-by-hop, destination, routing, and final destination opts. 6842 */ 6843 if (option_exists & IPPF_HOPOPTS) { 6844 /* Hop-by-hop options */ 6845 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6846 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6847 6848 *nxthdr_ptr = IPPROTO_HOPOPTS; 6849 nxthdr_ptr = &hbh->ip6h_nxt; 6850 6851 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 6852 cp += tipp->ipp_hopoptslen; 6853 } 6854 /* 6855 * En-route destination options 6856 * Only do them if there's a routing header as well 6857 */ 6858 if (option_exists & IPPF_RTDSTOPTS) { 6859 ip6_dest_t *dst = (ip6_dest_t *)cp; 6860 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6861 6862 *nxthdr_ptr = IPPROTO_DSTOPTS; 6863 nxthdr_ptr = &dst->ip6d_nxt; 6864 6865 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6866 cp += tipp->ipp_rtdstoptslen; 6867 } 6868 /* 6869 * Routing header next 6870 */ 6871 if (option_exists & IPPF_RTHDR) { 6872 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6873 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6874 6875 *nxthdr_ptr = IPPROTO_ROUTING; 6876 nxthdr_ptr = &rt->ip6r_nxt; 6877 6878 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6879 cp += tipp->ipp_rthdrlen; 6880 } 6881 /* 6882 * Do ultimate destination options 6883 */ 6884 if (option_exists & IPPF_DSTOPTS) { 6885 ip6_dest_t *dest = (ip6_dest_t *)cp; 6886 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6887 6888 *nxthdr_ptr = IPPROTO_DSTOPTS; 6889 nxthdr_ptr = &dest->ip6d_nxt; 6890 6891 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6892 cp += tipp->ipp_dstoptslen; 6893 } 6894 /* 6895 * Now set the last header pointer to the proto passed in 6896 */ 6897 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6898 *nxthdr_ptr = IPPROTO_UDP; 6899 6900 /* Update UDP header */ 6901 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6902 udph->uha_dst_port = sin6->sin6_port; 6903 udph->uha_src_port = udp->udp_port; 6904 6905 /* 6906 * Copy in the destination address 6907 */ 6908 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6909 ip6h->ip6_dst = ipv6_loopback; 6910 else 6911 ip6h->ip6_dst = sin6->sin6_addr; 6912 6913 ip6h->ip6_vcf = 6914 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6915 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6916 6917 if (option_exists & IPPF_TCLASS) { 6918 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6919 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6920 tipp->ipp_tclass); 6921 } 6922 6923 if (option_exists & IPPF_RTHDR) { 6924 ip6_rthdr_t *rth; 6925 6926 /* 6927 * Perform any processing needed for source routing. 6928 * We know that all extension headers will be in the same mblk 6929 * as the IPv6 header. 6930 */ 6931 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6932 if (rth != NULL && rth->ip6r_segleft != 0) { 6933 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6934 /* 6935 * Drop packet - only support Type 0 routing. 6936 * Notify the application as well. 6937 */ 6938 *error = EPROTO; 6939 goto done; 6940 } 6941 6942 /* 6943 * rth->ip6r_len is twice the number of 6944 * addresses in the header. Thus it must be even. 6945 */ 6946 if (rth->ip6r_len & 0x1) { 6947 *error = EPROTO; 6948 goto done; 6949 } 6950 /* 6951 * Shuffle the routing header and ip6_dst 6952 * addresses, and get the checksum difference 6953 * between the first hop (in ip6_dst) and 6954 * the destination (in the last routing hdr entry). 6955 */ 6956 csum = ip_massage_options_v6(ip6h, rth); 6957 /* 6958 * Verify that the first hop isn't a mapped address. 6959 * Routers along the path need to do this verification 6960 * for subsequent hops. 6961 */ 6962 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6963 *error = EADDRNOTAVAIL; 6964 goto done; 6965 } 6966 6967 cp += (rth->ip6r_len + 1)*8; 6968 } 6969 } 6970 6971 /* count up length of UDP packet */ 6972 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6973 if ((mp2 = mp1->b_cont) != NULL) { 6974 do { 6975 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6976 ip_len += (uint32_t)MBLKL(mp2); 6977 } while ((mp2 = mp2->b_cont) != NULL); 6978 } 6979 6980 /* 6981 * If the size of the packet is greater than the maximum allowed by 6982 * ip, return an error. Passing this down could cause panics because 6983 * the size will have wrapped and be inconsistent with the msg size. 6984 */ 6985 if (ip_len > IP_MAXPACKET) { 6986 *error = EMSGSIZE; 6987 goto done; 6988 } 6989 6990 /* Store the UDP length. Subtract length of extension hdrs */ 6991 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 6992 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 6993 6994 /* 6995 * We make it easy for IP to include our pseudo header 6996 * by putting our length in uh_checksum, modified (if 6997 * we have a routing header) by the checksum difference 6998 * between the ultimate destination and first hop addresses. 6999 * Note: UDP over IPv6 must always checksum the packet. 7000 */ 7001 csum += udph->uha_length; 7002 csum = (csum & 0xFFFF) + (csum >> 16); 7003 udph->uha_checksum = (uint16_t)csum; 7004 7005 #ifdef _LITTLE_ENDIAN 7006 ip_len = htons(ip_len); 7007 #endif 7008 ip6h->ip6_plen = ip_len; 7009 7010 if (DB_TYPE(mp) != M_DATA) { 7011 ASSERT(mp != mp1); 7012 freeb(mp); 7013 } 7014 7015 /* mp has been consumed and we'll return success */ 7016 ASSERT(*error == 0); 7017 mp = NULL; 7018 7019 /* We're done. 
Pass the packet to IP */ 7020 BUMP_MIB(&udp_mib, udpOutDatagrams); 7021 ip_output_v6(connp, mp1, q, IP_WPUT); 7022 7023 done: 7024 if (*error != 0) { 7025 ASSERT(mp != NULL); 7026 BUMP_MIB(&udp_mib, udpOutErrors); 7027 } 7028 return (mp); 7029 } 7030 7031 static void 7032 udp_wput_other(queue_t *q, mblk_t *mp) 7033 { 7034 uchar_t *rptr = mp->b_rptr; 7035 struct datab *db; 7036 struct iocblk *iocp; 7037 cred_t *cr; 7038 conn_t *connp = Q_TO_CONN(q); 7039 udp_t *udp = connp->conn_udp; 7040 7041 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7042 "udp_wput_other_start: q %p", q); 7043 7044 db = mp->b_datap; 7045 7046 cr = DB_CREDDEF(mp, connp->conn_cred); 7047 7048 switch (db->db_type) { 7049 case M_PROTO: 7050 case M_PCPROTO: 7051 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7052 freemsg(mp); 7053 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7054 "udp_wput_other_end: q %p (%S)", 7055 q, "protoshort"); 7056 return; 7057 } 7058 switch (((t_primp_t)rptr)->type) { 7059 case T_ADDR_REQ: 7060 udp_addr_req(q, mp); 7061 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7062 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7063 return; 7064 case O_T_BIND_REQ: 7065 case T_BIND_REQ: 7066 udp_bind(q, mp); 7067 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7068 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7069 return; 7070 case T_CONN_REQ: 7071 udp_connect(q, mp); 7072 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7073 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7074 return; 7075 case T_CAPABILITY_REQ: 7076 udp_capability_req(q, mp); 7077 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7078 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7079 return; 7080 case T_INFO_REQ: 7081 udp_info_req(q, mp); 7082 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7083 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7084 return; 7085 case T_UNITDATA_REQ: 7086 /* 7087 * If a T_UNITDATA_REQ gets here, the address must 7088 * be bad. Valid T_UNITDATA_REQs are handled 7089 * in udp_wput. 7090 */ 7091 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7092 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7093 "udp_wput_other_end: q %p (%S)", 7094 q, "unitdatareq"); 7095 return; 7096 case T_UNBIND_REQ: 7097 udp_unbind(q, mp); 7098 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7099 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7100 return; 7101 case T_SVR4_OPTMGMT_REQ: 7102 if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr)) 7103 /* 7104 * Use upper queue for option processing in 7105 * case the request is not handled at this 7106 * level and needs to be passed down to IP. 7107 */ 7108 (void) svr4_optcom_req(_WR(UDP_RD(q)), 7109 mp, cr, &udp_opt_obj); 7110 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7111 "udp_wput_other_end: q %p (%S)", 7112 q, "optmgmtreq"); 7113 return; 7114 7115 case T_OPTMGMT_REQ: 7116 /* 7117 * Use upper queue for option processing in 7118 * case the request is not handled at this 7119 * level and needs to be passed down to IP. 7120 */ 7121 (void) tpi_optcom_req(_WR(UDP_RD(q)), 7122 mp, cr, &udp_opt_obj); 7123 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7124 "udp_wput_other_end: q %p (%S)", 7125 q, "optmgmtreq"); 7126 return; 7127 7128 case T_DISCON_REQ: 7129 udp_disconnect(q, mp); 7130 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7131 "udp_wput_other_end: q %p (%S)", 7132 q, "disconreq"); 7133 return; 7134 7135 /* The following TPI message is not supported by udp. 
*/ 7136 case O_T_CONN_RES: 7137 case T_CONN_RES: 7138 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7139 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7140 "udp_wput_other_end: q %p (%S)", 7141 q, "connres/disconreq"); 7142 return; 7143 7144 /* The following 3 TPI messages are illegal for udp. */ 7145 case T_DATA_REQ: 7146 case T_EXDATA_REQ: 7147 case T_ORDREL_REQ: 7148 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7149 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7150 "udp_wput_other_end: q %p (%S)", 7151 q, "data/exdata/ordrel"); 7152 return; 7153 default: 7154 break; 7155 } 7156 break; 7157 case M_FLUSH: 7158 if (*rptr & FLUSHW) 7159 flushq(q, FLUSHDATA); 7160 break; 7161 case M_IOCTL: 7162 iocp = (struct iocblk *)mp->b_rptr; 7163 switch (iocp->ioc_cmd) { 7164 case TI_GETPEERNAME: 7165 if (udp->udp_state != TS_DATA_XFER) { 7166 /* 7167 * If a default destination address has not 7168 * been associated with the stream, then we 7169 * don't know the peer's name. 7170 */ 7171 iocp->ioc_error = ENOTCONN; 7172 iocp->ioc_count = 0; 7173 mp->b_datap->db_type = M_IOCACK; 7174 putnext(UDP_RD(q), mp); 7175 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7176 "udp_wput_other_end: q %p (%S)", 7177 q, "getpeername"); 7178 return; 7179 } 7180 /* FALLTHRU */ 7181 case TI_GETMYNAME: { 7182 /* 7183 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7184 * need to copyin the user's strbuf structure. 7185 * Processing will continue in the M_IOCDATA case 7186 * below. 7187 */ 7188 mi_copyin(q, mp, NULL, 7189 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7190 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7191 "udp_wput_other_end: q %p (%S)", 7192 q, "getmyname"); 7193 return; 7194 } 7195 case ND_SET: 7196 /* nd_getset performs the necessary checking */ 7197 case ND_GET: 7198 if (nd_getset(q, udp_g_nd, mp)) { 7199 putnext(UDP_RD(q), mp); 7200 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7201 "udp_wput_other_end: q %p (%S)", 7202 q, "get"); 7203 return; 7204 } 7205 break; 7206 case _SIOCSOCKFALLBACK: 7207 /* 7208 * Either sockmod is about to be popped and the 7209 * socket would now be treated as a plain stream, 7210 * or a module is about to be pushed so we could 7211 * no longer use read-side synchronous stream. 7212 * Drain any queued data and disable direct sockfs 7213 * interface from now on. 7214 */ 7215 if (!udp->udp_issocket) { 7216 DB_TYPE(mp) = M_IOCNAK; 7217 iocp->ioc_error = EINVAL; 7218 } else { 7219 udp->udp_issocket = B_FALSE; 7220 if (udp->udp_direct_sockfs) { 7221 /* 7222 * Disable read-side synchronous 7223 * stream interface and drain any 7224 * queued data. 7225 */ 7226 udp_rcv_drain(UDP_RD(q), udp, 7227 B_FALSE); 7228 ASSERT(!udp->udp_direct_sockfs); 7229 UDP_STAT(udp_sock_fallback); 7230 } 7231 DB_TYPE(mp) = M_IOCACK; 7232 iocp->ioc_error = 0; 7233 } 7234 iocp->ioc_count = 0; 7235 iocp->ioc_rval = 0; 7236 putnext(UDP_RD(q), mp); 7237 return; 7238 default: 7239 break; 7240 } 7241 break; 7242 case M_IOCDATA: 7243 udp_wput_iocdata(q, mp); 7244 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7245 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7246 return; 7247 default: 7248 /* Unrecognized messages are passed through without change. 
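		 * They fall out of the switch and are handed to IP via the
		 * ip_output() call at the end of this routine.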
*/ 7249 break; 7250 } 7251 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7252 "udp_wput_other_end: q %p (%S)", q, "end"); 7253 ip_output(connp, mp, q, IP_WPUT); 7254 } 7255 7256 /* ARGSUSED */ 7257 static void 7258 udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 7259 { 7260 udp_wput_other(((conn_t *)arg)->conn_wq, mp); 7261 udp_exit((conn_t *)arg); 7262 } 7263 7264 /* 7265 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7266 * messages. 7267 */ 7268 static void 7269 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7270 { 7271 mblk_t *mp1; 7272 STRUCT_HANDLE(strbuf, sb); 7273 uint16_t port; 7274 in6_addr_t v6addr; 7275 ipaddr_t v4addr; 7276 uint32_t flowinfo = 0; 7277 int addrlen; 7278 udp_t *udp = Q_TO_UDP(q); 7279 7280 /* Make sure it is one of ours. */ 7281 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7282 case TI_GETMYNAME: 7283 case TI_GETPEERNAME: 7284 break; 7285 default: 7286 ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); 7287 return; 7288 } 7289 7290 q = WR(UDP_RD(q)); 7291 switch (mi_copy_state(q, mp, &mp1)) { 7292 case -1: 7293 return; 7294 case MI_COPY_CASE(MI_COPY_IN, 1): 7295 break; 7296 case MI_COPY_CASE(MI_COPY_OUT, 1): 7297 /* 7298 * The address has been copied out, so now 7299 * copyout the strbuf. 7300 */ 7301 mi_copyout(q, mp); 7302 return; 7303 case MI_COPY_CASE(MI_COPY_OUT, 2): 7304 /* 7305 * The address and strbuf have been copied out. 7306 * We're done, so just acknowledge the original 7307 * M_IOCTL. 7308 */ 7309 mi_copy_done(q, mp, 0); 7310 return; 7311 default: 7312 /* 7313 * Something strange has happened, so acknowledge 7314 * the original M_IOCTL with an EPROTO error. 7315 */ 7316 mi_copy_done(q, mp, EPROTO); 7317 return; 7318 } 7319 7320 /* 7321 * Now we have the strbuf structure for TI_GETMYNAME 7322 * and TI_GETPEERNAME. Next we copyout the requested 7323 * address and then we'll copyout the strbuf. 7324 */ 7325 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 7326 (void *)mp1->b_rptr); 7327 if (udp->udp_family == AF_INET) 7328 addrlen = sizeof (sin_t); 7329 else 7330 addrlen = sizeof (sin6_t); 7331 7332 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7333 mi_copy_done(q, mp, EINVAL); 7334 return; 7335 } 7336 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7337 case TI_GETMYNAME: 7338 if (udp->udp_family == AF_INET) { 7339 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7340 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7341 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7342 v4addr = V4_PART_OF_V6(udp->udp_v6src); 7343 } else { 7344 /* 7345 * INADDR_ANY 7346 * udp_v6src is not set, we might be bound to 7347 * broadcast/multicast. Use udp_bound_v6src as 7348 * local address instead (that could 7349 * also still be INADDR_ANY) 7350 */ 7351 v4addr = V4_PART_OF_V6(udp->udp_bound_v6src); 7352 } 7353 } else { 7354 /* udp->udp_family == AF_INET6 */ 7355 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7356 v6addr = udp->udp_v6src; 7357 } else { 7358 /* 7359 * UNSPECIFIED 7360 * udp_v6src is not set, we might be bound to 7361 * broadcast/multicast. 
Use udp_bound_v6src as 7362 * local address instead (that could 7363 * also still be UNSPECIFIED) 7364 */ 7365 v6addr = udp->udp_bound_v6src; 7366 } 7367 } 7368 port = udp->udp_port; 7369 break; 7370 case TI_GETPEERNAME: 7371 if (udp->udp_state != TS_DATA_XFER) { 7372 mi_copy_done(q, mp, ENOTCONN); 7373 return; 7374 } 7375 if (udp->udp_family == AF_INET) { 7376 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7377 v4addr = V4_PART_OF_V6(udp->udp_v6dst); 7378 } else { 7379 /* udp->udp_family == AF_INET6) */ 7380 v6addr = udp->udp_v6dst; 7381 flowinfo = udp->udp_flowinfo; 7382 } 7383 port = udp->udp_dstport; 7384 break; 7385 default: 7386 mi_copy_done(q, mp, EPROTO); 7387 return; 7388 } 7389 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7390 if (!mp1) 7391 return; 7392 7393 if (udp->udp_family == AF_INET) { 7394 sin_t *sin; 7395 7396 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 7397 sin = (sin_t *)mp1->b_rptr; 7398 mp1->b_wptr = (uchar_t *)&sin[1]; 7399 *sin = sin_null; 7400 sin->sin_family = AF_INET; 7401 sin->sin_addr.s_addr = v4addr; 7402 sin->sin_port = port; 7403 } else { 7404 /* udp->udp_family == AF_INET6 */ 7405 sin6_t *sin6; 7406 7407 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 7408 sin6 = (sin6_t *)mp1->b_rptr; 7409 mp1->b_wptr = (uchar_t *)&sin6[1]; 7410 *sin6 = sin6_null; 7411 sin6->sin6_family = AF_INET6; 7412 sin6->sin6_flowinfo = flowinfo; 7413 sin6->sin6_addr = v6addr; 7414 sin6->sin6_port = port; 7415 } 7416 /* Copy out the address */ 7417 mi_copyout(q, mp); 7418 } 7419 7420 7421 static int 7422 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7423 void *thisdg_attrs) 7424 { 7425 struct T_unitdata_req *udreqp; 7426 int is_absreq_failure; 7427 cred_t *cr; 7428 conn_t *connp = Q_TO_CONN(q); 7429 7430 ASSERT(((t_primp_t)mp->b_rptr)->type); 7431 7432 cr = DB_CREDDEF(mp, connp->conn_cred); 7433 7434 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7435 *errorp = 0; 7436 7437 /* 7438 * Use upper queue for option processing since the callback 7439 * routines expect to be called in UDP instance instead of IP. 7440 */ 7441 *errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length, 7442 udreqp->OPT_offset, cr, &udp_opt_obj, 7443 thisdg_attrs, &is_absreq_failure); 7444 7445 if (*errorp != 0) { 7446 /* 7447 * Note: No special action needed in this 7448 * module for "is_absreq_failure" 7449 */ 7450 return (-1); /* failure */ 7451 } 7452 ASSERT(is_absreq_failure == 0); 7453 return (0); /* success */ 7454 } 7455 7456 void 7457 udp_ddi_init(void) 7458 { 7459 int i; 7460 7461 UDP6_MAJ = ddi_name_to_major(UDP6); 7462 7463 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7464 udp_opt_obj.odb_opt_arr_cnt); 7465 7466 if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) { 7467 /* Not a power of two. 
Round up to nearest power of two */ 7468 for (i = 0; i < 31; i++) { 7469 if (udp_bind_fanout_size < (1 << i)) 7470 break; 7471 } 7472 udp_bind_fanout_size = 1 << i; 7473 } 7474 udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size * 7475 sizeof (udp_fanout_t), KM_SLEEP); 7476 for (i = 0; i < udp_bind_fanout_size; i++) { 7477 mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7478 NULL); 7479 } 7480 (void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr)); 7481 7482 udp_kstat_init(); 7483 7484 udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), 7485 CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); 7486 } 7487 7488 void 7489 udp_ddi_destroy(void) 7490 { 7491 int i; 7492 7493 nd_free(&udp_g_nd); 7494 7495 for (i = 0; i < udp_bind_fanout_size; i++) { 7496 mutex_destroy(&udp_bind_fanout[i].uf_lock); 7497 } 7498 7499 kmem_free(udp_bind_fanout, udp_bind_fanout_size * 7500 sizeof (udp_fanout_t)); 7501 7502 udp_kstat_fini(); 7503 7504 kmem_cache_destroy(udp_cache); 7505 } 7506 7507 static void 7508 udp_kstat_init(void) 7509 { 7510 udp_named_kstat_t template = { 7511 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 7512 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7513 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 7514 { "entrySize", KSTAT_DATA_INT32, 0 }, 7515 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7516 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7517 }; 7518 7519 udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME, 7520 "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0); 7521 7522 if (udp_mibkp == NULL) 7523 return; 7524 7525 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7526 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7527 7528 bcopy(&template, udp_mibkp->ks_data, sizeof (template)); 7529 7530 udp_mibkp->ks_update = udp_kstat_update; 7531 7532 kstat_install(udp_mibkp); 7533 7534 if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat", 7535 "net", KSTAT_TYPE_NAMED, 7536 sizeof (udp_statistics) / sizeof (kstat_named_t), 7537 KSTAT_FLAG_VIRTUAL)) != NULL) { 7538 udp_ksp->ks_data = &udp_statistics; 7539 kstat_install(udp_ksp); 7540 } 7541 } 7542 7543 static void 7544 udp_kstat_fini(void) 7545 { 7546 if (udp_ksp != NULL) { 7547 kstat_delete(udp_ksp); 7548 udp_ksp = NULL; 7549 } 7550 if (udp_mibkp != NULL) { 7551 kstat_delete(udp_mibkp); 7552 udp_mibkp = NULL; 7553 } 7554 } 7555 7556 static int 7557 udp_kstat_update(kstat_t *kp, int rw) 7558 { 7559 udp_named_kstat_t *udpkp; 7560 7561 if ((kp == NULL) || (kp->ks_data == NULL)) 7562 return (EIO); 7563 7564 if (rw == KSTAT_WRITE) 7565 return (EACCES); 7566 7567 udpkp = (udp_named_kstat_t *)kp->ks_data; 7568 7569 udpkp->inDatagrams.value.ui32 = udp_mib.udpInDatagrams; 7570 udpkp->inErrors.value.ui32 = udp_mib.udpInErrors; 7571 udpkp->outDatagrams.value.ui32 = udp_mib.udpOutDatagrams; 7572 udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors; 7573 7574 return (0); 7575 } 7576 7577 /* ARGSUSED */ 7578 static void 7579 udp_rput(queue_t *q, mblk_t *mp) 7580 { 7581 /* 7582 * We get here whenever we do qreply() from IP, 7583 * i.e as part of handlings ioctls, etc. 7584 */ 7585 putnext(q, mp); 7586 } 7587 7588 /* 7589 * Read-side synchronous stream info entry point, called as a 7590 * result of handling certain STREAMS ioctl operations. 
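 * Depending on d_cmd it reports the number of queued datagrams, their
 * total byte count and the size of the first datagram, and can copy the
 * first datagram's contents out to the caller; everything is taken from
 * udp_rcv_list under udp_drain_lock.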
7591 */ 7592 static int 7593 udp_rinfop(queue_t *q, infod_t *dp) 7594 { 7595 mblk_t *mp; 7596 uint_t cmd = dp->d_cmd; 7597 int res = 0; 7598 int error = 0; 7599 udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); 7600 struct stdata *stp = STREAM(q); 7601 7602 mutex_enter(&udp->udp_drain_lock); 7603 /* If shutdown on read has happened, return nothing */ 7604 mutex_enter(&stp->sd_lock); 7605 if (stp->sd_flag & STREOF) { 7606 mutex_exit(&stp->sd_lock); 7607 goto done; 7608 } 7609 mutex_exit(&stp->sd_lock); 7610 7611 if ((mp = udp->udp_rcv_list_head) == NULL) 7612 goto done; 7613 7614 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7615 7616 if (cmd & INFOD_COUNT) { 7617 /* 7618 * Return the number of messages. 7619 */ 7620 dp->d_count += udp->udp_rcv_msgcnt; 7621 res |= INFOD_COUNT; 7622 } 7623 if (cmd & INFOD_BYTES) { 7624 /* 7625 * Return size of all data messages. 7626 */ 7627 dp->d_bytes += udp->udp_rcv_cnt; 7628 res |= INFOD_BYTES; 7629 } 7630 if (cmd & INFOD_FIRSTBYTES) { 7631 /* 7632 * Return size of first data message. 7633 */ 7634 dp->d_bytes = msgdsize(mp); 7635 res |= INFOD_FIRSTBYTES; 7636 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7637 } 7638 if (cmd & INFOD_COPYOUT) { 7639 mblk_t *mp1 = mp->b_cont; 7640 int n; 7641 /* 7642 * Return data contents of first message. 7643 */ 7644 ASSERT(DB_TYPE(mp1) == M_DATA); 7645 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7646 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7647 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7648 UIO_READ, dp->d_uiop)) != 0) { 7649 goto done; 7650 } 7651 mp1 = mp1->b_cont; 7652 } 7653 res |= INFOD_COPYOUT; 7654 dp->d_cmd &= ~INFOD_COPYOUT; 7655 } 7656 done: 7657 mutex_exit(&udp->udp_drain_lock); 7658 7659 dp->d_res |= res; 7660 7661 return (error); 7662 } 7663 7664 /* 7665 * Read-side synchronous stream entry point. This is called as a result 7666 * of recv/read operation done at sockfs, and is guaranteed to execute 7667 * outside of the interrupt thread context. It returns a single datagram 7668 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7669 */ 7670 static int 7671 udp_rrw(queue_t *q, struiod_t *dp) 7672 { 7673 mblk_t *mp; 7674 udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); 7675 7676 /* We should never get here when we're in SNMP mode */ 7677 ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); 7678 7679 /* 7680 * Dequeue datagram from the head of the list and return 7681 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7682 * set/cleared depending on whether or not there's data 7683 * remaining in the list. 7684 */ 7685 mutex_enter(&udp->udp_drain_lock); 7686 if (!udp->udp_direct_sockfs) { 7687 mutex_exit(&udp->udp_drain_lock); 7688 UDP_STAT(udp_rrw_busy); 7689 return (EBUSY); 7690 } 7691 if ((mp = udp->udp_rcv_list_head) != NULL) { 7692 uint_t size = msgdsize(mp); 7693 7694 /* Last datagram in the list? */ 7695 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7696 udp->udp_rcv_list_tail = NULL; 7697 mp->b_next = NULL; 7698 7699 udp->udp_rcv_cnt -= size; 7700 udp->udp_rcv_msgcnt--; 7701 UDP_STAT(udp_rrw_msgcnt); 7702 7703 /* No longer flow-controlling? */ 7704 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7705 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7706 udp->udp_drain_qfull = B_FALSE; 7707 } 7708 if (udp->udp_rcv_list_head == NULL) { 7709 /* 7710 * Either we just dequeued the last datagram or 7711 * we get here from sockfs and have nothing to 7712 * return; in this case clear RSLEEP. 
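		 * With the wakeup flag cleared, a subsequent read can sleep
		 * in sockfs until udp_rcv_enqueue() sets the flag again for
		 * the next datagram that arrives.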
7713 */ 7714 ASSERT(udp->udp_rcv_cnt == 0); 7715 ASSERT(udp->udp_rcv_msgcnt == 0); 7716 ASSERT(udp->udp_rcv_list_tail == NULL); 7717 STR_WAKEUP_CLEAR(STREAM(q)); 7718 } else { 7719 /* 7720 * More data follows; we need udp_rrw() to be 7721 * called in future to pick up the rest. 7722 */ 7723 STR_WAKEUP_SET(STREAM(q)); 7724 } 7725 mutex_exit(&udp->udp_drain_lock); 7726 dp->d_mp = mp; 7727 return (0); 7728 } 7729 7730 /* 7731 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7732 * list; this is typically executed within the interrupt thread context 7733 * and so we do things as quickly as possible. 7734 */ 7735 static void 7736 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7737 { 7738 ASSERT(q == RD(q)); 7739 ASSERT(pkt_len == msgdsize(mp)); 7740 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7741 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7742 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7743 7744 mutex_enter(&udp->udp_drain_lock); 7745 /* 7746 * Wake up and signal the receiving app; it is okay to do this 7747 * before enqueueing the mp because we are holding the drain lock. 7748 * One of the advantages of synchronous stream is the ability for 7749 * us to find out when the application performs a read on the 7750 * socket by way of udp_rrw() entry point being called. We need 7751 * to generate SIGPOLL/SIGIO for each received data in the case 7752 * of asynchronous socket just as in the strrput() case. However, 7753 * we only wake the application up when necessary, i.e. during the 7754 * first enqueue. When udp_rrw() is called, we send up a single 7755 * datagram upstream and call STR_WAKEUP_SET() again when there 7756 * are still data remaining in our receive queue. 7757 */ 7758 if (udp->udp_rcv_list_head == NULL) { 7759 STR_WAKEUP_SET(STREAM(q)); 7760 udp->udp_rcv_list_head = mp; 7761 } else { 7762 udp->udp_rcv_list_tail->b_next = mp; 7763 } 7764 udp->udp_rcv_list_tail = mp; 7765 udp->udp_rcv_cnt += pkt_len; 7766 udp->udp_rcv_msgcnt++; 7767 7768 /* Need to flow-control? */ 7769 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7770 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7771 udp->udp_drain_qfull = B_TRUE; 7772 7773 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 7774 STR_SENDSIG(STREAM(q)); 7775 mutex_exit(&udp->udp_drain_lock); 7776 } 7777 7778 /* 7779 * Drain the contents of receive list to the module upstream; we do 7780 * this during close or when we fallback to the slow mode due to 7781 * sockmod being popped or a module being pushed on top of us. 7782 */ 7783 static void 7784 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7785 { 7786 mblk_t *mp; 7787 7788 ASSERT(q == RD(q)); 7789 7790 mutex_enter(&udp->udp_drain_lock); 7791 /* 7792 * There is no race with a concurrent udp_input() sending 7793 * up packets using putnext() after we have cleared the 7794 * udp_direct_sockfs flag but before we have completed 7795 * sending up the packets in udp_rcv_list, since we are 7796 * either a writer or we have quiesced the conn. 7797 */ 7798 udp->udp_direct_sockfs = B_FALSE; 7799 mutex_exit(&udp->udp_drain_lock); 7800 7801 if (udp->udp_rcv_list_head != NULL) 7802 UDP_STAT(udp_drain); 7803 7804 /* 7805 * Send up everything via putnext(); note here that we 7806 * don't need the udp_drain_lock to protect us since 7807 * nothing can enter udp_rrw() and that we currently 7808 * have exclusive access to this udp. 
7809 */ 7810 while ((mp = udp->udp_rcv_list_head) != NULL) { 7811 udp->udp_rcv_list_head = mp->b_next; 7812 mp->b_next = NULL; 7813 udp->udp_rcv_cnt -= msgdsize(mp); 7814 udp->udp_rcv_msgcnt--; 7815 if (closing) { 7816 freemsg(mp); 7817 } else { 7818 putnext(q, mp); 7819 } 7820 } 7821 ASSERT(udp->udp_rcv_cnt == 0); 7822 ASSERT(udp->udp_rcv_msgcnt == 0); 7823 ASSERT(udp->udp_rcv_list_head == NULL); 7824 udp->udp_rcv_list_tail = NULL; 7825 udp->udp_drain_qfull = B_FALSE; 7826 } 7827 7828 static size_t 7829 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7830 { 7831 /* We add a bit of extra buffering */ 7832 size += size >> 1; 7833 if (size > udp_max_buf) 7834 size = udp_max_buf; 7835 7836 udp->udp_rcv_hiwat = size; 7837 return (size); 7838 } 7839 7840 /* 7841 * Little helper for IPsec's NAT-T processing. 7842 */ 7843 boolean_t 7844 udp_compute_checksum(void) 7845 { 7846 return (udp_do_checksum); 7847 } 7848
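
/*
 * Illustrative sketch only, not part of the original source: the receive
 * high-water mark computed by udp_set_rcv_hiwat() above is the requested
 * size plus half again, capped at udp_max_buf:
 *
 *	size_t hiwat = size + (size >> 1);
 *	if (hiwat > udp_max_buf)
 *		hiwat = udp_max_buf;
 *
 * For example, a 64K receive buffer request yields a 96K high-water mark,
 * assuming udp_max_buf permits it.
 */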