/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

#pragma ident	"%Z%%M% %I% %E% SMI"

const char udp_version[] = "%Z%%M% %I% %E% SMI";

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/pattr.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#define	_SUN_TPI_VERSION 2
#include <sys/tihdr.h>
#include <sys/timod.h>
#include <sys/tiuser.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/strsubr.h>
#include <sys/suntpi.h>
#include <sys/xti_inet.h>
#include <sys/cmn_err.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/ucred.h>
#include <sys/zone.h>

#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/isa_defs.h>
#include <sys/random.h>
#include <netinet/in.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>
#include <netinet/udp.h>
#include <net/if.h>
#include <net/route.h>

#include <inet/common.h>
#include <inet/ip.h>
#include <inet/ip_impl.h>
#include <inet/ip6.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <inet/ip_multi.h>
#include <inet/mi.h>
#include <inet/mib2.h>
#include <inet/nd.h>
#include <inet/optcom.h>
#include <inet/snmpcom.h>
#include <inet/kstatcom.h>
#include <inet/udp_impl.h>
#include <inet/ipclassifier.h>
#include <inet/ipsec_impl.h>
#include <inet/ipp_common.h>

/*
 * The ipsec_info.h header file is here since it has the definition for the
 * M_CTL message types used by IP to convey information to the ULP.  The
 * ipsec_info.h header needs pfkeyv2.h, hence the latter's presence.
 */
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>

/*
 * Synchronization notes:
 *
 * UDP uses a combination of its internal perimeter, a global lock and
 * a set of bind hash locks to protect its data structures.  Please see
 * the note above udp_mode_assertions for details about the internal
 * perimeter.
 *
 * When a UDP endpoint is bound to a local port, it is inserted into
 * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
 * The size of the array is controlled by the udp_bind_fanout_size variable.
 * This variable can be changed in /etc/system if the default value is
 * not large enough.  Each bind hash bucket is protected by a per bucket
 * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
 * structure.
 * A UDP endpoint is removed from the bind hash list only
 * when it is being unbound or being closed.  The per bucket lock also
 * protects a UDP endpoint's state changes.
 *
 * Plumbing notes:
 *
 * Both udp and ip are merged, but the streams plumbing is kept unchanged
 * in that udp is always pushed atop /dev/ip.  This is done to preserve
 * backwards compatibility for certain applications which rely on such
 * plumbing geometry to do things such as issuing I_POP on the stream
 * in order to obtain direct access to /dev/ip, etc.
 *
 * All UDP processing happens in the /dev/ip instance; the udp module
 * instance does not possess any state about the endpoint, and merely
 * acts as a dummy module whose presence is to keep the streams plumbing
 * appearance unchanged.  At open time /dev/ip allocates a conn_t that
 * happens to embed a udp_t.  This stays dormant until the time udp is
 * pushed, which indicates to /dev/ip that it must convert itself from
 * an IP to a UDP endpoint.
 *
 * We only allow for the following plumbing cases:
 *
 * Normal:
 *	/dev/ip is first opened and later udp is pushed directly on top.
 *	This is the default action that happens when a udp socket or
 *	/dev/udp is opened.  The conn_t created by the /dev/ip instance is
 *	now shared and is marked with IPCL_UDP.
 *
 * SNMP-only:
 *	udp is pushed on top of a module other than /dev/ip.  When this
 *	happens it will support only SNMP semantics.  A new conn_t is
 *	allocated and marked with IPCL_UDPMOD.
 *
 * The above cases imply that we don't support any intermediate module to
 * reside in between /dev/ip and udp -- in fact, we never supported such
 * a scenario in the past as the inter-layer communication semantics have
 * always been private.  Also note that the normal case allows for SNMP
 * requests to be processed in addition to the rest of UDP operations.
 *
 * The normal case plumbing is depicted by the following diagram:
 *
 *      +---------------+---------------+
 *      |               |               | udp
 *      |     udp_wq    |     udp_rq    |
 *      |               |     UDP_RD    |
 *      |               |               |
 *      +---------------+---------------+
 *              |               ^
 *              v               |
 *      +---------------+---------------+
 *      |               |               | /dev/ip
 *      |     ip_wq     |     ip_rq     | conn_t
 *      |     UDP_WR    |               |
 *      |               |               |
 *      +---------------+---------------+
 *
 * Messages arriving at udp_wq from above will end up in ip_wq before
 * they get processed, i.e. udp write entry points will advance udp_wq
 * and use its q_next value as ip_wq in order to use the conn_t that
 * is stored in its q_ptr.  Likewise, messages generated by ip to the
 * module above udp will appear as if they originated from udp_rq,
 * i.e. putnext() calls to the module above udp are done using the
 * udp_rq instead of ip_rq in order to avoid udp_rput() which does
 * nothing more than calling putnext().
 *
 * The above implies the following rules of thumb (illustrated in the
 * sketch that follows the list):
 *
 *   1. udp_t is obtained from conn_t, which is created by the /dev/ip
 *	instance and is stored in q_ptr of both ip_wq and ip_rq.  There
 *	is no direct reference to conn_t from either udp_wq or udp_rq.
 *
 *   2. Write-side entry points of udp can obtain the conn_t via the
 *	Q_TO_CONN() macro, using the queue value obtained from UDP_WR().
 *
 *   3. While in /dev/ip context, putnext() to the module above udp can
 *	be done by supplying the queue value obtained from UDP_RD().
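 *
 * As a purely illustrative sketch (not additional code in this file), a
 * write-side entry point following rules 1 and 2 above would typically
 * obtain its state as:
 *
 *	conn_t	*connp = Q_TO_CONN(UDP_WR(q));
 *	udp_t	*udp = connp->conn_udp;
 *
 * and, per rule 3, a putnext() towards the module above udp from within
 * the /dev/ip instance would be issued as putnext(UDP_RD(q), mp).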
 *
 */

static queue_t *UDP_WR(queue_t *);
static queue_t *UDP_RD(queue_t *);

udp_stat_t udp_statistics = {
	{ "udp_ip_send",		KSTAT_DATA_UINT64 },
	{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
	{ "udp_ire_null",		KSTAT_DATA_UINT64 },
	{ "udp_drain",			KSTAT_DATA_UINT64 },
	{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_busy",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_msgcnt",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum_bytes",	KSTAT_DATA_UINT64 },
	{ "udp_out_opt",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
	{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
	{ "udp_in_pktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstaddr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvif",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvslla",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvucred",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvttl",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhopopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhoplimit",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvrtdstopts",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvrthdr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvpktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvtclass",		KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "udp_data_conn",		KSTAT_DATA_UINT64 },
	{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
#endif
};

static kstat_t *udp_ksp;
struct kmem_cache *udp_cache;

/*
 * Bind hash list size and hash function.  It has to be a power of 2 for
 * hashing.
 */
#define	UDP_BIND_FANOUT_SIZE	512
#define	UDP_BIND_HASH(lport) \
	((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1))

/* UDP bind fanout hash structure. */
typedef struct udp_fanout_s {
	udp_t	*uf_udp;
	kmutex_t uf_lock;
#if defined(_LP64) || defined(_I32LPx)
	char	uf_pad[48];
#else
	char	uf_pad[56];
#endif
} udp_fanout_t;

uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
/* udp_fanout_t *udp_bind_fanout. */
static udp_fanout_t *udp_bind_fanout;

/*
 * This controls the rate at which some ndd info report functions can be
 * used by non-privileged users.  It stores the last time such info was
 * requested.  When those report functions are called again, this value
 * is checked against the current time and against the ndd parameter
 * udp_ndd_get_info_interval.
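 *
 * As a rough illustration only (the actual test lives in the report
 * functions, e.g. udp_bind_hash_report()), the rate limit amounts to:
 *
 *	if (ddi_get_lbolt() - udp_last_ndd_get_info_time <
 *	    drv_usectohz(udp_ndd_get_info_interval * 1000))
 *		the request is refused with NDD_TOO_QUICK_MSG;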
255 */ 256 static clock_t udp_last_ndd_get_info_time; 257 #define NDD_TOO_QUICK_MSG \ 258 "ndd get info rate too high for non-priviledged users, try again " \ 259 "later.\n" 260 #define NDD_OUT_OF_BUF_MSG "<< Out of buffer >>\n" 261 262 static void udp_addr_req(queue_t *q, mblk_t *mp); 263 static void udp_bind(queue_t *q, mblk_t *mp); 264 static void udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp); 265 static void udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock); 266 static int udp_build_hdrs(queue_t *q, udp_t *udp); 267 static void udp_capability_req(queue_t *q, mblk_t *mp); 268 static int udp_close(queue_t *q); 269 static void udp_connect(queue_t *q, mblk_t *mp); 270 static void udp_disconnect(queue_t *q, mblk_t *mp); 271 static void udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, 272 int sys_error); 273 static void udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, 274 t_scalar_t tlierr, int unixerr); 275 static int udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, 276 cred_t *cr); 277 static int udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, 278 char *value, caddr_t cp, cred_t *cr); 279 static int udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, 280 char *value, caddr_t cp, cred_t *cr); 281 static void udp_icmp_error(queue_t *q, mblk_t *mp); 282 static void udp_icmp_error_ipv6(queue_t *q, mblk_t *mp); 283 static void udp_info_req(queue_t *q, mblk_t *mp); 284 static mblk_t *udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, 285 t_scalar_t addr_length); 286 static int udp_open(queue_t *q, dev_t *devp, int flag, int sflag, 287 cred_t *credp); 288 static int udp_unitdata_opt_process(queue_t *q, mblk_t *mp, 289 int *errorp, void *thisdg_attrs); 290 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name); 291 static int udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr); 292 static boolean_t udp_param_register(udpparam_t *udppa, int cnt); 293 static int udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 294 cred_t *cr); 295 static int udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 296 uchar_t **optbufp, uint_t *optlenp); 297 static void udp_report_item(mblk_t *mp, udp_t *udp); 298 static void udp_rput(queue_t *q, mblk_t *mp); 299 static void udp_rput_other(queue_t *, mblk_t *); 300 static int udp_rinfop(queue_t *q, infod_t *dp); 301 static int udp_rrw(queue_t *q, struiod_t *dp); 302 static void udp_rput_bind_ack(queue_t *q, mblk_t *mp); 303 static int udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, 304 cred_t *cr); 305 static void udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha); 306 static void udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, 307 t_scalar_t destlen, t_scalar_t err); 308 static void udp_unbind(queue_t *q, mblk_t *mp); 309 static in_port_t udp_update_next_port(in_port_t port, boolean_t random); 310 static void udp_wput(queue_t *q, mblk_t *mp); 311 static mblk_t *udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst, 312 uint16_t port, uint_t srcid, int *error); 313 static mblk_t *udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, 314 t_scalar_t tudr_optlen, int *error); 315 static void udp_wput_other(queue_t *q, mblk_t *mp); 316 static void udp_wput_iocdata(queue_t *q, mblk_t *mp); 317 static void udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, 318 socklen_t addrlen); 319 static size_t udp_set_rcv_hiwat(udp_t *udp, size_t size); 320 321 static void udp_kstat_init(void); 322 static void udp_kstat_fini(void); 323 static int 
udp_kstat_update(kstat_t *kp, int rw); 324 static void udp_input_wrapper(void *arg, mblk_t *mp, void *arg2); 325 static void udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 326 static void udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2); 327 static void udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2); 328 329 static void udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, 330 uint_t pkt_len); 331 static void udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing); 332 static void udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t); 333 static void udp_exit(conn_t *); 334 static void udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t); 335 #ifdef DEBUG 336 static void udp_mode_assertions(udp_t *, int); 337 #endif /* DEBUG */ 338 339 major_t UDP6_MAJ; 340 #define UDP6 "udp6" 341 342 #define UDP_RECV_HIWATER (56 * 1024) 343 #define UDP_RECV_LOWATER 128 344 #define UDP_XMIT_HIWATER (56 * 1024) 345 #define UDP_XMIT_LOWATER 1024 346 347 static struct module_info udp_info = { 348 UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER 349 }; 350 351 static struct qinit udp_rinit = { 352 (pfi_t)udp_rput, NULL, udp_open, udp_close, NULL, 353 &udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD 354 }; 355 356 static struct qinit udp_winit = { 357 (pfi_t)udp_wput, NULL, NULL, NULL, NULL, 358 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 359 }; 360 361 static struct qinit winit = { 362 (pfi_t)putnext, NULL, NULL, NULL, NULL, 363 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 364 }; 365 366 /* Support for just SNMP if UDP is not pushed directly over device IP */ 367 struct qinit udp_snmp_rinit = { 368 (pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL, 369 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 370 }; 371 372 struct qinit udp_snmp_winit = { 373 (pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL, 374 &udp_info, NULL, NULL, NULL, STRUIOT_NONE 375 }; 376 377 struct streamtab udpinfo = { 378 &udp_rinit, &winit 379 }; 380 381 static sin_t sin_null; /* Zero address for quick clears */ 382 static sin6_t sin6_null; /* Zero address for quick clears */ 383 384 /* Hint not protected by any lock */ 385 static in_port_t udp_g_next_port_to_try; 386 387 /* 388 * Extra privileged ports. In host byte order. 389 */ 390 #define UDP_NUM_EPRIV_PORTS 64 391 static int udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS; 392 static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 }; 393 394 /* Only modified during _init and _fini thus no locking is needed. */ 395 static IDP udp_g_nd; /* Points to table of UDP ND variables. */ 396 397 /* MIB-2 stuff for SNMP */ 398 static mib2_udp_t udp_mib; /* SNMP fixed size info */ 399 static kstat_t *udp_mibkp; /* kstat exporting udp_mib data */ 400 401 #define UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH) 402 403 /* Default structure copied into T_INFO_ACK messages */ 404 static struct T_info_ack udp_g_t_info_ack_ipv4 = { 405 T_INFO_ACK, 406 UDP_MAXPACKET_IPV4, /* TSDU_size. Excl. headers */ 407 T_INVALID, /* ETSU_size. udp does not support expedited data. */ 408 T_INVALID, /* CDATA_size. udp does not support connect data. */ 409 T_INVALID, /* DDATA_size. udp does not support disconnect data. */ 410 sizeof (sin_t), /* ADDR_size. */ 411 0, /* OPT_size - not initialized here */ 412 UDP_MAXPACKET_IPV4, /* TIDU_size. Excl. headers */ 413 T_CLTS, /* SERV_type. udp supports connection-less. */ 414 TS_UNBND, /* CURRENT_state. This is set from udp_state. 
 */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into udp_g_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
	/* min		max		value		name */
	{ 0L,		256,		32,		"udp_wroff_extra" },
	{ 1L,		255,		255,		"udp_ipv4_ttl" },
	{ 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit" },
	{ 1024,		(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
	{ 0,		1,		1,		"udp_do_checksum" },
	{ 1024,		UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
	{ 1024,		UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
	{ UDP_XMIT_LOWATER, (1<<30),	UDP_XMIT_HIWATER, "udp_xmit_hiwat" },
	{ 0,		(1<<30),	UDP_XMIT_LOWATER, "udp_xmit_lowat" },
	{ UDP_RECV_LOWATER, (1<<30),	UDP_RECV_HIWATER, "udp_recv_hiwat" },
	{ 65536,	(1<<30),	2*1024*1024,	"udp_max_buf" },
	{ 100,		60000,		1000,		"udp_ndd_get_info_interval" },
};
/* END CSTYLED */

/*
 * The smallest anonymous port in the privileged port range in which UDP
 * looks for a free port.  Used with the UDP_ANONPRIVBIND option.
 */
static in_port_t udp_min_anonpriv_port = 512;

/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 */

void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

typedef union T_primitives *t_primp_t;

#define	UDP_ENQUEUE_MP(udp, mp, proc, tag) {			\
	ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL);	\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(mp)->b_queue = (queue_t *)((uintptr_t)tag);		\
	(mp)->b_prev = (mblk_t *)proc;				\
	if ((udp)->udp_mphead == NULL)				\
		(udp)->udp_mphead = (mp);			\
	else							\
		(udp)->udp_mptail->b_next = (mp);		\
	(udp)->udp_mptail = (mp);				\
	(udp)->udp_mpcount++;					\
}

#define	UDP_READERS_INCREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count++;				\
}

#define	UDP_READERS_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count--;				\
	if ((udp)->udp_reader_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

#define	UDP_SQUEUE_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_squeue_count--;				\
	if ((udp)->udp_squeue_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

/*
 * Notes on UDP endpoint synchronization:
 *
 * UDP needs exclusive operation on a per endpoint basis, when executing
 * functions that modify the endpoint state.  udp_rput_other() deals with
 * packets with IP options, and processing these packets ends up having
 * to update the endpoint's option related state.  udp_wput_other() deals
 * with control operations from the top, e.g. connect() that needs to
 * update the endpoint state.  These could be synchronized using locks,
 * but the current version uses squeues for this purpose.  squeues may
 * give performance improvement for certain cases such as connected UDP
 * sockets; thus the framework allows for using squeues.
 *
 * The perimeter routines are described as follows:
 *
 * udp_enter():
 *	Enter the UDP endpoint perimeter.
 *
 * udp_become_writer():
 *	Become exclusive on the UDP endpoint.  Specifies a function
 *	that will be called exclusively either immediately or later
 *	when the perimeter is available exclusively.
 *
 * udp_exit():
 *	Exit the UDP perimeter.
 *
 * Entering UDP from the top or from the bottom must be done using
 * udp_enter().  No lock must be held while attempting to enter the UDP
 * perimeter.  When finished, udp_exit() must be called to get out of
 * the perimeter.
 *
 * UDP operates in either MT_HOT mode or in SQUEUE mode.  In MT_HOT mode,
 * multiple threads may enter a UDP endpoint concurrently.  This is used
 * for sending and/or receiving normal data.  Control operations and other
 * special cases call udp_become_writer() to become exclusive on a per
 * endpoint basis and this results in transitioning to SQUEUE mode.  squeue
 * by definition serializes access to the conn_t.  When there are no more
 * pending messages on the squeue for the UDP connection, the endpoint
 * reverts to MT_HOT mode.  During the interregnum when not all MT threads
 * of an endpoint have finished, messages are queued in the UDP endpoint
 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode.
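 *
 * As an illustrative sketch only (the real entry points in this file do
 * the equivalent), a thread entering the perimeter follows the pattern:
 *
 *	udp_enter(connp, mp, proc, tag);
 *		-> proc() runs immediately, is queued internally, or is
 *		   handed to the squeue, depending on the current mode;
 *	proc() calls udp_exit(connp) once it is done with the endpoint.
 *
 * Code that must be exclusive uses udp_become_writer() with the function
 * to run once the endpoint can be owned exclusively.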
 *
 * These modes have the following analogs:
 *
 *	UDP_MT_HOT/udp_reader_count==0		none
 *	UDP_MT_HOT/udp_reader_count>0		RW_READ_LOCK
 *	UDP_MT_QUEUED				RW_WRITE_WANTED
 *	UDP_SQUEUE or UDP_QUEUED_SQUEUE		RW_WRITE_LOCKED
 *
 * Stable modes:	UDP_MT_HOT, UDP_SQUEUE
 * Transient modes:	UDP_MT_QUEUED, UDP_QUEUED_SQUEUE
 *
 * While in stable modes, UDP keeps track of the number of threads
 * operating on the endpoint.  The udp_reader_count variable represents
 * the number of threads entering the endpoint as readers while it is
 * in UDP_MT_HOT mode.  Transitioning to UDP_SQUEUE happens when there
 * is only a single reader, i.e. when this counter drops to 1.  Likewise,
 * udp_squeue_count represents the number of threads operating on the
 * endpoint's squeue while it is in UDP_SQUEUE mode.  The mode transition
 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e.
 * when this counter drops to 0.
 *
 * The default mode is set to UDP_MT_HOT and UDP alternates between
 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below.
 *
 * Mode transition:
 * ----------------------------------------------------------------
 * old mode		Event				New mode
 * ----------------------------------------------------------------
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_SQUEUE
 *			and udp_reader_count == 1
 *
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_MT_QUEUED
 *			and udp_reader_count > 1
 *
 * UDP_MT_QUEUED	udp_reader_count drops to zero	UDP_QUEUED_SQUEUE
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_SQUEUE
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count != 0
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_MT_HOT
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count
 *			drops to zero
 *
 * UDP_SQUEUE		udp_squeue_count drops to zero	UDP_MT_HOT
 * ----------------------------------------------------------------
 */

static queue_t *
UDP_WR(queue_t *q)
{
	ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL);
	ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next)));

	return (_WR(q)->q_next);
}

static queue_t *
UDP_RD(queue_t *q)
{
	ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(q)));
	ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL);

	return (_RD(q)->q_next);
}

#ifdef DEBUG
#define	UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller)
#else
#define	UDP_MODE_ASSERTIONS(udp, caller)
#endif

/* Invariants */
#ifdef DEBUG

uint32_t udp_count[4];

/* Context of udp_mode_assertions */
#define	UDP_ENTER		1
#define	UDP_BECOME_WRITER	2
#define	UDP_EXIT		3

static void
udp_mode_assertions(udp_t *udp, int caller)
{
	ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock));

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		/*
		 * Messages have not yet been enqueued on the internal queue,
		 * otherwise we would have switched to UDP_MT_QUEUED.  Likewise
		 * by definition, there can't be any messages enqueued on the
		 * squeue.  The UDP could be quiescent, so udp_reader_count
		 * could be zero at entry.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 &&
		    udp->udp_squeue_count == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0);
		udp_count[0]++;
		break;

	case UDP_MT_QUEUED:
		/*
		 * The last MT thread to exit the udp perimeter empties the
		 * internal queue and then switches the UDP to
		 * UDP_QUEUED_SQUEUE mode.  Since we are still in UDP_MT_QUEUED
		 * mode, it means there must be at least 1 MT thread still in
		 * the perimeter and at least 1 message on the internal queue.
		 */
		ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL &&
		    udp->udp_mpcount != 0 && udp->udp_squeue_count == 0);
		udp_count[1]++;
		break;

	case UDP_QUEUED_SQUEUE:
		/*
		 * The switch has happened from MT to SQUEUE.  So there can't
		 * be any MT threads.  Messages could still pile up on the
		 * internal queue until the transition is complete and we move
		 * to UDP_SQUEUE mode.  We can't assert on nonzero
		 * udp_squeue_count since the squeue could drain any time.
		 */
		ASSERT(udp->udp_reader_count == 0);
		udp_count[2]++;
		break;

	case UDP_SQUEUE:
		/*
		 * The transition is complete.  There can't be any messages on
		 * the internal queue.  The udp could be quiescent or the
		 * squeue could drain any time, so we can't assert on nonzero
		 * udp_squeue_count during entry.  Nor can we assert that
		 * udp_reader_count is zero, since a reader thread could have
		 * directly become writer in line by calling udp_become_writer
		 * without going through the queued states.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0);
		udp_count[3]++;
		break;
	}
}
#endif

#define	_UDP_ENTER(connp, mp, proc, tag) {				\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	if ((connp)->conn_state_flags & CONN_CLOSING) {			\
		mutex_exit(&(connp)->conn_lock);			\
		freemsg(mp);						\
	} else {							\
		UDP_MODE_ASSERTIONS(_udp, UDP_ENTER);			\
									\
		switch (_udp->udp_mode) {				\
		case UDP_MT_HOT:					\
			/* We can execute as reader right away. */	\
			UDP_READERS_INCREF(_udp);			\
			mutex_exit(&(connp)->conn_lock);		\
			(*(proc))(connp, mp, (connp)->conn_sqp);	\
			break;						\
									\
		case UDP_SQUEUE:					\
			/*						\
			 * We are in squeue mode, send the		\
			 * packet to the squeue				\
			 */						\
			_udp->udp_squeue_count++;			\
			CONN_INC_REF_LOCKED(connp);			\
			mutex_exit(&(connp)->conn_lock);		\
			squeue_enter((connp)->conn_sqp, mp, proc,	\
			    connp, tag);				\
			break;						\
									\
		case UDP_MT_QUEUED:					\
		case UDP_QUEUED_SQUEUE:					\
			/*						\
			 * Some messages may have been enqueued		\
			 * ahead of us.  Enqueue the new message	\
			 * at the tail of the internal queue to		\
			 * preserve message ordering.			\
			 */						\
			UDP_ENQUEUE_MP(_udp, mp, proc, tag);		\
			mutex_exit(&(connp)->conn_lock);		\
			break;						\
		}							\
	}								\
}

static void
udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	_UDP_ENTER(connp, mp, proc, tag);
}

static void
udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	udp_t *udp;

	udp = connp->conn_udp;

	mutex_enter(&connp->conn_lock);

	UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER);

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		if (udp->udp_reader_count == 1) {
			/*
			 * We are the only MT thread.  Switch to squeue mode
			 * immediately.
770 */ 771 udp->udp_mode = UDP_SQUEUE; 772 udp->udp_squeue_count = 1; 773 CONN_INC_REF_LOCKED(connp); 774 mutex_exit(&connp->conn_lock); 775 squeue_enter(connp->conn_sqp, mp, proc, connp, tag); 776 return; 777 } 778 /* FALLTHRU */ 779 780 case UDP_MT_QUEUED: 781 /* Enqueue the packet internally in UDP */ 782 udp->udp_mode = UDP_MT_QUEUED; 783 UDP_ENQUEUE_MP(udp, mp, proc, tag); 784 mutex_exit(&connp->conn_lock); 785 return; 786 787 case UDP_SQUEUE: 788 case UDP_QUEUED_SQUEUE: 789 /* 790 * We are already exclusive. i.e. we are already 791 * writer. Simply call the desired function. 792 */ 793 udp->udp_squeue_count++; 794 mutex_exit(&connp->conn_lock); 795 (*proc)(connp, mp, connp->conn_sqp); 796 return; 797 } 798 } 799 800 /* 801 * Transition from MT mode to SQUEUE mode, when the last MT thread 802 * is exiting the UDP perimeter. Move all messages from the internal 803 * udp queue to the squeue. A better way would be to move all the 804 * messages in one shot, this needs more support from the squeue framework 805 */ 806 static void 807 udp_switch_to_squeue(udp_t *udp) 808 { 809 mblk_t *mp; 810 mblk_t *mp_next; 811 sqproc_t proc; 812 uint8_t tag; 813 conn_t *connp = udp->udp_connp; 814 815 ASSERT(MUTEX_HELD(&connp->conn_lock)); 816 ASSERT(udp->udp_mode == UDP_MT_QUEUED); 817 while (udp->udp_mphead != NULL) { 818 mp = udp->udp_mphead; 819 udp->udp_mphead = NULL; 820 udp->udp_mptail = NULL; 821 udp->udp_mpcount = 0; 822 udp->udp_mode = UDP_QUEUED_SQUEUE; 823 mutex_exit(&connp->conn_lock); 824 /* 825 * It is best not to hold any locks across the calls 826 * to squeue functions. Since we drop the lock we 827 * need to go back and check the udp_mphead once again 828 * after the squeue_fill and hence the while loop at 829 * the top of this function 830 */ 831 for (; mp != NULL; mp = mp_next) { 832 mp_next = mp->b_next; 833 proc = (sqproc_t)mp->b_prev; 834 tag = (uint8_t)((uintptr_t)mp->b_queue); 835 mp->b_next = NULL; 836 mp->b_prev = NULL; 837 mp->b_queue = NULL; 838 CONN_INC_REF(connp); 839 udp->udp_squeue_count++; 840 squeue_fill(connp->conn_sqp, mp, proc, connp, 841 tag); 842 } 843 mutex_enter(&connp->conn_lock); 844 } 845 /* 846 * udp_squeue_count of zero implies that the squeue has drained 847 * even before we arrived here (i.e. after the squeue_fill above) 848 */ 849 udp->udp_mode = (udp->udp_squeue_count != 0) ? 850 UDP_SQUEUE : UDP_MT_HOT; 851 } 852 853 #define _UDP_EXIT(connp) { \ 854 udp_t *_udp = (connp)->conn_udp; \ 855 \ 856 mutex_enter(&(connp)->conn_lock); \ 857 UDP_MODE_ASSERTIONS(_udp, UDP_EXIT); \ 858 \ 859 switch (_udp->udp_mode) { \ 860 case UDP_MT_HOT: \ 861 UDP_READERS_DECREF(_udp); \ 862 mutex_exit(&(connp)->conn_lock); \ 863 break; \ 864 \ 865 case UDP_SQUEUE: \ 866 UDP_SQUEUE_DECREF(_udp); \ 867 if (_udp->udp_squeue_count == 0) \ 868 _udp->udp_mode = UDP_MT_HOT; \ 869 mutex_exit(&(connp)->conn_lock); \ 870 break; \ 871 \ 872 case UDP_MT_QUEUED: \ 873 /* \ 874 * If this is the last MT thread, we need to \ 875 * switch to squeue mode \ 876 */ \ 877 UDP_READERS_DECREF(_udp); \ 878 if (_udp->udp_reader_count == 0) \ 879 udp_switch_to_squeue(_udp); \ 880 mutex_exit(&(connp)->conn_lock); \ 881 break; \ 882 \ 883 case UDP_QUEUED_SQUEUE: \ 884 UDP_SQUEUE_DECREF(_udp); \ 885 /* \ 886 * Even if the udp_squeue_count drops to zero, we \ 887 * don't want to change udp_mode to UDP_MT_HOT here. \ 888 * The thread in udp_switch_to_squeue will take care \ 889 * of the transition to UDP_MT_HOT, after emptying \ 890 * any more new messages that have been enqueued in \ 891 * udp_mphead. 
\ 892 */ \ 893 mutex_exit(&(connp)->conn_lock); \ 894 break; \ 895 } \ 896 } 897 898 static void 899 udp_exit(conn_t *connp) 900 { 901 _UDP_EXIT(connp); 902 } 903 904 /* 905 * Return the next anonymous port in the priviledged port range for 906 * bind checking. 907 */ 908 static in_port_t 909 udp_get_next_priv_port(void) 910 { 911 static in_port_t next_priv_port = IPPORT_RESERVED - 1; 912 913 if (next_priv_port < udp_min_anonpriv_port) { 914 next_priv_port = IPPORT_RESERVED - 1; 915 } 916 return (next_priv_port--); 917 } 918 919 /* UDP bind hash report triggered via the Named Dispatch mechanism. */ 920 /* ARGSUSED */ 921 static int 922 udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 923 { 924 udp_fanout_t *udpf; 925 int i; 926 zoneid_t zoneid; 927 conn_t *connp; 928 udp_t *udp; 929 930 connp = Q_TO_CONN(q); 931 udp = connp->conn_udp; 932 933 /* Refer to comments in udp_status_report(). */ 934 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 935 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 936 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 937 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 938 return (0); 939 } 940 } 941 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 942 /* The following may work even if we cannot get a large buf. */ 943 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 944 return (0); 945 } 946 947 (void) mi_mpprintf(mp, 948 "UDP " MI_COL_HDRPAD_STR 949 /* 12345678[89ABCDEF] */ 950 " zone lport src addr dest addr port state"); 951 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 952 953 zoneid = connp->conn_zoneid; 954 955 for (i = 0; i < udp_bind_fanout_size; i++) { 956 udpf = &udp_bind_fanout[i]; 957 mutex_enter(&udpf->uf_lock); 958 959 /* Print the hash index. */ 960 udp = udpf->uf_udp; 961 if (zoneid != GLOBAL_ZONEID) { 962 /* skip to first entry in this zone; might be none */ 963 while (udp != NULL && 964 udp->udp_connp->conn_zoneid != zoneid) 965 udp = udp->udp_bind_hash; 966 } 967 if (udp != NULL) { 968 uint_t print_len, buf_len; 969 970 buf_len = mp->b_cont->b_datap->db_lim - 971 mp->b_cont->b_wptr; 972 print_len = snprintf((char *)mp->b_cont->b_wptr, 973 buf_len, "%d\n", i); 974 if (print_len < buf_len) { 975 mp->b_cont->b_wptr += print_len; 976 } else { 977 mp->b_cont->b_wptr += buf_len; 978 } 979 for (; udp != NULL; udp = udp->udp_bind_hash) { 980 if (zoneid == GLOBAL_ZONEID || 981 zoneid == udp->udp_connp->conn_zoneid) 982 udp_report_item(mp->b_cont, udp); 983 } 984 } 985 mutex_exit(&udpf->uf_lock); 986 } 987 udp_last_ndd_get_info_time = ddi_get_lbolt(); 988 return (0); 989 } 990 991 /* 992 * Hash list removal routine for udp_t structures. 993 */ 994 static void 995 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock) 996 { 997 udp_t *udpnext; 998 kmutex_t *lockp; 999 1000 if (udp->udp_ptpbhn == NULL) 1001 return; 1002 1003 /* 1004 * Extract the lock pointer in case there are concurrent 1005 * hash_remove's for this instance. 
	 */
	ASSERT(udp->udp_port != 0);
	if (!caller_holds_lock) {
		lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock;
		ASSERT(lockp != NULL);
		mutex_enter(lockp);
	}
	if (udp->udp_ptpbhn != NULL) {
		udpnext = udp->udp_bind_hash;
		if (udpnext != NULL) {
			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
			udp->udp_bind_hash = NULL;
		}
		*udp->udp_ptpbhn = udpnext;
		udp->udp_ptpbhn = NULL;
	}
	if (!caller_holds_lock) {
		mutex_exit(lockp);
	}
}

static void
udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
{
	udp_t	**udpp;
	udp_t	*udpnext;

	ASSERT(MUTEX_HELD(&uf->uf_lock));
	if (udp->udp_ptpbhn != NULL) {
		udp_bind_hash_remove(udp, B_TRUE);
	}
	udpp = &uf->uf_udp;
	udpnext = udpp[0];
	if (udpnext != NULL) {
		/*
		 * If the new udp is bound to the INADDR_ANY address
		 * and the first one in the list is not bound to
		 * INADDR_ANY we skip all entries until we find the
		 * first one bound to INADDR_ANY.
		 * This makes sure that applications binding to a
		 * specific address get preference over those binding to
		 * INADDR_ANY.
		 */
		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
			while ((udpnext = udpp[0]) != NULL &&
			    !V6_OR_V4_INADDR_ANY(
			    udpnext->udp_bound_v6src)) {
				udpp = &(udpnext->udp_bind_hash);
			}
			if (udpnext != NULL)
				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		} else {
			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		}
	}
	udp->udp_bind_hash = udpnext;
	udp->udp_ptpbhn = udpp;
	udpp[0] = udp;
}

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR.  This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating udp_g_next_port_to_try.
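 *
 * For illustration only: once a local port has been selected, the endpoint
 * is linked into the bucket
 *
 *	udp_bind_fanout[UDP_BIND_HASH(htons(port))]
 *
 * by udp_bind_hash_insert() above, with endpoints bound to a specific
 * address kept ahead of those bound to INADDR_ANY.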
1081 */ 1082 static void 1083 udp_bind(queue_t *q, mblk_t *mp) 1084 { 1085 sin_t *sin; 1086 sin6_t *sin6; 1087 mblk_t *mp1; 1088 in_port_t port; /* Host byte order */ 1089 in_port_t requested_port; /* Host byte order */ 1090 struct T_bind_req *tbr; 1091 int count; 1092 in6_addr_t v6src; 1093 boolean_t bind_to_req_port_only; 1094 int loopmax; 1095 udp_fanout_t *udpf; 1096 in_port_t lport; /* Network byte order */ 1097 zoneid_t zoneid; 1098 conn_t *connp; 1099 udp_t *udp; 1100 1101 connp = Q_TO_CONN(q); 1102 udp = connp->conn_udp; 1103 if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) { 1104 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1105 "udp_bind: bad req, len %u", 1106 (uint_t)(mp->b_wptr - mp->b_rptr)); 1107 udp_err_ack(q, mp, TPROTO, 0); 1108 return; 1109 } 1110 1111 if (udp->udp_state != TS_UNBND) { 1112 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1113 "udp_bind: bad state, %u", udp->udp_state); 1114 udp_err_ack(q, mp, TOUTSTATE, 0); 1115 return; 1116 } 1117 /* 1118 * Reallocate the message to make sure we have enough room for an 1119 * address and the protocol type. 1120 */ 1121 mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1); 1122 if (!mp1) { 1123 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1124 return; 1125 } 1126 1127 mp = mp1; 1128 tbr = (struct T_bind_req *)mp->b_rptr; 1129 switch (tbr->ADDR_length) { 1130 case 0: /* Request for a generic port */ 1131 tbr->ADDR_offset = sizeof (struct T_bind_req); 1132 if (udp->udp_family == AF_INET) { 1133 tbr->ADDR_length = sizeof (sin_t); 1134 sin = (sin_t *)&tbr[1]; 1135 *sin = sin_null; 1136 sin->sin_family = AF_INET; 1137 mp->b_wptr = (uchar_t *)&sin[1]; 1138 } else { 1139 ASSERT(udp->udp_family == AF_INET6); 1140 tbr->ADDR_length = sizeof (sin6_t); 1141 sin6 = (sin6_t *)&tbr[1]; 1142 *sin6 = sin6_null; 1143 sin6->sin6_family = AF_INET6; 1144 mp->b_wptr = (uchar_t *)&sin6[1]; 1145 } 1146 port = 0; 1147 break; 1148 1149 case sizeof (sin_t): /* Complete IPv4 address */ 1150 sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset, 1151 sizeof (sin_t)); 1152 if (sin == NULL || !OK_32PTR((char *)sin)) { 1153 udp_err_ack(q, mp, TSYSERR, EINVAL); 1154 return; 1155 } 1156 if (udp->udp_family != AF_INET || 1157 sin->sin_family != AF_INET) { 1158 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1159 return; 1160 } 1161 port = ntohs(sin->sin_port); 1162 break; 1163 1164 case sizeof (sin6_t): /* complete IPv6 address */ 1165 sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset, 1166 sizeof (sin6_t)); 1167 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1168 udp_err_ack(q, mp, TSYSERR, EINVAL); 1169 return; 1170 } 1171 if (udp->udp_family != AF_INET6 || 1172 sin6->sin6_family != AF_INET6) { 1173 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1174 return; 1175 } 1176 port = ntohs(sin6->sin6_port); 1177 break; 1178 1179 default: /* Invalid request */ 1180 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1181 "udp_bind: bad ADDR_length length %u", tbr->ADDR_length); 1182 udp_err_ack(q, mp, TBADADDR, 0); 1183 return; 1184 } 1185 1186 requested_port = port; 1187 1188 if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ) 1189 bind_to_req_port_only = B_FALSE; 1190 else /* T_BIND_REQ and requested_port != 0 */ 1191 bind_to_req_port_only = B_TRUE; 1192 1193 if (requested_port == 0) { 1194 /* 1195 * If the application passed in zero for the port number, it 1196 * doesn't care which port number we bind to. Get one in the 1197 * valid range. 
		 */
		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port();
		} else {
			port = udp_update_next_port(udp_g_next_port_to_try,
			    B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < udp_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			for (i = 0; i < udp_g_num_epriv_ports; i++) {
				if (port == udp_g_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);

			if (secpolicy_net_privaddr(cr, port) != 0) {
				udp_err_ack(q, mp, TACCES, 0);
				return;
			}
		}
	}

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it.
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream.  If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only".  If set the bind fails; otherwise we
	 * search for an unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different from SunOS 4.X, which did not
	 * support IP multicast.  Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */
		loopmax = IPPORT_RESERVED - udp_min_anonpriv_port;
	} else {
		loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1;
	}

	zoneid = connp->conn_zoneid;
	for (;;) {
		udp_t		*udp1;
		boolean_t	is_inaddr_any;
		boolean_t	found_exclbind = B_FALSE;

		is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port ||
			    zoneid != udp1->udp_connp->conn_zoneid)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * is changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces, if *_EXCLBIND
			 * is not set and only if the application binds to a
			 * specific port.  We use the same autoassigned port
			 * number space for IPv4 and IPv6 sockets.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion &&
			    bind_to_req_port_only)
				continue;

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If the existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct, different IP address
			 * (non-wildcard as well), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			break;
		}

		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number.  We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when the requested port
			 * is bound (and only on the first iteration
			 * of the for() loop).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 */
			udp_err_ack(q, mp, TADDRBUSY, 0);
			return;
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port();
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with.  Set
				 * requested_port to 0, so that we will
				 * update udp_g_next_port_to_try below.
				 */
				port = udp_update_next_port(
				    udp_g_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(port + 1, B_FALSE);
			}
		}

		if (++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp_err_ack(q, mp, TNOADDR, 0);
			return;
		}
	}

	/*
	 * Copy the source address into our udp structure.  This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
1426 * If we are binding to a broadcast or multicast address udp_rput 1427 * will clear the source address when it receives the T_BIND_ACK. 1428 */ 1429 udp->udp_v6src = udp->udp_bound_v6src = v6src; 1430 udp->udp_port = lport; 1431 /* 1432 * Now reset the the next anonymous port if the application requested 1433 * an anonymous port, or we handed out the next anonymous port. 1434 */ 1435 if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) { 1436 udp_g_next_port_to_try = port + 1; 1437 } 1438 1439 /* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */ 1440 if (udp->udp_family == AF_INET) { 1441 sin->sin_port = udp->udp_port; 1442 } else { 1443 int error; 1444 1445 sin6->sin6_port = udp->udp_port; 1446 /* Rebuild the header template */ 1447 error = udp_build_hdrs(q, udp); 1448 if (error != 0) { 1449 mutex_exit(&udpf->uf_lock); 1450 udp_err_ack(q, mp, TSYSERR, error); 1451 return; 1452 } 1453 } 1454 udp->udp_state = TS_IDLE; 1455 udp_bind_hash_insert(udpf, udp); 1456 mutex_exit(&udpf->uf_lock); 1457 1458 if (cl_inet_bind) { 1459 /* 1460 * Running in cluster mode - register bind information 1461 */ 1462 if (udp->udp_ipversion == IPV4_VERSION) { 1463 (*cl_inet_bind)(IPPROTO_UDP, AF_INET, 1464 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 1465 (in_port_t)udp->udp_port); 1466 } else { 1467 (*cl_inet_bind)(IPPROTO_UDP, AF_INET6, 1468 (uint8_t *)&(udp->udp_v6src), 1469 (in_port_t)udp->udp_port); 1470 } 1471 1472 } 1473 1474 /* Pass the protocol number in the message following the address. */ 1475 *mp->b_wptr++ = IPPROTO_UDP; 1476 if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 1477 /* 1478 * Append a request for an IRE if udp_v6src not 1479 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address). 1480 */ 1481 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 1482 if (!mp->b_cont) { 1483 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1484 return; 1485 } 1486 mp->b_cont->b_wptr += sizeof (ire_t); 1487 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 1488 } 1489 if (udp->udp_family == AF_INET6) 1490 mp = ip_bind_v6(q, mp, connp, NULL); 1491 else 1492 mp = ip_bind_v4(q, mp, connp); 1493 1494 if (mp != NULL) 1495 udp_rput_other(_RD(q), mp); 1496 else 1497 CONN_INC_REF(connp); 1498 } 1499 1500 1501 void 1502 udp_resume_bind(conn_t *connp, mblk_t *mp) 1503 { 1504 udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY); 1505 } 1506 1507 /* 1508 * This is called from ip_wput_nondata to resume a deferred UDP bind. 1509 */ 1510 /* ARGSUSED */ 1511 static void 1512 udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2) 1513 { 1514 conn_t *connp = arg; 1515 1516 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1517 1518 udp_rput_other(connp->conn_rq, mp); 1519 1520 CONN_OPER_PENDING_DONE(connp); 1521 udp_exit(connp); 1522 } 1523 1524 /* 1525 * This routine handles each T_CONN_REQ message passed to udp. It 1526 * associates a default destination address with the stream. 1527 * 1528 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1529 * T_BIND_REQ - specifying local and remote address/port 1530 * IRE_DB_REQ_TYPE - to get an IRE back containing ire_type and src 1531 * T_OK_ACK - for the T_CONN_REQ 1532 * T_CONN_CON - to keep the TPI user happy 1533 * 1534 * The connect completes in udp_rput. 1535 * When a T_BIND_ACK is received information is extracted from the IRE 1536 * and the two appended messages are sent to the TPI user. 1537 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1538 * it to an error ack for the appropriate primitive. 
1539 */ 1540 static void 1541 udp_connect(queue_t *q, mblk_t *mp) 1542 { 1543 sin6_t *sin6; 1544 sin_t *sin; 1545 struct T_conn_req *tcr; 1546 in6_addr_t v6dst; 1547 ipaddr_t v4dst; 1548 uint16_t dstport; 1549 uint32_t flowinfo; 1550 mblk_t *mp1, *mp2; 1551 udp_fanout_t *udpf; 1552 udp_t *udp, *udp1; 1553 1554 udp = Q_TO_UDP(q); 1555 1556 tcr = (struct T_conn_req *)mp->b_rptr; 1557 1558 /* A bit of sanity checking */ 1559 if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) { 1560 udp_err_ack(q, mp, TPROTO, 0); 1561 return; 1562 } 1563 /* 1564 * This UDP must have bound to a port already before doing 1565 * a connect. 1566 */ 1567 if (udp->udp_state == TS_UNBND) { 1568 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1569 "udp_connect: bad state, %u", udp->udp_state); 1570 udp_err_ack(q, mp, TOUTSTATE, 0); 1571 return; 1572 } 1573 ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL); 1574 1575 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1576 1577 if (udp->udp_state == TS_DATA_XFER) { 1578 /* Already connected - clear out state */ 1579 mutex_enter(&udpf->uf_lock); 1580 udp->udp_v6src = udp->udp_bound_v6src; 1581 udp->udp_state = TS_IDLE; 1582 mutex_exit(&udpf->uf_lock); 1583 } 1584 1585 if (tcr->OPT_length != 0) { 1586 udp_err_ack(q, mp, TBADOPT, 0); 1587 return; 1588 } 1589 1590 /* 1591 * Determine packet type based on type of address passed in 1592 * the request should contain an IPv4 or IPv6 address. 1593 * Make sure that address family matches the type of 1594 * family of the the address passed down 1595 */ 1596 switch (tcr->DEST_length) { 1597 default: 1598 udp_err_ack(q, mp, TBADADDR, 0); 1599 return; 1600 1601 case sizeof (sin_t): 1602 sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset, 1603 sizeof (sin_t)); 1604 if (sin == NULL || !OK_32PTR((char *)sin)) { 1605 udp_err_ack(q, mp, TSYSERR, EINVAL); 1606 return; 1607 } 1608 if (udp->udp_family != AF_INET || 1609 sin->sin_family != AF_INET) { 1610 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1611 return; 1612 } 1613 v4dst = sin->sin_addr.s_addr; 1614 dstport = sin->sin_port; 1615 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1616 ASSERT(udp->udp_ipversion == IPV4_VERSION); 1617 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 1618 udp->udp_ip_snd_options_len; 1619 break; 1620 1621 case sizeof (sin6_t): 1622 sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset, 1623 sizeof (sin6_t)); 1624 if (sin6 == NULL || !OK_32PTR((char *)sin6)) { 1625 udp_err_ack(q, mp, TSYSERR, EINVAL); 1626 return; 1627 } 1628 if (udp->udp_family != AF_INET6 || 1629 sin6->sin6_family != AF_INET6) { 1630 udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT); 1631 return; 1632 } 1633 v6dst = sin6->sin6_addr; 1634 if (IN6_IS_ADDR_V4MAPPED(&v6dst)) { 1635 IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst); 1636 udp->udp_ipversion = IPV4_VERSION; 1637 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 1638 UDPH_SIZE + udp->udp_ip_snd_options_len; 1639 flowinfo = 0; 1640 } else { 1641 udp->udp_ipversion = IPV6_VERSION; 1642 udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len; 1643 flowinfo = sin6->sin6_flowinfo; 1644 } 1645 dstport = sin6->sin6_port; 1646 break; 1647 } 1648 if (dstport == 0) { 1649 udp_err_ack(q, mp, TBADADDR, 0); 1650 return; 1651 } 1652 1653 /* 1654 * Create a default IP header with no IP options. 1655 */ 1656 udp->udp_dstport = dstport; 1657 if (udp->udp_ipversion == IPV4_VERSION) { 1658 /* 1659 * Interpret a zero destination to mean loopback. 1660 * Update the T_CONN_REQ (sin/sin6) since it is used to 1661 * generate the T_CONN_CON. 
1662 */ 1663 if (v4dst == INADDR_ANY) { 1664 v4dst = htonl(INADDR_LOOPBACK); 1665 IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst); 1666 if (udp->udp_family == AF_INET) { 1667 sin->sin_addr.s_addr = v4dst; 1668 } else { 1669 sin6->sin6_addr = v6dst; 1670 } 1671 } 1672 udp->udp_v6dst = v6dst; 1673 udp->udp_flowinfo = 0; 1674 1675 /* 1676 * If the destination address is multicast and 1677 * an outgoing multicast interface has been set, 1678 * use the address of that interface as our 1679 * source address if no source address has been set. 1680 */ 1681 if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY && 1682 CLASSD(v4dst) && 1683 udp->udp_multicast_if_addr != INADDR_ANY) { 1684 IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr, 1685 &udp->udp_v6src); 1686 } 1687 } else { 1688 ASSERT(udp->udp_ipversion == IPV6_VERSION); 1689 /* 1690 * Interpret a zero destination to mean loopback. 1691 * Update the T_CONN_REQ (sin/sin6) since it is used to 1692 * generate the T_CONN_CON. 1693 */ 1694 if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) { 1695 v6dst = ipv6_loopback; 1696 sin6->sin6_addr = v6dst; 1697 } 1698 udp->udp_v6dst = v6dst; 1699 udp->udp_flowinfo = flowinfo; 1700 /* 1701 * If the destination address is multicast and 1702 * an outgoing multicast interface has been set, 1703 * then the ip bind logic will pick the correct source 1704 * address (i.e. matching the outgoing multicast interface). 1705 */ 1706 } 1707 1708 /* 1709 * Verify that the src/port/dst/port is unique for all 1710 * connections in TS_DATA_XFER 1711 */ 1712 mutex_enter(&udpf->uf_lock); 1713 for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) { 1714 if (udp1->udp_state != TS_DATA_XFER) 1715 continue; 1716 if (udp->udp_port != udp1->udp_port || 1717 udp->udp_ipversion != udp1->udp_ipversion || 1718 dstport != udp1->udp_dstport || 1719 !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) || 1720 !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst)) 1721 continue; 1722 mutex_exit(&udpf->uf_lock); 1723 udp_err_ack(q, mp, TBADADDR, 0); 1724 return; 1725 } 1726 udp->udp_state = TS_DATA_XFER; 1727 mutex_exit(&udpf->uf_lock); 1728 1729 /* 1730 * Send down bind to IP to verify that there is a route 1731 * and to determine the source address. 1732 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput. 1733 */ 1734 if (udp->udp_family == AF_INET) 1735 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t)); 1736 else 1737 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t)); 1738 if (mp1 == NULL) { 1739 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1740 bind_failed: 1741 mutex_enter(&udpf->uf_lock); 1742 udp->udp_state = TS_IDLE; 1743 mutex_exit(&udpf->uf_lock); 1744 return; 1745 } 1746 1747 /* 1748 * We also have to send a connection confirmation to 1749 * keep TLI happy. Prepare it for udp_rput. 1750 */ 1751 if (udp->udp_family == AF_INET) 1752 mp2 = mi_tpi_conn_con(NULL, (char *)sin, 1753 sizeof (*sin), NULL, 0); 1754 else 1755 mp2 = mi_tpi_conn_con(NULL, (char *)sin6, 1756 sizeof (*sin6), NULL, 0); 1757 if (mp2 == NULL) { 1758 freemsg(mp1); 1759 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1760 goto bind_failed; 1761 } 1762 1763 mp = mi_tpi_ok_ack_alloc(mp); 1764 if (mp == NULL) { 1765 /* Unable to reuse the T_CONN_REQ for the ack. */ 1766 freemsg(mp2); 1767 udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM); 1768 goto bind_failed; 1769 } 1770 1771 /* Hang onto the T_OK_ACK and T_CONN_CON for later. 
*/ 1772 linkb(mp1, mp); 1773 linkb(mp1, mp2); 1774 1775 if (udp->udp_family == AF_INET) 1776 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1777 else 1778 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1779 1780 if (mp1 != NULL) 1781 udp_rput_other(_RD(q), mp1); 1782 else 1783 CONN_INC_REF(udp->udp_connp); 1784 } 1785 1786 static int 1787 udp_close(queue_t *q) 1788 { 1789 conn_t *connp = Q_TO_CONN(UDP_WR(q)); 1790 udp_t *udp; 1791 queue_t *ip_rq = RD(UDP_WR(q)); 1792 1793 ASSERT(connp != NULL && IPCL_IS_UDP(connp)); 1794 udp = connp->conn_udp; 1795 1796 ip_quiesce_conn(connp); 1797 /* 1798 * Disable read-side synchronous stream 1799 * interface and drain any queued data. 1800 */ 1801 udp_rcv_drain(q, udp, B_TRUE); 1802 ASSERT(!udp->udp_direct_sockfs); 1803 1804 qprocsoff(q); 1805 1806 /* restore IP module's high and low water marks to default values */ 1807 ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat; 1808 WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat; 1809 WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat; 1810 1811 ASSERT(udp->udp_rcv_cnt == 0); 1812 ASSERT(udp->udp_rcv_msgcnt == 0); 1813 ASSERT(udp->udp_rcv_list_head == NULL); 1814 ASSERT(udp->udp_rcv_list_tail == NULL); 1815 1816 /* connp is now single threaded. */ 1817 udp_close_free(connp); 1818 /* 1819 * Restore connp as an IP endpoint. We don't need 1820 * any locks since we are now single threaded 1821 */ 1822 connp->conn_flags &= ~IPCL_UDP; 1823 connp->conn_state_flags &= 1824 ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED); 1825 return (0); 1826 } 1827 1828 /* 1829 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn 1830 */ 1831 void 1832 udp_quiesce_conn(conn_t *connp) 1833 { 1834 udp_t *udp = connp->conn_udp; 1835 1836 if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) { 1837 /* 1838 * Running in cluster mode - register unbind information 1839 */ 1840 if (udp->udp_ipversion == IPV4_VERSION) { 1841 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 1842 (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))), 1843 (in_port_t)udp->udp_port); 1844 } else { 1845 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 1846 (uint8_t *)(&(udp->udp_v6src)), 1847 (in_port_t)udp->udp_port); 1848 } 1849 } 1850 1851 udp_bind_hash_remove(udp, B_FALSE); 1852 1853 mutex_enter(&connp->conn_lock); 1854 while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 || 1855 udp->udp_mode != UDP_MT_HOT) { 1856 cv_wait(&connp->conn_cv, &connp->conn_lock); 1857 } 1858 mutex_exit(&connp->conn_lock); 1859 } 1860 1861 void 1862 udp_close_free(conn_t *connp) 1863 { 1864 udp_t *udp = connp->conn_udp; 1865 1866 /* If there are any options associated with the stream, free them. 
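 *
 * (No locks are needed for any of this teardown: by the time
 * udp_close_free() runs, the close path has already quiesced the
 * conn_t, so the udp_t is effectively single threaded here.)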
*/ 1867 if (udp->udp_ip_snd_options) { 1868 mi_free((char *)udp->udp_ip_snd_options); 1869 udp->udp_ip_snd_options = NULL; 1870 } 1871
1872 if (udp->udp_ip_rcv_options) { 1873 mi_free((char *)udp->udp_ip_rcv_options); 1874 udp->udp_ip_rcv_options = NULL; 1875 } 1876
1877 /* Free memory associated with sticky options */ 1878 if (udp->udp_sticky_hdrs_len != 0) { 1879 kmem_free(udp->udp_sticky_hdrs, 1880 udp->udp_sticky_hdrs_len); 1881 udp->udp_sticky_hdrs = NULL; 1882 udp->udp_sticky_hdrs_len = 0; 1883 } 1884
1885 if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 1886 kmem_free(udp->udp_sticky_ipp.ipp_hopopts, 1887 udp->udp_sticky_ipp.ipp_hopoptslen); 1888 udp->udp_sticky_ipp.ipp_hopopts = NULL; 1889 } 1890 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 1891 kmem_free(udp->udp_sticky_ipp.ipp_rtdstopts, 1892 udp->udp_sticky_ipp.ipp_rtdstoptslen); 1893 udp->udp_sticky_ipp.ipp_rtdstopts = NULL; 1894 } 1895 if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 1896 kmem_free(udp->udp_sticky_ipp.ipp_rthdr, 1897 udp->udp_sticky_ipp.ipp_rthdrlen); 1898 udp->udp_sticky_ipp.ipp_rthdr = NULL; 1899 } 1900 if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 1901 kmem_free(udp->udp_sticky_ipp.ipp_dstopts, 1902 udp->udp_sticky_ipp.ipp_dstoptslen); 1903 udp->udp_sticky_ipp.ipp_dstopts = NULL; 1904 } 1905 udp->udp_sticky_ipp.ipp_fields &= 1906 ~(IPPF_HOPOPTS|IPPF_RTDSTOPTS|IPPF_RTHDR|IPPF_DSTOPTS); 1907
1908 udp->udp_connp = NULL; 1909 connp->conn_udp = NULL; 1910 kmem_cache_free(udp_cache, udp); 1911 } 1912
1913 /* 1914 * This routine handles each T_DISCON_REQ message passed to udp 1915 * as an indication that UDP is no longer connected. This results 1916 * in sending a T_BIND_REQ to IP to restore the binding to just 1917 * the local address/port. 1918 * 1919 * This routine sends down a T_BIND_REQ to IP with the following mblks: 1920 * T_BIND_REQ - specifying just the local address/port 1921 * T_OK_ACK - for the T_DISCON_REQ 1922 * 1923 * The disconnect completes in udp_rput. 1924 * When a T_BIND_ACK is received, the appended T_OK_ACK is sent to the TPI user. 1925 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert 1926 * it to an error ack for the appropriate primitive. 1927 */ 1928 static void 1929 udp_disconnect(queue_t *q, mblk_t *mp) 1930 { 1931 udp_t *udp = Q_TO_UDP(q); 1932 mblk_t *mp1; 1933 udp_fanout_t *udpf; 1934
1935 if (udp->udp_state != TS_DATA_XFER) { 1936 (void) mi_strlog(q, 1, SL_ERROR|SL_TRACE, 1937 "udp_disconnect: bad state, %u", udp->udp_state); 1938 udp_err_ack(q, mp, TOUTSTATE, 0); 1939 return; 1940 } 1941 udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)]; 1942 mutex_enter(&udpf->uf_lock); 1943 udp->udp_v6src = udp->udp_bound_v6src; 1944 udp->udp_state = TS_IDLE; 1945 mutex_exit(&udpf->uf_lock); 1946
1947 /* 1948 * Send down bind to IP to remove the full binding and revert 1949 * to the local address binding. 1950 */ 1951 if (udp->udp_family == AF_INET) 1952 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t)); 1953 else 1954 mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t)); 1955 if (mp1 == NULL) { 1956 udp_err_ack(q, mp, TSYSERR, ENOMEM); 1957 return; 1958 } 1959 mp = mi_tpi_ok_ack_alloc(mp); 1960 if (mp == NULL) { 1961 /* Unable to reuse the T_DISCON_REQ for the ack.
*/ 1962 udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM); 1963 return; 1964 } 1965 1966 if (udp->udp_family == AF_INET6) { 1967 int error; 1968 1969 /* Rebuild the header template */ 1970 error = udp_build_hdrs(q, udp); 1971 if (error != 0) { 1972 udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error); 1973 freemsg(mp1); 1974 return; 1975 } 1976 } 1977 mutex_enter(&udpf->uf_lock); 1978 udp->udp_discon_pending = 1; 1979 mutex_exit(&udpf->uf_lock); 1980 1981 /* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */ 1982 linkb(mp1, mp); 1983 1984 if (udp->udp_family == AF_INET6) 1985 mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL); 1986 else 1987 mp1 = ip_bind_v4(q, mp1, udp->udp_connp); 1988 1989 if (mp1 != NULL) 1990 udp_rput_other(_RD(q), mp1); 1991 else 1992 CONN_INC_REF(udp->udp_connp); 1993 } 1994 1995 /* This routine creates a T_ERROR_ACK message and passes it upstream. */ 1996 static void 1997 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 1998 { 1999 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 2000 putnext(UDP_RD(q), mp); 2001 } 2002 2003 /* Shorthand to generate and send TPI error acks to our client */ 2004 static void 2005 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2006 int sys_error) 2007 { 2008 struct T_error_ack *teackp; 2009 2010 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2011 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2012 teackp = (struct T_error_ack *)mp->b_rptr; 2013 teackp->ERROR_prim = primitive; 2014 teackp->TLI_error = t_error; 2015 teackp->UNIX_error = sys_error; 2016 putnext(UDP_RD(q), mp); 2017 } 2018 } 2019 2020 /*ARGSUSED*/ 2021 static int 2022 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2023 { 2024 int i; 2025 2026 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2027 if (udp_g_epriv_ports[i] != 0) 2028 (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); 2029 } 2030 return (0); 2031 } 2032 2033 /* ARGSUSED */ 2034 static int 2035 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2036 cred_t *cr) 2037 { 2038 long new_value; 2039 int i; 2040 2041 /* 2042 * Fail the request if the new value does not lie within the 2043 * port number limits. 2044 */ 2045 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2046 new_value <= 0 || new_value >= 65536) { 2047 return (EINVAL); 2048 } 2049 2050 /* Check if the value is already in the list */ 2051 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2052 if (new_value == udp_g_epriv_ports[i]) { 2053 return (EEXIST); 2054 } 2055 } 2056 /* Find an empty slot */ 2057 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2058 if (udp_g_epriv_ports[i] == 0) 2059 break; 2060 } 2061 if (i == udp_g_num_epriv_ports) { 2062 return (EOVERFLOW); 2063 } 2064 2065 /* Set the new value */ 2066 udp_g_epriv_ports[i] = (in_port_t)new_value; 2067 return (0); 2068 } 2069 2070 /* ARGSUSED */ 2071 static int 2072 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2073 cred_t *cr) 2074 { 2075 long new_value; 2076 int i; 2077 2078 /* 2079 * Fail the request if the new value does not lie within the 2080 * port number limits. 
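 *
 * The value parsed by ddi_strtol() below must fall within 1..65535.
 * As a hedged illustration of how this handler is normally reached
 * from userland (assuming the usual ND variable name):
 *
 *	ndd -set /dev/udp udp_extra_priv_ports_del 4045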
2081 */ 2082 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2083 new_value <= 0 || new_value >= 65536) { 2084 return (EINVAL); 2085 } 2086 2087 /* Check that the value is already in the list */ 2088 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2089 if (udp_g_epriv_ports[i] == new_value) 2090 break; 2091 } 2092 if (i == udp_g_num_epriv_ports) { 2093 return (ESRCH); 2094 } 2095 2096 /* Clear the value */ 2097 udp_g_epriv_ports[i] = 0; 2098 return (0); 2099 } 2100 2101 /* At minimum we need 4 bytes of UDP header */ 2102 #define ICMP_MIN_UDP_HDR 4 2103 2104 /* 2105 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2106 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2107 * Assumes that IP has pulled up everything up to and including the ICMP header. 2108 * An M_CTL could potentially come here from some other module (i.e. if UDP 2109 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2110 * does not have enough ICMP information , following STREAMS conventions, 2111 * we send it upstream assuming it is an M_CTL we don't understand. 2112 */ 2113 static void 2114 udp_icmp_error(queue_t *q, mblk_t *mp) 2115 { 2116 icmph_t *icmph; 2117 ipha_t *ipha; 2118 int iph_hdr_length; 2119 udpha_t *udpha; 2120 sin_t sin; 2121 sin6_t sin6; 2122 mblk_t *mp1; 2123 int error = 0; 2124 size_t mp_size = MBLKL(mp); 2125 udp_t *udp = Q_TO_UDP(q); 2126 2127 /* 2128 * Assume IP provides aligned packets - otherwise toss 2129 */ 2130 if (!OK_32PTR(mp->b_rptr)) { 2131 freemsg(mp); 2132 return; 2133 } 2134 2135 /* 2136 * Verify that we have a complete IP header and the application has 2137 * asked for errors. If not, send it upstream. 2138 */ 2139 if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) { 2140 noticmpv4: 2141 putnext(UDP_RD(q), mp); 2142 return; 2143 } 2144 2145 ipha = (ipha_t *)mp->b_rptr; 2146 /* 2147 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent 2148 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6. 2149 */ 2150 switch (IPH_HDR_VERSION(ipha)) { 2151 case IPV6_VERSION: 2152 udp_icmp_error_ipv6(q, mp); 2153 return; 2154 case IPV4_VERSION: 2155 break; 2156 default: 2157 goto noticmpv4; 2158 } 2159 2160 /* Skip past the outer IP and ICMP headers */ 2161 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2162 icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length]; 2163 /* 2164 * If we don't have the correct outer IP header length or if the ULP 2165 * is not IPPROTO_ICMP or if we don't have a complete inner IP header 2166 * send the packet upstream. 2167 */ 2168 if (iph_hdr_length < sizeof (ipha_t) || 2169 ipha->ipha_protocol != IPPROTO_ICMP || 2170 (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) { 2171 goto noticmpv4; 2172 } 2173 ipha = (ipha_t *)&icmph[1]; 2174 2175 /* Skip past the inner IP and find the ULP header */ 2176 iph_hdr_length = IPH_HDR_LENGTH(ipha); 2177 udpha = (udpha_t *)((char *)ipha + iph_hdr_length); 2178 /* 2179 * If we don't have the correct inner IP header length or if the ULP 2180 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR 2181 * bytes of UDP header, send it upstream. 2182 */ 2183 if (iph_hdr_length < sizeof (ipha_t) || 2184 ipha->ipha_protocol != IPPROTO_UDP || 2185 (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) { 2186 goto noticmpv4; 2187 } 2188 2189 switch (icmph->icmph_type) { 2190 case ICMP_DEST_UNREACHABLE: 2191 switch (icmph->icmph_code) { 2192 case ICMP_FRAGMENTATION_NEEDED: 2193 /* 2194 * IP has already adjusted the path MTU. 
2195 * XXX Somehow pass MTU indication to application? 2196 */ 2197 break; 2198 case ICMP_PORT_UNREACHABLE: 2199 case ICMP_PROTOCOL_UNREACHABLE: 2200 error = ECONNREFUSED; 2201 break; 2202 default: 2203 /* Transient errors */ 2204 break; 2205 } 2206 break; 2207 default: 2208 /* Transient errors */ 2209 break; 2210 } 2211 if (error == 0) { 2212 freemsg(mp); 2213 return; 2214 } 2215 2216 switch (udp->udp_family) { 2217 case AF_INET: 2218 sin = sin_null; 2219 sin.sin_family = AF_INET; 2220 sin.sin_addr.s_addr = ipha->ipha_dst; 2221 sin.sin_port = udpha->uha_dst_port; 2222 mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0, 2223 error); 2224 break; 2225 case AF_INET6: 2226 sin6 = sin6_null; 2227 sin6.sin6_family = AF_INET6; 2228 IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr); 2229 sin6.sin6_port = udpha->uha_dst_port; 2230 2231 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), 2232 NULL, 0, error); 2233 break; 2234 } 2235 if (mp1) 2236 putnext(UDP_RD(q), mp1); 2237 freemsg(mp); 2238 } 2239 2240 /* 2241 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6. 2242 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2243 * Assumes that IP has pulled up all the extension headers as well as the 2244 * ICMPv6 header. 2245 * An M_CTL could potentially come here from some other module (i.e. if UDP 2246 * is pushed on some module other than IP). Thus, if we find that the M_CTL 2247 * does not have enough ICMP information , following STREAMS conventions, 2248 * we send it upstream assuming it is an M_CTL we don't understand. The reason 2249 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version 2250 * field (when cast to ipha_t in udp_icmp_error). 2251 */ 2252 static void 2253 udp_icmp_error_ipv6(queue_t *q, mblk_t *mp) 2254 { 2255 icmp6_t *icmp6; 2256 ip6_t *ip6h, *outer_ip6h; 2257 uint16_t hdr_length; 2258 uint8_t *nexthdrp; 2259 udpha_t *udpha; 2260 sin6_t sin6; 2261 mblk_t *mp1; 2262 int error = 0; 2263 size_t mp_size = MBLKL(mp); 2264 udp_t *udp = Q_TO_UDP(q); 2265 2266 /* 2267 * Verify that we have a complete IP header. If not, send it upstream. 2268 */ 2269 if (mp_size < sizeof (ip6_t)) { 2270 noticmpv6: 2271 putnext(UDP_RD(q), mp); 2272 return; 2273 } 2274 2275 outer_ip6h = (ip6_t *)mp->b_rptr; 2276 /* 2277 * Verify this is an ICMPV6 packet, else send it upstream 2278 */ 2279 if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) { 2280 hdr_length = IPV6_HDR_LEN; 2281 } else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length, 2282 &nexthdrp) || 2283 *nexthdrp != IPPROTO_ICMPV6) { 2284 goto noticmpv6; 2285 } 2286 icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length]; 2287 ip6h = (ip6_t *)&icmp6[1]; 2288 /* 2289 * Verify we have a complete ICMP and inner IP header. 2290 */ 2291 if ((uchar_t *)&ip6h[1] > mp->b_wptr) 2292 goto noticmpv6; 2293 2294 if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp)) 2295 goto noticmpv6; 2296 udpha = (udpha_t *)((char *)ip6h + hdr_length); 2297 /* 2298 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't 2299 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the 2300 * packet upstream. 
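 *
 * For reference, the layout being walked at this point is roughly:
 *
 *	outer ip6_t [+ ext hdrs] | icmp6_t | inner ip6_t [+ ext hdrs] |
 *	    first bytes of the offending udpha_t
 *
 * ICMP_MIN_UDP_HDR (4) bytes are enough because they cover the source
 * and destination ports, and only uha_dst_port is consulted below.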
2301 */ 2302 if ((*nexthdrp != IPPROTO_UDP) || 2303 ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) { 2304 goto noticmpv6; 2305 } 2306 2307 switch (icmp6->icmp6_type) { 2308 case ICMP6_DST_UNREACH: 2309 switch (icmp6->icmp6_code) { 2310 case ICMP6_DST_UNREACH_NOPORT: 2311 error = ECONNREFUSED; 2312 break; 2313 case ICMP6_DST_UNREACH_ADMIN: 2314 case ICMP6_DST_UNREACH_NOROUTE: 2315 case ICMP6_DST_UNREACH_BEYONDSCOPE: 2316 case ICMP6_DST_UNREACH_ADDR: 2317 /* Transient errors */ 2318 break; 2319 default: 2320 break; 2321 } 2322 break; 2323 case ICMP6_PACKET_TOO_BIG: { 2324 struct T_unitdata_ind *tudi; 2325 struct T_opthdr *toh; 2326 size_t udi_size; 2327 mblk_t *newmp; 2328 t_scalar_t opt_length = sizeof (struct T_opthdr) + 2329 sizeof (struct ip6_mtuinfo); 2330 sin6_t *sin6; 2331 struct ip6_mtuinfo *mtuinfo; 2332 2333 /* 2334 * If the application has requested to receive path mtu 2335 * information, send up an empty message containing an 2336 * IPV6_PATHMTU ancillary data item. 2337 */ 2338 if (!udp->udp_ipv6_recvpathmtu) 2339 break; 2340 2341 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) + 2342 opt_length; 2343 if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) { 2344 BUMP_MIB(&udp_mib, udpInErrors); 2345 break; 2346 } 2347 2348 /* 2349 * newmp->b_cont is left to NULL on purpose. This is an 2350 * empty message containing only ancillary data. 2351 */ 2352 newmp->b_datap->db_type = M_PROTO; 2353 tudi = (struct T_unitdata_ind *)newmp->b_rptr; 2354 newmp->b_wptr = (uchar_t *)tudi + udi_size; 2355 tudi->PRIM_type = T_UNITDATA_IND; 2356 tudi->SRC_length = sizeof (sin6_t); 2357 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 2358 tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t); 2359 tudi->OPT_length = opt_length; 2360 2361 sin6 = (sin6_t *)&tudi[1]; 2362 bzero(sin6, sizeof (sin6_t)); 2363 sin6->sin6_family = AF_INET6; 2364 sin6->sin6_addr = udp->udp_v6dst; 2365 2366 toh = (struct T_opthdr *)&sin6[1]; 2367 toh->level = IPPROTO_IPV6; 2368 toh->name = IPV6_PATHMTU; 2369 toh->len = opt_length; 2370 toh->status = 0; 2371 2372 mtuinfo = (struct ip6_mtuinfo *)&toh[1]; 2373 bzero(mtuinfo, sizeof (struct ip6_mtuinfo)); 2374 mtuinfo->ip6m_addr.sin6_family = AF_INET6; 2375 mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst; 2376 mtuinfo->ip6m_mtu = icmp6->icmp6_mtu; 2377 /* 2378 * We've consumed everything we need from the original 2379 * message. Free it, then send our empty message. 2380 */ 2381 freemsg(mp); 2382 putnext(UDP_RD(q), newmp); 2383 return; 2384 } 2385 case ICMP6_TIME_EXCEEDED: 2386 /* Transient errors */ 2387 break; 2388 case ICMP6_PARAM_PROB: 2389 /* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */ 2390 if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER && 2391 (uchar_t *)ip6h + icmp6->icmp6_pptr == 2392 (uchar_t *)nexthdrp) { 2393 error = ECONNREFUSED; 2394 break; 2395 } 2396 break; 2397 } 2398 if (error == 0) { 2399 freemsg(mp); 2400 return; 2401 } 2402 2403 sin6 = sin6_null; 2404 sin6.sin6_family = AF_INET6; 2405 sin6.sin6_addr = ip6h->ip6_dst; 2406 sin6.sin6_port = udpha->uha_dst_port; 2407 sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK; 2408 2409 mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0, 2410 error); 2411 if (mp1) 2412 putnext(UDP_RD(q), mp1); 2413 freemsg(mp); 2414 } 2415 2416 /* 2417 * This routine responds to T_ADDR_REQ messages. It is called by udp_wput. 2418 * The local address is filled in if endpoint is bound. 
The remote address 2419 * is filled in if the remote address has been specified ("connected endpoint") 2420 * (The concept of connected CLTS sockets is alien to published TPI 2421 * but we support it anyway). 2422 */ 2423 static void 2424 udp_addr_req(queue_t *q, mblk_t *mp) 2425 { 2426 sin_t *sin; 2427 sin6_t *sin6; 2428 mblk_t *ackmp; 2429 struct T_addr_ack *taa; 2430 udp_t *udp = Q_TO_UDP(q); 2431
2432 /* Make it large enough for worst case */ 2433 ackmp = reallocb(mp, sizeof (struct T_addr_ack) + 2434 2 * sizeof (sin6_t), 1); 2435 if (ackmp == NULL) { 2436 udp_err_ack(q, mp, TSYSERR, ENOMEM); 2437 return; 2438 } 2439 taa = (struct T_addr_ack *)ackmp->b_rptr; 2440
2441 bzero(taa, sizeof (struct T_addr_ack)); 2442 ackmp->b_wptr = (uchar_t *)&taa[1]; 2443
2444 taa->PRIM_type = T_ADDR_ACK; 2445 ackmp->b_datap->db_type = M_PCPROTO; 2446 /* 2447 * Note: The following code assumes 32-bit alignment of basic 2448 * data structures like sin_t and struct T_addr_ack. 2449 */ 2450 if (udp->udp_state != TS_UNBND) { 2451 /* 2452 * Fill in local address first 2453 */ 2454 taa->LOCADDR_offset = sizeof (*taa); 2455 if (udp->udp_family == AF_INET) { 2456 taa->LOCADDR_length = sizeof (sin_t); 2457 sin = (sin_t *)&taa[1]; 2458 /* Fill zeroes and then initialize non-zero fields */ 2459 *sin = sin_null; 2460 sin->sin_family = AF_INET; 2461 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 2462 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2463 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, 2464 sin->sin_addr.s_addr); 2465 } else { 2466 /* 2467 * INADDR_ANY 2468 * udp_v6src is not set, we might be bound to 2469 * broadcast/multicast. Use udp_bound_v6src as 2470 * local address instead (that could 2471 * also still be INADDR_ANY) 2472 */ 2473 IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src, 2474 sin->sin_addr.s_addr); 2475 } 2476 sin->sin_port = udp->udp_port; 2477 ackmp->b_wptr = (uchar_t *)&sin[1]; 2478 if (udp->udp_state == TS_DATA_XFER) { 2479 /* 2480 * connected, fill remote address too 2481 */ 2482 taa->REMADDR_length = sizeof (sin_t); 2483 /* assumed 32-bit alignment */ 2484 taa->REMADDR_offset = taa->LOCADDR_offset + 2485 taa->LOCADDR_length; 2486
2487 sin = (sin_t *)(ackmp->b_rptr + 2488 taa->REMADDR_offset); 2489 /* initialize */ 2490 *sin = sin_null; 2491 sin->sin_family = AF_INET; 2492 sin->sin_addr.s_addr = 2493 V4_PART_OF_V6(udp->udp_v6dst); 2494 sin->sin_port = udp->udp_dstport; 2495 ackmp->b_wptr = (uchar_t *)&sin[1]; 2496 } 2497 } else { 2498 taa->LOCADDR_length = sizeof (sin6_t); 2499 sin6 = (sin6_t *)&taa[1]; 2500 /* Fill zeroes and then initialize non-zero fields */ 2501 *sin6 = sin6_null; 2502 sin6->sin6_family = AF_INET6; 2503 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 2504 sin6->sin6_addr = udp->udp_v6src; 2505 } else { 2506 /* 2507 * UNSPECIFIED 2508 * udp_v6src is not set, we might be bound to 2509 * broadcast/multicast.
Use udp_bound_v6src as 2510 * local address instead (that could 2511 * also still be UNSPECIFIED) 2512 */ 2513 sin6->sin6_addr = 2514 udp->udp_bound_v6src; 2515 } 2516 sin6->sin6_port = udp->udp_port; 2517 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2518 if (udp->udp_state == TS_DATA_XFER) { 2519 /* 2520 * connected, fill remote address too 2521 */ 2522 taa->REMADDR_length = sizeof (sin6_t); 2523 /* assumed 32-bit alignment */ 2524 taa->REMADDR_offset = taa->LOCADDR_offset + 2525 taa->LOCADDR_length; 2526 2527 sin6 = (sin6_t *)(ackmp->b_rptr + 2528 taa->REMADDR_offset); 2529 /* initialize */ 2530 *sin6 = sin6_null; 2531 sin6->sin6_family = AF_INET6; 2532 sin6->sin6_addr = udp->udp_v6dst; 2533 sin6->sin6_port = udp->udp_dstport; 2534 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2535 } 2536 ackmp->b_wptr = (uchar_t *)&sin6[1]; 2537 } 2538 } 2539 ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim); 2540 putnext(UDP_RD(q), ackmp); 2541 } 2542 2543 static void 2544 udp_copy_info(struct T_info_ack *tap, udp_t *udp) 2545 { 2546 if (udp->udp_family == AF_INET) { 2547 *tap = udp_g_t_info_ack_ipv4; 2548 } else { 2549 *tap = udp_g_t_info_ack_ipv6; 2550 } 2551 tap->CURRENT_state = udp->udp_state; 2552 tap->OPT_size = udp_max_optsize; 2553 } 2554 2555 /* 2556 * This routine responds to T_CAPABILITY_REQ messages. It is called by 2557 * udp_wput. Much of the T_CAPABILITY_ACK information is copied from 2558 * udp_g_t_info_ack. The current state of the stream is copied from 2559 * udp_state. 2560 */ 2561 static void 2562 udp_capability_req(queue_t *q, mblk_t *mp) 2563 { 2564 t_uscalar_t cap_bits1; 2565 struct T_capability_ack *tcap; 2566 udp_t *udp = Q_TO_UDP(q); 2567 2568 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2569 2570 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2571 mp->b_datap->db_type, T_CAPABILITY_ACK); 2572 if (!mp) 2573 return; 2574 2575 tcap = (struct T_capability_ack *)mp->b_rptr; 2576 tcap->CAP_bits1 = 0; 2577 2578 if (cap_bits1 & TC1_INFO) { 2579 udp_copy_info(&tcap->INFO_ack, udp); 2580 tcap->CAP_bits1 |= TC1_INFO; 2581 } 2582 2583 putnext(UDP_RD(q), mp); 2584 } 2585 2586 /* 2587 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2588 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2589 * The current state of the stream is copied from udp_state. 2590 */ 2591 static void 2592 udp_info_req(queue_t *q, mblk_t *mp) 2593 { 2594 udp_t *udp = Q_TO_UDP(q); 2595 2596 /* Create a T_INFO_ACK message. */ 2597 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2598 T_INFO_ACK); 2599 if (!mp) 2600 return; 2601 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2602 putnext(UDP_RD(q), mp); 2603 } 2604 2605 /* 2606 * IP recognizes seven kinds of bind requests: 2607 * 2608 * - A zero-length address binds only to the protocol number. 2609 * 2610 * - A 4-byte address is treated as a request to 2611 * validate that the address is a valid local IPv4 2612 * address, appropriate for an application to bind to. 2613 * IP does the verification, but does not make any note 2614 * of the address at this time. 2615 * 2616 * - A 16-byte address contains is treated as a request 2617 * to validate a local IPv6 address, as the 4-byte 2618 * address case above. 2619 * 2620 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2621 * use it for the inbound fanout of packets. 2622 * 2623 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2624 * use it for the inbound fanout of packets. 
2625 * 2626 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2627 * information consisting of local and remote addresses 2628 * and ports. In this case, the addresses are both 2629 * validated as appropriate for this operation, and, if 2630 * so, the information is retained for use in the 2631 * inbound fanout. 2632 * 2633 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2634 * fanout information, like the 12-byte case above. 2635 * 2636 * IP will also fill in the IRE request mblk with information 2637 * regarding our peer. In all cases, we notify IP of our protocol 2638 * type by appending a single protocol byte to the bind request. 2639 */ 2640 static mblk_t * 2641 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2642 { 2643 char *cp; 2644 mblk_t *mp; 2645 struct T_bind_req *tbr; 2646 ipa_conn_t *ac; 2647 ipa6_conn_t *ac6; 2648 sin_t *sin; 2649 sin6_t *sin6; 2650 2651 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2652 2653 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2654 if (!mp) 2655 return (mp); 2656 mp->b_datap->db_type = M_PROTO; 2657 tbr = (struct T_bind_req *)mp->b_rptr; 2658 tbr->PRIM_type = bind_prim; 2659 tbr->ADDR_offset = sizeof (*tbr); 2660 tbr->CONIND_number = 0; 2661 tbr->ADDR_length = addr_length; 2662 cp = (char *)&tbr[1]; 2663 switch (addr_length) { 2664 case sizeof (ipa_conn_t): 2665 ASSERT(udp->udp_family == AF_INET); 2666 /* Append a request for an IRE */ 2667 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2668 if (!mp->b_cont) { 2669 freemsg(mp); 2670 return (NULL); 2671 } 2672 mp->b_cont->b_wptr += sizeof (ire_t); 2673 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2674 2675 /* cp known to be 32 bit aligned */ 2676 ac = (ipa_conn_t *)cp; 2677 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2678 ac->ac_faddr = V4_PART_OF_V6(udp->udp_v6dst); 2679 ac->ac_fport = udp->udp_dstport; 2680 ac->ac_lport = udp->udp_port; 2681 break; 2682 2683 case sizeof (ipa6_conn_t): 2684 ASSERT(udp->udp_family == AF_INET6); 2685 /* Append a request for an IRE */ 2686 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2687 if (!mp->b_cont) { 2688 freemsg(mp); 2689 return (NULL); 2690 } 2691 mp->b_cont->b_wptr += sizeof (ire_t); 2692 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2693 2694 /* cp known to be 32 bit aligned */ 2695 ac6 = (ipa6_conn_t *)cp; 2696 ac6->ac6_laddr = udp->udp_v6src; 2697 ac6->ac6_faddr = udp->udp_v6dst; 2698 ac6->ac6_fport = udp->udp_dstport; 2699 ac6->ac6_lport = udp->udp_port; 2700 break; 2701 2702 case sizeof (sin_t): 2703 ASSERT(udp->udp_family == AF_INET); 2704 /* Append a request for an IRE */ 2705 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2706 if (!mp->b_cont) { 2707 freemsg(mp); 2708 return (NULL); 2709 } 2710 mp->b_cont->b_wptr += sizeof (ire_t); 2711 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2712 2713 sin = (sin_t *)cp; 2714 *sin = sin_null; 2715 sin->sin_family = AF_INET; 2716 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2717 sin->sin_port = udp->udp_port; 2718 break; 2719 2720 case sizeof (sin6_t): 2721 ASSERT(udp->udp_family == AF_INET6); 2722 /* Append a request for an IRE */ 2723 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2724 if (!mp->b_cont) { 2725 freemsg(mp); 2726 return (NULL); 2727 } 2728 mp->b_cont->b_wptr += sizeof (ire_t); 2729 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2730 2731 sin6 = (sin6_t *)cp; 2732 *sin6 = sin6_null; 2733 sin6->sin6_family = AF_INET6; 2734 sin6->sin6_addr = udp->udp_bound_v6src; 2735 sin6->sin6_port = 
udp->udp_port; 2736 break; 2737 } 2738 /* Add protocol number to end */ 2739 cp[addr_length] = (char)IPPROTO_UDP; 2740 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2741 return (mp); 2742 } 2743 2744 /* 2745 * This is the open routine for udp. It allocates a udp_t structure for 2746 * the stream and, on the first open of the module, creates an ND table. 2747 */ 2748 /* ARGSUSED */ 2749 static int 2750 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2751 { 2752 int err; 2753 udp_t *udp; 2754 conn_t *connp; 2755 zoneid_t zoneid = getzoneid(); 2756 queue_t *ip_wq; 2757 char *name; 2758 2759 TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q); 2760 2761 /* If the stream is already open, return immediately. */ 2762 if (q->q_ptr != NULL) 2763 return (0); 2764 2765 /* If this is not a push of udp as a module, fail. */ 2766 if (sflag != MODOPEN) 2767 return (EINVAL); 2768 2769 q->q_hiwat = udp_recv_hiwat; 2770 WR(q)->q_hiwat = udp_xmit_hiwat; 2771 WR(q)->q_lowat = udp_xmit_lowat; 2772 2773 /* Insert ourselves in the stream since we're about to walk q_next */ 2774 qprocson(q); 2775 2776 udp = kmem_cache_alloc(udp_cache, KM_SLEEP); 2777 bzero(udp, sizeof (*udp)); 2778 2779 /* 2780 * UDP is supported only as a module and it has to be pushed directly 2781 * above the device instance of IP. If UDP is pushed anywhere else 2782 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the 2783 * sake of MIB browsers and fail everything else. 2784 */ 2785 ip_wq = WR(q)->q_next; 2786 if (ip_wq->q_next != NULL || 2787 (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL || 2788 strcmp(name, IP_MOD_NAME) != 0 || 2789 ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) { 2790 /* Support just SNMP for MIB browsers */ 2791 connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP); 2792 connp->conn_rq = q; 2793 connp->conn_wq = WR(q); 2794 connp->conn_flags |= IPCL_UDPMOD; 2795 connp->conn_cred = credp; 2796 connp->conn_zoneid = zoneid; 2797 connp->conn_udp = udp; 2798 udp->udp_connp = connp; 2799 q->q_ptr = WR(q)->q_ptr = connp; 2800 crhold(credp); 2801 q->q_qinfo = &udp_snmp_rinit; 2802 WR(q)->q_qinfo = &udp_snmp_winit; 2803 return (0); 2804 } 2805 2806 /* 2807 * Initialize the udp_t structure for this stream. 2808 */ 2809 q = RD(ip_wq); 2810 connp = Q_TO_CONN(q); 2811 mutex_enter(&connp->conn_lock); 2812 connp->conn_proto = IPPROTO_UDP; 2813 connp->conn_flags |= IPCL_UDP; 2814 connp->conn_sqp = IP_SQUEUE_GET(lbolt); 2815 connp->conn_udp = udp; 2816 2817 /* Set the initial state of the stream and the privilege status. 
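 *
 * (Whether this endpoint comes up as AF_INET or AF_INET6 is decided
 * just below from the device that was opened: a major number equal to
 * UDP6_MAJ, i.e. an open of /dev/udp6, selects IPv6; anything else
 * selects IPv4.)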
*/ 2818 udp->udp_connp = connp; 2819 udp->udp_state = TS_UNBND; 2820 udp->udp_mode = UDP_MT_HOT; 2821 if (getmajor(*devp) == (major_t)UDP6_MAJ) { 2822 udp->udp_family = AF_INET6; 2823 udp->udp_ipversion = IPV6_VERSION; 2824 udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 2825 udp->udp_ttl = udp_ipv6_hoplimit; 2826 connp->conn_af_isv6 = B_TRUE; 2827 connp->conn_flags |= IPCL_ISV6; 2828 } else { 2829 udp->udp_family = AF_INET; 2830 udp->udp_ipversion = IPV4_VERSION; 2831 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE; 2832 udp->udp_ttl = udp_ipv4_ttl; 2833 connp->conn_af_isv6 = B_FALSE; 2834 connp->conn_flags &= ~IPCL_ISV6; 2835 } 2836 2837 udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 2838 connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; 2839 connp->conn_zoneid = zoneid; 2840 2841 if (connp->conn_flags & IPCL_SOCKET) { 2842 udp->udp_issocket = B_TRUE; 2843 udp->udp_direct_sockfs = B_TRUE; 2844 } 2845 mutex_exit(&connp->conn_lock); 2846 2847 /* 2848 * The transmit hiwat/lowat is only looked at on IP's queue. 2849 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF 2850 * getsockopts. 2851 */ 2852 q->q_hiwat = udp_recv_hiwat; 2853 WR(q)->q_hiwat = udp_xmit_hiwat; 2854 WR(q)->q_lowat = udp_xmit_lowat; 2855 2856 if (udp->udp_family == AF_INET6) { 2857 /* Build initial header template for transmit */ 2858 if ((err = udp_build_hdrs(q, udp)) != 0) { 2859 qprocsoff(UDP_RD(q)); 2860 udp->udp_connp = NULL; 2861 connp->conn_udp = NULL; 2862 kmem_cache_free(udp_cache, udp); 2863 return (err); 2864 } 2865 } 2866 2867 /* Set the Stream head write offset and high watermark. */ 2868 (void) mi_set_sth_wroff(UDP_RD(q), 2869 udp->udp_max_hdr_len + udp_wroff_extra); 2870 (void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat)); 2871 2872 WR(UDP_RD(q))->q_qinfo = &udp_winit; 2873 2874 return (0); 2875 } 2876 2877 /* 2878 * Which UDP options OK to set through T_UNITDATA_REQ... 2879 */ 2880 /* ARGSUSED */ 2881 static boolean_t 2882 udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name) 2883 { 2884 return (B_TRUE); 2885 } 2886 2887 /* 2888 * This routine gets default values of certain options whose default 2889 * values are maintained by protcol specific code 2890 */ 2891 /* ARGSUSED */ 2892 int 2893 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2894 { 2895 int *i1 = (int *)ptr; 2896 2897 switch (level) { 2898 case IPPROTO_IP: 2899 switch (name) { 2900 case IP_MULTICAST_TTL: 2901 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL; 2902 return (sizeof (uchar_t)); 2903 case IP_MULTICAST_LOOP: 2904 *ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP; 2905 return (sizeof (uchar_t)); 2906 } 2907 break; 2908 case IPPROTO_IPV6: 2909 switch (name) { 2910 case IPV6_MULTICAST_HOPS: 2911 *i1 = IP_DEFAULT_MULTICAST_TTL; 2912 return (sizeof (int)); 2913 case IPV6_MULTICAST_LOOP: 2914 *i1 = IP_DEFAULT_MULTICAST_LOOP; 2915 return (sizeof (int)); 2916 case IPV6_UNICAST_HOPS: 2917 *i1 = udp_ipv6_hoplimit; 2918 return (sizeof (int)); 2919 } 2920 break; 2921 } 2922 return (-1); 2923 } 2924 2925 /* 2926 * This routine retrieves the current status of socket options 2927 * and expects the caller to pass in the queue pointer of the 2928 * upper instance. It returns the size of the option retrieved. 
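 *
 * As a loose userland-side illustration (not part of this module) of a
 * request that ends up here through the option management code:
 *
 *	int ttl;
 *	socklen_t len = sizeof (ttl);
 *	(void) getsockopt(fd, IPPROTO_IP, IP_TTL, &ttl, &len);
 *
 * which for a UDP endpoint is answered from udp_ttl below.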
2929 */ 2930 int 2931 udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr) 2932 { 2933 int *i1 = (int *)ptr; 2934 conn_t *connp; 2935 udp_t *udp; 2936 ip6_pkt_t *ipp; 2937 2938 q = UDP_WR(q); 2939 connp = Q_TO_CONN(q); 2940 udp = connp->conn_udp; 2941 ipp = &udp->udp_sticky_ipp; 2942 2943 switch (level) { 2944 case SOL_SOCKET: 2945 switch (name) { 2946 case SO_DEBUG: 2947 *i1 = udp->udp_debug; 2948 break; /* goto sizeof (int) option return */ 2949 case SO_REUSEADDR: 2950 *i1 = udp->udp_reuseaddr; 2951 break; /* goto sizeof (int) option return */ 2952 case SO_TYPE: 2953 *i1 = SOCK_DGRAM; 2954 break; /* goto sizeof (int) option return */ 2955 2956 /* 2957 * The following three items are available here, 2958 * but are only meaningful to IP. 2959 */ 2960 case SO_DONTROUTE: 2961 *i1 = udp->udp_dontroute; 2962 break; /* goto sizeof (int) option return */ 2963 case SO_USELOOPBACK: 2964 *i1 = udp->udp_useloopback; 2965 break; /* goto sizeof (int) option return */ 2966 case SO_BROADCAST: 2967 *i1 = udp->udp_broadcast; 2968 break; /* goto sizeof (int) option return */ 2969 2970 case SO_SNDBUF: 2971 *i1 = q->q_hiwat; 2972 break; /* goto sizeof (int) option return */ 2973 case SO_RCVBUF: 2974 *i1 = RD(q)->q_hiwat; 2975 break; /* goto sizeof (int) option return */ 2976 case SO_DGRAM_ERRIND: 2977 *i1 = udp->udp_dgram_errind; 2978 break; /* goto sizeof (int) option return */ 2979 case SO_RECVUCRED: 2980 *i1 = udp->udp_recvucred; 2981 break; /* goto sizeof (int) option return */ 2982 default: 2983 return (-1); 2984 } 2985 break; 2986 case IPPROTO_IP: 2987 if (udp->udp_family != AF_INET) 2988 return (-1); 2989 switch (name) { 2990 case IP_OPTIONS: 2991 case T_IP_OPTIONS: 2992 if (udp->udp_ip_rcv_options_len) 2993 bcopy(udp->udp_ip_rcv_options, ptr, 2994 udp->udp_ip_rcv_options_len); 2995 return (udp->udp_ip_rcv_options_len); 2996 case IP_TOS: 2997 case T_IP_TOS: 2998 *i1 = (int)udp->udp_type_of_service; 2999 break; /* goto sizeof (int) option return */ 3000 case IP_TTL: 3001 *i1 = (int)udp->udp_ttl; 3002 break; /* goto sizeof (int) option return */ 3003 case IP_NEXTHOP: 3004 /* Handled at IP level */ 3005 return (-EINVAL); 3006 case IP_MULTICAST_IF: 3007 /* 0 address if not set */ 3008 *(ipaddr_t *)ptr = udp->udp_multicast_if_addr; 3009 return (sizeof (ipaddr_t)); 3010 case IP_MULTICAST_TTL: 3011 *(uchar_t *)ptr = udp->udp_multicast_ttl; 3012 return (sizeof (uchar_t)); 3013 case IP_MULTICAST_LOOP: 3014 *ptr = connp->conn_multicast_loop; 3015 return (sizeof (uint8_t)); 3016 case IP_RECVOPTS: 3017 *i1 = udp->udp_recvopts; 3018 break; /* goto sizeof (int) option return */ 3019 case IP_RECVDSTADDR: 3020 *i1 = udp->udp_recvdstaddr; 3021 break; /* goto sizeof (int) option return */ 3022 case IP_RECVIF: 3023 *i1 = udp->udp_recvif; 3024 break; /* goto sizeof (int) option return */ 3025 case IP_RECVSLLA: 3026 *i1 = udp->udp_recvslla; 3027 break; /* goto sizeof (int) option return */ 3028 case IP_RECVTTL: 3029 *i1 = udp->udp_recvttl; 3030 break; /* goto sizeof (int) option return */ 3031 case IP_ADD_MEMBERSHIP: 3032 case IP_DROP_MEMBERSHIP: 3033 case IP_BLOCK_SOURCE: 3034 case IP_UNBLOCK_SOURCE: 3035 case IP_ADD_SOURCE_MEMBERSHIP: 3036 case IP_DROP_SOURCE_MEMBERSHIP: 3037 case MCAST_JOIN_GROUP: 3038 case MCAST_LEAVE_GROUP: 3039 case MCAST_BLOCK_SOURCE: 3040 case MCAST_UNBLOCK_SOURCE: 3041 case MCAST_JOIN_SOURCE_GROUP: 3042 case MCAST_LEAVE_SOURCE_GROUP: 3043 case IP_DONTFAILOVER_IF: 3044 /* cannot "get" the value for these */ 3045 return (-1); 3046 case IP_BOUND_IF: 3047 /* Zero if not set */ 
3048 *i1 = udp->udp_bound_if; 3049 break; /* goto sizeof (int) option return */ 3050 case IP_UNSPEC_SRC: 3051 *i1 = udp->udp_unspec_source; 3052 break; /* goto sizeof (int) option return */ 3053 case IP_XMIT_IF: 3054 *i1 = udp->udp_xmit_if; 3055 break; /* goto sizeof (int) option return */ 3056 default: 3057 return (-1); 3058 } 3059 break; 3060 case IPPROTO_IPV6: 3061 if (udp->udp_family != AF_INET6) 3062 return (-1); 3063 switch (name) { 3064 case IPV6_UNICAST_HOPS: 3065 *i1 = (unsigned int)udp->udp_ttl; 3066 break; /* goto sizeof (int) option return */ 3067 case IPV6_MULTICAST_IF: 3068 /* 0 index if not set */ 3069 *i1 = udp->udp_multicast_if_index; 3070 break; /* goto sizeof (int) option return */ 3071 case IPV6_MULTICAST_HOPS: 3072 *i1 = udp->udp_multicast_ttl; 3073 break; /* goto sizeof (int) option return */ 3074 case IPV6_MULTICAST_LOOP: 3075 *i1 = connp->conn_multicast_loop; 3076 break; /* goto sizeof (int) option return */ 3077 case IPV6_JOIN_GROUP: 3078 case IPV6_LEAVE_GROUP: 3079 case MCAST_JOIN_GROUP: 3080 case MCAST_LEAVE_GROUP: 3081 case MCAST_BLOCK_SOURCE: 3082 case MCAST_UNBLOCK_SOURCE: 3083 case MCAST_JOIN_SOURCE_GROUP: 3084 case MCAST_LEAVE_SOURCE_GROUP: 3085 /* cannot "get" the value for these */ 3086 return (-1); 3087 case IPV6_BOUND_IF: 3088 /* Zero if not set */ 3089 *i1 = udp->udp_bound_if; 3090 break; /* goto sizeof (int) option return */ 3091 case IPV6_UNSPEC_SRC: 3092 *i1 = udp->udp_unspec_source; 3093 break; /* goto sizeof (int) option return */ 3094 case IPV6_RECVPKTINFO: 3095 *i1 = udp->udp_ipv6_recvpktinfo; 3096 break; /* goto sizeof (int) option return */ 3097 case IPV6_RECVTCLASS: 3098 *i1 = udp->udp_ipv6_recvtclass; 3099 break; /* goto sizeof (int) option return */ 3100 case IPV6_RECVPATHMTU: 3101 *i1 = udp->udp_ipv6_recvpathmtu; 3102 break; /* goto sizeof (int) option return */ 3103 case IPV6_RECVHOPLIMIT: 3104 *i1 = udp->udp_ipv6_recvhoplimit; 3105 break; /* goto sizeof (int) option return */ 3106 case IPV6_RECVHOPOPTS: 3107 *i1 = udp->udp_ipv6_recvhopopts; 3108 break; /* goto sizeof (int) option return */ 3109 case IPV6_RECVDSTOPTS: 3110 *i1 = udp->udp_ipv6_recvdstopts; 3111 break; /* goto sizeof (int) option return */ 3112 case _OLD_IPV6_RECVDSTOPTS: 3113 *i1 = udp->udp_old_ipv6_recvdstopts; 3114 break; /* goto sizeof (int) option return */ 3115 case IPV6_RECVRTHDRDSTOPTS: 3116 *i1 = udp->udp_ipv6_recvrthdrdstopts; 3117 break; /* goto sizeof (int) option return */ 3118 case IPV6_RECVRTHDR: 3119 *i1 = udp->udp_ipv6_recvrthdr; 3120 break; /* goto sizeof (int) option return */ 3121 case IPV6_PKTINFO: { 3122 /* XXX assumes that caller has room for max size! 
*/ 3123 struct in6_pktinfo *pkti; 3124 3125 pkti = (struct in6_pktinfo *)ptr; 3126 if (ipp->ipp_fields & IPPF_IFINDEX) 3127 pkti->ipi6_ifindex = ipp->ipp_ifindex; 3128 else 3129 pkti->ipi6_ifindex = 0; 3130 if (ipp->ipp_fields & IPPF_ADDR) 3131 pkti->ipi6_addr = ipp->ipp_addr; 3132 else 3133 pkti->ipi6_addr = ipv6_all_zeros; 3134 return (sizeof (struct in6_pktinfo)); 3135 } 3136 case IPV6_TCLASS: 3137 if (ipp->ipp_fields & IPPF_TCLASS) 3138 *i1 = ipp->ipp_tclass; 3139 else 3140 *i1 = IPV6_FLOW_TCLASS( 3141 IPV6_DEFAULT_VERS_AND_FLOW); 3142 break; /* goto sizeof (int) option return */ 3143 case IPV6_NEXTHOP: { 3144 sin6_t *sin6 = (sin6_t *)ptr; 3145 3146 if (!(ipp->ipp_fields & IPPF_NEXTHOP)) 3147 return (0); 3148 *sin6 = sin6_null; 3149 sin6->sin6_family = AF_INET6; 3150 sin6->sin6_addr = ipp->ipp_nexthop; 3151 return (sizeof (sin6_t)); 3152 } 3153 case IPV6_HOPOPTS: 3154 if (!(ipp->ipp_fields & IPPF_HOPOPTS)) 3155 return (0); 3156 bcopy(ipp->ipp_hopopts, ptr, ipp->ipp_hopoptslen); 3157 return (ipp->ipp_hopoptslen); 3158 case IPV6_RTHDRDSTOPTS: 3159 if (!(ipp->ipp_fields & IPPF_RTDSTOPTS)) 3160 return (0); 3161 bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen); 3162 return (ipp->ipp_rtdstoptslen); 3163 case IPV6_RTHDR: 3164 if (!(ipp->ipp_fields & IPPF_RTHDR)) 3165 return (0); 3166 bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen); 3167 return (ipp->ipp_rthdrlen); 3168 case IPV6_DSTOPTS: 3169 if (!(ipp->ipp_fields & IPPF_DSTOPTS)) 3170 return (0); 3171 bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen); 3172 return (ipp->ipp_dstoptslen); 3173 case IPV6_PATHMTU: 3174 return (ip_fill_mtuinfo(&udp->udp_v6dst, 3175 udp->udp_dstport, (struct ip6_mtuinfo *)ptr)); 3176 default: 3177 return (-1); 3178 } 3179 break; 3180 case IPPROTO_UDP: 3181 switch (name) { 3182 case UDP_ANONPRIVBIND: 3183 *i1 = udp->udp_anon_priv_bind; 3184 break; 3185 case UDP_EXCLBIND: 3186 *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0; 3187 break; 3188 case UDP_RCVHDR: 3189 *i1 = udp->udp_rcvhdr ? 1 : 0; 3190 break; 3191 default: 3192 return (-1); 3193 } 3194 break; 3195 default: 3196 return (-1); 3197 } 3198 return (sizeof (int)); 3199 } 3200 3201 /* 3202 * This routine sets socket options; it expects the caller 3203 * to pass in the queue pointer of the upper instance. 3204 */ 3205 /* ARGSUSED */ 3206 int 3207 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3208 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3209 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3210 { 3211 int *i1 = (int *)invalp; 3212 boolean_t onoff = (*i1 == 0) ? 0 : 1; 3213 boolean_t checkonly; 3214 int error; 3215 conn_t *connp; 3216 udp_t *udp; 3217 3218 q = UDP_WR(q); 3219 connp = Q_TO_CONN(q); 3220 udp = connp->conn_udp; 3221 3222 switch (optset_context) { 3223 case SETFN_OPTCOM_CHECKONLY: 3224 checkonly = B_TRUE; 3225 /* 3226 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3227 * inlen != 0 implies value supplied and 3228 * we have to "pretend" to set it. 3229 * inlen == 0 implies that there is no 3230 * value part in T_CHECK request and just validation 3231 * done elsewhere should be enough, we just return here. 3232 */ 3233 if (inlen == 0) { 3234 *outlenp = 0; 3235 return (0); 3236 } 3237 break; 3238 case SETFN_OPTCOM_NEGOTIATE: 3239 checkonly = B_FALSE; 3240 break; 3241 case SETFN_UD_NEGOTIATE: 3242 case SETFN_CONN_NEGOTIATE: 3243 checkonly = B_FALSE; 3244 /* 3245 * Negotiating local and "association-related" options 3246 * through T_UNITDATA_REQ. 
3247 * 3248 * Following routine can filter out ones we do not 3249 * want to be "set" this way. 3250 */ 3251 if (!udp_opt_allow_udr_set(level, name)) { 3252 *outlenp = 0; 3253 return (EINVAL); 3254 } 3255 break; 3256 default: 3257 /* 3258 * We should never get here 3259 */ 3260 *outlenp = 0; 3261 return (EINVAL); 3262 } 3263 3264 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3265 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3266 3267 /* 3268 * For fixed length options, no sanity check 3269 * of passed in length is done. It is assumed *_optcom_req() 3270 * routines do the right thing. 3271 */ 3272 3273 switch (level) { 3274 case SOL_SOCKET: 3275 switch (name) { 3276 case SO_REUSEADDR: 3277 if (!checkonly) 3278 udp->udp_reuseaddr = onoff; 3279 break; 3280 case SO_DEBUG: 3281 if (!checkonly) 3282 udp->udp_debug = onoff; 3283 break; 3284 /* 3285 * The following three items are available here, 3286 * but are only meaningful to IP. 3287 */ 3288 case SO_DONTROUTE: 3289 if (!checkonly) 3290 udp->udp_dontroute = onoff; 3291 break; 3292 case SO_USELOOPBACK: 3293 if (!checkonly) 3294 udp->udp_useloopback = onoff; 3295 break; 3296 case SO_BROADCAST: 3297 if (!checkonly) 3298 udp->udp_broadcast = onoff; 3299 break; 3300 3301 case SO_SNDBUF: 3302 if (*i1 > udp_max_buf) { 3303 *outlenp = 0; 3304 return (ENOBUFS); 3305 } 3306 if (!checkonly) { 3307 q->q_hiwat = *i1; 3308 WR(UDP_RD(q))->q_hiwat = *i1; 3309 } 3310 break; 3311 case SO_RCVBUF: 3312 if (*i1 > udp_max_buf) { 3313 *outlenp = 0; 3314 return (ENOBUFS); 3315 } 3316 if (!checkonly) { 3317 RD(q)->q_hiwat = *i1; 3318 UDP_RD(q)->q_hiwat = *i1; 3319 (void) mi_set_sth_hiwat(UDP_RD(q), 3320 udp_set_rcv_hiwat(udp, *i1)); 3321 } 3322 break; 3323 case SO_DGRAM_ERRIND: 3324 if (!checkonly) 3325 udp->udp_dgram_errind = onoff; 3326 break; 3327 case SO_RECVUCRED: 3328 if (!checkonly) 3329 udp->udp_recvucred = onoff; 3330 break; 3331 default: 3332 *outlenp = 0; 3333 return (EINVAL); 3334 } 3335 break; 3336 case IPPROTO_IP: 3337 if (udp->udp_family != AF_INET) { 3338 *outlenp = 0; 3339 return (ENOPROTOOPT); 3340 } 3341 switch (name) { 3342 case IP_OPTIONS: 3343 case T_IP_OPTIONS: 3344 /* Save options for use by IP. */ 3345 if (inlen & 0x3) { 3346 *outlenp = 0; 3347 return (EINVAL); 3348 } 3349 if (checkonly) 3350 break; 3351 3352 if (udp->udp_ip_snd_options) { 3353 mi_free((char *)udp->udp_ip_snd_options); 3354 udp->udp_ip_snd_options_len = 0; 3355 udp->udp_ip_snd_options = NULL; 3356 } 3357 if (inlen) { 3358 udp->udp_ip_snd_options = 3359 (uchar_t *)mi_alloc(inlen, BPRI_HI); 3360 if (udp->udp_ip_snd_options) { 3361 bcopy(invalp, udp->udp_ip_snd_options, 3362 inlen); 3363 udp->udp_ip_snd_options_len = inlen; 3364 } 3365 } 3366 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3367 UDPH_SIZE + udp->udp_ip_snd_options_len; 3368 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3369 udp_wroff_extra); 3370 break; 3371 case IP_TTL: 3372 if (!checkonly) { 3373 udp->udp_ttl = (uchar_t)*i1; 3374 } 3375 break; 3376 case IP_TOS: 3377 case T_IP_TOS: 3378 if (!checkonly) { 3379 udp->udp_type_of_service = (uchar_t)*i1; 3380 } 3381 break; 3382 case IP_MULTICAST_IF: { 3383 /* 3384 * TODO should check OPTMGMT reply and undo this if 3385 * there is an error. 
3386 */ 3387 struct in_addr *inap = (struct in_addr *)invalp; 3388 if (!checkonly) { 3389 udp->udp_multicast_if_addr = 3390 inap->s_addr; 3391 } 3392 break; 3393 } 3394 case IP_MULTICAST_TTL: 3395 if (!checkonly) 3396 udp->udp_multicast_ttl = *invalp; 3397 break; 3398 case IP_MULTICAST_LOOP: 3399 if (!checkonly) 3400 connp->conn_multicast_loop = *invalp; 3401 break; 3402 case IP_RECVOPTS: 3403 if (!checkonly) 3404 udp->udp_recvopts = onoff; 3405 break; 3406 case IP_RECVDSTADDR: 3407 if (!checkonly) 3408 udp->udp_recvdstaddr = onoff; 3409 break; 3410 case IP_RECVIF: 3411 if (!checkonly) 3412 udp->udp_recvif = onoff; 3413 break; 3414 case IP_RECVSLLA: 3415 if (!checkonly) 3416 udp->udp_recvslla = onoff; 3417 break; 3418 case IP_RECVTTL: 3419 if (!checkonly) 3420 udp->udp_recvttl = onoff; 3421 break; 3422 case IP_ADD_MEMBERSHIP: 3423 case IP_DROP_MEMBERSHIP: 3424 case IP_BLOCK_SOURCE: 3425 case IP_UNBLOCK_SOURCE: 3426 case IP_ADD_SOURCE_MEMBERSHIP: 3427 case IP_DROP_SOURCE_MEMBERSHIP: 3428 case MCAST_JOIN_GROUP: 3429 case MCAST_LEAVE_GROUP: 3430 case MCAST_BLOCK_SOURCE: 3431 case MCAST_UNBLOCK_SOURCE: 3432 case MCAST_JOIN_SOURCE_GROUP: 3433 case MCAST_LEAVE_SOURCE_GROUP: 3434 case IP_SEC_OPT: 3435 case IP_NEXTHOP: 3436 /* 3437 * "soft" error (negative) 3438 * option not handled at this level 3439 * Do not modify *outlenp. 3440 */ 3441 return (-EINVAL); 3442 case IP_BOUND_IF: 3443 if (!checkonly) 3444 udp->udp_bound_if = *i1; 3445 break; 3446 case IP_UNSPEC_SRC: 3447 if (!checkonly) 3448 udp->udp_unspec_source = onoff; 3449 break; 3450 case IP_XMIT_IF: 3451 if (!checkonly) 3452 udp->udp_xmit_if = *i1; 3453 break; 3454 default: 3455 *outlenp = 0; 3456 return (EINVAL); 3457 } 3458 break; 3459 case IPPROTO_IPV6: { 3460 ip6_pkt_t *ipp; 3461 boolean_t sticky; 3462 3463 if (udp->udp_family != AF_INET6) { 3464 *outlenp = 0; 3465 return (ENOPROTOOPT); 3466 } 3467 /* 3468 * Deal with both sticky options and ancillary data 3469 */ 3470 if (thisdg_attrs == NULL) { 3471 /* sticky options, or none */ 3472 ipp = &udp->udp_sticky_ipp; 3473 sticky = B_TRUE; 3474 } else { 3475 /* ancillary data */ 3476 ipp = (ip6_pkt_t *)thisdg_attrs; 3477 sticky = B_FALSE; 3478 } 3479 3480 switch (name) { 3481 case IPV6_MULTICAST_IF: 3482 if (!checkonly) 3483 udp->udp_multicast_if_index = *i1; 3484 break; 3485 case IPV6_UNICAST_HOPS: 3486 /* -1 means use default */ 3487 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3488 *outlenp = 0; 3489 return (EINVAL); 3490 } 3491 if (!checkonly) { 3492 if (*i1 == -1) { 3493 udp->udp_ttl = ipp->ipp_unicast_hops = 3494 udp_ipv6_hoplimit; 3495 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3496 /* Pass modified value to IP. */ 3497 *i1 = udp->udp_ttl; 3498 } else { 3499 udp->udp_ttl = ipp->ipp_unicast_hops = 3500 (uint8_t)*i1; 3501 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3502 } 3503 /* Rebuild the header template */ 3504 error = udp_build_hdrs(q, udp); 3505 if (error != 0) { 3506 *outlenp = 0; 3507 return (error); 3508 } 3509 } 3510 break; 3511 case IPV6_MULTICAST_HOPS: 3512 /* -1 means use default */ 3513 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3514 *outlenp = 0; 3515 return (EINVAL); 3516 } 3517 if (!checkonly) { 3518 if (*i1 == -1) { 3519 udp->udp_multicast_ttl = 3520 ipp->ipp_multicast_hops = 3521 IP_DEFAULT_MULTICAST_TTL; 3522 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3523 /* Pass modified value to IP. 
*/ 3524 *i1 = udp->udp_multicast_ttl; 3525 } else { 3526 udp->udp_multicast_ttl = 3527 ipp->ipp_multicast_hops = 3528 (uint8_t)*i1; 3529 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3530 } 3531 } 3532 break; 3533 case IPV6_MULTICAST_LOOP: 3534 if (*i1 != 0 && *i1 != 1) { 3535 *outlenp = 0; 3536 return (EINVAL); 3537 } 3538 if (!checkonly) 3539 connp->conn_multicast_loop = *i1; 3540 break; 3541 case IPV6_JOIN_GROUP: 3542 case IPV6_LEAVE_GROUP: 3543 case MCAST_JOIN_GROUP: 3544 case MCAST_LEAVE_GROUP: 3545 case MCAST_BLOCK_SOURCE: 3546 case MCAST_UNBLOCK_SOURCE: 3547 case MCAST_JOIN_SOURCE_GROUP: 3548 case MCAST_LEAVE_SOURCE_GROUP: 3549 /* 3550 * "soft" error (negative) 3551 * option not handled at this level 3552 * Note: Do not modify *outlenp 3553 */ 3554 return (-EINVAL); 3555 case IPV6_BOUND_IF: 3556 if (!checkonly) 3557 udp->udp_bound_if = *i1; 3558 break; 3559 case IPV6_UNSPEC_SRC: 3560 if (!checkonly) 3561 udp->udp_unspec_source = onoff; 3562 break; 3563 /* 3564 * Set boolean switches for ancillary data delivery 3565 */ 3566 case IPV6_RECVPKTINFO: 3567 if (!checkonly) 3568 udp->udp_ipv6_recvpktinfo = onoff; 3569 break; 3570 case IPV6_RECVTCLASS: 3571 if (!checkonly) { 3572 udp->udp_ipv6_recvtclass = onoff; 3573 } 3574 break; 3575 case IPV6_RECVPATHMTU: 3576 if (!checkonly) { 3577 udp->udp_ipv6_recvpathmtu = onoff; 3578 } 3579 break; 3580 case IPV6_RECVHOPLIMIT: 3581 if (!checkonly) 3582 udp->udp_ipv6_recvhoplimit = onoff; 3583 break; 3584 case IPV6_RECVHOPOPTS: 3585 if (!checkonly) 3586 udp->udp_ipv6_recvhopopts = onoff; 3587 break; 3588 case IPV6_RECVDSTOPTS: 3589 if (!checkonly) 3590 udp->udp_ipv6_recvdstopts = onoff; 3591 break; 3592 case _OLD_IPV6_RECVDSTOPTS: 3593 if (!checkonly) 3594 udp->udp_old_ipv6_recvdstopts = onoff; 3595 break; 3596 case IPV6_RECVRTHDRDSTOPTS: 3597 if (!checkonly) 3598 udp->udp_ipv6_recvrthdrdstopts = onoff; 3599 break; 3600 case IPV6_RECVRTHDR: 3601 if (!checkonly) 3602 udp->udp_ipv6_recvrthdr = onoff; 3603 break; 3604 /* 3605 * Set sticky options or ancillary data. 3606 * If sticky options, (re)build any extension headers 3607 * that might be needed as a result. 3608 */ 3609 case IPV6_PKTINFO: 3610 /* 3611 * The source address and ifindex are verified 3612 * in ip_opt_set(). For ancillary data the 3613 * source address is checked in ip_wput_v6. 
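 *
 * A minimal userland sketch of setting this as a sticky option
 * (illustrative only; the interface name is made up):
 *
 *	struct in6_pktinfo pi6;
 *	bzero(&pi6, sizeof (pi6));
 *	pi6.ipi6_ifindex = if_nametoindex("hme0");
 *	(void) setsockopt(fd, IPPROTO_IPV6, IPV6_PKTINFO,
 *	    &pi6, sizeof (pi6));
 *
 * The same structure may instead arrive per datagram as IPV6_PKTINFO
 * ancillary data, in which case thisdg_attrs is non-NULL and sticky
 * is B_FALSE here.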
3614 */ 3615 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3616 return (EINVAL); 3617 if (checkonly) 3618 break; 3619 3620 if (inlen == 0) { 3621 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3622 ipp->ipp_sticky_ignored |= 3623 (IPPF_IFINDEX|IPPF_ADDR); 3624 } else { 3625 struct in6_pktinfo *pkti; 3626 3627 pkti = (struct in6_pktinfo *)invalp; 3628 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3629 ipp->ipp_addr = pkti->ipi6_addr; 3630 if (ipp->ipp_ifindex != 0) 3631 ipp->ipp_fields |= IPPF_IFINDEX; 3632 else 3633 ipp->ipp_fields &= ~IPPF_IFINDEX; 3634 if (!IN6_IS_ADDR_UNSPECIFIED( 3635 &ipp->ipp_addr)) 3636 ipp->ipp_fields |= IPPF_ADDR; 3637 else 3638 ipp->ipp_fields &= ~IPPF_ADDR; 3639 } 3640 if (sticky) { 3641 error = udp_build_hdrs(q, udp); 3642 if (error != 0) 3643 return (error); 3644 } 3645 break; 3646 case IPV6_HOPLIMIT: 3647 if (sticky) 3648 return (EINVAL); 3649 if (inlen != 0 && inlen != sizeof (int)) 3650 return (EINVAL); 3651 if (checkonly) 3652 break; 3653 3654 if (inlen == 0) { 3655 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3656 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3657 } else { 3658 if (*i1 > 255 || *i1 < -1) 3659 return (EINVAL); 3660 if (*i1 == -1) 3661 ipp->ipp_hoplimit = udp_ipv6_hoplimit; 3662 else 3663 ipp->ipp_hoplimit = *i1; 3664 ipp->ipp_fields |= IPPF_HOPLIMIT; 3665 } 3666 break; 3667 case IPV6_TCLASS: 3668 if (inlen != 0 && inlen != sizeof (int)) 3669 return (EINVAL); 3670 if (checkonly) 3671 break; 3672 3673 if (inlen == 0) { 3674 ipp->ipp_fields &= ~IPPF_TCLASS; 3675 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3676 } else { 3677 if (*i1 > 255 || *i1 < -1) 3678 return (EINVAL); 3679 if (*i1 == -1) 3680 ipp->ipp_tclass = 0; 3681 else 3682 ipp->ipp_tclass = *i1; 3683 ipp->ipp_fields |= IPPF_TCLASS; 3684 } 3685 if (sticky) { 3686 error = udp_build_hdrs(q, udp); 3687 if (error != 0) 3688 return (error); 3689 } 3690 break; 3691 case IPV6_NEXTHOP: 3692 /* 3693 * IP will verify that the nexthop is reachable 3694 * and fail for sticky options. 3695 */ 3696 if (inlen != 0 && inlen != sizeof (sin6_t)) 3697 return (EINVAL); 3698 if (checkonly) 3699 break; 3700 3701 if (inlen == 0) { 3702 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3703 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3704 } else { 3705 sin6_t *sin6 = (sin6_t *)invalp; 3706 3707 if (sin6->sin6_family != AF_INET6) 3708 return (EAFNOSUPPORT); 3709 if (IN6_IS_ADDR_V4MAPPED( 3710 &sin6->sin6_addr)) 3711 return (EADDRNOTAVAIL); 3712 ipp->ipp_nexthop = sin6->sin6_addr; 3713 if (!IN6_IS_ADDR_UNSPECIFIED( 3714 &ipp->ipp_nexthop)) 3715 ipp->ipp_fields |= IPPF_NEXTHOP; 3716 else 3717 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3718 } 3719 if (sticky) { 3720 error = udp_build_hdrs(q, udp); 3721 if (error != 0) 3722 return (error); 3723 } 3724 break; 3725 case IPV6_HOPOPTS: { 3726 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3727 /* 3728 * Sanity checks - minimum size, size a multiple of 3729 * eight bytes, and matching size passed in. 
3730 */ 3731 if (inlen != 0 && 3732 inlen != (8 * (hopts->ip6h_len + 1))) 3733 return (EINVAL); 3734 3735 if (checkonly) 3736 break; 3737 3738 if (inlen == 0) { 3739 if (sticky && 3740 (ipp->ipp_fields & IPPF_HOPOPTS) != 0) { 3741 kmem_free(ipp->ipp_hopopts, 3742 ipp->ipp_hopoptslen); 3743 ipp->ipp_hopopts = NULL; 3744 ipp->ipp_hopoptslen = 0; 3745 } 3746 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3747 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3748 } else { 3749 error = udp_pkt_set(invalp, inlen, sticky, 3750 (uchar_t **)&ipp->ipp_hopopts, 3751 &ipp->ipp_hopoptslen); 3752 if (error != 0) 3753 return (error); 3754 ipp->ipp_fields |= IPPF_HOPOPTS; 3755 } 3756 if (sticky) { 3757 error = udp_build_hdrs(q, udp); 3758 if (error != 0) 3759 return (error); 3760 } 3761 break; 3762 } 3763 case IPV6_RTHDRDSTOPTS: { 3764 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3765 3766 /* 3767 * Sanity checks - minimum size, size a multiple of 3768 * eight bytes, and matching size passed in. 3769 */ 3770 if (inlen != 0 && 3771 inlen != (8 * (dopts->ip6d_len + 1))) 3772 return (EINVAL); 3773 3774 if (checkonly) 3775 break; 3776 3777 if (inlen == 0) { 3778 if (sticky && 3779 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 3780 kmem_free(ipp->ipp_rtdstopts, 3781 ipp->ipp_rtdstoptslen); 3782 ipp->ipp_rtdstopts = NULL; 3783 ipp->ipp_rtdstoptslen = 0; 3784 } 3785 3786 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 3787 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 3788 } else { 3789 error = udp_pkt_set(invalp, inlen, sticky, 3790 (uchar_t **)&ipp->ipp_rtdstopts, 3791 &ipp->ipp_rtdstoptslen); 3792 if (error != 0) 3793 return (error); 3794 ipp->ipp_fields |= IPPF_RTDSTOPTS; 3795 } 3796 if (sticky) { 3797 error = udp_build_hdrs(q, udp); 3798 if (error != 0) 3799 return (error); 3800 } 3801 break; 3802 } 3803 case IPV6_DSTOPTS: { 3804 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3805 3806 /* 3807 * Sanity checks - minimum size, size a multiple of 3808 * eight bytes, and matching size passed in. 3809 */ 3810 if (inlen != 0 && 3811 inlen != (8 * (dopts->ip6d_len + 1))) 3812 return (EINVAL); 3813 3814 if (checkonly) 3815 break; 3816 3817 if (inlen == 0) { 3818 if (sticky && 3819 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 3820 kmem_free(ipp->ipp_dstopts, 3821 ipp->ipp_dstoptslen); 3822 ipp->ipp_dstopts = NULL; 3823 ipp->ipp_dstoptslen = 0; 3824 } 3825 ipp->ipp_fields &= ~IPPF_DSTOPTS; 3826 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 3827 } else { 3828 error = udp_pkt_set(invalp, inlen, sticky, 3829 (uchar_t **)&ipp->ipp_dstopts, 3830 &ipp->ipp_dstoptslen); 3831 if (error != 0) 3832 return (error); 3833 ipp->ipp_fields |= IPPF_DSTOPTS; 3834 } 3835 if (sticky) { 3836 error = udp_build_hdrs(q, udp); 3837 if (error != 0) 3838 return (error); 3839 } 3840 break; 3841 } 3842 case IPV6_RTHDR: { 3843 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 3844 3845 /* 3846 * Sanity checks - minimum size, size a multiple of 3847 * eight bytes, and matching size passed in. 
3848 */ 3849 if (inlen != 0 && 3850 inlen != (8 * (rt->ip6r_len + 1))) 3851 return (EINVAL); 3852 3853 if (checkonly) 3854 break; 3855 3856 if (inlen == 0) { 3857 if (sticky && 3858 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 3859 kmem_free(ipp->ipp_rthdr, 3860 ipp->ipp_rthdrlen); 3861 ipp->ipp_rthdr = NULL; 3862 ipp->ipp_rthdrlen = 0; 3863 } 3864 ipp->ipp_fields &= ~IPPF_RTHDR; 3865 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 3866 } else { 3867 error = udp_pkt_set(invalp, inlen, sticky, 3868 (uchar_t **)&ipp->ipp_rthdr, 3869 &ipp->ipp_rthdrlen); 3870 if (error != 0) 3871 return (error); 3872 ipp->ipp_fields |= IPPF_RTHDR; 3873 } 3874 if (sticky) { 3875 error = udp_build_hdrs(q, udp); 3876 if (error != 0) 3877 return (error); 3878 } 3879 break; 3880 } 3881 3882 case IPV6_DONTFRAG: 3883 if (checkonly) 3884 break; 3885 3886 if (onoff) { 3887 ipp->ipp_fields |= IPPF_DONTFRAG; 3888 } else { 3889 ipp->ipp_fields &= ~IPPF_DONTFRAG; 3890 } 3891 break; 3892 3893 case IPV6_USE_MIN_MTU: 3894 if (inlen != sizeof (int)) 3895 return (EINVAL); 3896 3897 if (*i1 < -1 || *i1 > 1) 3898 return (EINVAL); 3899 3900 if (checkonly) 3901 break; 3902 3903 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 3904 ipp->ipp_use_min_mtu = *i1; 3905 break; 3906 3907 case IPV6_BOUND_PIF: 3908 case IPV6_SEC_OPT: 3909 case IPV6_DONTFAILOVER_IF: 3910 case IPV6_SRC_PREFERENCES: 3911 case IPV6_V6ONLY: 3912 /* Handled at the IP level */ 3913 return (-EINVAL); 3914 default: 3915 *outlenp = 0; 3916 return (EINVAL); 3917 } 3918 break; 3919 } /* end IPPROTO_IPV6 */ 3920 case IPPROTO_UDP: 3921 switch (name) { 3922 case UDP_ANONPRIVBIND: 3923 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 3924 *outlenp = 0; 3925 return (error); 3926 } 3927 if (!checkonly) { 3928 udp->udp_anon_priv_bind = onoff; 3929 } 3930 break; 3931 case UDP_EXCLBIND: 3932 if (!checkonly) 3933 udp->udp_exclbind = onoff; 3934 break; 3935 case UDP_RCVHDR: 3936 if (!checkonly) 3937 udp->udp_rcvhdr = onoff; 3938 break; 3939 default: 3940 *outlenp = 0; 3941 return (EINVAL); 3942 } 3943 break; 3944 default: 3945 *outlenp = 0; 3946 return (EINVAL); 3947 } 3948 /* 3949 * Common case of OK return with outval same as inval. 3950 */ 3951 if (invalp != outvalp) { 3952 /* don't trust bcopy for identical src/dst */ 3953 (void) bcopy(invalp, outvalp, inlen); 3954 } 3955 *outlenp = inlen; 3956 return (0); 3957 } 3958 3959 /* 3960 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 3961 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 3962 * headers, and the udp header. 3963 * Returns failure if can't allocate memory. 
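 * Roughly, the finished template is laid out as [ ip6i_t if IPPF_HAS_IP6I is set ][ ip6_t ][ sticky extension headers ][ udpha_t ], with the source address taken from udp_v6src (unless IPPF_ADDR is set) and the source port from udp_port.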
3964 */ 3965 static int 3966 udp_build_hdrs(queue_t *q, udp_t *udp) 3967 { 3968 uchar_t *hdrs; 3969 uint_t hdrs_len; 3970 ip6_t *ip6h; 3971 ip6i_t *ip6i; 3972 udpha_t *udpha; 3973 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 3974 3975 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 3976 ASSERT(hdrs_len != 0); 3977 if (hdrs_len != udp->udp_sticky_hdrs_len) { 3978 /* Need to reallocate */ 3979 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 3980 if (hdrs == NULL) 3981 return (ENOMEM); 3982 3983 if (udp->udp_sticky_hdrs_len != 0) { 3984 kmem_free(udp->udp_sticky_hdrs, 3985 udp->udp_sticky_hdrs_len); 3986 } 3987 udp->udp_sticky_hdrs = hdrs; 3988 udp->udp_sticky_hdrs_len = hdrs_len; 3989 } 3990 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 3991 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 3992 3993 /* Set header fields not in ipp */ 3994 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 3995 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 3996 ip6h = (ip6_t *)&ip6i[1]; 3997 } else { 3998 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 3999 } 4000 4001 if (!(ipp->ipp_fields & IPPF_ADDR)) 4002 ip6h->ip6_src = udp->udp_v6src; 4003 4004 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 4005 udpha->uha_src_port = udp->udp_port; 4006 4007 /* Try to get everything in a single mblk */ 4008 if (hdrs_len > udp->udp_max_hdr_len) { 4009 udp->udp_max_hdr_len = hdrs_len; 4010 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 4011 udp_wroff_extra); 4012 } 4013 return (0); 4014 } 4015 4016 /* 4017 * Set optbuf and optlen for the option. 4018 * If sticky is set allocate memory (if not already present). 4019 * Otherwise just point optbuf and optlen at invalp and inlen. 4020 * Returns failure if memory can not be allocated. 4021 */ 4022 static int 4023 udp_pkt_set(uchar_t *invalp, uint_t inlen, boolean_t sticky, 4024 uchar_t **optbufp, uint_t *optlenp) 4025 { 4026 uchar_t *optbuf; 4027 4028 if (!sticky) { 4029 *optbufp = invalp; 4030 *optlenp = inlen; 4031 return (0); 4032 } 4033 if (inlen == *optlenp) { 4034 /* Unchanged length - no need to realocate */ 4035 bcopy(invalp, *optbufp, inlen); 4036 return (0); 4037 } 4038 if (inlen != 0) { 4039 /* Allocate new buffer before free */ 4040 optbuf = kmem_alloc(inlen, KM_NOSLEEP); 4041 if (optbuf == NULL) 4042 return (ENOMEM); 4043 } else { 4044 optbuf = NULL; 4045 } 4046 /* Free old buffer */ 4047 if (*optlenp != 0) 4048 kmem_free(*optbufp, *optlenp); 4049 4050 bcopy(invalp, optbuf, inlen); 4051 *optbufp = optbuf; 4052 *optlenp = inlen; 4053 return (0); 4054 } 4055 4056 /* 4057 * This routine retrieves the value of an ND variable in a udpparam_t 4058 * structure. It is called through nd_getset when a user reads the 4059 * variable. 4060 */ 4061 /* ARGSUSED */ 4062 static int 4063 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4064 { 4065 udpparam_t *udppa = (udpparam_t *)cp; 4066 4067 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4068 return (0); 4069 } 4070 4071 /* 4072 * Walk through the param array specified registering each element with the 4073 * named dispatch (ND) handler. 
4074 */ 4075 static boolean_t 4076 udp_param_register(udpparam_t *udppa, int cnt) 4077 { 4078 for (; cnt-- > 0; udppa++) { 4079 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4080 if (!nd_load(&udp_g_nd, udppa->udp_param_name, 4081 udp_param_get, udp_param_set, 4082 (caddr_t)udppa)) { 4083 nd_free(&udp_g_nd); 4084 return (B_FALSE); 4085 } 4086 } 4087 } 4088 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports", 4089 udp_extra_priv_ports_get, NULL, NULL)) { 4090 nd_free(&udp_g_nd); 4091 return (B_FALSE); 4092 } 4093 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add", 4094 NULL, udp_extra_priv_ports_add, NULL)) { 4095 nd_free(&udp_g_nd); 4096 return (B_FALSE); 4097 } 4098 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del", 4099 NULL, udp_extra_priv_ports_del, NULL)) { 4100 nd_free(&udp_g_nd); 4101 return (B_FALSE); 4102 } 4103 if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL, 4104 NULL)) { 4105 nd_free(&udp_g_nd); 4106 return (B_FALSE); 4107 } 4108 if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL, 4109 NULL)) { 4110 nd_free(&udp_g_nd); 4111 return (B_FALSE); 4112 } 4113 return (B_TRUE); 4114 } 4115 4116 /* This routine sets an ND variable in a udpparam_t structure. */ 4117 /* ARGSUSED */ 4118 static int 4119 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr) 4120 { 4121 long new_value; 4122 udpparam_t *udppa = (udpparam_t *)cp; 4123 4124 /* 4125 * Fail the request if the new value does not lie within the 4126 * required bounds. 4127 */ 4128 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 4129 new_value < udppa->udp_param_min || 4130 new_value > udppa->udp_param_max) { 4131 return (EINVAL); 4132 } 4133 4134 /* Set the new value */ 4135 udppa->udp_param_value = new_value; 4136 return (0); 4137 } 4138 4139 static void 4140 udp_input(conn_t *connp, mblk_t *mp) 4141 { 4142 struct T_unitdata_ind *tudi; 4143 uchar_t *rptr; /* Pointer to IP header */ 4144 int hdr_length; /* Length of IP+UDP headers */ 4145 int udi_size; /* Size of T_unitdata_ind */ 4146 int mp_len; 4147 udp_t *udp; 4148 udpha_t *udpha; 4149 int ipversion; 4150 ip6_pkt_t ipp; 4151 ip6_t *ip6h; 4152 ip6i_t *ip6i; 4153 mblk_t *mp1; 4154 mblk_t *options_mp = NULL; 4155 in_pktinfo_t *pinfo = NULL; 4156 cred_t *cr = NULL; 4157 queue_t *q = connp->conn_rq; 4158 pid_t cpid; 4159 4160 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4161 "udp_rput_start: q %p mp %p", q, mp); 4162 4163 udp = connp->conn_udp; 4164 rptr = mp->b_rptr; 4165 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL); 4166 ASSERT(OK_32PTR(rptr)); 4167 4168 /* 4169 * IP should have prepended the options data in an M_CTL message. 4170 * Check the M_CTL "type" to make sure we are not here because of 4171 * a valid ICMP message. 4172 */ 4173 if (DB_TYPE(mp) == M_CTL) { 4174 if (MBLKL(mp) == sizeof (in_pktinfo_t) && 4175 ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type == 4176 IN_PKTINFO) { 4177 /* 4178 * IP_RECVIF or IP_RECVSLLA information has been 4179 * appended to the packet by IP. We need to 4180 * extract the mblk and adjust the rptr 4181 */ 4182 pinfo = (in_pktinfo_t *)mp->b_rptr; 4183 options_mp = mp; 4184 mp = mp->b_cont; 4185 rptr = mp->b_rptr; 4186 UDP_STAT(udp_in_pktinfo); 4187 } else { 4188 /* 4189 * ICMP messages. 4190 */ 4191 udp_icmp_error(q, mp); 4192 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4193 "udp_rput_end: q %p (%S)", q, "m_ctl"); 4194 return; 4195 } 4196 } 4197 4198 mp_len = msgdsize(mp); 4199 /* 4200 * This is the inbound data path.
4201 * First, we check to make sure the IP version number is correct, 4202 * and then pull the IP and UDP headers into the first mblk. 4203 * Assume IP provides aligned packets - otherwise toss. 4204 * Also, check if we have a complete IP header. 4205 */ 4206 4207 /* Initialize regardless of whether ipversion is IPv4 or IPv6 */ 4208 ipp.ipp_fields = 0; 4209 4210 ipversion = IPH_HDR_VERSION(rptr); 4211 switch (ipversion) { 4212 case IPV4_VERSION: 4213 ASSERT(MBLKL(mp) >= sizeof (ipha_t)); 4214 ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP); 4215 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4216 if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) || 4217 (udp->udp_ip_rcv_options_len)) { 4218 /* 4219 * Handle IPv4 packets with options outside of the 4220 * main data path. Not needed for AF_INET6 sockets 4221 * since they don't support a getsockopt of IP_OPTIONS. 4222 */ 4223 if (udp->udp_family == AF_INET6) 4224 break; 4225 /* 4226 * UDP length check performed for IPv4 packets with 4227 * options to check whether UDP length specified in 4228 * the header is the same as the physical length of 4229 * the packet. 4230 */ 4231 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4232 if (mp_len != (ntohs(udpha->uha_length) + 4233 hdr_length - UDPH_SIZE)) { 4234 goto tossit; 4235 } 4236 /* 4237 * Handle the case where the packet has IP options 4238 * and the IP_RECVSLLA & IP_RECVIF are set 4239 */ 4240 if (pinfo != NULL) 4241 mp = options_mp; 4242 udp_become_writer(connp, mp, udp_rput_other_wrapper, 4243 SQTAG_UDP_INPUT); 4244 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4245 "udp_rput_end: q %p (%S)", q, "end"); 4246 return; 4247 } 4248 4249 /* Handle IPV6_RECVPKTINFO: save the incoming interface index. */ 4250 if ((udp->udp_family == AF_INET6) && (pinfo != NULL) && 4251 udp->udp_ipv6_recvpktinfo) { 4252 if (pinfo->in_pkt_flags & IPF_RECVIF) { 4253 ipp.ipp_fields |= IPPF_IFINDEX; 4254 ipp.ipp_ifindex = pinfo->in_pkt_ifindex; 4255 } 4256 } 4257 break; 4258 case IPV6_VERSION: 4259 /* 4260 * IPv6 packets can only be received by applications 4261 * that are prepared to receive IPv6 addresses. 4262 * The IP fanout must ensure this. 4263 */ 4264 ASSERT(udp->udp_family == AF_INET6); 4265 4266 ip6h = (ip6_t *)rptr; 4267 ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr); 4268 4269 if (ip6h->ip6_nxt != IPPROTO_UDP) { 4270 uint8_t nexthdrp; 4271 /* Look for ifindex information */ 4272 if (ip6h->ip6_nxt == IPPROTO_RAW) { 4273 ip6i = (ip6i_t *)ip6h; 4274 if ((uchar_t *)&ip6i[1] > mp->b_wptr) 4275 goto tossit; 4276 4277 if (ip6i->ip6i_flags & IP6I_IFINDEX) { 4278 ASSERT(ip6i->ip6i_ifindex != 0); 4279 ipp.ipp_fields |= IPPF_IFINDEX; 4280 ipp.ipp_ifindex = ip6i->ip6i_ifindex; 4281 } 4282 rptr = (uchar_t *)&ip6i[1]; 4283 mp->b_rptr = rptr; 4284 if (rptr == mp->b_wptr) { 4285 mp1 = mp->b_cont; 4286 freeb(mp); 4287 mp = mp1; 4288 rptr = mp->b_rptr; 4289 } 4290 if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE)) 4291 goto tossit; 4292 ip6h = (ip6_t *)rptr; 4293 mp_len = msgdsize(mp); 4294 } 4295 /* 4296 * Find any potentially interesting extension headers 4297 * as well as the length of the IPv6 + extension 4298 * headers. 4299 */ 4300 hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) + 4301 UDPH_SIZE; 4302 ASSERT(nexthdrp == IPPROTO_UDP); 4303 } else { 4304 hdr_length = IPV6_HDR_LEN + UDPH_SIZE; 4305 ip6i = NULL; 4306 } 4307 break; 4308 default: 4309 ASSERT(0); 4310 } 4311 4312 /* 4313 * IP inspected the UDP header thus all of it must be in the mblk.
4314 * UDP length check is performed for IPv6 packets and IPv4 packets 4315 * without options to check if the size of the packet as specified 4316 * by the header is the same as the physical size of the packet. 4317 */ 4318 udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE)); 4319 if ((MBLKL(mp) < hdr_length) || 4320 (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) { 4321 goto tossit; 4322 } 4323 4324 /* Walk past the headers. */ 4325 if (!udp->udp_rcvhdr) { 4326 mp->b_rptr = rptr + hdr_length; 4327 mp_len -= hdr_length; 4328 } 4329 4330 /* 4331 * This is the inbound data path. Packets are passed upstream as 4332 * T_UNITDATA_IND messages with full IP headers still attached. 4333 */ 4334 if (udp->udp_family == AF_INET) { 4335 sin_t *sin; 4336 4337 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4338 4339 /* 4340 * Normally only send up the address. 4341 * If IP_RECVDSTADDR is set we include the destination IP 4342 * address as an option. With IP_RECVOPTS we include all 4343 * the IP options. Only ip_rput_other() handles packets 4344 * that contain IP options. 4345 */ 4346 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 4347 if (udp->udp_recvdstaddr) { 4348 udi_size += sizeof (struct T_opthdr) + 4349 sizeof (struct in_addr); 4350 UDP_STAT(udp_in_recvdstaddr); 4351 } 4352 4353 /* 4354 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 4355 * space accordingly 4356 */ 4357 if (udp->udp_recvif && (pinfo != NULL) && 4358 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4359 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 4360 UDP_STAT(udp_in_recvif); 4361 } 4362 4363 if (udp->udp_recvslla && (pinfo != NULL) && 4364 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4365 udi_size += sizeof (struct T_opthdr) + 4366 sizeof (struct sockaddr_dl); 4367 UDP_STAT(udp_in_recvslla); 4368 } 4369 4370 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4371 udi_size += sizeof (struct T_opthdr) + ucredsize; 4372 cpid = DB_CPID(mp); 4373 UDP_STAT(udp_in_recvucred); 4374 } 4375 /* 4376 * If IP_RECVTTL is set allocate the appropriate sized buffer 4377 */ 4378 if (udp->udp_recvttl) { 4379 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 4380 UDP_STAT(udp_in_recvttl); 4381 } 4382 4383 ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH); 4384 4385 /* Allocate a message block for the T_UNITDATA_IND structure. */ 4386 mp1 = allocb(udi_size, BPRI_MED); 4387 if (mp1 == NULL) { 4388 freemsg(mp); 4389 if (options_mp != NULL) 4390 freeb(options_mp); 4391 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4392 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4393 BUMP_MIB(&udp_mib, udpInErrors); 4394 return; 4395 } 4396 mp1->b_cont = mp; 4397 mp = mp1; 4398 mp->b_datap->db_type = M_PROTO; 4399 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4400 mp->b_wptr = (uchar_t *)tudi + udi_size; 4401 tudi->PRIM_type = T_UNITDATA_IND; 4402 tudi->SRC_length = sizeof (sin_t); 4403 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4404 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4405 sizeof (sin_t); 4406 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 4407 tudi->OPT_length = udi_size; 4408 sin = (sin_t *)&tudi[1]; 4409 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 4410 sin->sin_port = udpha->uha_src_port; 4411 sin->sin_family = udp->udp_family; 4412 *(uint32_t *)&sin->sin_zero[0] = 0; 4413 *(uint32_t *)&sin->sin_zero[4] = 0; 4414 4415 /* 4416 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 4417 * IP_RECVTTL has been set. 
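 * Each enabled item below is encoded as a struct T_opthdr immediately followed by its value; dstopt advances past every entry and udi_size is decremented until the final ASSERT confirms the allocated space was consumed exactly.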
4418 */ 4419 if (udi_size != 0) { 4420 /* 4421 * Copy in destination address before options to avoid 4422 * any padding issues. 4423 */ 4424 char *dstopt; 4425 4426 dstopt = (char *)&sin[1]; 4427 if (udp->udp_recvdstaddr) { 4428 struct T_opthdr *toh; 4429 ipaddr_t *dstptr; 4430 4431 toh = (struct T_opthdr *)dstopt; 4432 toh->level = IPPROTO_IP; 4433 toh->name = IP_RECVDSTADDR; 4434 toh->len = sizeof (struct T_opthdr) + 4435 sizeof (ipaddr_t); 4436 toh->status = 0; 4437 dstopt += sizeof (struct T_opthdr); 4438 dstptr = (ipaddr_t *)dstopt; 4439 *dstptr = ((ipha_t *)rptr)->ipha_dst; 4440 dstopt += sizeof (ipaddr_t); 4441 udi_size -= toh->len; 4442 } 4443 4444 if (udp->udp_recvslla && (pinfo != NULL) && 4445 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 4446 4447 struct T_opthdr *toh; 4448 struct sockaddr_dl *dstptr; 4449 4450 toh = (struct T_opthdr *)dstopt; 4451 toh->level = IPPROTO_IP; 4452 toh->name = IP_RECVSLLA; 4453 toh->len = sizeof (struct T_opthdr) + 4454 sizeof (struct sockaddr_dl); 4455 toh->status = 0; 4456 dstopt += sizeof (struct T_opthdr); 4457 dstptr = (struct sockaddr_dl *)dstopt; 4458 bcopy(&pinfo->in_pkt_slla, dstptr, 4459 sizeof (struct sockaddr_dl)); 4460 dstopt += sizeof (struct sockaddr_dl); 4461 udi_size -= toh->len; 4462 } 4463 4464 if (udp->udp_recvif && (pinfo != NULL) && 4465 (pinfo->in_pkt_flags & IPF_RECVIF)) { 4466 4467 struct T_opthdr *toh; 4468 uint_t *dstptr; 4469 4470 toh = (struct T_opthdr *)dstopt; 4471 toh->level = IPPROTO_IP; 4472 toh->name = IP_RECVIF; 4473 toh->len = sizeof (struct T_opthdr) + 4474 sizeof (uint_t); 4475 toh->status = 0; 4476 dstopt += sizeof (struct T_opthdr); 4477 dstptr = (uint_t *)dstopt; 4478 *dstptr = pinfo->in_pkt_ifindex; 4479 dstopt += sizeof (uint_t); 4480 udi_size -= toh->len; 4481 } 4482 4483 if (cr != NULL) { 4484 struct T_opthdr *toh; 4485 4486 toh = (struct T_opthdr *)dstopt; 4487 toh->level = SOL_SOCKET; 4488 toh->name = SCM_UCRED; 4489 toh->len = sizeof (struct T_opthdr) + ucredsize; 4490 toh->status = 0; 4491 (void) cred2ucred(cr, cpid, &toh[1]); 4492 dstopt += toh->len; 4493 udi_size -= toh->len; 4494 } 4495 4496 if (udp->udp_recvttl) { 4497 struct T_opthdr *toh; 4498 uint8_t *dstptr; 4499 4500 toh = (struct T_opthdr *)dstopt; 4501 toh->level = IPPROTO_IP; 4502 toh->name = IP_RECVTTL; 4503 toh->len = sizeof (struct T_opthdr) + 4504 sizeof (uint8_t); 4505 toh->status = 0; 4506 dstopt += sizeof (struct T_opthdr); 4507 dstptr = (uint8_t *)dstopt; 4508 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 4509 dstopt += sizeof (uint8_t); 4510 udi_size -= toh->len; 4511 } 4512 4513 /* Consumed all of allocated space */ 4514 ASSERT(udi_size == 0); 4515 } 4516 } else { 4517 sin6_t *sin6; 4518 4519 /* 4520 * Handle both IPv4 and IPv6 packets for IPv6 sockets. 4521 * 4522 * Normally we only send up the address. If receiving of any 4523 * optional receive side information is enabled, we also send 4524 * that up as options. 
4525 * [ Only udp_rput_other() handles packets that contain IP 4526 * options so code to account for does not appear immediately 4527 * below but elsewhere ] 4528 */ 4529 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t); 4530 4531 if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS| 4532 IPPF_RTHDR|IPPF_IFINDEX)) { 4533 if (udp->udp_ipv6_recvhopopts && 4534 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4535 udi_size += sizeof (struct T_opthdr) + 4536 ipp.ipp_hopoptslen; 4537 UDP_STAT(udp_in_recvhopopts); 4538 } 4539 if ((udp->udp_ipv6_recvdstopts || 4540 udp->udp_old_ipv6_recvdstopts) && 4541 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4542 udi_size += sizeof (struct T_opthdr) + 4543 ipp.ipp_dstoptslen; 4544 UDP_STAT(udp_in_recvdstopts); 4545 } 4546 if (((udp->udp_ipv6_recvdstopts && 4547 udp->udp_ipv6_recvrthdr && 4548 (ipp.ipp_fields & IPPF_RTHDR)) || 4549 udp->udp_ipv6_recvrthdrdstopts) && 4550 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4551 udi_size += sizeof (struct T_opthdr) + 4552 ipp.ipp_rtdstoptslen; 4553 UDP_STAT(udp_in_recvrtdstopts); 4554 } 4555 if (udp->udp_ipv6_recvrthdr && 4556 (ipp.ipp_fields & IPPF_RTHDR)) { 4557 udi_size += sizeof (struct T_opthdr) + 4558 ipp.ipp_rthdrlen; 4559 UDP_STAT(udp_in_recvrthdr); 4560 } 4561 if (udp->udp_ipv6_recvpktinfo && 4562 (ipp.ipp_fields & IPPF_IFINDEX)) { 4563 udi_size += sizeof (struct T_opthdr) + 4564 sizeof (struct in6_pktinfo); 4565 UDP_STAT(udp_in_recvpktinfo); 4566 } 4567 4568 } 4569 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 4570 udi_size += sizeof (struct T_opthdr) + ucredsize; 4571 cpid = DB_CPID(mp); 4572 UDP_STAT(udp_in_recvucred); 4573 } 4574 4575 if (udp->udp_ipv6_recvhoplimit) { 4576 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4577 UDP_STAT(udp_in_recvhoplimit); 4578 } 4579 4580 if (udp->udp_ipv6_recvtclass) { 4581 udi_size += sizeof (struct T_opthdr) + sizeof (int); 4582 UDP_STAT(udp_in_recvtclass); 4583 } 4584 4585 mp1 = allocb(udi_size, BPRI_MED); 4586 if (mp1 == NULL) { 4587 freemsg(mp); 4588 if (options_mp != NULL) 4589 freeb(options_mp); 4590 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4591 "udp_rput_end: q %p (%S)", q, "allocbfail"); 4592 BUMP_MIB(&udp_mib, udpInErrors); 4593 return; 4594 } 4595 mp1->b_cont = mp; 4596 mp = mp1; 4597 mp->b_datap->db_type = M_PROTO; 4598 tudi = (struct T_unitdata_ind *)mp->b_rptr; 4599 mp->b_wptr = (uchar_t *)tudi + udi_size; 4600 tudi->PRIM_type = T_UNITDATA_IND; 4601 tudi->SRC_length = sizeof (sin6_t); 4602 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 4603 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + 4604 sizeof (sin6_t); 4605 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t)); 4606 tudi->OPT_length = udi_size; 4607 sin6 = (sin6_t *)&tudi[1]; 4608 if (ipversion == IPV4_VERSION) { 4609 in6_addr_t v6dst; 4610 4611 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src, 4612 &sin6->sin6_addr); 4613 IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst, 4614 &v6dst); 4615 sin6->sin6_flowinfo = 0; 4616 sin6->sin6_scope_id = 0; 4617 sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst, 4618 connp->conn_zoneid); 4619 } else { 4620 sin6->sin6_addr = ip6h->ip6_src; 4621 /* No sin6_flowinfo per API */ 4622 sin6->sin6_flowinfo = 0; 4623 /* For link-scope source pass up scope id */ 4624 if ((ipp.ipp_fields & IPPF_IFINDEX) && 4625 IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src)) 4626 sin6->sin6_scope_id = ipp.ipp_ifindex; 4627 else 4628 sin6->sin6_scope_id = 0; 4629 sin6->__sin6_src_id = ip_srcid_find_addr( 4630 &ip6h->ip6_dst, connp->conn_zoneid); 4631 } 4632 sin6->sin6_port = 
udpha->uha_src_port; 4633 sin6->sin6_family = udp->udp_family; 4634 4635 if (udi_size != 0) { 4636 uchar_t *dstopt; 4637 4638 dstopt = (uchar_t *)&sin6[1]; 4639 if (udp->udp_ipv6_recvpktinfo && 4640 (ipp.ipp_fields & IPPF_IFINDEX)) { 4641 struct T_opthdr *toh; 4642 struct in6_pktinfo *pkti; 4643 4644 toh = (struct T_opthdr *)dstopt; 4645 toh->level = IPPROTO_IPV6; 4646 toh->name = IPV6_PKTINFO; 4647 toh->len = sizeof (struct T_opthdr) + 4648 sizeof (*pkti); 4649 toh->status = 0; 4650 dstopt += sizeof (struct T_opthdr); 4651 pkti = (struct in6_pktinfo *)dstopt; 4652 if (ipversion == IPV6_VERSION) 4653 pkti->ipi6_addr = ip6h->ip6_dst; 4654 else 4655 IN6_IPADDR_TO_V4MAPPED( 4656 ((ipha_t *)rptr)->ipha_dst, 4657 &pkti->ipi6_addr); 4658 pkti->ipi6_ifindex = ipp.ipp_ifindex; 4659 dstopt += sizeof (*pkti); 4660 udi_size -= toh->len; 4661 } 4662 if (udp->udp_ipv6_recvhoplimit) { 4663 struct T_opthdr *toh; 4664 4665 toh = (struct T_opthdr *)dstopt; 4666 toh->level = IPPROTO_IPV6; 4667 toh->name = IPV6_HOPLIMIT; 4668 toh->len = sizeof (struct T_opthdr) + 4669 sizeof (uint_t); 4670 toh->status = 0; 4671 dstopt += sizeof (struct T_opthdr); 4672 if (ipversion == IPV6_VERSION) 4673 *(uint_t *)dstopt = ip6h->ip6_hops; 4674 else 4675 *(uint_t *)dstopt = 4676 ((ipha_t *)rptr)->ipha_ttl; 4677 dstopt += sizeof (uint_t); 4678 udi_size -= toh->len; 4679 } 4680 if (udp->udp_ipv6_recvtclass) { 4681 struct T_opthdr *toh; 4682 4683 toh = (struct T_opthdr *)dstopt; 4684 toh->level = IPPROTO_IPV6; 4685 toh->name = IPV6_TCLASS; 4686 toh->len = sizeof (struct T_opthdr) + 4687 sizeof (uint_t); 4688 toh->status = 0; 4689 dstopt += sizeof (struct T_opthdr); 4690 if (ipversion == IPV6_VERSION) { 4691 *(uint_t *)dstopt = 4692 IPV6_FLOW_TCLASS(ip6h->ip6_flow); 4693 } else { 4694 ipha_t *ipha = (ipha_t *)rptr; 4695 *(uint_t *)dstopt = 4696 ipha->ipha_type_of_service; 4697 } 4698 dstopt += sizeof (uint_t); 4699 udi_size -= toh->len; 4700 } 4701 if (udp->udp_ipv6_recvhopopts && 4702 (ipp.ipp_fields & IPPF_HOPOPTS)) { 4703 struct T_opthdr *toh; 4704 4705 toh = (struct T_opthdr *)dstopt; 4706 toh->level = IPPROTO_IPV6; 4707 toh->name = IPV6_HOPOPTS; 4708 toh->len = sizeof (struct T_opthdr) + 4709 ipp.ipp_hopoptslen; 4710 toh->status = 0; 4711 dstopt += sizeof (struct T_opthdr); 4712 bcopy(ipp.ipp_hopopts, dstopt, 4713 ipp.ipp_hopoptslen); 4714 dstopt += ipp.ipp_hopoptslen; 4715 udi_size -= toh->len; 4716 } 4717 if (udp->udp_ipv6_recvdstopts && 4718 udp->udp_ipv6_recvrthdr && 4719 (ipp.ipp_fields & IPPF_RTHDR) && 4720 (ipp.ipp_fields & IPPF_RTDSTOPTS)) { 4721 struct T_opthdr *toh; 4722 4723 toh = (struct T_opthdr *)dstopt; 4724 toh->level = IPPROTO_IPV6; 4725 toh->name = IPV6_DSTOPTS; 4726 toh->len = sizeof (struct T_opthdr) + 4727 ipp.ipp_rtdstoptslen; 4728 toh->status = 0; 4729 dstopt += sizeof (struct T_opthdr); 4730 bcopy(ipp.ipp_rtdstopts, dstopt, 4731 ipp.ipp_rtdstoptslen); 4732 dstopt += ipp.ipp_rtdstoptslen; 4733 udi_size -= toh->len; 4734 } 4735 if (udp->udp_ipv6_recvrthdr && 4736 (ipp.ipp_fields & IPPF_RTHDR)) { 4737 struct T_opthdr *toh; 4738 4739 toh = (struct T_opthdr *)dstopt; 4740 toh->level = IPPROTO_IPV6; 4741 toh->name = IPV6_RTHDR; 4742 toh->len = sizeof (struct T_opthdr) + 4743 ipp.ipp_rthdrlen; 4744 toh->status = 0; 4745 dstopt += sizeof (struct T_opthdr); 4746 bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen); 4747 dstopt += ipp.ipp_rthdrlen; 4748 udi_size -= toh->len; 4749 } 4750 if (udp->udp_ipv6_recvdstopts && 4751 (ipp.ipp_fields & IPPF_DSTOPTS)) { 4752 struct T_opthdr *toh; 4753 4754 toh = (struct T_opthdr 
*)dstopt; 4755 toh->level = IPPROTO_IPV6; 4756 toh->name = IPV6_DSTOPTS; 4757 toh->len = sizeof (struct T_opthdr) + 4758 ipp.ipp_dstoptslen; 4759 toh->status = 0; 4760 dstopt += sizeof (struct T_opthdr); 4761 bcopy(ipp.ipp_dstopts, dstopt, 4762 ipp.ipp_dstoptslen); 4763 dstopt += ipp.ipp_dstoptslen; 4764 udi_size -= toh->len; 4765 } 4766 4767 if (cr != NULL) { 4768 struct T_opthdr *toh; 4769 4770 toh = (struct T_opthdr *)dstopt; 4771 toh->level = SOL_SOCKET; 4772 toh->name = SCM_UCRED; 4773 toh->len = sizeof (struct T_opthdr) + ucredsize; 4774 toh->status = 0; 4775 (void) cred2ucred(cr, cpid, &toh[1]); 4776 dstopt += toh->len; 4777 udi_size -= toh->len; 4778 } 4779 /* Consumed all of allocated space */ 4780 ASSERT(udi_size == 0); 4781 } 4782 #undef sin6 4783 /* No IP_RECVDSTADDR for IPv6. */ 4784 } 4785 4786 BUMP_MIB(&udp_mib, udpInDatagrams); 4787 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4788 "udp_rput_end: q %p (%S)", q, "end"); 4789 if (options_mp != NULL) 4790 freeb(options_mp); 4791 4792 if (udp->udp_direct_sockfs) { 4793 /* 4794 * There is nothing above us except for the stream head; 4795 * use the read-side synchronous stream interface in 4796 * order to reduce the time spent in interrupt thread. 4797 */ 4798 ASSERT(udp->udp_issocket); 4799 udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len); 4800 } else { 4801 /* 4802 * Use regular STREAMS interface to pass data upstream 4803 * if this is not a socket endpoint, or if we have 4804 * switched over to the slow mode due to sockmod being 4805 * popped or a module being pushed on top of us. 4806 */ 4807 putnext(UDP_RD(q), mp); 4808 } 4809 return; 4810 4811 tossit: 4812 freemsg(mp); 4813 if (options_mp != NULL) 4814 freeb(options_mp); 4815 BUMP_MIB(&udp_mib, udpInErrors); 4816 } 4817 4818 void 4819 udp_conn_recv(conn_t *connp, mblk_t *mp) 4820 { 4821 _UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT); 4822 } 4823 4824 /* ARGSUSED */ 4825 static void 4826 udp_input_wrapper(void *arg, mblk_t *mp, void *arg2) 4827 { 4828 udp_input((conn_t *)arg, mp); 4829 _UDP_EXIT((conn_t *)arg); 4830 } 4831 4832 /* 4833 * Process non-M_DATA messages as well as M_DATA messages that requires 4834 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options. 4835 */ 4836 static void 4837 udp_rput_other(queue_t *q, mblk_t *mp) 4838 { 4839 struct T_unitdata_ind *tudi; 4840 mblk_t *mp1; 4841 uchar_t *rptr; 4842 uchar_t *new_rptr; 4843 int hdr_length; 4844 int udi_size; /* Size of T_unitdata_ind */ 4845 int opt_len; /* Length of IP options */ 4846 sin_t *sin; 4847 struct T_error_ack *tea; 4848 mblk_t *options_mp = NULL; 4849 in_pktinfo_t *pinfo; 4850 boolean_t recv_on = B_FALSE; 4851 cred_t *cr = NULL; 4852 udp_t *udp = Q_TO_UDP(q); 4853 pid_t cpid; 4854 4855 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 4856 "udp_rput_other: q %p mp %p", q, mp); 4857 4858 ASSERT(OK_32PTR(mp->b_rptr)); 4859 rptr = mp->b_rptr; 4860 4861 switch (mp->b_datap->db_type) { 4862 case M_CTL: 4863 /* 4864 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 4865 */ 4866 recv_on = B_TRUE; 4867 options_mp = mp; 4868 pinfo = (in_pktinfo_t *)options_mp->b_rptr; 4869 4870 /* 4871 * The actual data is in mp->b_cont 4872 */ 4873 mp = mp->b_cont; 4874 ASSERT(OK_32PTR(mp->b_rptr)); 4875 rptr = mp->b_rptr; 4876 break; 4877 case M_DATA: 4878 /* 4879 * M_DATA messages contain IPv4 datagrams. They are handled 4880 * after this switch. 4881 */ 4882 break; 4883 case M_PROTO: 4884 case M_PCPROTO: 4885 /* M_PROTO messages contain some type of TPI message. 
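 * Of these, T_ERROR_ACK (from a failed bind, connect or disconnect) is patched up below, T_BIND_ACK goes to udp_rput_bind_ack(), T_OPTMGMT_ACK and T_OK_ACK are passed upstream unchanged, and anything else is freed.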
*/ 4886 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 4887 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 4888 freemsg(mp); 4889 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4890 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 4891 return; 4892 } 4893 tea = (struct T_error_ack *)rptr; 4894 4895 switch (tea->PRIM_type) { 4896 case T_ERROR_ACK: 4897 switch (tea->ERROR_prim) { 4898 case O_T_BIND_REQ: 4899 case T_BIND_REQ: { 4900 /* 4901 * If our O_T_BIND_REQ/T_BIND_REQ fails, 4902 * clear out the associated port and source 4903 * address before passing the message 4904 * upstream. If this was caused by a T_CONN_REQ 4905 * revert back to bound state. 4906 */ 4907 udp_fanout_t *udpf; 4908 4909 udpf = &udp_bind_fanout[ 4910 UDP_BIND_HASH(udp->udp_port)]; 4911 mutex_enter(&udpf->uf_lock); 4912 if (udp->udp_state == TS_DATA_XFER) { 4913 /* Connect failed */ 4914 tea->ERROR_prim = T_CONN_REQ; 4915 /* Revert back to the bound source */ 4916 udp->udp_v6src = udp->udp_bound_v6src; 4917 udp->udp_state = TS_IDLE; 4918 mutex_exit(&udpf->uf_lock); 4919 if (udp->udp_family == AF_INET6) 4920 (void) udp_build_hdrs(q, udp); 4921 break; 4922 } 4923 4924 if (udp->udp_discon_pending) { 4925 tea->ERROR_prim = T_DISCON_REQ; 4926 udp->udp_discon_pending = 0; 4927 } 4928 V6_SET_ZERO(udp->udp_v6src); 4929 V6_SET_ZERO(udp->udp_bound_v6src); 4930 udp->udp_state = TS_UNBND; 4931 udp_bind_hash_remove(udp, B_TRUE); 4932 udp->udp_port = 0; 4933 mutex_exit(&udpf->uf_lock); 4934 if (udp->udp_family == AF_INET6) 4935 (void) udp_build_hdrs(q, udp); 4936 break; 4937 } 4938 default: 4939 break; 4940 } 4941 break; 4942 case T_BIND_ACK: 4943 udp_rput_bind_ack(q, mp); 4944 return; 4945 4946 case T_OPTMGMT_ACK: 4947 case T_OK_ACK: 4948 break; 4949 default: 4950 freemsg(mp); 4951 return; 4952 } 4953 putnext(UDP_RD(q), mp); 4954 return; 4955 } 4956 4957 /* 4958 * This is the inbound data path. 4959 * First, we make sure the data contains both IP and UDP headers. 4960 * 4961 * This handles IPv4 packets only for AF_INET sockets. 4962 * AF_INET6 sockets can never access udp_ip_rcv_options, so there 4963 * is no need to save the options. 4964 */ 4965 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 4966 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 4967 if (mp->b_wptr - rptr < hdr_length) { 4968 if (!pullupmsg(mp, hdr_length)) { 4969 freemsg(mp); 4970 if (options_mp != NULL) 4971 freeb(options_mp); 4973 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 4974 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 4975 BUMP_MIB(&udp_mib, udpInErrors); 4976 return; 4977 } 4978 rptr = mp->b_rptr; 4979 } 4980 /* Walk past the headers.
*/ 4981 new_rptr = rptr + hdr_length; 4982 if (!udp->udp_rcvhdr) 4983 mp->b_rptr = new_rptr; 4984 4985 /* Save the options if any */ 4986 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 4987 if (opt_len > 0) { 4988 if (opt_len > udp->udp_ip_rcv_options_len) { 4989 if (udp->udp_ip_rcv_options_len) 4990 mi_free((char *)udp->udp_ip_rcv_options); 4991 udp->udp_ip_rcv_options_len = 0; 4992 udp->udp_ip_rcv_options = 4993 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 4994 if (udp->udp_ip_rcv_options) 4995 udp->udp_ip_rcv_options_len = opt_len; 4996 } 4997 if (udp->udp_ip_rcv_options_len) { 4998 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 4999 udp->udp_ip_rcv_options, opt_len); 5000 /* Adjust length if we are reusing the space */ 5001 udp->udp_ip_rcv_options_len = opt_len; 5002 } 5003 } else if (udp->udp_ip_rcv_options_len) { 5004 mi_free((char *)udp->udp_ip_rcv_options); 5005 udp->udp_ip_rcv_options = NULL; 5006 udp->udp_ip_rcv_options_len = 0; 5007 } 5008 5009 /* 5010 * Normally only send up the address. 5011 * If IP_RECVDSTADDR is set we include the destination IP 5012 * address as an option. With IP_RECVOPTS we include all 5013 * the IP options. 5014 */ 5015 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5016 if (udp->udp_recvdstaddr) { 5017 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5018 UDP_STAT(udp_in_recvdstaddr); 5019 } 5020 if (udp->udp_recvopts && opt_len > 0) { 5021 udi_size += sizeof (struct T_opthdr) + opt_len; 5022 UDP_STAT(udp_in_recvopts); 5023 } 5024 5025 /* 5026 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5027 * space accordingly 5028 */ 5029 if (udp->udp_recvif && recv_on && 5030 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5031 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5032 UDP_STAT(udp_in_recvif); 5033 } 5034 5035 if (udp->udp_recvslla && recv_on && 5036 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5037 udi_size += sizeof (struct T_opthdr) + 5038 sizeof (struct sockaddr_dl); 5039 UDP_STAT(udp_in_recvslla); 5040 } 5041 5042 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5043 udi_size += sizeof (struct T_opthdr) + ucredsize; 5044 cpid = DB_CPID(mp); 5045 UDP_STAT(udp_in_recvucred); 5046 } 5047 /* 5048 * If IP_RECVTTL is set allocate the appropriate sized buffer 5049 */ 5050 if (udp->udp_recvttl) { 5051 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5052 UDP_STAT(udp_in_recvttl); 5053 } 5054 5055 /* Allocate a message block for the T_UNITDATA_IND structure.
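 * The new mblk is laid out as the T_unitdata_ind, then the sin_t source address, then any T_opthdr-encoded options; the original datagram stays chained off b_cont.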
*/ 5056 mp1 = allocb(udi_size, BPRI_MED); 5057 if (mp1 == NULL) { 5058 freemsg(mp); 5059 if (options_mp != NULL) 5060 freeb(options_mp); 5061 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5062 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5063 BUMP_MIB(&udp_mib, udpInErrors); 5064 return; 5065 } 5066 mp1->b_cont = mp; 5067 mp = mp1; 5068 mp->b_datap->db_type = M_PROTO; 5069 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5070 mp->b_wptr = (uchar_t *)tudi + udi_size; 5071 tudi->PRIM_type = T_UNITDATA_IND; 5072 tudi->SRC_length = sizeof (sin_t); 5073 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5074 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5075 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5076 tudi->OPT_length = udi_size; 5077 5078 sin = (sin_t *)&tudi[1]; 5079 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5080 sin->sin_port = ((in_port_t *) 5081 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5082 sin->sin_family = AF_INET; 5083 *(uint32_t *)&sin->sin_zero[0] = 0; 5084 *(uint32_t *)&sin->sin_zero[4] = 0; 5085 5086 /* 5087 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5088 * IP_RECVTTL has been set. 5089 */ 5090 if (udi_size != 0) { 5091 /* 5092 * Copy in destination address before options to avoid any 5093 * padding issues. 5094 */ 5095 char *dstopt; 5096 5097 dstopt = (char *)&sin[1]; 5098 if (udp->udp_recvdstaddr) { 5099 struct T_opthdr *toh; 5100 ipaddr_t *dstptr; 5101 5102 toh = (struct T_opthdr *)dstopt; 5103 toh->level = IPPROTO_IP; 5104 toh->name = IP_RECVDSTADDR; 5105 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5106 toh->status = 0; 5107 dstopt += sizeof (struct T_opthdr); 5108 dstptr = (ipaddr_t *)dstopt; 5109 *dstptr = (((ipaddr_t *)rptr)[4]); 5110 dstopt += sizeof (ipaddr_t); 5111 udi_size -= toh->len; 5112 } 5113 if (udp->udp_recvopts && udi_size != 0) { 5114 struct T_opthdr *toh; 5115 5116 toh = (struct T_opthdr *)dstopt; 5117 toh->level = IPPROTO_IP; 5118 toh->name = IP_RECVOPTS; 5119 toh->len = sizeof (struct T_opthdr) + opt_len; 5120 toh->status = 0; 5121 dstopt += sizeof (struct T_opthdr); 5122 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5123 dstopt += opt_len; 5124 udi_size -= toh->len; 5125 } 5126 5127 if (udp->udp_recvslla && recv_on && 5128 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5129 5130 struct T_opthdr *toh; 5131 struct sockaddr_dl *dstptr; 5132 5133 toh = (struct T_opthdr *)dstopt; 5134 toh->level = IPPROTO_IP; 5135 toh->name = IP_RECVSLLA; 5136 toh->len = sizeof (struct T_opthdr) + 5137 sizeof (struct sockaddr_dl); 5138 toh->status = 0; 5139 dstopt += sizeof (struct T_opthdr); 5140 dstptr = (struct sockaddr_dl *)dstopt; 5141 bcopy(&pinfo->in_pkt_slla, dstptr, 5142 sizeof (struct sockaddr_dl)); 5143 dstopt += sizeof (struct sockaddr_dl); 5144 udi_size -= toh->len; 5145 } 5146 5147 if (udp->udp_recvif && recv_on && 5148 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5149 5150 struct T_opthdr *toh; 5151 uint_t *dstptr; 5152 5153 toh = (struct T_opthdr *)dstopt; 5154 toh->level = IPPROTO_IP; 5155 toh->name = IP_RECVIF; 5156 toh->len = sizeof (struct T_opthdr) + 5157 sizeof (uint_t); 5158 toh->status = 0; 5159 dstopt += sizeof (struct T_opthdr); 5160 dstptr = (uint_t *)dstopt; 5161 *dstptr = pinfo->in_pkt_ifindex; 5162 dstopt += sizeof (uint_t); 5163 udi_size -= toh->len; 5164 } 5165 5166 if (cr != NULL) { 5167 struct T_opthdr *toh; 5168 5169 toh = (struct T_opthdr *)dstopt; 5170 toh->level = SOL_SOCKET; 5171 toh->name = SCM_UCRED; 5172 toh->len = sizeof (struct T_opthdr) + ucredsize; 5173 toh->status 
= 0; 5174 (void) cred2ucred(cr, cpid, &toh[1]); 5175 dstopt += toh->len; 5176 udi_size -= toh->len; 5177 } 5178 5179 if (udp->udp_recvttl) { 5180 struct T_opthdr *toh; 5181 uint8_t *dstptr; 5182 5183 toh = (struct T_opthdr *)dstopt; 5184 toh->level = IPPROTO_IP; 5185 toh->name = IP_RECVTTL; 5186 toh->len = sizeof (struct T_opthdr) + 5187 sizeof (uint8_t); 5188 toh->status = 0; 5189 dstopt += sizeof (struct T_opthdr); 5190 dstptr = (uint8_t *)dstopt; 5191 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5192 dstopt += sizeof (uint8_t); 5193 udi_size -= toh->len; 5194 } 5195 5196 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5197 } 5198 BUMP_MIB(&udp_mib, udpInDatagrams); 5199 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5200 "udp_rput_other_end: q %p (%S)", q, "end"); 5201 if (options_mp != NULL) 5202 freeb(options_mp); 5203 5204 if (udp->udp_direct_sockfs) { 5205 /* 5206 * There is nothing above us except for the stream head; 5207 * use the read-side synchronous stream interface in 5208 * order to reduce the time spent in interrupt thread. 5209 */ 5210 ASSERT(udp->udp_issocket); 5211 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5212 } else { 5213 /* 5214 * Use regular STREAMS interface to pass data upstream 5215 * if this is not a socket endpoint, or if we have 5216 * switched over to the slow mode due to sockmod being 5217 * popped or a module being pushed on top of us. 5218 */ 5219 putnext(UDP_RD(q), mp); 5220 } 5221 } 5222 5223 /* ARGSUSED */ 5224 static void 5225 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5226 { 5227 conn_t *connp = arg; 5228 5229 udp_rput_other(connp->conn_rq, mp); 5230 udp_exit(connp); 5231 } 5232 5233 /* 5234 * Process a T_BIND_ACK 5235 */ 5236 static void 5237 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5238 { 5239 udp_t *udp = Q_TO_UDP(q); 5240 mblk_t *mp1; 5241 ire_t *ire; 5242 struct T_bind_ack *tba; 5243 uchar_t *addrp; 5244 ipa_conn_t *ac; 5245 ipa6_conn_t *ac6; 5246 5247 if (udp->udp_discon_pending) 5248 udp->udp_discon_pending = 0; 5249 5250 /* 5251 * If a broadcast/multicast address was bound set 5252 * the source address to 0. 5253 * This ensures no datagrams with broadcast address 5254 * as source address are emitted (which would violate 5255 * RFC1122 - Hosts requirements) 5256 * 5257 * Note that when connecting the returned IRE is 5258 * for the destination address and we only perform 5259 * the broadcast check for the source address (it 5260 * is OK to connect to a broadcast/multicast address.) 5261 */ 5262 mp1 = mp->b_cont; 5263 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5264 ire = (ire_t *)mp1->b_rptr; 5265 5266 /* 5267 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5268 * local address. 
5269 */ 5270 if (ire->ire_type == IRE_BROADCAST && 5271 udp->udp_state != TS_DATA_XFER) { 5272 /* This was just a local bind to a broadcast addr */ 5273 V6_SET_ZERO(udp->udp_v6src); 5274 if (udp->udp_family == AF_INET6) 5275 (void) udp_build_hdrs(q, udp); 5276 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5277 /* 5278 * Local address not yet set - pick it from the 5279 * T_bind_ack 5280 */ 5281 tba = (struct T_bind_ack *)mp->b_rptr; 5282 addrp = &mp->b_rptr[tba->ADDR_offset]; 5283 switch (udp->udp_family) { 5284 case AF_INET: 5285 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5286 ac = (ipa_conn_t *)addrp; 5287 } else { 5288 ASSERT(tba->ADDR_length == 5289 sizeof (ipa_conn_x_t)); 5290 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5291 } 5292 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5293 &udp->udp_v6src); 5294 break; 5295 case AF_INET6: 5296 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5297 ac6 = (ipa6_conn_t *)addrp; 5298 } else { 5299 ASSERT(tba->ADDR_length == 5300 sizeof (ipa6_conn_x_t)); 5301 ac6 = &((ipa6_conn_x_t *) 5302 addrp)->ac6x_conn; 5303 } 5304 udp->udp_v6src = ac6->ac6_laddr; 5305 (void) udp_build_hdrs(q, udp); 5306 break; 5307 } 5308 } 5309 mp1 = mp1->b_cont; 5310 } 5311 /* 5312 * Look for one or more appended ACK message added by 5313 * udp_connect or udp_disconnect. 5314 * If none found just send up the T_BIND_ACK. 5315 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5316 * udp_disconnect has appended a T_OK_ACK. 5317 */ 5318 if (mp1 != NULL) { 5319 if (mp->b_cont == mp1) 5320 mp->b_cont = NULL; 5321 else { 5322 ASSERT(mp->b_cont->b_cont == mp1); 5323 mp->b_cont->b_cont = NULL; 5324 } 5325 freemsg(mp); 5326 mp = mp1; 5327 while (mp != NULL) { 5328 mp1 = mp->b_cont; 5329 mp->b_cont = NULL; 5330 putnext(UDP_RD(q), mp); 5331 mp = mp1; 5332 } 5333 return; 5334 } 5335 freemsg(mp->b_cont); 5336 mp->b_cont = NULL; 5337 putnext(UDP_RD(q), mp); 5338 } 5339 5340 /* 5341 * return SNMP stuff in buffer in mpdata 5342 */ 5343 int 5344 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5345 { 5346 mblk_t *mpdata; 5347 mblk_t *mp_conn_ctl; 5348 mblk_t *mp6_conn_ctl; 5349 mblk_t *mp_conn_data; 5350 mblk_t *mp6_conn_data; 5351 mblk_t *mp_conn_tail = NULL; 5352 mblk_t *mp6_conn_tail = NULL; 5353 struct opthdr *optp; 5354 mib2_udpEntry_t ude; 5355 mib2_udp6Entry_t ude6; 5356 int state; 5357 zoneid_t zoneid; 5358 int i; 5359 connf_t *connfp; 5360 conn_t *connp = Q_TO_CONN(q); 5361 udp_t *udp = connp->conn_udp; 5362 5363 if (mpctl == NULL || 5364 (mpdata = mpctl->b_cont) == NULL || 5365 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5366 (mp6_conn_ctl = copymsg(mpctl)) == NULL) { 5367 freemsg(mp_conn_ctl); 5368 return (0); 5369 } 5370 5371 mp_conn_data = mp_conn_ctl->b_cont; 5372 mp6_conn_data = mp6_conn_ctl->b_cont; 5373 5374 zoneid = connp->conn_zoneid; 5375 5376 /* fixed length structure for IPv4 and IPv6 counters */ 5377 SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5378 SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5379 optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)]; 5380 optp->level = MIB2_UDP; 5381 optp->name = 0; 5382 (void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib)); 5383 optp->len = msgdsize(mpdata); 5384 qreply(q, mpctl); 5385 5386 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5387 connfp = &ipcl_globalhash_fanout[i]; 5388 connp = NULL; 5389 5390 while ((connp = ipcl_get_next_conn(connfp, connp, 5391 IPCL_UDP))) { 5392 udp = connp->conn_udp; 5393 if (zoneid != connp->conn_zoneid) 5394 continue; 5395 5396 /* 5397 * Note that the port 
numbers are sent in 5398 * host byte order 5399 */ 5400 5401 if (udp->udp_state == TS_UNBND) 5402 state = MIB2_UDP_unbound; 5403 else if (udp->udp_state == TS_IDLE) 5404 state = MIB2_UDP_idle; 5405 else if (udp->udp_state == TS_DATA_XFER) 5406 state = MIB2_UDP_connected; 5407 else 5408 state = MIB2_UDP_unknown; 5409 5410 /* 5411 * Create an IPv4 table entry for IPv4 entries and also 5412 * any IPv6 entries which are bound to in6addr_any 5413 * (i.e. anything a IPv4 peer could connect/send to). 5414 */ 5415 if (udp->udp_ipversion == IPV4_VERSION || 5416 (udp->udp_state <= TS_IDLE && 5417 IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) { 5418 ude.udpEntryInfo.ue_state = state; 5419 /* 5420 * If in6addr_any this will set it to 5421 * INADDR_ANY 5422 */ 5423 ude.udpLocalAddress = 5424 V4_PART_OF_V6(udp->udp_v6src); 5425 ude.udpLocalPort = ntohs(udp->udp_port); 5426 if (udp->udp_state == TS_DATA_XFER) { 5427 /* 5428 * Can potentially get here for 5429 * v6 socket if another process 5430 * (say, ping) has just done a 5431 * sendto(), changing the state 5432 * from the TS_IDLE above to 5433 * TS_DATA_XFER by the time we hit 5434 * this part of the code. 5435 */ 5436 ude.udpEntryInfo.ue_RemoteAddress = 5437 V4_PART_OF_V6(udp->udp_v6dst); 5438 ude.udpEntryInfo.ue_RemotePort = 5439 ntohs(udp->udp_dstport); 5440 } else { 5441 ude.udpEntryInfo.ue_RemoteAddress = 0; 5442 ude.udpEntryInfo.ue_RemotePort = 0; 5443 } 5444 (void) snmp_append_data2(mp_conn_data, 5445 &mp_conn_tail, (char *)&ude, sizeof (ude)); 5446 } 5447 if (udp->udp_ipversion == IPV6_VERSION) { 5448 ude6.udp6EntryInfo.ue_state = state; 5449 ude6.udp6LocalAddress = udp->udp_v6src; 5450 ude6.udp6LocalPort = ntohs(udp->udp_port); 5451 ude6.udp6IfIndex = udp->udp_bound_if; 5452 if (udp->udp_state == TS_DATA_XFER) { 5453 ude6.udp6EntryInfo.ue_RemoteAddress = 5454 udp->udp_v6dst; 5455 ude6.udp6EntryInfo.ue_RemotePort = 5456 ntohs(udp->udp_dstport); 5457 } else { 5458 ude6.udp6EntryInfo.ue_RemoteAddress = 5459 sin6_null.sin6_addr; 5460 ude6.udp6EntryInfo.ue_RemotePort = 0; 5461 } 5462 (void) snmp_append_data2(mp6_conn_data, 5463 &mp6_conn_tail, (char *)&ude6, 5464 sizeof (ude6)); 5465 } 5466 } 5467 } 5468 5469 /* IPv4 UDP endpoints */ 5470 optp = (struct opthdr *)&mp_conn_ctl->b_rptr[ 5471 sizeof (struct T_optmgmt_ack)]; 5472 optp->level = MIB2_UDP; 5473 optp->name = MIB2_UDP_ENTRY; 5474 optp->len = msgdsize(mp_conn_data); 5475 qreply(q, mp_conn_ctl); 5476 5477 /* IPv6 UDP endpoints */ 5478 optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[ 5479 sizeof (struct T_optmgmt_ack)]; 5480 optp->level = MIB2_UDP6; 5481 optp->name = MIB2_UDP6_ENTRY; 5482 optp->len = msgdsize(mp6_conn_data); 5483 qreply(q, mp6_conn_ctl); 5484 5485 return (1); 5486 } 5487 5488 /* 5489 * Return 0 if invalid set request, 1 otherwise, including non-udp requests. 5490 * NOTE: Per MIB-II, UDP has no writable data. 5491 * TODO: If this ever actually tries to set anything, it needs to be 5492 * to do the appropriate locking. 
5493 */ 5494 /* ARGSUSED */ 5495 int 5496 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5497 uchar_t *ptr, int len) 5498 { 5499 switch (level) { 5500 case MIB2_UDP: 5501 return (0); 5502 default: 5503 return (1); 5504 } 5505 } 5506 5507 static void 5508 udp_report_item(mblk_t *mp, udp_t *udp) 5509 { 5510 char *state; 5511 char addrbuf1[INET6_ADDRSTRLEN]; 5512 char addrbuf2[INET6_ADDRSTRLEN]; 5513 uint_t print_len, buf_len; 5514 5515 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5516 ASSERT(buf_len >= 0); 5517 if (buf_len == 0) 5518 return; 5519 5520 if (udp->udp_state == TS_UNBND) 5521 state = "UNBOUND"; 5522 else if (udp->udp_state == TS_IDLE) 5523 state = "IDLE"; 5524 else if (udp->udp_state == TS_DATA_XFER) 5525 state = "CONNECTED"; 5526 else 5527 state = "UnkState"; 5528 print_len = snprintf((char *)mp->b_wptr, buf_len, 5529 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5530 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5531 inet_ntop(AF_INET6, &udp->udp_v6src, 5532 addrbuf1, sizeof (addrbuf1)), 5533 inet_ntop(AF_INET6, &udp->udp_v6dst, 5534 addrbuf2, sizeof (addrbuf2)), 5535 ntohs(udp->udp_dstport), state); 5536 if (print_len < buf_len) { 5537 mp->b_wptr += print_len; 5538 } else { 5539 mp->b_wptr += buf_len; 5540 } 5541 } 5542 5543 /* Report for ndd "udp_status" */ 5544 /* ARGSUSED */ 5545 static int 5546 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 5547 { 5548 zoneid_t zoneid; 5549 connf_t *connfp; 5550 conn_t *connp = Q_TO_CONN(q); 5551 udp_t *udp = connp->conn_udp; 5552 int i; 5553 5554 /* 5555 * Because of the ndd constraint, at most we can have 64K buffer 5556 * to put in all UDP info. So to be more efficient, just 5557 * allocate a 64K buffer here, assuming we need that large buffer. 5558 * This may be a problem as any user can read udp_status. Therefore 5559 * we limit the rate of doing this using udp_ndd_get_info_interval. 5560 * This should be OK as normal users should not do this too often. 5561 */ 5562 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 5563 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 5564 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 5565 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 5566 return (0); 5567 } 5568 } 5569 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 5570 /* The following may work even if we cannot get a large buf. */ 5571 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 5572 return (0); 5573 } 5574 (void) mi_mpprintf(mp, 5575 "UDP " MI_COL_HDRPAD_STR 5576 /* 12345678[89ABCDEF] */ 5577 " zone lport src addr dest addr port state"); 5578 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 5579 5580 zoneid = connp->conn_zoneid; 5581 5582 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 5583 connfp = &ipcl_globalhash_fanout[i]; 5584 connp = NULL; 5585 5586 while ((connp = ipcl_get_next_conn(connfp, connp, 5587 IPCL_UDP))) { 5588 udp = connp->conn_udp; 5589 if (zoneid != GLOBAL_ZONEID && 5590 zoneid != connp->conn_zoneid) 5591 continue; 5592 5593 udp_report_item(mp->b_cont, udp); 5594 } 5595 } 5596 udp_last_ndd_get_info_time = ddi_get_lbolt(); 5597 return (0); 5598 } 5599 5600 /* 5601 * This routine creates a T_UDERROR_IND message and passes it upstream. 5602 * The address and options are copied from the T_UNITDATA_REQ message 5603 * passed in mp. This message is freed. 
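 * For M_DATA messages there is no T_UNITDATA_REQ to copy from, so the caller passes destaddr and destlen in explicitly and no options are reported.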
5604 */ 5605 static void 5606 udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen, 5607 t_scalar_t err) 5608 { 5609 struct T_unitdata_req *tudr; 5610 mblk_t *mp1; 5611 uchar_t *optaddr; 5612 t_scalar_t optlen; 5613 5614 if (DB_TYPE(mp) == M_DATA) { 5615 ASSERT(destaddr != NULL && destlen != 0); 5616 optaddr = NULL; 5617 optlen = 0; 5618 } else { 5619 if ((mp->b_wptr < mp->b_rptr) || 5620 (MBLKL(mp)) < sizeof (struct T_unitdata_req)) { 5621 goto done; 5622 } 5623 tudr = (struct T_unitdata_req *)mp->b_rptr; 5624 destaddr = mp->b_rptr + tudr->DEST_offset; 5625 if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr || 5626 destaddr + tudr->DEST_length < mp->b_rptr || 5627 destaddr + tudr->DEST_length > mp->b_wptr) { 5628 goto done; 5629 } 5630 optaddr = mp->b_rptr + tudr->OPT_offset; 5631 if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr || 5632 optaddr + tudr->OPT_length < mp->b_rptr || 5633 optaddr + tudr->OPT_length > mp->b_wptr) { 5634 goto done; 5635 } 5636 destlen = tudr->DEST_length; 5637 optlen = tudr->OPT_length; 5638 } 5639 5640 mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen, 5641 (char *)optaddr, optlen, err); 5642 if (mp1 != NULL) 5643 putnext(UDP_RD(q), mp1); 5644 5645 done: 5646 freemsg(mp); 5647 } 5648 5649 /* 5650 * This routine removes a port number association from a stream. It 5651 * is called by udp_wput to handle T_UNBIND_REQ messages. 5652 */ 5653 static void 5654 udp_unbind(queue_t *q, mblk_t *mp) 5655 { 5656 udp_t *udp = Q_TO_UDP(q); 5657 5658 /* If a bind has not been done, we can't unbind. */ 5659 if (udp->udp_state == TS_UNBND) { 5660 udp_err_ack(q, mp, TOUTSTATE, 0); 5661 return; 5662 } 5663 if (cl_inet_unbind != NULL) { 5664 /* 5665 * Running in cluster mode - register unbind information 5666 */ 5667 if (udp->udp_ipversion == IPV4_VERSION) { 5668 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET, 5669 (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)), 5670 (in_port_t)udp->udp_port); 5671 } else { 5672 (*cl_inet_unbind)(IPPROTO_UDP, AF_INET6, 5673 (uint8_t *)&(udp->udp_v6src), 5674 (in_port_t)udp->udp_port); 5675 } 5676 } 5677 5678 udp_bind_hash_remove(udp, B_FALSE); 5679 V6_SET_ZERO(udp->udp_v6src); 5680 V6_SET_ZERO(udp->udp_bound_v6src); 5681 udp->udp_port = 0; 5682 udp->udp_state = TS_UNBND; 5683 5684 if (udp->udp_family == AF_INET6) { 5685 int error; 5686 5687 /* Rebuild the header template */ 5688 error = udp_build_hdrs(q, udp); 5689 if (error != 0) { 5690 udp_err_ack(q, mp, TSYSERR, error); 5691 return; 5692 } 5693 } 5694 /* 5695 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK 5696 * and therefore ip_unbind must never return NULL. 5697 */ 5698 mp = ip_unbind(q, mp); 5699 ASSERT(mp != NULL); 5700 putnext(UDP_RD(q), mp); 5701 } 5702 5703 /* 5704 * Don't let port fall into the privileged range. 5705 * Since the extra priviledged ports can be arbitrary we also 5706 * ensure that we exclude those from consideration. 5707 * udp_g_epriv_ports is not sorted thus we loop over it until 5708 * there are no changes. 5709 */ 5710 static in_port_t 5711 udp_update_next_port(in_port_t port, boolean_t random) 5712 { 5713 int i; 5714 5715 if (random && udp_random_anon_port != 0) { 5716 (void) random_get_pseudo_bytes((uint8_t *)&port, 5717 sizeof (in_port_t)); 5718 /* 5719 * Unless changed by a sys admin, the smallest anon port 5720 * is 32768 and the largest anon port is 65535. It is 5721 * very likely (50%) for the random port to be smaller 5722 * than the smallest anon port. 
When that happens, 5723 * add port % (anon port range) to the smallest anon 5724 * port to get the random port. It should fall into the 5725 * valid anon port range. 5726 */ 5727 if (port < udp_smallest_anon_port) { 5728 port = udp_smallest_anon_port + 5729 port % (udp_largest_anon_port - 5730 udp_smallest_anon_port); 5731 } 5732 } 5733 5734 retry: 5735 if (port < udp_smallest_anon_port || port > udp_largest_anon_port) 5736 port = udp_smallest_anon_port; 5737 5738 if (port < udp_smallest_nonpriv_port) 5739 port = udp_smallest_nonpriv_port; 5740 5741 for (i = 0; i < udp_g_num_epriv_ports; i++) { 5742 if (port == udp_g_epriv_ports[i]) { 5743 port++; 5744 /* 5745 * Make sure that the port is in the 5746 * valid range. 5747 */ 5748 goto retry; 5749 } 5750 } 5751 return (port); 5752 } 5753 5754 static mblk_t * 5755 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 5756 uint_t srcid, int *error) 5757 { 5758 udp_t *udp = connp->conn_udp; 5759 queue_t *q = connp->conn_wq; 5760 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 5761 mblk_t *mp2; 5762 ipha_t *ipha; 5763 int ip_hdr_length; 5764 uint32_t ip_len; 5765 udpha_t *udpha; 5766 5767 *error = 0; 5768 5769 /* mp1 points to the M_DATA mblk carrying the packet */ 5770 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 5771 5772 /* Add an IP header */ 5773 ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + 5774 udp->udp_ip_snd_options_len; 5775 ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length]; 5776 if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) || 5777 !OK_32PTR(ipha)) { 5778 mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO); 5779 if (mp2 == NULL) { 5780 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5781 "udp_wput_end: q %p (%S)", q, "allocbfail2"); 5782 *error = ENOMEM; 5783 goto done; 5784 } 5785 mp2->b_wptr = DB_LIM(mp2); 5786 mp2->b_cont = mp1; 5787 mp1 = mp2; 5788 if (DB_TYPE(mp) != M_DATA) 5789 mp->b_cont = mp1; 5790 else 5791 mp = mp1; 5792 5793 ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length); 5794 } 5795 ip_hdr_length -= UDPH_SIZE; 5796 #ifdef _BIG_ENDIAN 5797 /* Set version, header length, and tos */ 5798 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5799 ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) | 5800 udp->udp_type_of_service); 5801 /* Set ttl and protocol */ 5802 *(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP; 5803 #else 5804 /* Set version, header length, and tos */ 5805 *(uint16_t *)&ipha->ipha_version_and_hdr_length = 5806 ((udp->udp_type_of_service << 8) | 5807 ((IP_VERSION << 4) | (ip_hdr_length>>2))); 5808 /* Set ttl and protocol */ 5809 *(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl; 5810 #endif 5811 /* 5812 * Copy our address into the packet. If this is zero, 5813 * first look at __sin6_src_id for a hint. If we leave the source 5814 * as INADDR_ANY then ip will fill in the real source address. 
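 * The srcid argument is the __sin6_src_id supplied by the sender; ip_srcid_find_id() below translates it back into an address for this zone.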
5815 */ 5816 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src); 5817 if (srcid != 0 && ipha->ipha_src == INADDR_ANY) { 5818 in6_addr_t v6src; 5819 5820 ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid); 5821 IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src); 5822 } 5823 5824 ipha->ipha_fragment_offset_and_flags = 0; 5825 ipha->ipha_ident = 0; 5826 5827 mp1->b_rptr = (uchar_t *)ipha; 5828 5829 ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <= 5830 (uintptr_t)UINT_MAX); 5831 5832 /* Determine length of packet */ 5833 ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha); 5834 if ((mp2 = mp1->b_cont) != NULL) { 5835 do { 5836 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 5837 ip_len += (uint32_t)MBLKL(mp2); 5838 } while ((mp2 = mp2->b_cont) != NULL); 5839 } 5840 /* 5841 * If the size of the packet is greater than the maximum allowed by 5842 * ip, return an error. Passing this down could cause panics because 5843 * the size will have wrapped and be inconsistent with the msg size. 5844 */ 5845 if (ip_len > IP_MAXPACKET) { 5846 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5847 "udp_wput_end: q %p (%S)", q, "IP length exceeded"); 5848 *error = EMSGSIZE; 5849 goto done; 5850 } 5851 ipha->ipha_length = htons((uint16_t)ip_len); 5852 ip_len -= ip_hdr_length; 5853 ip_len = htons((uint16_t)ip_len); 5854 udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length); 5855 5856 /* 5857 * Copy in the destination address 5858 */ 5859 if (v4dst == INADDR_ANY) 5860 ipha->ipha_dst = htonl(INADDR_LOOPBACK); 5861 else 5862 ipha->ipha_dst = v4dst; 5863 5864 /* 5865 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic. 5866 */ 5867 if (CLASSD(v4dst)) 5868 ipha->ipha_ttl = udp->udp_multicast_ttl; 5869 5870 udpha->uha_dst_port = port; 5871 udpha->uha_src_port = udp->udp_port; 5872 5873 if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) { 5874 uint32_t cksum; 5875 5876 bcopy(udp->udp_ip_snd_options, &ipha[1], 5877 udp->udp_ip_snd_options_len); 5878 /* 5879 * Massage source route putting first source route in ipha_dst. 5880 * Ignore the destination in T_unitdata_req. 5881 * Create a checksum adjustment for a source route, if any. 5882 */ 5883 cksum = ip_massage_options(ipha); 5884 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5885 cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) + 5886 (ipha->ipha_dst & 0xFFFF); 5887 if ((int)cksum < 0) 5888 cksum--; 5889 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5890 /* 5891 * IP does the checksum if uha_checksum is non-zero, 5892 * We make it easy for IP to include our pseudo header 5893 * by putting our length in uha_checksum. 5894 */ 5895 cksum += ip_len; 5896 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5897 /* There might be a carry. */ 5898 cksum = (cksum & 0xFFFF) + (cksum >> 16); 5899 #ifdef _LITTLE_ENDIAN 5900 if (udp_do_checksum) 5901 ip_len = (cksum << 16) | ip_len; 5902 #else 5903 if (udp_do_checksum) 5904 ip_len = (ip_len << 16) | cksum; 5905 else 5906 ip_len <<= 16; 5907 #endif 5908 } else { 5909 /* 5910 * IP does the checksum if uha_checksum is non-zero, 5911 * We make it easy for IP to include our pseudo header 5912 * by putting our length in uha_checksum. 
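 * uha_length and uha_checksum are adjacent 16-bit fields, so the single
 * 32-bit store below fills uha_length with the UDP length and uha_checksum
 * with either that same length (ask IP to checksum) or zero (checksumming
 * disabled via udp_do_checksum).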
5913 */ 5914 if (udp_do_checksum) 5915 ip_len |= (ip_len << 16); 5916 #ifndef _LITTLE_ENDIAN 5917 else 5918 ip_len <<= 16; 5919 #endif 5920 } 5921 /* Set UDP length and checksum */ 5922 *((uint32_t *)&udpha->uha_length) = ip_len; 5923 5924 if (DB_TYPE(mp) != M_DATA) { 5925 ASSERT(mp != mp1); 5926 freeb(mp); 5927 } 5928 5929 /* mp has been consumed and we'll return success */ 5930 ASSERT(*error == 0); 5931 mp = NULL; 5932 5933 /* We're done. Pass the packet to ip. */ 5934 BUMP_MIB(&udp_mib, udpOutDatagrams); 5935 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 5936 "udp_wput_end: q %p (%S)", q, "end"); 5937 5938 if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 || 5939 CONN_OUTBOUND_POLICY_PRESENT(connp) || 5940 connp->conn_dontroute || connp->conn_xmit_if_ill != NULL || 5941 connp->conn_nofailover_ill != NULL || 5942 connp->conn_outgoing_ill != NULL || 5943 ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION || 5944 IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) { 5945 UDP_STAT(udp_ip_send); 5946 ip_output(connp, mp1, connp->conn_wq, IP_WPUT); 5947 } else { 5948 udp_send_data(udp, connp->conn_wq, mp1, ipha); 5949 } 5950 5951 done: 5952 if (*error != 0) { 5953 ASSERT(mp != NULL); 5954 BUMP_MIB(&udp_mib, udpOutErrors); 5955 } 5956 return (mp); 5957 } 5958 5959 static void 5960 udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha) 5961 { 5962 conn_t *connp = udp->udp_connp; 5963 ipaddr_t src, dst; 5964 ill_t *ill; 5965 ire_t *ire; 5966 ipif_t *ipif = NULL; 5967 mblk_t *ire_fp_mp; 5968 uint_t ire_fp_mp_len; 5969 uint16_t *up; 5970 uint32_t cksum, hcksum_txflags; 5971 queue_t *dev_q; 5972 boolean_t retry_caching; 5973 5974 dst = ipha->ipha_dst; 5975 src = ipha->ipha_src; 5976 ASSERT(ipha->ipha_ident == 0); 5977 5978 if (CLASSD(dst)) { 5979 int err; 5980 5981 ipif = conn_get_held_ipif(connp, 5982 &connp->conn_multicast_ipif, &err); 5983 5984 if (ipif == NULL || ipif->ipif_isv6 || 5985 (ipif->ipif_ill->ill_phyint->phyint_flags & 5986 PHYI_LOOPBACK)) { 5987 if (ipif != NULL) 5988 ipif_refrele(ipif); 5989 UDP_STAT(udp_ip_send); 5990 ip_output(connp, mp, q, IP_WPUT); 5991 return; 5992 } 5993 } 5994 5995 retry_caching = B_FALSE; 5996 mutex_enter(&connp->conn_lock); 5997 ire = connp->conn_ire_cache; 5998 ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT)); 5999 6000 if (ire == NULL || ire->ire_addr != dst || 6001 (ire->ire_marks & IRE_MARK_CONDEMNED)) { 6002 retry_caching = B_TRUE; 6003 } else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) { 6004 ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr; 6005 6006 ASSERT(ipif != NULL); 6007 if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL || 6008 stq_ill->ill_group != ipif->ipif_ill->ill_group)) 6009 retry_caching = B_TRUE; 6010 } 6011 6012 if (!retry_caching) { 6013 ASSERT(ire != NULL); 6014 IRE_REFHOLD(ire); 6015 mutex_exit(&connp->conn_lock); 6016 } else { 6017 boolean_t cached = B_FALSE; 6018 6019 connp->conn_ire_cache = NULL; 6020 mutex_exit(&connp->conn_lock); 6021 6022 /* Release the old ire */ 6023 if (ire != NULL) { 6024 IRE_REFRELE_NOTR(ire); 6025 ire = NULL; 6026 } 6027 6028 if (CLASSD(dst)) { 6029 ASSERT(ipif != NULL); 6030 ire = ire_ctable_lookup(dst, 0, 0, ipif, 6031 connp->conn_zoneid, MATCH_IRE_ILL_GROUP); 6032 } else { 6033 ASSERT(ipif == NULL); 6034 ire = ire_cache_lookup(dst, connp->conn_zoneid); 6035 } 6036 6037 if (ire == NULL) { 6038 if (ipif != NULL) 6039 ipif_refrele(ipif); 6040 UDP_STAT(udp_ire_null); 6041 ip_output(connp, mp, q, IP_WPUT); 6042 return; 6043 } 6044 IRE_REFHOLD_NOTR(ire); 6045 6046 
mutex_enter(&connp->conn_lock); 6047 if (!(connp->conn_state_flags & CONN_CLOSING) && 6048 connp->conn_ire_cache == NULL) { 6049 rw_enter(&ire->ire_bucket->irb_lock, RW_READER); 6050 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) { 6051 connp->conn_ire_cache = ire; 6052 cached = B_TRUE; 6053 } 6054 rw_exit(&ire->ire_bucket->irb_lock); 6055 } 6056 mutex_exit(&connp->conn_lock); 6057 6058 /* 6059 * We can continue to use the ire but since it was not 6060 * cached, we should drop the extra reference. 6061 */ 6062 if (!cached) 6063 IRE_REFRELE_NOTR(ire); 6064 } 6065 ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION); 6066 ASSERT(!CLASSD(dst) || ipif != NULL); 6067 6068 if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) || 6069 (ire->ire_flags & RTF_MULTIRT) || ire->ire_stq == NULL || 6070 ire->ire_max_frag < ntohs(ipha->ipha_length) || 6071 (ire_fp_mp = ire->ire_fp_mp) == NULL || 6072 (connp->conn_nexthop_set) || 6073 (ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp)) { 6074 if (ipif != NULL) 6075 ipif_refrele(ipif); 6076 UDP_STAT(udp_ip_ire_send); 6077 IRE_REFRELE(ire); 6078 ip_output(connp, mp, q, IP_WPUT); 6079 return; 6080 } 6081 6082 BUMP_MIB(&ip_mib, ipOutRequests); 6083 6084 ill = ire_to_ill(ire); 6085 ASSERT(ill != NULL); 6086 6087 dev_q = ire->ire_stq->q_next; 6088 ASSERT(dev_q != NULL); 6089 /* 6090 * If the service thread is already running, or if the driver 6091 * queue is currently flow-controlled, queue this packet. 6092 */ 6093 if ((q->q_first != NULL || connp->conn_draining) || 6094 ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) { 6095 if (ip_output_queue) { 6096 (void) putq(q, mp); 6097 } else { 6098 BUMP_MIB(&ip_mib, ipOutDiscards); 6099 freemsg(mp); 6100 } 6101 if (ipif != NULL) 6102 ipif_refrele(ipif); 6103 IRE_REFRELE(ire); 6104 return; 6105 } 6106 6107 ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1); 6108 #ifndef _BIG_ENDIAN 6109 ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8); 6110 #endif 6111 6112 if (src == INADDR_ANY && !connp->conn_unspec_src) { 6113 if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC)) 6114 src = ipha->ipha_src = ipif->ipif_src_addr; 6115 else 6116 src = ipha->ipha_src = ire->ire_src_addr; 6117 } 6118 6119 if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) { 6120 ASSERT(ill->ill_hcksum_capab != NULL); 6121 hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags; 6122 } else { 6123 hcksum_txflags = 0; 6124 } 6125 6126 /* pseudo-header checksum (do it in parts for IP header checksum) */ 6127 cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF); 6128 6129 ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION); 6130 up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 6131 if (*up != 0) { 6132 IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags, 6133 mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH, 6134 ntohs(ipha->ipha_length), cksum); 6135 6136 /* Software checksum? 
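 * If IP_CKSUM_XMIT_FAST left no offload flags on the mblk, the hardware
 * did not take over the UDP checksum and it was computed in software just
 * above, so account for it in the statistics.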
*/ 6137 if (DB_CKSUMFLAGS(mp) == 0) { 6138 UDP_STAT(udp_out_sw_cksum); 6139 UDP_STAT_UPDATE(udp_out_sw_cksum_bytes, 6140 ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH); 6141 } 6142 } 6143 6144 ipha->ipha_fragment_offset_and_flags |= 6145 (uint32_t)htons(ire->ire_frag_flag); 6146 6147 /* Calculate IP header checksum if hardware isn't capable */ 6148 if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) { 6149 IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0], 6150 ((uint16_t *)ipha)[4]); 6151 } 6152 6153 if (CLASSD(dst)) { 6154 ilm_t *ilm; 6155 6156 ILM_WALKER_HOLD(ill); 6157 ilm = ilm_lookup_ill(ill, dst, ALL_ZONES); 6158 ILM_WALKER_RELE(ill); 6159 if (ilm != NULL) { 6160 ip_multicast_loopback(q, ill, mp, 6161 connp->conn_multicast_loop ? 0 : 6162 IP_FF_NO_MCAST_LOOP, connp->conn_zoneid); 6163 } 6164 6165 /* If multicast TTL is 0 then we are done */ 6166 if (ipha->ipha_ttl == 0) { 6167 if (ipif != NULL) 6168 ipif_refrele(ipif); 6169 freemsg(mp); 6170 IRE_REFRELE(ire); 6171 return; 6172 } 6173 } 6174 6175 ASSERT(DB_TYPE(ire_fp_mp) == M_DATA); 6176 mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len; 6177 bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len); 6178 6179 UPDATE_OB_PKT_COUNT(ire); 6180 ire->ire_last_used_time = lbolt; 6181 6182 if (ILL_DLS_CAPABLE(ill)) { 6183 /* 6184 * Send the packet directly to DLD, where it may be queued 6185 * depending on the availability of transmit resources at 6186 * the media layer. 6187 */ 6188 IP_DLS_ILL_TX(ill, mp); 6189 } else { 6190 putnext(ire->ire_stq, mp); 6191 } 6192 6193 if (ipif != NULL) 6194 ipif_refrele(ipif); 6195 IRE_REFRELE(ire); 6196 } 6197 6198 /* 6199 * This routine handles all messages passed downstream. It either 6200 * consumes the message or passes it downstream; it never queues a 6201 * a message. 6202 */ 6203 static void 6204 udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6205 { 6206 sin6_t *sin6; 6207 sin_t *sin; 6208 ipaddr_t v4dst; 6209 uint16_t port; 6210 uint_t srcid; 6211 queue_t *q = connp->conn_wq; 6212 udp_t *udp = connp->conn_udp; 6213 t_scalar_t optlen; 6214 int error = 0; 6215 struct sockaddr_storage ss; 6216 6217 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START, 6218 "udp_wput_start: connp %p mp %p", connp, mp); 6219 6220 /* 6221 * We directly handle several cases here: T_UNITDATA_REQ message 6222 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both 6223 * connected and non-connected socket. The latter carries the 6224 * address structure along when this routine gets called. 
6225 */ 6226 switch (DB_TYPE(mp)) { 6227 case M_DATA: 6228 if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) { 6229 if (!udp->udp_direct_sockfs || 6230 addr == NULL || addrlen == 0) { 6231 /* Not connected; address is required */ 6232 BUMP_MIB(&udp_mib, udpOutErrors); 6233 UDP_STAT(udp_out_err_notconn); 6234 freemsg(mp); 6235 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6236 "udp_wput_end: connp %p (%S)", connp, 6237 "not-connected; address required"); 6238 return; 6239 } 6240 ASSERT(udp->udp_issocket); 6241 UDP_DBGSTAT(udp_data_notconn); 6242 /* Not connected; do some more checks below */ 6243 optlen = 0; 6244 break; 6245 } 6246 /* M_DATA for connected socket */ 6247 UDP_DBGSTAT(udp_data_conn); 6248 IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst); 6249 6250 /* Initialize addr and addrlen as if they're passed in */ 6251 if (udp->udp_family == AF_INET) { 6252 sin = (sin_t *)&ss; 6253 sin->sin_family = AF_INET; 6254 sin->sin_port = udp->udp_dstport; 6255 sin->sin_addr.s_addr = v4dst; 6256 addr = (struct sockaddr *)sin; 6257 addrlen = sizeof (*sin); 6258 } else { 6259 sin6 = (sin6_t *)&ss; 6260 sin6->sin6_family = AF_INET6; 6261 sin6->sin6_port = udp->udp_dstport; 6262 sin6->sin6_flowinfo = udp->udp_flowinfo; 6263 sin6->sin6_addr = udp->udp_v6dst; 6264 sin6->sin6_scope_id = 0; 6265 sin6->__sin6_src_id = 0; 6266 addr = (struct sockaddr *)sin6; 6267 addrlen = sizeof (*sin6); 6268 } 6269 6270 if (udp->udp_family == AF_INET || 6271 IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) { 6272 /* 6273 * Handle both AF_INET and AF_INET6; the latter 6274 * for IPV4 mapped destination addresses. Note 6275 * here that both addr and addrlen point to the 6276 * corresponding struct depending on the address 6277 * family of the socket. 6278 */ 6279 mp = udp_output_v4(connp, mp, v4dst, 6280 udp->udp_dstport, 0, &error); 6281 } else { 6282 mp = udp_output_v6(connp, mp, sin6, 0, &error); 6283 } 6284 if (error != 0) { 6285 ASSERT(addr != NULL && addrlen != 0); 6286 goto ud_error; 6287 } 6288 return; 6289 case M_PROTO: 6290 case M_PCPROTO: { 6291 struct T_unitdata_req *tudr; 6292 6293 ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX); 6294 tudr = (struct T_unitdata_req *)mp->b_rptr; 6295 6296 /* Handle valid T_UNITDATA_REQ here */ 6297 if (MBLKL(mp) >= sizeof (*tudr) && 6298 ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) { 6299 if (mp->b_cont == NULL) { 6300 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6301 "udp_wput_end: q %p (%S)", q, "badaddr"); 6302 error = EPROTO; 6303 goto ud_error; 6304 } 6305 6306 if (!MBLKIN(mp, 0, tudr->DEST_offset + 6307 tudr->DEST_length)) { 6308 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6309 "udp_wput_end: q %p (%S)", q, "badaddr"); 6310 error = EADDRNOTAVAIL; 6311 goto ud_error; 6312 } 6313 /* 6314 * If a port has not been bound to the stream, fail. 6315 * This is not a problem when sockfs is directly 6316 * above us, because it will ensure that the socket 6317 * is first bound before allowing data to be sent. 
6318 */ 6319 if (udp->udp_state == TS_UNBND) { 6320 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6321 "udp_wput_end: q %p (%S)", q, "outstate"); 6322 error = EPROTO; 6323 goto ud_error; 6324 } 6325 addr = (struct sockaddr *) 6326 &mp->b_rptr[tudr->DEST_offset]; 6327 addrlen = tudr->DEST_length; 6328 optlen = tudr->OPT_length; 6329 if (optlen != 0) 6330 UDP_STAT(udp_out_opt); 6331 break; 6332 } 6333 /* FALLTHRU */ 6334 } 6335 default: 6336 udp_become_writer(connp, mp, udp_wput_other_wrapper, 6337 SQTAG_UDP_OUTPUT); 6338 return; 6339 } 6340 ASSERT(addr != NULL); 6341 6342 switch (udp->udp_family) { 6343 case AF_INET6: 6344 sin6 = (sin6_t *)addr; 6345 if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) || 6346 sin6->sin6_family != AF_INET6) { 6347 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6348 "udp_wput_end: q %p (%S)", q, "badaddr"); 6349 error = EADDRNOTAVAIL; 6350 goto ud_error; 6351 } 6352 6353 if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { 6354 /* 6355 * Destination is a non-IPv4-compatible IPv6 address. 6356 * Send out an IPv6 format packet. 6357 */ 6358 mp = udp_output_v6(connp, mp, sin6, optlen, &error); 6359 if (error != 0) 6360 goto ud_error; 6361 6362 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6363 "udp_wput_end: q %p (%S)", q, "udp_output_v6"); 6364 return; 6365 } 6366 /* 6367 * If the local address is not zero or a mapped address 6368 * return an error. It would be possible to send an IPv4 6369 * packet but the response would never make it back to the 6370 * application since it is bound to a non-mapped address. 6371 */ 6372 if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) && 6373 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 6374 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6375 "udp_wput_end: q %p (%S)", q, "badaddr"); 6376 error = EADDRNOTAVAIL; 6377 goto ud_error; 6378 } 6379 /* Send IPv4 packet without modifying udp_ipversion */ 6380 /* Extract port and ipaddr */ 6381 port = sin6->sin6_port; 6382 IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst); 6383 srcid = sin6->__sin6_src_id; 6384 break; 6385 6386 case AF_INET: 6387 sin = (sin_t *)addr; 6388 if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) || 6389 sin->sin_family != AF_INET) { 6390 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6391 "udp_wput_end: q %p (%S)", q, "badaddr"); 6392 error = EADDRNOTAVAIL; 6393 goto ud_error; 6394 } 6395 /* Extract port and ipaddr */ 6396 port = sin->sin_port; 6397 v4dst = sin->sin_addr.s_addr; 6398 srcid = 0; 6399 break; 6400 } 6401 6402 /* 6403 * If options passed in, feed it for verification and handling 6404 */ 6405 if (optlen != 0) { 6406 ASSERT(DB_TYPE(mp) != M_DATA); 6407 if (udp_unitdata_opt_process(q, mp, &error, NULL) < 0) { 6408 /* failure */ 6409 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END, 6410 "udp_wput_end: q %p (%S)", q, 6411 "udp_unitdata_opt_process"); 6412 goto ud_error; 6413 } 6414 /* 6415 * Note: success in processing options. 
6416 * mp option buffer represented by 6417 * OPT_length/offset now potentially modified 6418 * and contain option setting results 6419 */ 6420 } 6421 ASSERT(error == 0); 6422 mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error); 6423 if (error != 0) { 6424 ud_error: 6425 UDP_STAT(udp_out_err_output); 6426 ASSERT(mp != NULL); 6427 /* mp is freed by the following routine */ 6428 udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen, 6429 (t_scalar_t)error); 6430 } 6431 } 6432 6433 /* ARGSUSED */ 6434 static void 6435 udp_output_wrapper(void *arg, mblk_t *mp, void *arg2) 6436 { 6437 udp_output((conn_t *)arg, mp, NULL, 0); 6438 _UDP_EXIT((conn_t *)arg); 6439 } 6440 6441 static void 6442 udp_wput(queue_t *q, mblk_t *mp) 6443 { 6444 _UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper, 6445 SQTAG_UDP_WPUT); 6446 } 6447 6448 /* 6449 * Allocate and prepare a T_UNITDATA_REQ message. 6450 */ 6451 static mblk_t * 6452 udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen) 6453 { 6454 struct T_unitdata_req *tudr; 6455 mblk_t *mp; 6456 6457 mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED); 6458 if (mp != NULL) { 6459 mp->b_wptr += sizeof (*tudr) + addrlen; 6460 DB_TYPE(mp) = M_PROTO; 6461 6462 tudr = (struct T_unitdata_req *)mp->b_rptr; 6463 tudr->PRIM_type = T_UNITDATA_REQ; 6464 tudr->DEST_length = addrlen; 6465 tudr->DEST_offset = (t_scalar_t)sizeof (*tudr); 6466 tudr->OPT_length = 0; 6467 tudr->OPT_offset = 0; 6468 bcopy(addr, tudr+1, addrlen); 6469 } 6470 return (mp); 6471 } 6472 6473 /* 6474 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode 6475 * is valid when we are directly beneath the stream head, and thus sockfs 6476 * is able to bypass STREAMS and directly call us, passing along the sockaddr 6477 * structure without the cumbersome T_UNITDATA_REQ interface. Note that 6478 * this is done for both connected and non-connected endpoint. 6479 */ 6480 void 6481 udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen) 6482 { 6483 conn_t *connp; 6484 udp_t *udp; 6485 6486 q = UDP_WR(q); 6487 connp = Q_TO_CONN(q); 6488 udp = connp->conn_udp; 6489 6490 /* udpsockfs should only send down M_DATA for this entry point */ 6491 ASSERT(DB_TYPE(mp) == M_DATA); 6492 6493 mutex_enter(&connp->conn_lock); 6494 UDP_MODE_ASSERTIONS(udp, UDP_ENTER); 6495 6496 if (udp->udp_mode != UDP_MT_HOT) { 6497 /* 6498 * We can't enter this conn right away because another 6499 * thread is currently executing as writer; therefore we 6500 * need to deposit the message into the squeue to be 6501 * drained later. If a socket address is present, we 6502 * need to create a T_UNITDATA_REQ message as placeholder. 6503 */ 6504 if (addr != NULL && addrlen != 0) { 6505 mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen); 6506 6507 if (tudr_mp == NULL) { 6508 mutex_exit(&connp->conn_lock); 6509 BUMP_MIB(&udp_mib, udpOutErrors); 6510 UDP_STAT(udp_out_err_tudr); 6511 freemsg(mp); 6512 return; 6513 } 6514 /* Tag the packet with T_UNITDATA_REQ */ 6515 tudr_mp->b_cont = mp; 6516 mp = tudr_mp; 6517 } 6518 mutex_exit(&connp->conn_lock); 6519 udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT); 6520 return; 6521 } 6522 6523 /* We can execute as reader right away. 
*/ 6524 UDP_READERS_INCREF(udp); 6525 mutex_exit(&connp->conn_lock); 6526 6527 udp_output(connp, mp, addr, addrlen); 6528 6529 mutex_enter(&connp->conn_lock); 6530 UDP_MODE_ASSERTIONS(udp, UDP_EXIT); 6531 UDP_READERS_DECREF(udp); 6532 mutex_exit(&connp->conn_lock); 6533 } 6534 6535 /* 6536 * udp_output_v6(): 6537 * Assumes that udp_wput did some sanity checking on the destination 6538 * address. 6539 */ 6540 static mblk_t * 6541 udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, t_scalar_t tudr_optlen, 6542 int *error) 6543 { 6544 ip6_t *ip6h; 6545 ip6i_t *ip6i; /* mp1->b_rptr even if no ip6i_t */ 6546 mblk_t *mp1 = (DB_TYPE(mp) == M_DATA ? mp : mp->b_cont); 6547 mblk_t *mp2; 6548 int udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE; 6549 size_t ip_len; 6550 udpha_t *udph; 6551 udp_t *udp = connp->conn_udp; 6552 queue_t *q = connp->conn_wq; 6553 ip6_pkt_t ipp_s; /* For ancillary data options */ 6554 ip6_pkt_t *ipp = &ipp_s; 6555 ip6_pkt_t *tipp; /* temporary ipp */ 6556 uint32_t csum = 0; 6557 uint_t ignore = 0; 6558 uint_t option_exists = 0, is_sticky = 0; 6559 uint8_t *cp; 6560 uint8_t *nxthdr_ptr; 6561 6562 *error = 0; 6563 6564 /* mp1 points to the M_DATA mblk carrying the packet */ 6565 ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA); 6566 ASSERT(tudr_optlen == 0 || DB_TYPE(mp) != M_DATA); 6567 6568 /* 6569 * If the local address is a mapped address return 6570 * an error. 6571 * It would be possible to send an IPv6 packet but the 6572 * response would never make it back to the application 6573 * since it is bound to a mapped address. 6574 */ 6575 if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) { 6576 *error = EADDRNOTAVAIL; 6577 goto done; 6578 } 6579 6580 ipp->ipp_fields = 0; 6581 ipp->ipp_sticky_ignored = 0; 6582 6583 /* 6584 * If TPI options passed in, feed it for verification and handling 6585 */ 6586 if (tudr_optlen != 0) { 6587 if (udp_unitdata_opt_process(q, mp, error, (void *)ipp) < 0) { 6588 /* failure */ 6589 goto done; 6590 } 6591 ignore = ipp->ipp_sticky_ignored; 6592 ASSERT(*error == 0); 6593 } 6594 6595 if (sin6->sin6_scope_id != 0 && 6596 IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) { 6597 /* 6598 * IPPF_SCOPE_ID is special. It's neither a sticky 6599 * option nor ancillary data. It needs to be 6600 * explicitly set in options_exists. 6601 */ 6602 option_exists |= IPPF_SCOPE_ID; 6603 } 6604 6605 if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) { 6606 /* No sticky options nor ancillary data. */ 6607 goto no_options; 6608 } 6609 6610 /* 6611 * Go through the options figuring out where each is going to 6612 * come from and build two masks. The first mask indicates if 6613 * the option exists at all. The second mask indicates if the 6614 * option is sticky or ancillary. 
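 * For instance, hop-by-hop options supplied as IPV6_HOPOPTS ancillary
 * data set IPPF_HOPOPTS in option_exists but not in is_sticky, so
 * ANCIL_OR_STICKY_PTR() below picks them up from ipp rather than from
 * udp_sticky_ipp.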
6615 */ 6616 if (!(ignore & IPPF_HOPOPTS)) { 6617 if (ipp->ipp_fields & IPPF_HOPOPTS) { 6618 option_exists |= IPPF_HOPOPTS; 6619 udp_ip_hdr_len += ipp->ipp_hopoptslen; 6620 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) { 6621 option_exists |= IPPF_HOPOPTS; 6622 is_sticky |= IPPF_HOPOPTS; 6623 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_hopoptslen; 6624 } 6625 } 6626 6627 if (!(ignore & IPPF_RTHDR)) { 6628 if (ipp->ipp_fields & IPPF_RTHDR) { 6629 option_exists |= IPPF_RTHDR; 6630 udp_ip_hdr_len += ipp->ipp_rthdrlen; 6631 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) { 6632 option_exists |= IPPF_RTHDR; 6633 is_sticky |= IPPF_RTHDR; 6634 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen; 6635 } 6636 } 6637 6638 if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) { 6639 if (ipp->ipp_fields & IPPF_RTDSTOPTS) { 6640 option_exists |= IPPF_RTDSTOPTS; 6641 udp_ip_hdr_len += ipp->ipp_rtdstoptslen; 6642 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) { 6643 option_exists |= IPPF_RTDSTOPTS; 6644 is_sticky |= IPPF_RTDSTOPTS; 6645 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen; 6646 } 6647 } 6648 6649 if (!(ignore & IPPF_DSTOPTS)) { 6650 if (ipp->ipp_fields & IPPF_DSTOPTS) { 6651 option_exists |= IPPF_DSTOPTS; 6652 udp_ip_hdr_len += ipp->ipp_dstoptslen; 6653 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) { 6654 option_exists |= IPPF_DSTOPTS; 6655 is_sticky |= IPPF_DSTOPTS; 6656 udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen; 6657 } 6658 } 6659 6660 if (!(ignore & IPPF_IFINDEX)) { 6661 if (ipp->ipp_fields & IPPF_IFINDEX) { 6662 option_exists |= IPPF_IFINDEX; 6663 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) { 6664 option_exists |= IPPF_IFINDEX; 6665 is_sticky |= IPPF_IFINDEX; 6666 } 6667 } 6668 6669 if (!(ignore & IPPF_ADDR)) { 6670 if (ipp->ipp_fields & IPPF_ADDR) { 6671 option_exists |= IPPF_ADDR; 6672 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) { 6673 option_exists |= IPPF_ADDR; 6674 is_sticky |= IPPF_ADDR; 6675 } 6676 } 6677 6678 if (!(ignore & IPPF_DONTFRAG)) { 6679 if (ipp->ipp_fields & IPPF_DONTFRAG) { 6680 option_exists |= IPPF_DONTFRAG; 6681 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) { 6682 option_exists |= IPPF_DONTFRAG; 6683 is_sticky |= IPPF_DONTFRAG; 6684 } 6685 } 6686 6687 if (!(ignore & IPPF_USE_MIN_MTU)) { 6688 if (ipp->ipp_fields & IPPF_USE_MIN_MTU) { 6689 option_exists |= IPPF_USE_MIN_MTU; 6690 } else if (udp->udp_sticky_ipp.ipp_fields & 6691 IPPF_USE_MIN_MTU) { 6692 option_exists |= IPPF_USE_MIN_MTU; 6693 is_sticky |= IPPF_USE_MIN_MTU; 6694 } 6695 } 6696 6697 if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT)) 6698 option_exists |= IPPF_HOPLIMIT; 6699 /* IPV6_HOPLIMIT can never be sticky */ 6700 ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT)); 6701 6702 if (!(ignore & IPPF_UNICAST_HOPS) && 6703 (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) { 6704 option_exists |= IPPF_UNICAST_HOPS; 6705 is_sticky |= IPPF_UNICAST_HOPS; 6706 } 6707 6708 if (!(ignore & IPPF_MULTICAST_HOPS) && 6709 (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) { 6710 option_exists |= IPPF_MULTICAST_HOPS; 6711 is_sticky |= IPPF_MULTICAST_HOPS; 6712 } 6713 6714 if (!(ignore & IPPF_TCLASS)) { 6715 if (ipp->ipp_fields & IPPF_TCLASS) { 6716 option_exists |= IPPF_TCLASS; 6717 } else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) { 6718 option_exists |= IPPF_TCLASS; 6719 is_sticky |= IPPF_TCLASS; 6720 } 6721 } 6722 6723 no_options: 6724 6725 /* 6726 * If any options carried 
in the ip6i_t were specified, we 6727 * need to account for the ip6i_t in the data we'll be sending 6728 * down. 6729 */ 6730 if (option_exists & IPPF_HAS_IP6I) 6731 udp_ip_hdr_len += sizeof (ip6i_t); 6732 6733 /* check/fix buffer config, setup pointers into it */ 6734 ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len]; 6735 if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) || 6736 !OK_32PTR(ip6h)) { 6737 /* Try to get everything in a single mblk next time */ 6738 if (udp_ip_hdr_len > udp->udp_max_hdr_len) { 6739 udp->udp_max_hdr_len = udp_ip_hdr_len; 6740 (void) mi_set_sth_wroff(UDP_RD(q), 6741 udp->udp_max_hdr_len + udp_wroff_extra); 6742 } 6743 mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO); 6744 if (mp2 == NULL) { 6745 *error = ENOMEM; 6746 goto done; 6747 } 6748 mp2->b_wptr = DB_LIM(mp2); 6749 mp2->b_cont = mp1; 6750 mp1 = mp2; 6751 if (DB_TYPE(mp) != M_DATA) 6752 mp->b_cont = mp1; 6753 else 6754 mp = mp1; 6755 6756 ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len); 6757 } 6758 mp1->b_rptr = (unsigned char *)ip6h; 6759 ip6i = (ip6i_t *)ip6h; 6760 6761 #define ANCIL_OR_STICKY_PTR(f) ((is_sticky & f) ? &udp->udp_sticky_ipp : ipp) 6762 if (option_exists & IPPF_HAS_IP6I) { 6763 ip6h = (ip6_t *)&ip6i[1]; 6764 ip6i->ip6i_flags = 0; 6765 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6766 6767 /* sin6_scope_id takes precendence over IPPF_IFINDEX */ 6768 if (option_exists & IPPF_SCOPE_ID) { 6769 ip6i->ip6i_flags |= IP6I_IFINDEX; 6770 ip6i->ip6i_ifindex = sin6->sin6_scope_id; 6771 } else if (option_exists & IPPF_IFINDEX) { 6772 tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX); 6773 ASSERT(tipp->ipp_ifindex != 0); 6774 ip6i->ip6i_flags |= IP6I_IFINDEX; 6775 ip6i->ip6i_ifindex = tipp->ipp_ifindex; 6776 } 6777 6778 if (option_exists & IPPF_ADDR) { 6779 /* 6780 * Enable per-packet source address verification if 6781 * IPV6_PKTINFO specified the source address. 6782 * ip6_src is set in the transport's _wput function. 6783 */ 6784 ip6i->ip6i_flags |= IP6I_VERIFY_SRC; 6785 } 6786 6787 if (option_exists & IPPF_DONTFRAG) { 6788 ip6i->ip6i_flags |= IP6I_DONTFRAG; 6789 } 6790 6791 if (option_exists & IPPF_USE_MIN_MTU) { 6792 ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU( 6793 ip6i->ip6i_flags, ipp->ipp_use_min_mtu); 6794 } 6795 6796 if (option_exists & IPPF_NEXTHOP) { 6797 tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP); 6798 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop)); 6799 ip6i->ip6i_flags |= IP6I_NEXTHOP; 6800 ip6i->ip6i_nexthop = tipp->ipp_nexthop; 6801 } 6802 6803 /* 6804 * tell IP this is an ip6i_t private header 6805 */ 6806 ip6i->ip6i_nxt = IPPROTO_RAW; 6807 } 6808 6809 /* Initialize IPv6 header */ 6810 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 6811 bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src)); 6812 6813 /* Set the hoplimit of the outgoing packet. */ 6814 if (option_exists & IPPF_HOPLIMIT) { 6815 /* IPV6_HOPLIMIT ancillary data overrides all other settings. 
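 * Otherwise multicast destinations use udp_multicast_ttl and unicast
 * destinations use udp_ttl, and IP6I_HOPLIMIT is set only when the
 * corresponding sticky hop-count option was supplied.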
*/ 6816 ip6h->ip6_hops = ipp->ipp_hoplimit; 6817 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6818 } else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 6819 ip6h->ip6_hops = udp->udp_multicast_ttl; 6820 if (option_exists & IPPF_MULTICAST_HOPS) 6821 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6822 } else { 6823 ip6h->ip6_hops = udp->udp_ttl; 6824 if (option_exists & IPPF_UNICAST_HOPS) 6825 ip6i->ip6i_flags |= IP6I_HOPLIMIT; 6826 } 6827 6828 if (option_exists & IPPF_ADDR) { 6829 tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR); 6830 ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr)); 6831 ip6h->ip6_src = tipp->ipp_addr; 6832 } else { 6833 /* 6834 * The source address was not set using IPV6_PKTINFO. 6835 * First look at the bound source. 6836 * If unspecified fallback to __sin6_src_id. 6837 */ 6838 ip6h->ip6_src = udp->udp_v6src; 6839 if (sin6->__sin6_src_id != 0 && 6840 IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) { 6841 ip_srcid_find_id(sin6->__sin6_src_id, 6842 &ip6h->ip6_src, connp->conn_zoneid); 6843 } 6844 } 6845 6846 nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt; 6847 cp = (uint8_t *)&ip6h[1]; 6848 6849 /* 6850 * Here's where we have to start stringing together 6851 * any extension headers in the right order: 6852 * Hop-by-hop, destination, routing, and final destination opts. 6853 */ 6854 if (option_exists & IPPF_HOPOPTS) { 6855 /* Hop-by-hop options */ 6856 ip6_hbh_t *hbh = (ip6_hbh_t *)cp; 6857 tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS); 6858 6859 *nxthdr_ptr = IPPROTO_HOPOPTS; 6860 nxthdr_ptr = &hbh->ip6h_nxt; 6861 6862 bcopy(tipp->ipp_hopopts, cp, tipp->ipp_hopoptslen); 6863 cp += tipp->ipp_hopoptslen; 6864 } 6865 /* 6866 * En-route destination options 6867 * Only do them if there's a routing header as well 6868 */ 6869 if (option_exists & IPPF_RTDSTOPTS) { 6870 ip6_dest_t *dst = (ip6_dest_t *)cp; 6871 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS); 6872 6873 *nxthdr_ptr = IPPROTO_DSTOPTS; 6874 nxthdr_ptr = &dst->ip6d_nxt; 6875 6876 bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen); 6877 cp += tipp->ipp_rtdstoptslen; 6878 } 6879 /* 6880 * Routing header next 6881 */ 6882 if (option_exists & IPPF_RTHDR) { 6883 ip6_rthdr_t *rt = (ip6_rthdr_t *)cp; 6884 tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR); 6885 6886 *nxthdr_ptr = IPPROTO_ROUTING; 6887 nxthdr_ptr = &rt->ip6r_nxt; 6888 6889 bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen); 6890 cp += tipp->ipp_rthdrlen; 6891 } 6892 /* 6893 * Do ultimate destination options 6894 */ 6895 if (option_exists & IPPF_DSTOPTS) { 6896 ip6_dest_t *dest = (ip6_dest_t *)cp; 6897 tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS); 6898 6899 *nxthdr_ptr = IPPROTO_DSTOPTS; 6900 nxthdr_ptr = &dest->ip6d_nxt; 6901 6902 bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen); 6903 cp += tipp->ipp_dstoptslen; 6904 } 6905 /* 6906 * Now set the last header pointer to the proto passed in 6907 */ 6908 ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE)); 6909 *nxthdr_ptr = IPPROTO_UDP; 6910 6911 /* Update UDP header */ 6912 udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE); 6913 udph->uha_dst_port = sin6->sin6_port; 6914 udph->uha_src_port = udp->udp_port; 6915 6916 /* 6917 * Copy in the destination address 6918 */ 6919 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 6920 ip6h->ip6_dst = ipv6_loopback; 6921 else 6922 ip6h->ip6_dst = sin6->sin6_addr; 6923 6924 ip6h->ip6_vcf = 6925 (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) | 6926 (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK); 6927 6928 if (option_exists & IPPF_TCLASS) { 6929 tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS); 6930 ip6h->ip6_vcf = 
IPV6_TCLASS_FLOW(ip6h->ip6_vcf, 6931 tipp->ipp_tclass); 6932 } 6933 6934 if (option_exists & IPPF_RTHDR) { 6935 ip6_rthdr_t *rth; 6936 6937 /* 6938 * Perform any processing needed for source routing. 6939 * We know that all extension headers will be in the same mblk 6940 * as the IPv6 header. 6941 */ 6942 rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr); 6943 if (rth != NULL && rth->ip6r_segleft != 0) { 6944 if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) { 6945 /* 6946 * Drop packet - only support Type 0 routing. 6947 * Notify the application as well. 6948 */ 6949 *error = EPROTO; 6950 goto done; 6951 } 6952 6953 /* 6954 * rth->ip6r_len is twice the number of 6955 * addresses in the header. Thus it must be even. 6956 */ 6957 if (rth->ip6r_len & 0x1) { 6958 *error = EPROTO; 6959 goto done; 6960 } 6961 /* 6962 * Shuffle the routing header and ip6_dst 6963 * addresses, and get the checksum difference 6964 * between the first hop (in ip6_dst) and 6965 * the destination (in the last routing hdr entry). 6966 */ 6967 csum = ip_massage_options_v6(ip6h, rth); 6968 /* 6969 * Verify that the first hop isn't a mapped address. 6970 * Routers along the path need to do this verification 6971 * for subsequent hops. 6972 */ 6973 if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) { 6974 *error = EADDRNOTAVAIL; 6975 goto done; 6976 } 6977 6978 cp += (rth->ip6r_len + 1)*8; 6979 } 6980 } 6981 6982 /* count up length of UDP packet */ 6983 ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN; 6984 if ((mp2 = mp1->b_cont) != NULL) { 6985 do { 6986 ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX); 6987 ip_len += (uint32_t)MBLKL(mp2); 6988 } while ((mp2 = mp2->b_cont) != NULL); 6989 } 6990 6991 /* 6992 * If the size of the packet is greater than the maximum allowed by 6993 * ip, return an error. Passing this down could cause panics because 6994 * the size will have wrapped and be inconsistent with the msg size. 6995 */ 6996 if (ip_len > IP_MAXPACKET) { 6997 *error = EMSGSIZE; 6998 goto done; 6999 } 7000 7001 /* Store the UDP length. Subtract length of extension hdrs */ 7002 udph->uha_length = htons(ip_len + IPV6_HDR_LEN - 7003 (int)((uchar_t *)udph - (uchar_t *)ip6h)); 7004 7005 /* 7006 * We make it easy for IP to include our pseudo header 7007 * by putting our length in uh_checksum, modified (if 7008 * we have a routing header) by the checksum difference 7009 * between the ultimate destination and first hop addresses. 7010 * Note: UDP over IPv6 must always checksum the packet. 7011 */ 7012 csum += udph->uha_length; 7013 csum = (csum & 0xFFFF) + (csum >> 16); 7014 udph->uha_checksum = (uint16_t)csum; 7015 7016 #ifdef _LITTLE_ENDIAN 7017 ip_len = htons(ip_len); 7018 #endif 7019 ip6h->ip6_plen = ip_len; 7020 7021 if (DB_TYPE(mp) != M_DATA) { 7022 ASSERT(mp != mp1); 7023 freeb(mp); 7024 } 7025 7026 /* mp has been consumed and we'll return success */ 7027 ASSERT(*error == 0); 7028 mp = NULL; 7029 7030 /* We're done. 
Pass the packet to IP */ 7031 BUMP_MIB(&udp_mib, udpOutDatagrams); 7032 ip_output_v6(connp, mp1, q, IP_WPUT); 7033 7034 done: 7035 if (*error != 0) { 7036 ASSERT(mp != NULL); 7037 BUMP_MIB(&udp_mib, udpOutErrors); 7038 } 7039 return (mp); 7040 } 7041 7042 static void 7043 udp_wput_other(queue_t *q, mblk_t *mp) 7044 { 7045 uchar_t *rptr = mp->b_rptr; 7046 struct datab *db; 7047 struct iocblk *iocp; 7048 cred_t *cr; 7049 conn_t *connp = Q_TO_CONN(q); 7050 udp_t *udp = connp->conn_udp; 7051 7052 TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START, 7053 "udp_wput_other_start: q %p", q); 7054 7055 db = mp->b_datap; 7056 7057 cr = DB_CREDDEF(mp, connp->conn_cred); 7058 7059 switch (db->db_type) { 7060 case M_PROTO: 7061 case M_PCPROTO: 7062 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 7063 freemsg(mp); 7064 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7065 "udp_wput_other_end: q %p (%S)", 7066 q, "protoshort"); 7067 return; 7068 } 7069 switch (((t_primp_t)rptr)->type) { 7070 case T_ADDR_REQ: 7071 udp_addr_req(q, mp); 7072 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7073 "udp_wput_other_end: q %p (%S)", q, "addrreq"); 7074 return; 7075 case O_T_BIND_REQ: 7076 case T_BIND_REQ: 7077 udp_bind(q, mp); 7078 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7079 "udp_wput_other_end: q %p (%S)", q, "bindreq"); 7080 return; 7081 case T_CONN_REQ: 7082 udp_connect(q, mp); 7083 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7084 "udp_wput_other_end: q %p (%S)", q, "connreq"); 7085 return; 7086 case T_CAPABILITY_REQ: 7087 udp_capability_req(q, mp); 7088 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7089 "udp_wput_other_end: q %p (%S)", q, "capabreq"); 7090 return; 7091 case T_INFO_REQ: 7092 udp_info_req(q, mp); 7093 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7094 "udp_wput_other_end: q %p (%S)", q, "inforeq"); 7095 return; 7096 case T_UNITDATA_REQ: 7097 /* 7098 * If a T_UNITDATA_REQ gets here, the address must 7099 * be bad. Valid T_UNITDATA_REQs are handled 7100 * in udp_wput. 7101 */ 7102 udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL); 7103 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7104 "udp_wput_other_end: q %p (%S)", 7105 q, "unitdatareq"); 7106 return; 7107 case T_UNBIND_REQ: 7108 udp_unbind(q, mp); 7109 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7110 "udp_wput_other_end: q %p (%S)", q, "unbindreq"); 7111 return; 7112 case T_SVR4_OPTMGMT_REQ: 7113 if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr)) 7114 /* 7115 * Use upper queue for option processing in 7116 * case the request is not handled at this 7117 * level and needs to be passed down to IP. 7118 */ 7119 (void) svr4_optcom_req(_WR(UDP_RD(q)), 7120 mp, cr, &udp_opt_obj); 7121 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7122 "udp_wput_other_end: q %p (%S)", 7123 q, "optmgmtreq"); 7124 return; 7125 7126 case T_OPTMGMT_REQ: 7127 /* 7128 * Use upper queue for option processing in 7129 * case the request is not handled at this 7130 * level and needs to be passed down to IP. 7131 */ 7132 (void) tpi_optcom_req(_WR(UDP_RD(q)), 7133 mp, cr, &udp_opt_obj); 7134 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7135 "udp_wput_other_end: q %p (%S)", 7136 q, "optmgmtreq"); 7137 return; 7138 7139 case T_DISCON_REQ: 7140 udp_disconnect(q, mp); 7141 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7142 "udp_wput_other_end: q %p (%S)", 7143 q, "disconreq"); 7144 return; 7145 7146 /* The following TPI message is not supported by udp. 
*/ 7147 case O_T_CONN_RES: 7148 case T_CONN_RES: 7149 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7150 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7151 "udp_wput_other_end: q %p (%S)", 7152 q, "connres/disconreq"); 7153 return; 7154 7155 /* The following 3 TPI messages are illegal for udp. */ 7156 case T_DATA_REQ: 7157 case T_EXDATA_REQ: 7158 case T_ORDREL_REQ: 7159 udp_err_ack(q, mp, TNOTSUPPORT, 0); 7160 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7161 "udp_wput_other_end: q %p (%S)", 7162 q, "data/exdata/ordrel"); 7163 return; 7164 default: 7165 break; 7166 } 7167 break; 7168 case M_FLUSH: 7169 if (*rptr & FLUSHW) 7170 flushq(q, FLUSHDATA); 7171 break; 7172 case M_IOCTL: 7173 iocp = (struct iocblk *)mp->b_rptr; 7174 switch (iocp->ioc_cmd) { 7175 case TI_GETPEERNAME: 7176 if (udp->udp_state != TS_DATA_XFER) { 7177 /* 7178 * If a default destination address has not 7179 * been associated with the stream, then we 7180 * don't know the peer's name. 7181 */ 7182 iocp->ioc_error = ENOTCONN; 7183 iocp->ioc_count = 0; 7184 mp->b_datap->db_type = M_IOCACK; 7185 putnext(UDP_RD(q), mp); 7186 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7187 "udp_wput_other_end: q %p (%S)", 7188 q, "getpeername"); 7189 return; 7190 } 7191 /* FALLTHRU */ 7192 case TI_GETMYNAME: { 7193 /* 7194 * For TI_GETPEERNAME and TI_GETMYNAME, we first 7195 * need to copyin the user's strbuf structure. 7196 * Processing will continue in the M_IOCDATA case 7197 * below. 7198 */ 7199 mi_copyin(q, mp, NULL, 7200 SIZEOF_STRUCT(strbuf, iocp->ioc_flag)); 7201 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7202 "udp_wput_other_end: q %p (%S)", 7203 q, "getmyname"); 7204 return; 7205 } 7206 case ND_SET: 7207 /* nd_getset performs the necessary checking */ 7208 case ND_GET: 7209 if (nd_getset(q, udp_g_nd, mp)) { 7210 putnext(UDP_RD(q), mp); 7211 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7212 "udp_wput_other_end: q %p (%S)", 7213 q, "get"); 7214 return; 7215 } 7216 break; 7217 case _SIOCSOCKFALLBACK: 7218 /* 7219 * Either sockmod is about to be popped and the 7220 * socket would now be treated as a plain stream, 7221 * or a module is about to be pushed so we could 7222 * no longer use read-side synchronous stream. 7223 * Drain any queued data and disable direct sockfs 7224 * interface from now on. 7225 */ 7226 if (!udp->udp_issocket) { 7227 DB_TYPE(mp) = M_IOCNAK; 7228 iocp->ioc_error = EINVAL; 7229 } else { 7230 udp->udp_issocket = B_FALSE; 7231 if (udp->udp_direct_sockfs) { 7232 /* 7233 * Disable read-side synchronous 7234 * stream interface and drain any 7235 * queued data. 7236 */ 7237 udp_rcv_drain(UDP_RD(q), udp, 7238 B_FALSE); 7239 ASSERT(!udp->udp_direct_sockfs); 7240 UDP_STAT(udp_sock_fallback); 7241 } 7242 DB_TYPE(mp) = M_IOCACK; 7243 iocp->ioc_error = 0; 7244 } 7245 iocp->ioc_count = 0; 7246 iocp->ioc_rval = 0; 7247 putnext(UDP_RD(q), mp); 7248 return; 7249 default: 7250 break; 7251 } 7252 break; 7253 case M_IOCDATA: 7254 udp_wput_iocdata(q, mp); 7255 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7256 "udp_wput_other_end: q %p (%S)", q, "iocdata"); 7257 return; 7258 default: 7259 /* Unrecognized messages are passed through without change. 
*/ 7260 break; 7261 } 7262 TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END, 7263 "udp_wput_other_end: q %p (%S)", q, "end"); 7264 ip_output(connp, mp, q, IP_WPUT); 7265 } 7266 7267 /* ARGSUSED */ 7268 static void 7269 udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 7270 { 7271 udp_wput_other(((conn_t *)arg)->conn_wq, mp); 7272 udp_exit((conn_t *)arg); 7273 } 7274 7275 /* 7276 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA 7277 * messages. 7278 */ 7279 static void 7280 udp_wput_iocdata(queue_t *q, mblk_t *mp) 7281 { 7282 mblk_t *mp1; 7283 STRUCT_HANDLE(strbuf, sb); 7284 uint16_t port; 7285 in6_addr_t v6addr; 7286 ipaddr_t v4addr; 7287 uint32_t flowinfo = 0; 7288 int addrlen; 7289 udp_t *udp = Q_TO_UDP(q); 7290 7291 /* Make sure it is one of ours. */ 7292 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7293 case TI_GETMYNAME: 7294 case TI_GETPEERNAME: 7295 break; 7296 default: 7297 ip_output(Q_TO_CONN(q), mp, q, IP_WPUT); 7298 return; 7299 } 7300 7301 q = WR(UDP_RD(q)); 7302 switch (mi_copy_state(q, mp, &mp1)) { 7303 case -1: 7304 return; 7305 case MI_COPY_CASE(MI_COPY_IN, 1): 7306 break; 7307 case MI_COPY_CASE(MI_COPY_OUT, 1): 7308 /* 7309 * The address has been copied out, so now 7310 * copyout the strbuf. 7311 */ 7312 mi_copyout(q, mp); 7313 return; 7314 case MI_COPY_CASE(MI_COPY_OUT, 2): 7315 /* 7316 * The address and strbuf have been copied out. 7317 * We're done, so just acknowledge the original 7318 * M_IOCTL. 7319 */ 7320 mi_copy_done(q, mp, 0); 7321 return; 7322 default: 7323 /* 7324 * Something strange has happened, so acknowledge 7325 * the original M_IOCTL with an EPROTO error. 7326 */ 7327 mi_copy_done(q, mp, EPROTO); 7328 return; 7329 } 7330 7331 /* 7332 * Now we have the strbuf structure for TI_GETMYNAME 7333 * and TI_GETPEERNAME. Next we copyout the requested 7334 * address and then we'll copyout the strbuf. 7335 */ 7336 STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag, 7337 (void *)mp1->b_rptr); 7338 if (udp->udp_family == AF_INET) 7339 addrlen = sizeof (sin_t); 7340 else 7341 addrlen = sizeof (sin6_t); 7342 7343 if (STRUCT_FGET(sb, maxlen) < addrlen) { 7344 mi_copy_done(q, mp, EINVAL); 7345 return; 7346 } 7347 switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) { 7348 case TI_GETMYNAME: 7349 if (udp->udp_family == AF_INET) { 7350 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7351 if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) && 7352 !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7353 v4addr = V4_PART_OF_V6(udp->udp_v6src); 7354 } else { 7355 /* 7356 * INADDR_ANY 7357 * udp_v6src is not set, we might be bound to 7358 * broadcast/multicast. Use udp_bound_v6src as 7359 * local address instead (that could 7360 * also still be INADDR_ANY) 7361 */ 7362 v4addr = V4_PART_OF_V6(udp->udp_bound_v6src); 7363 } 7364 } else { 7365 /* udp->udp_family == AF_INET6 */ 7366 if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) { 7367 v6addr = udp->udp_v6src; 7368 } else { 7369 /* 7370 * UNSPECIFIED 7371 * udp_v6src is not set, we might be bound to 7372 * broadcast/multicast. 
Use udp_bound_v6src as 7373 * local address instead (that could 7374 * also still be UNSPECIFIED) 7375 */ 7376 v6addr = udp->udp_bound_v6src; 7377 } 7378 } 7379 port = udp->udp_port; 7380 break; 7381 case TI_GETPEERNAME: 7382 if (udp->udp_state != TS_DATA_XFER) { 7383 mi_copy_done(q, mp, ENOTCONN); 7384 return; 7385 } 7386 if (udp->udp_family == AF_INET) { 7387 ASSERT(udp->udp_ipversion == IPV4_VERSION); 7388 v4addr = V4_PART_OF_V6(udp->udp_v6dst); 7389 } else { 7390 /* udp->udp_family == AF_INET6) */ 7391 v6addr = udp->udp_v6dst; 7392 flowinfo = udp->udp_flowinfo; 7393 } 7394 port = udp->udp_dstport; 7395 break; 7396 default: 7397 mi_copy_done(q, mp, EPROTO); 7398 return; 7399 } 7400 mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE); 7401 if (!mp1) 7402 return; 7403 7404 if (udp->udp_family == AF_INET) { 7405 sin_t *sin; 7406 7407 STRUCT_FSET(sb, len, (int)sizeof (sin_t)); 7408 sin = (sin_t *)mp1->b_rptr; 7409 mp1->b_wptr = (uchar_t *)&sin[1]; 7410 *sin = sin_null; 7411 sin->sin_family = AF_INET; 7412 sin->sin_addr.s_addr = v4addr; 7413 sin->sin_port = port; 7414 } else { 7415 /* udp->udp_family == AF_INET6 */ 7416 sin6_t *sin6; 7417 7418 STRUCT_FSET(sb, len, (int)sizeof (sin6_t)); 7419 sin6 = (sin6_t *)mp1->b_rptr; 7420 mp1->b_wptr = (uchar_t *)&sin6[1]; 7421 *sin6 = sin6_null; 7422 sin6->sin6_family = AF_INET6; 7423 sin6->sin6_flowinfo = flowinfo; 7424 sin6->sin6_addr = v6addr; 7425 sin6->sin6_port = port; 7426 } 7427 /* Copy out the address */ 7428 mi_copyout(q, mp); 7429 } 7430 7431 7432 static int 7433 udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp, 7434 void *thisdg_attrs) 7435 { 7436 struct T_unitdata_req *udreqp; 7437 int is_absreq_failure; 7438 cred_t *cr; 7439 conn_t *connp = Q_TO_CONN(q); 7440 7441 ASSERT(((t_primp_t)mp->b_rptr)->type); 7442 7443 cr = DB_CREDDEF(mp, connp->conn_cred); 7444 7445 udreqp = (struct T_unitdata_req *)mp->b_rptr; 7446 *errorp = 0; 7447 7448 /* 7449 * Use upper queue for option processing since the callback 7450 * routines expect to be called in UDP instance instead of IP. 7451 */ 7452 *errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length, 7453 udreqp->OPT_offset, cr, &udp_opt_obj, 7454 thisdg_attrs, &is_absreq_failure); 7455 7456 if (*errorp != 0) { 7457 /* 7458 * Note: No special action needed in this 7459 * module for "is_absreq_failure" 7460 */ 7461 return (-1); /* failure */ 7462 } 7463 ASSERT(is_absreq_failure == 0); 7464 return (0); /* success */ 7465 } 7466 7467 void 7468 udp_ddi_init(void) 7469 { 7470 int i; 7471 7472 UDP6_MAJ = ddi_name_to_major(UDP6); 7473 7474 udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr, 7475 udp_opt_obj.odb_opt_arr_cnt); 7476 7477 if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) { 7478 /* Not a power of two. 
Round up to nearest power of two */ 7479 for (i = 0; i < 31; i++) { 7480 if (udp_bind_fanout_size < (1 << i)) 7481 break; 7482 } 7483 udp_bind_fanout_size = 1 << i; 7484 } 7485 udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size * 7486 sizeof (udp_fanout_t), KM_SLEEP); 7487 for (i = 0; i < udp_bind_fanout_size; i++) { 7488 mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT, 7489 NULL); 7490 } 7491 (void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr)); 7492 7493 udp_kstat_init(); 7494 7495 udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t), 7496 CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0); 7497 } 7498 7499 void 7500 udp_ddi_destroy(void) 7501 { 7502 int i; 7503 7504 nd_free(&udp_g_nd); 7505 7506 for (i = 0; i < udp_bind_fanout_size; i++) { 7507 mutex_destroy(&udp_bind_fanout[i].uf_lock); 7508 } 7509 7510 kmem_free(udp_bind_fanout, udp_bind_fanout_size * 7511 sizeof (udp_fanout_t)); 7512 7513 udp_kstat_fini(); 7514 7515 kmem_cache_destroy(udp_cache); 7516 } 7517 7518 static void 7519 udp_kstat_init(void) 7520 { 7521 udp_named_kstat_t template = { 7522 { "inDatagrams", KSTAT_DATA_UINT32, 0 }, 7523 { "inErrors", KSTAT_DATA_UINT32, 0 }, 7524 { "outDatagrams", KSTAT_DATA_UINT32, 0 }, 7525 { "entrySize", KSTAT_DATA_INT32, 0 }, 7526 { "entry6Size", KSTAT_DATA_INT32, 0 }, 7527 { "outErrors", KSTAT_DATA_UINT32, 0 }, 7528 }; 7529 7530 udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME, 7531 "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0); 7532 7533 if (udp_mibkp == NULL) 7534 return; 7535 7536 template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t); 7537 template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t); 7538 7539 bcopy(&template, udp_mibkp->ks_data, sizeof (template)); 7540 7541 udp_mibkp->ks_update = udp_kstat_update; 7542 7543 kstat_install(udp_mibkp); 7544 7545 if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat", 7546 "net", KSTAT_TYPE_NAMED, 7547 sizeof (udp_statistics) / sizeof (kstat_named_t), 7548 KSTAT_FLAG_VIRTUAL)) != NULL) { 7549 udp_ksp->ks_data = &udp_statistics; 7550 kstat_install(udp_ksp); 7551 } 7552 } 7553 7554 static void 7555 udp_kstat_fini(void) 7556 { 7557 if (udp_ksp != NULL) { 7558 kstat_delete(udp_ksp); 7559 udp_ksp = NULL; 7560 } 7561 if (udp_mibkp != NULL) { 7562 kstat_delete(udp_mibkp); 7563 udp_mibkp = NULL; 7564 } 7565 } 7566 7567 static int 7568 udp_kstat_update(kstat_t *kp, int rw) 7569 { 7570 udp_named_kstat_t *udpkp; 7571 7572 if ((kp == NULL) || (kp->ks_data == NULL)) 7573 return (EIO); 7574 7575 if (rw == KSTAT_WRITE) 7576 return (EACCES); 7577 7578 udpkp = (udp_named_kstat_t *)kp->ks_data; 7579 7580 udpkp->inDatagrams.value.ui32 = udp_mib.udpInDatagrams; 7581 udpkp->inErrors.value.ui32 = udp_mib.udpInErrors; 7582 udpkp->outDatagrams.value.ui32 = udp_mib.udpOutDatagrams; 7583 udpkp->outErrors.value.ui32 = udp_mib.udpOutErrors; 7584 7585 return (0); 7586 } 7587 7588 /* ARGSUSED */ 7589 static void 7590 udp_rput(queue_t *q, mblk_t *mp) 7591 { 7592 /* 7593 * We get here whenever we do qreply() from IP, 7594 * i.e as part of handlings ioctls, etc. 7595 */ 7596 putnext(q, mp); 7597 } 7598 7599 /* 7600 * Read-side synchronous stream info entry point, called as a 7601 * result of handling certain STREAMS ioctl operations. 
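 * Depending on the INFOD_* bits in dp->d_cmd it reports the number of
 * queued datagrams, their total size, the size of the first datagram,
 * and/or copies the contents of the first datagram out to the caller.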
7602 */ 7603 static int 7604 udp_rinfop(queue_t *q, infod_t *dp) 7605 { 7606 mblk_t *mp; 7607 uint_t cmd = dp->d_cmd; 7608 int res = 0; 7609 int error = 0; 7610 udp_t *udp = Q_TO_UDP(RD(UDP_WR(q))); 7611 struct stdata *stp = STREAM(q); 7612 7613 mutex_enter(&udp->udp_drain_lock); 7614 /* If shutdown on read has happened, return nothing */ 7615 mutex_enter(&stp->sd_lock); 7616 if (stp->sd_flag & STREOF) { 7617 mutex_exit(&stp->sd_lock); 7618 goto done; 7619 } 7620 mutex_exit(&stp->sd_lock); 7621 7622 if ((mp = udp->udp_rcv_list_head) == NULL) 7623 goto done; 7624 7625 ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL); 7626 7627 if (cmd & INFOD_COUNT) { 7628 /* 7629 * Return the number of messages. 7630 */ 7631 dp->d_count += udp->udp_rcv_msgcnt; 7632 res |= INFOD_COUNT; 7633 } 7634 if (cmd & INFOD_BYTES) { 7635 /* 7636 * Return size of all data messages. 7637 */ 7638 dp->d_bytes += udp->udp_rcv_cnt; 7639 res |= INFOD_BYTES; 7640 } 7641 if (cmd & INFOD_FIRSTBYTES) { 7642 /* 7643 * Return size of first data message. 7644 */ 7645 dp->d_bytes = msgdsize(mp); 7646 res |= INFOD_FIRSTBYTES; 7647 dp->d_cmd &= ~INFOD_FIRSTBYTES; 7648 } 7649 if (cmd & INFOD_COPYOUT) { 7650 mblk_t *mp1 = mp->b_cont; 7651 int n; 7652 /* 7653 * Return data contents of first message. 7654 */ 7655 ASSERT(DB_TYPE(mp1) == M_DATA); 7656 while (mp1 != NULL && dp->d_uiop->uio_resid > 0) { 7657 n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1)); 7658 if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n, 7659 UIO_READ, dp->d_uiop)) != 0) { 7660 goto done; 7661 } 7662 mp1 = mp1->b_cont; 7663 } 7664 res |= INFOD_COPYOUT; 7665 dp->d_cmd &= ~INFOD_COPYOUT; 7666 } 7667 done: 7668 mutex_exit(&udp->udp_drain_lock); 7669 7670 dp->d_res |= res; 7671 7672 return (error); 7673 } 7674 7675 /* 7676 * Read-side synchronous stream entry point. This is called as a result 7677 * of recv/read operation done at sockfs, and is guaranteed to execute 7678 * outside of the interrupt thread context. It returns a single datagram 7679 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer. 7680 */ 7681 static int 7682 udp_rrw(queue_t *q, struiod_t *dp) 7683 { 7684 mblk_t *mp; 7685 udp_t *udp = Q_TO_UDP(_RD(UDP_WR(q))); 7686 7687 /* We should never get here when we're in SNMP mode */ 7688 ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD)); 7689 7690 /* 7691 * Dequeue datagram from the head of the list and return 7692 * it to caller; also ensure that RSLEEP sd_wakeq flag is 7693 * set/cleared depending on whether or not there's data 7694 * remaining in the list. 7695 */ 7696 mutex_enter(&udp->udp_drain_lock); 7697 if (!udp->udp_direct_sockfs) { 7698 mutex_exit(&udp->udp_drain_lock); 7699 UDP_STAT(udp_rrw_busy); 7700 return (EBUSY); 7701 } 7702 if ((mp = udp->udp_rcv_list_head) != NULL) { 7703 uint_t size = msgdsize(mp); 7704 7705 /* Last datagram in the list? */ 7706 if ((udp->udp_rcv_list_head = mp->b_next) == NULL) 7707 udp->udp_rcv_list_tail = NULL; 7708 mp->b_next = NULL; 7709 7710 udp->udp_rcv_cnt -= size; 7711 udp->udp_rcv_msgcnt--; 7712 UDP_STAT(udp_rrw_msgcnt); 7713 7714 /* No longer flow-controlling? */ 7715 if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat && 7716 udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat) 7717 udp->udp_drain_qfull = B_FALSE; 7718 } 7719 if (udp->udp_rcv_list_head == NULL) { 7720 /* 7721 * Either we just dequeued the last datagram or 7722 * we get here from sockfs and have nothing to 7723 * return; in this case clear RSLEEP. 
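 * The wakeup flag is how we tell the stream head whether data remains;
 * udp_rcv_enqueue() will set it again when the next datagram arrives.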
7724 */ 7725 ASSERT(udp->udp_rcv_cnt == 0); 7726 ASSERT(udp->udp_rcv_msgcnt == 0); 7727 ASSERT(udp->udp_rcv_list_tail == NULL); 7728 STR_WAKEUP_CLEAR(STREAM(q)); 7729 } else { 7730 /* 7731 * More data follows; we need udp_rrw() to be 7732 * called in future to pick up the rest. 7733 */ 7734 STR_WAKEUP_SET(STREAM(q)); 7735 } 7736 mutex_exit(&udp->udp_drain_lock); 7737 dp->d_mp = mp; 7738 return (0); 7739 } 7740 7741 /* 7742 * Enqueue a completely-built T_UNITDATA_IND message into the receive 7743 * list; this is typically executed within the interrupt thread context 7744 * and so we do things as quickly as possible. 7745 */ 7746 static void 7747 udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len) 7748 { 7749 ASSERT(q == RD(q)); 7750 ASSERT(pkt_len == msgdsize(mp)); 7751 ASSERT(mp->b_next == NULL && mp->b_cont != NULL); 7752 ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA); 7753 ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind)); 7754 7755 mutex_enter(&udp->udp_drain_lock); 7756 /* 7757 * Wake up and signal the receiving app; it is okay to do this 7758 * before enqueueing the mp because we are holding the drain lock. 7759 * One of the advantages of synchronous stream is the ability for 7760 * us to find out when the application performs a read on the 7761 * socket by way of udp_rrw() entry point being called. We need 7762 * to generate SIGPOLL/SIGIO for each received data in the case 7763 * of asynchronous socket just as in the strrput() case. However, 7764 * we only wake the application up when necessary, i.e. during the 7765 * first enqueue. When udp_rrw() is called, we send up a single 7766 * datagram upstream and call STR_WAKEUP_SET() again when there 7767 * are still data remaining in our receive queue. 7768 */ 7769 if (udp->udp_rcv_list_head == NULL) { 7770 STR_WAKEUP_SET(STREAM(q)); 7771 udp->udp_rcv_list_head = mp; 7772 } else { 7773 udp->udp_rcv_list_tail->b_next = mp; 7774 } 7775 udp->udp_rcv_list_tail = mp; 7776 udp->udp_rcv_cnt += pkt_len; 7777 udp->udp_rcv_msgcnt++; 7778 7779 /* Need to flow-control? */ 7780 if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat || 7781 udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat) 7782 udp->udp_drain_qfull = B_TRUE; 7783 7784 /* Update poll events and send SIGPOLL/SIGIO if necessary */ 7785 STR_SENDSIG(STREAM(q)); 7786 mutex_exit(&udp->udp_drain_lock); 7787 } 7788 7789 /* 7790 * Drain the contents of receive list to the module upstream; we do 7791 * this during close or when we fallback to the slow mode due to 7792 * sockmod being popped or a module being pushed on top of us. 7793 */ 7794 static void 7795 udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing) 7796 { 7797 mblk_t *mp; 7798 7799 ASSERT(q == RD(q)); 7800 7801 mutex_enter(&udp->udp_drain_lock); 7802 /* 7803 * There is no race with a concurrent udp_input() sending 7804 * up packets using putnext() after we have cleared the 7805 * udp_direct_sockfs flag but before we have completed 7806 * sending up the packets in udp_rcv_list, since we are 7807 * either a writer or we have quiesced the conn. 7808 */ 7809 udp->udp_direct_sockfs = B_FALSE; 7810 mutex_exit(&udp->udp_drain_lock); 7811 7812 if (udp->udp_rcv_list_head != NULL) 7813 UDP_STAT(udp_drain); 7814 7815 /* 7816 * Send up everything via putnext(); note here that we 7817 * don't need the udp_drain_lock to protect us since 7818 * nothing can enter udp_rrw() and that we currently 7819 * have exclusive access to this udp. 
7820 */ 7821 while ((mp = udp->udp_rcv_list_head) != NULL) { 7822 udp->udp_rcv_list_head = mp->b_next; 7823 mp->b_next = NULL; 7824 udp->udp_rcv_cnt -= msgdsize(mp); 7825 udp->udp_rcv_msgcnt--; 7826 if (closing) { 7827 freemsg(mp); 7828 } else { 7829 putnext(q, mp); 7830 } 7831 } 7832 ASSERT(udp->udp_rcv_cnt == 0); 7833 ASSERT(udp->udp_rcv_msgcnt == 0); 7834 ASSERT(udp->udp_rcv_list_head == NULL); 7835 udp->udp_rcv_list_tail = NULL; 7836 udp->udp_drain_qfull = B_FALSE; 7837 } 7838 7839 static size_t 7840 udp_set_rcv_hiwat(udp_t *udp, size_t size) 7841 { 7842 /* We add a bit of extra buffering */ 7843 size += size >> 1; 7844 if (size > udp_max_buf) 7845 size = udp_max_buf; 7846 7847 udp->udp_rcv_hiwat = size; 7848 return (size); 7849 } 7850 7851 /* 7852 * Little helper for IPsec's NAT-T processing. 7853 */ 7854 boolean_t 7855 udp_compute_checksum(void) 7856 { 7857 return (udp_do_checksum); 7858 } 7859