1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 const char udp_version[] = "%Z%%M% %I% %E% SMI"; 30 31 #include <sys/types.h> 32 #include <sys/stream.h> 33 #include <sys/dlpi.h> 34 #include <sys/pattr.h> 35 #include <sys/stropts.h> 36 #include <sys/strlog.h> 37 #include <sys/strsun.h> 38 #include <sys/time.h> 39 #define _SUN_TPI_VERSION 2 40 #include <sys/tihdr.h> 41 #include <sys/timod.h> 42 #include <sys/ddi.h> 43 #include <sys/sunddi.h> 44 #include <sys/strsubr.h> 45 #include <sys/suntpi.h> 46 #include <sys/xti_inet.h> 47 #include <sys/cmn_err.h> 48 #include <sys/kmem.h> 49 #include <sys/policy.h> 50 #include <sys/ucred.h> 51 #include <sys/zone.h> 52 53 #include <sys/socket.h> 54 #include <sys/sockio.h> 55 #include <sys/vtrace.h> 56 #include <sys/sdt.h> 57 #include <sys/debug.h> 58 #include <sys/isa_defs.h> 59 #include <sys/random.h> 60 #include <netinet/in.h> 61 #include <netinet/ip6.h> 62 #include <netinet/icmp6.h> 63 #include <netinet/udp.h> 64 #include <net/if.h> 65 #include <net/route.h> 66 67 #include <inet/common.h> 68 #include <inet/ip.h> 69 #include <inet/ip_impl.h> 70 #include <inet/ip6.h> 71 #include <inet/ip_ire.h> 72 #include <inet/ip_if.h> 73 #include <inet/ip_multi.h> 74 #include <inet/ip_ndp.h> 75 #include <inet/mi.h> 76 #include <inet/mib2.h> 77 #include <inet/nd.h> 78 #include <inet/optcom.h> 79 #include <inet/snmpcom.h> 80 #include <inet/kstatcom.h> 81 #include <inet/udp_impl.h> 82 #include <inet/ipclassifier.h> 83 #include <inet/ipsec_impl.h> 84 #include <inet/ipp_common.h> 85 86 /* 87 * The ipsec_info.h header file is here since it has the definition for the 88 * M_CTL message types used by IP to convey information to the ULP. The 89 * ipsec_info.h needs the pfkeyv2.h, hence the latter's presence. 
90 */ 91 #include <net/pfkeyv2.h> 92 #include <inet/ipsec_info.h> 93 94 #include <sys/tsol/label.h> 95 #include <sys/tsol/tnet.h> 96 #include <rpc/pmap_prot.h> 97 98 /* 99 * Synchronization notes: 100 * 101 * UDP uses a combination of its internal perimeter, a global lock and 102 * a set of bind hash locks to protect its data structures. Please see 103 * the note above udp_mode_assertions for details about the internal 104 * perimeter. 105 * 106 * When a UDP endpoint is bound to a local port, it is inserted into 107 * a bind hash list. The list consists of an array of udp_fanout_t buckets. 108 * The size of the array is controlled by the udp_bind_fanout_size variable. 109 * This variable can be changed in /etc/system if the default value is 110 * not large enough. Each bind hash bucket is protected by a per bucket 111 * lock. It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t 112 * structure. An UDP endpoint is removed from the bind hash list only 113 * when it is being unbound or being closed. The per bucket lock also 114 * protects a UDP endpoint's state changes. 115 * 116 * Plumbing notes: 117 * 118 * Both udp and ip are merged, but the streams plumbing is kept unchanged 119 * in that udp is always pushed atop /dev/ip. This is done to preserve 120 * backwards compatibility for certain applications which rely on such 121 * plumbing geometry to do things such as issuing I_POP on the stream 122 * in order to obtain direct access to /dev/ip, etc. 123 * 124 * All UDP processings happen in the /dev/ip instance; the udp module 125 * instance does not possess any state about the endpoint, and merely 126 * acts as a dummy module whose presence is to keep the streams plumbing 127 * appearance unchanged. At open time /dev/ip allocates a conn_t that 128 * happens to embed a udp_t. This stays dormant until the time udp is 129 * pushed, which indicates to /dev/ip that it must convert itself from 130 * an IP to a UDP endpoint. 
131 * 132 * We only allow for the following plumbing cases: 133 * 134 * Normal: 135 * /dev/ip is first opened and later udp is pushed directly on top. 136 * This is the default action that happens when a udp socket or 137 * /dev/udp is opened. The conn_t created by /dev/ip instance is 138 * now shared and is marked with IPCL_UDP. 139 * 140 * SNMP-only: 141 * udp is pushed on top of a module other than /dev/ip. When this 142 * happens it will support only SNMP semantics. A new conn_t is 143 * allocated and marked with IPCL_UDPMOD. 144 * 145 * The above cases imply that we don't support any intermediate module to 146 * reside in between /dev/ip and udp -- in fact, we never supported such 147 * scenario in the past as the inter-layer communication semantics have 148 * always been private. Also note that the normal case allows for SNMP 149 * requests to be processed in addition to the rest of UDP operations. 150 * 151 * The normal case plumbing is depicted by the following diagram: 152 * 153 * +---------------+---------------+ 154 * | | | udp 155 * | udp_wq | udp_rq | 156 * | | UDP_RD | 157 * | | | 158 * +---------------+---------------+ 159 * | ^ 160 * v | 161 * +---------------+---------------+ 162 * | | | /dev/ip 163 * | ip_wq | ip_rq | conn_t 164 * | UDP_WR | | 165 * | | | 166 * +---------------+---------------+ 167 * 168 * Messages arriving at udp_wq from above will end up in ip_wq before 169 * it gets processed, i.e. udp write entry points will advance udp_wq 170 * and use its q_next value as ip_wq in order to use the conn_t that 171 * is stored in its q_ptr. Likewise, messages generated by ip to the 172 * module above udp will appear as if they are originated from udp_rq, 173 * i.e. putnext() calls to the module above udp is done using the 174 * udp_rq instead of ip_rq in order to avoid udp_rput() which does 175 * nothing more than calling putnext(). 176 * 177 * The above implies the following rule of thumb: 178 * 179 * 1. 
udp_t is obtained from conn_t, which is created by the /dev/ip
 *    instance and is stored in q_ptr of both ip_wq and ip_rq. There
 *    is no direct reference to conn_t from either udp_wq or udp_rq.
 *
 * 2. Write-side entry points of udp can obtain the conn_t via the
 *    Q_TO_CONN() macro, using the queue value obtain from UDP_WR().
 *
 * 3. While in /dev/ip context, putnext() to the module above udp can
 *    be done by supplying the queue value obtained from UDP_RD().
 *
 */

/*
 * UDP_WR()/UDP_RD() translate between the dummy udp queue pair and the
 * underlying /dev/ip queue pair that owns the conn_t; see the plumbing
 * notes above.
 */
static queue_t *UDP_WR(queue_t *);
static queue_t *UDP_RD(queue_t *);

/* Named kstat counters exported for this module (see udp_kstat_init()). */
udp_stat_t udp_statistics = {
	{ "udp_ip_send",		KSTAT_DATA_UINT64 },
	{ "udp_ip_ire_send",		KSTAT_DATA_UINT64 },
	{ "udp_ire_null",		KSTAT_DATA_UINT64 },
	{ "udp_drain",			KSTAT_DATA_UINT64 },
	{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_busy",		KSTAT_DATA_UINT64 },
	{ "udp_rrw_msgcnt",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum",		KSTAT_DATA_UINT64 },
	{ "udp_out_sw_cksum_bytes",	KSTAT_DATA_UINT64 },
	{ "udp_out_opt",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
	{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
	{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
	{ "udp_in_pktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstaddr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvif",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvslla",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvucred",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvttl",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhopopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvhoplimit",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvdstopts",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvrtdstopts",	KSTAT_DATA_UINT64 },
	{ "udp_in_recvrthdr",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvpktinfo",		KSTAT_DATA_UINT64 },
	{ "udp_in_recvtclass",		KSTAT_DATA_UINT64 },
	{ "udp_in_timestamp",		KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "udp_data_conn",		KSTAT_DATA_UINT64 },
	{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
#endif
};

static kstat_t *udp_ksp;		/* kstat exporting udp_statistics */
struct kmem_cache *udp_cache;		/* cache for udp_t allocations */

/*
 * Bind hash list size and hash function. It has to be a power of 2 for
 * hashing.
 */
#define	UDP_BIND_FANOUT_SIZE	512
#define	UDP_BIND_HASH(lport) \
	((ntohs((uint16_t)lport)) & (udp_bind_fanout_size - 1))

/*
 * UDP bind fanout hash structure.  uf_pad sizes each bucket to a full
 * cache line to avoid false sharing between adjacent bucket locks.
 */
typedef struct udp_fanout_s {
	udp_t *uf_udp;			/* head of this bucket's hash chain */
	kmutex_t uf_lock;		/* protects the chain and udp state */
#if defined(_LP64) || defined(_I32LPx)
	char	uf_pad[48];
#else
	char	uf_pad[56];
#endif
} udp_fanout_t;

/* Tunable via /etc/system; must remain a power of 2 (see UDP_BIND_HASH). */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
/* udp_fanout_t *udp_bind_fanout. */
static udp_fanout_t *udp_bind_fanout;

/*
 * This controls the rate some ndd info report functions can be used
 * by non-privileged users. It stores the last time such info is
 * requested. When those report functions are called again, this
 * is checked with the current time and compare with the ndd param
 * udp_ndd_get_info_interval.
 */
static clock_t udp_last_ndd_get_info_time;
#define	NDD_TOO_QUICK_MSG \
	"ndd get info rate too high for non-privileged users, try again " \
	"later.\n"
#define	NDD_OUT_OF_BUF_MSG	"<< Out of buffer >>\n"

/* Option processing attrs */
typedef struct udpattrs_s {
	ip6_pkt_t	*udpattr_ipp;	/* parsed ancillary/sticky options */
	mblk_t		*udpattr_mb;	/* message being processed */
	boolean_t	udpattr_credset; /* cred attached to the message? */
} udpattrs_t;

/* Forward declarations for this file's entry points and helpers. */
static void	udp_addr_req(queue_t *q, mblk_t *mp);
static void	udp_bind(queue_t *q, mblk_t *mp);
static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
static int	udp_build_hdrs(queue_t *q, udp_t *udp);
static void	udp_capability_req(queue_t *q, mblk_t *mp);
static int	udp_close(queue_t *q);
static void	udp_connect(queue_t *q, mblk_t *mp);
static void	udp_disconnect(queue_t *q, mblk_t *mp);
static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
		    int sys_error);
static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive,
		    t_scalar_t tlierr, int unixerr);
static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
		    char *value, caddr_t cp, cred_t *cr);
static void	udp_icmp_error(queue_t *q, mblk_t *mp);
static void	udp_icmp_error_ipv6(queue_t *q, mblk_t *mp);
static void	udp_info_req(queue_t *q, mblk_t *mp);
static mblk_t	*udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim,
		    t_scalar_t addr_length);
static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
		    cred_t *credp);
static int	udp_unitdata_opt_process(queue_t *q, mblk_t *mp,
		    int *errorp, udpattrs_t *udpattrs);
static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
static boolean_t udp_param_register(udpparam_t *udppa, int cnt);
static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
		    cred_t *cr);
static void	udp_report_item(mblk_t *mp, udp_t *udp);
static void	udp_rput(queue_t *q, mblk_t *mp);
static void	udp_rput_other(queue_t *, mblk_t *);
static int	udp_rinfop(queue_t *q, infod_t *dp);
static int	udp_rrw(queue_t *q, struiod_t *dp);
static void	udp_rput_bind_ack(queue_t *q, mblk_t *mp);
static int	udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp,
		    cred_t *cr);
static void	udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha);
static void	udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr,
		    t_scalar_t destlen, t_scalar_t err);
static void	udp_unbind(queue_t *q, mblk_t *mp);
static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
    boolean_t random);
static void	udp_wput(queue_t *q, mblk_t *mp);
static mblk_t	*udp_output_v4(conn_t *, mblk_t *mp, ipaddr_t v4dst,
		    uint16_t port, uint_t srcid, int *error);
static mblk_t	*udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6,
		    int *error);
static void	udp_wput_other(queue_t *q, mblk_t *mp);
static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
static void	udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr,
		    socklen_t addrlen);
static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);

static void	udp_kstat_init(void);
static void	udp_kstat_fini(void);
static int	udp_kstat_update(kstat_t *kp, int rw);
static void	udp_input_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2);
static void	udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2);

static void	udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp,
		    uint_t pkt_len);
static void	udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing);
static void	udp_enter(conn_t *, mblk_t *, sqproc_t, uint8_t);
static void	udp_exit(conn_t *);
static void	udp_become_writer(conn_t *, mblk_t *, sqproc_t, uint8_t);
#ifdef DEBUG
static void	udp_mode_assertions(udp_t *, int);
#endif /* DEBUG */

major_t	UDP6_MAJ;
#define	UDP6 "udp6"

/* Default STREAMS high/low water marks for the receive and transmit sides. */
#define	UDP_RECV_HIWATER (56 * 1024)
#define	UDP_RECV_LOWATER 128
#define	UDP_XMIT_HIWATER (56 * 1024)
#define	UDP_XMIT_LOWATER 1024

static struct module_info udp_info = {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

static struct qinit udp_rinit = {
	(pfi_t)udp_rput, NULL, udp_open, udp_close, NULL,
	&udp_info, NULL, udp_rrw, udp_rinfop, STRUIOT_STANDARD
};

static struct qinit udp_winit = {
	(pfi_t)udp_wput, NULL, NULL, NULL, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Pass-through write side; see plumbing notes: udp is a dummy module. */
static struct qinit winit = {
	(pfi_t)putnext, NULL, NULL, NULL, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

/* Support for just SNMP if UDP is not pushed directly over device IP */
struct qinit udp_snmp_rinit = {
	(pfi_t)putnext, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

struct qinit udp_snmp_winit = {
	(pfi_t)ip_snmpmod_wput, NULL, udp_open, ip_snmpmod_close, NULL,
	&udp_info, NULL, NULL, NULL, STRUIOT_NONE
};

struct streamtab udpinfo = {
	&udp_rinit, &winit
};

static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

/* Hint not protected by any lock */
static in_port_t	udp_g_next_port_to_try;

/*
 * Extra privileged ports. In host byte order.
 */
#define	UDP_NUM_EPRIV_PORTS	64
static int	udp_g_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
static in_port_t udp_g_epriv_ports[UDP_NUM_EPRIV_PORTS] = { 2049, 4045 };

/* Only modified during _init and _fini thus no locking is needed. */
static IDP	udp_g_nd;	/* Points to table of UDP ND variables. */

/* MIB-2 stuff for SNMP */
static mib2_udp_t	udp_mib;	/* SNMP fixed size info */
static kstat_t		*udp_mibkp;	/* kstat exporting udp_mib data */

#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO)	/* PROVIDER_flag */
};

/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into udp_g_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
	/*min	max		value		name */
	{ 0L,	256,		32,		"udp_wroff_extra" },
	{ 1L,	255,		255,		"udp_ipv4_ttl" },
	{ 0,	IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
	{ 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
	{ 0,	1,		1,		"udp_do_checksum" },
	{ 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
	{ 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
	{ UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
	{ 0,	(1<<30), UDP_XMIT_LOWATER,	"udp_xmit_lowat"},
	{ UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
	{ 65536, (1<<30), 2*1024*1024,	"udp_max_buf"},
	{ 100,	60000,	1000,	"udp_ndd_get_info_interval"},
};
/* END CSTYLED */

/*
 * The smallest anonymous port in the privileged port range which UDP
 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
 */
static in_port_t udp_min_anonpriv_port = 512;

/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;

/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL.
 */

void (*cl_inet_bind)(uchar_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;
void (*cl_inet_unbind)(uint8_t protocol, sa_family_t addr_family,
    uint8_t *laddrp, in_port_t lport) = NULL;

typedef union T_primitives *t_primp_t;

/*
 * Append mp to the endpoint's internal message queue, recording the
 * squeue handler and tag in the (otherwise unused) b_prev and b_queue
 * fields so udp_switch_to_squeue() can replay the message later.
 * Caller must hold conn_lock.
 */
#define	UDP_ENQUEUE_MP(udp, mp, proc, tag) {			\
	ASSERT((mp)->b_prev == NULL && (mp)->b_queue == NULL);	\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(mp)->b_queue = (queue_t *)((uintptr_t)tag);		\
	(mp)->b_prev = (mblk_t *)proc;				\
	if ((udp)->udp_mphead == NULL)				\
		(udp)->udp_mphead = (mp);			\
	else							\
		(udp)->udp_mptail->b_next = (mp);		\
	(udp)->udp_mptail = (mp);				\
	(udp)->udp_mpcount++;					\
}

/* Account one more MT_HOT reader inside the perimeter (conn_lock held). */
#define	UDP_READERS_INCREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count++;				\
}

/*
 * Drop one reader; wake any thread (e.g. close) waiting on conn_cv for
 * the perimeter to quiesce when the count reaches zero (conn_lock held).
 */
#define	UDP_READERS_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_reader_count--;				\
	if ((udp)->udp_reader_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

/*
 * Drop one squeue-mode user; wake conn_cv waiters when the squeue side
 * of the perimeter becomes idle (conn_lock held).
 */
#define	UDP_SQUEUE_DECREF(udp) {				\
	ASSERT(MUTEX_HELD(&(udp)->udp_connp->conn_lock));	\
	(udp)->udp_squeue_count--;				\
	if ((udp)->udp_squeue_count == 0)			\
		cv_broadcast(&(udp)->udp_connp->conn_cv);	\
}

/*
 * Notes on UDP endpoint synchronization:
 *
 * UDP needs exclusive operation on a per endpoint basis, when executing
 * functions that modify the endpoint state.  udp_rput_other() deals with
 * packets with IP options, and processing these packets end up having
 * to update the endpoint's option related state.  udp_wput_other() deals
 * with control operations from the top, e.g. connect() that needs to
 * update the endpoint state.  These could be synchronized using locks,
 * but the current version uses squeues for this purpose.  squeues may
 * give performance improvement for certain cases such as connected UDP
 * sockets; thus the framework allows for using squeues.
 *
 * The perimeter routines are described as follows:
 *
 * udp_enter():
 *	Enter the UDP endpoint perimeter.
 *
 * udp_become_writer():
 *	Become exclusive on the UDP endpoint. Specifies a function
 *	that will be called exclusively either immediately or later
 *	when the perimeter is available exclusively.
 *
 * udp_exit():
 *	Exit the UDP perimeter.
 *
 * Entering UDP from the top or from the bottom must be done using
 * udp_enter(). No lock must be held while attempting to enter the UDP
 * perimeter. When finished, udp_exit() must be called to get out of
 * the perimeter.
 *
 * UDP operates in either MT_HOT mode or in SQUEUE mode. In MT_HOT mode,
 * multiple threads may enter a UDP endpoint concurrently. This is used
 * for sending and/or receiving normal data. Control operations and other
 * special cases call udp_become_writer() to become exclusive on a per
 * endpoint basis and this results in transitioning to SQUEUE mode. squeue
 * by definition serializes access to the conn_t. When there are no more
 * pending messages on the squeue for the UDP connection, the endpoint
 * reverts to MT_HOT mode. During the interregnum when not all MT threads
 * of an endpoint have finished, messages are queued in the UDP endpoint
 * and the UDP is in UDP_MT_QUEUED mode or UDP_QUEUED_SQUEUE mode.
 *
 * These modes have the following analogs:
 *
 *	UDP_MT_HOT/udp_reader_count==0		none
 *	UDP_MT_HOT/udp_reader_count>0		RW_READ_LOCK
 *	UDP_MT_QUEUED				RW_WRITE_WANTED
 *	UDP_SQUEUE or UDP_QUEUED_SQUEUE		RW_WRITE_LOCKED
 *
 * Stable modes:	UDP_MT_HOT, UDP_SQUEUE
 * Transient modes:	UDP_MT_QUEUED, UDP_QUEUED_SQUEUE
 *
 * While in stable modes, UDP keeps track of the number of threads
 * operating on the endpoint.
The udp_reader_count variable represents
 * the number of threads entering the endpoint as readers while it is
 * in UDP_MT_HOT mode. Transitioning to UDP_SQUEUE happens when there
 * is only a single reader, i.e. when this counter drops to 1. Likewise,
 * udp_squeue_count represents the number of threads operating on the
 * endpoint's squeue while it is in UDP_SQUEUE mode. The mode transition
 * to UDP_MT_HOT happens after the last thread exits the endpoint, i.e.
 * when this counter drops to 0.
 *
 * The default mode is set to UDP_MT_HOT and UDP alternates between
 * UDP_MT_HOT and UDP_SQUEUE as shown in the state transition below.
 *
 * Mode transition:
 * ----------------------------------------------------------------
 * old mode		Event				New mode
 * ----------------------------------------------------------------
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_SQUEUE
 *			and udp_reader_count == 1
 *
 * UDP_MT_HOT		Call to udp_become_writer()	UDP_MT_QUEUED
 *			and udp_reader_count > 1
 *
 * UDP_MT_QUEUED	udp_reader_count drops to zero	UDP_QUEUED_SQUEUE
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_SQUEUE
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count != 0
 *
 * UDP_QUEUED_SQUEUE	All messages enqueued on the	UDP_MT_HOT
 *			internal UDP queue successfully
 *			moved to squeue AND udp_squeue_count
 *			drops to zero
 *
 * UDP_SQUEUE		udp_squeue_count drops to zero	UDP_MT_HOT
 * ----------------------------------------------------------------
 */

/*
 * Given a udp queue, return the corresponding /dev/ip write queue (one
 * below us) which owns the conn_t; see the plumbing notes at the top of
 * this file.  The ASSERTs verify we were handed the dummy udp queue.
 */
static queue_t *
UDP_WR(queue_t *q)
{
	ASSERT(q->q_ptr == NULL && _OTHERQ(q)->q_ptr == NULL);
	ASSERT(WR(q)->q_next != NULL && WR(q)->q_next->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(WR(q)->q_next)));

	return (_WR(q)->q_next);
}

/*
 * Given a /dev/ip queue, return the dummy udp read queue above it so
 * that putnext() appears to originate from the udp module.
 */
static queue_t *
UDP_RD(queue_t *q)
{
	ASSERT(q->q_ptr != NULL && _OTHERQ(q)->q_ptr != NULL);
	ASSERT(IPCL_IS_UDP(Q_TO_CONN(q)));
	ASSERT(RD(q)->q_next != NULL && RD(q)->q_next->q_ptr == NULL);

	return (_RD(q)->q_next);
}

#ifdef DEBUG
#define	UDP_MODE_ASSERTIONS(udp, caller) udp_mode_assertions(udp, caller)
#else
#define	UDP_MODE_ASSERTIONS(udp, caller)
#endif

/* Invariants */
#ifdef DEBUG

/* Per-mode hit counters, indexed by udp_mode; debug aid only. */
uint32_t udp_count[4];

/* Context of udp_mode_assertions */
#define	UDP_ENTER		1
#define	UDP_BECOME_WRITER	2
#define	UDP_EXIT		3

/*
 * Debug-only verification of the perimeter invariants for the current
 * udp_mode.  'caller' identifies which perimeter routine is checking,
 * since some invariants only hold on entry or exit.  conn_lock held.
 */
static void
udp_mode_assertions(udp_t *udp, int caller)
{
	ASSERT(MUTEX_HELD(&udp->udp_connp->conn_lock));

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		/*
		 * Messages have not yet been enqueued on the internal queue,
		 * otherwise we would have switched to UDP_MT_QUEUED. Likewise
		 * by definition, there can't be any messages enqueued on the
		 * squeue. The UDP could be quiescent, so udp_reader_count
		 * could be zero at entry.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0 &&
		    udp->udp_squeue_count == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_reader_count != 0);
		udp_count[0]++;
		break;

	case UDP_MT_QUEUED:
		/*
		 * The last MT thread to exit the udp perimeter empties the
		 * internal queue and then switches the UDP to
		 * UDP_QUEUED_SQUEUE mode. Since we are still in UDP_MT_QUEUED
		 * mode, it means there must be at least 1 MT thread still in
		 * the perimeter and at least 1 message on the internal queue.
		 */
		ASSERT(udp->udp_reader_count >= 1 && udp->udp_mphead != NULL &&
		    udp->udp_mpcount != 0 && udp->udp_squeue_count == 0);
		udp_count[1]++;
		break;

	case UDP_QUEUED_SQUEUE:
		/*
		 * The switch has happened from MT to SQUEUE. So there can't
		 * be any MT threads. Messages could still pile up on the
		 * internal queue until the transition is complete and we
		 * move to UDP_SQUEUE mode. We can't assert on nonzero
		 * udp_squeue_count since the squeue could drain any time.
		 */
		ASSERT(udp->udp_reader_count == 0);
		udp_count[2]++;
		break;

	case UDP_SQUEUE:
		/*
		 * The transition is complete. There can't be any messages on
		 * the internal queue. The udp could be quiescent or the squeue
		 * could drain any time, so we can't assert on nonzero
		 * udp_squeue_count during entry. Nor can we assert that
		 * udp_reader_count is zero, since, a reader thread could have
		 * directly become writer in line by calling udp_become_writer
		 * without going through the queued states.
		 */
		ASSERT(udp->udp_mphead == NULL && udp->udp_mpcount == 0);
		ASSERT(caller == UDP_ENTER || udp->udp_squeue_count != 0);
		udp_count[3]++;
		break;
	}
}
#endif

/*
 * Body of udp_enter(): dispatch mp according to the current perimeter
 * mode -- run inline as a reader (MT_HOT), hand off to the squeue
 * (SQUEUE), or park on the internal queue during a mode transition.
 * Messages arriving while the conn is closing are simply freed.
 */
#define	_UDP_ENTER(connp, mp, proc, tag) {				\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	if ((connp)->conn_state_flags & CONN_CLOSING) {			\
		mutex_exit(&(connp)->conn_lock);			\
		freemsg(mp);						\
	} else {							\
		UDP_MODE_ASSERTIONS(_udp, UDP_ENTER);			\
									\
		switch (_udp->udp_mode) {				\
		case UDP_MT_HOT:					\
			/* We can execute as reader right away. */	\
			UDP_READERS_INCREF(_udp);			\
			mutex_exit(&(connp)->conn_lock);		\
			(*(proc))(connp, mp, (connp)->conn_sqp);	\
			break;						\
									\
		case UDP_SQUEUE:					\
			/*						\
			 * We are in squeue mode, send the		\
			 * packet to the squeue				\
			 */						\
			_udp->udp_squeue_count++;			\
			CONN_INC_REF_LOCKED(connp);			\
			mutex_exit(&(connp)->conn_lock);		\
			squeue_enter((connp)->conn_sqp, mp, proc,	\
			    connp, tag);				\
			break;						\
									\
		case UDP_MT_QUEUED:					\
		case UDP_QUEUED_SQUEUE:					\
			/*						\
			 * Some messages may have been enqueued		\
			 * ahead of us.  Enqueue the new message	\
			 * at the tail of the internal queue to		\
			 * preserve message ordering.			\
			 */						\
			UDP_ENQUEUE_MP(_udp, mp, proc, tag);		\
			mutex_exit(&(connp)->conn_lock);		\
			break;						\
		}							\
	}								\
}

/* Enter the UDP endpoint perimeter; see the synchronization notes above. */
static void
udp_enter(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	_UDP_ENTER(connp, mp, proc, tag);
}

/*
 * Become exclusive (writer) on the endpoint.  'proc' runs immediately on
 * the squeue when we are the sole reader or already in squeue mode;
 * otherwise it is queued internally until the perimeter can be ours.
 */
static void
udp_become_writer(conn_t *connp, mblk_t *mp, sqproc_t proc, uint8_t tag)
{
	udp_t	*udp;

	udp = connp->conn_udp;

	mutex_enter(&connp->conn_lock);

	UDP_MODE_ASSERTIONS(udp, UDP_BECOME_WRITER);

	switch (udp->udp_mode) {
	case UDP_MT_HOT:
		if (udp->udp_reader_count == 1) {
			/*
			 * We are the only MT thread. Switch to squeue mode
			 * immediately.
			 */
			udp->udp_mode = UDP_SQUEUE;
			udp->udp_squeue_count = 1;
			CONN_INC_REF_LOCKED(connp);
			mutex_exit(&connp->conn_lock);
			squeue_enter(connp->conn_sqp, mp, proc, connp, tag);
			return;
		}
		/* FALLTHRU */

	case UDP_MT_QUEUED:
		/* Enqueue the packet internally in UDP */
		udp->udp_mode = UDP_MT_QUEUED;
		UDP_ENQUEUE_MP(udp, mp, proc, tag);
		mutex_exit(&connp->conn_lock);
		return;

	case UDP_SQUEUE:
	case UDP_QUEUED_SQUEUE:
		/*
		 * We are already exclusive. i.e. we are already
		 * writer. Simply call the desired function.
		 */
		udp->udp_squeue_count++;
		mutex_exit(&connp->conn_lock);
		(*proc)(connp, mp, connp->conn_sqp);
		return;
	}
}

/*
 * Transition from MT mode to SQUEUE mode, when the last MT thread
 * is exiting the UDP perimeter. Move all messages from the internal
 * udp queue to the squeue. A better way would be to move all the
 * messages in one shot, this needs more support from the squeue
 * framework.
 */
static void
udp_switch_to_squeue(udp_t *udp)
{
	mblk_t *mp;
	mblk_t	*mp_next;
	sqproc_t proc;
	uint8_t	tag;
	conn_t	*connp = udp->udp_connp;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(udp->udp_mode == UDP_MT_QUEUED);
	while (udp->udp_mphead != NULL) {
		/* Detach the whole chain, then replay it onto the squeue. */
		mp = udp->udp_mphead;
		udp->udp_mphead = NULL;
		udp->udp_mptail = NULL;
		udp->udp_mpcount = 0;
		udp->udp_mode = UDP_QUEUED_SQUEUE;
		mutex_exit(&connp->conn_lock);
		/*
		 * It is best not to hold any locks across the calls
		 * to squeue functions. Since we drop the lock we
		 * need to go back and check the udp_mphead once again
		 * after the squeue_fill and hence the while loop at
		 * the top of this function
		 */
		for (; mp != NULL; mp = mp_next) {
			mp_next = mp->b_next;
			/* Recover handler/tag stashed by UDP_ENQUEUE_MP. */
			proc = (sqproc_t)mp->b_prev;
			tag = (uint8_t)((uintptr_t)mp->b_queue);
			mp->b_next = NULL;
			mp->b_prev = NULL;
			mp->b_queue = NULL;
			CONN_INC_REF(connp);
			udp->udp_squeue_count++;
			squeue_fill(connp->conn_sqp, mp, proc, connp,
			    tag);
		}
		mutex_enter(&connp->conn_lock);
	}
	/*
	 * udp_squeue_count of zero implies that the squeue has drained
	 * even before we arrived here (i.e. after the squeue_fill above)
	 */
	udp->udp_mode = (udp->udp_squeue_count != 0) ?
	    UDP_SQUEUE : UDP_MT_HOT;
}

/*
 * Body of udp_exit(): leave the perimeter, performing the mode
 * transitions described in the table above as reader/squeue counts
 * drop to zero.
 */
#define	_UDP_EXIT(connp) {						\
	udp_t *_udp = (connp)->conn_udp;				\
									\
	mutex_enter(&(connp)->conn_lock);				\
	UDP_MODE_ASSERTIONS(_udp, UDP_EXIT);				\
									\
	switch (_udp->udp_mode) {					\
	case UDP_MT_HOT:						\
		UDP_READERS_DECREF(_udp);				\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_SQUEUE:						\
		UDP_SQUEUE_DECREF(_udp);				\
		if (_udp->udp_squeue_count == 0)			\
			_udp->udp_mode = UDP_MT_HOT;			\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_MT_QUEUED:						\
		/*							\
		 * If this is the last MT thread, we need to		\
		 * switch to squeue mode				\
		 */							\
		UDP_READERS_DECREF(_udp);				\
		if (_udp->udp_reader_count == 0)			\
			udp_switch_to_squeue(_udp);			\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
									\
	case UDP_QUEUED_SQUEUE:						\
		UDP_SQUEUE_DECREF(_udp);				\
		/*							\
		 * Even if the udp_squeue_count drops to zero, we	\
		 * don't want to change udp_mode to UDP_MT_HOT here.	\
		 * The thread in udp_switch_to_squeue will take care	\
		 * of the transition to UDP_MT_HOT, after emptying	\
		 * any more new messages that have been enqueued in	\
		 * udp_mphead.						\
		 */							\
		mutex_exit(&(connp)->conn_lock);			\
		break;							\
	}								\
}

/* Exit the UDP endpoint perimeter entered via udp_enter(). */
static void
udp_exit(conn_t *connp)
{
	_UDP_EXIT(connp);
}

/*
 * Return the next anonymous port in the privileged port range for
 * bind checking.
 *
 * Trusted Extension (TX) notes: TX allows administrator to mark or
 * reserve ports as Multilevel ports (MLP). MLP has special function
 * on TX systems. Once a port is made MLP, it's not available as
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" to find a suitable anon port.
 */
static in_port_t
udp_get_next_priv_port(udp_t *udp)
{
	/* Shared scan cursor; persists across calls and across streams. */
	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
	in_port_t nextport;
	boolean_t restart = B_FALSE;

retry:
	/*
	 * Wrap the cursor back to the top of the privileged range; a
	 * second wrap in one call means no port was usable — return 0.
	 */
	if (next_priv_port < udp_min_anonpriv_port ||
	    next_priv_port >= IPPORT_RESERVED) {
		next_priv_port = IPPORT_RESERVED - 1;
		if (restart)
			return (0);
		restart = B_TRUE;
	}

	/*
	 * On a labeled system, ask the TX framework to skip over MLP
	 * "holes".  NOTE(review): a zero return appears to mean the
	 * current candidate is acceptable as-is — confirm against
	 * tsol_next_port().
	 */
	if (is_system_labeled() &&
	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
		next_priv_port = nextport;
		goto retry;
	}

	return (next_priv_port--);
}

/* UDP bind hash report triggered via the Named Dispatch mechanism. */
/* ARGSUSED */
static int
udp_bind_hash_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
{
	udp_fanout_t	*udpf;
	int		i;
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;

	/* Refer to comments in udp_status_report(). */
	if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) {
		/* Rate-limit unprivileged requests for this report */
		if (ddi_get_lbolt() - udp_last_ndd_get_info_time <
		    drv_usectohz(udp_ndd_get_info_interval * 1000)) {
			(void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG);
			return (0);
		}
	}
	if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) {
		/* The following may work even if we cannot get a large buf. */
		(void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG);
		return (0);
	}

	(void) mi_mpprintf(mp,
	    "UDP     " MI_COL_HDRPAD_STR
	/*   12345678[89ABCDEF] */
	    " zone lport src addr        dest addr       port  state");
	/*    1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */

	zoneid = connp->conn_zoneid;

	for (i = 0; i < udp_bind_fanout_size; i++) {
		udpf = &udp_bind_fanout[i];
		mutex_enter(&udpf->uf_lock);

		/* Print the hash index. */
		/* Note: udp is reused below as the chain iterator. */
		udp = udpf->uf_udp;
		if (zoneid != GLOBAL_ZONEID) {
			/* skip to first entry in this zone; might be none */
			while (udp != NULL &&
			    udp->udp_connp->conn_zoneid != zoneid)
				udp = udp->udp_bind_hash;
		}
		if (udp != NULL) {
			uint_t print_len, buf_len;

			/* Clamp to remaining room in the report buffer */
			buf_len = mp->b_cont->b_datap->db_lim -
			    mp->b_cont->b_wptr;
			print_len = snprintf((char *)mp->b_cont->b_wptr,
			    buf_len, "%d\n", i);
			if (print_len < buf_len) {
				mp->b_cont->b_wptr += print_len;
			} else {
				mp->b_cont->b_wptr += buf_len;
			}
			for (; udp != NULL; udp = udp->udp_bind_hash) {
				if (zoneid == GLOBAL_ZONEID ||
				    zoneid == udp->udp_connp->conn_zoneid)
					udp_report_item(mp->b_cont, udp);
			}
		}
		mutex_exit(&udpf->uf_lock);
	}
	udp_last_ndd_get_info_time = ddi_get_lbolt();
	return (0);
}

/*
 * Hash list removal routine for udp_t structures.
 */
static void
udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
{
	udp_t	*udpnext;
	kmutex_t *lockp;

	/* Not on any bind-hash chain — nothing to do. */
	if (udp->udp_ptpbhn == NULL)
		return;

	/*
	 * Extract the lock pointer in case there are concurrent
	 * hash_remove's for this instance.
	 */
	ASSERT(udp->udp_port != 0);
	if (!caller_holds_lock) {
		lockp = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)].uf_lock;
		ASSERT(lockp != NULL);
		mutex_enter(lockp);
	}
	/* Re-check under the lock: a racing remove may have beaten us. */
	if (udp->udp_ptpbhn != NULL) {
		udpnext = udp->udp_bind_hash;
		if (udpnext != NULL) {
			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
			udp->udp_bind_hash = NULL;
		}
		*udp->udp_ptpbhn = udpnext;
		udp->udp_ptpbhn = NULL;
	}
	if (!caller_holds_lock) {
		mutex_exit(lockp);
	}
}

/*
 * Hash list insertion routine for udp_t structures.
 * Caller must hold uf->uf_lock.  Specific-address binds are kept ahead
 * of INADDR_ANY binds on the chain (see comment below).
 */
static void
udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
{
	udp_t	**udpp;
	udp_t	*udpnext;

	ASSERT(MUTEX_HELD(&uf->uf_lock));
	/* Unlink from any previous chain first (lock already held). */
	if (udp->udp_ptpbhn != NULL) {
		udp_bind_hash_remove(udp, B_TRUE);
	}
	udpp = &uf->uf_udp;
	udpnext = udpp[0];
	if (udpnext != NULL) {
		/*
		 * If the new udp bound to the INADDR_ANY address
		 * and the first one in the list is not bound to
		 * INADDR_ANY we skip all entries until we find the
		 * first one bound to INADDR_ANY.
		 * This makes sure that applications binding to a
		 * specific address get preference over those binding to
		 * INADDR_ANY.
		 */
		if (V6_OR_V4_INADDR_ANY(udp->udp_bound_v6src) &&
		    !V6_OR_V4_INADDR_ANY(udpnext->udp_bound_v6src)) {
			while ((udpnext = udpp[0]) != NULL &&
			    !V6_OR_V4_INADDR_ANY(
			    udpnext->udp_bound_v6src)) {
				udpp = &(udpnext->udp_bind_hash);
			}
			if (udpnext != NULL)
				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		} else {
			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
		}
	}
	udp->udp_bind_hash = udpnext;
	udp->udp_ptpbhn = udpp;
	udpp[0] = udp;
}

/*
 * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
 * passed to udp_wput.
 * It associates a port number and local address with the stream.
 * The O_T_BIND_REQ/T_BIND_REQ is passed downstream to ip with the UDP
 * protocol type (IPPROTO_UDP) placed in the message following the address.
 * A T_BIND_ACK message is passed upstream when ip acknowledges the request.
 * (Called as writer.)
 *
 * Note that UDP over IPv4 and IPv6 sockets can use the same port number
 * without setting SO_REUSEADDR. This is needed so that they
 * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the udp_g_next_port_to_try.
 */
static void
udp_bind(queue_t *q, mblk_t *mp)
{
	sin_t		*sin;
	sin6_t		*sin6;
	mblk_t		*mp1;
	in_port_t	port;		/* Host byte order */
	in_port_t	requested_port;	/* Host byte order */
	struct T_bind_req *tbr;
	int		count;
	in6_addr_t	v6src;
	boolean_t	bind_to_req_port_only;
	int		loopmax;
	udp_fanout_t	*udpf;
	in_port_t	lport;		/* Network byte order */
	zoneid_t	zoneid;
	conn_t		*connp;
	udp_t		*udp;
	boolean_t	is_inaddr_any;
	mlp_type_t	addrtype, mlptype;

	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;
	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad req, len %u",
		    (uint_t)(mp->b_wptr - mp->b_rptr));
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}

	if (udp->udp_state != TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	/*
	 * Reallocate the message to make sure we have enough room for an
	 * address and the protocol type.
	 */
	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t) + 1, 1);
	if (!mp1) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	mp = mp1;
	tbr = (struct T_bind_req *)mp->b_rptr;
	switch (tbr->ADDR_length) {
	case 0:			/* Request for a generic port */
		tbr->ADDR_offset = sizeof (struct T_bind_req);
		if (udp->udp_family == AF_INET) {
			tbr->ADDR_length = sizeof (sin_t);
			sin = (sin_t *)&tbr[1];
			*sin = sin_null;
			sin->sin_family = AF_INET;
			mp->b_wptr = (uchar_t *)&sin[1];
		} else {
			ASSERT(udp->udp_family == AF_INET6);
			tbr->ADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&tbr[1];
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			mp->b_wptr = (uchar_t *)&sin6[1];
		}
		port = 0;
		break;

	case sizeof (sin_t):	/* Complete IPv4 address */
		sin = (sin_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin->sin_port);
		break;

	case sizeof (sin6_t):	/* complete IPv6 address */
		sin6 = (sin6_t *)mi_offset_param(mp, tbr->ADDR_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		port = ntohs(sin6->sin6_port);
		break;

	default:		/* Invalid request */
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	requested_port = port;

	/* O_T_BIND_REQ semantics allow falling back to another port. */
	if (requested_port == 0 || tbr->PRIM_type == O_T_BIND_REQ)
		bind_to_req_port_only = B_FALSE;
	else			/* T_BIND_REQ and requested_port != 0 */
		bind_to_req_port_only = B_TRUE;

	if (requested_port == 0) {
		/*
		 * If the application passed in zero for the port number, it
		 * doesn't care which port number we bind to. Get one in the
		 * valid range.
		 */
		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			port = udp_update_next_port(udp,
			    udp_g_next_port_to_try, B_TRUE);
		}
	} else {
		/*
		 * If the port is in the well-known privileged range,
		 * make sure the caller was privileged.
		 */
		int i;
		boolean_t priv = B_FALSE;

		if (port < udp_smallest_nonpriv_port) {
			priv = B_TRUE;
		} else {
			/* extra privileged ports outside the low range */
			for (i = 0; i < udp_g_num_epriv_ports; i++) {
				if (port == udp_g_epriv_ports[i]) {
					priv = B_TRUE;
					break;
				}
			}
		}

		if (priv) {
			cred_t *cr = DB_CREDDEF(mp, connp->conn_cred);

			if (secpolicy_net_privaddr(cr, port) != 0) {
				udp_err_ack(q, mp, TACCES, 0);
				return;
			}
		}
	}

	/* Port allocator exhausted (see udp_get_next_priv_port). */
	if (port == 0) {
		udp_err_ack(q, mp, TNOADDR, 0);
		return;
	}

	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, IP will fill in the correct address
	 * each time an outbound packet is passed to it.
	 */
	if (udp->udp_family == AF_INET) {
		ASSERT(sin != NULL);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6src);
	} else {
		ASSERT(sin6 != NULL);
		v6src = sin6->sin6_addr;
		/* A v4-mapped bind makes this an IPv4 endpoint. */
		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
		}
	}

	/*
	 * If udp_reuseaddr is not set, then we have to make sure that
	 * the IP address and port number the application requested
	 * (or we selected for the application) is not being used by
	 * another stream. If another stream is already using the
	 * requested IP address and port, the behavior depends on
	 * "bind_to_req_port_only". If set the bind fails; otherwise we
	 * search for any unused port to bind to the stream.
	 *
	 * As per the BSD semantics, as modified by the Deering multicast
	 * changes, if udp_reuseaddr is set, then we allow multiple binds
	 * to the same port independent of the local IP address.
	 *
	 * This is slightly different than in SunOS 4.X which did not
	 * support IP multicast. Note that the change implemented by the
	 * Deering multicast code affects all binds - not only binding
	 * to IP multicast addresses.
	 *
	 * Note that when binding to port zero we ignore SO_REUSEADDR in
	 * order to guarantee a unique port.
	 */

	count = 0;
	if (udp->udp_anon_priv_bind) {
		/* loopmax = (IPPORT_RESERVED-1) - udp_min_anonpriv_port + 1 */
		loopmax = IPPORT_RESERVED - udp_min_anonpriv_port;
	} else {
		loopmax = udp_largest_anon_port - udp_smallest_anon_port + 1;
	}

	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
	zoneid = connp->conn_zoneid;

	for (;;) {
		udp_t		*udp1;
		boolean_t	found_exclbind = B_FALSE;

		/*
		 * Walk through the list of udp streams bound to
		 * requested port with the same IP address.
		 */
		lport = htons(port);
		udpf = &udp_bind_fanout[UDP_BIND_HASH(lport)];
		mutex_enter(&udpf->uf_lock);
		for (udp1 = udpf->uf_udp; udp1 != NULL;
		    udp1 = udp1->udp_bind_hash) {
			if (lport != udp1->udp_port)
				continue;

			/*
			 * On a labeled system, we must treat bindings to ports
			 * on shared IP addresses by sockets with MAC exemption
			 * privilege as being in all zones, as there's
			 * otherwise no way to identify the right receiver.
			 */
			if (zoneid != udp1->udp_connp->conn_zoneid &&
			    !udp->udp_mac_exempt && !udp1->udp_mac_exempt)
				continue;

			/*
			 * If UDP_EXCLBIND is set for either the bound or
			 * binding endpoint, the semantics of bind
			 * is changed according to the following chart.
			 *
			 * spec = specified address (v4 or v6)
			 * unspec = unspecified address (v4 or v6)
			 * A = specified addresses are different for endpoints
			 *
			 * bound	bind to		allowed?
			 * -------------------------------------
			 * unspec	unspec		no
			 * unspec	spec		no
			 * spec		unspec		no
			 * spec		spec		yes if A
			 *
			 * For labeled systems, SO_MAC_EXEMPT behaves the same
			 * as UDP_EXCLBIND, except that zoneid is ignored.
			 */
			if (udp1->udp_exclbind || udp->udp_exclbind ||
			    udp1->udp_mac_exempt || udp->udp_mac_exempt) {
				if (V6_OR_V4_INADDR_ANY(
				    udp1->udp_bound_v6src) ||
				    is_inaddr_any ||
				    IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
				    &v6src)) {
					found_exclbind = B_TRUE;
					break;
				}
				continue;
			}

			/*
			 * Check ipversion to allow IPv4 and IPv6 sockets to
			 * have disjoint port number spaces.
			 */
			if (udp->udp_ipversion != udp1->udp_ipversion) {

				/*
				 * On the first time through the loop, if the
				 * user intentionally specified a
				 * particular port number, then ignore any
				 * bindings of the other protocol that may
				 * conflict. This allows the user to bind IPv6
				 * alone and get both v4 and v6, or bind
				 * both and get each separately. On subsequent
				 * times through the loop, we're checking a
				 * port that we chose (not the user) and thus
				 * we do not allow casual duplicate bindings.
				 */
				if (count == 0 && requested_port != 0)
					continue;
			}

			/*
			 * No difference depending on SO_REUSEADDR.
			 *
			 * If existing port is bound to a
			 * non-wildcard IP address and
			 * the requesting stream is bound to
			 * a distinct different IP addresses
			 * (non-wildcard, also), keep going.
			 */
			if (!is_inaddr_any &&
			    !V6_OR_V4_INADDR_ANY(udp1->udp_bound_v6src) &&
			    !IN6_ARE_ADDR_EQUAL(&udp1->udp_bound_v6src,
			    &v6src)) {
				continue;
			}
			break;
		}

		if (!found_exclbind &&
		    (udp->udp_reuseaddr && requested_port != 0)) {
			break;
		}

		if (udp1 == NULL) {
			/*
			 * No other stream has this IP address
			 * and port number. We can use it.
			 */
			break;
		}
		mutex_exit(&udpf->uf_lock);
		if (bind_to_req_port_only) {
			/*
			 * We get here only when requested port
			 * is bound (and only first of the for()
			 * loop iteration).
			 *
			 * The semantics of this bind request
			 * require it to fail so we return from
			 * the routine (and exit the loop).
			 *
			 */
			udp_err_ack(q, mp, TADDRBUSY, 0);
			return;
		}

		if (udp->udp_anon_priv_bind) {
			port = udp_get_next_priv_port(udp);
		} else {
			if ((count == 0) && (requested_port != 0)) {
				/*
				 * If the application wants us to find
				 * a port, get one to start with. Set
				 * requested_port to 0, so that we will
				 * update udp_g_next_port_to_try below.
				 */
				port = udp_update_next_port(udp,
				    udp_g_next_port_to_try, B_TRUE);
				requested_port = 0;
			} else {
				port = udp_update_next_port(udp, port + 1,
				    B_FALSE);
			}
		}

		if (port == 0 || ++count >= loopmax) {
			/*
			 * We've tried every possible port number and
			 * there are none available, so send an error
			 * to the user.
			 */
			udp_err_ack(q, mp, TNOADDR, 0);
			return;
		}
	}
	/* The for(;;) loop above exits with udpf->uf_lock still held. */

	/*
	 * Copy the source address into our udp structure. This address
	 * may still be zero; if so, ip will fill in the correct address
	 * each time an outbound packet is passed to it.
	 * If we are binding to a broadcast or multicast address udp_rput
	 * will clear the source address when it receives the T_BIND_ACK.
	 */
	udp->udp_v6src = udp->udp_bound_v6src = v6src;
	udp->udp_port = lport;
	/*
	 * Now reset the next anonymous port if the application requested
	 * an anonymous port, or we handed out the next anonymous port.
	 */
	if ((requested_port == 0) && (!udp->udp_anon_priv_bind)) {
		udp_g_next_port_to_try = port + 1;
	}

	/* Initialize the O_T_BIND_REQ/T_BIND_REQ for ip. */
	if (udp->udp_family == AF_INET) {
		sin->sin_port = udp->udp_port;
	} else {
		int error;

		sin6->sin6_port = udp->udp_port;
		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			mutex_exit(&udpf->uf_lock);
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	udp->udp_state = TS_IDLE;
	udp_bind_hash_insert(udpf, udp);
	mutex_exit(&udpf->uf_lock);

	if (cl_inet_bind) {
		/*
		 * Running in cluster mode - register bind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_bind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}

	}

	/*
	 * Trusted Extensions MLP checks.  NOTE(review): the error paths
	 * below return with the endpoint already inserted in the bind
	 * hash in state TS_IDLE — confirm callers handle rebind/close.
	 */
	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
	if (is_system_labeled() && (!connp->conn_anon_port ||
	    connp->conn_anon_mlp)) {
		uint16_t mlpport;
		cred_t *cr = connp->conn_cred;
		zone_t *zone;

		connp->conn_mlp_type = udp->udp_recvucred ? mlptBoth :
		    mlptSingle;
		addrtype = tsol_mlp_addr_type(zoneid, IPV6_VERSION, &v6src);
		if (addrtype == mlptSingle) {
			udp_err_ack(q, mp, TNOADDR, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}
		mlpport = connp->conn_anon_port ? PMAPPORT : port;
		zone = crgetzone(cr);
		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
		    addrtype);
		if (mlptype != mlptSingle &&
		    (connp->conn_mlp_type == mlptSingle ||
		    secpolicy_net_bindmlp(cr) != 0)) {
			if (udp->udp_debug) {
				(void) strlog(UDP_MOD_ID, 0, 1,
				    SL_ERROR|SL_TRACE,
				    "udp_bind: no priv for multilevel port %d",
				    mlpport);
			}
			udp_err_ack(q, mp, TACCES, 0);
			connp->conn_anon_port = B_FALSE;
			connp->conn_mlp_type = mlptSingle;
			return;
		}

		/*
		 * If we're specifically binding a shared IP address and the
		 * port is MLP on shared addresses, then check to see if this
		 * zone actually owns the MLP. Reject if not.
		 */
		if (mlptype == mlptShared && addrtype == mlptShared) {
			zoneid_t mlpzone;

			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
			    htons(mlpport));
			if (connp->conn_zoneid != mlpzone) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: attempt to bind port "
					    "%d on shared addr in zone %d "
					    "(should be %d)",
					    mlpport, connp->conn_zoneid,
					    mlpzone);
				}
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		if (connp->conn_anon_port) {
			int error;

			error = tsol_mlp_anon(zone, mlptype, connp->conn_ulp,
			    port, B_TRUE);
			if (error != 0) {
				if (udp->udp_debug) {
					(void) strlog(UDP_MOD_ID, 0, 1,
					    SL_ERROR|SL_TRACE,
					    "udp_bind: cannot establish anon "
					    "MLP for port %d", port);
				}
				udp_err_ack(q, mp, TACCES, 0);
				connp->conn_anon_port = B_FALSE;
				connp->conn_mlp_type = mlptSingle;
				return;
			}
		}
		connp->conn_mlp_type = mlptype;
	}

	/* Pass the protocol number in the message following the address. */
	*mp->b_wptr++ = IPPROTO_UDP;
	if (!V6_OR_V4_INADDR_ANY(udp->udp_v6src)) {
		/*
		 * Append a request for an IRE if udp_v6src not
		 * zero (IPv4 - INADDR_ANY, or IPv6 - all-zeroes address).
		 */
		mp->b_cont = allocb(sizeof (ire_t), BPRI_HI);
		if (!mp->b_cont) {
			udp_err_ack(q, mp, TSYSERR, ENOMEM);
			return;
		}
		mp->b_cont->b_wptr += sizeof (ire_t);
		mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE;
	}
	if (udp->udp_family == AF_INET6)
		mp = ip_bind_v6(q, mp, connp, NULL);
	else
		mp = ip_bind_v4(q, mp, connp);

	/*
	 * A non-NULL return means IP answered synchronously; process the
	 * ack now.  A NULL return means the bind was deferred — take a
	 * conn ref that udp_resume_bind_cb's path releases.
	 */
	if (mp != NULL)
		udp_rput_other(_RD(q), mp);
	else
		CONN_INC_REF(connp);
}


/* Re-enter the deferred bind ack through the serialization machinery. */
void
udp_resume_bind(conn_t *connp, mblk_t *mp)
{
	udp_enter(connp, mp, udp_resume_bind_cb, SQTAG_BIND_RETRY);
}

/*
 * This is called from ip_wput_nondata to resume a deferred UDP bind.
 */
/* ARGSUSED */
static void
udp_resume_bind_cb(void *arg, mblk_t *mp, void *arg2)
{
	conn_t *connp = arg;

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));

	udp_rput_other(connp->conn_rq, mp);

	CONN_OPER_PENDING_DONE(connp);
	udp_exit(connp);
}

/*
 * This routine handles each T_CONN_REQ message passed to udp. It
 * associates a default destination address with the stream.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying local and remote address/port
 *	IRE_DB_REQ_TYPE	- to get an IRE back containing ire_type and src
 *	T_OK_ACK	- for the T_CONN_REQ
 *	T_CONN_CON	- to keep the TPI user happy
 *
 * The connect completes in udp_rput.
 * When a T_BIND_ACK is received information is extracted from the IRE
 * and the two appended messages are sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
 */
static void
udp_connect(queue_t *q, mblk_t *mp)
{
	sin6_t	*sin6;
	sin_t	*sin;
	struct T_conn_req	*tcr;
	in6_addr_t v6dst;
	ipaddr_t v4dst;
	uint16_t dstport;
	uint32_t flowinfo;
	mblk_t	*mp1, *mp2;
	udp_fanout_t	*udpf;
	udp_t	*udp, *udp1;

	udp = Q_TO_UDP(q);

	tcr = (struct T_conn_req *)mp->b_rptr;

	/* A bit of sanity checking */
	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
		udp_err_ack(q, mp, TPROTO, 0);
		return;
	}
	/*
	 * This UDP must have bound to a port already before doing
	 * a connect.
	 */
	if (udp->udp_state == TS_UNBND) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_connect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	ASSERT(udp->udp_port != 0 && udp->udp_ptpbhn != NULL);

	udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)];

	if (udp->udp_state == TS_DATA_XFER) {
		/* Already connected - clear out state */
		mutex_enter(&udpf->uf_lock);
		udp->udp_v6src = udp->udp_bound_v6src;
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
	}

	if (tcr->OPT_length != 0) {
		udp_err_ack(q, mp, TBADOPT, 0);
		return;
	}

	/*
	 * Determine packet type based on type of address passed in
	 * the request should contain an IPv4 or IPv6 address.
	 * Make sure that address family matches the type of
	 * family of the address passed down
	 */
	switch (tcr->DEST_length) {
	default:
		udp_err_ack(q, mp, TBADADDR, 0);
		return;

	case sizeof (sin_t):
		sin = (sin_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin_t));
		if (sin == NULL || !OK_32PTR((char *)sin)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET ||
		    sin->sin_family != AF_INET) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v4dst = sin->sin_addr.s_addr;
		dstport = sin->sin_port;
		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
		ASSERT(udp->udp_ipversion == IPV4_VERSION);
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE +
		    udp->udp_ip_snd_options_len;
		break;

	case sizeof (sin6_t):
		sin6 = (sin6_t *)mi_offset_param(mp, tcr->DEST_offset,
		    sizeof (sin6_t));
		if (sin6 == NULL || !OK_32PTR((char *)sin6)) {
			udp_err_ack(q, mp, TSYSERR, EINVAL);
			return;
		}
		if (udp->udp_family != AF_INET6 ||
		    sin6->sin6_family != AF_INET6) {
			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
			return;
		}
		v6dst = sin6->sin6_addr;
		/* A v4-mapped destination flips this endpoint to IPv4. */
		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
			udp->udp_ipversion = IPV4_VERSION;
			udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH +
			    UDPH_SIZE + udp->udp_ip_snd_options_len;
			flowinfo = 0;
		} else {
			udp->udp_ipversion = IPV6_VERSION;
			udp->udp_max_hdr_len = udp->udp_sticky_hdrs_len;
			flowinfo = sin6->sin6_flowinfo;
		}
		dstport = sin6->sin6_port;
		break;
	}
	if (dstport == 0) {
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}

	/*
	 * Create a default IP header with no IP options.
	 */
	udp->udp_dstport = dstport;
	if (udp->udp_ipversion == IPV4_VERSION) {
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (v4dst == INADDR_ANY) {
			v4dst = htonl(INADDR_LOOPBACK);
			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
			if (udp->udp_family == AF_INET) {
				sin->sin_addr.s_addr = v4dst;
			} else {
				sin6->sin6_addr = v6dst;
			}
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = 0;

		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * use the address of that interface as our
		 * source address if no source address has been set.
		 */
		if (V4_PART_OF_V6(udp->udp_v6src) == INADDR_ANY &&
		    CLASSD(v4dst) &&
		    udp->udp_multicast_if_addr != INADDR_ANY) {
			IN6_IPADDR_TO_V4MAPPED(udp->udp_multicast_if_addr,
			    &udp->udp_v6src);
		}
	} else {
		ASSERT(udp->udp_ipversion == IPV6_VERSION);
		/*
		 * Interpret a zero destination to mean loopback.
		 * Update the T_CONN_REQ (sin/sin6) since it is used to
		 * generate the T_CONN_CON.
		 */
		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
			v6dst = ipv6_loopback;
			sin6->sin6_addr = v6dst;
		}
		udp->udp_v6dst = v6dst;
		udp->udp_flowinfo = flowinfo;
		/*
		 * If the destination address is multicast and
		 * an outgoing multicast interface has been set,
		 * then the ip bind logic will pick the correct source
		 * address (i.e. matching the outgoing multicast interface).
		 */
	}

	/*
	 * Verify that the src/port/dst/port is unique for all
	 * connections in TS_DATA_XFER
	 */
	mutex_enter(&udpf->uf_lock);
	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
		if (udp1->udp_state != TS_DATA_XFER)
			continue;
		if (udp->udp_port != udp1->udp_port ||
		    udp->udp_ipversion != udp1->udp_ipversion ||
		    dstport != udp1->udp_dstport ||
		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6src, &udp1->udp_v6src) ||
		    !IN6_ARE_ADDR_EQUAL(&v6dst, &udp1->udp_v6dst))
			continue;
		mutex_exit(&udpf->uf_lock);
		udp_err_ack(q, mp, TBADADDR, 0);
		return;
	}
	udp->udp_state = TS_DATA_XFER;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to verify that there is a route
	 * and to determine the source address.
	 * This will come back as T_BIND_ACK with an IRE_DB_TYPE in rput.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa_conn_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (ipa6_conn_t));
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		/* Shared failure path: undo the TS_DATA_XFER transition. */
bind_failed:
		mutex_enter(&udpf->uf_lock);
		udp->udp_state = TS_IDLE;
		mutex_exit(&udpf->uf_lock);
		return;
	}

	/*
	 * We also have to send a connection confirmation to
	 * keep TLI happy. Prepare it for udp_rput.
	 */
	if (udp->udp_family == AF_INET)
		mp2 = mi_tpi_conn_con(NULL, (char *)sin,
		    sizeof (*sin), NULL, 0);
	else
		mp2 = mi_tpi_conn_con(NULL, (char *)sin6,
		    sizeof (*sin6), NULL, 0);
	if (mp2 == NULL) {
		freemsg(mp1);
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_CONN_REQ for the ack. */
		freemsg(mp2);
		udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
		goto bind_failed;
	}

	/* Hang onto the T_OK_ACK and T_CONN_CON for later. */
	linkb(mp1, mp);
	linkb(mp1, mp2);

	mblk_setcred(mp1, udp->udp_connp->conn_cred);
	if (udp->udp_family == AF_INET)
		mp1 = ip_bind_v4(q, mp1, udp->udp_connp);
	else
		mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);

	/* NULL means IP deferred the bind; keep a conn ref until it resumes. */
	if (mp1 != NULL)
		udp_rput_other(_RD(q), mp1);
	else
		CONN_INC_REF(udp->udp_connp);
}

/*
 * STREAMS close routine: quiesce the conn, drain received data,
 * detach queue procedures and convert connp back to a bare IP endpoint.
 */
static int
udp_close(queue_t *q)
{
	conn_t	*connp = Q_TO_CONN(UDP_WR(q));
	udp_t	*udp;
	queue_t	*ip_rq = RD(UDP_WR(q));

	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
	udp = connp->conn_udp;

	ip_quiesce_conn(connp);
	/*
	 * Disable read-side synchronous stream
	 * interface and drain any queued data.
	 */
	udp_rcv_drain(q, udp, B_TRUE);
	ASSERT(!udp->udp_direct_sockfs);

	qprocsoff(q);

	/* restore IP module's high and low water marks to default values */
	ip_rq->q_hiwat = ip_rq->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_hiwat = WR(ip_rq)->q_qinfo->qi_minfo->mi_hiwat;
	WR(ip_rq)->q_lowat = WR(ip_rq)->q_qinfo->qi_minfo->mi_lowat;

	/* The drain above must have left the receive list empty. */
	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	ASSERT(udp->udp_rcv_list_tail == NULL);

	udp_close_free(connp);

	/*
	 * Restore connp as an IP endpoint.
	 * Locking required to prevent a race with udp_snmp_get()/
	 * ipcl_get_next_conn(), which selects conn_t which are
	 * IPCL_UDP and not CONN_CONDEMNED.
	 */
	mutex_enter(&connp->conn_lock);
	connp->conn_flags &= ~IPCL_UDP;
	connp->conn_state_flags &=
	    ~(CONN_CLOSING | CONN_CONDEMNED | CONN_QUIESCED);
	connp->conn_ulp_labeled = B_FALSE;
	mutex_exit(&connp->conn_lock);

	return (0);
}

/*
 * Called in the close path from IP (ip_quiesce_conn) to quiesce the conn
 */
void
udp_quiesce_conn(conn_t *connp)
{
	udp_t	*udp = connp->conn_udp;

	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&(V4_PART_OF_V6(udp->udp_v6src))),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)(&(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		}
	}

	udp_bind_hash_remove(udp, B_FALSE);

	/*
	 * Wait until all in-flight readers/squeue work have drained and
	 * the endpoint has settled back to UDP_MT_HOT.
	 */
	mutex_enter(&connp->conn_lock);
	while (udp->udp_reader_count != 0 || udp->udp_squeue_count != 0 ||
	    udp->udp_mode != UDP_MT_HOT) {
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	}
	mutex_exit(&connp->conn_lock);
}

/* Free all per-endpoint state and return the udp_t to its kmem cache. */
void
udp_close_free(conn_t *connp)
{
	udp_t *udp = connp->conn_udp;

	/* If there are any options associated with the stream, free them. */
	if (udp->udp_ip_snd_options) {
		mi_free((char *)udp->udp_ip_snd_options);
		udp->udp_ip_snd_options = NULL;
	}

	if (udp->udp_ip_rcv_options) {
		mi_free((char *)udp->udp_ip_rcv_options);
		udp->udp_ip_rcv_options = NULL;
	}

	/* Free memory associated with sticky options */
	if (udp->udp_sticky_hdrs_len != 0) {
		kmem_free(udp->udp_sticky_hdrs,
		    udp->udp_sticky_hdrs_len);
		udp->udp_sticky_hdrs = NULL;
		udp->udp_sticky_hdrs_len = 0;
	}

	ip6_pkt_free(&udp->udp_sticky_ipp);

	/* Break the connp<->udp association before freeing the udp_t. */
	udp->udp_connp = NULL;
	connp->conn_udp = NULL;
	kmem_cache_free(udp_cache, udp);
}

/*
 * This routine handles each T_DISCON_REQ message passed to udp
 * as an indication that UDP is no longer connected. This results
 * in sending a T_BIND_REQ to IP to restore the binding to just
 * the local address/port.
 *
 * This routine sends down a T_BIND_REQ to IP with the following mblks:
 *	T_BIND_REQ	- specifying just the local address/port
 *	T_OK_ACK	- for the T_DISCON_REQ
 *
 * The disconnect completes in udp_rput.
 * When a T_BIND_ACK is received the appended T_OK_ACK is sent to the TPI user.
 * Should udp_rput receive T_ERROR_ACK for the T_BIND_REQ it will convert
 * it to an error ack for the appropriate primitive.
 */
static void
udp_disconnect(queue_t *q, mblk_t *mp)
{
	udp_t	*udp = Q_TO_UDP(q);
	mblk_t	*mp1;
	udp_fanout_t *udpf;

	/* T_DISCON_REQ is only valid on a connected endpoint. */
	if (udp->udp_state != TS_DATA_XFER) {
		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
		    "udp_disconnect: bad state, %u", udp->udp_state);
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}

	/*
	 * Under the fanout lock, fall back from the connected source
	 * address to the originally-bound one and return to TS_IDLE.
	 */
	udpf = &udp_bind_fanout[UDP_BIND_HASH(udp->udp_port)];
	mutex_enter(&udpf->uf_lock);
	udp->udp_v6src = udp->udp_bound_v6src;
	udp->udp_state = TS_IDLE;
	mutex_exit(&udpf->uf_lock);

	/*
	 * Send down bind to IP to remove the full binding and revert
	 * to the local address binding.
	 */
	if (udp->udp_family == AF_INET)
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin_t));
	else
		mp1 = udp_ip_bind_mp(udp, O_T_BIND_REQ, sizeof (sin6_t));
	if (mp1 == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}

	/* Convert the T_DISCON_REQ mblk itself into the deferred T_OK_ACK. */
	mp = mi_tpi_ok_ack_alloc(mp);
	if (mp == NULL) {
		/* Unable to reuse the T_DISCON_REQ for the ack. */
		udp_err_ack_prim(q, mp1, T_DISCON_REQ, TSYSERR, ENOMEM);
		return;
	}

	if (udp->udp_family == AF_INET6) {
		int error;

		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, error);
			freemsg(mp1);
			return;
		}
	}
	mutex_enter(&udpf->uf_lock);
	udp->udp_discon_pending = 1;
	mutex_exit(&udpf->uf_lock);

	/* Append the T_OK_ACK to the T_BIND_REQ for udp_rput */
	linkb(mp1, mp);

	if (udp->udp_family == AF_INET6)
		mp1 = ip_bind_v6(q, mp1, udp->udp_connp, NULL);
	else
		mp1 = ip_bind_v4(q, mp1, udp->udp_connp);

	/*
	 * Synchronous reply from IP: process it now.  Otherwise hold a
	 * conn reference until the deferred reply arrives in udp_rput.
	 */
	if (mp1 != NULL)
		udp_rput_other(_RD(q), mp1);
	else
		CONN_INC_REF(udp->udp_connp);
}

/* This routine creates a T_ERROR_ACK message and passes it upstream.
*/ 2129 static void 2130 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error) 2131 { 2132 if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL) 2133 putnext(UDP_RD(q), mp); 2134 } 2135 2136 /* Shorthand to generate and send TPI error acks to our client */ 2137 static void 2138 udp_err_ack_prim(queue_t *q, mblk_t *mp, int primitive, t_scalar_t t_error, 2139 int sys_error) 2140 { 2141 struct T_error_ack *teackp; 2142 2143 if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack), 2144 M_PCPROTO, T_ERROR_ACK)) != NULL) { 2145 teackp = (struct T_error_ack *)mp->b_rptr; 2146 teackp->ERROR_prim = primitive; 2147 teackp->TLI_error = t_error; 2148 teackp->UNIX_error = sys_error; 2149 putnext(UDP_RD(q), mp); 2150 } 2151 } 2152 2153 /*ARGSUSED*/ 2154 static int 2155 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 2156 { 2157 int i; 2158 2159 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2160 if (udp_g_epriv_ports[i] != 0) 2161 (void) mi_mpprintf(mp, "%d ", udp_g_epriv_ports[i]); 2162 } 2163 return (0); 2164 } 2165 2166 /* ARGSUSED */ 2167 static int 2168 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2169 cred_t *cr) 2170 { 2171 long new_value; 2172 int i; 2173 2174 /* 2175 * Fail the request if the new value does not lie within the 2176 * port number limits. 
2177 */ 2178 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2179 new_value <= 0 || new_value >= 65536) { 2180 return (EINVAL); 2181 } 2182 2183 /* Check if the value is already in the list */ 2184 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2185 if (new_value == udp_g_epriv_ports[i]) { 2186 return (EEXIST); 2187 } 2188 } 2189 /* Find an empty slot */ 2190 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2191 if (udp_g_epriv_ports[i] == 0) 2192 break; 2193 } 2194 if (i == udp_g_num_epriv_ports) { 2195 return (EOVERFLOW); 2196 } 2197 2198 /* Set the new value */ 2199 udp_g_epriv_ports[i] = (in_port_t)new_value; 2200 return (0); 2201 } 2202 2203 /* ARGSUSED */ 2204 static int 2205 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp, 2206 cred_t *cr) 2207 { 2208 long new_value; 2209 int i; 2210 2211 /* 2212 * Fail the request if the new value does not lie within the 2213 * port number limits. 2214 */ 2215 if (ddi_strtol(value, NULL, 10, &new_value) != 0 || 2216 new_value <= 0 || new_value >= 65536) { 2217 return (EINVAL); 2218 } 2219 2220 /* Check that the value is already in the list */ 2221 for (i = 0; i < udp_g_num_epriv_ports; i++) { 2222 if (udp_g_epriv_ports[i] == new_value) 2223 break; 2224 } 2225 if (i == udp_g_num_epriv_ports) { 2226 return (ESRCH); 2227 } 2228 2229 /* Clear the value */ 2230 udp_g_epriv_ports[i] = 0; 2231 return (0); 2232 } 2233 2234 /* At minimum we need 4 bytes of UDP header */ 2235 #define ICMP_MIN_UDP_HDR 4 2236 2237 /* 2238 * udp_icmp_error is called by udp_rput to process ICMP msgs. passed up by IP. 2239 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors. 2240 * Assumes that IP has pulled up everything up to and including the ICMP header. 2241 * An M_CTL could potentially come here from some other module (i.e. if UDP 2242 * is pushed on some module other than IP). 
 * Thus, if we find that the M_CTL
 * does not have enough ICMP information, following STREAMS conventions,
 * we send it upstream assuming it is an M_CTL we don't understand.
 */
static void
udp_icmp_error(queue_t *q, mblk_t *mp)
{
	icmph_t	*icmph;
	ipha_t	*ipha;
	int	iph_hdr_length;
	udpha_t	*udpha;
	sin_t	sin;
	sin6_t	sin6;
	mblk_t	*mp1;
	int	error = 0;
	size_t	mp_size = MBLKL(mp);
	udp_t	*udp = Q_TO_UDP(q);

	/*
	 * Assume IP provides aligned packets - otherwise toss
	 */
	if (!OK_32PTR(mp->b_rptr)) {
		freemsg(mp);
		return;
	}

	/*
	 * Verify that we have a complete IP header and the application has
	 * asked for errors. If not, send it upstream.
	 */
	if (!udp->udp_dgram_errind || mp_size < sizeof (ipha_t)) {
noticmpv4:
		putnext(UDP_RD(q), mp);
		return;
	}

	ipha = (ipha_t *)mp->b_rptr;
	/*
	 * Verify IP version. Anything other than IPv4 or IPv6 packet is sent
	 * upstream. ICMPv6 is handled in udp_icmp_error_ipv6.
	 */
	switch (IPH_HDR_VERSION(ipha)) {
	case IPV6_VERSION:
		udp_icmp_error_ipv6(q, mp);
		return;
	case IPV4_VERSION:
		break;
	default:
		goto noticmpv4;
	}

	/* Skip past the outer IP and ICMP headers */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
	/*
	 * If we don't have the correct outer IP header length or if the ULP
	 * is not IPPROTO_ICMP or if we don't have a complete inner IP header
	 * send the packet upstream.
	 */
	if (iph_hdr_length < sizeof (ipha_t) ||
	    ipha->ipha_protocol != IPPROTO_ICMP ||
	    (ipha_t *)&icmph[1] + 1 > (ipha_t *)mp->b_wptr) {
		goto noticmpv4;
	}
	/* ipha now points at the IP header embedded in the ICMP payload. */
	ipha = (ipha_t *)&icmph[1];

	/* Skip past the inner IP and find the ULP header */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);
	/*
	 * If we don't have the correct inner IP header length or if the ULP
	 * is not IPPROTO_UDP or if we don't have at least ICMP_MIN_UDP_HDR
	 * bytes of UDP header, send it upstream.
	 */
	if (iph_hdr_length < sizeof (ipha_t) ||
	    ipha->ipha_protocol != IPPROTO_UDP ||
	    (uchar_t *)udpha + ICMP_MIN_UDP_HDR > mp->b_wptr) {
		goto noticmpv4;
	}

	/* Only a few ICMP types map to a permanent error for the app. */
	switch (icmph->icmph_type) {
	case ICMP_DEST_UNREACHABLE:
		switch (icmph->icmph_code) {
		case ICMP_FRAGMENTATION_NEEDED:
			/*
			 * IP has already adjusted the path MTU.
			 * XXX Somehow pass MTU indication to application?
			 */
			break;
		case ICMP_PORT_UNREACHABLE:
		case ICMP_PROTOCOL_UNREACHABLE:
			error = ECONNREFUSED;
			break;
		default:
			/* Transient errors */
			break;
		}
		break;
	default:
		/* Transient errors */
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Build a T_UDERROR_IND addressed with the destination of the
	 * offending datagram (taken from the inner IP/UDP headers).
	 * NOTE(review): no default case — assumes udp_family is always
	 * AF_INET or AF_INET6; mp1 would be uninitialized otherwise.
	 */
	switch (udp->udp_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = ipha->ipha_dst;
		sin.sin_port = udpha->uha_dst_port;
		mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t), NULL, 0,
		    error);
		break;
	case AF_INET6:
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
		sin6.sin6_port = udpha->uha_dst_port;

		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
		    NULL, 0, error);
		break;
	}
	if (mp1)
		putnext(UDP_RD(q), mp1);
	freemsg(mp);
}

/*
 * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
 * Assumes that IP has pulled up all the extension headers as well as the
 * ICMPv6 header.
 * An M_CTL could potentially come here from some other module (i.e. if UDP
 * is pushed on some module other than IP). Thus, if we find that the M_CTL
 * does not have enough ICMP information, following STREAMS conventions,
 * we send it upstream assuming it is an M_CTL we don't understand. The reason
 * it might get here is if the non-ICMP M_CTL accidently has 6 in the version
 * field (when cast to ipha_t in udp_icmp_error).
 */
static void
udp_icmp_error_ipv6(queue_t *q, mblk_t *mp)
{
	icmp6_t	*icmp6;
	ip6_t	*ip6h, *outer_ip6h;
	uint16_t hdr_length;
	uint8_t	*nexthdrp;
	udpha_t	*udpha;
	sin6_t	sin6;
	mblk_t	*mp1;
	int	error = 0;
	size_t	mp_size = MBLKL(mp);
	udp_t	*udp = Q_TO_UDP(q);

	/*
	 * Verify that we have a complete IP header. If not, send it upstream.
	 */
	if (mp_size < sizeof (ip6_t)) {
noticmpv6:
		putnext(UDP_RD(q), mp);
		return;
	}

	outer_ip6h = (ip6_t *)mp->b_rptr;
	/*
	 * Verify this is an ICMPV6 packet, else send it upstream
	 * (walking extension headers if the next-header isn't ICMPv6
	 * directly).
	 */
	if (outer_ip6h->ip6_nxt == IPPROTO_ICMPV6) {
		hdr_length = IPV6_HDR_LEN;
	} else if (!ip_hdr_length_nexthdr_v6(mp, outer_ip6h, &hdr_length,
	    &nexthdrp) ||
	    *nexthdrp != IPPROTO_ICMPV6) {
		goto noticmpv6;
	}
	icmp6 = (icmp6_t *)&mp->b_rptr[hdr_length];
	ip6h = (ip6_t *)&icmp6[1];
	/*
	 * Verify we have a complete ICMP and inner IP header.
	 */
	if ((uchar_t *)&ip6h[1] > mp->b_wptr)
		goto noticmpv6;

	/* Locate the ULP header inside the embedded (inner) packet. */
	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &hdr_length, &nexthdrp))
		goto noticmpv6;
	udpha = (udpha_t *)((char *)ip6h + hdr_length);
	/*
	 * Validate inner header. If the ULP is not IPPROTO_UDP or if we don't
	 * have at least ICMP_MIN_UDP_HDR bytes of UDP header send the
	 * packet upstream.
	 */
	if ((*nexthdrp != IPPROTO_UDP) ||
	    ((uchar_t *)udpha + ICMP_MIN_UDP_HDR) > mp->b_wptr) {
		goto noticmpv6;
	}

	switch (icmp6->icmp6_type) {
	case ICMP6_DST_UNREACH:
		switch (icmp6->icmp6_code) {
		case ICMP6_DST_UNREACH_NOPORT:
			error = ECONNREFUSED;
			break;
		case ICMP6_DST_UNREACH_ADMIN:
		case ICMP6_DST_UNREACH_NOROUTE:
		case ICMP6_DST_UNREACH_BEYONDSCOPE:
		case ICMP6_DST_UNREACH_ADDR:
			/* Transient errors */
			break;
		default:
			break;
		}
		break;
	case ICMP6_PACKET_TOO_BIG: {
		struct T_unitdata_ind	*tudi;
		struct T_opthdr		*toh;
		size_t			udi_size;
		mblk_t			*newmp;
		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
		    sizeof (struct ip6_mtuinfo);
		sin6_t			*sin6;
		struct ip6_mtuinfo	*mtuinfo;

		/*
		 * If the application has requested to receive path mtu
		 * information, send up an empty message containing an
		 * IPV6_PATHMTU ancillary data item.
		 */
		if (!udp->udp_ipv6_recvpathmtu)
			break;

		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
		    opt_length;
		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
			BUMP_MIB(&udp_mib, udpInErrors);
			break;
		}

		/*
		 * newmp->b_cont is left to NULL on purpose. This is an
		 * empty message containing only ancillary data.
		 */
		newmp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
		newmp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
		tudi->OPT_length = opt_length;

		/* Source address: the (connected) peer. */
		sin6 = (sin6_t *)&tudi[1];
		bzero(sin6, sizeof (sin6_t));
		sin6->sin6_family = AF_INET6;
		sin6->sin6_addr = udp->udp_v6dst;

		/* Single IPV6_PATHMTU option carrying the ip6_mtuinfo. */
		toh = (struct T_opthdr *)&sin6[1];
		toh->level = IPPROTO_IPV6;
		toh->name = IPV6_PATHMTU;
		toh->len = opt_length;
		toh->status = 0;

		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
		/*
		 * We've consumed everything we need from the original
		 * message. Free it, then send our empty message.
		 */
		freemsg(mp);
		putnext(UDP_RD(q), newmp);
		return;
	}
	case ICMP6_TIME_EXCEEDED:
		/* Transient errors */
		break;
	case ICMP6_PARAM_PROB:
		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
		    (uchar_t *)nexthdrp) {
			error = ECONNREFUSED;
			break;
		}
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/* Permanent error: deliver a T_UDERROR_IND to the application. */
	sin6 = sin6_null;
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = ip6h->ip6_dst;
	sin6.sin6_port = udpha->uha_dst_port;
	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;

	mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t), NULL, 0,
	    error);
	if (mp1)
		putnext(UDP_RD(q), mp1);
	freemsg(mp);
}

/*
 * This routine responds to T_ADDR_REQ messages.
 * It is called by udp_wput.
 * The local address is filled in if endpoint is bound. The remote address
 * is filled in if remote address has been specified ("connected endpoint")
 * (The concept of connected CLTS sockets is alien to published TPI
 * but we support it anyway).
 */
static void
udp_addr_req(queue_t *q, mblk_t *mp)
{
	sin_t	*sin;
	sin6_t	*sin6;
	mblk_t	*ackmp;
	struct T_addr_ack *taa;
	udp_t	*udp = Q_TO_UDP(q);

	/* Make it large enough for worst case */
	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
	    2 * sizeof (sin6_t), 1);
	if (ackmp == NULL) {
		udp_err_ack(q, mp, TSYSERR, ENOMEM);
		return;
	}
	taa = (struct T_addr_ack *)ackmp->b_rptr;

	bzero(taa, sizeof (struct T_addr_ack));
	ackmp->b_wptr = (uchar_t *)&taa[1];

	taa->PRIM_type = T_ADDR_ACK;
	ackmp->b_datap->db_type = M_PCPROTO;
	/*
	 * Note: Following code assumes 32 bit alignment of basic
	 * data structures like sin_t and struct T_addr_ack.
	 */
	if (udp->udp_state != TS_UNBND) {
		/*
		 * Fill in local address first
		 */
		taa->LOCADDR_offset = sizeof (*taa);
		if (udp->udp_family == AF_INET) {
			taa->LOCADDR_length = sizeof (sin_t);
			sin = (sin_t *)&taa[1];
			/* Fill zeroes and then initialize non-zero fields */
			*sin = sin_null;
			sin->sin_family = AF_INET;
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src,
				    sin->sin_addr.s_addr);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				IN6_V4MAPPED_TO_IPADDR(&udp->udp_bound_v6src,
				    sin->sin_addr.s_addr);
			}
			sin->sin_port = udp->udp_port;
			ackmp->b_wptr = (uchar_t *)&sin[1];
			if (udp->udp_state == TS_DATA_XFER) {
				/*
				 * connected, fill remote address too
				 */
				taa->REMADDR_length = sizeof (sin_t);
				/* assumed 32-bit alignment */
				taa->REMADDR_offset = taa->LOCADDR_offset +
				    taa->LOCADDR_length;

				sin = (sin_t *)(ackmp->b_rptr +
				    taa->REMADDR_offset);
				/* initialize */
				*sin = sin_null;
				sin->sin_family = AF_INET;
				sin->sin_addr.s_addr =
				    V4_PART_OF_V6(udp->udp_v6dst);
				sin->sin_port = udp->udp_dstport;
				ackmp->b_wptr = (uchar_t *)&sin[1];
			}
		} else {
			taa->LOCADDR_length = sizeof (sin6_t);
			sin6 = (sin6_t *)&taa[1];
			/* Fill zeroes and then initialize non-zero fields */
			*sin6 = sin6_null;
			sin6->sin6_family = AF_INET6;
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				sin6->sin6_addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast. Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				sin6->sin6_addr =
				    udp->udp_bound_v6src;
			}
			sin6->sin6_port = udp->udp_port;
			ackmp->b_wptr = (uchar_t *)&sin6[1];
			if (udp->udp_state == TS_DATA_XFER) {
				/*
				 * connected, fill remote address too
				 */
				taa->REMADDR_length = sizeof (sin6_t);
				/* assumed 32-bit alignment */
				taa->REMADDR_offset = taa->LOCADDR_offset +
				    taa->LOCADDR_length;

				sin6 = (sin6_t *)(ackmp->b_rptr +
				    taa->REMADDR_offset);
				/* initialize */
				*sin6 = sin6_null;
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = udp->udp_v6dst;
				sin6->sin6_port = udp->udp_dstport;
				ackmp->b_wptr = (uchar_t *)&sin6[1];
			}
			/*
			 * NOTE(review): this store looks redundant — b_wptr
			 * was already set on both paths above.  Harmless.
			 */
			ackmp->b_wptr = (uchar_t *)&sin6[1];
		}
	}
	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
	putnext(UDP_RD(q), ackmp);
}

/*
 * Fill in a T_info_ack from the family-specific template, then overlay
 * the endpoint's current TPI state and the maximum option size.
 */
static void
udp_copy_info(struct T_info_ack *tap, udp_t *udp)
{
	if (udp->udp_family == AF_INET) {
		*tap = udp_g_t_info_ack_ipv4;
	} else {
		*tap = udp_g_t_info_ack_ipv6;
	}
	tap->CURRENT_state = udp->udp_state;
	tap->OPT_size = udp_max_optsize;
}

/*
 * This routine responds to T_CAPABILITY_REQ messages.  It is called by
 * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
 * udp_g_t_info_ack.  The current state of the stream is copied from
 * udp_state.
2693 */ 2694 static void 2695 udp_capability_req(queue_t *q, mblk_t *mp) 2696 { 2697 t_uscalar_t cap_bits1; 2698 struct T_capability_ack *tcap; 2699 udp_t *udp = Q_TO_UDP(q); 2700 2701 cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1; 2702 2703 mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack), 2704 mp->b_datap->db_type, T_CAPABILITY_ACK); 2705 if (!mp) 2706 return; 2707 2708 tcap = (struct T_capability_ack *)mp->b_rptr; 2709 tcap->CAP_bits1 = 0; 2710 2711 if (cap_bits1 & TC1_INFO) { 2712 udp_copy_info(&tcap->INFO_ack, udp); 2713 tcap->CAP_bits1 |= TC1_INFO; 2714 } 2715 2716 putnext(UDP_RD(q), mp); 2717 } 2718 2719 /* 2720 * This routine responds to T_INFO_REQ messages. It is called by udp_wput. 2721 * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack. 2722 * The current state of the stream is copied from udp_state. 2723 */ 2724 static void 2725 udp_info_req(queue_t *q, mblk_t *mp) 2726 { 2727 udp_t *udp = Q_TO_UDP(q); 2728 2729 /* Create a T_INFO_ACK message. */ 2730 mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO, 2731 T_INFO_ACK); 2732 if (!mp) 2733 return; 2734 udp_copy_info((struct T_info_ack *)mp->b_rptr, udp); 2735 putnext(UDP_RD(q), mp); 2736 } 2737 2738 /* 2739 * IP recognizes seven kinds of bind requests: 2740 * 2741 * - A zero-length address binds only to the protocol number. 2742 * 2743 * - A 4-byte address is treated as a request to 2744 * validate that the address is a valid local IPv4 2745 * address, appropriate for an application to bind to. 2746 * IP does the verification, but does not make any note 2747 * of the address at this time. 2748 * 2749 * - A 16-byte address contains is treated as a request 2750 * to validate a local IPv6 address, as the 4-byte 2751 * address case above. 2752 * 2753 * - A 16-byte sockaddr_in to validate the local IPv4 address and also 2754 * use it for the inbound fanout of packets. 
2755 * 2756 * - A 24-byte sockaddr_in6 to validate the local IPv6 address and also 2757 * use it for the inbound fanout of packets. 2758 * 2759 * - A 12-byte address (ipa_conn_t) containing complete IPv4 fanout 2760 * information consisting of local and remote addresses 2761 * and ports. In this case, the addresses are both 2762 * validated as appropriate for this operation, and, if 2763 * so, the information is retained for use in the 2764 * inbound fanout. 2765 * 2766 * - A 36-byte address address (ipa6_conn_t) containing complete IPv6 2767 * fanout information, like the 12-byte case above. 2768 * 2769 * IP will also fill in the IRE request mblk with information 2770 * regarding our peer. In all cases, we notify IP of our protocol 2771 * type by appending a single protocol byte to the bind request. 2772 */ 2773 static mblk_t * 2774 udp_ip_bind_mp(udp_t *udp, t_scalar_t bind_prim, t_scalar_t addr_length) 2775 { 2776 char *cp; 2777 mblk_t *mp; 2778 struct T_bind_req *tbr; 2779 ipa_conn_t *ac; 2780 ipa6_conn_t *ac6; 2781 sin_t *sin; 2782 sin6_t *sin6; 2783 2784 ASSERT(bind_prim == O_T_BIND_REQ || bind_prim == T_BIND_REQ); 2785 2786 mp = allocb(sizeof (*tbr) + addr_length + 1, BPRI_HI); 2787 if (!mp) 2788 return (mp); 2789 mp->b_datap->db_type = M_PROTO; 2790 tbr = (struct T_bind_req *)mp->b_rptr; 2791 tbr->PRIM_type = bind_prim; 2792 tbr->ADDR_offset = sizeof (*tbr); 2793 tbr->CONIND_number = 0; 2794 tbr->ADDR_length = addr_length; 2795 cp = (char *)&tbr[1]; 2796 switch (addr_length) { 2797 case sizeof (ipa_conn_t): 2798 ASSERT(udp->udp_family == AF_INET); 2799 /* Append a request for an IRE */ 2800 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2801 if (!mp->b_cont) { 2802 freemsg(mp); 2803 return (NULL); 2804 } 2805 mp->b_cont->b_wptr += sizeof (ire_t); 2806 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2807 2808 /* cp known to be 32 bit aligned */ 2809 ac = (ipa_conn_t *)cp; 2810 ac->ac_laddr = V4_PART_OF_V6(udp->udp_v6src); 2811 ac->ac_faddr = 
V4_PART_OF_V6(udp->udp_v6dst); 2812 ac->ac_fport = udp->udp_dstport; 2813 ac->ac_lport = udp->udp_port; 2814 break; 2815 2816 case sizeof (ipa6_conn_t): 2817 ASSERT(udp->udp_family == AF_INET6); 2818 /* Append a request for an IRE */ 2819 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2820 if (!mp->b_cont) { 2821 freemsg(mp); 2822 return (NULL); 2823 } 2824 mp->b_cont->b_wptr += sizeof (ire_t); 2825 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2826 2827 /* cp known to be 32 bit aligned */ 2828 ac6 = (ipa6_conn_t *)cp; 2829 ac6->ac6_laddr = udp->udp_v6src; 2830 ac6->ac6_faddr = udp->udp_v6dst; 2831 ac6->ac6_fport = udp->udp_dstport; 2832 ac6->ac6_lport = udp->udp_port; 2833 break; 2834 2835 case sizeof (sin_t): 2836 ASSERT(udp->udp_family == AF_INET); 2837 /* Append a request for an IRE */ 2838 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2839 if (!mp->b_cont) { 2840 freemsg(mp); 2841 return (NULL); 2842 } 2843 mp->b_cont->b_wptr += sizeof (ire_t); 2844 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2845 2846 sin = (sin_t *)cp; 2847 *sin = sin_null; 2848 sin->sin_family = AF_INET; 2849 sin->sin_addr.s_addr = V4_PART_OF_V6(udp->udp_bound_v6src); 2850 sin->sin_port = udp->udp_port; 2851 break; 2852 2853 case sizeof (sin6_t): 2854 ASSERT(udp->udp_family == AF_INET6); 2855 /* Append a request for an IRE */ 2856 mp->b_cont = allocb(sizeof (ire_t), BPRI_HI); 2857 if (!mp->b_cont) { 2858 freemsg(mp); 2859 return (NULL); 2860 } 2861 mp->b_cont->b_wptr += sizeof (ire_t); 2862 mp->b_cont->b_datap->db_type = IRE_DB_REQ_TYPE; 2863 2864 sin6 = (sin6_t *)cp; 2865 *sin6 = sin6_null; 2866 sin6->sin6_family = AF_INET6; 2867 sin6->sin6_addr = udp->udp_bound_v6src; 2868 sin6->sin6_port = udp->udp_port; 2869 break; 2870 } 2871 /* Add protocol number to end */ 2872 cp[addr_length] = (char)IPPROTO_UDP; 2873 mp->b_wptr = (uchar_t *)&cp[addr_length + 1]; 2874 return (mp); 2875 } 2876 2877 /* 2878 * This is the open routine for udp. 
 * It allocates a udp_t structure for
 * the stream and, on the first open of the module, creates an ND table.
 */
/* ARGSUSED */
static int
udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	int	err;
	udp_t	*udp;
	conn_t	*connp;
	zoneid_t zoneid = getzoneid();
	queue_t	*ip_wq;
	char	*name;

	TRACE_1(TR_FAC_UDP, TR_UDP_OPEN, "udp_open: q %p", q);

	/* If the stream is already open, return immediately. */
	if (q->q_ptr != NULL)
		return (0);

	/* If this is not a push of udp as a module, fail. */
	if (sflag != MODOPEN)
		return (EINVAL);

	q->q_hiwat = udp_recv_hiwat;
	WR(q)->q_hiwat = udp_xmit_hiwat;
	WR(q)->q_lowat = udp_xmit_lowat;

	/* Insert ourselves in the stream since we're about to walk q_next */
	qprocson(q);

	udp = kmem_cache_alloc(udp_cache, KM_SLEEP);
	bzero(udp, sizeof (*udp));

	/*
	 * UDP is supported only as a module and it has to be pushed directly
	 * above the device instance of IP. If UDP is pushed anywhere else
	 * on a stream, it will support just T_SVR4_OPTMGMT_REQ for the
	 * sake of MIB browsers and fail everything else.
	 */
	ip_wq = WR(q)->q_next;
	if (ip_wq->q_next != NULL ||
	    (name = ip_wq->q_qinfo->qi_minfo->mi_idname) == NULL ||
	    strcmp(name, IP_MOD_NAME) != 0 ||
	    ip_wq->q_qinfo->qi_minfo->mi_idnum != IP_MOD_ID) {
		/* Support just SNMP for MIB browsers */
		connp = ipcl_conn_create(IPCL_IPCCONN, KM_SLEEP);
		connp->conn_rq = q;
		connp->conn_wq = WR(q);
		connp->conn_flags |= IPCL_UDPMOD;
		connp->conn_cred = credp;
		connp->conn_zoneid = zoneid;
		connp->conn_udp = udp;
		udp->udp_connp = connp;
		q->q_ptr = WR(q)->q_ptr = connp;
		crhold(credp);
		/* Switch to the SNMP-only qinit tables. */
		q->q_qinfo = &udp_snmp_rinit;
		WR(q)->q_qinfo = &udp_snmp_winit;
		return (0);
	}

	/*
	 * Initialize the udp_t structure for this stream.
	 * From here on, q refers to IP's read queue and connp is the
	 * conn_t of the underlying IP device instance.
	 */
	q = RD(ip_wq);
	connp = Q_TO_CONN(q);
	mutex_enter(&connp->conn_lock);
	connp->conn_proto = IPPROTO_UDP;
	connp->conn_flags |= IPCL_UDP;
	connp->conn_sqp = IP_SQUEUE_GET(lbolt);
	connp->conn_udp = udp;

	/* Set the initial state of the stream and the privilege status. */
	udp->udp_connp = connp;
	udp->udp_state = TS_UNBND;
	udp->udp_mode = UDP_MT_HOT;
	if (getmajor(*devp) == (major_t)UDP6_MAJ) {
		udp->udp_family = AF_INET6;
		udp->udp_ipversion = IPV6_VERSION;
		udp->udp_max_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
		udp->udp_ttl = udp_ipv6_hoplimit;
		connp->conn_af_isv6 = B_TRUE;
		connp->conn_flags |= IPCL_ISV6;
	} else {
		udp->udp_family = AF_INET;
		udp->udp_ipversion = IPV4_VERSION;
		udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE;
		udp->udp_ttl = udp_ipv4_ttl;
		connp->conn_af_isv6 = B_FALSE;
		connp->conn_flags &= ~IPCL_ISV6;
	}

	udp->udp_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	connp->conn_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
	connp->conn_zoneid = zoneid;

	/* Record open time/pid (exposed for observability, e.g. SNMP). */
	udp->udp_open_time = lbolt64;
	udp->udp_open_pid = curproc->p_pid;

	/*
	 * If the caller has the process-wide flag set, then default to MAC
	 * exempt mode.  This allows read-down to unlabeled hosts.
	 */
	if (getpflags(NET_MAC_AWARE, credp) != 0)
		udp->udp_mac_exempt = B_TRUE;

	if (connp->conn_flags & IPCL_SOCKET) {
		udp->udp_issocket = B_TRUE;
		udp->udp_direct_sockfs = B_TRUE;
	}

	connp->conn_ulp_labeled = is_system_labeled();

	mutex_exit(&connp->conn_lock);

	/*
	 * The transmit hiwat/lowat is only looked at on IP's queue.
	 * Store in q_hiwat in order to return on SO_SNDBUF/SO_RCVBUF
	 * getsockopts.
	 */
	q->q_hiwat = udp_recv_hiwat;
	WR(q)->q_hiwat = udp_xmit_hiwat;
	WR(q)->q_lowat = udp_xmit_lowat;

	if (udp->udp_family == AF_INET6) {
		/* Build initial header template for transmit */
		if ((err = udp_build_hdrs(q, udp)) != 0) {
error:
			/* Undo qprocson and the conn<->udp linkage. */
			qprocsoff(UDP_RD(q));
			udp->udp_connp = NULL;
			connp->conn_udp = NULL;
			kmem_cache_free(udp_cache, udp);
			return (err);
		}
	}

	/* Set the Stream head write offset and high watermark. */
	(void) mi_set_sth_wroff(UDP_RD(q),
	    udp->udp_max_hdr_len + udp_wroff_extra);
	(void) mi_set_sth_hiwat(UDP_RD(q), udp_set_rcv_hiwat(udp, q->q_hiwat));

	WR(UDP_RD(q))->q_qinfo = &udp_winit;

	return (0);
}

/*
 * Which UDP options OK to set through T_UNITDATA_REQ...
 */
/* ARGSUSED */
static boolean_t
udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
{
	/* All options are currently permitted on T_UNITDATA_REQ. */
	return (B_TRUE);
}

/*
 * This routine gets default values of certain options whose default
 * values are maintained by protocol specific code.
 * Returns the size of the default value written through `ptr', or -1
 * if the option is not handled here.
 */
/* ARGSUSED */
int
udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
	int *i1 = (int *)ptr;

	switch (level) {
	case IPPROTO_IP:
		switch (name) {
		case IP_MULTICAST_TTL:
			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
			return (sizeof (uchar_t));
		case IP_MULTICAST_LOOP:
			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
			return (sizeof (uchar_t));
		}
		break;
	case IPPROTO_IPV6:
		switch (name) {
		case IPV6_MULTICAST_HOPS:
			*i1 = IP_DEFAULT_MULTICAST_TTL;
			return (sizeof (int));
		case IPV6_MULTICAST_LOOP:
			*i1 = IP_DEFAULT_MULTICAST_LOOP;
			return (sizeof (int));
		case IPV6_UNICAST_HOPS:
			*i1 = udp_ipv6_hoplimit;
			return (sizeof (int));
		}
		break;
	}
	return (-1);
}

/*
 * This routine retrieves the current status
of socket options
 * and expects the caller to pass in the queue pointer of the
 * upper instance. It returns the size of the option retrieved.
 */
int
udp_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
{
    int     *i1 = (int *)ptr;   /* overlay for the common sizeof (int) case */
    conn_t  *connp;
    udp_t   *udp;
    ip6_pkt_t *ipp;
    int     len;

    /* Operate on the write side of the upper instance's queue pair. */
    q = UDP_WR(q);
    connp = Q_TO_CONN(q);
    udp = connp->conn_udp;
    ipp = &udp->udp_sticky_ipp;

    switch (level) {
    case SOL_SOCKET:
        switch (name) {
        case SO_DEBUG:
            *i1 = udp->udp_debug;
            break;  /* goto sizeof (int) option return */
        case SO_REUSEADDR:
            *i1 = udp->udp_reuseaddr;
            break;  /* goto sizeof (int) option return */
        case SO_TYPE:
            *i1 = SOCK_DGRAM;
            break;  /* goto sizeof (int) option return */

        /*
         * The following three items are available here,
         * but are only meaningful to IP.
         */
        case SO_DONTROUTE:
            *i1 = udp->udp_dontroute;
            break;  /* goto sizeof (int) option return */
        case SO_USELOOPBACK:
            *i1 = udp->udp_useloopback;
            break;  /* goto sizeof (int) option return */
        case SO_BROADCAST:
            *i1 = udp->udp_broadcast;
            break;  /* goto sizeof (int) option return */

        case SO_SNDBUF:
            *i1 = q->q_hiwat;
            break;  /* goto sizeof (int) option return */
        case SO_RCVBUF:
            *i1 = RD(q)->q_hiwat;
            break;  /* goto sizeof (int) option return */
        case SO_DGRAM_ERRIND:
            *i1 = udp->udp_dgram_errind;
            break;  /* goto sizeof (int) option return */
        case SO_RECVUCRED:
            *i1 = udp->udp_recvucred;
            break;  /* goto sizeof (int) option return */
        case SO_TIMESTAMP:
            *i1 = udp->udp_timestamp;
            break;  /* goto sizeof (int) option return */
        case SO_ANON_MLP:
            *i1 = udp->udp_anon_mlp;
            break;  /* goto sizeof (int) option return */
        case SO_MAC_EXEMPT:
            *i1 = udp->udp_mac_exempt;
            break;  /* goto sizeof (int) option return */
        case SO_ALLZONES:
            *i1 = connp->conn_allzones;
            break;  /* goto sizeof (int) option return */
        case SO_EXCLBIND:
            *i1 = udp->udp_exclbind ? SO_EXCLBIND : 0;
            break;
        default:
            return (-1);
        }
        break;
    case IPPROTO_IP:
        if (udp->udp_family != AF_INET)
            return (-1);
        switch (name) {
        case IP_OPTIONS:
        case T_IP_OPTIONS:
            /* Do not expose the kernel-inserted label option to users. */
            len = udp->udp_ip_rcv_options_len - udp->udp_label_len;
            if (len > 0) {
                bcopy(udp->udp_ip_rcv_options +
                    udp->udp_label_len, ptr, len);
            }
            return (len);
        case IP_TOS:
        case T_IP_TOS:
            *i1 = (int)udp->udp_type_of_service;
            break;  /* goto sizeof (int) option return */
        case IP_TTL:
            *i1 = (int)udp->udp_ttl;
            break;  /* goto sizeof (int) option return */
        case IP_NEXTHOP:
            /* Handled at IP level */
            return (-EINVAL);
        case IP_MULTICAST_IF:
            /* 0 address if not set */
            *(ipaddr_t *)ptr = udp->udp_multicast_if_addr;
            return (sizeof (ipaddr_t));
        case IP_MULTICAST_TTL:
            *(uchar_t *)ptr = udp->udp_multicast_ttl;
            return (sizeof (uchar_t));
        case IP_MULTICAST_LOOP:
            *ptr = connp->conn_multicast_loop;
            return (sizeof (uint8_t));
        case IP_RECVOPTS:
            *i1 = udp->udp_recvopts;
            break;  /* goto sizeof (int) option return */
        case IP_RECVDSTADDR:
            *i1 = udp->udp_recvdstaddr;
            break;  /* goto sizeof (int) option return */
        case IP_RECVIF:
            *i1 = udp->udp_recvif;
            break;  /* goto sizeof (int) option return */
        case IP_RECVSLLA:
            *i1 = udp->udp_recvslla;
            break;  /* goto sizeof (int) option return */
        case IP_RECVTTL:
            *i1 = udp->udp_recvttl;
            break;  /* goto sizeof (int) option return */
        case IP_ADD_MEMBERSHIP:
        case IP_DROP_MEMBERSHIP:
        case IP_BLOCK_SOURCE:
        case IP_UNBLOCK_SOURCE:
        case IP_ADD_SOURCE_MEMBERSHIP:
        case IP_DROP_SOURCE_MEMBERSHIP:
        case MCAST_JOIN_GROUP:
        case MCAST_LEAVE_GROUP:
        case MCAST_BLOCK_SOURCE:
        case MCAST_UNBLOCK_SOURCE:
        case MCAST_JOIN_SOURCE_GROUP:
        case MCAST_LEAVE_SOURCE_GROUP:
        case IP_DONTFAILOVER_IF:
            /* cannot "get" the value for these */
            return (-1);
        case IP_BOUND_IF:
            /* Zero if not set */
            *i1 = udp->udp_bound_if;
            break;  /* goto sizeof (int) option return */
        case IP_UNSPEC_SRC:
            *i1 = udp->udp_unspec_source;
            break;  /* goto sizeof (int) option return */
        case IP_XMIT_IF:
            *i1 = udp->udp_xmit_if;
            break;  /* goto sizeof (int) option return */
        default:
            return (-1);
        }
        break;
    case IPPROTO_IPV6:
        if (udp->udp_family != AF_INET6)
            return (-1);
        switch (name) {
        case IPV6_UNICAST_HOPS:
            *i1 = (unsigned int)udp->udp_ttl;
            break;  /* goto sizeof (int) option return */
        case IPV6_MULTICAST_IF:
            /* 0 index if not set */
            *i1 = udp->udp_multicast_if_index;
            break;  /* goto sizeof (int) option return */
        case IPV6_MULTICAST_HOPS:
            *i1 = udp->udp_multicast_ttl;
            break;  /* goto sizeof (int) option return */
        case IPV6_MULTICAST_LOOP:
            *i1 = connp->conn_multicast_loop;
            break;  /* goto sizeof (int) option return */
        case IPV6_JOIN_GROUP:
        case IPV6_LEAVE_GROUP:
        case MCAST_JOIN_GROUP:
        case MCAST_LEAVE_GROUP:
        case MCAST_BLOCK_SOURCE:
        case MCAST_UNBLOCK_SOURCE:
        case MCAST_JOIN_SOURCE_GROUP:
        case MCAST_LEAVE_SOURCE_GROUP:
            /* cannot "get" the value for these */
            return (-1);
        case IPV6_BOUND_IF:
            /* Zero if not set */
            *i1 = udp->udp_bound_if;
            break;  /* goto sizeof (int) option return */
        case IPV6_UNSPEC_SRC:
            *i1 = udp->udp_unspec_source;
            break;  /* goto sizeof (int) option return */
        case IPV6_RECVPKTINFO:
            *i1 = udp->udp_ipv6_recvpktinfo;
            break;  /* goto sizeof (int) option return */
        case IPV6_RECVTCLASS:
            *i1 = udp->udp_ipv6_recvtclass;
            break;  /* goto sizeof (int) option return */
        case IPV6_RECVPATHMTU:
            *i1 = udp->udp_ipv6_recvpathmtu;
            break;  /* goto sizeof (int) option return */
        case IPV6_RECVHOPLIMIT:
            *i1 = udp->udp_ipv6_recvhoplimit;
            break;  /* goto sizeof (int) option return */
        case IPV6_RECVHOPOPTS:
            *i1 = udp->udp_ipv6_recvhopopts;
            break;  /* goto sizeof (int) option return */
        case IPV6_RECVDSTOPTS:
            *i1 = udp->udp_ipv6_recvdstopts;
            break;  /* goto sizeof (int) option return */
        case _OLD_IPV6_RECVDSTOPTS:
            *i1 = udp->udp_old_ipv6_recvdstopts;
            break;  /* goto sizeof (int) option return */
        case IPV6_RECVRTHDRDSTOPTS:
            *i1 = udp->udp_ipv6_recvrthdrdstopts;
            break;  /* goto sizeof (int) option return */
        case IPV6_RECVRTHDR:
            *i1 = udp->udp_ipv6_recvrthdr;
            break;  /* goto sizeof (int) option return */
        case IPV6_PKTINFO: {
            /* XXX assumes that caller has room for max size! */
            struct in6_pktinfo *pkti;

            pkti = (struct in6_pktinfo *)ptr;
            if (ipp->ipp_fields & IPPF_IFINDEX)
                pkti->ipi6_ifindex = ipp->ipp_ifindex;
            else
                pkti->ipi6_ifindex = 0;
            if (ipp->ipp_fields & IPPF_ADDR)
                pkti->ipi6_addr = ipp->ipp_addr;
            else
                pkti->ipi6_addr = ipv6_all_zeros;
            return (sizeof (struct in6_pktinfo));
        }
        case IPV6_TCLASS:
            if (ipp->ipp_fields & IPPF_TCLASS)
                *i1 = ipp->ipp_tclass;
            else
                *i1 = IPV6_FLOW_TCLASS(
                    IPV6_DEFAULT_VERS_AND_FLOW);
            break;  /* goto sizeof (int) option return */
        case IPV6_NEXTHOP: {
            sin6_t *sin6 = (sin6_t *)ptr;

            if (!(ipp->ipp_fields & IPPF_NEXTHOP))
                return (0);
            *sin6 = sin6_null;
            sin6->sin6_family = AF_INET6;
            sin6->sin6_addr = ipp->ipp_nexthop;
            return (sizeof (sin6_t));
        }
        case IPV6_HOPOPTS:
            if (!(ipp->ipp_fields & IPPF_HOPOPTS))
                return (0);
            if (ipp->ipp_hopoptslen <= udp->udp_label_len_v6)
                return (0);
            /*
             * The cipso/label option is added by kernel.
             * User is not usually aware of this option.
             * We copy out the hbh opt after the label option.
             */
            bcopy((char *)ipp->ipp_hopopts + udp->udp_label_len_v6,
                ptr, ipp->ipp_hopoptslen - udp->udp_label_len_v6);
            if (udp->udp_label_len_v6 > 0) {
                /* Patch next-header and recompute the 8-byte length unit. */
                ptr[0] = ((char *)ipp->ipp_hopopts)[0];
                ptr[1] = (ipp->ipp_hopoptslen -
                    udp->udp_label_len_v6 + 7) / 8 - 1;
            }
            return (ipp->ipp_hopoptslen - udp->udp_label_len_v6);
        case IPV6_RTHDRDSTOPTS:
            if (!(ipp->ipp_fields & IPPF_RTDSTOPTS))
                return (0);
            bcopy(ipp->ipp_rtdstopts, ptr, ipp->ipp_rtdstoptslen);
            return (ipp->ipp_rtdstoptslen);
        case IPV6_RTHDR:
            if (!(ipp->ipp_fields & IPPF_RTHDR))
                return (0);
            bcopy(ipp->ipp_rthdr, ptr, ipp->ipp_rthdrlen);
            return (ipp->ipp_rthdrlen);
        case IPV6_DSTOPTS:
            if (!(ipp->ipp_fields & IPPF_DSTOPTS))
                return (0);
            bcopy(ipp->ipp_dstopts, ptr, ipp->ipp_dstoptslen);
            return (ipp->ipp_dstoptslen);
        case IPV6_PATHMTU:
            return (ip_fill_mtuinfo(&udp->udp_v6dst,
                udp->udp_dstport, (struct ip6_mtuinfo *)ptr));
        default:
            return (-1);
        }
        break;
    case IPPROTO_UDP:
        switch (name) {
        case UDP_ANONPRIVBIND:
            *i1 = udp->udp_anon_priv_bind;
            break;
        case UDP_EXCLBIND:
            *i1 = udp->udp_exclbind ? UDP_EXCLBIND : 0;
            break;
        case UDP_RCVHDR:
            *i1 = udp->udp_rcvhdr ? 1 : 0;
            break;
        default:
            return (-1);
        }
        break;
    default:
        return (-1);
    }
    /* Common exit for all fixed-size (int) options. */
    return (sizeof (int));
}

/*
 * This routine sets socket options; it expects the caller
 * to pass in the queue pointer of the upper instance.
3382 */ 3383 /* ARGSUSED */ 3384 int 3385 udp_opt_set(queue_t *q, uint_t optset_context, int level, 3386 int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp, 3387 uchar_t *outvalp, void *thisdg_attrs, cred_t *cr, mblk_t *mblk) 3388 { 3389 udpattrs_t *attrs = thisdg_attrs; 3390 int *i1 = (int *)invalp; 3391 boolean_t onoff = (*i1 == 0) ? 0 : 1; 3392 boolean_t checkonly; 3393 int error; 3394 conn_t *connp; 3395 udp_t *udp; 3396 uint_t newlen; 3397 3398 q = UDP_WR(q); 3399 connp = Q_TO_CONN(q); 3400 udp = connp->conn_udp; 3401 3402 switch (optset_context) { 3403 case SETFN_OPTCOM_CHECKONLY: 3404 checkonly = B_TRUE; 3405 /* 3406 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ 3407 * inlen != 0 implies value supplied and 3408 * we have to "pretend" to set it. 3409 * inlen == 0 implies that there is no 3410 * value part in T_CHECK request and just validation 3411 * done elsewhere should be enough, we just return here. 3412 */ 3413 if (inlen == 0) { 3414 *outlenp = 0; 3415 return (0); 3416 } 3417 break; 3418 case SETFN_OPTCOM_NEGOTIATE: 3419 checkonly = B_FALSE; 3420 break; 3421 case SETFN_UD_NEGOTIATE: 3422 case SETFN_CONN_NEGOTIATE: 3423 checkonly = B_FALSE; 3424 /* 3425 * Negotiating local and "association-related" options 3426 * through T_UNITDATA_REQ. 3427 * 3428 * Following routine can filter out ones we do not 3429 * want to be "set" this way. 3430 */ 3431 if (!udp_opt_allow_udr_set(level, name)) { 3432 *outlenp = 0; 3433 return (EINVAL); 3434 } 3435 break; 3436 default: 3437 /* 3438 * We should never get here 3439 */ 3440 *outlenp = 0; 3441 return (EINVAL); 3442 } 3443 3444 ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) || 3445 (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0)); 3446 3447 /* 3448 * For fixed length options, no sanity check 3449 * of passed in length is done. It is assumed *_optcom_req() 3450 * routines do the right thing. 
3451 */ 3452 3453 switch (level) { 3454 case SOL_SOCKET: 3455 switch (name) { 3456 case SO_REUSEADDR: 3457 if (!checkonly) 3458 udp->udp_reuseaddr = onoff; 3459 break; 3460 case SO_DEBUG: 3461 if (!checkonly) 3462 udp->udp_debug = onoff; 3463 break; 3464 /* 3465 * The following three items are available here, 3466 * but are only meaningful to IP. 3467 */ 3468 case SO_DONTROUTE: 3469 if (!checkonly) 3470 udp->udp_dontroute = onoff; 3471 break; 3472 case SO_USELOOPBACK: 3473 if (!checkonly) 3474 udp->udp_useloopback = onoff; 3475 break; 3476 case SO_BROADCAST: 3477 if (!checkonly) 3478 udp->udp_broadcast = onoff; 3479 break; 3480 3481 case SO_SNDBUF: 3482 if (*i1 > udp_max_buf) { 3483 *outlenp = 0; 3484 return (ENOBUFS); 3485 } 3486 if (!checkonly) { 3487 q->q_hiwat = *i1; 3488 WR(UDP_RD(q))->q_hiwat = *i1; 3489 } 3490 break; 3491 case SO_RCVBUF: 3492 if (*i1 > udp_max_buf) { 3493 *outlenp = 0; 3494 return (ENOBUFS); 3495 } 3496 if (!checkonly) { 3497 RD(q)->q_hiwat = *i1; 3498 UDP_RD(q)->q_hiwat = *i1; 3499 (void) mi_set_sth_hiwat(UDP_RD(q), 3500 udp_set_rcv_hiwat(udp, *i1)); 3501 } 3502 break; 3503 case SO_DGRAM_ERRIND: 3504 if (!checkonly) 3505 udp->udp_dgram_errind = onoff; 3506 break; 3507 case SO_RECVUCRED: 3508 if (!checkonly) 3509 udp->udp_recvucred = onoff; 3510 break; 3511 case SO_ALLZONES: 3512 /* 3513 * "soft" error (negative) 3514 * option not handled at this level 3515 * Do not modify *outlenp. 
3516 */ 3517 return (-EINVAL); 3518 case SO_TIMESTAMP: 3519 if (!checkonly) 3520 udp->udp_timestamp = onoff; 3521 break; 3522 case SO_ANON_MLP: 3523 if (!checkonly) 3524 udp->udp_anon_mlp = onoff; 3525 break; 3526 case SO_MAC_EXEMPT: 3527 if (secpolicy_net_mac_aware(cr) != 0 || 3528 udp->udp_state != TS_UNBND) 3529 return (EACCES); 3530 if (!checkonly) 3531 udp->udp_mac_exempt = onoff; 3532 break; 3533 case SCM_UCRED: { 3534 struct ucred_s *ucr; 3535 cred_t *cr, *newcr; 3536 ts_label_t *tsl; 3537 3538 /* 3539 * Only sockets that have proper privileges and are 3540 * bound to MLPs will have any other value here, so 3541 * this implicitly tests for privilege to set label. 3542 */ 3543 if (connp->conn_mlp_type == mlptSingle) 3544 break; 3545 ucr = (struct ucred_s *)invalp; 3546 if (inlen != ucredsize || 3547 ucr->uc_labeloff < sizeof (*ucr) || 3548 ucr->uc_labeloff + sizeof (bslabel_t) > inlen) 3549 return (EINVAL); 3550 if (!checkonly) { 3551 mblk_t *mb; 3552 3553 if (attrs == NULL || 3554 (mb = attrs->udpattr_mb) == NULL) 3555 return (EINVAL); 3556 if ((cr = DB_CRED(mb)) == NULL) 3557 cr = udp->udp_connp->conn_cred; 3558 ASSERT(cr != NULL); 3559 if ((tsl = crgetlabel(cr)) == NULL) 3560 return (EINVAL); 3561 newcr = copycred_from_bslabel(cr, UCLABEL(ucr), 3562 tsl->tsl_doi, KM_NOSLEEP); 3563 if (newcr == NULL) 3564 return (ENOSR); 3565 mblk_setcred(mb, newcr); 3566 attrs->udpattr_credset = B_TRUE; 3567 crfree(newcr); 3568 } 3569 break; 3570 } 3571 case SO_EXCLBIND: 3572 if (!checkonly) 3573 udp->udp_exclbind = onoff; 3574 break; 3575 default: 3576 *outlenp = 0; 3577 return (EINVAL); 3578 } 3579 break; 3580 case IPPROTO_IP: 3581 if (udp->udp_family != AF_INET) { 3582 *outlenp = 0; 3583 return (ENOPROTOOPT); 3584 } 3585 switch (name) { 3586 case IP_OPTIONS: 3587 case T_IP_OPTIONS: 3588 /* Save options for use by IP. 
*/ 3589 newlen = inlen + udp->udp_label_len; 3590 if ((inlen & 0x3) || newlen > IP_MAX_OPT_LENGTH) { 3591 *outlenp = 0; 3592 return (EINVAL); 3593 } 3594 if (checkonly) 3595 break; 3596 3597 if (!tsol_option_set(&udp->udp_ip_snd_options, 3598 &udp->udp_ip_snd_options_len, 3599 udp->udp_label_len, invalp, inlen)) { 3600 *outlenp = 0; 3601 return (ENOMEM); 3602 } 3603 3604 udp->udp_max_hdr_len = IP_SIMPLE_HDR_LENGTH + 3605 UDPH_SIZE + udp->udp_ip_snd_options_len; 3606 (void) mi_set_sth_wroff(RD(q), udp->udp_max_hdr_len + 3607 udp_wroff_extra); 3608 break; 3609 3610 case IP_TTL: 3611 if (!checkonly) { 3612 udp->udp_ttl = (uchar_t)*i1; 3613 } 3614 break; 3615 case IP_TOS: 3616 case T_IP_TOS: 3617 if (!checkonly) { 3618 udp->udp_type_of_service = (uchar_t)*i1; 3619 } 3620 break; 3621 case IP_MULTICAST_IF: { 3622 /* 3623 * TODO should check OPTMGMT reply and undo this if 3624 * there is an error. 3625 */ 3626 struct in_addr *inap = (struct in_addr *)invalp; 3627 if (!checkonly) { 3628 udp->udp_multicast_if_addr = 3629 inap->s_addr; 3630 } 3631 break; 3632 } 3633 case IP_MULTICAST_TTL: 3634 if (!checkonly) 3635 udp->udp_multicast_ttl = *invalp; 3636 break; 3637 case IP_MULTICAST_LOOP: 3638 if (!checkonly) 3639 connp->conn_multicast_loop = *invalp; 3640 break; 3641 case IP_RECVOPTS: 3642 if (!checkonly) 3643 udp->udp_recvopts = onoff; 3644 break; 3645 case IP_RECVDSTADDR: 3646 if (!checkonly) 3647 udp->udp_recvdstaddr = onoff; 3648 break; 3649 case IP_RECVIF: 3650 if (!checkonly) 3651 udp->udp_recvif = onoff; 3652 break; 3653 case IP_RECVSLLA: 3654 if (!checkonly) 3655 udp->udp_recvslla = onoff; 3656 break; 3657 case IP_RECVTTL: 3658 if (!checkonly) 3659 udp->udp_recvttl = onoff; 3660 break; 3661 case IP_ADD_MEMBERSHIP: 3662 case IP_DROP_MEMBERSHIP: 3663 case IP_BLOCK_SOURCE: 3664 case IP_UNBLOCK_SOURCE: 3665 case IP_ADD_SOURCE_MEMBERSHIP: 3666 case IP_DROP_SOURCE_MEMBERSHIP: 3667 case MCAST_JOIN_GROUP: 3668 case MCAST_LEAVE_GROUP: 3669 case MCAST_BLOCK_SOURCE: 3670 case 
MCAST_UNBLOCK_SOURCE: 3671 case MCAST_JOIN_SOURCE_GROUP: 3672 case MCAST_LEAVE_SOURCE_GROUP: 3673 case IP_SEC_OPT: 3674 case IP_NEXTHOP: 3675 /* 3676 * "soft" error (negative) 3677 * option not handled at this level 3678 * Do not modify *outlenp. 3679 */ 3680 return (-EINVAL); 3681 case IP_BOUND_IF: 3682 if (!checkonly) 3683 udp->udp_bound_if = *i1; 3684 break; 3685 case IP_UNSPEC_SRC: 3686 if (!checkonly) 3687 udp->udp_unspec_source = onoff; 3688 break; 3689 case IP_XMIT_IF: 3690 if (!checkonly) 3691 udp->udp_xmit_if = *i1; 3692 break; 3693 default: 3694 *outlenp = 0; 3695 return (EINVAL); 3696 } 3697 break; 3698 case IPPROTO_IPV6: { 3699 ip6_pkt_t *ipp; 3700 boolean_t sticky; 3701 3702 if (udp->udp_family != AF_INET6) { 3703 *outlenp = 0; 3704 return (ENOPROTOOPT); 3705 } 3706 /* 3707 * Deal with both sticky options and ancillary data 3708 */ 3709 sticky = B_FALSE; 3710 if (attrs == NULL || (ipp = attrs->udpattr_ipp) == NULL) { 3711 /* sticky options, or none */ 3712 ipp = &udp->udp_sticky_ipp; 3713 sticky = B_TRUE; 3714 } 3715 3716 switch (name) { 3717 case IPV6_MULTICAST_IF: 3718 if (!checkonly) 3719 udp->udp_multicast_if_index = *i1; 3720 break; 3721 case IPV6_UNICAST_HOPS: 3722 /* -1 means use default */ 3723 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3724 *outlenp = 0; 3725 return (EINVAL); 3726 } 3727 if (!checkonly) { 3728 if (*i1 == -1) { 3729 udp->udp_ttl = ipp->ipp_unicast_hops = 3730 udp_ipv6_hoplimit; 3731 ipp->ipp_fields &= ~IPPF_UNICAST_HOPS; 3732 /* Pass modified value to IP. 
*/ 3733 *i1 = udp->udp_ttl; 3734 } else { 3735 udp->udp_ttl = ipp->ipp_unicast_hops = 3736 (uint8_t)*i1; 3737 ipp->ipp_fields |= IPPF_UNICAST_HOPS; 3738 } 3739 /* Rebuild the header template */ 3740 error = udp_build_hdrs(q, udp); 3741 if (error != 0) { 3742 *outlenp = 0; 3743 return (error); 3744 } 3745 } 3746 break; 3747 case IPV6_MULTICAST_HOPS: 3748 /* -1 means use default */ 3749 if (*i1 < -1 || *i1 > IPV6_MAX_HOPS) { 3750 *outlenp = 0; 3751 return (EINVAL); 3752 } 3753 if (!checkonly) { 3754 if (*i1 == -1) { 3755 udp->udp_multicast_ttl = 3756 ipp->ipp_multicast_hops = 3757 IP_DEFAULT_MULTICAST_TTL; 3758 ipp->ipp_fields &= ~IPPF_MULTICAST_HOPS; 3759 /* Pass modified value to IP. */ 3760 *i1 = udp->udp_multicast_ttl; 3761 } else { 3762 udp->udp_multicast_ttl = 3763 ipp->ipp_multicast_hops = 3764 (uint8_t)*i1; 3765 ipp->ipp_fields |= IPPF_MULTICAST_HOPS; 3766 } 3767 } 3768 break; 3769 case IPV6_MULTICAST_LOOP: 3770 if (*i1 != 0 && *i1 != 1) { 3771 *outlenp = 0; 3772 return (EINVAL); 3773 } 3774 if (!checkonly) 3775 connp->conn_multicast_loop = *i1; 3776 break; 3777 case IPV6_JOIN_GROUP: 3778 case IPV6_LEAVE_GROUP: 3779 case MCAST_JOIN_GROUP: 3780 case MCAST_LEAVE_GROUP: 3781 case MCAST_BLOCK_SOURCE: 3782 case MCAST_UNBLOCK_SOURCE: 3783 case MCAST_JOIN_SOURCE_GROUP: 3784 case MCAST_LEAVE_SOURCE_GROUP: 3785 /* 3786 * "soft" error (negative) 3787 * option not handled at this level 3788 * Note: Do not modify *outlenp 3789 */ 3790 return (-EINVAL); 3791 case IPV6_BOUND_IF: 3792 if (!checkonly) 3793 udp->udp_bound_if = *i1; 3794 break; 3795 case IPV6_UNSPEC_SRC: 3796 if (!checkonly) 3797 udp->udp_unspec_source = onoff; 3798 break; 3799 /* 3800 * Set boolean switches for ancillary data delivery 3801 */ 3802 case IPV6_RECVPKTINFO: 3803 if (!checkonly) 3804 udp->udp_ipv6_recvpktinfo = onoff; 3805 break; 3806 case IPV6_RECVTCLASS: 3807 if (!checkonly) { 3808 udp->udp_ipv6_recvtclass = onoff; 3809 } 3810 break; 3811 case IPV6_RECVPATHMTU: 3812 if (!checkonly) { 3813 
udp->udp_ipv6_recvpathmtu = onoff; 3814 } 3815 break; 3816 case IPV6_RECVHOPLIMIT: 3817 if (!checkonly) 3818 udp->udp_ipv6_recvhoplimit = onoff; 3819 break; 3820 case IPV6_RECVHOPOPTS: 3821 if (!checkonly) 3822 udp->udp_ipv6_recvhopopts = onoff; 3823 break; 3824 case IPV6_RECVDSTOPTS: 3825 if (!checkonly) 3826 udp->udp_ipv6_recvdstopts = onoff; 3827 break; 3828 case _OLD_IPV6_RECVDSTOPTS: 3829 if (!checkonly) 3830 udp->udp_old_ipv6_recvdstopts = onoff; 3831 break; 3832 case IPV6_RECVRTHDRDSTOPTS: 3833 if (!checkonly) 3834 udp->udp_ipv6_recvrthdrdstopts = onoff; 3835 break; 3836 case IPV6_RECVRTHDR: 3837 if (!checkonly) 3838 udp->udp_ipv6_recvrthdr = onoff; 3839 break; 3840 /* 3841 * Set sticky options or ancillary data. 3842 * If sticky options, (re)build any extension headers 3843 * that might be needed as a result. 3844 */ 3845 case IPV6_PKTINFO: 3846 /* 3847 * The source address and ifindex are verified 3848 * in ip_opt_set(). For ancillary data the 3849 * source address is checked in ip_wput_v6. 
3850 */ 3851 if (inlen != 0 && inlen != sizeof (struct in6_pktinfo)) 3852 return (EINVAL); 3853 if (checkonly) 3854 break; 3855 3856 if (inlen == 0) { 3857 ipp->ipp_fields &= ~(IPPF_IFINDEX|IPPF_ADDR); 3858 ipp->ipp_sticky_ignored |= 3859 (IPPF_IFINDEX|IPPF_ADDR); 3860 } else { 3861 struct in6_pktinfo *pkti; 3862 3863 pkti = (struct in6_pktinfo *)invalp; 3864 ipp->ipp_ifindex = pkti->ipi6_ifindex; 3865 ipp->ipp_addr = pkti->ipi6_addr; 3866 if (ipp->ipp_ifindex != 0) 3867 ipp->ipp_fields |= IPPF_IFINDEX; 3868 else 3869 ipp->ipp_fields &= ~IPPF_IFINDEX; 3870 if (!IN6_IS_ADDR_UNSPECIFIED( 3871 &ipp->ipp_addr)) 3872 ipp->ipp_fields |= IPPF_ADDR; 3873 else 3874 ipp->ipp_fields &= ~IPPF_ADDR; 3875 } 3876 if (sticky) { 3877 error = udp_build_hdrs(q, udp); 3878 if (error != 0) 3879 return (error); 3880 } 3881 break; 3882 case IPV6_HOPLIMIT: 3883 if (sticky) 3884 return (EINVAL); 3885 if (inlen != 0 && inlen != sizeof (int)) 3886 return (EINVAL); 3887 if (checkonly) 3888 break; 3889 3890 if (inlen == 0) { 3891 ipp->ipp_fields &= ~IPPF_HOPLIMIT; 3892 ipp->ipp_sticky_ignored |= IPPF_HOPLIMIT; 3893 } else { 3894 if (*i1 > 255 || *i1 < -1) 3895 return (EINVAL); 3896 if (*i1 == -1) 3897 ipp->ipp_hoplimit = udp_ipv6_hoplimit; 3898 else 3899 ipp->ipp_hoplimit = *i1; 3900 ipp->ipp_fields |= IPPF_HOPLIMIT; 3901 } 3902 break; 3903 case IPV6_TCLASS: 3904 if (inlen != 0 && inlen != sizeof (int)) 3905 return (EINVAL); 3906 if (checkonly) 3907 break; 3908 3909 if (inlen == 0) { 3910 ipp->ipp_fields &= ~IPPF_TCLASS; 3911 ipp->ipp_sticky_ignored |= IPPF_TCLASS; 3912 } else { 3913 if (*i1 > 255 || *i1 < -1) 3914 return (EINVAL); 3915 if (*i1 == -1) 3916 ipp->ipp_tclass = 0; 3917 else 3918 ipp->ipp_tclass = *i1; 3919 ipp->ipp_fields |= IPPF_TCLASS; 3920 } 3921 if (sticky) { 3922 error = udp_build_hdrs(q, udp); 3923 if (error != 0) 3924 return (error); 3925 } 3926 break; 3927 case IPV6_NEXTHOP: 3928 /* 3929 * IP will verify that the nexthop is reachable 3930 * and fail for sticky options. 
3931 */ 3932 if (inlen != 0 && inlen != sizeof (sin6_t)) 3933 return (EINVAL); 3934 if (checkonly) 3935 break; 3936 3937 if (inlen == 0) { 3938 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3939 ipp->ipp_sticky_ignored |= IPPF_NEXTHOP; 3940 } else { 3941 sin6_t *sin6 = (sin6_t *)invalp; 3942 3943 if (sin6->sin6_family != AF_INET6) 3944 return (EAFNOSUPPORT); 3945 if (IN6_IS_ADDR_V4MAPPED( 3946 &sin6->sin6_addr)) 3947 return (EADDRNOTAVAIL); 3948 ipp->ipp_nexthop = sin6->sin6_addr; 3949 if (!IN6_IS_ADDR_UNSPECIFIED( 3950 &ipp->ipp_nexthop)) 3951 ipp->ipp_fields |= IPPF_NEXTHOP; 3952 else 3953 ipp->ipp_fields &= ~IPPF_NEXTHOP; 3954 } 3955 if (sticky) { 3956 error = udp_build_hdrs(q, udp); 3957 if (error != 0) 3958 return (error); 3959 } 3960 break; 3961 case IPV6_HOPOPTS: { 3962 ip6_hbh_t *hopts = (ip6_hbh_t *)invalp; 3963 /* 3964 * Sanity checks - minimum size, size a multiple of 3965 * eight bytes, and matching size passed in. 3966 */ 3967 if (inlen != 0 && 3968 inlen != (8 * (hopts->ip6h_len + 1))) 3969 return (EINVAL); 3970 3971 if (checkonly) 3972 break; 3973 3974 error = optcom_pkt_set(invalp, inlen, sticky, 3975 (uchar_t **)&ipp->ipp_hopopts, 3976 &ipp->ipp_hopoptslen, 3977 sticky ? udp->udp_label_len_v6 : 0); 3978 if (error != 0) 3979 return (error); 3980 if (ipp->ipp_hopoptslen == 0) { 3981 ipp->ipp_fields &= ~IPPF_HOPOPTS; 3982 ipp->ipp_sticky_ignored |= IPPF_HOPOPTS; 3983 } else { 3984 ipp->ipp_fields |= IPPF_HOPOPTS; 3985 } 3986 if (sticky) { 3987 error = udp_build_hdrs(q, udp); 3988 if (error != 0) 3989 return (error); 3990 } 3991 break; 3992 } 3993 case IPV6_RTHDRDSTOPTS: { 3994 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 3995 3996 /* 3997 * Sanity checks - minimum size, size a multiple of 3998 * eight bytes, and matching size passed in. 
3999 */ 4000 if (inlen != 0 && 4001 inlen != (8 * (dopts->ip6d_len + 1))) 4002 return (EINVAL); 4003 4004 if (checkonly) 4005 break; 4006 4007 if (inlen == 0) { 4008 if (sticky && 4009 (ipp->ipp_fields & IPPF_RTDSTOPTS) != 0) { 4010 kmem_free(ipp->ipp_rtdstopts, 4011 ipp->ipp_rtdstoptslen); 4012 ipp->ipp_rtdstopts = NULL; 4013 ipp->ipp_rtdstoptslen = 0; 4014 } 4015 4016 ipp->ipp_fields &= ~IPPF_RTDSTOPTS; 4017 ipp->ipp_sticky_ignored |= IPPF_RTDSTOPTS; 4018 } else { 4019 error = optcom_pkt_set(invalp, inlen, sticky, 4020 (uchar_t **)&ipp->ipp_rtdstopts, 4021 &ipp->ipp_rtdstoptslen, 0); 4022 if (error != 0) 4023 return (error); 4024 ipp->ipp_fields |= IPPF_RTDSTOPTS; 4025 } 4026 if (sticky) { 4027 error = udp_build_hdrs(q, udp); 4028 if (error != 0) 4029 return (error); 4030 } 4031 break; 4032 } 4033 case IPV6_DSTOPTS: { 4034 ip6_dest_t *dopts = (ip6_dest_t *)invalp; 4035 4036 /* 4037 * Sanity checks - minimum size, size a multiple of 4038 * eight bytes, and matching size passed in. 4039 */ 4040 if (inlen != 0 && 4041 inlen != (8 * (dopts->ip6d_len + 1))) 4042 return (EINVAL); 4043 4044 if (checkonly) 4045 break; 4046 4047 if (inlen == 0) { 4048 if (sticky && 4049 (ipp->ipp_fields & IPPF_DSTOPTS) != 0) { 4050 kmem_free(ipp->ipp_dstopts, 4051 ipp->ipp_dstoptslen); 4052 ipp->ipp_dstopts = NULL; 4053 ipp->ipp_dstoptslen = 0; 4054 } 4055 ipp->ipp_fields &= ~IPPF_DSTOPTS; 4056 ipp->ipp_sticky_ignored |= IPPF_DSTOPTS; 4057 } else { 4058 error = optcom_pkt_set(invalp, inlen, sticky, 4059 (uchar_t **)&ipp->ipp_dstopts, 4060 &ipp->ipp_dstoptslen, 0); 4061 if (error != 0) 4062 return (error); 4063 ipp->ipp_fields |= IPPF_DSTOPTS; 4064 } 4065 if (sticky) { 4066 error = udp_build_hdrs(q, udp); 4067 if (error != 0) 4068 return (error); 4069 } 4070 break; 4071 } 4072 case IPV6_RTHDR: { 4073 ip6_rthdr_t *rt = (ip6_rthdr_t *)invalp; 4074 4075 /* 4076 * Sanity checks - minimum size, size a multiple of 4077 * eight bytes, and matching size passed in. 
4078 */ 4079 if (inlen != 0 && 4080 inlen != (8 * (rt->ip6r_len + 1))) 4081 return (EINVAL); 4082 4083 if (checkonly) 4084 break; 4085 4086 if (inlen == 0) { 4087 if (sticky && 4088 (ipp->ipp_fields & IPPF_RTHDR) != 0) { 4089 kmem_free(ipp->ipp_rthdr, 4090 ipp->ipp_rthdrlen); 4091 ipp->ipp_rthdr = NULL; 4092 ipp->ipp_rthdrlen = 0; 4093 } 4094 ipp->ipp_fields &= ~IPPF_RTHDR; 4095 ipp->ipp_sticky_ignored |= IPPF_RTHDR; 4096 } else { 4097 error = optcom_pkt_set(invalp, inlen, sticky, 4098 (uchar_t **)&ipp->ipp_rthdr, 4099 &ipp->ipp_rthdrlen, 0); 4100 if (error != 0) 4101 return (error); 4102 ipp->ipp_fields |= IPPF_RTHDR; 4103 } 4104 if (sticky) { 4105 error = udp_build_hdrs(q, udp); 4106 if (error != 0) 4107 return (error); 4108 } 4109 break; 4110 } 4111 4112 case IPV6_DONTFRAG: 4113 if (checkonly) 4114 break; 4115 4116 if (onoff) { 4117 ipp->ipp_fields |= IPPF_DONTFRAG; 4118 } else { 4119 ipp->ipp_fields &= ~IPPF_DONTFRAG; 4120 } 4121 break; 4122 4123 case IPV6_USE_MIN_MTU: 4124 if (inlen != sizeof (int)) 4125 return (EINVAL); 4126 4127 if (*i1 < -1 || *i1 > 1) 4128 return (EINVAL); 4129 4130 if (checkonly) 4131 break; 4132 4133 ipp->ipp_fields |= IPPF_USE_MIN_MTU; 4134 ipp->ipp_use_min_mtu = *i1; 4135 break; 4136 4137 case IPV6_BOUND_PIF: 4138 case IPV6_SEC_OPT: 4139 case IPV6_DONTFAILOVER_IF: 4140 case IPV6_SRC_PREFERENCES: 4141 case IPV6_V6ONLY: 4142 /* Handled at the IP level */ 4143 return (-EINVAL); 4144 default: 4145 *outlenp = 0; 4146 return (EINVAL); 4147 } 4148 break; 4149 } /* end IPPROTO_IPV6 */ 4150 case IPPROTO_UDP: 4151 switch (name) { 4152 case UDP_ANONPRIVBIND: 4153 if ((error = secpolicy_net_privaddr(cr, 0)) != 0) { 4154 *outlenp = 0; 4155 return (error); 4156 } 4157 if (!checkonly) { 4158 udp->udp_anon_priv_bind = onoff; 4159 } 4160 break; 4161 case UDP_EXCLBIND: 4162 if (!checkonly) 4163 udp->udp_exclbind = onoff; 4164 break; 4165 case UDP_RCVHDR: 4166 if (!checkonly) 4167 udp->udp_rcvhdr = onoff; 4168 break; 4169 default: 4170 *outlenp = 0; 4171 
return (EINVAL); 4172 } 4173 break; 4174 default: 4175 *outlenp = 0; 4176 return (EINVAL); 4177 } 4178 /* 4179 * Common case of OK return with outval same as inval. 4180 */ 4181 if (invalp != outvalp) { 4182 /* don't trust bcopy for identical src/dst */ 4183 (void) bcopy(invalp, outvalp, inlen); 4184 } 4185 *outlenp = inlen; 4186 return (0); 4187 } 4188 4189 /* 4190 * Update udp_sticky_hdrs based on udp_sticky_ipp, udp_v6src, and udp_ttl. 4191 * The headers include ip6i_t (if needed), ip6_t, any sticky extension 4192 * headers, and the udp header. 4193 * Returns failure if can't allocate memory. 4194 */ 4195 static int 4196 udp_build_hdrs(queue_t *q, udp_t *udp) 4197 { 4198 uchar_t *hdrs; 4199 uint_t hdrs_len; 4200 ip6_t *ip6h; 4201 ip6i_t *ip6i; 4202 udpha_t *udpha; 4203 ip6_pkt_t *ipp = &udp->udp_sticky_ipp; 4204 4205 hdrs_len = ip_total_hdrs_len_v6(ipp) + UDPH_SIZE; 4206 ASSERT(hdrs_len != 0); 4207 if (hdrs_len != udp->udp_sticky_hdrs_len) { 4208 /* Need to reallocate */ 4209 hdrs = kmem_alloc(hdrs_len, KM_NOSLEEP); 4210 if (hdrs == NULL) 4211 return (ENOMEM); 4212 4213 if (udp->udp_sticky_hdrs_len != 0) { 4214 kmem_free(udp->udp_sticky_hdrs, 4215 udp->udp_sticky_hdrs_len); 4216 } 4217 udp->udp_sticky_hdrs = hdrs; 4218 udp->udp_sticky_hdrs_len = hdrs_len; 4219 } 4220 ip_build_hdrs_v6(udp->udp_sticky_hdrs, 4221 udp->udp_sticky_hdrs_len - UDPH_SIZE, ipp, IPPROTO_UDP); 4222 4223 /* Set header fields not in ipp */ 4224 if (ipp->ipp_fields & IPPF_HAS_IP6I) { 4225 ip6i = (ip6i_t *)udp->udp_sticky_hdrs; 4226 ip6h = (ip6_t *)&ip6i[1]; 4227 } else { 4228 ip6h = (ip6_t *)udp->udp_sticky_hdrs; 4229 } 4230 4231 if (!(ipp->ipp_fields & IPPF_ADDR)) 4232 ip6h->ip6_src = udp->udp_v6src; 4233 4234 udpha = (udpha_t *)(udp->udp_sticky_hdrs + hdrs_len - UDPH_SIZE); 4235 udpha->uha_src_port = udp->udp_port; 4236 4237 /* Try to get everything in a single mblk */ 4238 if (hdrs_len > udp->udp_max_hdr_len) { 4239 udp->udp_max_hdr_len = hdrs_len; 4240 (void) mi_set_sth_wroff(RD(q), 
udp->udp_max_hdr_len + 4241 udp_wroff_extra); 4242 } 4243 return (0); 4244 } 4245 4246 /* 4247 * This routine retrieves the value of an ND variable in a udpparam_t 4248 * structure. It is called through nd_getset when a user reads the 4249 * variable. 4250 */ 4251 /* ARGSUSED */ 4252 static int 4253 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 4254 { 4255 udpparam_t *udppa = (udpparam_t *)cp; 4256 4257 (void) mi_mpprintf(mp, "%d", udppa->udp_param_value); 4258 return (0); 4259 } 4260 4261 /* 4262 * Walk through the param array specified registering each element with the 4263 * named dispatch (ND) handler. 4264 */ 4265 static boolean_t 4266 udp_param_register(udpparam_t *udppa, int cnt) 4267 { 4268 for (; cnt-- > 0; udppa++) { 4269 if (udppa->udp_param_name && udppa->udp_param_name[0]) { 4270 if (!nd_load(&udp_g_nd, udppa->udp_param_name, 4271 udp_param_get, udp_param_set, 4272 (caddr_t)udppa)) { 4273 nd_free(&udp_g_nd); 4274 return (B_FALSE); 4275 } 4276 } 4277 } 4278 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports", 4279 udp_extra_priv_ports_get, NULL, NULL)) { 4280 nd_free(&udp_g_nd); 4281 return (B_FALSE); 4282 } 4283 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_add", 4284 NULL, udp_extra_priv_ports_add, NULL)) { 4285 nd_free(&udp_g_nd); 4286 return (B_FALSE); 4287 } 4288 if (!nd_load(&udp_g_nd, "udp_extra_priv_ports_del", 4289 NULL, udp_extra_priv_ports_del, NULL)) { 4290 nd_free(&udp_g_nd); 4291 return (B_FALSE); 4292 } 4293 if (!nd_load(&udp_g_nd, "udp_status", udp_status_report, NULL, 4294 NULL)) { 4295 nd_free(&udp_g_nd); 4296 return (B_FALSE); 4297 } 4298 if (!nd_load(&udp_g_nd, "udp_bind_hash", udp_bind_hash_report, NULL, 4299 NULL)) { 4300 nd_free(&udp_g_nd); 4301 return (B_FALSE); 4302 } 4303 return (B_TRUE); 4304 } 4305 4306 /* This routine sets an ND variable in a udpparam_t structure. 
 */
/* ARGSUSED */
static int
udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
{
    long    new_value;
    udpparam_t  *udppa = (udpparam_t *)cp;

    /*
     * Fail the request if the new value does not lie within the
     * required bounds.
     */
    if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
        new_value < udppa->udp_param_min ||
        new_value > udppa->udp_param_max) {
        return (EINVAL);
    }

    /* Set the new value */
    udppa->udp_param_value = new_value;
    return (0);
}

/*
 * Copy hop-by-hop option from ipp->ipp_hopopts to the buffer provided (with
 * T_opthdr) and return the number of bytes copied. 'dbuf' may be NULL to
 * just count the length needed for allocation. If 'dbuf' is non-NULL,
 * then it's assumed to be allocated to be large enough.
 *
 * Returns zero if trimming of the security option causes all options to go
 * away.
 */
static size_t
copy_hop_opts(const ip6_pkt_t *ipp, uchar_t *dbuf)
{
    struct T_opthdr *toh;
    size_t hol = ipp->ipp_hopoptslen;
    ip6_hbh_t *dstopt = NULL;
    const ip6_hbh_t *srcopt = ipp->ipp_hopopts;
    size_t tlen, olen, plen;
    boolean_t deleting;
    const struct ip6_opt *sopt, *lastpad;
    struct ip6_opt *dopt;

    if ((toh = (struct T_opthdr *)dbuf) != NULL) {
        /* Counting-only mode when dbuf is NULL; fill header otherwise. */
        toh->level = IPPROTO_IPV6;
        toh->name = IPV6_HOPOPTS;
        toh->status = 0;
        dstopt = (ip6_hbh_t *)(toh + 1);
    }

    /*
     * If labeling is enabled, then skip the label option
     * but get other options if there are any.
	 */
	if (is_system_labeled()) {
		dopt = NULL;
		if (dstopt != NULL) {
			/* will fill in ip6h_len later */
			dstopt->ip6h_nxt = srcopt->ip6h_nxt;
			dopt = (struct ip6_opt *)(dstopt + 1);
		}
		/* Step past the hbh header itself; options follow it. */
		sopt = (const struct ip6_opt *)(srcopt + 1);
		hol -= sizeof (*srcopt);
		tlen = sizeof (*dstopt);
		lastpad = NULL;
		deleting = B_FALSE;
		/*
		 * This loop finds the first (lastpad pointer) of any number of
		 * pads that preceeds the security option, then treats the
		 * security option as though it were a pad, and then finds the
		 * next non-pad option (or end of list).
		 *
		 * It then treats the entire block as one big pad.  To preserve
		 * alignment of any options that follow, or just the end of the
		 * list, it computes a minimal new padding size that keeps the
		 * same alignment for the next option.
		 *
		 * If it encounters just a sequence of pads with no security
		 * option, those are copied as-is rather than collapsed.
		 *
		 * Note that to handle the end of list case, the code makes one
		 * loop with 'hol' set to zero.
		 */
		for (;;) {
			if (hol > 0) {
				if (sopt->ip6o_type == IP6OPT_PAD1) {
					if (lastpad == NULL)
						lastpad = sopt;
					/* Pad1 is a single byte; advance 1. */
					sopt = (const struct ip6_opt *)
					    &sopt->ip6o_len;
					hol--;
					continue;
				}
				olen = sopt->ip6o_len + sizeof (*sopt);
				/* Clamp a malformed length to what's left. */
				if (olen > hol)
					olen = hol;
				if (sopt->ip6o_type == IP6OPT_PADN ||
				    sopt->ip6o_type == ip6opt_ls) {
					if (sopt->ip6o_type == ip6opt_ls)
						deleting = B_TRUE;
					if (lastpad == NULL)
						lastpad = sopt;
					sopt = (const struct ip6_opt *)
					    ((const char *)sopt + olen);
					hol -= olen;
					continue;
				}
			} else {
				/* if nothing was copied at all, then delete */
				if (tlen == sizeof (*dstopt))
					return (0);
				/* last pass; pick up any trailing padding */
				olen = 0;
			}
			if (deleting) {
				/*
				 * compute aligning effect of deleted material
				 * to reproduce with pad.
				 */
				plen = ((const char *)sopt -
				    (const char *)lastpad) & 7;
				tlen += plen;
				if (dopt != NULL) {
					if (plen == 1) {
						dopt->ip6o_type = IP6OPT_PAD1;
					} else if (plen > 1) {
						plen -= sizeof (*dopt);
						dopt->ip6o_type = IP6OPT_PADN;
						dopt->ip6o_len = plen;
						if (plen > 0)
							bzero(dopt + 1, plen);
					}
					dopt = (struct ip6_opt *)
					    ((char *)dopt + plen);
				}
				deleting = B_FALSE;
				lastpad = NULL;
			}
			/* if there's uncopied padding, then copy that now */
			if (lastpad != NULL) {
				olen += (const char *)sopt -
				    (const char *)lastpad;
				sopt = lastpad;
				lastpad = NULL;
			}
			if (dopt != NULL && olen > 0) {
				bcopy(sopt, dopt, olen);
				dopt = (struct ip6_opt *)((char *)dopt + olen);
			}
			if (hol == 0)
				break;
			tlen += olen;
			sopt = (const struct ip6_opt *)
			    ((const char *)sopt + olen);
			hol -= olen;
		}
		/* go back and patch up the length value, rounded upward */
		if (dstopt != NULL)
			dstopt->ip6h_len = (tlen - 1) >> 3;
	} else {
		/* Not labeled: copy the whole hop-by-hop block verbatim. */
		tlen = hol;
		if (dstopt != NULL)
			bcopy(srcopt, dstopt, hol);
	}

	tlen += sizeof (*toh);
	if (toh != NULL)
		toh->len = tlen;

	return (tlen);
}

/*
 * Inbound data path entry point for a UDP conn.  Validates the IP/UDP
 * headers, collects any per-packet ancillary data the socket asked for
 * (pktinfo, ucred, timestamp, TTL, IPv6 extension headers, ...), wraps
 * the datagram in a T_UNITDATA_IND and passes it upstream.  'mp' may be
 * an M_DATA datagram or an M_CTL carrying in_pktinfo_t/ICMP information.
 */
static void
udp_input(conn_t *connp, mblk_t *mp)
{
	struct T_unitdata_ind	*tudi;
	uchar_t			*rptr;		/* Pointer to IP header */
	int			hdr_length;	/* Length of IP+UDP headers */
	int			udi_size;	/* Size of T_unitdata_ind */
	int			mp_len;
	udp_t			*udp;
	udpha_t			*udpha;
	int			ipversion;
	ip6_pkt_t		ipp;
	ip6_t			*ip6h;
	ip6i_t			*ip6i;
	mblk_t			*mp1;
	mblk_t			*options_mp = NULL;
	in_pktinfo_t		*pinfo = NULL;
	cred_t			*cr = NULL;
	queue_t			*q = connp->conn_rq;
	pid_t			cpid;
	cred_t			*rcr = connp->conn_cred;

	TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START,
	    "udp_rput_start: q %p mp %p", q, mp);

	udp = connp->conn_udp;
	rptr = mp->b_rptr;
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_CTL);
	ASSERT(OK_32PTR(rptr));

	/*
	 * IP should have prepended the options data in an M_CTL
	 * Check M_CTL "type" to make sure are not here bcos of
	 * a valid ICMP message
	 */
	if (DB_TYPE(mp) == M_CTL) {
		if (MBLKL(mp) == sizeof (in_pktinfo_t) &&
		    ((in_pktinfo_t *)mp->b_rptr)->in_pkt_ulp_type ==
		    IN_PKTINFO) {
			/*
			 * IP_RECVIF or IP_RECVSLLA information has been
			 * appended to the packet by IP. We need to
			 * extract the mblk and adjust the rptr
			 */
			pinfo = (in_pktinfo_t *)mp->b_rptr;
			options_mp = mp;
			mp = mp->b_cont;
			rptr = mp->b_rptr;
			UDP_STAT(udp_in_pktinfo);
		} else {
			/*
			 * ICMP messages.
			 */
			udp_icmp_error(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
			    "udp_rput_end: q %p (%S)", q, "m_ctl");
			return;
		}
	}

	mp_len = msgdsize(mp);
	/*
	 * This is the inbound data path.
	 * First, we check to make sure the IP version number is correct,
	 * and then pull the IP and UDP headers into the first mblk.
	 * Assume IP provides aligned packets - otherwise toss.
	 * Also, check if we have a complete IP header.
	 */

	/* Initialize regardless if ipversion is IPv4 or IPv6 */
	ipp.ipp_fields = 0;

	ipversion = IPH_HDR_VERSION(rptr);
	switch (ipversion) {
	case IPV4_VERSION:
		ASSERT(MBLKL(mp) >= sizeof (ipha_t));
		ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
		hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE;
		if ((hdr_length > IP_SIMPLE_HDR_LENGTH + UDPH_SIZE) ||
		    (udp->udp_ip_rcv_options_len)) {
			/*
			 * Handle IPv4 packets with options outside of the
			 * main data path. Not needed for AF_INET6 sockets
			 * since they don't support a getsockopt of IP_OPTIONS.
			 */
			if (udp->udp_family == AF_INET6)
				break;
			/*
			 * UDP length check performed for IPv4 packets with
			 * options to check whether UDP length specified in
			 * the header is the same as the physical length of
			 * the packet.
			 */
			udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
			if (mp_len != (ntohs(udpha->uha_length) +
			    hdr_length - UDPH_SIZE)) {
				goto tossit;
			}
			/*
			 * Handle the case where the packet has IP options
			 * and the IP_RECVSLLA & IP_RECVIF are set
			 */
			if (pinfo != NULL)
				mp = options_mp;
			udp_become_writer(connp, mp, udp_rput_other_wrapper,
			    SQTAG_UDP_INPUT);
			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
			    "udp_rput_end: q %p (%S)", q, "end");
			return;
		}

		/*
		 * Handle IPV6_RECVPKTINFO for AF_INET6 sockets receiving
		 * IPv4 datagrams: propagate the inbound ifindex.  (The
		 * original comment said IPV6_RECVHOPLIMIT, but the code
		 * tests udp_ipv6_recvpktinfo and sets IPPF_IFINDEX.)
		 */
		if ((udp->udp_family == AF_INET6) && (pinfo != NULL) &&
		    udp->udp_ipv6_recvpktinfo) {
			if (pinfo->in_pkt_flags & IPF_RECVIF) {
				ipp.ipp_fields |= IPPF_IFINDEX;
				ipp.ipp_ifindex = pinfo->in_pkt_ifindex;
			}
		}
		break;
	case IPV6_VERSION:
		/*
		 * IPv6 packets can only be received by applications
		 * that are prepared to receive IPv6 addresses.
		 * The IP fanout must ensure this.
		 */
		ASSERT(udp->udp_family == AF_INET6);

		ip6h = (ip6_t *)rptr;
		ASSERT((uchar_t *)&ip6h[1] <= mp->b_wptr);

		if (ip6h->ip6_nxt != IPPROTO_UDP) {
			uint8_t nexthdrp;
			/* Look for ifindex information */
			if (ip6h->ip6_nxt == IPPROTO_RAW) {
				ip6i = (ip6i_t *)ip6h;
				if ((uchar_t *)&ip6i[1] > mp->b_wptr)
					goto tossit;

				if (ip6i->ip6i_flags & IP6I_IFINDEX) {
					ASSERT(ip6i->ip6i_ifindex != 0);
					ipp.ipp_fields |= IPPF_IFINDEX;
					ipp.ipp_ifindex = ip6i->ip6i_ifindex;
				}
				/* Strip the ip6i header and re-anchor. */
				rptr = (uchar_t *)&ip6i[1];
				mp->b_rptr = rptr;
				if (rptr == mp->b_wptr) {
					mp1 = mp->b_cont;
					freeb(mp);
					mp = mp1;
					rptr = mp->b_rptr;
				}
				if (MBLKL(mp) < (IPV6_HDR_LEN + UDPH_SIZE))
					goto tossit;
				ip6h = (ip6_t *)rptr;
				mp_len = msgdsize(mp);
			}
			/*
			 * Find any potentially interesting extension headers
			 * as well as the length of the IPv6 + extension
			 * headers.
			 */
			hdr_length = ip_find_hdr_v6(mp, ip6h, &ipp, &nexthdrp) +
			    UDPH_SIZE;
			ASSERT(nexthdrp == IPPROTO_UDP);
		} else {
			hdr_length = IPV6_HDR_LEN + UDPH_SIZE;
			ip6i = NULL;
		}
		break;
	default:
		ASSERT(0);
	}

	/*
	 * IP inspected the UDP header thus all of it must be in the mblk.
	 * UDP length check is performed for IPv6 packets and IPv4 packets
	 * without options to check if the size of the packet as specified
	 * by the header is the same as the physical size of the packet.
	 */
	udpha = (udpha_t *)(rptr + (hdr_length - UDPH_SIZE));
	if ((MBLKL(mp) < hdr_length) ||
	    (mp_len != (ntohs(udpha->uha_length) + hdr_length - UDPH_SIZE))) {
		goto tossit;
	}

	/* Walk past the headers unless the app asked to keep them. */
	if (!udp->udp_rcvhdr) {
		mp->b_rptr = rptr + hdr_length;
		mp_len -= hdr_length;
	}

	/*
	 * This is the inbound data path.  Packets are passed upstream as
	 * T_UNITDATA_IND messages with full IP headers still attached.
	 */
	if (udp->udp_family == AF_INET) {
		sin_t *sin;

		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);

		/*
		 * Normally only send up the address.
		 * If IP_RECVDSTADDR is set we include the destination IP
		 * address as an option. With IP_RECVOPTS we include all
		 * the IP options. Only ip_rput_other() handles packets
		 * that contain IP options.
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
		if (udp->udp_recvdstaddr) {
			udi_size += sizeof (struct T_opthdr) +
			    sizeof (struct in_addr);
			UDP_STAT(udp_in_recvdstaddr);
		}

		/*
		 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate
		 * space accordingly
		 */
		if (udp->udp_recvif && (pinfo != NULL) &&
		    (pinfo->in_pkt_flags & IPF_RECVIF)) {
			udi_size += sizeof (struct T_opthdr) + sizeof (uint_t);
			UDP_STAT(udp_in_recvif);
		}

		if (udp->udp_recvslla && (pinfo != NULL) &&
		    (pinfo->in_pkt_flags & IPF_RECVSLLA)) {
			udi_size += sizeof (struct T_opthdr) +
			    sizeof (struct sockaddr_dl);
			UDP_STAT(udp_in_recvslla);
		}

		if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
			udi_size += sizeof (struct T_opthdr) + ucredsize;
			cpid = DB_CPID(mp);
			UDP_STAT(udp_in_recvucred);
		}

		/*
		 * If SO_TIMESTAMP is set allocate the appropriate sized
		 * buffer. Since gethrestime() expects a pointer aligned
		 * argument, we allocate space necessary for extra
		 * alignment (even though it might not be used).
		 */
		if (udp->udp_timestamp) {
			udi_size += sizeof (struct T_opthdr) +
			    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
			UDP_STAT(udp_in_timestamp);
		}

		/*
		 * If IP_RECVTTL is set allocate the appropriate sized buffer
		 */
		if (udp->udp_recvttl) {
			udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t);
			UDP_STAT(udp_in_recvttl);
		}
		ASSERT(IPH_HDR_LENGTH((ipha_t *)rptr) == IP_SIMPLE_HDR_LENGTH);

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			if (options_mp != NULL)
				freeb(options_mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
			    "udp_rput_end: q %p (%S)", q, "allocbfail");
			BUMP_MIB(&udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp = mp1;
		mp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp->b_rptr;
		mp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin_t);
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
		tudi->OPT_length = udi_size;
		sin = (sin_t *)&tudi[1];
		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
		sin->sin_port = udpha->uha_src_port;
		sin->sin_family = udp->udp_family;
		*(uint32_t *)&sin->sin_zero[0] = 0;
		*(uint32_t *)&sin->sin_zero[4] = 0;

		/*
		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
		 * IP_RECVTTL has been set.
		 */
		if (udi_size != 0) {
			/*
			 * Copy in destination address before options to avoid
			 * any padding issues.
			 */
			char *dstopt;

			dstopt = (char *)&sin[1];
			if (udp->udp_recvdstaddr) {
				struct T_opthdr *toh;
				ipaddr_t *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVDSTADDR;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (ipaddr_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (ipaddr_t *)dstopt;
				*dstptr = ((ipha_t *)rptr)->ipha_dst;
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			if (udp->udp_recvslla && (pinfo != NULL) &&
			    (pinfo->in_pkt_flags & IPF_RECVSLLA)) {

				struct T_opthdr *toh;
				struct sockaddr_dl *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVSLLA;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (struct sockaddr_dl);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (struct sockaddr_dl *)dstopt;
				bcopy(&pinfo->in_pkt_slla, dstptr,
				    sizeof (struct sockaddr_dl));
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			if (udp->udp_recvif && (pinfo != NULL) &&
			    (pinfo->in_pkt_flags & IPF_RECVIF)) {

				struct T_opthdr *toh;
				uint_t *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVIF;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (uint_t *)dstopt;
				*dstptr = pinfo->in_pkt_ifindex;
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			if (cr != NULL) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = SOL_SOCKET;
				toh->name = SCM_UCRED;
				toh->len = sizeof (struct T_opthdr) + ucredsize;
				toh->status = 0;
				(void) cred2ucred(cr, cpid, &toh[1], rcr);
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			if (udp->udp_timestamp) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = SOL_SOCKET;
				toh->name = SCM_TIMESTAMP;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (timestruc_t) + _POINTER_ALIGNMENT;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				/* Align for gethrestime() */
				dstopt = (char *)P2ROUNDUP((intptr_t)dstopt,
				    sizeof (intptr_t));
				gethrestime((timestruc_t *)dstopt);
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			/*
			 * CAUTION:
			 * Due to alignment issues, processing of the
			 * IP_RECVTTL option should always be the last.
			 * Adding any option processing after this will
			 * cause an alignment panic.
			 */
			if (udp->udp_recvttl) {
				struct T_opthdr *toh;
				uint8_t *dstptr;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IP;
				toh->name = IP_RECVTTL;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint8_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				dstptr = (uint8_t *)dstopt;
				*dstptr = ((ipha_t *)rptr)->ipha_ttl;
				dstopt = (char *)toh + toh->len;
				udi_size -= toh->len;
			}

			/* Consumed all of allocated space */
			ASSERT(udi_size == 0);
		}
	} else {
		sin6_t *sin6;

		/*
		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
		 *
		 * Normally we only send up the address. If receiving of any
		 * optional receive side information is enabled, we also send
		 * that up as options.
		 * [ Only udp_rput_other() handles packets that contain IP
		 * options so code to account for does not appear immediately
		 * below but elsewhere ]
		 */
		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);

		if (ipp.ipp_fields & (IPPF_HOPOPTS|IPPF_DSTOPTS|IPPF_RTDSTOPTS|
		    IPPF_RTHDR|IPPF_IFINDEX)) {
			if (udp->udp_ipv6_recvhopopts &&
			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
				size_t hlen;

				UDP_STAT(udp_in_recvhopopts);
				/* Sizing pass: dbuf == NULL just counts. */
				hlen = copy_hop_opts(&ipp, NULL);
				if (hlen == 0)
					ipp.ipp_fields &= ~IPPF_HOPOPTS;
				udi_size += hlen;
			}
			if ((udp->udp_ipv6_recvdstopts ||
			    udp->udp_old_ipv6_recvdstopts) &&
			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
				udi_size += sizeof (struct T_opthdr) +
				    ipp.ipp_dstoptslen;
				UDP_STAT(udp_in_recvdstopts);
			}
			if (((udp->udp_ipv6_recvdstopts &&
			    udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR)) ||
			    udp->udp_ipv6_recvrthdrdstopts) &&
			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
				udi_size += sizeof (struct T_opthdr) +
				    ipp.ipp_rtdstoptslen;
				UDP_STAT(udp_in_recvrtdstopts);
			}
			if (udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR)) {
				udi_size += sizeof (struct T_opthdr) +
				    ipp.ipp_rthdrlen;
				UDP_STAT(udp_in_recvrthdr);
			}
			if (udp->udp_ipv6_recvpktinfo &&
			    (ipp.ipp_fields & IPPF_IFINDEX)) {
				udi_size += sizeof (struct T_opthdr) +
				    sizeof (struct in6_pktinfo);
				UDP_STAT(udp_in_recvpktinfo);
			}

		}
		if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) {
			udi_size += sizeof (struct T_opthdr) + ucredsize;
			cpid = DB_CPID(mp);
			UDP_STAT(udp_in_recvucred);
		}

		if (udp->udp_ipv6_recvhoplimit) {
			udi_size += sizeof (struct T_opthdr) + sizeof (int);
			UDP_STAT(udp_in_recvhoplimit);
		}

		if (udp->udp_ipv6_recvtclass) {
			udi_size += sizeof (struct T_opthdr) + sizeof (int);
			UDP_STAT(udp_in_recvtclass);
		}

		/* Allocate a message block for the T_UNITDATA_IND structure. */
		mp1 = allocb(udi_size, BPRI_MED);
		if (mp1 == NULL) {
			freemsg(mp);
			if (options_mp != NULL)
				freeb(options_mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
			    "udp_rput_end: q %p (%S)", q, "allocbfail");
			BUMP_MIB(&udp_mib, udpInErrors);
			return;
		}
		mp1->b_cont = mp;
		mp = mp1;
		mp->b_datap->db_type = M_PROTO;
		tudi = (struct T_unitdata_ind *)mp->b_rptr;
		mp->b_wptr = (uchar_t *)tudi + udi_size;
		tudi->PRIM_type = T_UNITDATA_IND;
		tudi->SRC_length = sizeof (sin6_t);
		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
		    sizeof (sin6_t);
		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
		tudi->OPT_length = udi_size;
		sin6 = (sin6_t *)&tudi[1];
		if (ipversion == IPV4_VERSION) {
			in6_addr_t v6dst;

			/* v4 datagram on a v6 socket: map the addresses. */
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
			    &sin6->sin6_addr);
			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
			    &v6dst);
			sin6->sin6_flowinfo = 0;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
			    connp->conn_zoneid);
		} else {
			sin6->sin6_addr = ip6h->ip6_src;
			/* No sin6_flowinfo per API */
			sin6->sin6_flowinfo = 0;
			/* For link-scope source pass up scope id */
			if ((ipp.ipp_fields & IPPF_IFINDEX) &&
			    IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
				sin6->sin6_scope_id = ipp.ipp_ifindex;
			else
				sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = ip_srcid_find_addr(
			    &ip6h->ip6_dst, connp->conn_zoneid);
		}
		sin6->sin6_port = udpha->uha_src_port;
		sin6->sin6_family = udp->udp_family;

		if (udi_size != 0) {
			uchar_t *dstopt;

			dstopt = (uchar_t *)&sin6[1];
			if (udp->udp_ipv6_recvpktinfo &&
			    (ipp.ipp_fields & IPPF_IFINDEX)) {
				struct T_opthdr *toh;
				struct in6_pktinfo *pkti;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_PKTINFO;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (*pkti);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				pkti = (struct in6_pktinfo *)dstopt;
				if (ipversion == IPV6_VERSION)
					pkti->ipi6_addr = ip6h->ip6_dst;
				else
					IN6_IPADDR_TO_V4MAPPED(
					    ((ipha_t *)rptr)->ipha_dst,
					    &pkti->ipi6_addr);
				pkti->ipi6_ifindex = ipp.ipp_ifindex;
				dstopt += sizeof (*pkti);
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvhoplimit) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_HOPLIMIT;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				if (ipversion == IPV6_VERSION)
					*(uint_t *)dstopt = ip6h->ip6_hops;
				else
					*(uint_t *)dstopt =
					    ((ipha_t *)rptr)->ipha_ttl;
				dstopt += sizeof (uint_t);
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvtclass) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_TCLASS;
				toh->len = sizeof (struct T_opthdr) +
				    sizeof (uint_t);
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				if (ipversion == IPV6_VERSION) {
					*(uint_t *)dstopt =
					    IPV6_FLOW_TCLASS(ip6h->ip6_flow);
				} else {
					ipha_t *ipha = (ipha_t *)rptr;
					*(uint_t *)dstopt =
					    ipha->ipha_type_of_service;
				}
				dstopt += sizeof (uint_t);
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvhopopts &&
			    (ipp.ipp_fields & IPPF_HOPOPTS)) {
				size_t hlen;

				/* Fill pass; size was reserved above. */
				hlen = copy_hop_opts(&ipp, dstopt);
				dstopt += hlen;
				udi_size -= hlen;
			}
			if (udp->udp_ipv6_recvdstopts &&
			    udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR) &&
			    (ipp.ipp_fields & IPPF_RTDSTOPTS)) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_DSTOPTS;
				toh->len = sizeof (struct T_opthdr) +
				    ipp.ipp_rtdstoptslen;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				bcopy(ipp.ipp_rtdstopts, dstopt,
				    ipp.ipp_rtdstoptslen);
				dstopt += ipp.ipp_rtdstoptslen;
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvrthdr &&
			    (ipp.ipp_fields & IPPF_RTHDR)) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_RTHDR;
				toh->len = sizeof (struct T_opthdr) +
				    ipp.ipp_rthdrlen;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				bcopy(ipp.ipp_rthdr, dstopt, ipp.ipp_rthdrlen);
				dstopt += ipp.ipp_rthdrlen;
				udi_size -= toh->len;
			}
			if (udp->udp_ipv6_recvdstopts &&
			    (ipp.ipp_fields & IPPF_DSTOPTS)) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = IPPROTO_IPV6;
				toh->name = IPV6_DSTOPTS;
				toh->len = sizeof (struct T_opthdr) +
				    ipp.ipp_dstoptslen;
				toh->status = 0;
				dstopt += sizeof (struct T_opthdr);
				bcopy(ipp.ipp_dstopts, dstopt,
				    ipp.ipp_dstoptslen);
				dstopt += ipp.ipp_dstoptslen;
				udi_size -= toh->len;
			}

			if (cr != NULL) {
				struct T_opthdr *toh;

				toh = (struct T_opthdr *)dstopt;
				toh->level = SOL_SOCKET;
				toh->name = SCM_UCRED;
				toh->len = sizeof (struct T_opthdr) + ucredsize;
				toh->status = 0;
				(void) cred2ucred(cr, cpid, &toh[1], rcr);
				dstopt += toh->len;
				udi_size -= toh->len;
			}
			/* Consumed all of allocated space */
			ASSERT(udi_size == 0);
		}
#undef	sin6
		/* No IP_RECVDSTADDR for IPv6. */
	}

	BUMP_MIB(&udp_mib, udpHCInDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END,
	    "udp_rput_end: q %p (%S)", q, "end");
	if (options_mp != NULL)
		freeb(options_mp);

	if (udp->udp_direct_sockfs) {
		/*
		 * There is nothing above us except for the stream head;
		 * use the read-side synchronous stream interface in
		 * order to reduce the time spent in interrupt thread.
		 */
		ASSERT(udp->udp_issocket);
		udp_rcv_enqueue(UDP_RD(q), udp, mp, mp_len);
	} else {
		/*
		 * Use regular STREAMS interface to pass data upstream
		 * if this is not a socket endpoint, or if we have
		 * switched over to the slow mode due to sockmod being
		 * popped or a module being pushed on top of us.
		 */
		putnext(UDP_RD(q), mp);
	}
	return;

tossit:
	/* Malformed datagram: count it and free everything. */
	freemsg(mp);
	if (options_mp != NULL)
		freeb(options_mp);
	BUMP_MIB(&udp_mib, udpInErrors);
}

/*
 * Fanout entry point: enter the conn's serialization (squeue) context and
 * hand the datagram to udp_input() via udp_input_wrapper().
 */
void
udp_conn_recv(conn_t *connp, mblk_t *mp)
{
	_UDP_ENTER(connp, mp, udp_input_wrapper, SQTAG_UDP_FANOUT);
}

/* ARGSUSED */
static void
udp_input_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_input((conn_t *)arg, mp);
	_UDP_EXIT((conn_t *)arg);
}

/*
 * Process non-M_DATA messages as well as M_DATA messages that requires
 * modifications to udp_ip_rcv_options i.e. IPv4 packets with IP options.
5210 */ 5211 static void 5212 udp_rput_other(queue_t *q, mblk_t *mp) 5213 { 5214 struct T_unitdata_ind *tudi; 5215 mblk_t *mp1; 5216 uchar_t *rptr; 5217 uchar_t *new_rptr; 5218 int hdr_length; 5219 int udi_size; /* Size of T_unitdata_ind */ 5220 int opt_len; /* Length of IP options */ 5221 sin_t *sin; 5222 struct T_error_ack *tea; 5223 mblk_t *options_mp = NULL; 5224 in_pktinfo_t *pinfo; 5225 boolean_t recv_on = B_FALSE; 5226 cred_t *cr = NULL; 5227 udp_t *udp = Q_TO_UDP(q); 5228 pid_t cpid; 5229 cred_t *rcr = udp->udp_connp->conn_cred; 5230 5231 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_START, 5232 "udp_rput_other: q %p mp %p", q, mp); 5233 5234 ASSERT(OK_32PTR(mp->b_rptr)); 5235 rptr = mp->b_rptr; 5236 5237 switch (mp->b_datap->db_type) { 5238 case M_CTL: 5239 /* 5240 * We are here only if IP_RECVSLLA and/or IP_RECVIF are set 5241 */ 5242 recv_on = B_TRUE; 5243 options_mp = mp; 5244 pinfo = (in_pktinfo_t *)options_mp->b_rptr; 5245 5246 /* 5247 * The actual data is in mp->b_cont 5248 */ 5249 mp = mp->b_cont; 5250 ASSERT(OK_32PTR(mp->b_rptr)); 5251 rptr = mp->b_rptr; 5252 break; 5253 case M_DATA: 5254 /* 5255 * M_DATA messages contain IPv4 datagrams. They are handled 5256 * after this switch. 5257 */ 5258 break; 5259 case M_PROTO: 5260 case M_PCPROTO: 5261 /* M_PROTO messages contain some type of TPI message. */ 5262 ASSERT((uintptr_t)(mp->b_wptr - rptr) <= (uintptr_t)INT_MAX); 5263 if (mp->b_wptr - rptr < sizeof (t_scalar_t)) { 5264 freemsg(mp); 5265 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5266 "udp_rput_other_end: q %p (%S)", q, "protoshort"); 5267 return; 5268 } 5269 tea = (struct T_error_ack *)rptr; 5270 5271 switch (tea->PRIM_type) { 5272 case T_ERROR_ACK: 5273 switch (tea->ERROR_prim) { 5274 case O_T_BIND_REQ: 5275 case T_BIND_REQ: { 5276 /* 5277 * If our O_T_BIND_REQ/T_BIND_REQ fails, 5278 * clear out the associated port and source 5279 * address before passing the message 5280 * upstream. If this was caused by a T_CONN_REQ 5281 * revert back to bound state. 
5282 */ 5283 udp_fanout_t *udpf; 5284 5285 udpf = &udp_bind_fanout[ 5286 UDP_BIND_HASH(udp->udp_port)]; 5287 mutex_enter(&udpf->uf_lock); 5288 if (udp->udp_state == TS_DATA_XFER) { 5289 /* Connect failed */ 5290 tea->ERROR_prim = T_CONN_REQ; 5291 /* Revert back to the bound source */ 5292 udp->udp_v6src = udp->udp_bound_v6src; 5293 udp->udp_state = TS_IDLE; 5294 mutex_exit(&udpf->uf_lock); 5295 if (udp->udp_family == AF_INET6) 5296 (void) udp_build_hdrs(q, udp); 5297 break; 5298 } 5299 5300 if (udp->udp_discon_pending) { 5301 tea->ERROR_prim = T_DISCON_REQ; 5302 udp->udp_discon_pending = 0; 5303 } 5304 V6_SET_ZERO(udp->udp_v6src); 5305 V6_SET_ZERO(udp->udp_bound_v6src); 5306 udp->udp_state = TS_UNBND; 5307 udp_bind_hash_remove(udp, B_TRUE); 5308 udp->udp_port = 0; 5309 mutex_exit(&udpf->uf_lock); 5310 if (udp->udp_family == AF_INET6) 5311 (void) udp_build_hdrs(q, udp); 5312 break; 5313 } 5314 default: 5315 break; 5316 } 5317 break; 5318 case T_BIND_ACK: 5319 udp_rput_bind_ack(q, mp); 5320 return; 5321 5322 case T_OPTMGMT_ACK: 5323 case T_OK_ACK: 5324 break; 5325 default: 5326 freemsg(mp); 5327 return; 5328 } 5329 putnext(UDP_RD(q), mp); 5330 return; 5331 } 5332 5333 /* 5334 * This is the inbound data path. 5335 * First, we make sure the data contains both IP and UDP headers. 5336 * 5337 * This handle IPv4 packets for only AF_INET sockets. 5338 * AF_INET6 sockets can never access udp_ip_rcv_options thus there 5339 * is no need saving the options. 5340 */ 5341 ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION); 5342 hdr_length = IPH_HDR_LENGTH(rptr) + UDPH_SIZE; 5343 if (mp->b_wptr - rptr < hdr_length) { 5344 if (!pullupmsg(mp, hdr_length)) { 5345 freemsg(mp); 5346 if (options_mp != NULL) 5347 freeb(options_mp); 5348 BUMP_MIB(&udp_mib, udpInErrors); 5349 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5350 "udp_rput_other_end: q %p (%S)", q, "hdrshort"); 5351 BUMP_MIB(&udp_mib, udpInErrors); 5352 return; 5353 } 5354 rptr = mp->b_rptr; 5355 } 5356 /* Walk past the headers. 
*/ 5357 new_rptr = rptr + hdr_length; 5358 if (!udp->udp_rcvhdr) 5359 mp->b_rptr = new_rptr; 5360 5361 /* Save the options if any */ 5362 opt_len = hdr_length - (IP_SIMPLE_HDR_LENGTH + UDPH_SIZE); 5363 if (opt_len > 0) { 5364 if (opt_len > udp->udp_ip_rcv_options_len) { 5365 if (udp->udp_ip_rcv_options_len) 5366 mi_free((char *)udp->udp_ip_rcv_options); 5367 udp->udp_ip_rcv_options_len = 0; 5368 udp->udp_ip_rcv_options = 5369 (uchar_t *)mi_alloc(opt_len, BPRI_HI); 5370 if (udp->udp_ip_rcv_options) 5371 udp->udp_ip_rcv_options_len = opt_len; 5372 } 5373 if (udp->udp_ip_rcv_options_len) { 5374 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, 5375 udp->udp_ip_rcv_options, opt_len); 5376 /* Adjust length if we are resusing the space */ 5377 udp->udp_ip_rcv_options_len = opt_len; 5378 } 5379 } else if (udp->udp_ip_rcv_options_len) { 5380 mi_free((char *)udp->udp_ip_rcv_options); 5381 udp->udp_ip_rcv_options = NULL; 5382 udp->udp_ip_rcv_options_len = 0; 5383 } 5384 5385 /* 5386 * Normally only send up the address. 5387 * If IP_RECVDSTADDR is set we include the destination IP 5388 * address as an option. With IP_RECVOPTS we include all 5389 * the IP options. 
5390 */ 5391 udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5392 if (udp->udp_recvdstaddr) { 5393 udi_size += sizeof (struct T_opthdr) + sizeof (struct in_addr); 5394 UDP_STAT(udp_in_recvdstaddr); 5395 } 5396 if (udp->udp_recvopts && opt_len > 0) { 5397 udi_size += sizeof (struct T_opthdr) + opt_len; 5398 UDP_STAT(udp_in_recvopts); 5399 } 5400 5401 /* 5402 * If the IP_RECVSLLA or the IP_RECVIF is set then allocate 5403 * space accordingly 5404 */ 5405 if (udp->udp_recvif && recv_on && 5406 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5407 udi_size += sizeof (struct T_opthdr) + sizeof (uint_t); 5408 UDP_STAT(udp_in_recvif); 5409 } 5410 5411 if (udp->udp_recvslla && recv_on && 5412 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5413 udi_size += sizeof (struct T_opthdr) + 5414 sizeof (struct sockaddr_dl); 5415 UDP_STAT(udp_in_recvslla); 5416 } 5417 5418 if (udp->udp_recvucred && (cr = DB_CRED(mp)) != NULL) { 5419 udi_size += sizeof (struct T_opthdr) + ucredsize; 5420 cpid = DB_CPID(mp); 5421 UDP_STAT(udp_in_recvucred); 5422 } 5423 /* 5424 * If IP_RECVTTL is set allocate the appropriate sized buffer 5425 */ 5426 if (udp->udp_recvttl) { 5427 udi_size += sizeof (struct T_opthdr) + sizeof (uint8_t); 5428 UDP_STAT(udp_in_recvttl); 5429 } 5430 5431 /* Allocate a message block for the T_UNITDATA_IND structure. 
*/ 5432 mp1 = allocb(udi_size, BPRI_MED); 5433 if (mp1 == NULL) { 5434 freemsg(mp); 5435 if (options_mp != NULL) 5436 freeb(options_mp); 5437 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5438 "udp_rput_other_end: q %p (%S)", q, "allocbfail"); 5439 BUMP_MIB(&udp_mib, udpInErrors); 5440 return; 5441 } 5442 mp1->b_cont = mp; 5443 mp = mp1; 5444 mp->b_datap->db_type = M_PROTO; 5445 tudi = (struct T_unitdata_ind *)mp->b_rptr; 5446 mp->b_wptr = (uchar_t *)tudi + udi_size; 5447 tudi->PRIM_type = T_UNITDATA_IND; 5448 tudi->SRC_length = sizeof (sin_t); 5449 tudi->SRC_offset = sizeof (struct T_unitdata_ind); 5450 tudi->OPT_offset = sizeof (struct T_unitdata_ind) + sizeof (sin_t); 5451 udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t)); 5452 tudi->OPT_length = udi_size; 5453 5454 sin = (sin_t *)&tudi[1]; 5455 sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src; 5456 sin->sin_port = ((in_port_t *) 5457 new_rptr)[-(UDPH_SIZE/sizeof (in_port_t))]; 5458 sin->sin_family = AF_INET; 5459 *(uint32_t *)&sin->sin_zero[0] = 0; 5460 *(uint32_t *)&sin->sin_zero[4] = 0; 5461 5462 /* 5463 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or 5464 * IP_RECVTTL has been set. 5465 */ 5466 if (udi_size != 0) { 5467 /* 5468 * Copy in destination address before options to avoid any 5469 * padding issues. 
5470 */ 5471 char *dstopt; 5472 5473 dstopt = (char *)&sin[1]; 5474 if (udp->udp_recvdstaddr) { 5475 struct T_opthdr *toh; 5476 ipaddr_t *dstptr; 5477 5478 toh = (struct T_opthdr *)dstopt; 5479 toh->level = IPPROTO_IP; 5480 toh->name = IP_RECVDSTADDR; 5481 toh->len = sizeof (struct T_opthdr) + sizeof (ipaddr_t); 5482 toh->status = 0; 5483 dstopt += sizeof (struct T_opthdr); 5484 dstptr = (ipaddr_t *)dstopt; 5485 *dstptr = (((ipaddr_t *)rptr)[4]); 5486 dstopt += sizeof (ipaddr_t); 5487 udi_size -= toh->len; 5488 } 5489 if (udp->udp_recvopts && udi_size != 0) { 5490 struct T_opthdr *toh; 5491 5492 toh = (struct T_opthdr *)dstopt; 5493 toh->level = IPPROTO_IP; 5494 toh->name = IP_RECVOPTS; 5495 toh->len = sizeof (struct T_opthdr) + opt_len; 5496 toh->status = 0; 5497 dstopt += sizeof (struct T_opthdr); 5498 bcopy(rptr + IP_SIMPLE_HDR_LENGTH, dstopt, opt_len); 5499 dstopt += opt_len; 5500 udi_size -= toh->len; 5501 } 5502 5503 if (udp->udp_recvslla && recv_on && 5504 (pinfo->in_pkt_flags & IPF_RECVSLLA)) { 5505 5506 struct T_opthdr *toh; 5507 struct sockaddr_dl *dstptr; 5508 5509 toh = (struct T_opthdr *)dstopt; 5510 toh->level = IPPROTO_IP; 5511 toh->name = IP_RECVSLLA; 5512 toh->len = sizeof (struct T_opthdr) + 5513 sizeof (struct sockaddr_dl); 5514 toh->status = 0; 5515 dstopt += sizeof (struct T_opthdr); 5516 dstptr = (struct sockaddr_dl *)dstopt; 5517 bcopy(&pinfo->in_pkt_slla, dstptr, 5518 sizeof (struct sockaddr_dl)); 5519 dstopt += sizeof (struct sockaddr_dl); 5520 udi_size -= toh->len; 5521 } 5522 5523 if (udp->udp_recvif && recv_on && 5524 (pinfo->in_pkt_flags & IPF_RECVIF)) { 5525 5526 struct T_opthdr *toh; 5527 uint_t *dstptr; 5528 5529 toh = (struct T_opthdr *)dstopt; 5530 toh->level = IPPROTO_IP; 5531 toh->name = IP_RECVIF; 5532 toh->len = sizeof (struct T_opthdr) + 5533 sizeof (uint_t); 5534 toh->status = 0; 5535 dstopt += sizeof (struct T_opthdr); 5536 dstptr = (uint_t *)dstopt; 5537 *dstptr = pinfo->in_pkt_ifindex; 5538 dstopt += sizeof (uint_t); 5539 
udi_size -= toh->len; 5540 } 5541 5542 if (cr != NULL) { 5543 struct T_opthdr *toh; 5544 5545 toh = (struct T_opthdr *)dstopt; 5546 toh->level = SOL_SOCKET; 5547 toh->name = SCM_UCRED; 5548 toh->len = sizeof (struct T_opthdr) + ucredsize; 5549 toh->status = 0; 5550 (void) cred2ucred(cr, cpid, &toh[1], rcr); 5551 dstopt += toh->len; 5552 udi_size -= toh->len; 5553 } 5554 5555 if (udp->udp_recvttl) { 5556 struct T_opthdr *toh; 5557 uint8_t *dstptr; 5558 5559 toh = (struct T_opthdr *)dstopt; 5560 toh->level = IPPROTO_IP; 5561 toh->name = IP_RECVTTL; 5562 toh->len = sizeof (struct T_opthdr) + 5563 sizeof (uint8_t); 5564 toh->status = 0; 5565 dstopt += sizeof (struct T_opthdr); 5566 dstptr = (uint8_t *)dstopt; 5567 *dstptr = ((ipha_t *)rptr)->ipha_ttl; 5568 dstopt += sizeof (uint8_t); 5569 udi_size -= toh->len; 5570 } 5571 5572 ASSERT(udi_size == 0); /* "Consumed" all of allocated space */ 5573 } 5574 BUMP_MIB(&udp_mib, udpHCInDatagrams); 5575 TRACE_2(TR_FAC_UDP, TR_UDP_RPUT_END, 5576 "udp_rput_other_end: q %p (%S)", q, "end"); 5577 if (options_mp != NULL) 5578 freeb(options_mp); 5579 5580 if (udp->udp_direct_sockfs) { 5581 /* 5582 * There is nothing above us except for the stream head; 5583 * use the read-side synchronous stream interface in 5584 * order to reduce the time spent in interrupt thread. 5585 */ 5586 ASSERT(udp->udp_issocket); 5587 udp_rcv_enqueue(UDP_RD(q), udp, mp, msgdsize(mp)); 5588 } else { 5589 /* 5590 * Use regular STREAMS interface to pass data upstream 5591 * if this is not a socket endpoint, or if we have 5592 * switched over to the slow mode due to sockmod being 5593 * popped or a module being pushed on top of us. 
5594 */ 5595 putnext(UDP_RD(q), mp); 5596 } 5597 } 5598 5599 /* ARGSUSED */ 5600 static void 5601 udp_rput_other_wrapper(void *arg, mblk_t *mp, void *arg2) 5602 { 5603 conn_t *connp = arg; 5604 5605 udp_rput_other(connp->conn_rq, mp); 5606 udp_exit(connp); 5607 } 5608 5609 /* 5610 * Process a T_BIND_ACK 5611 */ 5612 static void 5613 udp_rput_bind_ack(queue_t *q, mblk_t *mp) 5614 { 5615 udp_t *udp = Q_TO_UDP(q); 5616 mblk_t *mp1; 5617 ire_t *ire; 5618 struct T_bind_ack *tba; 5619 uchar_t *addrp; 5620 ipa_conn_t *ac; 5621 ipa6_conn_t *ac6; 5622 5623 if (udp->udp_discon_pending) 5624 udp->udp_discon_pending = 0; 5625 5626 /* 5627 * If a broadcast/multicast address was bound set 5628 * the source address to 0. 5629 * This ensures no datagrams with broadcast address 5630 * as source address are emitted (which would violate 5631 * RFC1122 - Hosts requirements) 5632 * 5633 * Note that when connecting the returned IRE is 5634 * for the destination address and we only perform 5635 * the broadcast check for the source address (it 5636 * is OK to connect to a broadcast/multicast address.) 5637 */ 5638 mp1 = mp->b_cont; 5639 if (mp1 != NULL && mp1->b_datap->db_type == IRE_DB_TYPE) { 5640 ire = (ire_t *)mp1->b_rptr; 5641 5642 /* 5643 * Note: we get IRE_BROADCAST for IPv6 to "mark" a multicast 5644 * local address. 
5645 */ 5646 if (ire->ire_type == IRE_BROADCAST && 5647 udp->udp_state != TS_DATA_XFER) { 5648 /* This was just a local bind to a broadcast addr */ 5649 V6_SET_ZERO(udp->udp_v6src); 5650 if (udp->udp_family == AF_INET6) 5651 (void) udp_build_hdrs(q, udp); 5652 } else if (V6_OR_V4_INADDR_ANY(udp->udp_v6src)) { 5653 /* 5654 * Local address not yet set - pick it from the 5655 * T_bind_ack 5656 */ 5657 tba = (struct T_bind_ack *)mp->b_rptr; 5658 addrp = &mp->b_rptr[tba->ADDR_offset]; 5659 switch (udp->udp_family) { 5660 case AF_INET: 5661 if (tba->ADDR_length == sizeof (ipa_conn_t)) { 5662 ac = (ipa_conn_t *)addrp; 5663 } else { 5664 ASSERT(tba->ADDR_length == 5665 sizeof (ipa_conn_x_t)); 5666 ac = &((ipa_conn_x_t *)addrp)->acx_conn; 5667 } 5668 IN6_IPADDR_TO_V4MAPPED(ac->ac_laddr, 5669 &udp->udp_v6src); 5670 break; 5671 case AF_INET6: 5672 if (tba->ADDR_length == sizeof (ipa6_conn_t)) { 5673 ac6 = (ipa6_conn_t *)addrp; 5674 } else { 5675 ASSERT(tba->ADDR_length == 5676 sizeof (ipa6_conn_x_t)); 5677 ac6 = &((ipa6_conn_x_t *) 5678 addrp)->ac6x_conn; 5679 } 5680 udp->udp_v6src = ac6->ac6_laddr; 5681 (void) udp_build_hdrs(q, udp); 5682 break; 5683 } 5684 } 5685 mp1 = mp1->b_cont; 5686 } 5687 /* 5688 * Look for one or more appended ACK message added by 5689 * udp_connect or udp_disconnect. 5690 * If none found just send up the T_BIND_ACK. 5691 * udp_connect has appended a T_OK_ACK and a T_CONN_CON. 5692 * udp_disconnect has appended a T_OK_ACK. 
5693 */ 5694 if (mp1 != NULL) { 5695 if (mp->b_cont == mp1) 5696 mp->b_cont = NULL; 5697 else { 5698 ASSERT(mp->b_cont->b_cont == mp1); 5699 mp->b_cont->b_cont = NULL; 5700 } 5701 freemsg(mp); 5702 mp = mp1; 5703 while (mp != NULL) { 5704 mp1 = mp->b_cont; 5705 mp->b_cont = NULL; 5706 putnext(UDP_RD(q), mp); 5707 mp = mp1; 5708 } 5709 return; 5710 } 5711 freemsg(mp->b_cont); 5712 mp->b_cont = NULL; 5713 putnext(UDP_RD(q), mp); 5714 } 5715 5716 /* 5717 * return SNMP stuff in buffer in mpdata 5718 */ 5719 int 5720 udp_snmp_get(queue_t *q, mblk_t *mpctl) 5721 { 5722 mblk_t *mpdata; 5723 mblk_t *mp_conn_ctl; 5724 mblk_t *mp_attr_ctl; 5725 mblk_t *mp6_conn_ctl; 5726 mblk_t *mp6_attr_ctl; 5727 mblk_t *mp_conn_tail; 5728 mblk_t *mp_attr_tail; 5729 mblk_t *mp6_conn_tail; 5730 mblk_t *mp6_attr_tail; 5731 struct opthdr *optp; 5732 mib2_udpEntry_t ude; 5733 mib2_udp6Entry_t ude6; 5734 mib2_transportMLPEntry_t mlp; 5735 int state; 5736 zoneid_t zoneid; 5737 int i; 5738 connf_t *connfp; 5739 conn_t *connp = Q_TO_CONN(q); 5740 udp_t *udp = connp->conn_udp; 5741 int v4_conn_idx; 5742 int v6_conn_idx; 5743 boolean_t needattr; 5744 5745 mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL; 5746 if (mpctl == NULL || 5747 (mpdata = mpctl->b_cont) == NULL || 5748 (mp_conn_ctl = copymsg(mpctl)) == NULL || 5749 (mp_attr_ctl = copymsg(mpctl)) == NULL || 5750 (mp6_conn_ctl = copymsg(mpctl)) == NULL || 5751 (mp6_attr_ctl = copymsg(mpctl)) == NULL) { 5752 freemsg(mp_conn_ctl); 5753 freemsg(mp_attr_ctl); 5754 freemsg(mp6_conn_ctl); 5755 return (0); 5756 } 5757 5758 zoneid = connp->conn_zoneid; 5759 5760 /* fixed length structure for IPv4 and IPv6 counters */ 5761 SET_MIB(udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t)); 5762 SET_MIB(udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t)); 5763 /* synchronize 64- and 32-bit counters */ 5764 SYNC32_MIB(&udp_mib, udpInDatagrams, udpHCInDatagrams); 5765 SYNC32_MIB(&udp_mib, udpOutDatagrams, udpHCOutDatagrams); 5766 5767 optp = (struct opthdr 
*)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
	/* First reply: the fixed-size MIB2_UDP counter block */
	optp->level = MIB2_UDP;
	optp->name = 0;
	(void) snmp_append_data(mpdata, (char *)&udp_mib, sizeof (udp_mib));
	optp->len = msgdsize(mpdata);
	qreply(q, mpctl);

	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
	v4_conn_idx = v6_conn_idx = 0;

	/* Walk every UDP conn in the global hash, one bucket at a time */
	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
		connfp = &ipcl_globalhash_fanout[i];
		connp = NULL;

		while ((connp = ipcl_get_next_conn(connfp, connp,
		    IPCL_UDP))) {
			udp = connp->conn_udp;
			/* Only report endpoints in the caller's zone */
			if (zoneid != connp->conn_zoneid)
				continue;

			/*
			 * Note that the port numbers are sent in
			 * host byte order
			 */

			if (udp->udp_state == TS_UNBND)
				state = MIB2_UDP_unbound;
			else if (udp->udp_state == TS_IDLE)
				state = MIB2_UDP_idle;
			else if (udp->udp_state == TS_DATA_XFER)
				state = MIB2_UDP_connected;
			else
				state = MIB2_UDP_unknown;

			/* MLP attribute row only emitted for MLP endpoints */
			needattr = B_FALSE;
			bzero(&mlp, sizeof (mlp));
			if (connp->conn_mlp_type != mlptSingle) {
				if (connp->conn_mlp_type == mlptShared ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_SHARED;
				if (connp->conn_mlp_type == mlptPrivate ||
				    connp->conn_mlp_type == mlptBoth)
					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
				needattr = B_TRUE;
			}

			/*
			 * Create an IPv4 table entry for IPv4 entries and also
			 * any IPv6 entries which are bound to in6addr_any
			 * (i.e. anything a IPv4 peer could connect/send to).
			 */
			if (udp->udp_ipversion == IPV4_VERSION ||
			    (udp->udp_state <= TS_IDLE &&
			    IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src))) {
				ude.udpEntryInfo.ue_state = state;
				/*
				 * If in6addr_any this will set it to
				 * INADDR_ANY
				 */
				ude.udpLocalAddress =
				    V4_PART_OF_V6(udp->udp_v6src);
				ude.udpLocalPort = ntohs(udp->udp_port);
				if (udp->udp_state == TS_DATA_XFER) {
					/*
					 * Can potentially get here for
					 * v6 socket if another process
					 * (say, ping) has just done a
					 * sendto(), changing the state
					 * from the TS_IDLE above to
					 * TS_DATA_XFER by the time we hit
					 * this part of the code.
					 */
					ude.udpEntryInfo.ue_RemoteAddress =
					    V4_PART_OF_V6(udp->udp_v6dst);
					ude.udpEntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude.udpEntryInfo.ue_RemoteAddress = 0;
					ude.udpEntryInfo.ue_RemotePort = 0;
				}

				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude.udpInstance = (uint32_t)(uintptr_t)udp;
				ude.udpCreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude.udpCreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp_conn_ctl->b_cont,
				    &mp_conn_tail, (char *)&ude, sizeof (ude));
				mlp.tme_connidx = v4_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp_attr_ctl->b_cont, &mp_attr_tail,
					    (char *)&mlp, sizeof (mlp));
			}
			if (udp->udp_ipversion == IPV6_VERSION) {
				ude6.udp6EntryInfo.ue_state = state;
				ude6.udp6LocalAddress = udp->udp_v6src;
				ude6.udp6LocalPort = ntohs(udp->udp_port);
				ude6.udp6IfIndex = udp->udp_bound_if;
				if (udp->udp_state == TS_DATA_XFER) {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    udp->udp_v6dst;
					ude6.udp6EntryInfo.ue_RemotePort =
					    ntohs(udp->udp_dstport);
				} else {
					ude6.udp6EntryInfo.ue_RemoteAddress =
					    sin6_null.sin6_addr;
					ude6.udp6EntryInfo.ue_RemotePort = 0;
				}
				/*
				 * We make the assumption that all udp_t
				 * structs will be created within an address
				 * region no larger than 32-bits.
				 */
				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
				ude6.udp6CreationProcess =
				    (udp->udp_open_pid < 0) ?
				    MIB2_UNKNOWN_PROCESS :
				    udp->udp_open_pid;
				ude6.udp6CreationTime = udp->udp_open_time;

				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
				    &mp6_conn_tail, (char *)&ude6,
				    sizeof (ude6));
				mlp.tme_connidx = v6_conn_idx++;
				if (needattr)
					(void) snmp_append_data2(
					    mp6_attr_ctl->b_cont,
					    &mp6_attr_tail, (char *)&mlp,
					    sizeof (mlp));
			}
		}
	}

	/* IPv4 UDP endpoints */
	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = MIB2_UDP_ENTRY;
	optp->len = msgdsize(mp_conn_ctl->b_cont);
	qreply(q, mp_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp_attr_ctl->b_cont);
	/* Empty attribute tables are dropped rather than sent up */
	if (optp->len == 0)
		freemsg(mp_attr_ctl);
	else
		qreply(q, mp_attr_ctl);

	/* IPv6 UDP endpoints */
	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = MIB2_UDP6_ENTRY;
	optp->len = msgdsize(mp6_conn_ctl->b_cont);
	qreply(q, mp6_conn_ctl);

	/* table of MLP attributes... */
	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
	    sizeof (struct T_optmgmt_ack)];
	optp->level = MIB2_UDP6;
	optp->name = EXPER_XPORT_MLP;
	optp->len = msgdsize(mp6_attr_ctl->b_cont);
	if (optp->len == 0)
		freemsg(mp6_attr_ctl);
	else
		qreply(q, mp6_attr_ctl);

	return (1);
}

/*
 * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
 * NOTE: Per MIB-II, UDP has no writable data.
 * TODO: If this ever actually tries to set anything, it needs to be
 * to do the appropriate locking.
5954 */ 5955 /* ARGSUSED */ 5956 int 5957 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name, 5958 uchar_t *ptr, int len) 5959 { 5960 switch (level) { 5961 case MIB2_UDP: 5962 return (0); 5963 default: 5964 return (1); 5965 } 5966 } 5967 5968 static void 5969 udp_report_item(mblk_t *mp, udp_t *udp) 5970 { 5971 char *state; 5972 char addrbuf1[INET6_ADDRSTRLEN]; 5973 char addrbuf2[INET6_ADDRSTRLEN]; 5974 uint_t print_len, buf_len; 5975 5976 buf_len = mp->b_datap->db_lim - mp->b_wptr; 5977 ASSERT(buf_len >= 0); 5978 if (buf_len == 0) 5979 return; 5980 5981 if (udp->udp_state == TS_UNBND) 5982 state = "UNBOUND"; 5983 else if (udp->udp_state == TS_IDLE) 5984 state = "IDLE"; 5985 else if (udp->udp_state == TS_DATA_XFER) 5986 state = "CONNECTED"; 5987 else 5988 state = "UnkState"; 5989 print_len = snprintf((char *)mp->b_wptr, buf_len, 5990 MI_COL_PTRFMT_STR "%4d %5u %s %s %5u %s\n", 5991 (void *)udp, udp->udp_connp->conn_zoneid, ntohs(udp->udp_port), 5992 inet_ntop(AF_INET6, &udp->udp_v6src, 5993 addrbuf1, sizeof (addrbuf1)), 5994 inet_ntop(AF_INET6, &udp->udp_v6dst, 5995 addrbuf2, sizeof (addrbuf2)), 5996 ntohs(udp->udp_dstport), state); 5997 if (print_len < buf_len) { 5998 mp->b_wptr += print_len; 5999 } else { 6000 mp->b_wptr += buf_len; 6001 } 6002 } 6003 6004 /* Report for ndd "udp_status" */ 6005 /* ARGSUSED */ 6006 static int 6007 udp_status_report(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr) 6008 { 6009 zoneid_t zoneid; 6010 connf_t *connfp; 6011 conn_t *connp = Q_TO_CONN(q); 6012 udp_t *udp = connp->conn_udp; 6013 int i; 6014 6015 /* 6016 * Because of the ndd constraint, at most we can have 64K buffer 6017 * to put in all UDP info. So to be more efficient, just 6018 * allocate a 64K buffer here, assuming we need that large buffer. 6019 * This may be a problem as any user can read udp_status. Therefore 6020 * we limit the rate of doing this using udp_ndd_get_info_interval. 6021 * This should be OK as normal users should not do this too often. 
6022 */ 6023 if (cr == NULL || secpolicy_net_config(cr, B_TRUE) != 0) { 6024 if (ddi_get_lbolt() - udp_last_ndd_get_info_time < 6025 drv_usectohz(udp_ndd_get_info_interval * 1000)) { 6026 (void) mi_mpprintf(mp, NDD_TOO_QUICK_MSG); 6027 return (0); 6028 } 6029 } 6030 if ((mp->b_cont = allocb(ND_MAX_BUF_LEN, BPRI_HI)) == NULL) { 6031 /* The following may work even if we cannot get a large buf. */ 6032 (void) mi_mpprintf(mp, NDD_OUT_OF_BUF_MSG); 6033 return (0); 6034 } 6035 (void) mi_mpprintf(mp, 6036 "UDP " MI_COL_HDRPAD_STR 6037 /* 12345678[89ABCDEF] */ 6038 " zone lport src addr dest addr port state"); 6039 /* 1234 12345 xxx.xxx.xxx.xxx xxx.xxx.xxx.xxx 12345 UNBOUND */ 6040 6041 zoneid = connp->conn_zoneid; 6042 6043 for (i = 0; i < CONN_G_HASH_SIZE; i++) { 6044 connfp = &ipcl_globalhash_fanout[i]; 6045 connp = NULL; 6046 6047 while ((connp = ipcl_get_next_conn(connfp, connp, 6048 IPCL_UDP))) { 6049 udp = connp->conn_udp; 6050 if (zoneid != GLOBAL_ZONEID && 6051 zoneid != connp->conn_zoneid) 6052 continue; 6053 6054 udp_report_item(mp->b_cont, udp); 6055 } 6056 } 6057 udp_last_ndd_get_info_time = ddi_get_lbolt(); 6058 return (0); 6059 } 6060 6061 /* 6062 * This routine creates a T_UDERROR_IND message and passes it upstream. 6063 * The address and options are copied from the T_UNITDATA_REQ message 6064 * passed in mp. This message is freed. 
 */
static void
udp_ud_err(queue_t *q, mblk_t *mp, uchar_t *destaddr, t_scalar_t destlen,
    t_scalar_t err)
{
	struct T_unitdata_req *tudr;
	mblk_t	*mp1;
	uchar_t	*optaddr;
	t_scalar_t optlen;

	if (DB_TYPE(mp) == M_DATA) {
		/* Caller supplied the destination; no options available */
		ASSERT(destaddr != NULL && destlen != 0);
		optaddr = NULL;
		optlen = 0;
	} else {
		/*
		 * Validate the T_unitdata_req before trusting its
		 * DEST/OPT offsets and lengths: every derived pointer
		 * must lie within [b_rptr, b_wptr].
		 */
		if ((mp->b_wptr < mp->b_rptr) ||
		    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
			goto done;
		}
		tudr = (struct T_unitdata_req *)mp->b_rptr;
		destaddr = mp->b_rptr + tudr->DEST_offset;
		if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
		    destaddr + tudr->DEST_length < mp->b_rptr ||
		    destaddr + tudr->DEST_length > mp->b_wptr) {
			goto done;
		}
		optaddr = mp->b_rptr + tudr->OPT_offset;
		if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
		    optaddr + tudr->OPT_length < mp->b_rptr ||
		    optaddr + tudr->OPT_length > mp->b_wptr) {
			goto done;
		}
		destlen = tudr->DEST_length;
		optlen = tudr->OPT_length;
	}

	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
	    (char *)optaddr, optlen, err);
	if (mp1 != NULL)
		putnext(UDP_RD(q), mp1);

done:
	/* The original request is always consumed */
	freemsg(mp);
}

/*
 * This routine removes a port number association from a stream. It
 * is called by udp_wput to handle T_UNBIND_REQ messages.
 */
static void
udp_unbind(queue_t *q, mblk_t *mp)
{
	udp_t *udp = Q_TO_UDP(q);

	/* If a bind has not been done, we can't unbind. */
	if (udp->udp_state == TS_UNBND) {
		udp_err_ack(q, mp, TOUTSTATE, 0);
		return;
	}
	if (cl_inet_unbind != NULL) {
		/*
		 * Running in cluster mode - register unbind information
		 */
		if (udp->udp_ipversion == IPV4_VERSION) {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET,
			    (uint8_t *)(&V4_PART_OF_V6(udp->udp_v6src)),
			    (in_port_t)udp->udp_port);
		} else {
			(*cl_inet_unbind)(IPPROTO_UDP, AF_INET6,
			    (uint8_t *)&(udp->udp_v6src),
			    (in_port_t)udp->udp_port);
		}
	}

	/* Drop our bind-hash entry and reset the local binding state */
	udp_bind_hash_remove(udp, B_FALSE);
	V6_SET_ZERO(udp->udp_v6src);
	V6_SET_ZERO(udp->udp_bound_v6src);
	udp->udp_port = 0;
	udp->udp_state = TS_UNBND;

	if (udp->udp_family == AF_INET6) {
		int error;

		/* Rebuild the header template */
		error = udp_build_hdrs(q, udp);
		if (error != 0) {
			udp_err_ack(q, mp, TSYSERR, error);
			return;
		}
	}
	/*
	 * Pass the unbind to IP; T_UNBIND_REQ is larger than T_OK_ACK
	 * and therefore ip_unbind must never return NULL.
	 */
	mp = ip_unbind(q, mp);
	ASSERT(mp != NULL);
	putnext(UDP_RD(q), mp);
}

/*
 * Don't let port fall into the privileged range.
 * Since the extra privileged ports can be arbitrary we also
 * ensure that we exclude those from consideration.
 * udp_g_epriv_ports is not sorted thus we loop over it until
 * there are no changes.
 */
static in_port_t
udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
{
	int i;
	in_port_t nextport;
	boolean_t restart = B_FALSE;

	if (random && udp_random_anon_port != 0) {
		(void) random_get_pseudo_bytes((uint8_t *)&port,
		    sizeof (in_port_t));
		/*
		 * Unless changed by a sys admin, the smallest anon port
		 * is 32768 and the largest anon port is 65535. It is
		 * very likely (50%) for the random port to be smaller
		 * than the smallest anon port.
When that happens, 6186 * add port % (anon port range) to the smallest anon 6187 * port to get the random port. It should fall into the 6188 * valid anon port range. 6189 */ 6190 if (port < udp_smallest_anon_port) { 6191 port = udp_smallest_anon_port + 6192 port % (udp_largest_anon_port - 6193 udp_smallest_anon_port); 6194 } 6195 } 6196 6197 retry: 6198 if (port < udp_smallest_anon_port) 6199 port = udp_smallest_anon_port; 6200 6201 if (port > udp_largest_anon_port) { 6202 port = udp_smallest_anon_port; 6203 if (restart) 6204 return (0); 6205 restart = B_TRUE; 6206 } 6207 6208 if (port < udp_smallest_nonpriv_port) 6209 port = udp_smallest_nonpriv_port; 6210 6211 for (i = 0; i < udp_g_num_epriv_ports; i++) { 6212 if (port == udp_g_epriv_ports[i]) { 6213 port++; 6214 /* 6215 * Make sure that the port is in the 6216 * valid range. 6217 */ 6218 goto retry; 6219 } 6220 } 6221 6222 if (is_system_labeled() && 6223 (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred), 6224 port, IPPROTO_UDP, B_TRUE)) != 0) { 6225 port = nextport; 6226 goto retry; 6227 } 6228 6229 return (port); 6230 } 6231 6232 static int 6233 udp_update_label(queue_t *wq, mblk_t *mp, ipaddr_t dst) 6234 { 6235 int err; 6236 uchar_t opt_storage[IP_MAX_OPT_LENGTH]; 6237 udp_t *udp = Q_TO_UDP(wq); 6238 6239 err = tsol_compute_label(DB_CREDDEF(mp, udp->udp_connp->conn_cred), dst, 6240 opt_storage, udp->udp_mac_exempt); 6241 if (err == 0) { 6242 err = tsol_update_options(&udp->udp_ip_snd_options, 6243 &udp->udp_ip_snd_options_len, &udp->udp_label_len, 6244 opt_storage); 6245 } 6246 if (err != 0) { 6247 DTRACE_PROBE4( 6248 tx__ip__log__info__updatelabel__udp, 6249 char *, "queue(1) failed to update options(2) on mp(3)", 6250 queue_t *, wq, char *, opt_storage, mblk_t *, mp); 6251 } else { 6252 IN6_IPADDR_TO_V4MAPPED(dst, &udp->udp_v6lastdst); 6253 } 6254 return (err); 6255 } 6256 6257 static mblk_t * 6258 udp_output_v4(conn_t *connp, mblk_t *mp, ipaddr_t v4dst, uint16_t port, 6259 uint_t srcid, int 
*error)
{
	udp_t	*udp = connp->conn_udp;
	queue_t	*q = connp->conn_wq;
	mblk_t	*mp1 = mp;
	mblk_t	*mp2;
	ipha_t	*ipha;
	int	ip_hdr_length;
	uint32_t ip_len;
	udpha_t	*udpha;
	udpattrs_t attrs;
	uchar_t	ip_snd_opt[IP_MAX_OPT_LENGTH];
	uint32_t ip_snd_opt_len = 0;

	*error = 0;

	/* A zero destination means "send to myself" */
	if (v4dst == INADDR_ANY)
		v4dst = htonl(INADDR_LOOPBACK);

	/*
	 * If options passed in, feed it for verification and handling
	 */
	attrs.udpattr_credset = B_FALSE;
	if (DB_TYPE(mp) != M_DATA) {
		mp1 = mp->b_cont;
		if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) {
			attrs.udpattr_ipp = NULL;
			attrs.udpattr_mb = mp;
			if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0)
				goto done;
			/*
			 * Note: success in processing options.
			 * mp option buffer represented by
			 * OPT_length/offset now potentially modified
			 * and contain option setting results
			 */
			ASSERT(*error == 0);
		}
	}

	/* mp1 points to the M_DATA mblk carrying the packet */
	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);

	/*
	 * Check if our saved options are valid; update if not
	 * TSOL Note: Since we are not in WRITER mode, UDP packets
	 * to different destination may require different labels.
	 * We use conn_lock to ensure that lastdst, ip_snd_options,
	 * and ip_snd_options_len are consistent for the current
	 * destination and are updated atomically.
	 */
	mutex_enter(&connp->conn_lock);
	if (is_system_labeled()) {
		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !attrs.udpattr_credset) {
			mutex_exit(&connp->conn_lock);
			DTRACE_PROBE4(
			    tx__ip__log__info__output__udp,
			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
			*error = ECONNREFUSED;
			goto done;
		}
		/* Relabel only when the destination differs from last time */
		if ((!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6lastdst) ||
		    V4_PART_OF_V6(udp->udp_v6lastdst) != v4dst) &&
		    (*error = udp_update_label(q, mp, v4dst)) != 0) {
			mutex_exit(&connp->conn_lock);
			goto done;
		}
	}
	/* Snapshot the send options under the lock for consistent use below */
	if (udp->udp_ip_snd_options_len > 0) {
		ip_snd_opt_len = udp->udp_ip_snd_options_len;
		bcopy(udp->udp_ip_snd_options, ip_snd_opt, ip_snd_opt_len);
	}
	mutex_exit(&connp->conn_lock);

	/* Add an IP header */
	ip_hdr_length = IP_SIMPLE_HDR_LENGTH + UDPH_SIZE + ip_snd_opt_len;
	ipha = (ipha_t *)&mp1->b_rptr[-ip_hdr_length];
	/*
	 * Prepend the header in place if the dblk is exclusively ours,
	 * has room, and yields an aligned header; otherwise allocate a
	 * fresh leading mblk for it.
	 */
	if (DB_REF(mp1) != 1 || (uchar_t *)ipha < DB_BASE(mp1) ||
	    !OK_32PTR(ipha)) {
		mp2 = allocb(ip_hdr_length + udp_wroff_extra, BPRI_LO);
		if (mp2 == NULL) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "allocbfail2");
			*error = ENOMEM;
			goto done;
		}
		mp2->b_wptr = DB_LIM(mp2);
		mp2->b_cont = mp1;
		mp1 = mp2;
		if (DB_TYPE(mp) != M_DATA)
			mp->b_cont = mp1;
		else
			mp = mp1;

		ipha = (ipha_t *)(mp1->b_wptr - ip_hdr_length);
	}
	ip_hdr_length -= UDPH_SIZE;
#ifdef	_BIG_ENDIAN
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((((IP_VERSION << 4) | (ip_hdr_length>>2)) << 8) |
	    udp->udp_type_of_service);
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (udp->udp_ttl << 8) | IPPROTO_UDP;
#else
	/* Set version, header length, and tos */
	*(uint16_t *)&ipha->ipha_version_and_hdr_length =
	    ((udp->udp_type_of_service << 8) |
	    ((IP_VERSION << 4) | (ip_hdr_length>>2)));
	/* Set ttl and protocol */
	*(uint16_t *)&ipha->ipha_ttl = (IPPROTO_UDP << 8) | udp->udp_ttl;
#endif
	/*
	 * Copy our address into the packet. If this is zero,
	 * first look at __sin6_src_id for a hint. If we leave the source
	 * as INADDR_ANY then ip will fill in the real source address.
	 */
	IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6src, ipha->ipha_src);
	if (srcid != 0 && ipha->ipha_src == INADDR_ANY) {
		in6_addr_t v6src;

		ip_srcid_find_id(srcid, &v6src, connp->conn_zoneid);
		IN6_V4MAPPED_TO_IPADDR(&v6src, ipha->ipha_src);
	}

	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ident = 0;

	mp1->b_rptr = (uchar_t *)ipha;

	ASSERT((uintptr_t)(mp1->b_wptr - (uchar_t *)ipha) <=
	    (uintptr_t)UINT_MAX);

	/* Determine length of packet */
	ip_len = (uint32_t)(mp1->b_wptr - (uchar_t *)ipha);
	if ((mp2 = mp1->b_cont) != NULL) {
		do {
			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
			ip_len += (uint32_t)MBLKL(mp2);
		} while ((mp2 = mp2->b_cont) != NULL);
	}
	/*
	 * If the size of the packet is greater than the maximum allowed by
	 * ip, return an error. Passing this down could cause panics because
	 * the size will have wrapped and be inconsistent with the msg size.
	 */
	if (ip_len > IP_MAXPACKET) {
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
		    "udp_wput_end: q %p (%S)", q, "IP length exceeded");
		*error = EMSGSIZE;
		goto done;
	}
	ipha->ipha_length = htons((uint16_t)ip_len);
	/* From here on ip_len holds the UDP length, network byte order */
	ip_len -= ip_hdr_length;
	ip_len = htons((uint16_t)ip_len);
	udpha = (udpha_t *)(((uchar_t *)ipha) + ip_hdr_length);

	/*
	 * Copy in the destination address
	 */
	ipha->ipha_dst = v4dst;

	/*
	 * Set ttl based on IP_MULTICAST_TTL to match IPv6 logic.
	 */
	if (CLASSD(v4dst))
		ipha->ipha_ttl = udp->udp_multicast_ttl;

	udpha->uha_dst_port = port;
	udpha->uha_src_port = udp->udp_port;

	if (ip_hdr_length > IP_SIMPLE_HDR_LENGTH) {
		uint32_t	cksum;

		bcopy(ip_snd_opt, &ipha[1], ip_snd_opt_len);
		/*
		 * Massage source route putting first source route in ipha_dst.
		 * Ignore the destination in T_unitdata_req.
		 * Create a checksum adjustment for a source route, if any.
		 */
		cksum = ip_massage_options(ipha);
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		cksum -= ((ipha->ipha_dst >> 16) & 0xFFFF) +
		    (ipha->ipha_dst & 0xFFFF);
		if ((int)cksum < 0)
			cksum--;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		cksum += ip_len;
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
		/* There might be a carry. */
		cksum = (cksum & 0xFFFF) + (cksum >> 16);
#ifdef _LITTLE_ENDIAN
		if (udp_do_checksum)
			ip_len = (cksum << 16) | ip_len;
#else
		if (udp_do_checksum)
			ip_len = (ip_len << 16) | cksum;
		else
			ip_len <<= 16;
#endif
	} else {
		/*
		 * IP does the checksum if uha_checksum is non-zero,
		 * We make it easy for IP to include our pseudo header
		 * by putting our length in uha_checksum.
		 */
		if (udp_do_checksum)
			ip_len |= (ip_len << 16);
#ifndef _LITTLE_ENDIAN
		else
			ip_len <<= 16;
#endif
	}
	/*
	 * Set UDP length and checksum: a single 32-bit store covers the
	 * adjacent uha_length and uha_checksum fields.
	 */
	*((uint32_t *)&udpha->uha_length) = ip_len;
	if (DB_CRED(mp) != NULL)
		mblk_setcred(mp1, DB_CRED(mp));

	if (DB_TYPE(mp) != M_DATA) {
		ASSERT(mp != mp1);
		freeb(mp);
	}

	/* mp has been consumed and we'll return success */
	ASSERT(*error == 0);
	mp = NULL;

	/* We're done. Pass the packet to ip.
 */
	BUMP_MIB(&udp_mib, udpHCOutDatagrams);
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
	    "udp_wput_end: q %p (%S)", q, "end");

	/*
	 * Use the slow path (ip_output) whenever any condition requires the
	 * full IP output machinery: policy checks, special routing flags,
	 * bound output ills, IP options, IPQoS, or an active multicast
	 * router.  Otherwise try the UDP fast path below.
	 */
	if ((connp->conn_flags & IPCL_CHECK_POLICY) != 0 ||
	    CONN_OUTBOUND_POLICY_PRESENT(connp) ||
	    connp->conn_dontroute || connp->conn_xmit_if_ill != NULL ||
	    connp->conn_nofailover_ill != NULL ||
	    connp->conn_outgoing_ill != NULL ||
	    ipha->ipha_version_and_hdr_length != IP_SIMPLE_HDR_VERSION ||
	    IPP_ENABLED(IPP_LOCAL_OUT) || ip_g_mrouter != NULL) {
		UDP_STAT(udp_ip_send);
		ip_output(connp, mp1, connp->conn_wq, IP_WPUT);
	} else {
		udp_send_data(udp, connp->conn_wq, mp1, ipha);
	}

done:
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&udp_mib, udpOutErrors);
	}
	return (mp);
}

/*
 * udp_send_data():
 * IPv4 UDP fast-path transmit.  Validates (or refreshes) the conn's
 * cached IRE under conn_lock, computes the UDP checksum (hardware
 * offload when the ill is capable, software otherwise) and the IP
 * header checksum, loops multicast back when a local group member
 * exists, prepends the link-layer fast-path header from the nce, and
 * hands the packet to DLD (when ILL_DLS_CAPABLE) or putnext()s it to
 * the driver stream.  Any failed fast-path precondition — broadcast/
 * local/loopback or multirt ire, unresolved nce, oversized packet,
 * explicit nexthop, missing fast-path header, flow control — falls
 * back to ip_output().  Consumes mp in all cases.
 */
static void
udp_send_data(udp_t *udp, queue_t *q, mblk_t *mp, ipha_t *ipha)
{
	conn_t		*connp = udp->udp_connp;
	ipaddr_t	src, dst;
	ill_t		*ill;
	ire_t		*ire;
	ipif_t		*ipif = NULL;
	mblk_t		*ire_fp_mp;
	uint_t		ire_fp_mp_len;
	uint16_t	*up;
	uint32_t	cksum, hcksum_txflags;
	queue_t		*dev_q;
	boolean_t	retry_caching;

	dst = ipha->ipha_dst;
	src = ipha->ipha_src;
	ASSERT(ipha->ipha_ident == 0);

	if (CLASSD(dst)) {
		int err;

		/* Multicast: resolve over the conn's multicast ipif */
		ipif = conn_get_held_ipif(connp,
		    &connp->conn_multicast_ipif, &err);

		if (ipif == NULL || ipif->ipif_isv6 ||
		    (ipif->ipif_ill->ill_phyint->phyint_flags &
		    PHYI_LOOPBACK)) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(udp_ip_send);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
	}

	retry_caching = B_FALSE;
	mutex_enter(&connp->conn_lock);
	ire = connp->conn_ire_cache;
	ASSERT(!(connp->conn_state_flags & CONN_INCIPIENT));

	/*
	 * Decide whether the cached IRE is usable as-is: it must match the
	 * destination, not be condemned, and (for multicast cache entries)
	 * be resolved over the same ill, or ill group, as the multicast
	 * ipif selected above.
	 */
	if (ire == NULL || ire->ire_addr != dst ||
	    (ire->ire_marks & IRE_MARK_CONDEMNED)) {
		retry_caching = B_TRUE;
	} else if (CLASSD(dst) && (ire->ire_type & IRE_CACHE)) {
		ill_t *stq_ill = (ill_t *)ire->ire_stq->q_ptr;

		ASSERT(ipif != NULL);
		if (stq_ill != ipif->ipif_ill && (stq_ill->ill_group == NULL ||
		    stq_ill->ill_group != ipif->ipif_ill->ill_group))
			retry_caching = B_TRUE;
	}

	if (!retry_caching) {
		ASSERT(ire != NULL);
		IRE_REFHOLD(ire);
		mutex_exit(&connp->conn_lock);
	} else {
		boolean_t cached = B_FALSE;

		connp->conn_ire_cache = NULL;
		mutex_exit(&connp->conn_lock);

		/* Release the old ire */
		if (ire != NULL) {
			IRE_REFRELE_NOTR(ire);
			ire = NULL;
		}

		if (CLASSD(dst)) {
			ASSERT(ipif != NULL);
			ire = ire_ctable_lookup(dst, 0, 0, ipif,
			    connp->conn_zoneid, MBLK_GETLABEL(mp),
			    MATCH_IRE_ILL_GROUP);
		} else {
			ASSERT(ipif == NULL);
			ire = ire_cache_lookup(dst, connp->conn_zoneid,
			    MBLK_GETLABEL(mp));
		}

		if (ire == NULL) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			UDP_STAT(udp_ire_null);
			ip_output(connp, mp, q, IP_WPUT);
			return;
		}
		IRE_REFHOLD_NOTR(ire);

		/*
		 * Re-cache the new IRE unless the conn is closing; the
		 * irb_lock/condemned re-check keeps a condemned IRE from
		 * being stored.
		 */
		mutex_enter(&connp->conn_lock);
		if (!(connp->conn_state_flags & CONN_CLOSING) &&
		    connp->conn_ire_cache == NULL) {
			rw_enter(&ire->ire_bucket->irb_lock, RW_READER);
			if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) {
				connp->conn_ire_cache = ire;
				cached = B_TRUE;
			}
			rw_exit(&ire->ire_bucket->irb_lock);
		}
		mutex_exit(&connp->conn_lock);

		/*
		 * We can continue to use the ire but since it was not
		 * cached, we should drop the extra reference.
		 */
		if (!cached)
			IRE_REFRELE_NOTR(ire);
	}
	ASSERT(ire != NULL && ire->ire_ipversion == IPV4_VERSION);
	ASSERT(!CLASSD(dst) || ipif != NULL);

	/*
	 * Check if we can take the fast-path.
	 * Note that "incomplete" ire's (where the link-layer for next hop
	 * is not resolved, or where the fast-path header in nce_fp_mp is not
	 * available yet) are sent down the legacy (slow) path
	 */
	if ((ire->ire_type & (IRE_BROADCAST|IRE_LOCAL|IRE_LOOPBACK)) ||
	    (ire->ire_flags & RTF_MULTIRT) || (ire->ire_stq == NULL) ||
	    (ire->ire_max_frag < ntohs(ipha->ipha_length)) ||
	    (connp->conn_nexthop_set) ||
	    (ire->ire_nce == NULL) ||
	    ((ire_fp_mp = ire->ire_nce->nce_fp_mp) == NULL) ||
	    ((ire_fp_mp_len = MBLKL(ire_fp_mp)) > MBLKHEAD(mp))) {
		if (ipif != NULL)
			ipif_refrele(ipif);
		UDP_STAT(udp_ip_ire_send);
		IRE_REFRELE(ire);
		ip_output(connp, mp, q, IP_WPUT);
		return;
	}

	ill = ire_to_ill(ire);
	ASSERT(ill != NULL);

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutRequests);

	dev_q = ire->ire_stq->q_next;
	ASSERT(dev_q != NULL);
	/*
	 * If the service thread is already running, or if the driver
	 * queue is currently flow-controlled, queue this packet.
	 */
	if ((q->q_first != NULL || connp->conn_draining) ||
	    ((dev_q->q_next || dev_q->q_first) && !canput(dev_q))) {
		if (ip_output_queue) {
			(void) putq(q, mp);
		} else {
			/* Queueing disabled: count and drop */
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			freemsg(mp);
		}
		if (ipif != NULL)
			ipif_refrele(ipif);
		IRE_REFRELE(ire);
		return;
	}

	/* Assign the IP ident; stored byte-swapped on little-endian */
	ipha->ipha_ident = (uint16_t)atomic_add_32_nv(&ire->ire_ident, 1);
#ifndef _BIG_ENDIAN
	ipha->ipha_ident = (ipha->ipha_ident << 8) | (ipha->ipha_ident >> 8);
#endif

	/* Late source-address selection when the conn left it unspecified */
	if (src == INADDR_ANY && !connp->conn_unspec_src) {
		if (CLASSD(dst) && !(ire->ire_flags & RTF_SETSRC))
			src = ipha->ipha_src = ipif->ipif_src_addr;
		else
			src = ipha->ipha_src = ire->ire_src_addr;
	}

	if (ILL_HCKSUM_CAPABLE(ill) && dohwcksum) {
		ASSERT(ill->ill_hcksum_capab != NULL);
		hcksum_txflags = ill->ill_hcksum_capab->ill_hcksum_txflags;
	} else {
		hcksum_txflags = 0;
	}

	/* pseudo-header checksum (do it in parts for IP header checksum) */
	cksum = (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);

	ASSERT(ipha->ipha_version_and_hdr_length == IP_SIMPLE_HDR_VERSION);
	up = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
	/* *up == 0 means the sender elected to skip the UDP checksum */
	if (*up != 0) {
		IP_CKSUM_XMIT_FAST(ire->ire_ipversion, hcksum_txflags,
		    mp, ipha, up, IPPROTO_UDP, IP_SIMPLE_HDR_LENGTH,
		    ntohs(ipha->ipha_length), cksum);

		/* Software checksum? */
		if (DB_CKSUMFLAGS(mp) == 0) {
			UDP_STAT(udp_out_sw_cksum);
			UDP_STAT_UPDATE(udp_out_sw_cksum_bytes,
			    ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH);
		}
	}

	ipha->ipha_fragment_offset_and_flags |=
	    (uint32_t)htons(ire->ire_frag_flag);

	/* Calculate IP header checksum if hardware isn't capable */
	if (!(DB_CKSUMFLAGS(mp) & HCK_IPV4_HDRCKSUM)) {
		IP_HDR_CKSUM(ipha, cksum, ((uint32_t *)ipha)[0],
		    ((uint16_t *)ipha)[4]);
	}

	if (CLASSD(dst)) {
		ilm_t *ilm;

		/* Loop the packet back if this host is a group member */
		ILM_WALKER_HOLD(ill);
		ilm = ilm_lookup_ill(ill, dst, ALL_ZONES);
		ILM_WALKER_RELE(ill);
		if (ilm != NULL) {
			ip_multicast_loopback(q, ill, mp,
			    connp->conn_multicast_loop ? 0 :
			    IP_FF_NO_MCAST_LOOP, connp->conn_zoneid);
		}

		/* If multicast TTL is 0 then we are done */
		if (ipha->ipha_ttl == 0) {
			if (ipif != NULL)
				ipif_refrele(ipif);
			freemsg(mp);
			IRE_REFRELE(ire);
			return;
		}
	}

	/* Prepend the cached link-layer (fast-path) header */
	ASSERT(DB_TYPE(ire_fp_mp) == M_DATA);
	mp->b_rptr = (uchar_t *)ipha - ire_fp_mp_len;
	bcopy(ire_fp_mp->b_rptr, mp->b_rptr, ire_fp_mp_len);

	UPDATE_OB_PKT_COUNT(ire);
	ire->ire_last_used_time = lbolt;

	BUMP_MIB(ill->ill_ip_mib, ipIfStatsHCOutTransmits);
	UPDATE_MIB(ill->ill_ip_mib, ipIfStatsHCOutOctets,
	    ntohs(ipha->ipha_length));

	if (ILL_DLS_CAPABLE(ill)) {
		/*
		 * Send the packet directly to DLD, where it may be queued
		 * depending on the availability of transmit resources at
		 * the media layer.
		 */
		IP_DLS_ILL_TX(ill, ipha, mp);
	} else {
		/* Firewall hooks may consume mp (mp set to NULL) */
		DTRACE_PROBE4(ip4__physical__out__start,
		    ill_t *, NULL, ill_t *, ill,
		    ipha_t *, ipha, mblk_t *, mp);
		FW_HOOKS(ip4_physical_out_event, ipv4firewall_physical_out,
		    NULL, ill, ipha, mp, mp);
		DTRACE_PROBE1(ip4__physical__out__end, mblk_t *, mp);
		if (mp != NULL)
			putnext(ire->ire_stq, mp);
	}

	if (ipif != NULL)
		ipif_refrele(ipif);
	IRE_REFRELE(ire);
}

/*
 * Compute the TX MAC label option for an IPv6 destination and install it
 * in the conn's sticky options; records dst in udp_v6lastdst on success.
 * Returns 0 on success, otherwise the error from the tsol routines (a
 * DTrace drop probe fires on failure).
 * NOTE(review): declared boolean_t but returns the int "err"; callers in
 * this file treat the result as an errno-style int — confirm the
 * declaration is intentional.
 */
static boolean_t
udp_update_label_v6(queue_t *wq, mblk_t *mp, in6_addr_t *dst)
{
	udp_t		*udp = Q_TO_UDP(wq);
	int		err;
	uchar_t		opt_storage[TSOL_MAX_IPV6_OPTION];

	err = tsol_compute_label_v6(DB_CREDDEF(mp, udp->udp_connp->conn_cred),
	    dst, opt_storage, udp->udp_mac_exempt);
	if (err == 0) {
		err = tsol_update_sticky(&udp->udp_sticky_ipp,
		    &udp->udp_label_len_v6, opt_storage);
	}
	if (err != 0) {
		DTRACE_PROBE4(
		    tx__ip__log__drop__updatelabel__udp6,
		    char *, "queue(1) failed to update options(2) on mp(3)",
		    queue_t *, wq, char *, opt_storage, mblk_t *, mp);
	} else {
		udp->udp_v6lastdst = *dst;
	}
	return (err);
}

/*
 * This routine handles all messages passed downstream. It either
 * consumes the message or passes it downstream; it never queues
 * a message.
 */
static void
udp_output(conn_t *connp, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
{
	sin6_t		*sin6;
	sin_t		*sin;
	ipaddr_t	v4dst;
	uint16_t	port;
	uint_t		srcid;
	queue_t		*q = connp->conn_wq;
	udp_t		*udp = connp->conn_udp;
	int		error = 0;
	struct sockaddr_storage ss;

	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_START,
	    "udp_wput_start: connp %p mp %p", connp, mp);

	/*
	 * We directly handle several cases here: T_UNITDATA_REQ message
	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for both
	 * connected and non-connected socket.
 The latter carries the
	 * address structure along when this routine gets called.
	 */
	switch (DB_TYPE(mp)) {
	case M_DATA:
		if (!udp->udp_direct_sockfs || udp->udp_state != TS_DATA_XFER) {
			if (!udp->udp_direct_sockfs ||
			    addr == NULL || addrlen == 0) {
				/* Not connected; address is required */
				BUMP_MIB(&udp_mib, udpOutErrors);
				UDP_STAT(udp_out_err_notconn);
				freemsg(mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: connp %p (%S)", connp,
				    "not-connected; address required");
				return;
			}
			ASSERT(udp->udp_issocket);
			UDP_DBGSTAT(udp_data_notconn);
			/* Not connected; do some more checks below */
			break;
		}
		/* M_DATA for connected socket */
		UDP_DBGSTAT(udp_data_conn);
		IN6_V4MAPPED_TO_IPADDR(&udp->udp_v6dst, v4dst);

		/* Initialize addr and addrlen as if they're passed in */
		if (udp->udp_family == AF_INET) {
			sin = (sin_t *)&ss;
			sin->sin_family = AF_INET;
			sin->sin_port = udp->udp_dstport;
			sin->sin_addr.s_addr = v4dst;
			addr = (struct sockaddr *)sin;
			addrlen = sizeof (*sin);
		} else {
			sin6 = (sin6_t *)&ss;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_port = udp->udp_dstport;
			sin6->sin6_flowinfo = udp->udp_flowinfo;
			sin6->sin6_addr = udp->udp_v6dst;
			sin6->sin6_scope_id = 0;
			sin6->__sin6_src_id = 0;
			addr = (struct sockaddr *)sin6;
			addrlen = sizeof (*sin6);
		}

		if (udp->udp_family == AF_INET ||
		    IN6_IS_ADDR_V4MAPPED(&udp->udp_v6dst)) {
			/*
			 * Handle both AF_INET and AF_INET6; the latter
			 * for IPV4 mapped destination addresses. Note
			 * here that both addr and addrlen point to the
			 * corresponding struct depending on the address
			 * family of the socket.
			 */
			mp = udp_output_v4(connp, mp, v4dst,
			    udp->udp_dstport, 0, &error);
		} else {
			mp = udp_output_v6(connp, mp, sin6, &error);
		}
		if (error != 0) {
			ASSERT(addr != NULL && addrlen != 0);
			goto ud_error;
		}
		return;
	case M_PROTO:
	case M_PCPROTO: {
		struct T_unitdata_req *tudr;

		ASSERT((uintptr_t)MBLKL(mp) <= (uintptr_t)INT_MAX);
		tudr = (struct T_unitdata_req *)mp->b_rptr;

		/* Handle valid T_UNITDATA_REQ here */
		if (MBLKL(mp) >= sizeof (*tudr) &&
		    ((t_primp_t)mp->b_rptr)->type == T_UNITDATA_REQ) {
			if (mp->b_cont == NULL) {
				/* No payload mblk attached */
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: q %p (%S)", q, "badaddr");
				error = EPROTO;
				goto ud_error;
			}

			if (!MBLKIN(mp, 0, tudr->DEST_offset +
			    tudr->DEST_length)) {
				/* Destination address outside the mblk */
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: q %p (%S)", q, "badaddr");
				error = EADDRNOTAVAIL;
				goto ud_error;
			}
			/*
			 * If a port has not been bound to the stream, fail.
			 * This is not a problem when sockfs is directly
			 * above us, because it will ensure that the socket
			 * is first bound before allowing data to be sent.
			 */
			if (udp->udp_state == TS_UNBND) {
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
				    "udp_wput_end: q %p (%S)", q, "outstate");
				error = EPROTO;
				goto ud_error;
			}
			addr = (struct sockaddr *)
			    &mp->b_rptr[tudr->DEST_offset];
			addrlen = tudr->DEST_length;
			if (tudr->OPT_length != 0)
				UDP_STAT(udp_out_opt);
			break;
		}
		/* FALLTHRU */
	}
	default:
		/* Anything else is handled by the (exclusive) writer path */
		udp_become_writer(connp, mp, udp_wput_other_wrapper,
		    SQTAG_UDP_OUTPUT);
		return;
	}
	ASSERT(addr != NULL);

	/* Validate the destination address for the socket's family */
	switch (udp->udp_family) {
	case AF_INET6:
		sin6 = (sin6_t *)addr;
		if (!OK_32PTR((char *)sin6) || addrlen != sizeof (sin6_t) ||
		    sin6->sin6_family != AF_INET6) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "badaddr");
			error = EADDRNOTAVAIL;
			goto ud_error;
		}

		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			/*
			 * Destination is a non-IPv4-compatible IPv6 address.
			 * Send out an IPv6 format packet.
			 */
			mp = udp_output_v6(connp, mp, sin6, &error);
			if (error != 0)
				goto ud_error;

			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "udp_output_v6");
			return;
		}
		/*
		 * If the local address is not zero or a mapped address
		 * return an error. It would be possible to send an IPv4
		 * packet but the response would never make it back to the
		 * application since it is bound to a non-mapped address.
		 */
		if (!IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src) &&
		    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "badaddr");
			error = EADDRNOTAVAIL;
			goto ud_error;
		}
		/* Send IPv4 packet without modifying udp_ipversion */
		/* Extract port and ipaddr */
		port = sin6->sin6_port;
		IN6_V4MAPPED_TO_IPADDR(&sin6->sin6_addr, v4dst);
		srcid = sin6->__sin6_src_id;
		break;

	case AF_INET:
		sin = (sin_t *)addr;
		if (!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t) ||
		    sin->sin_family != AF_INET) {
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_END,
			    "udp_wput_end: q %p (%S)", q, "badaddr");
			error = EADDRNOTAVAIL;
			goto ud_error;
		}
		/* Extract port and ipaddr */
		port = sin->sin_port;
		v4dst = sin->sin_addr.s_addr;
		srcid = 0;
		break;
	}

	mp = udp_output_v4(connp, mp, v4dst, port, srcid, &error);
	if (error != 0) {
ud_error:
		UDP_STAT(udp_out_err_output);
		ASSERT(mp != NULL);
		/* mp is freed by the following routine */
		udp_ud_err(q, mp, (uchar_t *)addr, (t_scalar_t)addrlen,
		    (t_scalar_t)error);
	}
}

/*
 * Deferred-entry wrapper: runs udp_output() with no explicit address
 * (address, if any, is carried in the message itself) and then exits
 * the conn via _UDP_EXIT.  Registered as the callback for _UDP_ENTER.
 */
/* ARGSUSED */
static void
udp_output_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_output((conn_t *)arg, mp, NULL, 0);
	_UDP_EXIT((conn_t *)arg);
}

/*
 * STREAMS write-side put procedure: funnels every downstream message
 * through udp_output() (possibly deferred) via _UDP_ENTER.
 */
static void
udp_wput(queue_t *q, mblk_t *mp)
{
	_UDP_ENTER(Q_TO_CONN(UDP_WR(q)), mp, udp_output_wrapper,
	    SQTAG_UDP_WPUT);
}

/*
 * Allocate and prepare a T_UNITDATA_REQ message.
 * (The caller owns the returned mblk; NULL is returned if allocb fails.)
 */
static mblk_t *
udp_tudr_alloc(struct sockaddr *addr, socklen_t addrlen)
{
	struct T_unitdata_req *tudr;
	mblk_t *mp;

	/* One mblk holds the TPI header followed by the copied address */
	mp = allocb(sizeof (*tudr) + addrlen, BPRI_MED);
	if (mp != NULL) {
		mp->b_wptr += sizeof (*tudr) + addrlen;
		DB_TYPE(mp) = M_PROTO;

		tudr = (struct T_unitdata_req *)mp->b_rptr;
		tudr->PRIM_type = T_UNITDATA_REQ;
		tudr->DEST_length = addrlen;
		tudr->DEST_offset = (t_scalar_t)sizeof (*tudr);
		tudr->OPT_length = 0;
		tudr->OPT_offset = 0;
		/* Address immediately follows the T_unitdata_req struct */
		bcopy(addr, tudr+1, addrlen);
	}
	return (mp);
}

/*
 * Entry point for sockfs when udp is in "direct sockfs" mode. This mode
 * is valid when we are directly beneath the stream head, and thus sockfs
 * is able to bypass STREAMS and directly call us, passing along the sockaddr
 * structure without the cumbersome T_UNITDATA_REQ interface. Note that
 * this is done for both connected and non-connected endpoint.
 */
void
udp_wput_data(queue_t *q, mblk_t *mp, struct sockaddr *addr, socklen_t addrlen)
{
	conn_t	*connp;
	udp_t	*udp;

	q = UDP_WR(q);
	connp = Q_TO_CONN(q);
	udp = connp->conn_udp;

	/* udpsockfs should only send down M_DATA for this entry point */
	ASSERT(DB_TYPE(mp) == M_DATA);

	mutex_enter(&connp->conn_lock);
	UDP_MODE_ASSERTIONS(udp, UDP_ENTER);

	if (udp->udp_mode != UDP_MT_HOT) {
		/*
		 * We can't enter this conn right away because another
		 * thread is currently executing as writer; therefore we
		 * need to deposit the message into the squeue to be
		 * drained later. If a socket address is present, we
		 * need to create a T_UNITDATA_REQ message as placeholder.
 */
		if (addr != NULL && addrlen != 0) {
			mblk_t *tudr_mp = udp_tudr_alloc(addr, addrlen);

			if (tudr_mp == NULL) {
				mutex_exit(&connp->conn_lock);
				BUMP_MIB(&udp_mib, udpOutErrors);
				UDP_STAT(udp_out_err_tudr);
				freemsg(mp);
				return;
			}
			/* Tag the packet with T_UNITDATA_REQ */
			tudr_mp->b_cont = mp;
			mp = tudr_mp;
		}
		mutex_exit(&connp->conn_lock);
		udp_enter(connp, mp, udp_output_wrapper, SQTAG_UDP_WPUT);
		return;
	}

	/* We can execute as reader right away. */
	UDP_READERS_INCREF(udp);
	mutex_exit(&connp->conn_lock);

	udp_output(connp, mp, addr, addrlen);

	udp_exit(connp);
}

/*
 * udp_output_v6():
 * Assumes that udp_wput did some sanity checking on the destination
 * address.
 * Builds the IPv6 (and optional ip6i_t private) header plus any
 * extension headers from ancillary-data and sticky options, fills in
 * the UDP header/pseudo-header checksum seed and hands the packet to
 * ip_output_v6().  Returns NULL when the message has been consumed
 * (success) or the original mp with *error set on failure.
 */
static mblk_t *
udp_output_v6(conn_t *connp, mblk_t *mp, sin6_t *sin6, int *error)
{
	ip6_t		*ip6h;
	ip6i_t		*ip6i;	/* mp1->b_rptr even if no ip6i_t */
	mblk_t		*mp1 = mp;
	mblk_t		*mp2;
	int		udp_ip_hdr_len = IPV6_HDR_LEN + UDPH_SIZE;
	size_t		ip_len;
	udpha_t		*udph;
	udp_t		*udp = connp->conn_udp;
	queue_t		*q = connp->conn_wq;
	ip6_pkt_t	ipp_s;	/* For ancillary data options */
	ip6_pkt_t	*ipp = &ipp_s;
	ip6_pkt_t	*tipp;	/* temporary ipp */
	uint32_t	csum = 0;
	uint_t		ignore = 0;
	uint_t		option_exists = 0, is_sticky = 0;
	uint8_t		*cp;
	uint8_t		*nxthdr_ptr;
	in6_addr_t	ip6_dst;
	udpattrs_t	attrs;
	boolean_t	opt_present;
	ip6_hbh_t	*hopoptsptr = NULL;
	uint_t		hopoptslen = 0;
	boolean_t	is_ancillary = B_FALSE;

	*error = 0;

	/*
	 * If the local address is a mapped address return
	 * an error.
	 * It would be possible to send an IPv6 packet but the
	 * response would never make it back to the application
	 * since it is bound to a mapped address.
	 */
	if (IN6_IS_ADDR_V4MAPPED(&udp->udp_v6src)) {
		*error = EADDRNOTAVAIL;
		goto done;
	}

	ipp->ipp_fields = 0;
	ipp->ipp_sticky_ignored = 0;

	/*
	 * If TPI options passed in, feed it for verification and handling
	 */
	attrs.udpattr_credset = B_FALSE;
	opt_present = B_FALSE;
	if (DB_TYPE(mp) != M_DATA) {
		mp1 = mp->b_cont;
		if (((struct T_unitdata_req *)mp->b_rptr)->OPT_length != 0) {
			attrs.udpattr_ipp = ipp;
			attrs.udpattr_mb = mp;
			if (udp_unitdata_opt_process(q, mp, error, &attrs) < 0)
				goto done;
			ASSERT(*error == 0);
			opt_present = B_TRUE;
		}
	}
	ignore = ipp->ipp_sticky_ignored;

	/* mp1 points to the M_DATA mblk carrying the packet */
	ASSERT(mp1 != NULL && DB_TYPE(mp1) == M_DATA);

	if (sin6->sin6_scope_id != 0 &&
	    IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr)) {
		/*
		 * IPPF_SCOPE_ID is special. It's neither a sticky
		 * option nor ancillary data. It needs to be
		 * explicitly set in options_exists.
		 */
		option_exists |= IPPF_SCOPE_ID;
	}

	/*
	 * Compute the destination address
	 */
	ip6_dst = sin6->sin6_addr;
	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
		ip6_dst = ipv6_loopback;

	/*
	 * If we're not going to the same destination as last time, then
	 * recompute the label required. This is done in a separate routine to
	 * avoid blowing up our stack here.
	 *
	 * TSOL Note: Since we are not in WRITER mode, UDP packets
	 * to different destination may require different labels.
	 * We use conn_lock to ensure that lastdst, sticky ipp_hopopts,
	 * and sticky ipp_hopoptslen are consistent for the current
	 * destination and are updated atomically.
	 */
	mutex_enter(&connp->conn_lock);
	if (is_system_labeled()) {
		/* Using UDP MLP requires SCM_UCRED from user */
		if (connp->conn_mlp_type != mlptSingle &&
		    !attrs.udpattr_credset) {
			DTRACE_PROBE4(
			    tx__ip__log__info__output__udp6,
			    char *, "MLP mp(1) lacks SCM_UCRED attr(2) on q(3)",
			    mblk_t *, mp1, udpattrs_t *, &attrs, queue_t *, q);
			*error = ECONNREFUSED;
			mutex_exit(&connp->conn_lock);
			goto done;
		}
		/*
		 * NOTE(review): udp_update_label_v6() is declared boolean_t
		 * but its result is used here as an errno value.
		 */
		if ((opt_present ||
		    !IN6_ARE_ADDR_EQUAL(&udp->udp_v6lastdst, &ip6_dst)) &&
		    (*error = udp_update_label_v6(q, mp, &ip6_dst)) != 0) {
			mutex_exit(&connp->conn_lock);
			goto done;
		}
	}

	/*
	 * If there's a security label here, then we ignore any options the
	 * user may try to set. We keep the peer's label as a hidden sticky
	 * option. We make a private copy of this label before releasing the
	 * lock so that label is kept consistent with the destination addr.
	 */
	if (udp->udp_label_len_v6 > 0) {
		ignore &= ~IPPF_HOPOPTS;
		ipp->ipp_fields &= ~IPPF_HOPOPTS;
	}

	if ((udp->udp_sticky_ipp.ipp_fields == 0) && (ipp->ipp_fields == 0)) {
		/* No sticky options nor ancillary data. */
		mutex_exit(&connp->conn_lock);
		goto no_options;
	}

	/*
	 * Go through the options figuring out where each is going to
	 * come from and build two masks. The first mask indicates if
	 * the option exists at all. The second mask indicates if the
	 * option is sticky or ancillary.
	 */
	if (!(ignore & IPPF_HOPOPTS)) {
		if (ipp->ipp_fields & IPPF_HOPOPTS) {
			option_exists |= IPPF_HOPOPTS;
			udp_ip_hdr_len += ipp->ipp_hopoptslen;
		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_HOPOPTS) {
			option_exists |= IPPF_HOPOPTS;
			is_sticky |= IPPF_HOPOPTS;
			ASSERT(udp->udp_sticky_ipp.ipp_hopoptslen != 0);
			/*
			 * Private copy of the sticky hop-by-hop options,
			 * taken while still holding conn_lock (see the
			 * TSOL note above).
			 */
			hopoptsptr = kmem_alloc(
			    udp->udp_sticky_ipp.ipp_hopoptslen, KM_NOSLEEP);
			if (hopoptsptr == NULL) {
				*error = ENOMEM;
				mutex_exit(&connp->conn_lock);
				goto done;
			}
			hopoptslen = udp->udp_sticky_ipp.ipp_hopoptslen;
			bcopy(udp->udp_sticky_ipp.ipp_hopopts, hopoptsptr,
			    hopoptslen);
			udp_ip_hdr_len += hopoptslen;
		}
	}
	mutex_exit(&connp->conn_lock);

	if (!(ignore & IPPF_RTHDR)) {
		if (ipp->ipp_fields & IPPF_RTHDR) {
			option_exists |= IPPF_RTHDR;
			udp_ip_hdr_len += ipp->ipp_rthdrlen;
		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTHDR) {
			option_exists |= IPPF_RTHDR;
			is_sticky |= IPPF_RTHDR;
			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rthdrlen;
		}
	}

	/* En-route destination options only matter with a routing header */
	if (!(ignore & IPPF_RTDSTOPTS) && (option_exists & IPPF_RTHDR)) {
		if (ipp->ipp_fields & IPPF_RTDSTOPTS) {
			option_exists |= IPPF_RTDSTOPTS;
			udp_ip_hdr_len += ipp->ipp_rtdstoptslen;
		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_RTDSTOPTS) {
			option_exists |= IPPF_RTDSTOPTS;
			is_sticky |= IPPF_RTDSTOPTS;
			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_rtdstoptslen;
		}
	}

	if (!(ignore & IPPF_DSTOPTS)) {
		if (ipp->ipp_fields & IPPF_DSTOPTS) {
			option_exists |= IPPF_DSTOPTS;
			udp_ip_hdr_len += ipp->ipp_dstoptslen;
		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DSTOPTS) {
			option_exists |= IPPF_DSTOPTS;
			is_sticky |= IPPF_DSTOPTS;
			udp_ip_hdr_len += udp->udp_sticky_ipp.ipp_dstoptslen;
		}
	}

	if (!(ignore & IPPF_IFINDEX)) {
		if (ipp->ipp_fields & IPPF_IFINDEX) {
			option_exists |= IPPF_IFINDEX;
		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_IFINDEX) {
			option_exists |= IPPF_IFINDEX;
			is_sticky |= IPPF_IFINDEX;
		}
	}

	if (!(ignore & IPPF_ADDR)) {
		if (ipp->ipp_fields & IPPF_ADDR) {
			option_exists |= IPPF_ADDR;
		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_ADDR) {
			option_exists |= IPPF_ADDR;
			is_sticky |= IPPF_ADDR;
		}
	}

	if (!(ignore & IPPF_DONTFRAG)) {
		if (ipp->ipp_fields & IPPF_DONTFRAG) {
			option_exists |= IPPF_DONTFRAG;
		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_DONTFRAG) {
			option_exists |= IPPF_DONTFRAG;
			is_sticky |= IPPF_DONTFRAG;
		}
	}

	if (!(ignore & IPPF_USE_MIN_MTU)) {
		if (ipp->ipp_fields & IPPF_USE_MIN_MTU) {
			option_exists |= IPPF_USE_MIN_MTU;
		} else if (udp->udp_sticky_ipp.ipp_fields &
		    IPPF_USE_MIN_MTU) {
			option_exists |= IPPF_USE_MIN_MTU;
			is_sticky |= IPPF_USE_MIN_MTU;
		}
	}

	if (!(ignore & IPPF_HOPLIMIT) && (ipp->ipp_fields & IPPF_HOPLIMIT))
		option_exists |= IPPF_HOPLIMIT;
	/* IPV6_HOPLIMIT can never be sticky */
	ASSERT(!(udp->udp_sticky_ipp.ipp_fields & IPPF_HOPLIMIT));

	if (!(ignore & IPPF_UNICAST_HOPS) &&
	    (udp->udp_sticky_ipp.ipp_fields & IPPF_UNICAST_HOPS)) {
		option_exists |= IPPF_UNICAST_HOPS;
		is_sticky |= IPPF_UNICAST_HOPS;
	}

	if (!(ignore & IPPF_MULTICAST_HOPS) &&
	    (udp->udp_sticky_ipp.ipp_fields & IPPF_MULTICAST_HOPS)) {
		option_exists |= IPPF_MULTICAST_HOPS;
		is_sticky |= IPPF_MULTICAST_HOPS;
	}

	if (!(ignore & IPPF_TCLASS)) {
		if (ipp->ipp_fields & IPPF_TCLASS) {
			option_exists |= IPPF_TCLASS;
		} else if (udp->udp_sticky_ipp.ipp_fields & IPPF_TCLASS) {
			option_exists |= IPPF_TCLASS;
			is_sticky |= IPPF_TCLASS;
		}
	}

	if (!(ignore & IPPF_NEXTHOP) &&
	    (udp->udp_sticky_ipp.ipp_fields & IPPF_NEXTHOP)) {
		option_exists |= IPPF_NEXTHOP;
		is_sticky |= IPPF_NEXTHOP;
	}

no_options:

	/*
	 * If any options carried in the ip6i_t were specified, we
	 * need to account for the ip6i_t in the data we'll be sending
	 * down.
	 */
	if (option_exists & IPPF_HAS_IP6I)
		udp_ip_hdr_len += sizeof (ip6i_t);

	/* check/fix buffer config, setup pointers into it */
	ip6h = (ip6_t *)&mp1->b_rptr[-udp_ip_hdr_len];
	if (DB_REF(mp1) != 1 || ((unsigned char *)ip6h < DB_BASE(mp1)) ||
	    !OK_32PTR(ip6h)) {
		/* Try to get everything in a single mblk next time */
		if (udp_ip_hdr_len > udp->udp_max_hdr_len) {
			udp->udp_max_hdr_len = udp_ip_hdr_len;
			(void) mi_set_sth_wroff(UDP_RD(q),
			    udp->udp_max_hdr_len + udp_wroff_extra);
		}
		/* Not enough headroom (or shared/misaligned dblk): prepend */
		mp2 = allocb(udp_ip_hdr_len + udp_wroff_extra, BPRI_LO);
		if (mp2 == NULL) {
			*error = ENOMEM;
			goto done;
		}
		mp2->b_wptr = DB_LIM(mp2);
		mp2->b_cont = mp1;
		mp1 = mp2;
		if (DB_TYPE(mp) != M_DATA)
			mp->b_cont = mp1;
		else
			mp = mp1;

		ip6h = (ip6_t *)(mp1->b_wptr - udp_ip_hdr_len);
	}
	mp1->b_rptr = (unsigned char *)ip6h;
	ip6i = (ip6i_t *)ip6h;

/* Select ancillary-data values over sticky ones per option flag f */
#define	ANCIL_OR_STICKY_PTR(f)	((is_sticky & f) ? &udp->udp_sticky_ipp : ipp)
	if (option_exists & IPPF_HAS_IP6I) {
		ip6h = (ip6_t *)&ip6i[1];
		ip6i->ip6i_flags = 0;
		ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;

		/* sin6_scope_id takes precendence over IPPF_IFINDEX */
		if (option_exists & IPPF_SCOPE_ID) {
			ip6i->ip6i_flags |= IP6I_IFINDEX;
			ip6i->ip6i_ifindex = sin6->sin6_scope_id;
		} else if (option_exists & IPPF_IFINDEX) {
			tipp = ANCIL_OR_STICKY_PTR(IPPF_IFINDEX);
			ASSERT(tipp->ipp_ifindex != 0);
			ip6i->ip6i_flags |= IP6I_IFINDEX;
			ip6i->ip6i_ifindex = tipp->ipp_ifindex;
		}

		if (option_exists & IPPF_ADDR) {
			/*
			 * Enable per-packet source address verification if
			 * IPV6_PKTINFO specified the source address.
			 * ip6_src is set in the transport's _wput function.
			 */
			ip6i->ip6i_flags |= IP6I_VERIFY_SRC;
		}

		if (option_exists & IPPF_DONTFRAG) {
			ip6i->ip6i_flags |= IP6I_DONTFRAG;
		}

		if (option_exists & IPPF_USE_MIN_MTU) {
			ip6i->ip6i_flags = IP6I_API_USE_MIN_MTU(
			    ip6i->ip6i_flags, ipp->ipp_use_min_mtu);
		}

		if (option_exists & IPPF_NEXTHOP) {
			tipp = ANCIL_OR_STICKY_PTR(IPPF_NEXTHOP);
			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_nexthop));
			ip6i->ip6i_flags |= IP6I_NEXTHOP;
			ip6i->ip6i_nexthop = tipp->ipp_nexthop;
		}

		/*
		 * tell IP this is an ip6i_t private header
		 */
		ip6i->ip6i_nxt = IPPROTO_RAW;
	}

	/* Initialize IPv6 header */
	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	bzero(&ip6h->ip6_src, sizeof (ip6h->ip6_src));

	/* Set the hoplimit of the outgoing packet. */
	if (option_exists & IPPF_HOPLIMIT) {
		/* IPV6_HOPLIMIT ancillary data overrides all other settings. */
		ip6h->ip6_hops = ipp->ipp_hoplimit;
		ip6i->ip6i_flags |= IP6I_HOPLIMIT;
	} else if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
		ip6h->ip6_hops = udp->udp_multicast_ttl;
		if (option_exists & IPPF_MULTICAST_HOPS)
			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
	} else {
		ip6h->ip6_hops = udp->udp_ttl;
		if (option_exists & IPPF_UNICAST_HOPS)
			ip6i->ip6i_flags |= IP6I_HOPLIMIT;
	}

	if (option_exists & IPPF_ADDR) {
		tipp = ANCIL_OR_STICKY_PTR(IPPF_ADDR);
		ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&tipp->ipp_addr));
		ip6h->ip6_src = tipp->ipp_addr;
	} else {
		/*
		 * The source address was not set using IPV6_PKTINFO.
		 * First look at the bound source.
		 * If unspecified fallback to __sin6_src_id.
		 */
		ip6h->ip6_src = udp->udp_v6src;
		if (sin6->__sin6_src_id != 0 &&
		    IN6_IS_ADDR_UNSPECIFIED(&ip6h->ip6_src)) {
			ip_srcid_find_id(sin6->__sin6_src_id,
			    &ip6h->ip6_src, connp->conn_zoneid);
		}
	}

	nxthdr_ptr = (uint8_t *)&ip6h->ip6_nxt;
	cp = (uint8_t *)&ip6h[1];

	/*
	 * Here's where we have to start stringing together
	 * any extension headers in the right order:
	 * Hop-by-hop, destination, routing, and final destination opts.
	 */
	if (option_exists & IPPF_HOPOPTS) {
		/* Hop-by-hop options */
		ip6_hbh_t *hbh = (ip6_hbh_t *)cp;
		tipp = ANCIL_OR_STICKY_PTR(IPPF_HOPOPTS);
		if (hopoptslen == 0) {
			/* Ancillary-data hopopts: no private copy was made */
			hopoptsptr = tipp->ipp_hopopts;
			hopoptslen = tipp->ipp_hopoptslen;
			is_ancillary = B_TRUE;
		}

		*nxthdr_ptr = IPPROTO_HOPOPTS;
		nxthdr_ptr = &hbh->ip6h_nxt;

		bcopy(hopoptsptr, cp, hopoptslen);
		cp += hopoptslen;

		/* Free the private (sticky) copy now that it's been copied */
		if (hopoptsptr != NULL && !is_ancillary) {
			kmem_free(hopoptsptr, hopoptslen);
			hopoptsptr = NULL;
			hopoptslen = 0;
		}
	}
	/*
	 * En-route destination options
	 * Only do them if there's a routing header as well
	 */
	if (option_exists & IPPF_RTDSTOPTS) {
		ip6_dest_t *dst = (ip6_dest_t *)cp;
		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTDSTOPTS);

		*nxthdr_ptr = IPPROTO_DSTOPTS;
		nxthdr_ptr = &dst->ip6d_nxt;

		bcopy(tipp->ipp_rtdstopts, cp, tipp->ipp_rtdstoptslen);
		cp += tipp->ipp_rtdstoptslen;
	}
	/*
	 * Routing header next
	 */
	if (option_exists & IPPF_RTHDR) {
		ip6_rthdr_t *rt = (ip6_rthdr_t *)cp;
		tipp = ANCIL_OR_STICKY_PTR(IPPF_RTHDR);

		*nxthdr_ptr = IPPROTO_ROUTING;
		nxthdr_ptr = &rt->ip6r_nxt;

		bcopy(tipp->ipp_rthdr, cp, tipp->ipp_rthdrlen);
		cp += tipp->ipp_rthdrlen;
	}
	/*
	 * Do ultimate destination options
	 */
	if (option_exists & IPPF_DSTOPTS) {
		ip6_dest_t *dest = (ip6_dest_t *)cp;
		tipp = ANCIL_OR_STICKY_PTR(IPPF_DSTOPTS);

		*nxthdr_ptr = IPPROTO_DSTOPTS;
		nxthdr_ptr = &dest->ip6d_nxt;

		bcopy(tipp->ipp_dstopts, cp, tipp->ipp_dstoptslen);
		cp += tipp->ipp_dstoptslen;
	}
	/*
	 * Now set the last header pointer to the proto passed in
	 */
	ASSERT((int)(cp - (uint8_t *)ip6i) == (udp_ip_hdr_len - UDPH_SIZE));
	*nxthdr_ptr = IPPROTO_UDP;

	/* Update UDP header */
	udph = (udpha_t *)((uchar_t *)ip6i + udp_ip_hdr_len - UDPH_SIZE);
	udph->uha_dst_port = sin6->sin6_port;
	udph->uha_src_port = udp->udp_port;

	/*
	 * Copy in the destination address
	 */
	ip6h->ip6_dst = ip6_dst;

	ip6h->ip6_vcf =
	    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
	    (sin6->sin6_flowinfo & ~IPV6_VERS_AND_FLOW_MASK);

	if (option_exists & IPPF_TCLASS) {
		tipp = ANCIL_OR_STICKY_PTR(IPPF_TCLASS);
		ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
		    tipp->ipp_tclass);
	}

	if (option_exists & IPPF_RTHDR) {
		ip6_rthdr_t *rth;

		/*
		 * Perform any processing needed for source routing.
		 * We know that all extension headers will be in the same mblk
		 * as the IPv6 header.
		 */
		rth = ip_find_rthdr_v6(ip6h, mp1->b_wptr);
		if (rth != NULL && rth->ip6r_segleft != 0) {
			if (rth->ip6r_type != IPV6_RTHDR_TYPE_0) {
				/*
				 * Drop packet - only support Type 0 routing.
				 * Notify the application as well.
				 */
				*error = EPROTO;
				goto done;
			}

			/*
			 * rth->ip6r_len is twice the number of
			 * addresses in the header. Thus it must be even.
			 */
			if (rth->ip6r_len & 0x1) {
				*error = EPROTO;
				goto done;
			}
			/*
			 * Shuffle the routing header and ip6_dst
			 * addresses, and get the checksum difference
			 * between the first hop (in ip6_dst) and
			 * the destination (in the last routing hdr entry).
			 */
			csum = ip_massage_options_v6(ip6h, rth);
			/*
			 * Verify that the first hop isn't a mapped address.
			 * Routers along the path need to do this verification
			 * for subsequent hops.
			 */
			if (IN6_IS_ADDR_V4MAPPED(&ip6h->ip6_dst)) {
				*error = EADDRNOTAVAIL;
				goto done;
			}

			cp += (rth->ip6r_len + 1)*8;
		}
	}

	/* count up length of UDP packet */
	ip_len = (mp1->b_wptr - (unsigned char *)ip6h) - IPV6_HDR_LEN;
	if ((mp2 = mp1->b_cont) != NULL) {
		do {
			ASSERT((uintptr_t)MBLKL(mp2) <= (uintptr_t)UINT_MAX);
			ip_len += (uint32_t)MBLKL(mp2);
		} while ((mp2 = mp2->b_cont) != NULL);
	}

	/*
	 * If the size of the packet is greater than the maximum allowed by
	 * ip, return an error. Passing this down could cause panics because
	 * the size will have wrapped and be inconsistent with the msg size.
	 */
	if (ip_len > IP_MAXPACKET) {
		*error = EMSGSIZE;
		goto done;
	}

	/* Store the UDP length. Subtract length of extension hdrs */
	udph->uha_length = htons(ip_len + IPV6_HDR_LEN -
	    (int)((uchar_t *)udph - (uchar_t *)ip6h));

	/*
	 * We make it easy for IP to include our pseudo header
	 * by putting our length in uh_checksum, modified (if
	 * we have a routing header) by the checksum difference
	 * between the ultimate destination and first hop addresses.
	 * Note: UDP over IPv6 must always checksum the packet.
	 */
	csum += udph->uha_length;
	csum = (csum & 0xFFFF) + (csum >> 16);
	udph->uha_checksum = (uint16_t)csum;

#ifdef _LITTLE_ENDIAN
	ip_len = htons(ip_len);
#endif
	ip6h->ip6_plen = ip_len;
	if (DB_CRED(mp) != NULL)
		mblk_setcred(mp1, DB_CRED(mp));

	if (DB_TYPE(mp) != M_DATA) {
		/* Free the T_UNITDATA_REQ header; the data goes to IP */
		ASSERT(mp != mp1);
		freeb(mp);
	}

	/* mp has been consumed and we'll return success */
	ASSERT(*error == 0);
	mp = NULL;

	/* We're done.
 * Pass the packet to IP */
	BUMP_MIB(&udp_mib, udpHCOutDatagrams);
	ip_output_v6(connp, mp1, q, IP_WPUT);

done:
	/*
	 * Free the hop-by-hop option buffer unless it came from ancillary
	 * data (in which case it points into the caller's option state).
	 */
	if (hopoptsptr != NULL && !is_ancillary) {
		kmem_free(hopoptsptr, hopoptslen);
		hopoptsptr = NULL;
	}
	if (*error != 0) {
		ASSERT(mp != NULL);
		BUMP_MIB(&udp_mib, udpOutErrors);
	}
	return (mp);
}

/*
 * Handle all non-M_DATA messages on the write side: TPI control
 * primitives (M_PROTO/M_PCPROTO), M_FLUSH, M_IOCTL and M_IOCDATA.
 * Unrecognized messages are passed through unchanged to IP.
 */
static void
udp_wput_other(queue_t *q, mblk_t *mp)
{
	uchar_t	*rptr = mp->b_rptr;
	struct datab *db;
	struct iocblk *iocp;
	cred_t	*cr;
	conn_t	*connp = Q_TO_CONN(q);
	udp_t	*udp = connp->conn_udp;

	TRACE_1(TR_FAC_UDP, TR_UDP_WPUT_OTHER_START,
	    "udp_wput_other_start: q %p", q);

	db = mp->b_datap;

	/* Use the message's credentials if present, else the conn's */
	cr = DB_CREDDEF(mp, connp->conn_cred);

	switch (db->db_type) {
	case M_PROTO:
	case M_PCPROTO:
		/* The message must at least carry the TPI primitive type */
		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
			freemsg(mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "protoshort");
			return;
		}
		switch (((t_primp_t)rptr)->type) {
		case T_ADDR_REQ:
			udp_addr_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "addrreq");
			return;
		case O_T_BIND_REQ:
		case T_BIND_REQ:
			udp_bind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "bindreq");
			return;
		case T_CONN_REQ:
			udp_connect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "connreq");
			return;
		case T_CAPABILITY_REQ:
			udp_capability_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "capabreq");
			return;
		case T_INFO_REQ:
			udp_info_req(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "inforeq");
			return;
		case T_UNITDATA_REQ:
			/*
			 * If a T_UNITDATA_REQ gets here, the address must
			 * be bad.  Valid T_UNITDATA_REQs are handled
			 * in udp_wput.
			 */
			udp_ud_err(q, mp, NULL, 0, EADDRNOTAVAIL);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "unitdatareq");
			return;
		case T_UNBIND_REQ:
			udp_unbind(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)", q, "unbindreq");
			return;
		case T_SVR4_OPTMGMT_REQ:
			if (!snmpcom_req(q, mp, udp_snmp_set, udp_snmp_get, cr))
				/*
				 * Use upper queue for option processing in
				 * case the request is not handled at this
				 * level and needs to be passed down to IP.
				 */
				(void) svr4_optcom_req(_WR(UDP_RD(q)),
				    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "optmgmtreq");
			return;

		case T_OPTMGMT_REQ:
			/*
			 * Use upper queue for option processing in
			 * case the request is not handled at this
			 * level and needs to be passed down to IP.
			 */
			(void) tpi_optcom_req(_WR(UDP_RD(q)),
			    mp, cr, &udp_opt_obj);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "optmgmtreq");
			return;

		case T_DISCON_REQ:
			udp_disconnect(q, mp);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "disconreq");
			return;

		/* The following TPI message is not supported by udp. */
		case O_T_CONN_RES:
		case T_CONN_RES:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "connres/disconreq");
			return;

		/* The following 3 TPI messages are illegal for udp. */
		case T_DATA_REQ:
		case T_EXDATA_REQ:
		case T_ORDREL_REQ:
			udp_err_ack(q, mp, TNOTSUPPORT, 0);
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "data/exdata/ordrel");
			return;
		default:
			/* Unknown TPI primitive: fall through to IP */
			break;
		}
		break;
	case M_FLUSH:
		if (*rptr & FLUSHW)
			flushq(q, FLUSHDATA);
		break;
	case M_IOCTL:
		iocp = (struct iocblk *)mp->b_rptr;
		switch (iocp->ioc_cmd) {
		case TI_GETPEERNAME:
			if (udp->udp_state != TS_DATA_XFER) {
				/*
				 * If a default destination address has not
				 * been associated with the stream, then we
				 * don't know the peer's name.
				 */
				iocp->ioc_error = ENOTCONN;
				iocp->ioc_count = 0;
				mp->b_datap->db_type = M_IOCACK;
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)",
				    q, "getpeername");
				return;
			}
			/* FALLTHRU */
		case TI_GETMYNAME: {
			/*
			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
			 * need to copyin the user's strbuf structure.
			 * Processing will continue in the M_IOCDATA case
			 * below.
			 */
			mi_copyin(q, mp, NULL,
			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
			TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
			    "udp_wput_other_end: q %p (%S)",
			    q, "getmyname");
			return;
		}
		case ND_SET:
			/* nd_getset performs the necessary checking */
		case ND_GET:
			if (nd_getset(q, udp_g_nd, mp)) {
				putnext(UDP_RD(q), mp);
				TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
				    "udp_wput_other_end: q %p (%S)",
				    q, "get");
				return;
			}
			break;
		case _SIOCSOCKFALLBACK:
			/*
			 * Either sockmod is about to be popped and the
			 * socket would now be treated as a plain stream,
			 * or a module is about to be pushed so we could
			 * no longer use read-side synchronous stream.
			 * Drain any queued data and disable direct sockfs
			 * interface from now on.
			 */
			if (!udp->udp_issocket) {
				DB_TYPE(mp) = M_IOCNAK;
				iocp->ioc_error = EINVAL;
			} else {
				udp->udp_issocket = B_FALSE;
				if (udp->udp_direct_sockfs) {
					/*
					 * Disable read-side synchronous
					 * stream interface and drain any
					 * queued data.
					 */
					udp_rcv_drain(UDP_RD(q), udp,
					    B_FALSE);
					ASSERT(!udp->udp_direct_sockfs);
					UDP_STAT(udp_sock_fallback);
				}
				DB_TYPE(mp) = M_IOCACK;
				iocp->ioc_error = 0;
			}
			iocp->ioc_count = 0;
			iocp->ioc_rval = 0;
			putnext(UDP_RD(q), mp);
			return;
		default:
			break;
		}
		break;
	case M_IOCDATA:
		udp_wput_iocdata(q, mp);
		TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
		    "udp_wput_other_end: q %p (%S)", q, "iocdata");
		return;
	default:
		/* Unrecognized messages are passed through without change. */
		break;
	}
	TRACE_2(TR_FAC_UDP, TR_UDP_WPUT_OTHER_END,
	    "udp_wput_other_end: q %p (%S)", q, "end");
	ip_output(connp, mp, q, IP_WPUT);
}

/*
 * Squeue/conn entry point wrapper: recover the conn's write queue and
 * dispatch to udp_wput_other, then drop the conn reference via udp_exit.
 */
/* ARGSUSED */
static void
udp_wput_other_wrapper(void *arg, mblk_t *mp, void *arg2)
{
	udp_wput_other(((conn_t *)arg)->conn_wq, mp);
	udp_exit((conn_t *)arg);
}

/*
 * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
 * messages.
 */
static void
udp_wput_iocdata(queue_t *q, mblk_t *mp)
{
	mblk_t	*mp1;
	STRUCT_HANDLE(strbuf, sb);
	uint16_t port;
	in6_addr_t v6addr;
	ipaddr_t v4addr;
	uint32_t flowinfo = 0;
	int	addrlen;
	udp_t	*udp = Q_TO_UDP(q);

	/* Make sure it is one of ours.
	 */
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
	case TI_GETPEERNAME:
		break;
	default:
		/* Not one of our copy ioctls; pass it on down to IP */
		ip_output(udp->udp_connp, mp, q, IP_WPUT);
		return;
	}

	q = WR(UDP_RD(q));
	switch (mi_copy_state(q, mp, &mp1)) {
	case -1:
		return;
	case MI_COPY_CASE(MI_COPY_IN, 1):
		/* First copyin (the strbuf) has completed; continue below */
		break;
	case MI_COPY_CASE(MI_COPY_OUT, 1):
		/*
		 * The address has been copied out, so now
		 * copyout the strbuf.
		 */
		mi_copyout(q, mp);
		return;
	case MI_COPY_CASE(MI_COPY_OUT, 2):
		/*
		 * The address and strbuf have been copied out.
		 * We're done, so just acknowledge the original
		 * M_IOCTL.
		 */
		mi_copy_done(q, mp, 0);
		return;
	default:
		/*
		 * Something strange has happened, so acknowledge
		 * the original M_IOCTL with an EPROTO error.
		 */
		mi_copy_done(q, mp, EPROTO);
		return;
	}

	/*
	 * Now we have the strbuf structure for TI_GETMYNAME
	 * and TI_GETPEERNAME.  Next we copyout the requested
	 * address and then we'll copyout the strbuf.
	 */
	STRUCT_SET_HANDLE(sb, ((struct iocblk *)mp->b_rptr)->ioc_flag,
	    (void *)mp1->b_rptr);
	if (udp->udp_family == AF_INET)
		addrlen = sizeof (sin_t);
	else
		addrlen = sizeof (sin6_t);

	if (STRUCT_FGET(sb, maxlen) < addrlen) {
		mi_copy_done(q, mp, EINVAL);
		return;
	}
	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
	case TI_GETMYNAME:
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			if (!IN6_IS_ADDR_V4MAPPED_ANY(&udp->udp_v6src) &&
			    !IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v4addr = V4_PART_OF_V6(udp->udp_v6src);
			} else {
				/*
				 * INADDR_ANY
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast.  Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be INADDR_ANY)
				 */
				v4addr = V4_PART_OF_V6(udp->udp_bound_v6src);
			}
		} else {
			/* udp->udp_family == AF_INET6 */
			if (!IN6_IS_ADDR_UNSPECIFIED(&udp->udp_v6src)) {
				v6addr = udp->udp_v6src;
			} else {
				/*
				 * UNSPECIFIED
				 * udp_v6src is not set, we might be bound to
				 * broadcast/multicast.  Use udp_bound_v6src as
				 * local address instead (that could
				 * also still be UNSPECIFIED)
				 */
				v6addr = udp->udp_bound_v6src;
			}
		}
		port = udp->udp_port;
		break;
	case TI_GETPEERNAME:
		if (udp->udp_state != TS_DATA_XFER) {
			/* No default destination => no peer name */
			mi_copy_done(q, mp, ENOTCONN);
			return;
		}
		if (udp->udp_family == AF_INET) {
			ASSERT(udp->udp_ipversion == IPV4_VERSION);
			v4addr = V4_PART_OF_V6(udp->udp_v6dst);
		} else {
			/* udp->udp_family == AF_INET6) */
			v6addr = udp->udp_v6dst;
			flowinfo = udp->udp_flowinfo;
		}
		port = udp->udp_dstport;
		break;
	default:
		mi_copy_done(q, mp, EPROTO);
		return;
	}
	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
	if (!mp1)
		return;

	if (udp->udp_family == AF_INET) {
		sin_t *sin;

		STRUCT_FSET(sb, len, (int)sizeof (sin_t));
		sin = (sin_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin[1];
		*sin = sin_null;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = v4addr;
		sin->sin_port = port;
	} else {
		/* udp->udp_family == AF_INET6 */
		sin6_t *sin6;

		STRUCT_FSET(sb, len, (int)sizeof (sin6_t));
		sin6 = (sin6_t *)mp1->b_rptr;
		mp1->b_wptr = (uchar_t *)&sin6[1];
		*sin6 = sin6_null;
		sin6->sin6_family = AF_INET6;
		sin6->sin6_flowinfo = flowinfo;
		sin6->sin6_addr = v6addr;
		sin6->sin6_port = port;
	}
	/* Copy out the address */
	mi_copyout(q, mp);
}

/*
 * Process the options attached to a T_UNITDATA_REQ through
 * tpi_optcom_buf().  Returns 0 on success; on failure returns -1
 * and stores the framework's error in *errorp.
 */
static int
udp_unitdata_opt_process(queue_t *q, mblk_t *mp, int *errorp,
    udpattrs_t *udpattrs)
{
	struct T_unitdata_req *udreqp;
	int	is_absreq_failure;
	cred_t	*cr;
	conn_t	*connp = Q_TO_CONN(q);

	ASSERT(((t_primp_t)mp->b_rptr)->type);

	cr = DB_CREDDEF(mp, connp->conn_cred);

	udreqp = (struct T_unitdata_req *)mp->b_rptr;

	/*
	 * Use upper queue for option processing since the callback
	 * routines expect to be called in UDP instance instead of IP.
	 */
	*errorp = tpi_optcom_buf(_WR(UDP_RD(q)), mp, &udreqp->OPT_length,
	    udreqp->OPT_offset, cr, &udp_opt_obj,
	    udpattrs, &is_absreq_failure);

	if (*errorp != 0) {
		/*
		 * Note: No special action needed in this
		 * module for "is_absreq_failure"
		 */
		return (-1);		/* failure */
	}
	ASSERT(is_absreq_failure == 0);
	return (0);	/* success */
}

/*
 * Module load-time initialization: size and allocate the bind-hash
 * fanout table, register tunables and kstats, and create the udp_t
 * kmem cache.
 */
void
udp_ddi_init(void)
{
	int i;

	UDP6_MAJ = ddi_name_to_major(UDP6);

	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
	    udp_opt_obj.odb_opt_arr_cnt);

	/* The fanout table size must be a power of two */
	if (udp_bind_fanout_size & (udp_bind_fanout_size - 1)) {
		/* Not a power of two.
Round up to nearest power of two */
		for (i = 0; i < 31; i++) {
			if (udp_bind_fanout_size < (1 << i))
				break;
		}
		udp_bind_fanout_size = 1 << i;
	}
	udp_bind_fanout = kmem_zalloc(udp_bind_fanout_size *
	    sizeof (udp_fanout_t), KM_SLEEP);
	for (i = 0; i < udp_bind_fanout_size; i++) {
		mutex_init(&udp_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
		    NULL);
	}
	(void) udp_param_register(udp_param_arr, A_CNT(udp_param_arr));

	udp_kstat_init();

	udp_cache = kmem_cache_create("udp_cache", sizeof (udp_t),
	    CACHE_ALIGN_SIZE, NULL, NULL, NULL, NULL, NULL, 0);
}

/*
 * Module unload-time teardown: release everything udp_ddi_init()
 * set up (nd table, fanout locks and table, kstats, kmem cache).
 */
void
udp_ddi_destroy(void)
{
	int i;

	nd_free(&udp_g_nd);

	for (i = 0; i < udp_bind_fanout_size; i++) {
		mutex_destroy(&udp_bind_fanout[i].uf_lock);
	}

	kmem_free(udp_bind_fanout, udp_bind_fanout_size *
	    sizeof (udp_fanout_t));

	udp_kstat_fini();

	kmem_cache_destroy(udp_cache);
}

/*
 * Create and install the MIB-II ("mib2") named kstat and the
 * per-module "udpstat" virtual kstat.
 */
static void
udp_kstat_init(void)
{
	udp_named_kstat_t template = {
		{ "inDatagrams",	KSTAT_DATA_UINT64, 0 },
		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
		{ "outDatagrams",	KSTAT_DATA_UINT64, 0 },
		{ "entrySize",		KSTAT_DATA_INT32, 0 },
		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
	};

	udp_mibkp = kstat_create(UDP_MOD_NAME, 0, UDP_MOD_NAME,
	    "mib2", KSTAT_TYPE_NAMED, NUM_OF_FIELDS(udp_named_kstat_t), 0);

	if (udp_mibkp == NULL)
		return;

	/*
	 * NOTE(review): entrySize/entry6Size are declared KSTAT_DATA_INT32
	 * but assigned through the ui32 union member here -- same bits,
	 * but confirm this matches the kstat consumers' expectations.
	 */
	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);

	bcopy(&template, udp_mibkp->ks_data, sizeof (template));

	udp_mibkp->ks_update = udp_kstat_update;

	kstat_install(udp_mibkp);

	if ((udp_ksp = kstat_create(UDP_MOD_NAME, 0, "udpstat",
	    "net", KSTAT_TYPE_NAMED,
	    sizeof (udp_statistics) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL)) != NULL) {
		/* Virtual kstat: point ks_data at our statistics struct */
		udp_ksp->ks_data = &udp_statistics;
		kstat_install(udp_ksp);
	}
}

/*
 * Delete the kstats created by udp_kstat_init().
 */
static void
udp_kstat_fini(void)
{
	if (udp_ksp != NULL) {
		kstat_delete(udp_ksp);
		udp_ksp = NULL;
	}
	if (udp_mibkp != NULL) {
		kstat_delete(udp_mibkp);
		udp_mibkp = NULL;
	}
}

/*
 * ks_update callback for the mib2 kstat: snapshot the global udp_mib
 * counters into the named kstat.  Read-only; writes are rejected.
 */
static int
udp_kstat_update(kstat_t *kp, int rw)
{
	udp_named_kstat_t *udpkp;

	if ((kp == NULL) || (kp->ks_data == NULL))
		return (EIO);

	if (rw == KSTAT_WRITE)
		return (EACCES);

	udpkp = (udp_named_kstat_t *)kp->ks_data;

	udpkp->inDatagrams.value.ui64 =	udp_mib.udpHCInDatagrams;
	udpkp->inErrors.value.ui32 =	udp_mib.udpInErrors;
	udpkp->outDatagrams.value.ui64 = udp_mib.udpHCOutDatagrams;
	udpkp->outErrors.value.ui32 =	udp_mib.udpOutErrors;

	return (0);
}

/* ARGSUSED */
static void
udp_rput(queue_t *q, mblk_t *mp)
{
	/*
	 * We get here whenever we do qreply() from IP,
	 * i.e as part of handlings ioctls, etc.
	 */
	putnext(q, mp);
}

/*
 * Read-side synchronous stream info entry point, called as a
 * result of handling certain STREAMS ioctl operations.
 */
static int
udp_rinfop(queue_t *q, infod_t *dp)
{
	mblk_t	*mp;
	uint_t	cmd = dp->d_cmd;
	int	res = 0;
	int	error = 0;
	udp_t	*udp = Q_TO_UDP(RD(UDP_WR(q)));
	struct stdata *stp = STREAM(q);

	mutex_enter(&udp->udp_drain_lock);
	/* If shutdown on read has happened, return nothing */
	mutex_enter(&stp->sd_lock);
	if (stp->sd_flag & STREOF) {
		mutex_exit(&stp->sd_lock);
		goto done;
	}
	mutex_exit(&stp->sd_lock);

	if ((mp = udp->udp_rcv_list_head) == NULL)
		goto done;

	/* Head of the list is a T_UNITDATA_IND with attached data */
	ASSERT(DB_TYPE(mp) != M_DATA && mp->b_cont != NULL);

	if (cmd & INFOD_COUNT) {
		/*
		 * Return the number of messages.
		 */
		dp->d_count += udp->udp_rcv_msgcnt;
		res |= INFOD_COUNT;
	}
	if (cmd & INFOD_BYTES) {
		/*
		 * Return size of all data messages.
		 */
		dp->d_bytes += udp->udp_rcv_cnt;
		res |= INFOD_BYTES;
	}
	if (cmd & INFOD_FIRSTBYTES) {
		/*
		 * Return size of first data message.
		 * NOTE(review): this assigns d_bytes rather than
		 * accumulating (unlike INFOD_BYTES above) -- presumably
		 * the INFOD_FIRSTBYTES contract; confirm against the
		 * infod consumers.
		 */
		dp->d_bytes = msgdsize(mp);
		res |= INFOD_FIRSTBYTES;
		dp->d_cmd &= ~INFOD_FIRSTBYTES;
	}
	if (cmd & INFOD_COPYOUT) {
		mblk_t *mp1 = mp->b_cont;
		int n;
		/*
		 * Return data contents of first message.
		 */
		ASSERT(DB_TYPE(mp1) == M_DATA);
		while (mp1 != NULL && dp->d_uiop->uio_resid > 0) {
			n = MIN(dp->d_uiop->uio_resid, MBLKL(mp1));
			if (n != 0 && (error = uiomove((char *)mp1->b_rptr, n,
			    UIO_READ, dp->d_uiop)) != 0) {
				goto done;
			}
			mp1 = mp1->b_cont;
		}
		res |= INFOD_COPYOUT;
		dp->d_cmd &= ~INFOD_COPYOUT;
	}
done:
	mutex_exit(&udp->udp_drain_lock);

	dp->d_res |= res;

	return (error);
}

/*
 * Read-side synchronous stream entry point.  This is called as a result
 * of recv/read operation done at sockfs, and is guaranteed to execute
 * outside of the interrupt thread context.  It returns a single datagram
 * (b_cont chain of T_UNITDATA_IND plus data) to the upper layer.
 */
static int
udp_rrw(queue_t *q, struiod_t *dp)
{
	mblk_t	*mp;
	udp_t	*udp = Q_TO_UDP(_RD(UDP_WR(q)));

	/* We should never get here when we're in SNMP mode */
	ASSERT(!(udp->udp_connp->conn_flags & IPCL_UDPMOD));

	/*
	 * Dequeue datagram from the head of the list and return
	 * it to caller; also ensure that RSLEEP sd_wakeq flag is
	 * set/cleared depending on whether or not there's data
	 * remaining in the list.
	 */
	mutex_enter(&udp->udp_drain_lock);
	if (!udp->udp_direct_sockfs) {
		/* Direct mode disabled (e.g. mid-fallback); tell sockfs */
		mutex_exit(&udp->udp_drain_lock);
		UDP_STAT(udp_rrw_busy);
		return (EBUSY);
	}
	if ((mp = udp->udp_rcv_list_head) != NULL) {
		uint_t size = msgdsize(mp);

		/* Last datagram in the list? */
		if ((udp->udp_rcv_list_head = mp->b_next) == NULL)
			udp->udp_rcv_list_tail = NULL;
		mp->b_next = NULL;

		udp->udp_rcv_cnt -= size;
		udp->udp_rcv_msgcnt--;
		UDP_STAT(udp_rrw_msgcnt);

		/* No longer flow-controlling? */
		if (udp->udp_rcv_cnt < udp->udp_rcv_hiwat &&
		    udp->udp_rcv_msgcnt < udp->udp_rcv_hiwat)
			udp->udp_drain_qfull = B_FALSE;
	}
	if (udp->udp_rcv_list_head == NULL) {
		/*
		 * Either we just dequeued the last datagram or
		 * we get here from sockfs and have nothing to
		 * return; in this case clear RSLEEP.
		 */
		ASSERT(udp->udp_rcv_cnt == 0);
		ASSERT(udp->udp_rcv_msgcnt == 0);
		ASSERT(udp->udp_rcv_list_tail == NULL);
		STR_WAKEUP_CLEAR(STREAM(q));
	} else {
		/*
		 * More data follows; we need udp_rrw() to be
		 * called in future to pick up the rest.
		 */
		STR_WAKEUP_SET(STREAM(q));
	}
	mutex_exit(&udp->udp_drain_lock);
	dp->d_mp = mp;
	return (0);
}

/*
 * Enqueue a completely-built T_UNITDATA_IND message into the receive
 * list; this is typically executed within the interrupt thread context
 * and so we do things as quickly as possible.
 */
static void
udp_rcv_enqueue(queue_t *q, udp_t *udp, mblk_t *mp, uint_t pkt_len)
{
	/* Caller passes the read queue and a fully-formed unitdata chain */
	ASSERT(q == RD(q));
	ASSERT(pkt_len == msgdsize(mp));
	ASSERT(mp->b_next == NULL && mp->b_cont != NULL);
	ASSERT(DB_TYPE(mp) == M_PROTO && DB_TYPE(mp->b_cont) == M_DATA);
	ASSERT(MBLKL(mp) >= sizeof (struct T_unitdata_ind));

	mutex_enter(&udp->udp_drain_lock);
	/*
	 * Wake up and signal the receiving app; it is okay to do this
	 * before enqueueing the mp because we are holding the drain lock.
	 * One of the advantages of synchronous stream is the ability for
	 * us to find out when the application performs a read on the
	 * socket by way of udp_rrw() entry point being called.  We need
	 * to generate SIGPOLL/SIGIO for each received data in the case
	 * of asynchronous socket just as in the strrput() case.  However,
	 * we only wake the application up when necessary, i.e. during the
	 * first enqueue.  When udp_rrw() is called, we send up a single
	 * datagram upstream and call STR_WAKEUP_SET() again when there
	 * are still data remaining in our receive queue.
	 */
	if (udp->udp_rcv_list_head == NULL) {
		STR_WAKEUP_SET(STREAM(q));
		udp->udp_rcv_list_head = mp;
	} else {
		udp->udp_rcv_list_tail->b_next = mp;
	}
	udp->udp_rcv_list_tail = mp;
	udp->udp_rcv_cnt += pkt_len;
	udp->udp_rcv_msgcnt++;

	/* Need to flow-control? */
	if (udp->udp_rcv_cnt >= udp->udp_rcv_hiwat ||
	    udp->udp_rcv_msgcnt >= udp->udp_rcv_hiwat)
		udp->udp_drain_qfull = B_TRUE;

	/* Update poll events and send SIGPOLL/SIGIO if necessary */
	STR_SENDSIG(STREAM(q));
	mutex_exit(&udp->udp_drain_lock);
}

/*
 * Drain the contents of receive list to the module upstream; we do
 * this during close or when we fallback to the slow mode due to
 * sockmod being popped or a module being pushed on top of us.
 */
static void
udp_rcv_drain(queue_t *q, udp_t *udp, boolean_t closing)
{
	mblk_t *mp;

	ASSERT(q == RD(q));

	mutex_enter(&udp->udp_drain_lock);
	/*
	 * There is no race with a concurrent udp_input() sending
	 * up packets using putnext() after we have cleared the
	 * udp_direct_sockfs flag but before we have completed
	 * sending up the packets in udp_rcv_list, since we are
	 * either a writer or we have quiesced the conn.
	 */
	udp->udp_direct_sockfs = B_FALSE;
	mutex_exit(&udp->udp_drain_lock);

	if (udp->udp_rcv_list_head != NULL)
		UDP_STAT(udp_drain);

	/*
	 * Send up everything via putnext(); note here that we
	 * don't need the udp_drain_lock to protect us since
	 * nothing can enter udp_rrw() and that we currently
	 * have exclusive access to this udp.
	 */
	while ((mp = udp->udp_rcv_list_head) != NULL) {
		udp->udp_rcv_list_head = mp->b_next;
		mp->b_next = NULL;
		udp->udp_rcv_cnt -= msgdsize(mp);
		udp->udp_rcv_msgcnt--;
		if (closing) {
			/* Stream is going away; discard instead of sending */
			freemsg(mp);
		} else {
			putnext(q, mp);
		}
	}
	ASSERT(udp->udp_rcv_cnt == 0);
	ASSERT(udp->udp_rcv_msgcnt == 0);
	ASSERT(udp->udp_rcv_list_head == NULL);
	udp->udp_rcv_list_tail = NULL;
	udp->udp_drain_qfull = B_FALSE;
}

/*
 * Set the receive high-water mark: allow 50% extra buffering on top of
 * the requested size, capped at the udp_max_buf tunable.  Returns the
 * value actually installed in udp_rcv_hiwat.
 */
static size_t
udp_set_rcv_hiwat(udp_t *udp, size_t size)
{
	/* We add a bit of extra buffering */
	size += size >> 1;
	if (size > udp_max_buf)
		size = udp_max_buf;

	udp->udp_rcv_hiwat = size;
	return (size);
}

/*
 * Little helper for IPsec's NAT-T processing: report whether UDP
 * checksum computation is currently enabled (udp_do_checksum tunable).
 */
boolean_t
udp_compute_checksum(void)
{
	return (udp_do_checksum);
}